Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/CMakeLists.txt22
-rw-r--r--src/plugins/abf/FEATURE.yaml7
-rw-r--r--src/plugins/abf/abf.api2
-rw-r--r--src/plugins/abf/abf_api.c27
-rw-r--r--src/plugins/abf/abf_itf_attach.c23
-rw-r--r--src/plugins/abf/abf_policy.c102
-rw-r--r--src/plugins/acl/CMakeLists.txt11
-rw-r--r--src/plugins/acl/acl.api42
-rw-r--r--src/plugins/acl/acl.c162
-rw-r--r--src/plugins/acl/acl_hash_lookup_doc.md240
-rw-r--r--src/plugins/acl/acl_hash_lookup_doc.rst243
-rw-r--r--src/plugins/acl/acl_lookup_context.md125
-rw-r--r--src/plugins/acl/acl_lookup_context.rst138
-rw-r--r--src/plugins/acl/acl_multicore_doc.md349
-rw-r--r--src/plugins/acl/acl_multicore_doc.rst354
-rw-r--r--src/plugins/acl/acl_test.c100
-rw-r--r--src/plugins/acl/dataplane_node.c6
-rw-r--r--src/plugins/acl/dataplane_node_nonip.c6
-rw-r--r--src/plugins/acl/exports.h6
-rw-r--r--src/plugins/acl/fa_node.h2
-rw-r--r--src/plugins/acl/hash_lookup.c8
-rw-r--r--src/plugins/acl/public_inlines.h8
-rw-r--r--src/plugins/acl/sess_mgmt_node.c25
-rw-r--r--src/plugins/adl/adl.api2
-rw-r--r--src/plugins/adl/adl.c8
-rw-r--r--src/plugins/adl/adl_api.c1
-rw-r--r--src/plugins/adl/ip4_allowlist.c75
-rw-r--r--src/plugins/adl/setup.pg72
-rw-r--r--src/plugins/af_packet/CMakeLists.txt (renamed from src/plugins/gbp/CMakeLists.txt)54
-rw-r--r--src/plugins/af_packet/FEATURE.yaml16
-rw-r--r--src/plugins/af_packet/af_packet.api200
-rw-r--r--src/plugins/af_packet/af_packet.c1054
-rw-r--r--src/plugins/af_packet/af_packet.h182
-rw-r--r--src/plugins/af_packet/af_packet_api.c253
-rw-r--r--src/plugins/af_packet/cli.c341
-rw-r--r--src/plugins/af_packet/device.c793
-rw-r--r--src/plugins/af_packet/dir.dox29
-rw-r--r--src/plugins/af_packet/node.c832
-rw-r--r--src/plugins/af_packet/plugin.c12
-rw-r--r--src/plugins/af_xdp/CMakeLists.txt34
-rw-r--r--src/plugins/af_xdp/af_xdp.api97
-rw-r--r--src/plugins/af_xdp/af_xdp.h9
-rw-r--r--src/plugins/af_xdp/af_xdp_doc.md129
-rw-r--r--src/plugins/af_xdp/af_xdp_doc.rst164
-rw-r--r--src/plugins/af_xdp/api.c69
-rw-r--r--src/plugins/af_xdp/cli.c8
-rw-r--r--src/plugins/af_xdp/device.c425
-rw-r--r--src/plugins/af_xdp/input.c9
-rw-r--r--src/plugins/af_xdp/output.c51
-rw-r--r--src/plugins/af_xdp/plugin.c2
-rw-r--r--src/plugins/af_xdp/test_api.c105
-rw-r--r--src/plugins/af_xdp/unformat.c2
-rw-r--r--src/plugins/arping/arping.api23
-rw-r--r--src/plugins/arping/arping.c11
-rw-r--r--src/plugins/arping/arping_api.c37
-rw-r--r--src/plugins/arping/arping_test.c19
-rw-r--r--src/plugins/avf/CMakeLists.txt1
-rw-r--r--src/plugins/avf/README.md107
-rw-r--r--src/plugins/avf/README.rst135
-rw-r--r--src/plugins/avf/avf.h10
-rw-r--r--src/plugins/avf/avf_advanced_flow.h408
-rw-r--r--src/plugins/avf/avf_api.c13
-rw-r--r--src/plugins/avf/avf_fdir_lib.c82
-rw-r--r--src/plugins/avf/avf_rss_lib.c2690
-rw-r--r--src/plugins/avf/cli.c62
-rw-r--r--src/plugins/avf/device.c124
-rw-r--r--src/plugins/avf/flow.c317
-rw-r--r--src/plugins/avf/input.c10
-rw-r--r--src/plugins/avf/output.c57
-rw-r--r--src/plugins/avf/plugin.c2
-rw-r--r--src/plugins/avf/virtchnl.h2
-rw-r--r--src/plugins/bpf_trace_filter/CMakeLists.txt45
-rw-r--r--src/plugins/bpf_trace_filter/FEATURE.yaml8
-rw-r--r--src/plugins/bpf_trace_filter/api.c97
-rw-r--r--src/plugins/bpf_trace_filter/bpf_trace_filter.api35
-rw-r--r--src/plugins/bpf_trace_filter/bpf_trace_filter.c112
-rw-r--r--src/plugins/bpf_trace_filter/bpf_trace_filter.h42
-rw-r--r--src/plugins/bpf_trace_filter/bpf_trace_filter.rst4
-rw-r--r--src/plugins/bpf_trace_filter/cli.c99
-rw-r--r--src/plugins/bpf_trace_filter/plugin.c33
-rw-r--r--src/plugins/bufmon/CMakeLists.txt20
-rw-r--r--src/plugins/bufmon/FEATURE.yaml8
-rw-r--r--src/plugins/bufmon/bufmon.c314
-rw-r--r--src/plugins/bufmon/bufmon_doc.rst33
-rw-r--r--src/plugins/builtinurl/builtins.c69
-rw-r--r--src/plugins/builtinurl/builtinurl.c4
-rw-r--r--src/plugins/cdp/cdp.c4
-rw-r--r--src/plugins/cdp/cdp.pg12
-rw-r--r--src/plugins/cdp/cdp_input.c45
-rw-r--r--src/plugins/cdp/cdp_node.c2
-rw-r--r--src/plugins/cdp/cdp_periodic.c6
-rw-r--r--src/plugins/cnat/CMakeLists.txt1
-rw-r--r--src/plugins/cnat/FEATURE.yaml2
-rw-r--r--src/plugins/cnat/cnat.api8
-rw-r--r--src/plugins/cnat/cnat.rst42
-rw-r--r--src/plugins/cnat/cnat_api.c8
-rw-r--r--src/plugins/cnat/cnat_bihash.h9
-rw-r--r--src/plugins/cnat/cnat_client.c76
-rw-r--r--src/plugins/cnat/cnat_client.h41
-rw-r--r--src/plugins/cnat/cnat_inline.h104
-rw-r--r--src/plugins/cnat/cnat_maglev.c379
-rw-r--r--src/plugins/cnat/cnat_maglev.h21
-rw-r--r--src/plugins/cnat/cnat_node.h485
-rw-r--r--src/plugins/cnat/cnat_node_feature.c20
-rw-r--r--src/plugins/cnat/cnat_node_snat.c10
-rw-r--r--src/plugins/cnat/cnat_node_vip.c8
-rw-r--r--src/plugins/cnat/cnat_scanner.c1
-rw-r--r--src/plugins/cnat/cnat_session.c77
-rw-r--r--src/plugins/cnat/cnat_session.h5
-rw-r--r--src/plugins/cnat/cnat_snat_policy.c15
-rw-r--r--src/plugins/cnat/cnat_snat_policy.h3
-rw-r--r--src/plugins/cnat/cnat_src_policy.c4
-rw-r--r--src/plugins/cnat/cnat_translation.c136
-rw-r--r--src/plugins/cnat/cnat_translation.h22
-rw-r--r--src/plugins/cnat/cnat_types.c25
-rw-r--r--src/plugins/cnat/cnat_types.h52
-rw-r--r--src/plugins/crypto_ipsecmb/CMakeLists.txt10
-rw-r--r--src/plugins/crypto_ipsecmb/ipsecmb.c294
-rw-r--r--src/plugins/crypto_native/CMakeLists.txt14
-rw-r--r--src/plugins/crypto_native/FEATURE.yaml2
-rw-r--r--src/plugins/crypto_native/aes.h451
-rw-r--r--src/plugins/crypto_native/aes_cbc.c478
-rw-r--r--src/plugins/crypto_native/aes_ctr.c130
-rw-r--r--src/plugins/crypto_native/aes_gcm.c1213
-rw-r--r--src/plugins/crypto_native/crypto_native.h68
-rw-r--r--src/plugins/crypto_native/ghash.h419
-rw-r--r--src/plugins/crypto_native/main.c108
-rw-r--r--src/plugins/crypto_native/sha2.c186
-rw-r--r--src/plugins/crypto_openssl/CMakeLists.txt3
-rw-r--r--src/plugins/crypto_openssl/crypto_openssl.h20
-rw-r--r--src/plugins/crypto_openssl/main.c319
-rw-r--r--src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.api2
-rw-r--r--src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.h18
-rw-r--r--src/plugins/crypto_sw_scheduler/main.c579
-rw-r--r--src/plugins/ct6/ct6.c18
-rw-r--r--src/plugins/ct6/ct6.h2
-rw-r--r--src/plugins/ct6/ct6_in2out.c2
-rw-r--r--src/plugins/ct6/ct6_out2in.c2
-rw-r--r--src/plugins/dev_ena/CMakeLists.txt21
-rw-r--r--src/plugins/dev_ena/aenq.c186
-rw-r--r--src/plugins/dev_ena/aq.c359
-rw-r--r--src/plugins/dev_ena/ena.c265
-rw-r--r--src/plugins/dev_ena/ena.h234
-rw-r--r--src/plugins/dev_ena/ena_admin_defs.h685
-rw-r--r--src/plugins/dev_ena/ena_aenq_defs.h107
-rw-r--r--src/plugins/dev_ena/ena_defs.h25
-rw-r--r--src/plugins/dev_ena/ena_inlines.h40
-rw-r--r--src/plugins/dev_ena/ena_io_defs.h179
-rw-r--r--src/plugins/dev_ena/ena_reg_defs.h150
-rw-r--r--src/plugins/dev_ena/format.c146
-rw-r--r--src/plugins/dev_ena/format_aq.c412
-rw-r--r--src/plugins/dev_ena/port.c96
-rw-r--r--src/plugins/dev_ena/queue.c384
-rw-r--r--src/plugins/dev_ena/reg.c172
-rw-r--r--src/plugins/dev_ena/rx_node.c457
-rw-r--r--src/plugins/dev_ena/tx_node.c514
-rw-r--r--src/plugins/dev_iavf/CMakeLists.txt20
-rw-r--r--src/plugins/dev_iavf/adminq.c485
-rw-r--r--src/plugins/dev_iavf/counters.c128
-rw-r--r--src/plugins/dev_iavf/format.c112
-rw-r--r--src/plugins/dev_iavf/iavf.c307
-rw-r--r--src/plugins/dev_iavf/iavf.h218
-rw-r--r--src/plugins/dev_iavf/iavf_desc.h125
-rw-r--r--src/plugins/dev_iavf/iavf_regs.h364
-rw-r--r--src/plugins/dev_iavf/port.c543
-rw-r--r--src/plugins/dev_iavf/queue.c178
-rw-r--r--src/plugins/dev_iavf/rx_node.c529
-rw-r--r--src/plugins/dev_iavf/tx_node.c517
-rw-r--r--src/plugins/dev_iavf/virtchnl.c372
-rw-r--r--src/plugins/dev_iavf/virtchnl.h570
-rw-r--r--src/plugins/dev_iavf/virtchnl_funcs.h241
-rw-r--r--src/plugins/dev_octeon/CMakeLists.txt42
-rw-r--r--src/plugins/dev_octeon/common.h29
-rw-r--r--src/plugins/dev_octeon/flow.c505
-rw-r--r--src/plugins/dev_octeon/format.c183
-rw-r--r--src/plugins/dev_octeon/hw_defs.h98
-rw-r--r--src/plugins/dev_octeon/init.c312
-rw-r--r--src/plugins/dev_octeon/octeon.h186
-rw-r--r--src/plugins/dev_octeon/port.c493
-rw-r--r--src/plugins/dev_octeon/queue.c311
-rw-r--r--src/plugins/dev_octeon/roc_helper.c181
-rw-r--r--src/plugins/dev_octeon/rx_node.c392
-rw-r--r--src/plugins/dev_octeon/tx_node.c435
-rw-r--r--src/plugins/dhcp/FEATURE.yaml2
-rw-r--r--src/plugins/dhcp/client.c14
-rw-r--r--src/plugins/dhcp/dhcp.api9
-rw-r--r--src/plugins/dhcp/dhcp4_proxy_node.c24
-rw-r--r--src/plugins/dhcp/dhcp6_client_common_dp.c4
-rw-r--r--src/plugins/dhcp/dhcp6_ia_na_client_cp.c18
-rw-r--r--src/plugins/dhcp/dhcp6_ia_na_client_dp.c2
-rw-r--r--src/plugins/dhcp/dhcp6_packet.h16
-rw-r--r--src/plugins/dhcp/dhcp6_pd_client_cp.c26
-rw-r--r--src/plugins/dhcp/dhcp6_pd_client_dp.c2
-rw-r--r--src/plugins/dhcp/dhcp6_pd_doc.md86
-rw-r--r--src/plugins/dhcp/dhcp6_pd_doc.rst113
-rw-r--r--src/plugins/dhcp/dhcp6_proxy_node.c30
-rw-r--r--src/plugins/dhcp/dhcp_api.c47
-rw-r--r--src/plugins/dhcp/dhcp_client_detect.c2
-rw-r--r--src/plugins/dhcp/dhcp_test.c17
-rw-r--r--src/plugins/dispatch-trace/CMakeLists.txt3
-rw-r--r--src/plugins/dispatch-trace/main.c2
-rw-r--r--src/plugins/dma_intel/CMakeLists.txt11
-rw-r--r--src/plugins/dma_intel/dsa.c452
-rw-r--r--src/plugins/dma_intel/dsa_intel.h160
-rw-r--r--src/plugins/dma_intel/format.c15
-rw-r--r--src/plugins/dma_intel/main.c272
-rw-r--r--src/plugins/dns/dns.c231
-rw-r--r--src/plugins/dns/dns_packet.h6
-rw-r--r--src/plugins/dns/reply_node.c2
-rw-r--r--src/plugins/dns/request_node.c15
-rw-r--r--src/plugins/dpdk/CMakeLists.txt20
-rw-r--r--src/plugins/dpdk/buffer.c28
-rw-r--r--src/plugins/dpdk/cryptodev/cryptodev.c432
-rw-r--r--src/plugins/dpdk/cryptodev/cryptodev.h204
-rw-r--r--src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c467
-rw-r--r--src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c428
-rw-r--r--src/plugins/dpdk/device/cli.c44
-rw-r--r--src/plugins/dpdk/device/common.c276
-rw-r--r--src/plugins/dpdk/device/device.c137
-rw-r--r--src/plugins/dpdk/device/dpdk.h287
-rw-r--r--src/plugins/dpdk/device/dpdk_priv.h142
-rw-r--r--src/plugins/dpdk/device/driver.c154
-rw-r--r--src/plugins/dpdk/device/flow.c226
-rw-r--r--src/plugins/dpdk/device/format.c601
-rw-r--r--src/plugins/dpdk/device/init.c1331
-rw-r--r--src/plugins/dpdk/device/node.c150
-rw-r--r--src/plugins/dpdk/main.c13
-rw-r--r--src/plugins/dpdk/thread.c85
-rw-r--r--src/plugins/fateshare/CMakeLists.txt25
-rw-r--r--src/plugins/fateshare/fateshare.c309
-rw-r--r--src/plugins/fateshare/fateshare.h48
-rw-r--r--src/plugins/fateshare/vpp_fateshare_monitor.c289
-rw-r--r--src/plugins/flowprobe/FEATURE.yaml9
-rw-r--r--src/plugins/flowprobe/flowprobe.api119
-rw-r--r--src/plugins/flowprobe/flowprobe.c593
-rw-r--r--src/plugins/flowprobe/flowprobe.h19
-rw-r--r--src/plugins/flowprobe/flowprobe_plugin_doc.md13
-rw-r--r--src/plugins/flowprobe/flowprobe_plugin_doc.rst18
-rw-r--r--src/plugins/flowprobe/flowprobe_test.c218
-rw-r--r--src/plugins/flowprobe/node.c327
-rw-r--r--src/plugins/gbp/gbp.api470
-rw-r--r--src/plugins/gbp/gbp.h80
-rw-r--r--src/plugins/gbp/gbp_api.c1154
-rw-r--r--src/plugins/gbp/gbp_bridge_domain.c503
-rw-r--r--src/plugins/gbp/gbp_bridge_domain.h156
-rw-r--r--src/plugins/gbp/gbp_classify.c71
-rw-r--r--src/plugins/gbp/gbp_classify.h94
-rw-r--r--src/plugins/gbp/gbp_classify_node.c628
-rw-r--r--src/plugins/gbp/gbp_contract.c819
-rw-r--r--src/plugins/gbp/gbp_contract.h362
-rw-r--r--src/plugins/gbp/gbp_endpoint.c1597
-rw-r--r--src/plugins/gbp/gbp_endpoint.h376
-rw-r--r--src/plugins/gbp/gbp_endpoint_group.c402
-rw-r--r--src/plugins/gbp/gbp_endpoint_group.h166
-rw-r--r--src/plugins/gbp/gbp_ext_itf.c293
-rw-r--r--src/plugins/gbp/gbp_ext_itf.h92
-rw-r--r--src/plugins/gbp/gbp_fwd.c56
-rw-r--r--src/plugins/gbp/gbp_fwd_dpo.c306
-rw-r--r--src/plugins/gbp/gbp_fwd_dpo.h62
-rw-r--r--src/plugins/gbp/gbp_fwd_node.c163
-rw-r--r--src/plugins/gbp/gbp_itf.c575
-rw-r--r--src/plugins/gbp/gbp_itf.h97
-rw-r--r--src/plugins/gbp/gbp_learn.c76
-rw-r--r--src/plugins/gbp/gbp_learn.h63
-rw-r--r--src/plugins/gbp/gbp_learn_node.c718
-rw-r--r--src/plugins/gbp/gbp_policy.c79
-rw-r--r--src/plugins/gbp/gbp_policy.h57
-rw-r--r--src/plugins/gbp/gbp_policy_dpo.c420
-rw-r--r--src/plugins/gbp/gbp_policy_dpo.h121
-rw-r--r--src/plugins/gbp/gbp_policy_node.c341
-rw-r--r--src/plugins/gbp/gbp_recirc.c292
-rw-r--r--src/plugins/gbp/gbp_recirc.h88
-rw-r--r--src/plugins/gbp/gbp_route_domain.c447
-rw-r--r--src/plugins/gbp/gbp_route_domain.h84
-rw-r--r--src/plugins/gbp/gbp_scanner.c136
-rw-r--r--src/plugins/gbp/gbp_subnet.c598
-rw-r--r--src/plugins/gbp/gbp_subnet.h53
-rw-r--r--src/plugins/gbp/gbp_vxlan.c654
-rw-r--r--src/plugins/gbp/gbp_vxlan.h135
-rw-r--r--src/plugins/gbp/gbp_vxlan_node.c218
-rw-r--r--src/plugins/geneve/decap.c6
-rw-r--r--src/plugins/geneve/encap.c2
-rw-r--r--src/plugins/geneve/geneve.c67
-rw-r--r--src/plugins/geneve/geneve.h2
-rw-r--r--src/plugins/geneve/geneve_api.c12
-rw-r--r--src/plugins/geneve/geneve_test.c14
-rw-r--r--src/plugins/gre/CMakeLists.txt (renamed from src/plugins/l2e/CMakeLists.txt)25
-rw-r--r--src/plugins/gre/FEATURE.yaml13
-rw-r--r--src/plugins/gre/error.def23
-rw-r--r--src/plugins/gre/gre.api110
-rw-r--r--src/plugins/gre/gre.c842
-rw-r--r--src/plugins/gre/gre.h439
-rw-r--r--src/plugins/gre/gre_api.c212
-rw-r--r--src/plugins/gre/interface.c826
-rw-r--r--src/plugins/gre/node.c574
-rw-r--r--src/plugins/gre/pg.c84
-rw-r--r--src/plugins/gre/plugin.c (renamed from src/plugins/gbp/gbp_scanner.h)24
-rw-r--r--src/plugins/gtpu/gtpu.api196
-rw-r--r--src/plugins/gtpu/gtpu.c457
-rw-r--r--src/plugins/gtpu/gtpu.h118
-rw-r--r--src/plugins/gtpu/gtpu_api.c248
-rw-r--r--src/plugins/gtpu/gtpu_decap.c1543
-rw-r--r--src/plugins/gtpu/gtpu_encap.c218
-rw-r--r--src/plugins/gtpu/gtpu_error.def2
-rw-r--r--src/plugins/gtpu/gtpu_test.c384
-rw-r--r--src/plugins/hs_apps/CMakeLists.txt9
-rw-r--r--src/plugins/hs_apps/echo_client.c1348
-rw-r--r--src/plugins/hs_apps/echo_client.h116
-rw-r--r--src/plugins/hs_apps/echo_server.c515
-rw-r--r--src/plugins/hs_apps/hs_apps.c2
-rw-r--r--src/plugins/hs_apps/hs_test.h212
-rw-r--r--src/plugins/hs_apps/http_cli.c676
-rw-r--r--src/plugins/hs_apps/http_cli.h27
-rw-r--r--src/plugins/hs_apps/http_client_cli.c555
-rw-r--r--src/plugins/hs_apps/http_server.c1004
-rw-r--r--src/plugins/hs_apps/http_tps.c839
-rw-r--r--src/plugins/hs_apps/proxy.c265
-rw-r--r--src/plugins/hs_apps/proxy.h29
-rw-r--r--src/plugins/hs_apps/sapi/vpp_echo.c184
-rw-r--r--src/plugins/hs_apps/sapi/vpp_echo_bapi.c40
-rw-r--r--src/plugins/hs_apps/sapi/vpp_echo_common.h15
-rw-r--r--src/plugins/hs_apps/sapi/vpp_echo_proto_quic.c2
-rw-r--r--src/plugins/hs_apps/sapi/vpp_echo_sapi.c330
-rw-r--r--src/plugins/hs_apps/vcl/sock_test_client.c160
-rw-r--r--src/plugins/hs_apps/vcl/sock_test_server.c74
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test.h217
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test_client.c731
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test_protos.c109
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test_server.c103
-rw-r--r--src/plugins/hsi/CMakeLists.txt17
-rw-r--r--src/plugins/hsi/FEATURE.yaml8
-rw-r--r--src/plugins/hsi/hsi.c404
-rw-r--r--src/plugins/hsi/hsi.h29
-rw-r--r--src/plugins/hsi/hsi_error.def16
-rw-r--r--src/plugins/http/CMakeLists.txt19
-rw-r--r--src/plugins/http/http.c1504
-rw-r--r--src/plugins/http/http.h287
-rw-r--r--src/plugins/http/http_buffer.c219
-rw-r--r--src/plugins/http/http_buffer.h82
-rw-r--r--src/plugins/http/http_timer.c91
-rw-r--r--src/plugins/http/http_timer.h91
-rw-r--r--src/plugins/http_static/CMakeLists.txt4
-rw-r--r--src/plugins/http_static/FEATURE.yaml20
-rw-r--r--src/plugins/http_static/builtinurl/json_urls.c192
-rw-r--r--src/plugins/http_static/http_cache.c450
-rw-r--r--src/plugins/http_static/http_cache.h78
-rw-r--r--src/plugins/http_static/http_static.c99
-rw-r--r--src/plugins/http_static/http_static.h226
-rw-r--r--src/plugins/http_static/static_server.c1737
-rw-r--r--src/plugins/idpf/CMakeLists.txt28
-rw-r--r--src/plugins/idpf/README.rst59
-rw-r--r--src/plugins/idpf/cli.c135
-rw-r--r--src/plugins/idpf/device.c2265
-rw-r--r--src/plugins/idpf/format.c77
-rw-r--r--src/plugins/idpf/idpf.api80
-rw-r--r--src/plugins/idpf/idpf.h929
-rw-r--r--src/plugins/idpf/idpf_api.c111
-rw-r--r--src/plugins/idpf/idpf_controlq.c890
-rw-r--r--src/plugins/idpf/idpf_test.c169
-rw-r--r--src/plugins/idpf/plugin.c35
-rw-r--r--src/plugins/idpf/virtchnl2.h855
-rw-r--r--src/plugins/idpf/virtchnl2_lan_desc.h610
-rw-r--r--src/plugins/igmp/igmp.c27
-rw-r--r--src/plugins/igmp/igmp.h1
-rw-r--r--src/plugins/igmp/igmp_api.c21
-rw-r--r--src/plugins/igmp/igmp_cli.c18
-rw-r--r--src/plugins/igmp/igmp_config.c4
-rw-r--r--src/plugins/igmp/igmp_group.c10
-rw-r--r--src/plugins/igmp/igmp_input.c8
-rw-r--r--src/plugins/igmp/igmp_pkt.c4
-rw-r--r--src/plugins/igmp/igmp_proxy.c9
-rw-r--r--src/plugins/igmp/igmp_query.c2
-rw-r--r--src/plugins/igmp/igmp_ssm_range.c4
-rw-r--r--src/plugins/igmp/igmp_timer.c2
-rw-r--r--src/plugins/ikev2/CMakeLists.txt9
-rw-r--r--src/plugins/ikev2/ikev2.api74
-rw-r--r--src/plugins/ikev2/ikev2.c872
-rw-r--r--src/plugins/ikev2/ikev2.h11
-rw-r--r--src/plugins/ikev2/ikev2_api.c413
-rw-r--r--src/plugins/ikev2/ikev2_cli.c51
-rw-r--r--src/plugins/ikev2/ikev2_crypto.c6
-rw-r--r--src/plugins/ikev2/ikev2_payload.c42
-rw-r--r--src/plugins/ikev2/ikev2_priv.h64
-rw-r--r--src/plugins/ikev2/ikev2_test.c223
-rw-r--r--src/plugins/ikev2/ikev2_types.api76
-rw-r--r--src/plugins/ila/ila.c12
-rw-r--r--src/plugins/ioam/analyse/ioam_summary_export.c36
-rw-r--r--src/plugins/ioam/analyse/ioam_summary_export.h8
-rw-r--r--src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c4
-rw-r--r--src/plugins/ioam/analyse/ip6/node.c2
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_e2e.c2
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_e2e.h2
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_pot.c2
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_pot.h2
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_trace.c6
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_trace.h2
-rw-r--r--src/plugins/ioam/export-common/ioam_export.h2
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c2
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c2
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c2
-rw-r--r--src/plugins/ioam/export/ioam_export.c2
-rw-r--r--src/plugins/ioam/ioam_plugin_doc.md464
-rw-r--r--src/plugins/ioam/ioam_plugin_doc.rst490
-rw-r--r--src/plugins/ioam/ip6/ioam_cache.h21
-rw-r--r--src/plugins/ioam/ip6/ioam_cache_node.c4
-rw-r--r--src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c6
-rw-r--r--src/plugins/ioam/ipfixcollector/node.c2
-rw-r--r--src/plugins/ioam/lib-e2e/e2e_util.h2
-rw-r--r--src/plugins/ioam/lib-pot/math64.h31
-rw-r--r--src/plugins/ioam/lib-trace/trace_util.c6
-rw-r--r--src/plugins/ioam/lib-trace/trace_util.h2
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c2
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c2
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c2
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c4
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c5
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c15
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c6
-rw-r--r--src/plugins/ioam/udp-ping/udp_ping_export.c34
-rw-r--r--src/plugins/ioam/udp-ping/udp_ping_node.c10
-rw-r--r--src/plugins/ip_session_redirect/CMakeLists.txt27
-rw-r--r--src/plugins/ip_session_redirect/FEATURE.yaml9
-rw-r--r--src/plugins/ip_session_redirect/api.c124
-rw-r--r--src/plugins/ip_session_redirect/ip_session_redirect.api106
-rw-r--r--src/plugins/ip_session_redirect/ip_session_redirect.h33
-rw-r--r--src/plugins/ip_session_redirect/ip_session_redirect_doc.rst42
-rw-r--r--src/plugins/ip_session_redirect/punt_redirect.vpp48
-rw-r--r--src/plugins/ip_session_redirect/redirect.c463
-rw-r--r--src/plugins/ip_session_redirect/test_api.c195
-rw-r--r--src/plugins/l2e/l2e.c198
-rw-r--r--src/plugins/l2e/l2e.h84
-rw-r--r--src/plugins/l2e/l2e_api.c89
-rw-r--r--src/plugins/l2e/l2e_node.c283
-rw-r--r--src/plugins/l2tp/decap.c4
-rw-r--r--src/plugins/l2tp/encap.c2
-rw-r--r--src/plugins/l2tp/l2tp.c34
-rw-r--r--src/plugins/l2tp/l2tp_api.c6
-rw-r--r--src/plugins/l2tp/l2tp_test.c14
-rw-r--r--src/plugins/l2tp/packet.h2
-rw-r--r--src/plugins/l3xc/FEATURE.yaml2
-rw-r--r--src/plugins/l3xc/l3xc.c20
-rw-r--r--src/plugins/l3xc/l3xc_api.c12
-rw-r--r--src/plugins/l3xc/l3xc_node.c2
-rw-r--r--src/plugins/lacp/cli.c6
-rw-r--r--src/plugins/lacp/input.c22
-rw-r--r--src/plugins/lacp/lacp.c32
-rw-r--r--src/plugins/lacp/lacp_api.c16
-rw-r--r--src/plugins/lacp/lacp_doc.md104
-rw-r--r--src/plugins/lacp/lacp_doc.rst109
-rw-r--r--src/plugins/lacp/lacp_test.c2
-rw-r--r--src/plugins/lacp/mux_machine.c2
-rw-r--r--src/plugins/lacp/node.c8
-rw-r--r--src/plugins/lacp/ptx_machine.c2
-rw-r--r--src/plugins/lacp/rx_machine.c4
-rw-r--r--src/plugins/lacp/tx_machine.c2
-rw-r--r--src/plugins/lb/api.c92
-rw-r--r--src/plugins/lb/cli.c33
-rw-r--r--src/plugins/lb/lb.api35
-rw-r--r--src/plugins/lb/lb.c23
-rw-r--r--src/plugins/lb/lb.h18
-rw-r--r--src/plugins/lb/lb_plugin_doc.md192
-rw-r--r--src/plugins/lb/lb_plugin_doc.rst223
-rw-r--r--src/plugins/lb/lb_test.c99
-rw-r--r--src/plugins/lb/lb_types.api10
-rw-r--r--src/plugins/lb/lbhash.h3
-rw-r--r--src/plugins/lb/node.c102
-rw-r--r--src/plugins/linux-cp/CMakeLists.txt17
-rw-r--r--src/plugins/linux-cp/FEATURE.yaml14
-rw-r--r--src/plugins/linux-cp/lcp.api76
-rw-r--r--src/plugins/linux-cp/lcp.c119
-rw-r--r--src/plugins/linux-cp/lcp.h39
-rw-r--r--src/plugins/linux-cp/lcp.rst35
-rw-r--r--src/plugins/linux-cp/lcp_adj.c6
-rw-r--r--src/plugins/linux-cp/lcp_api.c158
-rw-r--r--src/plugins/linux-cp/lcp_cli.c272
-rw-r--r--src/plugins/linux-cp/lcp_interface.c630
-rw-r--r--src/plugins/linux-cp/lcp_interface.h54
-rw-r--r--src/plugins/linux-cp/lcp_interface_sync.c445
-rw-r--r--src/plugins/linux-cp/lcp_mpls_sync.c160
-rw-r--r--src/plugins/linux-cp/lcp_nl.c1043
-rw-r--r--src/plugins/linux-cp/lcp_nl.h161
-rw-r--r--src/plugins/linux-cp/lcp_node.c162
-rw-r--r--src/plugins/linux-cp/lcp_router.c1578
-rw-r--r--src/plugins/lisp/CMakeLists.txt5
-rw-r--r--src/plugins/lisp/lisp-cp/control.c51
-rw-r--r--src/plugins/lisp/lisp-cp/control.h2
-rw-r--r--src/plugins/lisp/lisp-cp/gid_dictionary.c4
-rw-r--r--src/plugins/lisp/lisp-cp/lisp_api.c26
-rw-r--r--src/plugins/lisp/lisp-cp/lisp_cli.c57
-rw-r--r--src/plugins/lisp/lisp-cp/lisp_cp_test.c16
-rw-r--r--src/plugins/lisp/lisp-cp/lisp_msg_serdes.c16
-rw-r--r--src/plugins/lisp/lisp-cp/lisp_types.h7
-rw-r--r--src/plugins/lisp/lisp-cp/one_api.c56
-rw-r--r--src/plugins/lisp/lisp-cp/one_cli.c101
-rw-r--r--src/plugins/lisp/lisp-cp/one_test.c16
-rw-r--r--src/plugins/lisp/lisp-cp/packets.c1
-rw-r--r--src/plugins/lisp/lisp-cp/packets.h1
-rw-r--r--src/plugins/lisp/lisp-gpe/decap.c8
-rw-r--r--src/plugins/lisp/lisp-gpe/interface.c36
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe.c20
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe.h4
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_adjacency.c9
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_api.c10
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_fwd_entry.c10
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_sub_interface.c13
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_tenant.c6
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_test.c16
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_tunnel.c4
-rw-r--r--src/plugins/lisp/test/lisp_cp_test.c50
-rw-r--r--src/plugins/lldp/lldp.api76
-rw-r--r--src/plugins/lldp/lldp_api.c59
-rw-r--r--src/plugins/lldp/lldp_cli.c16
-rw-r--r--src/plugins/lldp/lldp_doc.md86
-rw-r--r--src/plugins/lldp/lldp_doc.rst84
-rw-r--r--src/plugins/lldp/lldp_input.c4
-rw-r--r--src/plugins/lldp/lldp_node.c6
-rw-r--r--src/plugins/lldp/lldp_protocol.h6
-rw-r--r--src/plugins/lldp/lldp_test.c24
-rw-r--r--src/plugins/mactime/CMakeLists.txt1
-rw-r--r--src/plugins/mactime/builtins.c21
-rw-r--r--src/plugins/mactime/mactime.c24
-rw-r--r--src/plugins/mactime/mactime_test.c2
-rw-r--r--src/plugins/mactime/mactime_top.c22
-rw-r--r--src/plugins/mactime/node.c4
-rwxr-xr-xsrc/plugins/map/examples/gen-rules.py161
-rwxr-xr-xsrc/plugins/map/examples/test_map.py120
-rwxr-xr-xsrc/plugins/map/gen-rules.py124
-rw-r--r--src/plugins/map/ip4_map.c5
-rw-r--r--src/plugins/map/ip4_map_t.c10
-rw-r--r--src/plugins/map/ip6_map.c6
-rw-r--r--src/plugins/map/ip6_map_t.c13
-rw-r--r--src/plugins/map/lpm.c28
-rw-r--r--src/plugins/map/map.c20
-rw-r--r--src/plugins/map/map.h6
-rw-r--r--src/plugins/map/map_api.c8
-rw-r--r--src/plugins/map/map_doc.md69
-rw-r--r--src/plugins/map/map_doc.rst99
-rw-r--r--src/plugins/marvell/README.md65
-rw-r--r--src/plugins/marvell/README.rst85
-rw-r--r--src/plugins/marvell/plugin.c2
-rw-r--r--src/plugins/marvell/pp2/cli.c4
-rw-r--r--src/plugins/marvell/pp2/format.c1
-rw-r--r--src/plugins/marvell/pp2/input.c9
-rw-r--r--src/plugins/marvell/pp2/pp2.c21
-rw-r--r--src/plugins/marvell/pp2/pp2_api.c11
-rw-r--r--src/plugins/mdata/mdata.c37
-rw-r--r--src/plugins/mdata/mdata_doc.md24
-rw-r--r--src/plugins/mdata/mdata_doc.rst26
-rw-r--r--src/plugins/memif/CMakeLists.txt2
-rw-r--r--src/plugins/memif/cli.c125
-rw-r--r--src/plugins/memif/device.c315
-rw-r--r--src/plugins/memif/memif.api92
-rw-r--r--src/plugins/memif/memif.c477
-rw-r--r--src/plugins/memif/memif_api.c150
-rw-r--r--src/plugins/memif/memif_test.c197
-rw-r--r--src/plugins/memif/node.c873
-rw-r--r--src/plugins/memif/private.h101
-rw-r--r--src/plugins/memif/socket.c6
-rw-r--r--src/plugins/mss_clamp/mss_clamp_node.c30
-rw-r--r--src/plugins/nat/CMakeLists.txt4
-rw-r--r--src/plugins/nat/FEATURE.yaml2
-rw-r--r--src/plugins/nat/det44/det44.api4
-rw-r--r--src/plugins/nat/det44/det44.c56
-rw-r--r--src/plugins/nat/det44/det44.h7
-rw-r--r--src/plugins/nat/det44/det44_api.c16
-rw-r--r--src/plugins/nat/det44/det44_cli.c2
-rw-r--r--src/plugins/nat/det44/det44_in2out.c2
-rw-r--r--src/plugins/nat/det44/det44_inlines.h4
-rw-r--r--src/plugins/nat/det44/det44_out2in.c5
-rw-r--r--src/plugins/nat/dslite/dslite.c4
-rw-r--r--src/plugins/nat/dslite/dslite.h2
-rw-r--r--src/plugins/nat/dslite/dslite_api.c6
-rw-r--r--src/plugins/nat/dslite/dslite_ce_decap.c2
-rw-r--r--src/plugins/nat/dslite/dslite_ce_encap.c2
-rw-r--r--src/plugins/nat/dslite/dslite_cli.c6
-rw-r--r--src/plugins/nat/dslite/dslite_in2out.c4
-rw-r--r--src/plugins/nat/dslite/dslite_out2in.c2
-rw-r--r--src/plugins/nat/extras/nat_100ks.py24
-rw-r--r--src/plugins/nat/extras/nat_10Ms.py24
-rw-r--r--src/plugins/nat/extras/nat_10ks.py24
-rw-r--r--src/plugins/nat/extras/nat_1Ms.py24
-rw-r--r--src/plugins/nat/extras/nat_out2in_100ks.py24
-rw-r--r--src/plugins/nat/extras/nat_out2in_10Ms.py24
-rw-r--r--src/plugins/nat/extras/nat_out2in_10ks.py24
-rw-r--r--src/plugins/nat/extras/nat_out2in_1Ms.py24
-rw-r--r--src/plugins/nat/extras/nat_ses_open.py83
-rwxr-xr-xsrc/plugins/nat/extras/nat_static_gen_cfg.py34
-rw-r--r--src/plugins/nat/extras/nat_test_fast_path.py64
-rw-r--r--src/plugins/nat/extras/nat_test_slow_path.py60
-rw-r--r--src/plugins/nat/extras/nat_test_slow_path_with_latency.py81
-rw-r--r--src/plugins/nat/lib/alloc.h3
-rw-r--r--src/plugins/nat/lib/inlines.h53
-rw-r--r--src/plugins/nat/lib/ipfix_logging.c295
-rw-r--r--src/plugins/nat/lib/ipfix_logging.h18
-rw-r--r--src/plugins/nat/lib/lib.c1
-rw-r--r--src/plugins/nat/lib/lib.h51
-rw-r--r--src/plugins/nat/lib/log.h15
-rw-r--r--src/plugins/nat/lib/nat_proto.h76
-rw-r--r--src/plugins/nat/lib/nat_syslog.c109
-rw-r--r--src/plugins/nat/lib/nat_syslog.h13
-rw-r--r--src/plugins/nat/lib/nat_syslog_constants.h62
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed.api798
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed.c2968
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed.h617
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_affinity.c7
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_api.c771
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_classify.c42
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_cli.c758
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_doc.rst729
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_format.c293
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_handoff.c1
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_in2out.c867
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_inlines.h379
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_out2in.c444
-rw-r--r--src/plugins/nat/nat44-ed/tcp_conn_track.rst65
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei.api85
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei.c2589
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei.h142
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_api.c398
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_cli.c249
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_ha.c8
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md70
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_ha_doc.rst88
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_hairpinning.c756
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_hairpinning.h92
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_handoff.c3
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_in2out.c1213
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_inlines.h24
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_out2in.c218
-rw-r--r--src/plugins/nat/nat64/nat64.c56
-rw-r--r--src/plugins/nat/nat64/nat64.h2
-rw-r--r--src/plugins/nat/nat64/nat64_api.c6
-rw-r--r--src/plugins/nat/nat64/nat64_cli.c6
-rw-r--r--src/plugins/nat/nat64/nat64_db.c40
-rw-r--r--src/plugins/nat/nat64/nat64_db.h9
-rw-r--r--src/plugins/nat/nat64/nat64_doc.md73
-rw-r--r--src/plugins/nat/nat64/nat64_doc.rst91
-rw-r--r--src/plugins/nat/nat64/nat64_in2out.c14
-rw-r--r--src/plugins/nat/nat64/nat64_out2in.c4
-rw-r--r--src/plugins/nat/nat66/nat66_cli.c10
-rw-r--r--src/plugins/nat/nat66/nat66_in2out.c4
-rw-r--r--src/plugins/nat/nat66/nat66_out2in.c2
-rw-r--r--src/plugins/nat/pnat/pnat.api18
-rw-r--r--src/plugins/nat/pnat/pnat.c4
-rw-r--r--src/plugins/nat/pnat/pnat.md37
-rw-r--r--src/plugins/nat/pnat/pnat.rst45
-rw-r--r--src/plugins/nat/pnat/pnat_api.c31
-rw-r--r--src/plugins/nat/pnat/pnat_cli.c6
-rw-r--r--src/plugins/nat/pnat/pnat_node.h1
-rw-r--r--src/plugins/nat/pnat/tests/pnat_test.c8
-rw-r--r--src/plugins/nat/pnat/tests/pnat_test_stubs.h6
-rwxr-xr-xsrc/plugins/nat/pnat/tests/test_genpackets.py30
-rw-r--r--src/plugins/npt66/CMakeLists.txt17
-rw-r--r--src/plugins/npt66/FEATURE.yaml16
-rw-r--r--src/plugins/npt66/npt66.api40
-rw-r--r--src/plugins/npt66/npt66.c124
-rw-r--r--src/plugins/npt66/npt66.h28
-rw-r--r--src/plugins/npt66/npt66_api.c72
-rw-r--r--src/plugins/npt66/npt66_cli.c121
-rw-r--r--src/plugins/npt66/npt66_node.c372
-rw-r--r--src/plugins/nsh/FEATURE.yaml1
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export.c2
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export_thread.c2
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_node.c2
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/md2_ioam_transit.c4
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam.c3
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_api.c1
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_trace.c6
-rw-r--r--src/plugins/nsh/nsh.c46
-rw-r--r--src/plugins/nsh/nsh_api.c8
-rw-r--r--src/plugins/nsh/nsh_cli.c10
-rw-r--r--src/plugins/nsh/nsh_node.c2
-rw-r--r--src/plugins/nsh/nsh_output.c2
-rw-r--r--src/plugins/nsh/nsh_pop.c1
-rw-r--r--src/plugins/nsim/node.c4
-rw-r--r--src/plugins/nsim/nsim.c81
-rw-r--r--src/plugins/nsim/nsim.h3
-rw-r--r--src/plugins/nsim/nsim_input.c2
-rw-r--r--src/plugins/oddbuf/CMakeLists.txt3
-rw-r--r--src/plugins/oddbuf/node.c2
-rw-r--r--src/plugins/oddbuf/oddbuf.c8
-rw-r--r--src/plugins/perfmon/CMakeLists.txt57
-rw-r--r--src/plugins/perfmon/arm/bundle/branch_pred.c140
-rw-r--r--src/plugins/perfmon/arm/bundle/cache_data.c128
-rw-r--r--src/plugins/perfmon/arm/bundle/cache_data_tlb.c106
-rw-r--r--src/plugins/perfmon/arm/bundle/cache_inst.c103
-rw-r--r--src/plugins/perfmon/arm/bundle/cache_inst_tlb.c105
-rw-r--r--src/plugins/perfmon/arm/bundle/inst_clock.c102
-rw-r--r--src/plugins/perfmon/arm/bundle/mem_access.c88
-rw-r--r--src/plugins/perfmon/arm/bundle/stall.c94
-rw-r--r--src/plugins/perfmon/arm/dispatch_wrapper.c142
-rw-r--r--src/plugins/perfmon/arm/dispatch_wrapper.h18
-rw-r--r--src/plugins/perfmon/arm/events.c227
-rw-r--r--src/plugins/perfmon/arm/events.h130
-rw-r--r--src/plugins/perfmon/cli.c231
-rw-r--r--src/plugins/perfmon/dispatch_wrapper.c170
-rw-r--r--src/plugins/perfmon/intel/bundle/backend_bound_core.c100
-rw-r--r--src/plugins/perfmon/intel/bundle/backend_bound_mem.c102
-rw-r--r--src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c90
-rw-r--r--src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c89
-rw-r--r--src/plugins/perfmon/intel/bundle/frontend_bound_lat.c99
-rw-r--r--src/plugins/perfmon/intel/bundle/iio_bw.c263
-rw-r--r--src/plugins/perfmon/intel/bundle/topdown_icelake.c176
-rw-r--r--src/plugins/perfmon/intel/bundle/topdown_metrics.c232
-rw-r--r--src/plugins/perfmon/intel/bundle/topdown_tremont.c85
-rw-r--r--src/plugins/perfmon/intel/core.c78
-rw-r--r--src/plugins/perfmon/intel/core.h129
-rw-r--r--src/plugins/perfmon/intel/dispatch_wrapper.c160
-rw-r--r--src/plugins/perfmon/intel/dispatch_wrapper.h18
-rw-r--r--src/plugins/perfmon/intel/uncore.c60
-rw-r--r--src/plugins/perfmon/intel/uncore.h52
-rw-r--r--src/plugins/perfmon/linux.c7
-rw-r--r--src/plugins/perfmon/perfmon.c70
-rw-r--r--src/plugins/perfmon/perfmon.h120
-rw-r--r--src/plugins/perfmon/table.c273
-rw-r--r--src/plugins/perfmon/table.h98
-rw-r--r--src/plugins/ping/CMakeLists.txt5
-rw-r--r--src/plugins/ping/ping.api (renamed from src/plugins/l2e/l2e.api)31
-rw-r--r--src/plugins/ping/ping.c330
-rw-r--r--src/plugins/ping/ping.h75
-rw-r--r--src/plugins/ping/ping_api.c155
-rw-r--r--src/plugins/pppoe/pppoe.c29
-rw-r--r--src/plugins/pppoe/pppoe.h4
-rw-r--r--src/plugins/pppoe/pppoe_api.c6
-rw-r--r--src/plugins/pppoe/pppoe_cp.c2
-rw-r--r--src/plugins/pppoe/pppoe_decap.c23
-rw-r--r--src/plugins/prom/CMakeLists.txt21
-rw-r--r--src/plugins/prom/FEATURE.yaml10
-rw-r--r--src/plugins/prom/prom.c436
-rw-r--r--src/plugins/prom/prom.h66
-rw-r--r--src/plugins/prom/prom_cli.c153
-rw-r--r--src/plugins/quic/CMakeLists.txt7
-rw-r--r--src/plugins/quic/quic.c97
-rw-r--r--src/plugins/quic/quic.h3
-rw-r--r--src/plugins/quic/quic_crypto.c116
-rw-r--r--src/plugins/quic/quic_crypto.h13
-rw-r--r--src/plugins/rdma/CMakeLists.txt8
-rw-r--r--src/plugins/rdma/api.c52
-rw-r--r--src/plugins/rdma/cli.c14
-rw-r--r--src/plugins/rdma/device.c71
-rw-r--r--src/plugins/rdma/format.c15
-rw-r--r--src/plugins/rdma/input.c90
-rw-r--r--src/plugins/rdma/output.c45
-rw-r--r--src/plugins/rdma/plugin.c2
-rw-r--r--src/plugins/rdma/rdma.api54
-rw-r--r--src/plugins/rdma/rdma_doc.md75
-rw-r--r--src/plugins/rdma/rdma_doc.rst102
-rw-r--r--src/plugins/rdma/rdma_mlx5dv.h12
-rw-r--r--src/plugins/rdma/test_api.c53
-rw-r--r--src/plugins/snort/cli.c15
-rw-r--r--src/plugins/snort/daq_vpp.c37
-rw-r--r--src/plugins/snort/daq_vpp.h2
-rw-r--r--src/plugins/snort/dequeue.c4
-rw-r--r--src/plugins/snort/enqueue.c15
-rw-r--r--src/plugins/snort/main.c65
-rw-r--r--src/plugins/snort/snort.h10
-rw-r--r--src/plugins/srtp/srtp.c29
-rw-r--r--src/plugins/srtp/srtp_plugin.md72
-rw-r--r--src/plugins/srtp/srtp_plugin.rst82
-rw-r--r--src/plugins/srv6-ad-flow/ad-flow.c10
-rw-r--r--src/plugins/srv6-ad-flow/ad_flow_plugin_doc.md25
-rw-r--r--src/plugins/srv6-ad-flow/ad_flow_plugin_doc.rst31
-rw-r--r--src/plugins/srv6-ad/ad.c4
-rw-r--r--src/plugins/srv6-ad/ad_plugin_doc.md73
-rw-r--r--src/plugins/srv6-ad/ad_plugin_doc.rst86
-rw-r--r--src/plugins/srv6-ad/node.c10
-rw-r--r--src/plugins/srv6-am/am.c4
-rw-r--r--src/plugins/srv6-am/am_plugin_doc.md100
-rw-r--r--src/plugins/srv6-am/am_plugin_doc.rst116
-rw-r--r--src/plugins/srv6-am/node.c7
-rw-r--r--src/plugins/srv6-as/as.c4
-rw-r--r--src/plugins/srv6-as/as_plugin_doc.md152
-rw-r--r--src/plugins/srv6-as/as_plugin_doc.rst172
-rw-r--r--src/plugins/srv6-as/node.c11
-rw-r--r--src/plugins/srv6-mobile/CMakeLists.txt5
-rw-r--r--src/plugins/srv6-mobile/FEATURE.yaml2
-rw-r--r--src/plugins/srv6-mobile/extra/Dockerfile.j21
-rw-r--r--src/plugins/srv6-mobile/extra/Dockerfile.j2.release1
-rwxr-xr-xsrc/plugins/srv6-mobile/extra/runner.py832
-rw-r--r--src/plugins/srv6-mobile/extra/runner_doc.md105
-rw-r--r--src/plugins/srv6-mobile/extra/runner_doc.rst135
-rw-r--r--src/plugins/srv6-mobile/gtp4_d.c142
-rw-r--r--src/plugins/srv6-mobile/gtp4_dt.c40
-rw-r--r--src/plugins/srv6-mobile/gtp4_e.c68
-rw-r--r--src/plugins/srv6-mobile/gtp6_d.c142
-rw-r--r--src/plugins/srv6-mobile/gtp6_d_di.c30
-rw-r--r--src/plugins/srv6-mobile/gtp6_dt.c42
-rw-r--r--src/plugins/srv6-mobile/gtp6_e.c39
-rw-r--r--src/plugins/srv6-mobile/mobile.h85
-rw-r--r--src/plugins/srv6-mobile/mobile_plugin_doc.md201
-rw-r--r--src/plugins/srv6-mobile/mobile_plugin_doc.rst278
-rw-r--r--src/plugins/srv6-mobile/node.c2368
-rw-r--r--src/plugins/srv6-mobile/sr_mobile.api79
-rw-r--r--src/plugins/srv6-mobile/sr_mobile_api.c339
-rw-r--r--src/plugins/srv6-mobile/sr_mobile_api.h72
-rw-r--r--src/plugins/srv6-mobile/sr_mobile_types.api24
-rw-r--r--src/plugins/stn/stn.c8
-rw-r--r--src/plugins/stn/stn_api.c14
-rw-r--r--src/plugins/stn/stn_test.c2
-rw-r--r--src/plugins/svs/svs.c10
-rw-r--r--src/plugins/svs/svs_api.c13
-rw-r--r--src/plugins/tlsmbedtls/tls_mbedtls.c35
-rw-r--r--src/plugins/tlsopenssl/CMakeLists.txt1
-rw-r--r--src/plugins/tlsopenssl/tls_async.c4
-rw-r--r--src/plugins/tlsopenssl/tls_openssl.c386
-rw-r--r--src/plugins/tlsopenssl/tls_openssl.h2
-rw-r--r--src/plugins/tlsopenssl/tls_openssl_api.c1
-rw-r--r--src/plugins/tlspicotls/CMakeLists.txt7
-rw-r--r--src/plugins/tlspicotls/pico_vpp_crypto.c103
-rw-r--r--src/plugins/tlspicotls/tls_picotls.c600
-rw-r--r--src/plugins/tlspicotls/tls_picotls.h9
-rw-r--r--src/plugins/tracedump/CMakeLists.txt6
-rw-r--r--src/plugins/tracedump/graph_api.c2
-rw-r--r--src/plugins/tracedump/graph_cli.c12
-rw-r--r--src/plugins/tracedump/graph_test.c2
-rw-r--r--src/plugins/tracedump/setup.pg52
-rw-r--r--src/plugins/tracedump/tracedump.api68
-rw-r--r--src/plugins/tracedump/tracedump.c217
-rw-r--r--src/plugins/tracedump/tracedump_test.c97
-rw-r--r--src/plugins/tracenode/CMakeLists.txt37
-rw-r--r--src/plugins/tracenode/FEATURE.yaml8
-rw-r--r--src/plugins/tracenode/api.c64
-rw-r--r--src/plugins/tracenode/cli.c72
-rw-r--r--src/plugins/tracenode/node.c145
-rw-r--r--src/plugins/tracenode/plugin.c (renamed from src/plugins/gbp/gbp_types.h)21
-rw-r--r--src/plugins/tracenode/test.c93
-rw-r--r--src/plugins/tracenode/tracenode.api42
-rw-r--r--src/plugins/tracenode/tracenode.c71
-rw-r--r--src/plugins/tracenode/tracenode.h43
-rw-r--r--src/plugins/unittest/CMakeLists.txt11
-rw-r--r--src/plugins/unittest/api_fuzz_test.c10
-rw-r--r--src/plugins/unittest/api_test.c102
-rw-r--r--src/plugins/unittest/bier_test.c54
-rw-r--r--src/plugins/unittest/bihash_test.c48
-rw-r--r--src/plugins/unittest/bitmap_test.c219
-rw-r--r--src/plugins/unittest/counter_test.c37
-rw-r--r--src/plugins/unittest/crypto/aes_cbc.c6
-rw-r--r--src/plugins/unittest/crypto/aes_ctr.c6
-rw-r--r--src/plugins/unittest/crypto/aes_gcm.c2
-rw-r--r--src/plugins/unittest/crypto/aes_gmac.c3029
-rw-r--r--src/plugins/unittest/crypto/chacha20_poly1305.c6
-rw-r--r--src/plugins/unittest/crypto/crypto.h4
-rw-r--r--src/plugins/unittest/crypto/rfc2202_hmac_md5.c14
-rw-r--r--src/plugins/unittest/crypto/rfc2202_hmac_sha1.c16
-rw-r--r--src/plugins/unittest/crypto/rfc4231.c14
-rw-r--r--src/plugins/unittest/crypto_test.c12
-rw-r--r--src/plugins/unittest/fib_test.c309
-rw-r--r--src/plugins/unittest/gso_test.c456
-rw-r--r--src/plugins/unittest/hash_test.c331
-rw-r--r--src/plugins/unittest/interface_test.c2
-rw-r--r--src/plugins/unittest/ip_psh_cksum_test.c266
-rw-r--r--src/plugins/unittest/ipsec_test.c326
-rw-r--r--src/plugins/unittest/llist_test.c8
-rw-r--r--src/plugins/unittest/mactime_test.c2
-rw-r--r--src/plugins/unittest/mfib_test.c216
-rw-r--r--src/plugins/unittest/mpcap_node.c2
-rw-r--r--src/plugins/unittest/pool_test.c38
-rw-r--r--src/plugins/unittest/punt_test.c6
-rw-r--r--src/plugins/unittest/rbtree_test.c2
-rw-r--r--src/plugins/unittest/segment_manager_test.c57
-rw-r--r--src/plugins/unittest/session_test.c114
-rw-r--r--src/plugins/unittest/sparse_vec_test.c2
-rw-r--r--src/plugins/unittest/string_test.c466
-rw-r--r--src/plugins/unittest/svm_fifo_test.c14
-rw-r--r--src/plugins/unittest/tcp_test.c4
-rw-r--r--src/plugins/unittest/test_buffer.c310
-rw-r--r--src/plugins/unittest/unittest.c2
-rw-r--r--src/plugins/unittest/util_test.c6
-rw-r--r--src/plugins/unittest/vlib_test.c6
-rw-r--r--src/plugins/urpf/ip4_urpf.c2
-rw-r--r--src/plugins/urpf/ip6_urpf.c2
-rw-r--r--src/plugins/urpf/urpf.api45
-rw-r--r--src/plugins/urpf/urpf.c125
-rw-r--r--src/plugins/urpf/urpf.h21
-rw-r--r--src/plugins/urpf/urpf_api.c110
-rw-r--r--src/plugins/urpf/urpf_dp.h22
-rw-r--r--src/plugins/vhost/CMakeLists.txt34
-rw-r--r--src/plugins/vhost/FEATURE.yaml13
-rw-r--r--src/plugins/vhost/plugin.c12
-rw-r--r--src/plugins/vhost/vhost_std.h69
-rw-r--r--src/plugins/vhost/vhost_user.api201
-rw-r--r--src/plugins/vhost/vhost_user.c2594
-rw-r--r--src/plugins/vhost/vhost_user.h386
-rw-r--r--src/plugins/vhost/vhost_user_api.c357
-rw-r--r--src/plugins/vhost/vhost_user_inline.h494
-rw-r--r--src/plugins/vhost/vhost_user_input.c1472
-rw-r--r--src/plugins/vhost/vhost_user_output.c1143
-rw-r--r--src/plugins/vhost/virtio_std.h188
-rw-r--r--src/plugins/vmxnet3/README.md64
-rw-r--r--src/plugins/vmxnet3/README.rst86
-rw-r--r--src/plugins/vmxnet3/cli.c41
-rw-r--r--src/plugins/vmxnet3/format.c2
-rw-r--r--src/plugins/vmxnet3/input.c15
-rw-r--r--src/plugins/vmxnet3/plugin.c2
-rw-r--r--src/plugins/vmxnet3/vmxnet3.c96
-rw-r--r--src/plugins/vmxnet3/vmxnet3.h19
-rw-r--r--src/plugins/vmxnet3/vmxnet3_api.c19
-rw-r--r--src/plugins/vmxnet3/vmxnet3_test.c2
-rw-r--r--src/plugins/vrrp/node.c60
-rw-r--r--src/plugins/vrrp/setup.pg20
-rw-r--r--src/plugins/vrrp/vrrp.api51
-rw-r--r--src/plugins/vrrp/vrrp.c321
-rw-r--r--src/plugins/vrrp/vrrp.h47
-rw-r--r--src/plugins/vrrp/vrrp_all_api_h.h11
-rw-r--r--src/plugins/vrrp/vrrp_api.c114
-rw-r--r--src/plugins/vrrp/vrrp_cli.c20
-rw-r--r--src/plugins/vrrp/vrrp_msg_enum.h23
-rw-r--r--src/plugins/vrrp/vrrp_packet.c51
-rw-r--r--src/plugins/vrrp/vrrp_packet.h9
-rw-r--r--src/plugins/vrrp/vrrp_periodic.c2
-rw-r--r--src/plugins/vrrp/vrrp_test.c173
-rw-r--r--src/plugins/vxlan/CMakeLists.txt29
-rw-r--r--src/plugins/vxlan/FEATURE.yaml14
-rw-r--r--src/plugins/vxlan/decap.c1322
-rw-r--r--src/plugins/vxlan/dir.dox24
-rw-r--r--src/plugins/vxlan/encap.c538
-rw-r--r--src/plugins/vxlan/plugin.c12
-rw-r--r--src/plugins/vxlan/vxlan.api210
-rw-r--r--src/plugins/vxlan/vxlan.c1331
-rw-r--r--src/plugins/vxlan/vxlan.h240
-rw-r--r--src/plugins/vxlan/vxlan_api.c376
-rw-r--r--src/plugins/vxlan/vxlan_error.def17
-rw-r--r--src/plugins/vxlan/vxlan_packet.h80
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/CMakeLists.txt11
-rw-r--r--src/plugins/wireguard/FEATURE.yaml3
-rwxr-xr-xsrc/plugins/wireguard/README.md55
-rw-r--r--src/plugins/wireguard/README.rst79
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/blake/blake2-impl.h0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/blake/blake2s.c0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/blake/blake2s.h0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard.api79
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard.c65
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard.h79
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_api.c210
-rw-r--r--src/plugins/wireguard/wireguard_chachapoly.c133
-rw-r--r--src/plugins/wireguard/wireguard_chachapoly.h48
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_cli.c103
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_cookie.c218
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_cookie.h43
-rw-r--r--src/plugins/wireguard/wireguard_handoff.c104
-rw-r--r--src/plugins/wireguard/wireguard_hchacha20.h90
-rw-r--r--src/plugins/wireguard/wireguard_if.c180
-rw-r--r--src/plugins/wireguard/wireguard_if.h59
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_index_table.c14
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_index_table.h7
-rw-r--r--src/plugins/wireguard/wireguard_input.c1111
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_key.c0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_key.h0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_messages.h0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_noise.c315
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_noise.h97
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_output_tun.c869
-rw-r--r--src/plugins/wireguard/wireguard_peer.c481
-rw-r--r--src/plugins/wireguard/wireguard_peer.h90
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_send.c194
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_send.h7
-rw-r--r--src/plugins/wireguard/wireguard_timer.c50
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_timer.h14
958 files changed, 104763 insertions, 44749 deletions
diff --git a/src/plugins/CMakeLists.txt b/src/plugins/CMakeLists.txt
index e54eaa2c4cb..43ad4cc2a25 100644
--- a/src/plugins/CMakeLists.txt
+++ b/src/plugins/CMakeLists.txt
@@ -23,7 +23,27 @@ FILE(GLOB files RELATIVE
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/*/CMakeLists.txt
)
+
+set(VPP_EXCLUDED_PLUGINS
+ ""
+ CACHE
+ STRING "Comma-separated list of core plugins excluded from packaging and tests"
+)
+
+# Create the list of plugins to exclude from packaging
+SET(excluded_plugins "")
+STRING(REGEX REPLACE "[,]+" ";" exc_plugins "${VPP_EXCLUDED_PLUGINS}")
+foreach (e ${exc_plugins})
+ message(WARNING "Excluding plugin due to VPP_EXCLUDED_PLUGINS: '${e}'")
+ list(APPEND excluded_plugins ${e})
+endforeach()
+
foreach (f ${files})
get_filename_component(dir ${f} DIRECTORY)
- add_subdirectory(${dir})
+
+ # if a plugin is in the list of excluded plugins, do not add its subdirectory
+ LIST(FIND excluded_plugins "${dir}" exc_index)
+ if(${exc_index} EQUAL "-1")
+ add_subdirectory(${dir})
+ endif()
endforeach()
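
The VPP_EXCLUDED_PLUGINS cache variable above is set at configure time. As a
minimal illustration (the plugin names and path are hypothetical examples, not
a recommendation), a tree configured without the dpdk and avf plugins would be
generated with:

    cmake -DVPP_EXCLUDED_PLUGINS="dpdk,avf" ../src

Each name must match a plugin subdirectory under src/plugins; anything listed
is reported with a CMake warning and its subdirectory is never added, so it is
left out of the build, packaging and tests.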
diff --git a/src/plugins/abf/FEATURE.yaml b/src/plugins/abf/FEATURE.yaml
index b9f3285daa3..7902dbe7800 100644
--- a/src/plugins/abf/FEATURE.yaml
+++ b/src/plugins/abf/FEATURE.yaml
@@ -1,9 +1,12 @@
---
name: ACL Based Forwarding
-maintainer: Neale Ranns <nranns@cisco.com>
+maintainer: Neale Ranns <neale@graphiant.com>
features:
- 'Policy Based Routing'
- - ACLs match traffic to be forwarded
+ - ACLs identify how traffic should be forwarded. Packets matching a permit
+ rule are forwarded using ABF policy. Packets matching a deny rule are
+ excluded from ABF handling and continue traversing the input feature arc on
+ the L3 path.
- Each rule in the ACL has an associated 'path' which determines how the
traffic will be forwarded. This path is described as a FIB path, so anything
possible with basic L3 forwarding is possible with ABF (with the exception
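
To make the permit/deny split concrete: with an ABF policy bound to an ACL,
only packets hitting a permit rule are diverted onto the policy's paths. A
minimal hypothetical configuration, using the CLI forms whose short-help
strings appear later in this diff, could look like:

    abf policy add id 10 acl 5 via 10.0.0.2
    abf attach ip4 policy 10 GigabitEthernet0/0/0

Packets arriving on GigabitEthernet0/0/0 that match a permit rule in ACL 5 are
forwarded via 10.0.0.2; packets matching a deny rule (or nothing) carry on
along the regular IPv4 input feature arc.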
diff --git a/src/plugins/abf/abf.api b/src/plugins/abf/abf.api
index 1cd3da7e557..a748de4522b 100644
--- a/src/plugins/abf/abf.api
+++ b/src/plugins/abf/abf.api
@@ -51,7 +51,7 @@ define abf_plugin_get_version_reply
/** \brief A description of an ABF policy
@param policy_id User chosen Identifier for the policy
@param acl_index The ACL that the policy will match against
- @param n_paths Number of paths
+ @param n_paths Number of paths, 1..255
@param paths The set of forwarding paths that are being added or removed.
*/
typedef abf_policy
diff --git a/src/plugins/abf/abf_api.c b/src/plugins/abf/abf_api.c
index cc55b214e35..2330e7b7d21 100644
--- a/src/plugins/abf/abf_api.c
+++ b/src/plugins/abf/abf_api.c
@@ -34,10 +34,11 @@
#include <abf/abf.api_types.h>
/**
- * Base message ID fot the plugin
+ * Base message ID for the plugin
*/
static u32 abf_base_msg_id;
+#define REPLY_MSG_ID_BASE (abf_base_msg_id)
#include <vlibapi/api_helper_macros.h>
static void
@@ -68,6 +69,12 @@ vl_api_abf_policy_add_del_t_handler (vl_api_abf_policy_add_del_t * mp)
int rv = 0;
u8 pi;
+ if (mp->policy.n_paths == 0)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto done;
+ }
+
vec_validate (paths, mp->policy.n_paths - 1);
for (pi = 0; pi < mp->policy.n_paths; pi++)
@@ -93,7 +100,7 @@ vl_api_abf_policy_add_del_t_handler (vl_api_abf_policy_add_del_t * mp)
done:
vec_free (paths);
- REPLY_MACRO (VL_API_ABF_POLICY_ADD_DEL_REPLY + abf_base_msg_id);
+ REPLY_MACRO (VL_API_ABF_POLICY_ADD_DEL_REPLY);
}
static void
@@ -106,19 +113,17 @@ vl_api_abf_itf_attach_add_del_t_handler (vl_api_abf_itf_attach_add_del_t * mp)
if (mp->is_add)
{
- abf_itf_attach (fproto,
- ntohl (mp->attach.policy_id),
- ntohl (mp->attach.priority),
- ntohl (mp->attach.sw_if_index));
+ rv = abf_itf_attach (fproto, ntohl (mp->attach.policy_id),
+ ntohl (mp->attach.priority),
+ ntohl (mp->attach.sw_if_index));
}
else
{
- abf_itf_detach (fproto,
- ntohl (mp->attach.policy_id),
- ntohl (mp->attach.sw_if_index));
+ rv = abf_itf_detach (fproto, ntohl (mp->attach.policy_id),
+ ntohl (mp->attach.sw_if_index));
}
- REPLY_MACRO (VL_API_ABF_ITF_ATTACH_ADD_DEL_REPLY + abf_base_msg_id);
+ REPLY_MACRO (VL_API_ABF_ITF_ATTACH_ADD_DEL_REPLY);
}
typedef struct abf_dump_walk_ctx_t_
@@ -245,12 +250,10 @@ abf_api_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (abf_api_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Access Control List (ACL) Based Forwarding",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
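
The REPLY_MACRO changes above lean on the REPLY_MSG_ID_BASE convention from
api_helper_macros.h: when that macro is defined before the header is included,
the reply helpers add the plugin's base message id themselves. A rough sketch
of the effect (simplified; the real macro also allocates the reply, copies the
context and sends it):

    #define REPLY_MSG_ID_BASE (abf_base_msg_id)
    /* inside REPLY_MACRO (t), roughly: */
    rmp->_vl_msg_id = htons (t + REPLY_MSG_ID_BASE);

Handlers therefore pass the bare VL_API_..._REPLY enum value, and the old
open-coded "+ abf_base_msg_id" additions (easy to forget) become unnecessary.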
diff --git a/src/plugins/abf/abf_itf_attach.c b/src/plugins/abf/abf_itf_attach.c
index 4f17f720f3b..04e5c4c40c2 100644
--- a/src/plugins/abf/abf_itf_attach.c
+++ b/src/plugins/abf/abf_itf_attach.c
@@ -399,7 +399,6 @@ abf_itf_attach_cmd (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/**
* Attach an ABF policy to an interface.
*/
@@ -409,7 +408,6 @@ VLIB_CLI_COMMAND (abf_itf_attach_cmd_node, static) = {
.short_help = "abf attach <ip4|ip6> [del] policy <value> <interface>",
// this is not MP safe
};
-/* *INDENT-ON* */
static clib_error_t *
abf_show_attach_cmd (vlib_main_t * vm,
@@ -438,7 +436,6 @@ abf_show_attach_cmd (vlib_main_t * vm,
vlib_cli_output (vm, "specify an interface");
}
- /* *INDENT-OFF* */
   FOR_EACH_FIB_IP_PROTOCOL (fproto)
{
if (sw_if_index < vec_len(abf_per_itf[fproto]))
@@ -453,31 +450,26 @@ abf_show_attach_cmd (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (abf_show_attach_cmd_node, static) = {
.path = "show abf attach",
.function = abf_show_attach_cmd,
.short_help = "show abf attach <interface>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
void
abf_itf_attach_walk (abf_itf_attach_walk_cb_t cb, void *ctx)
{
u32 aii;
- /* *INDENT-OFF* */
pool_foreach_index (aii, abf_itf_attach_pool)
{
if (!cb(aii, ctx))
break;
}
- /* *INDENT-ON* */
}
typedef enum abf_next_t_
@@ -567,10 +559,11 @@ abf_input_inline (vlib_main_t * vm,
(FIB_PROTOCOL_IP6 == fproto), 1, 0,
&fa_5tuple0);
- if (acl_plugin_match_5tuple_inline
- (acl_plugin.p_acl_main, lc_index, &fa_5tuple0,
- (FIB_PROTOCOL_IP6 == fproto), &action, &match_acl_pos,
- &match_acl_index, &match_rule_index, &trace_bitmap))
+ if (acl_plugin_match_5tuple_inline (
+ acl_plugin.p_acl_main, lc_index, &fa_5tuple0,
+ (FIB_PROTOCOL_IP6 == fproto), &action, &match_acl_pos,
+ &match_acl_index, &match_rule_index, &trace_bitmap) &&
+ action > 0)
{
/*
* match:
@@ -656,7 +649,6 @@ static char *abf_error_strings[] = {
#undef abf_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (abf_ip4_node) =
{
.function = abf_input_ip4,
@@ -702,7 +694,6 @@ VNET_FEATURE_INIT (abf_ip6_feat, static) =
.node_name = "abf-input-ip6",
.runs_after = VNET_FEATURES ("acl-plugin-in-ip6-fa"),
};
-/* *INDENT-ON* */
static fib_node_t *
abf_itf_attach_get_node (fib_node_index_t index)
@@ -760,7 +751,7 @@ static clib_error_t *
abf_itf_bond_init (vlib_main_t * vm)
{
abf_itf_attach_fib_node_type =
- fib_node_register_new_type (&abf_itf_attach_vft);
+ fib_node_register_new_type ("abf-attach", &abf_itf_attach_vft);
clib_error_t *acl_init_res = acl_plugin_exports_init (&acl_plugin);
if (acl_init_res)
return (acl_init_res);
@@ -771,12 +762,10 @@ abf_itf_bond_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (abf_itf_bond_init) =
{
.runs_after = VLIB_INITS("acl_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
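
The added "action > 0" test in abf_input_inline is what implements the
permit/deny behaviour described in the FEATURE.yaml change earlier in this
diff. Assuming the ACL plugin's usual action encoding (0 = deny, 1 = permit,
2 = permit+reflect; inferred, not spelled out here), the per-packet decision
reduces to roughly this sketch:

    /* sketch only; the next-node names are illustrative, not the real ones */
    if (matched && action > 0)
      next0 = policy_forward_next;    /* permit: follow the ABF path-list */
    else
      vnet_feature_next (&next0, b0); /* deny/no match: stay on the L3 arc */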
diff --git a/src/plugins/abf/abf_policy.c b/src/plugins/abf/abf_policy.c
index 945434bca27..e6dfe4fff55 100644
--- a/src/plugins/abf/abf_policy.c
+++ b/src/plugins/abf/abf_policy.c
@@ -192,50 +192,45 @@ abf_policy_delete (u32 policy_id, const fib_route_path_t * rpaths)
*/
return (VNET_API_ERROR_INVALID_VALUE);
}
- else
- {
- /*
- * update an existing policy.
- * - add the path to the path-list and swap our ancestry
- * - backwalk to poke all attachments to update
- */
- fib_node_index_t old_pl;
- ap = abf_policy_get (api);
- old_pl = ap->ap_pl;
+ /*
+ * update an existing policy.
+ * - add the path to the path-list and swap our ancestry
+ * - backwalk to poke all attachments to update
+ */
+ fib_node_index_t old_pl;
- fib_path_list_lock (old_pl);
- ap->ap_pl =
- fib_path_list_copy_and_path_remove (ap->ap_pl,
- (FIB_PATH_LIST_FLAG_SHARED |
- FIB_PATH_LIST_FLAG_NO_URPF),
- rpaths);
+ ap = abf_policy_get (api);
+ old_pl = ap->ap_pl;
- fib_path_list_child_remove (old_pl, ap->ap_sibling);
- ap->ap_sibling = ~0;
+ fib_path_list_lock (old_pl);
+ ap->ap_pl = fib_path_list_copy_and_path_remove (
+ ap->ap_pl, (FIB_PATH_LIST_FLAG_SHARED | FIB_PATH_LIST_FLAG_NO_URPF),
+ rpaths);
- if (FIB_NODE_INDEX_INVALID == ap->ap_pl)
- {
- /*
- * no more paths on this policy. It's toast
- * remove the CLI/API's lock
- */
- fib_node_unlock (&ap->ap_node);
- }
- else
- {
- ap->ap_sibling = fib_path_list_child_add (ap->ap_pl,
- abf_policy_fib_node_type,
- api);
+ fib_path_list_child_remove (old_pl, ap->ap_sibling);
+ ap->ap_sibling = ~0;
- fib_node_back_walk_ctx_t ctx = {
- .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
- };
+ if (FIB_NODE_INDEX_INVALID == ap->ap_pl)
+ {
+ /*
+ * no more paths on this policy. It's toast
+ * remove the CLI/API's lock
+ */
+ fib_node_unlock (&ap->ap_node);
+ }
+ else
+ {
+ ap->ap_sibling =
+ fib_path_list_child_add (ap->ap_pl, abf_policy_fib_node_type, api);
- fib_walk_sync (abf_policy_fib_node_type, api, &ctx);
- }
- fib_path_list_unlock (old_pl);
+ fib_node_back_walk_ctx_t ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+ };
+
+ fib_walk_sync (abf_policy_fib_node_type, api, &ctx);
}
+ fib_path_list_unlock (old_pl);
return (0);
}
@@ -272,14 +267,25 @@ abf_policy_cmd (vlib_main_t * vm,
unformat_fib_route_path, &rpath, &payload_proto))
vec_add1 (rpaths, rpath);
else
- return (clib_error_return (0, "unknown input '%U'",
- format_unformat_error, line_input));
+ {
+ clib_error_t *err;
+ err = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ return err;
+ }
}
if (INDEX_INVALID == policy_id)
{
vlib_cli_output (vm, "Specify a Policy ID");
- return 0;
+ goto out;
+ }
+
+ if (vec_len (rpaths) == 0)
+ {
+ vlib_cli_output (vm, "Hop path must not be empty");
+ goto out;
}
if (!is_del)
@@ -287,7 +293,7 @@ abf_policy_cmd (vlib_main_t * vm,
if (INDEX_INVALID == acl_index)
{
vlib_cli_output (vm, "ACL index must be set");
- return 0;
+ goto out;
}
rv = abf_policy_update (policy_id, acl_index, rpaths);
@@ -296,7 +302,7 @@ abf_policy_cmd (vlib_main_t * vm,
{
vlib_cli_output (vm,
"ACL index must match existing ACL index in policy");
- return 0;
+ goto out;
}
}
else
@@ -304,11 +310,11 @@ abf_policy_cmd (vlib_main_t * vm,
abf_policy_delete (policy_id, rpaths);
}
+out:
unformat_free (line_input);
return (NULL);
}
-/* *INDENT-OFF* */
/**
* Create an ABF policy.
*/
@@ -318,7 +324,6 @@ VLIB_CLI_COMMAND (abf_policy_cmd_node, static) = {
.short_help = "abf policy [add|del] id <index> acl <index> via ...",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static u8 *
format_abf (u8 * s, va_list * args)
@@ -345,13 +350,11 @@ abf_policy_walk (abf_policy_walk_cb_t cb, void *ctx)
{
u32 api;
- /* *INDENT-OFF* */
pool_foreach_index (api, abf_policy_pool)
{
if (!cb(api, ctx))
break;
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -374,12 +377,10 @@ abf_show_policy_cmd (vlib_main_t * vm,
if (INDEX_INVALID == policy_id)
{
- /* *INDENT-OFF* */
pool_foreach (ap, abf_policy_pool)
{
vlib_cli_output(vm, "%U", format_abf, ap);
}
- /* *INDENT-ON* */
}
else
{
@@ -394,14 +395,12 @@ abf_show_policy_cmd (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (abf_policy_show_policy_cmd_node, static) = {
.path = "show abf policy",
.function = abf_show_policy_cmd,
.short_help = "show abf policy <value>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static fib_node_t *
abf_policy_get_node (fib_node_index_t index)
@@ -456,7 +455,8 @@ static const fib_node_vft_t abf_policy_vft = {
static clib_error_t *
abf_policy_init (vlib_main_t * vm)
{
- abf_policy_fib_node_type = fib_node_register_new_type (&abf_policy_vft);
+ abf_policy_fib_node_type =
+ fib_node_register_new_type ("abf-policy", &abf_policy_vft);
return (NULL);
}
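
The abf_policy_cmd rework above also fixes a leak pattern worth noting: once
unformat_line_input has succeeded, every exit path must call unformat_free,
which the old early returns skipped. A minimal sketch of the resulting
single-cleanup-exit shape (the validation condition is hypothetical):

    static clib_error_t *
    some_cmd (vlib_main_t *vm, unformat_input_t *input,
              vlib_cli_command_t *cmd)
    {
      unformat_input_t _line_input, *line_input = &_line_input;

      if (!unformat_user (input, unformat_line_input, line_input))
        return 0;              /* nothing acquired, nothing to free */

      if (arguments_invalid)   /* hypothetical check */
        goto out;

      /* ... act on the parsed arguments ... */

    out:
      unformat_free (line_input);
      return 0;
    }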
diff --git a/src/plugins/acl/CMakeLists.txt b/src/plugins/acl/CMakeLists.txt
index c43dd23ea51..1bb60d94fbe 100644
--- a/src/plugins/acl/CMakeLists.txt
+++ b/src/plugins/acl/CMakeLists.txt
@@ -30,4 +30,15 @@ add_vpp_plugin(acl
API_TEST_SOURCES
acl_test.c
+
+ INSTALL_HEADERS
+ exports.h
+ exported_types.h
+ acl.h
+ fa_node.h
+ public_inlines.h
+ types.h
+ hash_lookup_types.h
+ lookup_context.h
+ hash_lookup_private.h
)
diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api
index a4706c3e529..404b512abed 100644
--- a/src/plugins/acl/acl.api
+++ b/src/plugins/acl/acl.api
@@ -19,7 +19,7 @@
used to control the ACL plugin
*/
-option version = "2.0.0";
+option version = "2.0.1";
import "plugins/acl/acl_types.api";
import "vnet/interface_types.api";
@@ -497,3 +497,43 @@ autoreply define acl_stats_intf_counters_enable
bool enable;
option vat_help = "[disable]";
};
+
+/** \brief Enable hash-based ACL lookups (default) or disable them (use linear search)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable - whether to enable or disable the usage of hash lookup algorithm
+*/
+autoreply define acl_plugin_use_hash_lookup_set
+{
+ option status="in_progress";
+ u32 client_index;
+ u32 context;
+ bool enable;
+};
+
+/** \brief Get whether hash-based ACL lookups are enabled (default) or not (linear search is used)
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define acl_plugin_use_hash_lookup_get
+{
+  option status="in_progress";
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Reply with the current state of the hash-based lookup
+    @param context - returned sender context, to match reply w/ request
+    @param enable - whether the hash-based lookup is enabled
+*/
+define acl_plugin_use_hash_lookup_get_reply
+{
+  option status="in_progress";
+  u32 context;
+  bool enable;
+};
diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c
index e8b5877ab21..e52e82fcf28 100644
--- a/src/plugins/acl/acl.c
+++ b/src/plugins/acl/acl.c
@@ -36,7 +36,6 @@
#include <acl/acl.api_enum.h>
#include <acl/acl.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#include "fa_node.h"
#include "public_inlines.h"
@@ -53,12 +52,10 @@ acl_main_t acl_main;
#include <vppinfra/bihash_template.h>
#include <vppinfra/bihash_template.c>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Access Control Lists (ACL)",
};
-/* *INDENT-ON* */
/* methods exported from ACL-as-a-service */
static acl_plugin_methods_t acl_plugin;
@@ -110,12 +107,10 @@ vl_api_acl_plugin_control_ping_t_handler (vl_api_acl_plugin_control_ping_t *
acl_main_t *am = &acl_main;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ACL_PLUGIN_CONTROL_PING_REPLY,
({
rmp->vpe_pid = ntohl (getpid ());
}));
- /* *INDENT-ON* */
}
static void
@@ -310,7 +305,9 @@ static int
acl_api_invalid_prefix (const vl_api_prefix_t * prefix)
{
ip_prefix_t ip_prefix;
- return ip_prefix_decode2 (prefix, &ip_prefix);
+ int valid_af =
+ prefix->address.af == ADDRESS_IP4 || prefix->address.af == ADDRESS_IP6;
+ return (!valid_af) || ip_prefix_decode2 (prefix, &ip_prefix);
}
static int
@@ -339,6 +336,8 @@ acl_add_list (u32 count, vl_api_acl_rule_t rules[],
return VNET_API_ERROR_INVALID_SRC_ADDRESS;
if (acl_api_invalid_prefix (&rules[i].dst_prefix))
return VNET_API_ERROR_INVALID_DST_ADDRESS;
+ if (rules[i].src_prefix.address.af != rules[i].dst_prefix.address.af)
+ return VNET_API_ERROR_INVALID_SRC_ADDRESS;
if (ntohs (rules[i].srcport_or_icmptype_first) >
ntohs (rules[i].srcport_or_icmptype_last))
return VNET_API_ERROR_INVALID_VALUE_2;
@@ -684,7 +683,6 @@ acl_interface_set_inout_acl_list (acl_main_t * am, u32 sw_if_index,
format_bitmap_hex, old_seen_acl_bitmap, format_bitmap_hex,
seen_acl_bitmap, format_bitmap_hex, change_acl_bitmap);
-/* *INDENT-OFF* */
clib_bitmap_foreach (acln, change_acl_bitmap) {
if (clib_bitmap_get(old_seen_acl_bitmap, acln)) {
/* ACL is being removed. */
@@ -698,7 +696,6 @@ acl_interface_set_inout_acl_list (acl_main_t * am, u32 sw_if_index,
vec_add1((*pinout_sw_if_index_vec_by_acl)[acln], sw_if_index);
}
}
-/* *INDENT-ON* */
vec_free ((*pinout_acl_vec_by_sw_if_index)[sw_if_index]);
(*pinout_acl_vec_by_sw_if_index)[sw_if_index] =
@@ -1807,12 +1804,10 @@ vl_api_acl_add_replace_t_handler (vl_api_acl_add_replace_t * mp)
rv = VNET_API_ERROR_INVALID_VALUE;
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_ACL_ADD_REPLACE_REPLY,
({
rmp->acl_index = htonl(acl_list_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1974,13 +1969,11 @@ vl_api_acl_dump_t_handler (vl_api_acl_dump_t * mp)
if (mp->acl_index == ~0)
{
- /* *INDENT-OFF* */
/* Just dump all ACLs */
pool_foreach (acl, am->acls)
{
send_acl_details(am, reg, acl, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -2060,12 +2053,10 @@ vl_api_acl_interface_list_dump_t_handler (vl_api_acl_interface_list_dump_t *
if (mp->sw_if_index == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (swif, im->sw_interfaces)
{
send_acl_interface_list_details(am, reg, swif->sw_if_index, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -2096,12 +2087,10 @@ vl_api_macip_acl_add_t_handler (vl_api_macip_acl_add_t * mp)
rv = VNET_API_ERROR_INVALID_VALUE;
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_MACIP_ACL_ADD_REPLY,
({
rmp->acl_index = htonl(acl_list_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -2123,12 +2112,10 @@ vl_api_macip_acl_add_replace_t_handler (vl_api_macip_acl_add_replace_t * mp)
rv = VNET_API_ERROR_INVALID_VALUE;
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_MACIP_ACL_ADD_REPLACE_REPLY,
({
rmp->acl_index = htonl(acl_list_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -2225,12 +2212,10 @@ vl_api_macip_acl_dump_t_handler (vl_api_macip_acl_dump_t * mp)
if (mp->acl_index == ~0)
{
/* Just dump all ACLs for now, with sw_if_index = ~0 */
- /* *INDENT-OFF* */
pool_foreach (acl, am->macip_acls)
{
send_macip_acl_details (am, reg, acl, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -2434,12 +2419,10 @@ static void
if (mp->sw_if_index == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (swif, im->sw_interfaces)
{
send_acl_interface_etype_whitelist_details(am, reg, swif->sw_if_index, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -2451,6 +2434,45 @@ static void
}
static void
+vl_api_acl_plugin_use_hash_lookup_set_t_handler (
+ vl_api_acl_plugin_use_hash_lookup_set_t *mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_acl_plugin_use_hash_lookup_set_reply_t *rmp;
+ vl_api_registration_t *reg;
+ int rv = 0;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ am->use_hash_acl_matching = mp->enable;
+ REPLY_MACRO (VL_API_ACL_PLUGIN_USE_HASH_LOOKUP_SET_REPLY);
+}
+
+static void
+vl_api_acl_plugin_use_hash_lookup_get_t_handler (
+ vl_api_acl_plugin_use_hash_lookup_get_t *mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_acl_plugin_use_hash_lookup_get_reply_t *rmp;
+ int msg_size = sizeof (*rmp);
+ vl_api_registration_t *reg;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ rmp = vl_msg_api_alloc (msg_size);
+ clib_memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id =
+ ntohs (VL_API_ACL_PLUGIN_USE_HASH_LOOKUP_GET_REPLY + am->msg_id_base);
+ rmp->context = mp->context;
+ rmp->enable = am->use_hash_acl_matching;
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
acl_set_timeout_sec (int timeout_type, u32 value)
{
acl_main_t *am = &acl_main;
@@ -2802,6 +2824,7 @@ acl_set_aclplugin_interface_fn (vlib_main_t * vm,
break;
}
+ unformat_free (line_input);
if (~0 == sw_if_index)
return (clib_error_return (0, "invalid interface"));
if (~0 == acl_index)
@@ -2809,7 +2832,6 @@ acl_set_aclplugin_interface_fn (vlib_main_t * vm,
acl_interface_add_del_inout_acl (sw_if_index, is_add, is_input, acl_index);
- unformat_free (line_input);
return (NULL);
}
@@ -2832,6 +2854,7 @@ acl_set_aclplugin_acl_fn (vlib_main_t * vm,
int rv;
int rule_idx = 0;
int n_rules_override = -1;
+ u32 acl_index = ~0;
u32 proto = 0;
u32 port1 = 0;
u32 port2 = 0;
@@ -2845,7 +2868,13 @@ acl_set_aclplugin_acl_fn (vlib_main_t * vm,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "permit+reflect"))
+ if (unformat (line_input, "index %d", &acl_index))
+ {
+ /* operate on this acl index (which must exist),
+	      /* operate on this acl index (which must exist);
+	       * if not specified, or set to -1, create a new ACL
+ }
+ else if (unformat (line_input, "permit+reflect"))
{
vec_validate_acl_rules (rules, rule_idx);
rules[rule_idx].is_permit = 2;
@@ -2933,7 +2962,6 @@ acl_set_aclplugin_acl_fn (vlib_main_t * vm,
break;
}
- u32 acl_index = ~0;
if (!tag)
vec_add (tag, "cli", 4);
@@ -2942,6 +2970,7 @@ acl_set_aclplugin_acl_fn (vlib_main_t * vm,
vec_free (rules);
vec_free (tag);
+ unformat_free (line_input);
if (rv)
return (clib_error_return (0, "failed"));
@@ -2951,6 +2980,37 @@ acl_set_aclplugin_acl_fn (vlib_main_t * vm,
}
static clib_error_t *
+acl_delete_aclplugin_acl_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ int rv;
+ u32 acl_index = ~0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "index %d", &acl_index))
+ {
+ /* operate on this acl index (which must exist) */
+ }
+ else
+ break;
+ }
+
+ rv = acl_del_list (acl_index);
+
+ unformat_free (line_input);
+ if (rv)
+ return (clib_error_return (0, "failed"));
+
+ vlib_cli_output (vm, "Deleted ACL index:%d", acl_index);
+ return (NULL);
+}
+
+static clib_error_t *
acl_show_aclplugin_macip_acl_fn (vlib_main_t * vm,
unformat_input_t *
input, vlib_cli_command_t * cmd)
@@ -3270,7 +3330,6 @@ acl_plugin_show_sessions (acl_main_t * am,
vlib_cli_output (vm, " link list id: %u", sess->link_list_id);
}
vlib_cli_output (vm, " connection add/del stats:", wk);
- /* *INDENT-OFF* */
pool_foreach (swif, im->sw_interfaces)
{
u32 sw_if_index = swif->sw_if_index;
@@ -3295,7 +3354,6 @@ acl_plugin_show_sessions (acl_main_t * am,
n_dels,
n_epoch_changes);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, " connection timeout type lists:", wk);
u8 tt = 0;
@@ -3432,6 +3490,8 @@ acl_show_aclplugin_tables_fn (vlib_main_t * vm,
}
vlib_cli_output (vm, "Stats counters enabled for interface ACLs: %d",
acl_main.interface_acl_counters_enabled);
+ vlib_cli_output (vm, "Use hash-based lookup for ACLs: %d",
+ acl_main.use_hash_acl_matching);
if (show_mask_type)
acl_plugin_show_tables_mask_type ();
if (show_acl_hash_info)
@@ -3455,7 +3515,6 @@ acl_clear_aclplugin_fn (vlib_main_t * vm,
return error;
}
- /* *INDENT-OFF* */
VLIB_CLI_COMMAND (aclplugin_set_command, static) = {
.path = "set acl-plugin",
.short_help = "set acl-plugin session timeout {{udp idle}|tcp {idle|transient}} <seconds>",
@@ -3545,26 +3604,45 @@ VLIB_CLI_COMMAND (aclplugin_set_interface_command, static) = {
/*?
* Create an Access Control List (ACL)
- * an ACL is composed of more than one Access control element (ACE). Multiple
+ * If index is not specified, a new one will be created. Otherwise, replace
+ * the one at this index.
+ *
+ * An ACL is composed of one or more Access Control Elements (ACEs). Multiple
* ACEs can be specified with this command using a comma separated list.
*
- * Each ACE describes a tuple of src+dst IP prefix, ip protocol, src+dst port ranges.
- * (the ACL plugin also support ICMP types/codes instead of UDP/TCP ports, but
- * this CLI does not).
+ * Each ACE describes a tuple of src+dst IP prefix, IP protocol, and src+dst
+ * port ranges. (The ACL plugin also supports ICMP types/codes instead of
+ * UDP/TCP ports, but this CLI does not.)
*
- * An ACL can optionally be assigned a 'tag' - which is an identifier understood
- * by the client. VPP does not examine it in any way.
+ * An ACL can optionally be assigned a 'tag' - which is an identifier
+ * understood by the client. VPP does not examine it in any way.
*
- * @cliexpar
- * <b><em> set acl-plugin acl <permit|deny> src <PREFIX> dst <PREFIX> proto <TCP|UDP> sport <X-Y> dport <X-Y> [tag FOO] </b></em>
- * @cliexend
+ * @cliexcmd{set acl-plugin acl <permit|deny|permit+reflect> src <PREFIX> dst
+ * <PREFIX> proto <TCP|UDP> sport <X-Y> dport <X-Y> tcpflags <X> mask <X>
+ * [tag FOO]}
?*/
VLIB_CLI_COMMAND (aclplugin_set_acl_command, static) = {
- .path = "set acl-plugin acl",
- .short_help = "set acl-plugin acl <permit|deny> src <PREFIX> dst <PREFIX> proto X sport X-Y dport X-Y [tag FOO] {use comma separated list for multiple rules}",
- .function = acl_set_aclplugin_acl_fn,
+ .path = "set acl-plugin acl",
+ .short_help =
+ "set acl-plugin acl [index <idx>] <permit|deny|permit+reflect> src "
+ "<PREFIX> dst <PREFIX> [proto X] [sport X[-Y]] [dport X[-Y]] [tcpflags "
+ "<int> mask <int>] [tag FOO] {use comma separated list for multiple "
+ "rules}",
+ .function = acl_set_aclplugin_acl_fn,
+};
+
+/*?
+ * Delete an Access Control List (ACL)
+ * Removes the ACL at the specified index, which must exist and must not be
+ * in use by any interface.
+ *
+ * @cliexcmd{delete acl-plugin acl index <idx>}
+ ?*/
+VLIB_CLI_COMMAND (aclplugin_delete_acl_command, static) = {
+ .path = "delete acl-plugin acl",
+ .short_help = "delete acl-plugin acl index <idx>",
+ .function = acl_delete_aclplugin_acl_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
acl_plugin_config (vlib_main_t * vm, unformat_input_t * input)
@@ -3696,7 +3774,7 @@ acl_init (vlib_main_t * vm)
vec_validate (pw->expired,
ACL_N_TIMEOUTS *
am->fa_max_deleted_sessions_per_interval);
- _vec_len (pw->expired) = 0;
+ vec_set_len (pw->expired, 0);
vec_validate_init_empty (pw->fa_conn_list_head, ACL_N_TIMEOUTS - 1,
FA_SESSION_BOGUS_INDEX);
vec_validate_init_empty (pw->fa_conn_list_tail, ACL_N_TIMEOUTS - 1,
diff --git a/src/plugins/acl/acl_hash_lookup_doc.md b/src/plugins/acl/acl_hash_lookup_doc.md
deleted file mode 100644
index 6b08e1bc953..00000000000
--- a/src/plugins/acl/acl_hash_lookup_doc.md
+++ /dev/null
@@ -1,240 +0,0 @@
-ACL plugin constant-time lookup design {#acl_hash_lookup}
-======================================
-
-The initial implementation of ACL plugin performs a trivial for() cycle,
-going through the assigned ACLs on a per-packet basis. This is not very
-efficient, even if for very short ACLs due to its simplicity it can beat
-more advanced methods.
-
-However, to cover the case of longer ACLs with acceptable performance,
-we need to have a better way of matching. This write-up proposes
-a mechanism to make a lookup from O(M) where M is number of entries
-to O(N) where N is number of different mask combinations.
-
-Preparation of ACL(s)
----------------------
-
-The ACL plugin will maintain a global list of "mask types", i.e. the specific
-configurations of "do not care" bits within the ACEs.
-Upon the creation of a new ACL, a pass will be made through all the
-ACEs, to assign and possibly allocate the "mask type number".
-
-Each ACL has a structure *hash_acl_info_t* representing the "hash-based"
-parts of information related to that ACL, primarily the array of
-*hash_ace_info_t* structures - each of the members of that array
-corresponding to one of the rules (ACEs) in the original ACL,
-for this they have a pair of *(acl_index, ace_index)* to keep track,
-predominantly for debugging.
-
-Why do we need a whole separate structure, and are not adding new fields
-to the existing rule structure? First, encapsulation, to minimize
-the pollution of the main ACL code with the hash-based lookup artifacts.
-Second, one rule may correspond to more than one "hash-based" ACE.
-In fact, most of the rules do correspond to two of those. Why ?
-
-Consider that the current ACL lookup logic is that if a packet
-is not the initial fragment, and there is an L4 entry acting on the packet,
-the comparison will be made only on the L4 protocol field value rather
-than on the protocol and port values. This behavior is governed by
-*l4_match_nonfirst_fragment* flag in the *acl_main*, and is needed to
-maintain the compatibility with the existing software switch implementation.
-
-While for the sequential check in *single_acl_match_5tuple()*
-it is very easy to implement by just breaking out at the right moment,
-in case of hash-based matching this cost us two checks:
-one on full 5-tuple and the flag *pkt.is_nonfirst_fragment* being zero,
-the second on 3-tuple and the flag *pkt.is_nonfirst_fragment* being one,
-with the second check triggered by the *acl_main.l4_match_nonfirst_fragment*
-setting being the default 1. This dictates the necessity of having a "match"
-field in a given *hash_ace_info_t* element, which would reflect the value
-we are supposed to match after applying the mask.
-
-There can be other circumstances when it might be beneficial to expand
-the given rule in the original ACL into multiple - for example, as an
-optimization within the port range handling for small port ranges
-(this is not done as of the time of writing).
-
-Assigning ACLs to an interface
-------------------------------
-
-Once the ACL list is assigned to an interface, or, rather, a new ACL
-is added to the list of the existing ACLs applied to the interface,
-we need to update the bihash accelerating the lookup.
-
-All the entries for the lookups are stored within a single *48_8* bihash,
-which captures the 5-tuple from the packet as well as the miscellaneous
-per-packet information flags, e.g. *l4_valid*, *is_non_first_fragment*,
-and so on. To facilitate the use of the single bihash by all the interfaces,
-the *is_ip6*, *is_input*, *sw_if_index* are part of the key,
-as well as *mask_type_index* - the latter being necessary because
-there can be entries with the same value but different masks, e.g.:
-`permit ::/0, permit::/128`.
-
-At the moment of an ACL being applied to an interface, we need to
-walk the list of *hash_ace_info_t* entries corresponding to that ACL,
-and update the bihash with the keys corresponding to the match
-values in these entries.
-
-The value of the hash match contains the index into a per-*sw_if_index* vector
-of *applied_ace_hash_entry_t* elements, as well as a couple of flags:
-*shadowed* (optimization: if this flag on a matched entry is zero, means
-we can stop the lookup early and declare a match - see below),
-and *need_portrange_check* - meaning that what matched was a superset
-of the actual match, and we need to perform an extra check.
-
-Also, upon insertion, we must keep in mind there can be
-multiple *applied_ace_hash_entry_t* for the same key and must keep
-a list of those. This is necessary to incrementally apply/unapply
-the ACLs as part of the ACL vector: say, two ACLs have
-"permit 2001:db8::1/128 any" - we should be able to retain the entry
-for the second ACL even if we have deleted the first one.
-Also, in case there are two entries with the same key but
-different port ranges, say 0..42 and 142..65535 - we need
-to be able to sequentially match on those if we decide not
-to expand them into individual port-specific entries.
-
-Per-packet lookup
------------------
-
-The simple single-packet lookup is defined in
-*multi_acl_match_get_applied_ace_index*, which returns the index
-of the applied hash ACE if there was a match, or ~0 if there wasn't.
-
-The future optimized per-packet lookup may be batched in three phases:
-
-1. Prepare the keys in the per-worker vector by doing logical AND of
- original 5-tuple record with the elements of the mask vector.
-2. Lookup the keys in the bihash in a batch manner, collecting the
- result with lowest u64 (acl index within vector, ACE index) from
- the hash lookup value, and performing the list walk if necessary
- (for portranges).
-3. Take the action from the ACL record as defined by (ACL#, ACE#) from the
- resulting lookup winner, or, if no match found, then perform default deny.
-
-Shadowed/independent/redundant ACEs
-------------------------------------
-
-During the phase of combining multiple ACLs into one rulebase, when they
-are applied to interface, we also can perform several optimizations.
-
-If a given ACE is a strict subset of another ACE located up in the linear
-search order, we can ignore this ACE completely - because by definition
-it will never match. We will call such an ACE *redundant*. Here is an example:
-
-```
-permit 2001:db8:1::/48 2001:db8:2::/48 (B)
-deny 2001:d8b:1:1::/64 2001:db8:2:1::/64 (A)
-```
-
-A bit more formally, we can define this relationship of an ACE A to ACE B as:
-
-```
-redundant(aceA, aceB) := (contains(protoB, protoA) && contains(srcB, srcA)
- && contains(dstB, dstA) && is_after(A, B))
-```
-
-Here as "contains" we define an operation operating on the sets defined by
-the protocol, (srcIP, srcPortDefinition) and (dstIP, dstPortDefinition)
-respectively, and returning true if all the elements represented by
-the second argument are represented by the first argument. The "is_after"
-is true if A is located below B in the ruleset.
-
-If a given ACE does not intersect at all with any other ACE
-in front of it, we can mark it as such.
-
-Then during the sequence of the lookups the successful hit on this ACE means
-we do not need to look up other mask combinations - thus potentially
-significantly speeding up the match process. Here is an example,
-assuming we have the following ACL:
-
-```
-permit 2001:db8:1::/48 2001:db8:2::/48 (B)
-deny 2001:db8:3::/48 2001:db8:2:1::/64 (A)
-```
-
-In this case if we match the second entry, we do not need to check whether
-we have matched the first one - the source addresses are completely
-different. We call such an ACE *independent* from another.
-
-We can define this as
-
-```
-independent(aceA, aceB) := (!intersect(protoA, protoB) ||
- !intersect(srcA, srcB) ||
- !intersect(dstA, dstB))
-```
-
-where intersect is defined as operation returning true if there are
-elements belonging to the sets of both arguments.
-
-If the entry A is neither redundant nor independent from B, and is below
-B in the ruleset, we call such an entry *shadowed* by B, here is an example:
-
-```
-deny tcp 2001:db8:1::/48 2001:db8:2::/48 (B)
-permit 2001:d8b:1:1::/64 2001:db8:2:1::/64 (A)
-```
-
-This means the earlier rule "carves out" a subset of A, thus leaving
-a "shadow". (Evidently, the action needs to be different for the shadow
-to have an effect, but for for the terminology sake we do not care).
-
-The more formal definition:
-
-```
-shadowed(aceA, aceB) := !redundant(aceA, aceB) &&
- !independent(aceA, aceB) &&
- is_after(aceA, aceB)
-```
-
-Using this terminology, any ruleset can be represented as
-a DAG (Directed Acyclic Graph), with the bottom being the implicit
-"deny any", pointing to the set of rules shadowing it or the ones
-it is redundant for.
-
-These rules may in turn be shadowing each other. There is no cycles in
-this graph because of the natural order of the rules - the rule located
-closer to the end of the ruleset can never shadow or make redundant a rule
-higher up.
-
-The optimization that enables can allow for is to skip matching certain
-masks on a per-lookup basis - if a given rule has matched,
-the only adjustments that can happen is the match with one of
-the shadowing rules.
-
-Also, another avenue for the optimization can be starting the lookup process
-with the mask type that maximizes the chances of the independent ACE match,
-thus resulting in an ACE lookup being a single hash table hit.
-
-
-Plumbing
---------
-
-All the new routines are located in a separate file,
-so we can cleanly experiment with a different approach if this
-does not fit all of the use cases.
-
-The constant-time lookup within the data path has the API with
-the same signature as:
-
-```
-u8
-multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
- int is_ip6, int is_input, u32 * acl_match_p,
- u32 * rule_match_p, u32 * trace_bitmap)
-```
-
-There should be a new upper-level function with the same signature, which
-will make a decision whether to use a linear lookup, or to use the
-constant-time lookup implemented by this work, or to add some other
-optimizations (e.g. by keeping the cache of the last N lookups).
-
-The calls to the routine doing preparatory work should happen
-in `acl_add_list()` after creating the linear-lookup structures,
-and the routine doing the preparatory work populating the hashtable
-should be called from `acl_interface_add_del_inout_acl()` or its callees.
-
-The initial implementation will be geared towards looking up a single
-match at a time, with the subsequent optimizations possible to make
-the lookup for more than one packet.
-
diff --git a/src/plugins/acl/acl_hash_lookup_doc.rst b/src/plugins/acl/acl_hash_lookup_doc.rst
new file mode 100644
index 00000000000..72842af423d
--- /dev/null
+++ b/src/plugins/acl/acl_hash_lookup_doc.rst
@@ -0,0 +1,243 @@
+ACL plugin constant-time lookup
+===============================
+
+The initial implementation of the ACL plugin performs a trivial for()
+cycle, going through the assigned ACLs on a per-packet basis. This is
+not very efficient, even if, for very short ACLs, its simplicity lets it
+beat more advanced methods.
+
+However, to cover the case of longer ACLs with acceptable performance,
+we need a better way of matching. This write-up proposes a mechanism to
+reduce the lookup from O(M), where M is the number of entries, to O(N),
+where N is the number of distinct mask combinations.
+
+Preparation of ACL(s)
+---------------------
+
+The ACL plugin will maintain a global list of “mask types”, i.e. the
+specific configurations of “do not care” bits within the ACEs. Upon the
+creation of a new ACL, a pass will be made through all the ACEs, to
+assign and possibly allocate the “mask type number”.
+
+Each ACL has a structure *hash_acl_info_t* representing the “hash-based”
+parts of information related to that ACL, primarily the array of
+*hash_ace_info_t* structures - each member of that array corresponding
+to one of the rules (ACEs) in the original ACL; to keep track of this
+correspondence, predominantly for debugging, each member carries a pair
+of *(acl_index, ace_index)*.
+
+Why do we need a whole separate structure, rather than adding new fields
+to the existing rule structure? First, encapsulation, to minimize the
+pollution of the main ACL code with the hash-based lookup artifacts.
+Second, one rule may correspond to more than one “hash-based” ACE. In
+fact, most of the rules do correspond to two of those. Why?
+
+Consider that the current ACL lookup logic is that if a packet is not
+the initial fragment, and there is an L4 entry acting on the packet, the
+comparison will be made only on the L4 protocol field value rather than
+on the protocol and port values. This behavior is governed by
+*l4_match_nonfirst_fragment* flag in the *acl_main*, and is needed to
+maintain the compatibility with the existing software switch
+implementation.
+
+While for the sequential check in *single_acl_match_5tuple()* it is very
+easy to implement by just breaking out at the right moment, in case of
+hash-based matching this costs us two checks: one on full 5-tuple and the
+flag *pkt.is_nonfirst_fragment* being zero, the second on 3-tuple and
+the flag *pkt.is_nonfirst_fragment* being one, with the second check
+triggered by the *acl_main.l4_match_nonfirst_fragment* setting being the
+default 1. This dictates the necessity of having a “match” field in a
+given *hash_ace_info_t* element, which would reflect the value we are
+supposed to match after applying the mask.
+
+There can be other circumstances when it might be beneficial to expand
+the given rule in the original ACL into multiple - for example, as an
+optimization within the port range handling for small port ranges (this
+is not done as of the time of writing).
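+
+As a minimal sketch of the idea - using toy structures, since the names
+below are illustrative rather than the plugin's actual types (the real
+code derives the mask from the full 5-tuple) - the mask of an ACE could
+be computed like this:
+
+::
+
+   #include <stdint.h>
+
+   /* hypothetical, simplified ACE: IPv4 prefixes plus one port range */
+   typedef struct {
+     uint32_t src_prefix_len, dst_prefix_len; /* 0..32 */
+     uint16_t sport_first, sport_last;
+   } toy_ace_t;
+
+   /* a "mask type" is the care/don't-care bit pattern of an ACE */
+   typedef struct {
+     uint32_t src_mask, dst_mask;
+     uint16_t sport_mask;
+   } toy_mask_t;
+
+   static toy_mask_t
+   toy_ace_mask (const toy_ace_t *ace)
+   {
+     toy_mask_t m;
+     m.src_mask = ace->src_prefix_len ? ~0u << (32 - ace->src_prefix_len) : 0;
+     m.dst_mask = ace->dst_prefix_len ? ~0u << (32 - ace->dst_prefix_len) : 0;
+     /* an exact port is maskable; a real range needs extra handling */
+     m.sport_mask = (ace->sport_first == ace->sport_last) ? 0xffff : 0;
+     return m;
+   }
+
+Two ACEs whose masks compare equal share the same mask type number; the
+number of distinct masks is what the lookup cost scales with.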
+
+Assigning ACLs to an interface
+------------------------------
+
+Once the ACL list is assigned to an interface, or, rather, a new ACL is
+added to the list of the existing ACLs applied to the interface, we need
+to update the bihash accelerating the lookup.
+
+All the entries for the lookups are stored within a single *48_8*
+bihash, which captures the 5-tuple from the packet as well as the
+miscellaneous per-packet information flags, e.g. *l4_valid*,
+*is_non_first_fragment*, and so on. To facilitate the use of the single
+bihash by all the interfaces, the *is_ip6*, *is_input*, *sw_if_index*
+are part of the key, as well as *mask_type_index* - the latter being
+necessary because there can be entries with the same value but different
+masks, e.g.: ``permit ::/0, permit ::/128``.
+
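+Conceptually, the key could be pictured as follows - a sketch with
+made-up field names; the actual layout lives in the plugin headers and
+differs in detail:
+
+::
+
+   /* conceptual 48_8 bihash key: scope fields plus the masked 5-tuple */
+   typedef struct {
+     uint32_t sw_if_index;       /* interface the entry is applied to */
+     uint8_t  is_ip6;            /* v4 and v6 share the single bihash */
+     uint8_t  is_input;          /* ingress vs egress */
+     uint16_t mask_type_index;   /* same value + different mask =>
+                                    different key */
+     uint8_t  masked_5tuple[40]; /* packet 5-tuple ANDed with the mask */
+   } toy_hash_key_t;
+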
+At the moment of an ACL being applied to an interface, we need to walk
+the list of *hash_ace_info_t* entries corresponding to that ACL, and
+update the bihash with the keys corresponding to the match values in
+these entries.
+
+The value of the hash match contains the index into a per-*sw_if_index*
+vector of *applied_ace_hash_entry_t* elements, as well as a couple of
+flags: *shadowed* (optimization: if this flag on a matched entry is
+zero, it means we can stop the lookup early and declare a match - see
+below), and *need_portrange_check* - meaning that what matched was a
+superset of the actual match, and we need to perform an extra check.
+
+Also, upon insertion, we must keep in mind there can be multiple
+*applied_ace_hash_entry_t* for the same key and must keep a list of
+those. This is necessary to incrementally apply/unapply the ACLs as part
+of the ACL vector: say, two ACLs have “permit 2001:db8::1/128 any” - we
+should be able to retain the entry for the second ACL even if we have
+deleted the first one. Also, in case there are two entries with the same
+key but different port ranges, say 0..42 and 142..65535 - we need to be
+able to sequentially match on those if we decide not to expand them into
+individual port-specific entries.
+
+Per-packet lookup
+-----------------
+
+The simple single-packet lookup is defined in
+*multi_acl_match_get_applied_ace_index*, which returns the index of the
+applied hash ACE if there was a match, or ~0 if there wasn’t.
+
+The future optimized per-packet lookup may be batched in three phases:
+
+1. Prepare the keys in the per-worker vector by doing logical AND of
+ original 5-tuple record with the elements of the mask vector.
+2. Lookup the keys in the bihash in a batch manner, collecting the
+ result with lowest u64 (acl index within vector, ACE index) from the
+ hash lookup value, and performing the list walk if necessary (for
+ portranges).
+3. Take the action from the ACL record as defined by (ACL#, ACE#) from
+ the resulting lookup winner, or, if no match found, then perform
+ default deny.
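+
+A sketch of these phases in C, with all helper names being hypothetical
+stand-ins (assume they are declared elsewhere):
+
+::
+
+   /* returns the winning (acl#, ace#) packed into a u64, ~0 if no match */
+   static uint64_t
+   toy_multi_acl_lookup (const toy_5tuple_t *pkt, const toy_mask_t *masks,
+                         int n_masks, toy_bihash_t *table)
+   {
+     uint64_t best = ~0ULL;                     /* lowest value wins */
+     for (int m = 0; m < n_masks; m++)
+       {
+         /* phase 1: AND the 5-tuple with the mask to form the key */
+         toy_hash_key_t key = toy_apply_mask (pkt, &masks[m]);
+         uint64_t v;
+         /* phase 2: bihash search, plus the list walk for port ranges */
+         if (toy_bihash_search (table, &key, &v) == 0)
+           {
+             v = toy_portrange_list_walk (table, &key, v);
+             if (v < best)
+               best = v;
+           }
+       }
+     /* phase 3: caller maps the winner to an action, or default deny */
+     return best;
+   }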
+
+Shadowed/independent/redundant ACEs
+-----------------------------------
+
+During the phase of combining multiple ACLs into one rulebase, when they
+are applied to an interface, we can also perform several optimizations.
+
+If a given ACE is a strict subset of another ACE located higher up in
+the linear search order, we can ignore this ACE completely - because by
+definition it will never match. We will call such an ACE *redundant*.
+Here is an example:
+
+::
+
+ permit 2001:db8:1::/48 2001:db8:2::/48 (B)
+ deny 2001:d8b:1:1::/64 2001:db8:2:1::/64 (A)
+
+A bit more formally, we can define this relationship of an ACE A to ACE
+B as:
+
+::
+
+ redundant(aceA, aceB) := (contains(protoB, protoA) && contains(srcB, srcA)
+ && contains(dstB, dstA) && is_after(A, B))
+
+Here “contains” denotes an operation on the sets defined by the
+protocol, (srcIP, srcPortDefinition) and (dstIP, dstPortDefinition)
+respectively, returning true if all the elements represented by the
+second argument are also represented by the first argument. The
+“is_after” is true if A is located below B in the ruleset.
+
+If a given ACE does not intersect at all with any other ACE in front of
+it, we can mark it as such.
+
+Then, during the sequence of lookups, a successful hit on this ACE
+means we do not need to look up other mask combinations - thus
+potentially significantly speeding up the match process. Here is an
+example, assuming we have the following ACL:
+
+::
+
+ permit 2001:db8:1::/48 2001:db8:2::/48 (B)
+ deny 2001:db8:3::/48 2001:db8:2:1::/64 (A)
+
+In this case if we match the second entry, we do not need to check
+whether we have matched the first one - the source addresses are
+completely different. We call such an ACE *independent* from another.
+
+We can define this as
+
+::
+
+ independent(aceA, aceB) := (!intersect(protoA, protoB) ||
+ !intersect(srcA, srcB) ||
+ !intersect(dstA, dstB))
+
+where intersect is defined as an operation returning true if there are
+elements belonging to the sets of both arguments.
+
+If the entry A is neither redundant nor independent from B, and is below
+B in the ruleset, we call such an entry *shadowed* by B; here is an
+example:
+
+::
+
+ deny tcp 2001:db8:1::/48 2001:db8:2::/48 (B)
+ permit 2001:d8b:1:1::/64 2001:db8:2:1::/64 (A)
+
+This means the earlier rule “carves out” a subset of A, thus leaving a
+“shadow”. (Evidently, the action needs to be different for the shadow to
+have an effect, but for the sake of terminology we do not care).
+
+The more formal definition:
+
+::
+
+ shadowed(aceA, aceB) := !redundant(aceA, aceB) &&
+ !independent(aceA, aceB) &&
+ is_after(aceA, aceB)
+
+Using this terminology, any ruleset can be represented as a DAG
+(Directed Acyclic Graph), with the bottom being the implicit “deny any”,
+pointing to the set of rules shadowing it or the ones it is redundant
+for.
+
+These rules may in turn be shadowing each other. There are no cycles in
+this graph because of the natural order of the rules - the rule located
+closer to the end of the ruleset can never shadow or make redundant a
+rule higher up.
+
+The optimization this enables is to skip matching certain masks on a
+per-lookup basis - if a given rule has matched, the only adjustment that
+can still happen is a match with one of the rules shadowing it.
+
+Another avenue for optimization is to start the lookup process with the
+mask type that maximizes the chances of an independent ACE match, thus
+making the ACE lookup a single hash table hit.
+
+Plumbing
+--------
+
+All the new routines are located in a separate file, so we can cleanly
+experiment with a different approach if this does not fit all of the use
+cases.
+
+The constant-time lookup within the data path has an API with the same
+signature as the linear lookup:
+
+::
+
+ u8
+ multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
+ int is_ip6, int is_input, u32 * acl_match_p,
+ u32 * rule_match_p, u32 * trace_bitmap)
+
+There should be a new upper-level function with the same signature,
+which will make a decision whether to use a linear lookup, or to use the
+constant-time lookup implemented by this work, or to add some other
+optimizations (e.g. by keeping the cache of the last N lookups).
+
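+One possible shape of such a dispatcher, keyed off the
+``use_hash_acl_matching`` flag; the ``hash_``/``linear_`` function names
+here are assumptions for illustration, not necessarily the actual ones:
+
+::
+
+   u8
+   multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple,
+                           int is_l2, int is_ip6, int is_input,
+                           u32 * acl_match_p, u32 * rule_match_p,
+                           u32 * trace_bitmap)
+   {
+     if (acl_main.use_hash_acl_matching)
+       return hash_multi_acl_match_5tuple (sw_if_index, pkt_5tuple, is_l2,
+                                           is_ip6, is_input, acl_match_p,
+                                           rule_match_p, trace_bitmap);
+     return linear_multi_acl_match_5tuple (sw_if_index, pkt_5tuple, is_l2,
+                                           is_ip6, is_input, acl_match_p,
+                                           rule_match_p, trace_bitmap);
+   }
+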
+The calls to the routine doing preparatory work should happen in
+``acl_add_list()`` after creating the linear-lookup structures, and the
+routine doing the preparatory work populating the hashtable should be
+called from ``acl_interface_add_del_inout_acl()`` or its callees.
+
+The initial implementation will be geared towards looking up a single
+match at a time, with the subsequent optimizations possible to make the
+lookup for more than one packet.
diff --git a/src/plugins/acl/acl_lookup_context.md b/src/plugins/acl/acl_lookup_context.md
deleted file mode 100644
index e95f82043f9..00000000000
--- a/src/plugins/acl/acl_lookup_context.md
+++ /dev/null
@@ -1,125 +0,0 @@
-Lookup contexts aka "ACL as a service" {#acl_lookup_context}
-======================================
-
-The initial implementation of the ACL plugin had tightly tied the policy (L3-L4) ACLs
-to ingress/egress processing on an interface.
-
-However, some uses outside of pure traffic control have appeared, for example,
-ACL-based forwarding, etc. Also, improved algorithms of the ACL lookup
-could benefit of the more abstract representation, not coupled to the interfaces.
-
-This describes a way to accommodate these use cases by generalizing the ACL
-lookups into "ACL lookup contexts", not tied to specific interfaces, usable
-by other portions of the code by utilizing the exports.h header file,
-which provides the necessary interface.
-
-
-Why "lookup contexts" and not "match me an ACL#" ?
-================================================
-
-The first reason is the logical grouping of multiple ACLs.
-
-The interface matching code currently allows for matching multiple ACLs
-in a 'first-match' fashion. Some other use cases also fall into a similar
-pattern: they attempt to match a sequence of ACLs, and the first matched ACL
-determines what the outcome is, e.g. where to forward traffic. Thus,
-a match never happens on an ACL in isolation, but always on a group of
-ACLs.
-
-The second reason is potential optimizations in matching.
-
-A naive match on series of ACLs each represented as a vector of ACEs
-does not care about the API level - it could be "match one ACL", or
-"match the set of ACLs" - there will be just a simple loop iterating over
-the ACLs to match, returning the first match. Be it in the ACL code or
-in the user code.
-
-However, for more involved lookup methods, providing a more high-level
-interface of matching over the entire group of ACLs allows for future
-improvements in the algorithms, delivered at once to all the users
-of the API.
-
-What is a "lookup context" ?
-============================
-
-An ACL lookup context is an entity that groups the set of ACL#s
-together for the purposes of a first-match lookup, and may store
-additional internal information needed to optimize the lookups
-for that particular vector of ACLs.
-
-Using ACL contexts in your code
-===============================
-
-In order to use the ACL lookup contexts, you need to include
-plugins/acl/exports.h into your code. This header includes
-all the necessary dependencies required.
-
-As you probably will invoke this code from another plugin,
-the non-inline function calls are implemented via function pointers,
-which you need to initialize by calling acl_plugin_exports_init(&acl_plugin), which,
-if everything succeeds, returns 0 and fills in the acl_plugin structure
-with pointers to the exported methods - else it will return clib_error_t with
-more information about what went wrong.
-
-When you have initialized the symbols, you also need to register yourself
-as a user of the ACL lookups - this allows to track the ACL lookup context
-ownership, as well as make the debug show outputs more user friendly.
-
-To do that, call acl_plugin.register_user_module(caller_module_string, val1_label, val2_label) -
-and record the returned value. This will bethe first parameter that you pass to create a new
-lookup context. The passed strings must be static, and are used as descriptions for the ACL
-contexts themselves, as well as labels for up to two user-supplied u32 labels, used to
-differentiate the lookup contexts for the debugging purposes.
-
-Creating a new context is done by calling acl_plugin.get_lookup_context_index(user_id, val1, val2).
-The first argument is your "user" ID obtained in a registration call earlier, the other two
-arguments are u32s with semantics that you designate. They are used purely for debugging purposes
-in the "show acl lookup context" command.
-
-To set the vector of ACL numbers to be looked up within the context, use the function
-acl_plugin.set_acl_vec_for_context(lc_index, acl_list). The first parameter specifies the context
-that you have created, the second parameter is a vector of u32s, each u32 being the index of the ACL
-which we should be looking up within this context. The command is idempotent, i.e.
-it unapplies the previously applied list of ACLs, and then sets the new list of ACLs.
-
-Subsequent ACL updates for the already applied ACLs will cause the re-application
-on an as-needed basis. Note, that the ACL application is potentially a relatively costly operation,
-so it is only expected that these changes will be done in the control plane, NOT in the datapath.
-
-The matching within the context is done using two functions - acl_plugin.fill_5tuple() and
-acl_plugin.match_5tuple() and their corresponding inline versions, named acl_plugin_fill_5tuple_inline()
-and acl_plugin_match_5tuple_inline(). The inline and non-inline versions have the equivalent functionality,
-in that the non-inline version calls the inline version. These two variants are provided
-for debugging/maintenance reasons.
-
-When you no longer need a particular context, you can return the allocated resources by calling
-acl_plugin.put_lookup_context_index() to mark it as free. The lookup structured associated with
-the vector of ACLs set for the lookup are cleaned up automatically. However, the ACLs themselves
-are not deleted and are available for subsequent reuse by other lookup contexts if needed.
-
-There is one delicate detail that you might want to be aware of.
-When the non-inline functions reference the inline functions,
-they are compiled as part of ACL plugin; whereas when you refer to the inline
-functions from your code, they are compiled as part of your code.
-This makes referring to a single acl_main structure a little trickier.
-
-It is done by having a static p_acl_main within the .h file,
-which points to acl_main of the ACL plugin, and is initialized by a static constructor
-function.
-
-This way the multiple includes and inlines will "just work" as one would expect.
-
-
-Debug CLIs
-==========
-
-To see the state of the ACL lookup contexts, you can issue "show acl-plugin lookup user" to see
-all of the users which registered for the usage of the ACL plugin lookup contexts,
-and "show acl-plugin lookup context" to show the actual contexts created. You will notice
-that the latter command uses the values supplied during the module registration in order to
-make the output more friendly.
-
-The "show acl-plugin acl" and "show acl-plugin interface" commands have also acquired the
-notion of lookup context, but there it is used from the client perspective, since
-with this change the interface ACL lookup itself is a user of ACL lookup contexts.
-
diff --git a/src/plugins/acl/acl_lookup_context.rst b/src/plugins/acl/acl_lookup_context.rst
new file mode 100644
index 00000000000..278e87381f3
--- /dev/null
+++ b/src/plugins/acl/acl_lookup_context.rst
@@ -0,0 +1,138 @@
+ACL Lookup contexts
+===================
+
+The initial implementation of the ACL plugin had tightly tied the policy
+(L3-L4) ACLs to ingress/egress processing on an interface.
+
+However, some uses outside of pure traffic control have appeared, for
+example, ACL-based forwarding, etc. Also, improved ACL lookup algorithms
+could benefit from a more abstract representation, not coupled to the
+interfaces.
+
+This describes a way to accommodate these use cases by generalizing the
+ACL lookups into “ACL lookup contexts”, not tied to specific interfaces,
+usable by other portions of the code by utilizing the exports.h header
+file, which provides the necessary interface.
+
+Why “lookup contexts” and not “match me an ACL”?
+-------------------------------------------------
+
+The first reason is the logical grouping of multiple ACLs.
+
+The interface matching code currently allows for matching multiple ACLs
+in a ‘first-match’ fashion. Some other use cases also fall into a
+similar pattern: they attempt to match a sequence of ACLs, and the first
+matched ACL determines what the outcome is, e.g. where to forward
+traffic. Thus, a match never happens on an ACL in isolation, but always
+on a group of ACLs.
+
+The second reason is potential optimizations in matching.
+
+A naive match on a series of ACLs each represented as a vector of ACEs
+does not care about the API level - it could be “match one ACL”, or
+“match the set of ACLs” - there will be just a simple loop iterating
+over the ACLs to match, returning the first match. Be it in the ACL code
+or in the user code.
+
+However, for more involved lookup methods, providing a more high-level
+interface of matching over the entire group of ACLs allows for future
+improvements in the algorithms, delivered at once to all the users of
+the API.
+
+What is a “lookup context”?
+----------------------------
+
+An ACL lookup context is an entity that groups the set of ACL#s together
+for the purposes of a first-match lookup, and may store additional
+internal information needed to optimize the lookups for that particular
+vector of ACLs.
+
+Using ACL contexts in your code
+-------------------------------
+
+In order to use the ACL lookup contexts, you need to include
+plugins/acl/exports.h into your code. This header includes all the
+necessary dependencies.
+
+As you probably will invoke this code from another plugin, the
+non-inline function calls are implemented via function pointers, which
+you need to initialize by calling acl_plugin_exports_init(&acl_plugin),
+which, if everything succeeds, returns 0 and fills in the acl_plugin
+structure with pointers to the exported methods - else it will return
+clib_error_t with more information about what went wrong.
+
+When you have initialized the symbols, you also need to register
+yourself as a user of the ACL lookups - this allows tracking the ACL
+lookup context ownership, as well as making the debug show outputs more
+user friendly.
+
+To do that, call acl_plugin.register_user_module(caller_module_string,
+val1_label, val2_label) - and record the returned value. This will be the
+first parameter that you pass to create a new lookup context. The passed
+strings must be static, and are used as descriptions for the ACL
+contexts themselves, as well as labels for up to two user-supplied u32
+labels, used to differentiate the lookup contexts for the debugging
+purposes.
+
+Creating a new context is done by calling
+acl_plugin.get_lookup_context_index(user_id, val1, val2). The first
+argument is your “user” ID obtained in a registration call earlier, the
+other two arguments are u32s with semantics that you designate. They are
+used purely for debugging purposes in the “show acl lookup context”
+command.
+
+To set the vector of ACL numbers to be looked up within the context, use
+the function acl_plugin.set_acl_vec_for_context(lc_index, acl_list). The
+first parameter specifies the context that you have created, the second
+parameter is a vector of u32s, each u32 being the index of the ACL which
+we should be looking up within this context. The command is idempotent,
+i.e. it unapplies the previously applied list of ACLs, and then sets the
+new list of ACLs.
+
+Subsequent ACL updates for the already applied ACLs will cause the
+re-application on an as-needed basis. Note that the ACL application is
+potentially a relatively costly operation, so it is only expected that
+these changes will be done in the control plane, NOT in the datapath.
+
+The matching within the context is done using two functions -
+acl_plugin.fill_5tuple() and acl_plugin.match_5tuple(), and their
+corresponding inline versions, named acl_plugin_fill_5tuple_inline() and
+acl_plugin_match_5tuple_inline(). The inline and non-inline versions
+have the equivalent functionality, in that the non-inline version calls
+the inline version. These two variants are provided for
+debugging/maintenance reasons.
+
+When you no longer need a particular context, you can return the
+allocated resources by calling acl_plugin.put_lookup_context_index() to
+mark it as free. The lookup structures associated with the vector of
+ACLs set for the lookup are cleaned up automatically. However, the ACLs
+themselves are not deleted and are available for subsequent reuse by
+other lookup contexts if needed.
+
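+Putting the control-plane pieces together, a consumer could look roughly
+like this - a compressed sketch using only the calls described above;
+error handling and the exact datapath invocation are elided:
+
+::
+
+   #include <plugins/acl/exports.h>
+
+   static acl_plugin_methods_t acl_plugin;
+   static u32 my_user_id;
+
+   clib_error_t *
+   my_module_acl_init (u32 some_acl_index)
+   {
+     clib_error_t *err = acl_plugin_exports_init (&acl_plugin);
+     if (err)
+       return err;              /* ACL plugin not available */
+
+     /* static strings; the labels describe the two u32s passed below */
+     my_user_id =
+       acl_plugin.register_user_module ("my-module", "val1", "val2");
+
+     u32 lc_index =
+       acl_plugin.get_lookup_context_index (my_user_id, 0 /* val1 */,
+                                            0 /* val2 */);
+
+     u32 *acl_vec = 0;
+     vec_add1 (acl_vec, some_acl_index);
+     acl_plugin.set_acl_vec_for_context (lc_index, acl_vec);
+
+     /* datapath: acl_plugin_fill_5tuple_inline() followed by
+        acl_plugin_match_5tuple_inline() against lc_index */
+
+     /* teardown, once the context is no longer needed */
+     acl_plugin.put_lookup_context_index (lc_index);
+     return 0;
+   }
+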
+There is one delicate detail that you might want to be aware of. When
+the non-inline functions reference the inline functions, they are
+compiled as part of ACL plugin; whereas when you refer to the inline
+functions from your code, they are compiled as part of your code. This
+makes referring to a single acl_main structure a little trickier.
+
+It is done by having a static p_acl_main within the .h file, which
+points to acl_main of the ACL plugin, and is initialized by a static
+constructor function.
+
+This way the multiple includes and inlines will “just work” as one would
+expect.
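+
+The pattern is along these lines (a sketch of the idea, not the verbatim
+header contents):
+
+::
+
+   /* in the .h file, instantiated in every compilation unit that
+      includes it */
+   static acl_main_t *p_acl_main = 0;
+
+   static void __attribute__ ((constructor))
+   acl_plugin_methods_init (void)
+   {
+     /* resolve the ACL plugin's acl_main once, at load time */
+     p_acl_main = vlib_get_plugin_symbol ("acl_plugin.so", "acl_main");
+   }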
+
+Debug CLIs
+----------
+
+To see the state of the ACL lookup contexts, you can issue “show
+acl-plugin lookup user” to see all of the users which registered for the
+usage of the ACL plugin lookup contexts, and “show acl-plugin lookup
+context” to show the actual contexts created. You will notice that the
+latter command uses the values supplied during the module registration
+in order to make the output more friendly.
+
+The “show acl-plugin acl” and “show acl-plugin interface” commands have
+also acquired the notion of lookup context, but there it is used from
+the client perspective, since with this change the interface ACL lookup
+itself is a user of ACL lookup contexts.
diff --git a/src/plugins/acl/acl_multicore_doc.md b/src/plugins/acl/acl_multicore_doc.md
deleted file mode 100644
index deec5e9d566..00000000000
--- a/src/plugins/acl/acl_multicore_doc.md
+++ /dev/null
@@ -1,349 +0,0 @@
-Multicore support for ACL plugin {#acl_multicore}
-================================
-
-This captures some considerations and design decisions that I have made,
-both for my own memory later on ("what the hell was I thinking?!?"),
-and for anyone interested to criticize/improve/hack on this code.
-
-One of the factors taken into account while making these decisions,
-was the relative emphasis on the multi-thread vs. single-thread
-use cases: the latter is the vastly more prevalent. But,
-one can not optimize the single-thread performance without
-having a functioning code for multi-thread.
-
-stateless ACLs
-==============
-
-The stateless trivially parallelizes, and the only potential for the
-race between the different threads is during the reconfiguration,
-at the time of replacing the old ACL being checked, with
-the new ACL.
-
-In case an acl_add_replace is being used to replace the rules
-within the existing entry, a reallocation of `am->acls[X].rules`
-vector will happen and potentially a change in count.
-
-acl_match_5tuple() has the following code:
-
-```{.c}
- a = am->acls + acl_index;
- for (i = 0; i < a->count; i++)
- {
- r = a->rules + i;
- . . .
-```
-
-Ideally we should be immune from a->rules changing,
-but the problem arises if the count changes in flight,
-and the new ruleset is smaller - then we will attempt
-to "match" against the free memory.
-
-This can(?) be solved by replacing the for() with while(),
-so the comparison happens at each iteration.
-
-full_acl_match_5tuple(), which iterates over the list
-of ACLs, is a bit less immune, since it takes the pointer
-to the vector to iterate and keeps a local copy of
-that pointer.
-
-This race can be solved by checking the
-current pointer to the vector with the source pointer,
-and seeing if there is an (unlikely) change, and if
-there is, return the "deny" action, or, better,
-restart the check.
-
-Since the check reloads the ACL list on a per-packet basis,
-there is only a window of opportunity of one packet to
-"match" packet against an incorrect rule set.
-The workers also do not change anything, only read.
-Therefore, it looks like building special structures
-to ensure that it does not happen at all might be not
-worth it.
-
-At least not until we have a unit-test able to
-reliably catch this condition and test that
-the measures applied are effective. Adding the code
-which is not possible to exercise is worse than
-not adding any code at all.
-
-So, I opt for "do-nothing" here for the moment.
-
-reflexive ACLs: single-thread
-=============================
-
-Before we talk multi-thread, is worth revisiting the
-design of the reflexive ACLs in the plugin, and
-the history of their evolution.
-
-The very first version of the ACL plugin, shipped in
-1701, mostly did the job using the existing components
-and gluing them together. Because it needed to work
-in bridged forwarding path only, using L2 classifier
-as an insertion point appeared natural, also L2 classifier,
-being a table with sessions, seemed like a good place
-to hold the sessions.
-
-So, the original design had two conceptual nodes:
-one, pointed by the next_miss from the L2 classifier table,
-was checking the actual ACL, and inserting session into
-the L2 classifier table, and the other one, pointed
-to by the next_match within the specific session rule,
-was checking the existing session. The timing out
-of the existing connections was done in the datapath,
-by periodically calling the aging function.
-
-This decision to use the existing components,
-with its attractiveness, did bring a few limitations as well:
-
-* L2 classifier is a simple mask-and-value match, with
-a fixed mask across the table. So, sanely supporting IPv6
-packets with extension headers in that framework was impossible.
-
-* There is no way to get a backpressure from L2 classifier
-depending on memory usage. When it runs out of memory,
-it simply crashes the box. When it runs out of memory ?
-We don't really know. Depends on how it allocates it.
-
-* Since we need to match the *reflected* traffic,
-we had to create *two* full session entries
-in two different directions, which is quite wasteful memory-wise.
-
-* (showstopper): the L2 classifier runs only in
-the bridged data path, so supporting routed data path
-would require creating something else entirely different,
-which would mean much more headaches support-wise going forward.
-
-Because of that, I have moved to a different model of
-creating a session-5-tuple from the packet data - once,
-and then doing all the matching just on that 5-tuple.
-
-This has allowed to add support for skipping IPv6 extension headers.
-
-Also, this new version started to store the sessions in a dedicated
-bihash-per-interface, with the session key data being
-aligned for the ingress packets, and being mirrored for the
-egress packets. This allows of significant savings in memory,
-because now we need to keep only one copy of the session table per
-interface instead of two, and also to only have ONE node for all the lookups,
-(L2/L3 path, in/out, IPv4/IPv6) - significantly reducing the code complexity.
-
-Unfortunately, bihash still has the "lack of backpressure" problem,
-in a sense that if you try to insert too many entries and run out
-of memory in the heap you supplied, you get a crash.
-
-To somewhat workaround against that, there is a "maximum tested number of sessions"
-value, which tracks the currently inserted sessions in the bihash,
-and if this number is being approached, a more aggressive cleanup
-can happen. If this number is reached, two behaviors are possible:
-
-* attempt to do the stateless ACL matching and permit the packet
- if it succeeds
-
-* deny the packet
-
-Currently I have opted for a second one, since it allows for
-a better defined behavior, and if you have to permit
-the traffic in both directions, why using stateful anyway ?
-
-In order to be able to do the cleanup, we need to discriminate between
-the session types, with each session type having its own idle timeout.
-In order to do that, we keep three lists, defined in enum acl_timeout_e:
-ACL_TIMEOUT_UDP_IDLE, ACL_TIMEOUT_TCP_IDLE, ACL_TIMEOUT_TCP_TRANSIENT.
-
-The first one is hopefully obvious - it is just all UDP connections.
-They have an idle timeout of 600 seconds.
-
-The second and third is a bit more subtle. TCP is a complicated protocol,
-and we need to tread the fine line between doing too little and doing
-too much, and triggering the potential compatibility issues because of
-being a "middlebox".
-
-I decided to split the TCP connections into two classes:
-established, and everything else. "Established", means we have seen
-the SYN and ACK from both sides (with PUSH obviously masked out).
-This is the "active" state of any TCP connection and we would like
-to ensure we do not screw it up. So, the connections in this state
-have the default idle timer of 24 hours.
-
-All the rest of the connections have the idle timeout of 2 minutes,
-(inspired by an old value of MSL) and based on the observation
-that the states this class represent are usually very short lived.
-
-Once we have these three baskets of connections, it is trivial to
-imagine a simple cleanup mechanism to deal with this: take a
-TCP transient connection that has been hanging around.
-
-It is debatable whether we want to do discrimination between the
-different TCP transient connections. Assuming we do FIFO (and
-the lists allow us to do just that), it means a given connection
-on the head of the list has been hanging around for longest.
-Thus, if we are short on resources, we might just go ahead and
-reuse it within the datapath.
-
-This is where we are slowly approaching the question
-"Why in the world have not you used timer wheel or such ?"
-
-The answer is simple: within the above constraints, it does
-not buy me much.
-
-Also, timer wheel creates a leaky abstraction with a difficult
-to manage corner case. Which corner case ?
-
-We have a set of objects (sessions) with an event that may
-or may not happen (idle timeout timer firing), and a
-necessity to reset the idle timeout when there is
-activity on the session.
-
-In the worst case, where we had a 10000 of one-packet
-UDP sessions just created 10 minutes ago, we would need
-to deal with a spike of 10000 expired timers.
-
-Of course, if we have the active traffic on all
-of these 10000 connections, then we will not have
-to deal with that ? Right, but we will still have to deal
-with canceling and requeueing the timers.
-
-In the best possible case, requeueing a timer is
-going to be something along the lines of a linked-list
-removal and reinsertion.
-
-However, keep in mind we already need to classify the
-connections for reuse, so therefore we already have
-the linked lists!
-
-And if we just check these linked lists periodically in
-a FIFO fashion, we can get away with a very simple per-packet operation:
-writing back the timestamp of "now" into the connection structure.
-
-Then rather than requeueing the list on a per-packet or per-frame
-basis, we can defer this action until the time this session
-appears on the head of the FIFO list, and the cleaning
-routine makes the decision about whether to discard
-the session (because the interval since last activity is bigger
-than the idle timeout), or to requeue the session back to
-the end of the list (because the last activity was less
-than idle timeout ago).
-
-So, rather than using the timers, we can simply reuse our classification
-FIFOs, with the following heuristic: do not look at the session that was
-enqueued at time X until X+session_timeout. If we enqueue the sessions
-in the order of their initial activity, then we can simply use enqueue
-timestamp of the head session as a decision criterion for when we need
-to get back at looking at it for the timeout purposes.
-
-Since the number of FIFOs is small, we get a slightly worse check
-performance than with timers, but still O(1).
-
-We seemingly do quite a few "useless" operations of requeueing the items
-back to the tail of the list - but, these are the operations we do not
-have to do in the active data path, so overall it is a win.
-
-(Diversion: I believe this problem is congruent to poll vs. epoll or
-events vs. threads, some reading on this subject:
-http://web.archive.org/web/20120225022154/http://sheddingbikes.com/posts/1280829388.html)
-
-We can also can run a TCP-like scheme for adaptively changing
-the wait period in the routine that deals with the connection timeouts:
-we can attempt to check the connections a couple of times per second
-(same as we would advance the timer wheel), and then if we have requeued
-close to a max-per-quantum number of connections, we can half the waiting
-interval, and if we did not requeue any, we can slowly increment the waiting
-interval - which at a steady state should stabilize similar to what the TCP rate
-does.
-
-reflexive ACLs: multi-thread
-=============================
-
-The single-threaded implementation in 1704 used a separate "cleaner" process
-to deal with the timing out of the connections.
-It is all good and great when you know that there is only a single core
-to run everything on, but the existence of the lists proves to be
-a massive difficulty when it comes to operating from multiple threads.
-
-Initial study shows that with a few assumptions (e.g. that the cleaner running in main thread
-and the worker have a demarcation point in time where either one or the other one touches
-the session in the list) it might be possible to make it work, but the resulting
-trickiness of doing it neatly with all the corner cases is quite large.
-
-So, for the multi-threaded scenario, we need to move the connection
-aging back to the same CPU as its creation.
-
-Luckily we can do this with the help of the interrupts.
-
-So, the design is as follows: the aging thread (acl_fa_session_cleaner_process)
-periodically fires the interrupts to the workers interrupt nodes (acl_fa_worker_session_cleaner_process_node.index),
-using vlib_node_set_interrupt_pending(), and
-the interrupt node acl_fa_worker_conn_cleaner_process() calls acl_fa_check_idle_sessions()
-which does the actual job of advancing the lists. And within the actual datapath the only thing we will be
-doing is putting the items onto FIFO, and updating the last active time on the existing connection.
-
-The one "delicate" part is that the worker for one leg of the connection might be different from
-the worker of another leg of the connection - but, even if the "owner" tries to free the connection,
-nothing terrible can happen - worst case the element of the pool (which is nominally free for a short period)
-will get the timestamp updated - same thing about the TCP flags seen.
-
-A slightly trickier issue arises when the packet initially seen by one worker (thus owned by that worker),
-and the return packet processed by another worker, and as a result changes the
-the class of the connection (e.g. becomes TCP_ESTABLISHED from TCP_TRANSIENT or vice versa).
-If the class changes from one with the shorter idle time to the one with the longer idle time,
-then unless we are in the starvation mode where the transient connections are recycled,
-we can simply do nothing and let the normal requeue mechanism kick in. If the class changes from the longer idle
-timer to the shorter idle timer, then we risk keeping the connection around for longer than needed, which
-will affect the resource usage.
-
-One solution to that is to have NxN ring buffers (where N is the number of workers), such that the non-owner
-can signal to the owner the connection# that needs to be requeued out of order.
-
-A simpler solution though, is to ensure that each FIFO's period is equal to that of a shortest timer.
-This way the resource starvation problem is taken care of, at an expense of some additional work.
-
-This all looks sufficiently nice and simple until a skeleton falls out of the closet:
-sometimes we want to clean the connections en masse before they expire.
-
-There few potential scenarios:
-1) removal of an ACL from the interface
-2) removal of an interface
-3) manual action of an operator (in the future).
-
-In order to tackle this, we need to modify the logic which decides whether to requeue the
-connection on the end of the list, or to delete it due to idle timeout:
-
-We define a point in time, and have each worker thread fast-forward through its FIFO,
-in the process looking for sessions that satisfy the criteria, and either keeping them or requeueing them.
-
-To keep the ease of appearance to the outside world, we still process this as an event
-within the connection cleaner thread, but this event handler does as follows:
-1) it creates the bitmap of the sw_if_index values requested to be cleared
-2) for each worker, it waits to ensure there is no cleanup operation in progress (and if there is one,
-it waits), and then makes a copy of the bitmap, sets the per-worker flag of a cleanup operation, and sends an interrupt.
-3) wait until all cleanup operations have completed.
-
-Within the worker interrupt node, we check if the "cleanup in progress" is set,
-and if it is, we check the "fast forward time" value. If unset, we initialize it to value now, and compare the
-requested bitmap of sw_if_index values (pending_clear_sw_if_index_bitmap) with the bitmap of sw_if_index that this worker deals with.
-
-(we set the bit in the bitmap every time we enqueue the packet onto a FIFO - serviced_sw_if_index_bitmap in acl_fa_conn_list_add_session).
-
-If the result of this AND operation is zero - then we can clear the flag of cleanup in progress and return.
-Else we kick off the quantum of cleanup, and make sure we get another interrupt ASAP if that cleanup operation returns non-zero,
-meaning there is more work to do.
-When that operation returns zero, everything has been processed, we can clear the "cleanup-in-progress" flag, and
-zeroize the bitmap of sw_if_index-es requested to be cleaned.
-
-The interrupt node signals its wish to receive an interrupt ASAP by setting interrupt_is_needed
-flag within the per-worker structure. The main thread, while waiting for the
-cleanup operation to complete, checks if there is a request for interrupt,
-and if there is - it sends one.
-
-This approach gives us a way to mass-clean the connections which is reusing the code of the regular idle
-connection cleanup.
-
-One potential inefficiency is the bitmap values set by the session insertion
-in the data path - there is nothing to clear them.
-
-So, if one rearranges the interface placement with the workers, then the cleanups will cause some unnecessary work.
-For now, we consider it an acceptable limitation. It can be resolved by having another per-worker bitmap, which, when set,
-would trigger the cleanup of the bits in the serviced_sw_if_index_bitmap).
-
-=== the end ===
-
diff --git a/src/plugins/acl/acl_multicore_doc.rst b/src/plugins/acl/acl_multicore_doc.rst
new file mode 100644
index 00000000000..142b6b216d2
--- /dev/null
+++ b/src/plugins/acl/acl_multicore_doc.rst
@@ -0,0 +1,354 @@
+Multicore support for ACL plugin
+================================
+
+This captures some considerations and design decisions that I have made,
+both for my own memory later on (“what the hell was I thinking?!?”), and
+for anyone interested in criticizing/improving/hacking on this code.
+
+One of the factors taken into account while making these decisions was
+the relative emphasis on the multi-thread vs. single-thread use cases:
+the latter is vastly more prevalent. But one cannot optimize the
+single-thread performance without having functioning code for
+multi-thread.
+
+stateless ACLs
+--------------
+
+Stateless ACL matching trivially parallelizes; the only potential for a
+race between the different threads is during reconfiguration, when the
+old ACL being checked is replaced with the new one.
+
+If acl_add_replace is used to replace the rules within an existing
+entry, the ``am->acls[X].rules`` vector may be reallocated, and its
+count may change.
+
+acl_match_5tuple() has the following code:
+
+.. code:: c
+
+ a = am->acls + acl_index;
+ for (i = 0; i < a->count; i++)
+ {
+ r = a->rules + i;
+ . . .
+
+Ideally we should be immune from a->rules changing, but the problem
+arises if the count changes in flight and the new ruleset is smaller -
+then we will attempt to “match” against freed memory.
+
+This can(?) be solved by replacing the for() with a while(), so that the
+comparison happens at each iteration.
+
+full_acl_match_5tuple(), which iterates over the list of ACLs, is a bit
+less immune, since it takes the pointer to the vector to iterate and
+keeps a local copy of that pointer.
+
+This race can be solved by comparing the current pointer to the vector
+with the saved pointer, to see if there was an (unlikely) change; and if
+there was, return the “deny” action or, better, restart the check.
+
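+In sketch form, such a compare-and-restart could look like this (a
+hypothetical illustration based on the acl_match_5tuple() loop above,
+not the actual plugin code):
+
+.. code:: c
+
+   /* Hypothetical sketch: snapshot the rules pointer, and restart the
+    * walk if the vector gets reallocated under us. */
+   acl_rule_t *rules;
+   restart:
+   rules = a->rules; /* local snapshot of the vector pointer */
+   for (i = 0; i < a->count; i++)
+     {
+       if (PREDICT_FALSE (rules != a->rules))
+         goto restart; /* the vector moved - redo from a fresh snapshot */
+       r = rules + i;
+       /* ... match the 5-tuple against r ... */
+     }
+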
+Since the check reloads the ACL list on a per-packet basis, there is
+only a one-packet window of opportunity to “match” a packet against an
+incorrect rule set. The workers also do not change anything, they only
+read. Therefore, it looks like building special structures to ensure
+this never happens might not be worth it.
+
+At least not until we have a unit test able to reliably catch this
+condition and verify that the measures applied are effective. Adding
+code that is impossible to exercise is worse than not adding any code at
+all.
+
+So, I opt for “do-nothing” here for the moment.
+
+reflexive ACLs: single-thread
+-----------------------------
+
+Before we talk multi-thread, it is worth revisiting the design of the
+reflexive ACLs in the plugin, and the history of their evolution.
+
+The very first version of the ACL plugin, shipped in 1701, mostly did
+the job using the existing components and gluing them together. Because
+it needed to work in the bridged forwarding path only, using the L2
+classifier as an insertion point appeared natural; also, the L2
+classifier, being a table with sessions, seemed like a good place to
+hold the sessions.
+
+So, the original design had two conceptual nodes: one, pointed to by the
+next_miss from the L2 classifier table, checked the actual ACL and
+inserted a session into the L2 classifier table; the other, pointed to
+by the next_match within the specific session rule, checked the existing
+session. The timing out of the existing connections was done in the
+datapath, by periodically calling the aging function.
+
+This decision to use the existing components, attractive as it was, did
+bring a few limitations as well:
+
+- L2 classifier is a simple mask-and-value match, with a fixed mask
+ across the table. So, sanely supporting IPv6 packets with extension
+ headers in that framework was impossible.
+
+- There is no way to get backpressure from the L2 classifier depending
+  on memory usage. When it runs out of memory, it simply crashes the
+  box. And when does it run out of memory? We don’t really know; it
+  depends on how it allocates it.
+
+- Since we need to match the *reflected* traffic, we had to create
+ *two* full session entries in two different directions, which is
+ quite wasteful memory-wise.
+
+- (showstopper): the L2 classifier runs only in the bridged data path,
+  so supporting the routed data path would require creating something
+  entirely different, which would mean many more headaches support-wise
+  going forward.
+
+Because of that, I have moved to a different model: creating a
+session 5-tuple from the packet data - once - and then doing all the
+matching just on that 5-tuple.
+
+This made it possible to add support for skipping IPv6 extension
+headers.
+
+Also, this new version started to store the sessions in a dedicated
+bihash-per-interface, with the session key data being aligned for the
+ingress packets, and mirrored for the egress packets. This allows for
+significant savings in memory, because now we need to keep only one copy
+of the session table per interface instead of two, and also lets us have
+ONE node for all the lookups (L2/L3 path, in/out, IPv4/IPv6) -
+significantly reducing the code complexity.
+
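+The mirroring idea, in sketch form (the types and the helper are
+hypothetical - the real key layout is fa_5tuple_t - but the principle is
+the same):
+
+.. code:: c
+
+   /* Hypothetical sketch: both directions of a connection map onto the
+    * same per-interface bihash key by mirroring the 5-tuple on egress. */
+   typedef struct
+   {
+     ip46_address_t src, dst;
+     u16 sport, dport;
+   } key5_t;
+
+   static void
+   mirror_key_for_egress (key5_t *key)
+   {
+     ip46_address_t a = key->src;
+     u16 p = key->sport;
+     key->src = key->dst;
+     key->dst = a;
+     key->sport = key->dport;
+     key->dport = p;
+   }
+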
+Unfortunately, bihash still has the “lack of backpressure” problem, in
+the sense that if you try to insert too many entries and run out of
+memory in the heap you supplied, you get a crash.
+
+To partially work around that, there is a “maximum tested number of
+sessions” value, which tracks the sessions currently inserted in the
+bihash; as this number is approached, a more aggressive cleanup can
+happen. If this number is reached, two behaviors are possible:
+
+- attempt to do the stateless ACL matching and permit the packet if it
+ succeeds
+
+- deny the packet
+
+Currently I have opted for the second one, since it gives a
+better-defined behavior - and if you have to permit the traffic in both
+directions, why use stateful ACLs anyway?
+
+In order to do the cleanup, we need to discriminate between the session
+types, with each session type having its own idle timeout. To do that,
+we keep three lists, defined in enum acl_timeout_e:
+ACL_TIMEOUT_UDP_IDLE, ACL_TIMEOUT_TCP_IDLE, ACL_TIMEOUT_TCP_TRANSIENT.
+
+The first one is hopefully obvious - it is just all UDP connections.
+They have an idle timeout of 600 seconds.
+
+The second and third are a bit more subtle. TCP is a complicated
+protocol, and we need to tread the fine line between doing too little
+and doing too much, without triggering potential compatibility issues
+by being a “middlebox”.
+
+I decided to split the TCP connections into two classes: established,
+and everything else. “Established” means we have seen the SYN and ACK
+from both sides (with PUSH obviously masked out). This is the “active”
+state of any TCP connection and we would like to ensure we do not screw
+it up. So, the connections in this state have a default idle timer of
+24 hours.
+
+All the rest of the connections have an idle timeout of 2 minutes
+(inspired by an old value of MSL), based on the observation that the
+states this class represents are usually very short-lived.
+
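+Putting the three classes together, in sketch form (the actual
+definitions live in fa_node.h; the timeouts quoted are the defaults
+described above):
+
+.. code:: c
+
+   /* Sketch of the three session classes and their default idle timeouts. */
+   typedef enum
+   {
+     ACL_TIMEOUT_UDP_IDLE = 0,  /* all UDP sessions: 600 seconds */
+     ACL_TIMEOUT_TCP_IDLE,      /* established TCP: 24 hours */
+     ACL_TIMEOUT_TCP_TRANSIENT, /* all other TCP: 120 seconds */
+   } acl_timeout_e;
+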
+Once we have these three baskets of connections, it is trivial to
+imagine a simple cleanup mechanism to deal with this: take the TCP
+transient connection that has been hanging around the longest, and
+recycle it.
+
+It is debatable whether we want to discriminate between the different
+TCP transient connections. Assuming we do FIFO (and the lists allow us
+to do just that), a given connection at the head of the list has been
+hanging around the longest. Thus, if we are short on resources, we might
+just go ahead and reuse it within the datapath.
+
+This is where we are slowly approaching the question “Why in the world
+have you not used a timer wheel or some such?”
+
+The answer is simple: within the above constraints, it does not buy me
+much.
+
+Also, a timer wheel creates a leaky abstraction with a
+difficult-to-manage corner case. Which corner case?
+
+We have a set of objects (sessions) with an event that may or may not
+happen (idle timeout timer firing), and a necessity to reset the idle
+timeout when there is activity on the session.
+
+In the worst case, where we had 10000 one-packet UDP sessions all
+created 10 minutes ago, we would need to deal with a spike of 10000
+expired timers.
+
+Of course, if we have active traffic on all of these 10000 connections,
+then we will not have to deal with that, right? True, but we will still
+have to deal with canceling and requeueing the timers.
+
+In the best possible case, requeueing a timer is going to be something
+along the lines of a linked-list removal and reinsertion.
+
+However, keep in mind we already need to classify the connections for
+reuse, so we already have the linked lists!
+
+And if we just check these linked lists periodically in a FIFO fashion,
+we can get away with a very simple per-packet operation: writing back
+the timestamp of “now” into the connection structure.
+
+Then rather than requeueing the list on a per-packet or per-frame basis,
+we can defer this action until the time this session appears on the head
+of the FIFO list, and the cleaning routine makes the decision about
+whether to discard the session (because the interval since last activity
+is bigger than the idle timeout), or to requeue the session back to the
+end of the list (because the last activity was less than idle timeout
+ago).
+
+So, rather than using the timers, we can simply reuse our classification
+FIFOs, with the following heuristic: do not look at a session that was
+enqueued at time X until X+session_timeout. If we enqueue the sessions
+in the order of their initial activity, then we can simply use the
+enqueue timestamp of the head session as a decision criterion for when
+we need to look at it again for timeout purposes.
+
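+A sketch of one pass of the cleaning routine over a single list
+(fifo_head(), requeue_to_tail(), delete_session() and the field names
+are illustrative; the real logic lives in acl_fa_check_idle_sessions()):
+
+.. code:: c
+
+   /* Hypothetical sketch of the head-of-FIFO check described above. */
+   while ((sess = fifo_head (list)) != 0)
+     {
+       if (now < sess->enqueue_time + timeout)
+         break; /* head is not due yet, so nothing behind it is either */
+       if (now - sess->last_active_time < timeout)
+         requeue_to_tail (list, sess); /* saw activity - extend its life */
+       else
+         delete_session (sess); /* idle longer than the timeout - drop */
+     }
+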
+Since the number of FIFOs is small, we get a slightly worse check
+performance than with timers, but still O(1).
+
+We seemingly do quite a few “useless” operations of requeueing the items
+back to the tail of the list - but these are operations we do not have
+to do in the active data path, so overall it is a win.
+
+(Diversion: I believe this problem is congruent to poll vs. epoll or
+events vs. threads, some reading on this subject:
+http://web.archive.org/web/20120225022154/http://sheddingbikes.com/posts/1280829388.html)
+
+We can also run a TCP-like scheme for adaptively changing the wait
+period in the routine that deals with the connection timeouts: we can
+attempt to check the connections a couple of times per second (same as
+we would advance the timer wheel), and then if we have requeued close to
+a max-per-quantum number of connections, we can halve the waiting
+interval, and if we did not requeue any, we can slowly increase the
+waiting interval - which at a steady state should stabilize, similar to
+what the TCP rate does.
+
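+In sketch form (the variable names are illustrative):
+
+.. code:: c
+
+   /* Hypothetical sketch of the adaptive wait period, AIMD-style. */
+   if (n_requeued >= max_per_quantum)
+     wait_interval = clib_max (wait_interval / 2, min_wait_interval);
+   else if (n_requeued == 0)
+     wait_interval = clib_min (wait_interval + step, max_wait_interval);
+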
+reflexive ACLs: multi-thread
+----------------------------
+
+The single-threaded implementation in 1704 used a separate “cleaner”
+process to deal with the timing out of the connections. That is all well
+and good when you know that there is only a single core to run
+everything on, but the existence of the lists proves to be a massive
+difficulty when it comes to operating from multiple threads.
+
+Initial study shows that with a few assumptions (e.g. that the cleaner
+running in the main thread and the worker have a demarcation point in
+time where only one or the other touches the session in the list) it
+might be possible to make it work, but the resulting trickiness of doing
+it neatly with all the corner cases is quite large.
+
+So, for the multi-threaded scenario, we need to move the connection
+aging back to the same CPU as its creation.
+
+Luckily, we can do this with the help of interrupts.
+
+So, the design is as follows: the aging thread
+(acl_fa_session_cleaner_process) periodically fires the interrupts to
+the workers’ interrupt nodes
+(acl_fa_worker_session_cleaner_process_node.index), using
+vlib_node_set_interrupt_pending(), and the interrupt node
+acl_fa_worker_conn_cleaner_process() calls acl_fa_check_idle_sessions()
+which does the actual job of advancing the lists. Within the actual
+datapath, the only things we do are putting the items onto the FIFO and
+updating the last-active time on the existing connection.
+
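+In sketch form, the periodic kick might look like this
+(vlib_node_set_interrupt_pending() is the real API; the loop itself is
+illustrative):
+
+.. code:: c
+
+   /* Illustrative: fire the per-worker cleaner interrupt on each thread. */
+   int i;
+   for (i = 0; i < vlib_get_n_threads (); i++)
+     vlib_node_set_interrupt_pending (
+       vlib_get_main_by_index (i),
+       acl_fa_worker_session_cleaner_process_node.index);
+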
+The one “delicate” part is that the worker for one leg of the connection
+might be different from the worker for the other leg - but even if the
+“owner” tries to free the connection, nothing terrible can happen: worst
+case, the element of the pool (which is nominally free for a short
+period) will get its timestamp updated - and the same goes for the TCP
+flags seen.
+
+A slightly trickier issue arises when the packet is initially seen by
+one worker (which thus owns the connection), while the return packet is
+processed by another worker and, as a result, changes the class of the
+connection (e.g. it becomes TCP_ESTABLISHED from TCP_TRANSIENT or vice
+versa). If the class changes from one with a shorter idle time to one
+with a longer idle time, then, unless we are in the starvation mode
+where the transient connections are recycled, we can simply do nothing
+and let the normal requeue mechanism kick in. If the class changes from
+the longer idle timer to the shorter one, then we risk keeping the
+connection around for longer than needed, which will affect the
+resource usage.
+
+One solution to that is to have NxN ring buffers (where N is the number
+of workers), such that the non-owner can signal to the owner the
+connection# that needs to be requeued out of order.
+
+A simpler solution, though, is to ensure that each FIFO’s check period
+is equal to that of the shortest timer. This way the resource starvation
+problem is taken care of, at the expense of some additional work.
+
+This all looks sufficiently nice and simple until a skeleton falls out
+of the closet: sometimes we want to clean the connections en masse
+before they expire.
+
+There are a few potential scenarios:
+
+1) removal of an ACL from the interface
+2) removal of an interface
+3) manual action of an operator (in the future).
+
+In order to tackle this, we need to modify the logic which decides
+whether to requeue the connection at the end of the list, or to delete
+it due to idle timeout:
+
+We define a point in time, and have each worker thread fast-forward
+through its FIFO, in the process looking for sessions that satisfy the
+criteria, and either keeping them or requeueing them.
+
+To keep appearances simple for the outside world, we still process this
+as an event within the connection cleaner thread, but this event handler
+does the following:
+
+1) it creates the bitmap of the sw_if_index values requested to be
+   cleared;
+2) for each worker, it waits to ensure there is no cleanup operation in
+   progress, then makes a copy of the bitmap, sets the per-worker flag
+   of a cleanup operation, and sends an interrupt;
+3) it waits until all cleanup operations have completed.
+
+Within the worker interrupt node, we check if “cleanup in progress” is
+set, and if it is, we check the “fast forward time” value. If unset, we
+initialize it to the current time, and compare the requested bitmap of
+sw_if_index values (pending_clear_sw_if_index_bitmap) with the bitmap of
+the sw_if_index values that this worker deals with.
+
+(we set the bit in the bitmap every time we enqueue the packet onto a
+FIFO - serviced_sw_if_index_bitmap in acl_fa_conn_list_add_session).
+
+If the result of this AND operation is zero, then we can clear the
+“cleanup in progress” flag and return. Otherwise, we kick off a quantum
+of cleanup, and make sure we get another interrupt ASAP if that cleanup
+operation returns non-zero, meaning there is more work to do. When that
+operation returns zero, everything has been processed: we can clear the
+“cleanup-in-progress” flag, and zeroize the bitmap of sw_if_index values
+requested to be cleaned.
+
+The interrupt node signals its wish to receive an interrupt ASAP by
+setting the interrupt_is_needed flag within the per-worker structure.
+The main thread, while waiting for the cleanup operation to complete,
+checks if there is a request for an interrupt, and if there is, it sends
+one.
+
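+A condensed sketch of the worker-side logic from the last few paragraphs
+(the two bitmaps are named as in the text; the flag names and helpers
+are illustrative):
+
+.. code:: c
+
+   /* Hypothetical sketch of the worker interrupt node's cleanup branch. */
+   uword *overlap =
+     clib_bitmap_dup_and (pw->pending_clear_sw_if_index_bitmap,
+                          pw->serviced_sw_if_index_bitmap);
+   if (clib_bitmap_is_zero (overlap))
+     pw->cleanup_in_progress = 0; /* nothing on this worker to clean */
+   else if (acl_fa_check_idle_sessions (am, thread_index, now))
+     pw->interrupt_is_needed = 1; /* more work - ask for another kick */
+   else
+     {
+       pw->cleanup_in_progress = 0; /* quantum done, all processed */
+       clib_bitmap_zero (pw->pending_clear_sw_if_index_bitmap);
+     }
+   clib_bitmap_free (overlap);
+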
+This approach gives us a way to mass-clean the connections that reuses
+the code of the regular idle connection cleanup.
+
+One potential inefficiency is the bitmap values set by the session
+insertion in the data path - there is nothing to clear them.
+
+So, if one rearranges the placement of interfaces across the workers,
+the cleanups will cause some unnecessary work. For now, we consider this
+an acceptable limitation. It can be resolved by having another
+per-worker bitmap which, when set, would trigger the cleanup of the bits
+in serviced_sw_if_index_bitmap.
+
+=== the end ===
diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c
index 79058cdc268..8404689dc06 100644
--- a/src/plugins/acl/acl_test.c
+++ b/src/plugins/acl/acl_test.c
@@ -18,6 +18,8 @@
*------------------------------------------------------------------
*/
+#include <byteswap.h>
+
#include <vat/vat.h>
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
@@ -36,8 +38,6 @@ uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
/* Declare message IDs */
#include <acl/acl.api_enum.h>
#include <acl/acl.api_types.h>
-#define vl_print(handle, ...)
-#undef vl_print
#define vl_endianfun /* define message structures */
#include <acl/acl.api.h>
#undef vl_endianfun
@@ -99,6 +99,15 @@ static void vl_api_acl_plugin_get_version_reply_t_handler
vam->result_ready = 1;
}
+ static void
+ vl_api_acl_plugin_use_hash_lookup_get_reply_t_handler (
+ vl_api_acl_plugin_use_hash_lookup_get_reply_t *mp)
+ {
+ vat_main_t *vam = acl_test_main.vat_main;
+ clib_warning ("ACL hash lookups enabled: %d", mp->enable);
+ vam->result_ready = 1;
+ }
+
static void vl_api_acl_interface_list_details_t_handler
(vl_api_acl_interface_list_details_t * mp)
{
@@ -150,9 +159,9 @@ static void vl_api_acl_plugin_get_conn_table_max_entries_reply_t_handler
(vl_api_acl_plugin_get_conn_table_max_entries_reply_t * mp)
{
vat_main_t * vam = acl_test_main.vat_main;
- clib_warning("\nConn table max entries: %d",
- __bswap_64(mp->conn_table_max_entries) );
- vam->result_ready = 1;
+ clib_warning ("\nConn table max entries: %d",
+ clib_net_to_host_u64 (mp->conn_table_max_entries));
+ vam->result_ready = 1;
}
static inline u8 *
@@ -484,10 +493,10 @@ static int api_acl_add_replace (vat_main_t * vam)
if (vec_len(tag) >= sizeof(mp->tag))
{
tag[sizeof(mp->tag)-1] = 0;
- _vec_len(tag) = sizeof(mp->tag);
- }
- clib_memcpy(mp->tag, tag, vec_len(tag));
- vec_free(tag);
+ vec_set_len (tag, sizeof (mp->tag));
+ }
+ clib_memcpy (mp->tag, tag, vec_len (tag));
+ vec_free (tag);
}
mp->acl_index = ntohl(acl_index);
mp->count = htonl(n_rules);
@@ -551,6 +560,63 @@ static int api_acl_stats_intf_counters_enable (vat_main_t * vam)
return ret;
}
+static int
+api_acl_plugin_use_hash_lookup_set (vat_main_t *vam)
+{
+ acl_test_main_t *sm = &acl_test_main;
+ unformat_input_t *i = vam->input;
+ vl_api_acl_plugin_use_hash_lookup_set_t *mp;
+ u32 msg_size = sizeof (*mp);
+ int ret;
+
+ vam->result_ready = 0;
+ mp = vl_msg_api_alloc_as_if_client (msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id =
+ ntohs (VL_API_ACL_PLUGIN_USE_HASH_LOOKUP_SET + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+ mp->enable = 1;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "disable"))
+ mp->enable = 0;
+ else if (unformat (i, "enable"))
+ mp->enable = 1;
+ else
+ break;
+ }
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_acl_plugin_use_hash_lookup_get (vat_main_t *vam)
+{
+ acl_test_main_t *sm = &acl_test_main;
+ vl_api_acl_plugin_use_hash_lookup_set_t *mp;
+ u32 msg_size = sizeof (*mp);
+ int ret;
+
+ vam->result_ready = 0;
+ mp = vl_msg_api_alloc_as_if_client (msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id =
+ ntohs (VL_API_ACL_PLUGIN_USE_HASH_LOOKUP_GET + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
/*
* Read the series of ACL entries from file in the following format:
@@ -1267,10 +1333,10 @@ static int api_macip_acl_add (vat_main_t * vam)
if (vec_len(tag) >= sizeof(mp->tag))
{
tag[sizeof(mp->tag)-1] = 0;
- _vec_len(tag) = sizeof(mp->tag);
- }
- clib_memcpy(mp->tag, tag, vec_len(tag));
- vec_free(tag);
+ vec_set_len (tag, sizeof (mp->tag));
+ }
+ clib_memcpy (mp->tag, tag, vec_len (tag));
+ vec_free (tag);
}
mp->count = htonl(n_rules);
@@ -1409,10 +1475,10 @@ static int api_macip_acl_add_replace (vat_main_t * vam)
if (vec_len(tag) >= sizeof(mp->tag))
{
tag[sizeof(mp->tag)-1] = 0;
- _vec_len(tag) = sizeof(mp->tag);
- }
- clib_memcpy(mp->tag, tag, vec_len(tag));
- vec_free(tag);
+ vec_set_len (tag, sizeof (mp->tag));
+ }
+ clib_memcpy (mp->tag, tag, vec_len (tag));
+ vec_free (tag);
}
mp->acl_index = ntohl(acl_index);
diff --git a/src/plugins/acl/dataplane_node.c b/src/plugins/acl/dataplane_node.c
index 1a050f54b14..027afc0f660 100644
--- a/src/plugins/acl/dataplane_node.c
+++ b/src/plugins/acl/dataplane_node.c
@@ -44,7 +44,6 @@ typedef struct
u8 action;
} acl_fa_trace_t;
-/* *INDENT-OFF* */
#define foreach_acl_fa_error \
_(ACL_DROP, "ACL deny packets") \
_(ACL_PERMIT, "ACL permit packets") \
@@ -63,7 +62,6 @@ typedef enum
ACL_FA_N_ERROR,
} acl_fa_error_t;
-/* *INDENT-ON* */
always_inline u16
get_current_policy_epoch (acl_main_t * am, int is_input, u32 sw_if_index0)
@@ -176,7 +174,7 @@ prefetch_session_entry (acl_main_t * am, fa_full_session_id_t f_sess_id)
{
fa_session_t *sess = get_session_ptr_no_check (am, f_sess_id.thread_index,
f_sess_id.session_index);
- CLIB_PREFETCH (sess, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (sess, sizeof (*sess), STORE);
}
always_inline u8
@@ -728,7 +726,6 @@ format_acl_plugin_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
static char *acl_fa_error_strings[] = {
#define _(sym,string) string,
@@ -973,7 +970,6 @@ VNET_FEATURE_INIT (acl_out_ip4_fa_feature, static) = {
.runs_before = VNET_FEATURES ("ip4-dvr-reinject", "interface-output"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/acl/dataplane_node_nonip.c b/src/plugins/acl/dataplane_node_nonip.c
index 9954ea0258e..1126b57343d 100644
--- a/src/plugins/acl/dataplane_node_nonip.c
+++ b/src/plugins/acl/dataplane_node_nonip.c
@@ -45,7 +45,6 @@ typedef struct
u8 action;
} acl_fa_trace_t;
-/* *INDENT-OFF* */
#define foreach_acl_fa_error \
_(ACL_DROP, "ACL deny packets") \
_(ACL_PERMIT, "ACL permit packets") \
@@ -64,7 +63,6 @@ typedef enum
ACL_FA_N_ERROR,
} acl_fa_error_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -109,7 +107,6 @@ _(DROP, "dropped outbound non-whitelisted non-ip packets") \
_(PERMIT, "permitted outbound whitelisted non-ip packets") \
-/* *INDENT-OFF* */
typedef enum
{
@@ -138,7 +135,6 @@ static char *fa_out_nonip_error_strings[] = {
foreach_nonip_out_error
#undef _
};
-/* *INDENT-ON* */
always_inline int
@@ -237,7 +233,6 @@ VLIB_NODE_FN (acl_out_nonip_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (acl_in_nonip_node) =
{
@@ -283,7 +278,6 @@ VNET_FEATURE_INIT (acl_out_l2_nonip_fa_feature, static) =
.runs_before = VNET_FEATURES ("l2-output-feat-arc-end"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/acl/exports.h b/src/plugins/acl/exports.h
index d904ad3bbae..04159aa052d 100644
--- a/src/plugins/acl/exports.h
+++ b/src/plugins/acl/exports.h
@@ -24,8 +24,8 @@
#include <vlib/unix/plugin.h>
-#include <plugins/acl/acl.h>
-#include <plugins/acl/fa_node.h>
-#include <plugins/acl/public_inlines.h>
+#include "acl.h"
+#include "fa_node.h"
+#include "public_inlines.h"
#endif /* included_acl_exports_h */
diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h
index c969377ded7..c4a971aada3 100644
--- a/src/plugins/acl/fa_node.h
+++ b/src/plugins/acl/fa_node.h
@@ -5,7 +5,7 @@
#include <vppinfra/bihash_16_8.h>
#include <vppinfra/bihash_40_8.h>
-#include <plugins/acl/exported_types.h>
+#include "exported_types.h"
// #define FA_NODE_VERBOSE_DEBUG 3
diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c
index 85b54b3e8ac..9c3c662a8f1 100644
--- a/src/plugins/acl/hash_lookup.c
+++ b/src/plugins/acl/hash_lookup.c
@@ -261,13 +261,11 @@ static u32
find_mask_type_index(acl_main_t *am, fa_5tuple_t *mask)
{
ace_mask_type_entry_t *mte;
- /* *INDENT-OFF* */
pool_foreach (mte, am->ace_mask_type_pool)
{
if(memcmp(&mte->mask, mask, sizeof(*mask)) == 0)
return (mte - am->ace_mask_type_pool);
}
- /* *INDENT-ON* */
return ~0;
}
@@ -682,7 +680,7 @@ hash_acl_apply(acl_main_t *am, u32 lc_index, int acl_index, u32 acl_position)
if (vec_len(ha->rules) > 0) {
int old_vec_len = vec_len(*applied_hash_aces);
vec_validate((*applied_hash_aces), old_vec_len + vec_len(ha->rules) - 1);
- _vec_len((*applied_hash_aces)) = old_vec_len;
+ vec_set_len ((*applied_hash_aces), old_vec_len);
}
/* add the rules from the ACL to the hash table for lookup and append to the vector*/
@@ -903,7 +901,7 @@ hash_acl_unapply(acl_main_t *am, u32 lc_index, int acl_index)
move_applied_ace_hash_entry(am, lc_index, applied_hash_aces, tail_offset + i, base_offset + i);
}
/* trim the end of the vector */
- _vec_len((*applied_hash_aces)) -= vec_len(ha->rules);
+ vec_dec_len ((*applied_hash_aces), vec_len (ha->rules));
remake_hash_applied_mask_info_vec(am, applied_hash_aces, lc_index);
@@ -1159,7 +1157,6 @@ acl_plugin_show_tables_mask_type (void)
ace_mask_type_entry_t *mte;
vlib_cli_output (vm, "Mask-type entries:");
- /* *INDENT-OFF* */
pool_foreach (mte, am->ace_mask_type_pool)
{
vlib_cli_output(vm, " %3d: %016llx %016llx %016llx %016llx %016llx %016llx refcount %d",
@@ -1167,7 +1164,6 @@ acl_plugin_show_tables_mask_type (void)
mte->mask.kv_40_8.key[0], mte->mask.kv_40_8.key[1], mte->mask.kv_40_8.key[2],
mte->mask.kv_40_8.key[3], mte->mask.kv_40_8.key[4], mte->mask.kv_40_8.value, mte->refcount);
}
- /* *INDENT-ON* */
}
void
diff --git a/src/plugins/acl/public_inlines.h b/src/plugins/acl/public_inlines.h
index 6b69bcef61e..eb9f0de920f 100644
--- a/src/plugins/acl/public_inlines.h
+++ b/src/plugins/acl/public_inlines.h
@@ -19,11 +19,11 @@
#include <stdint.h>
#include <vlib/unix/plugin.h>
-#include <plugins/acl/acl.h>
-#include <plugins/acl/fa_node.h>
-#include <plugins/acl/hash_lookup_private.h>
+#include "acl.h"
+#include "fa_node.h"
+#include "hash_lookup_private.h"
-#include <plugins/acl/exported_types.h>
+#include "exported_types.h"
#define LOAD_SYMBOL_FROM_PLUGIN_TO(p, s, st) \
({ \
diff --git a/src/plugins/acl/sess_mgmt_node.c b/src/plugins/acl/sess_mgmt_node.c
index 3fc4f5e1982..418baef9b6b 100644
--- a/src/plugins/acl/sess_mgmt_node.c
+++ b/src/plugins/acl/sess_mgmt_node.c
@@ -188,8 +188,7 @@ acl_fa_check_idle_sessions (acl_main_t * am, u16 thread_index, u64 now)
}
}
if (pw->wip_session_change_requests)
- _vec_len (pw->wip_session_change_requests) = 0;
-
+ vec_set_len (pw->wip_session_change_requests, 0);
{
u8 tt = 0;
@@ -309,7 +308,7 @@ acl_fa_check_idle_sessions (acl_main_t * am, u16 thread_index, u64 now)
total_expired = vec_len (pw->expired);
/* zero out the vector which we have acted on */
if (pw->expired)
- _vec_len (pw->expired) = 0;
+ vec_set_len (pw->expired, 0);
/* if we were advancing and reached the end
* (no more sessions to recycle), reset the fast-forward timestamp */
@@ -329,7 +328,6 @@ acl_fa_check_idle_sessions (acl_main_t * am, u16 thread_index, u64 now)
*/
-/* *INDENT-OFF* */
#define foreach_acl_fa_cleaner_error \
_(UNKNOWN_EVENT, "unknown event received") \
/* end of errors */
@@ -348,7 +346,6 @@ static char *acl_fa_cleaner_error_strings[] = {
#undef _
};
-/* *INDENT-ON* */
static vlib_node_registration_t acl_fa_session_cleaner_process_node;
static vlib_node_registration_t acl_fa_worker_session_cleaner_process_node;
@@ -374,8 +371,9 @@ send_one_worker_interrupt (vlib_main_t * vm, acl_main_t * am,
}
void
-aclp_post_session_change_request (acl_main_t * am, u32 target_thread,
- u32 target_session, u32 request_type)
+aclp_post_session_change_request (acl_main_t *am, u32 target_thread,
+ u32 target_session,
+ acl_fa_sess_req_t request_type)
{
acl_fa_per_worker_data_t *pw_me =
&am->per_worker_data[os_get_thread_index ()];
@@ -724,6 +722,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
else
{
+ clib_bitmap_free (pw0->pending_clear_sw_if_index_bitmap);
if (clear_all)
{
/* if we need to clear all, then just clear the interfaces that we are servicing */
@@ -788,7 +787,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
send_interrupts_to_workers (vm, am);
if (event_data)
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
/*
* If the interrupts were not processed yet, ensure we wait a bit,
@@ -860,10 +859,8 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
{
acl_fa_verify_init_sessions (am);
am->fa_total_enabled_count++;
- void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
ACL_FA_CLEANER_RESCHEDULE, 0);
- clib_mem_set_heap (oldheap);
}
else
{
@@ -874,12 +871,10 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
{
ASSERT (clib_bitmap_get (am->fa_in_acl_on_sw_if_index, sw_if_index) !=
enable_disable);
- void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa",
sw_if_index, enable_disable, 0, 0);
vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa",
sw_if_index, enable_disable, 0, 0);
- clib_mem_set_heap (oldheap);
am->fa_in_acl_on_sw_if_index =
clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index,
enable_disable);
@@ -888,12 +883,10 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
{
ASSERT (clib_bitmap_get (am->fa_out_acl_on_sw_if_index, sw_if_index) !=
enable_disable);
- void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa",
sw_if_index, enable_disable, 0, 0);
vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa",
sw_if_index, enable_disable, 0, 0);
- clib_mem_set_heap (oldheap);
am->fa_out_acl_on_sw_if_index =
clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index,
enable_disable);
@@ -905,11 +898,9 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
clib_warning ("ENABLE-DISABLE: clean the connections on interface %d",
sw_if_index);
#endif
- void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX,
sw_if_index);
- clib_mem_set_heap (oldheap);
}
}
@@ -935,7 +926,6 @@ show_fa_sessions_hash (vlib_main_t * vm, u32 verbose)
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (acl_fa_worker_session_cleaner_process_node, static) = {
.function = acl_fa_worker_conn_cleaner_process,
@@ -955,7 +945,6 @@ VLIB_REGISTER_NODE (acl_fa_session_cleaner_process_node, static) = {
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/adl/adl.api b/src/plugins/adl/adl.api
index cbbb026a77c..b80cdc1c90f 100644
--- a/src/plugins/adl/adl.api
+++ b/src/plugins/adl/adl.api
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-option version = "0.0.1";
+option version = "1.0.1";
import "vnet/interface_types.api";
diff --git a/src/plugins/adl/adl.c b/src/plugins/adl/adl.c
index 832bfd4a982..c6ea57a18be 100644
--- a/src/plugins/adl/adl.c
+++ b/src/plugins/adl/adl.c
@@ -150,21 +150,17 @@ adl_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (adl_init) =
{
.runs_after = VLIB_INITS ("ip4_allowlist_init", "ip6_allowlist_init"),
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (adl, static) =
{
.arc_name = "device-input",
.node_name = "adl-input",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON */
int adl_interface_enable_disable (u32 sw_if_index, int enable_disable)
{
@@ -389,7 +385,6 @@ adl_allowlist_enable_disable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (adl_allowlist_command, static) =
{
.path = "adl allowlist",
@@ -397,15 +392,12 @@ VLIB_CLI_COMMAND (adl_allowlist_command, static) =
"adl allowlist <interface-name> [ip4][ip6][default][fib-id <NN>][disable]",
.function = adl_allowlist_enable_disable_command_fn,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Allow/deny list plugin",
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/adl/adl_api.c b/src/plugins/adl/adl_api.c
index dba3b0cc17d..8bd805d9e06 100644
--- a/src/plugins/adl/adl_api.c
+++ b/src/plugins/adl/adl_api.c
@@ -30,7 +30,6 @@
#include <adl/adl.api_enum.h>
#include <adl/adl.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define REPLY_MSG_ID_BASE am->msg_id_base
#include <vlibapi/api_helper_macros.h>
diff --git a/src/plugins/adl/ip4_allowlist.c b/src/plugins/adl/ip4_allowlist.c
index 316e2cb558b..4c755725ea7 100644
--- a/src/plugins/adl/ip4_allowlist.c
+++ b/src/plugins/adl/ip4_allowlist.c
@@ -82,21 +82,19 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm,
ip4_header_t * ip0, * ip1;
adl_config_main_t * ccm0, * ccm1;
adl_config_data_t * c0, * c1;
- ip4_fib_mtrie_t * mtrie0, * mtrie1;
- ip4_fib_mtrie_leaf_t leaf0, leaf1;
- u32 lb_index0, lb_index1;
- const load_balance_t * lb0, *lb1;
- const dpo_id_t *dpo0, *dpo1;
+ u32 lb_index0, lb_index1;
+ const load_balance_t *lb0, *lb1;
+ const dpo_id_t *dpo0, *dpo1;
- /* Prefetch next iteration. */
- {
- vlib_buffer_t * p2, * p3;
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
clib_prefetch_store (p2->data);
clib_prefetch_store (p3->data);
@@ -121,17 +119,8 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm,
&ccm0->config_main, &adl_buffer (b0)->adl.current_config_index,
&next0, sizeof (c0[0]));
- mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
-
- leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 2);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 3);
-
- lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index0 =
+ ip4_fib_forwarding_lookup (c0->fib_index, &ip0->src_address);
ASSERT (lb_index0
== ip4_fib_table_lookup_lb (ip4_fib_get(c0->fib_index),
@@ -158,17 +147,10 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm,
&adl_buffer (b1)->adl.current_config_index,
&next1,
sizeof (c1[0]));
- mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
-
- leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
-
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
- &ip1->src_address, 2);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
- &ip1->src_address, 3);
+ lb_index1 =
+ ip4_fib_forwarding_lookup (c1->fib_index, &ip1->src_address);
- lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
ASSERT (lb_index1
== ip4_fib_table_lookup_lb (ip4_fib_get(c1->fib_index),
&ip1->src_address));
@@ -226,13 +208,11 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm,
ip4_header_t * ip0;
adl_config_main_t *ccm0;
adl_config_data_t *c0;
- ip4_fib_mtrie_t * mtrie0;
- ip4_fib_mtrie_leaf_t leaf0;
- u32 lb_index0;
- const load_balance_t * lb0;
- const dpo_id_t *dpo0;
+ u32 lb_index0;
+ const load_balance_t *lb0;
+ const dpo_id_t *dpo0;
- /* speculatively enqueue b0 to the current next frame */
+ /* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
to_next[0] = bi0;
from += 1;
@@ -253,21 +233,12 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm,
&next0,
sizeof (c0[0]));
- mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
+ lb_index0 =
+ ip4_fib_forwarding_lookup (c0->fib_index, &ip0->src_address);
- leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 2);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 3);
-
- lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
-
- ASSERT (lb_index0
- == ip4_fib_table_lookup_lb (ip4_fib_get(c0->fib_index),
- &ip0->src_address));
+ ASSERT (lb_index0 ==
+ ip4_fib_table_lookup_lb (ip4_fib_get (c0->fib_index),
+ &ip0->src_address));
lb0 = load_balance_get (lb_index0);
dpo0 = load_balance_get_bucket_i(lb0, 0);
diff --git a/src/plugins/adl/setup.pg b/src/plugins/adl/setup.pg
index 7f816bc0893..7117225789a 100644
--- a/src/plugins/adl/setup.pg
+++ b/src/plugins/adl/setup.pg
@@ -6,46 +6,46 @@ set int ip6 table loop0 0
set int ip address loop0 2001:db01::1/64
set int state loop0 up
-packet-generator new {
- name ip4
- limit 100
- rate 0
- size 128-128
- interface loop0
- node adl-input
- data { IP4: 1.2.40 -> 3cfd.fed0.b6c8
- UDP: 192.168.1.2-192.168.1.10 -> 192.168.2.1
- UDP: 1234 -> 2345
- incrementing 114
- }
+packet-generator new { \
+ name ip4 \
+ limit 100 \
+ rate 0 \
+ size 128-128 \
+ interface loop0 \
+ node adl-input \
+ data { IP4: 1.2.40 -> 3cfd.fed0.b6c8 \
+ UDP: 192.168.1.2-192.168.1.10 -> 192.168.2.1 \
+ UDP: 1234 -> 2345 \
+ incrementing 114 \
+ } \
}
-packet-generator new {
- name ip6-allow
- limit 50
- rate 0
- size 128-128
- interface loop0
- node adl-input
- data { IP6: 1.2.40 -> 3cfd.fed0.b6c8
- UDP: 2001:db01::2 -> 2001:db01::1
- UDP: 1234 -> 2345
- incrementing 80
- }
+packet-generator new { \
+ name ip6-allow \
+ limit 50 \
+ rate 0 \
+ size 128-128 \
+ interface loop0 \
+ node adl-input \
+ data { IP6: 1.2.40 -> 3cfd.fed0.b6c8 \
+ UDP: 2001:db01::2 -> 2001:db01::1 \
+ UDP: 1234 -> 2345 \
+ incrementing 80 \
+ } \
}
-packet-generator new {
- name ip6-drop
- limit 50
- rate 0
- size 128-128
- interface loop0
- node adl-input
- data { IP6: 1.2.40 -> 3cfd.fed0.b6c8
- UDP: 2001:db01::3 -> 2001:db01::1
- UDP: 1234 -> 2345
- incrementing 80
- }
+packet-generator new { \
+ name ip6-drop \
+ limit 50 \
+ rate 0 \
+ size 128-128 \
+ interface loop0 \
+ node adl-input \
+ data { IP6: 1.2.40 -> 3cfd.fed0.b6c8 \
+ UDP: 2001:db01::3 -> 2001:db01::1 \
+ UDP: 1234 -> 2345 \
+ incrementing 80 \
+ } \
}
ip table 1
diff --git a/src/plugins/gbp/CMakeLists.txt b/src/plugins/af_packet/CMakeLists.txt
index 95f664ff08e..818a03107d7 100644
--- a/src/plugins/gbp/CMakeLists.txt
+++ b/src/plugins/af_packet/CMakeLists.txt
@@ -1,4 +1,5 @@
-# Copyright (c) 2018 Cisco and/or its affiliates.
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2022 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -11,44 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-add_vpp_plugin(gbp
+add_vpp_plugin(af_packet
SOURCES
- gbp_api.c
- gbp_bridge_domain.c
- gbp_classify.c
- gbp_classify_node.c
- gbp_contract.c
- gbp_endpoint.c
- gbp_endpoint_group.c
- gbp_ext_itf.c
- gbp_fwd.c
- gbp_fwd_dpo.c
- gbp_fwd_node.c
- gbp_itf.c
- gbp_learn.c
- gbp_learn_node.c
- gbp_policy.c
- gbp_policy_dpo.c
- gbp_policy_node.c
- gbp_recirc.c
- gbp_route_domain.c
- gbp_scanner.c
- gbp_subnet.c
- gbp_vxlan.c
- gbp_vxlan_node.c
+ plugin.c
+ af_packet.c
+ device.c
+ node.c
+ cli.c
+ af_packet_api.c
MULTIARCH_SOURCES
- gbp_classify_node.c
- gbp_fwd_dpo.c
- gbp_fwd_node.c
- gbp_learn_node.c
- gbp_policy_dpo.c
- gbp_policy_node.c
- gbp_vxlan_node.c
+ node.c
+ device.c
+
+ INSTALL_HEADERS
+ af_packet.h
API_FILES
- gbp.api
+ af_packet.api
- INSTALL_HEADERS
- gbp.h
+ # API_TEST_SOURCES
+ #af_packet_test_api.c
+
+ SUPPORTED_OS_LIST Linux
)
diff --git a/src/plugins/af_packet/FEATURE.yaml b/src/plugins/af_packet/FEATURE.yaml
new file mode 100644
index 00000000000..4a11ea2beb5
--- /dev/null
+++ b/src/plugins/af_packet/FEATURE.yaml
@@ -0,0 +1,16 @@
+---
+name: host-interface Device AF_PACKET
+maintainer: Damjan Marion <damarion@cisco.com>
+features:
+ - L4 checksum offload
+ - GSO offload
+description: "Create a host interface that will attach to a linux AF_PACKET
+ interface, one side of a veth pair. The veth pair must
+ already exist. Once created, a new host interface will
+ exist in VPP with the name 'host-<ifname>', where '<ifname>'
+ is the name of the specified veth pair. Use the 'show interface'
+ command to display host interface details."
+missing:
+ - API dump details beyond sw_if_index and name
+state: production
+properties: [API, CLI, STATS, MULTITHREAD]
diff --git a/src/plugins/af_packet/af_packet.api b/src/plugins/af_packet/af_packet.api
new file mode 100644
index 00000000000..abc7d1a7206
--- /dev/null
+++ b/src/plugins/af_packet/af_packet.api
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "2.0.0";
+
+import "vnet/interface_types.api";
+import "vnet/ethernet/ethernet_types.api";
+
+/** \brief Create host-interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param hw_addr - interface MAC
+ @param use_random_hw_addr - use random generated MAC
+ @param host_if_name - interface name
+*/
+define af_packet_create
+{
+ option deprecated;
+
+ u32 client_index;
+ u32 context;
+
+ vl_api_mac_address_t hw_addr;
+ bool use_random_hw_addr;
+ string host_if_name[64];
+};
+
+/** \brief Create host-interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define af_packet_create_reply
+{
+ option deprecated;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Create host-interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param hw_addr - interface MAC
+ @param use_random_hw_addr - use random generated MAC
+ @param host_if_name - interface name
+ @param rx_frame_size - frame size for RX
+ @param tx_frame_size - frame size for TX
+ @param rx_frames_per_block - frames per block for RX
+ @param tx_frames_per_block - frames per block for TX
+ @param flags - flags for the af_packet interface creation
+ @param num_rx_queues - number of rx queues
+*/
+define af_packet_create_v2
+{
+ option deprecated;
+
+ u32 client_index;
+ u32 context;
+
+ vl_api_mac_address_t hw_addr;
+ bool use_random_hw_addr;
+ string host_if_name[64];
+ u32 rx_frame_size;
+ u32 tx_frame_size;
+ u32 rx_frames_per_block;
+ u32 tx_frames_per_block;
+ u32 flags;
+ u16 num_rx_queues [default=1];
+};
+
+/** \brief Create host-interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define af_packet_create_v2_reply
+{
+ option deprecated;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+enum af_packet_mode {
+ AF_PACKET_API_MODE_ETHERNET = 1, /* mode ethernet */
+ AF_PACKET_API_MODE_IP = 2, /* mode ip */
+};
+
+enum af_packet_flags {
+ AF_PACKET_API_FLAG_QDISC_BYPASS = 1, /* enable the qdisc bypass */
+ AF_PACKET_API_FLAG_CKSUM_GSO = 2, /* enable checksum/gso */
+ AF_PACKET_API_FLAG_VERSION_2 = 8 [backwards_compatible], /* af packet v2, default is v3 */
+};
+
+/** \brief Create host-interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mode - 1 - Ethernet, 2 - IP
+ @param hw_addr - interface MAC
+ @param use_random_hw_addr - use random generated MAC
+ @param host_if_name - interface name
+ @param rx_frame_size - frame size for RX
+ @param tx_frame_size - frame size for TX
+ @param rx_frames_per_block - frames per block for RX
+ @param tx_frames_per_block - frames per block for TX
+ @param flags - flags for the af_packet interface creation
+ @param num_rx_queues - number of rx queues
+ @param num_tx_queues - number of tx queues
+*/
+define af_packet_create_v3
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_af_packet_mode_t mode;
+ vl_api_mac_address_t hw_addr;
+ bool use_random_hw_addr;
+ string host_if_name[64];
+ u32 rx_frame_size;
+ u32 tx_frame_size;
+ u32 rx_frames_per_block;
+ u32 tx_frames_per_block;
+ vl_api_af_packet_flags_t flags;
+ u16 num_rx_queues [default=1];
+ u16 num_tx_queues [default=1];
+};
+
+/** \brief Create host-interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define af_packet_create_v3_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Delete host-interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param host_if_name - interface name
+*/
+autoreply define af_packet_delete
+{
+ u32 client_index;
+ u32 context;
+
+ string host_if_name[64];
+};
+
+/** \brief Set l4 offload checksum calculation
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+autoreply define af_packet_set_l4_cksum_offload
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_interface_index_t sw_if_index;
+ bool set;
+};
+
+/** \brief Dump af_packet interfaces request */
+define af_packet_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for af_packet dump request
+ @param sw_if_index - software index of af_packet interface
+ @param host_if_name - interface name
+*/
+define af_packet_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ string host_if_name[64];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/af_packet/af_packet.c b/src/plugins/af_packet/af_packet.c
new file mode 100644
index 00000000000..69245429918
--- /dev/null
+++ b/src/plugins/af_packet/af_packet.c
@@ -0,0 +1,1054 @@
+/*
+ *------------------------------------------------------------------
+ * af_packet.c - linux kernel packet interface
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <vppinfra/linux/sysfs.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ip/ip.h>
+#include <vnet/devices/netlink.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
+
+#include <af_packet/af_packet.h>
+
+af_packet_main_t af_packet_main;
+
+VNET_HW_INTERFACE_CLASS (af_packet_ip_device_hw_interface_class, static) = {
+ .name = "af-packet-ip-device",
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+
+#define AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK 1024
+#define AF_PACKET_DEFAULT_TX_FRAME_SIZE (2048 * 33) // GSO packet of 64KB
+#define AF_PACKET_TX_BLOCK_NR 1
+
+#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK_V2 1024
+#define AF_PACKET_DEFAULT_RX_FRAME_SIZE_V2 (2048 * 33) // GSO packet of 64KB
+#define AF_PACKET_RX_BLOCK_NR_V2 1
+
+#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK 32
+#define AF_PACKET_DEFAULT_RX_FRAME_SIZE 2048
+#define AF_PACKET_RX_BLOCK_NR 160
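+
+/* Ring sizing (used in af_packet_queue_init below): tp_block_size =
+ * frame_size * frames_per_block and a ring spans tp_block_nr blocks, so
+ * the TPACKET_V3 rx defaults above give 2048 * 32 * 160 = 10 MiB per
+ * queue, while the V2 rx and tx defaults give (2048 * 33) * 1024 * 1 =
+ * 66 MiB. */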
+
+/* defined in net/if.h but clashes with dpdk headers */
+unsigned int if_nametoindex (const char *ifname);
+
+#define AF_PACKET_OFFLOAD_FLAG_RXCKSUM (1 << 0)
+#define AF_PACKET_OFFLOAD_FLAG_TXCKSUM (1 << 1)
+#define AF_PACKET_OFFLOAD_FLAG_SG (1 << 2)
+#define AF_PACKET_OFFLOAD_FLAG_TSO (1 << 3)
+#define AF_PACKET_OFFLOAD_FLAG_UFO (1 << 4)
+#define AF_PACKET_OFFLOAD_FLAG_GSO (1 << 5)
+#define AF_PACKET_OFFLOAD_FLAG_GRO (1 << 6)
+
+#define AF_PACKET_OFFLOAD_FLAG_MASK \
+ (AF_PACKET_OFFLOAD_FLAG_RXCKSUM | AF_PACKET_OFFLOAD_FLAG_TXCKSUM | \
+ AF_PACKET_OFFLOAD_FLAG_SG | AF_PACKET_OFFLOAD_FLAG_TSO | \
+ AF_PACKET_OFFLOAD_FLAG_UFO | AF_PACKET_OFFLOAD_FLAG_GSO | \
+ AF_PACKET_OFFLOAD_FLAG_GRO)
+
+#define AF_PACKET_IOCTL(fd, a, ...) \
+ if (ioctl (fd, a, __VA_ARGS__) < 0) \
+ { \
+ err = clib_error_return_unix (0, "ioctl(" #a ")"); \
+ vlib_log_err (af_packet_main.log_class, "%U", format_clib_error, err); \
+ goto done; \
+ }
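+
+/* Note: AF_PACKET_IOCTL assumes a clib_error_t *err variable and a done:
+ * label in the calling scope; on failure it logs the error and jumps to
+ * done. */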
+
+static u32
+af_packet_get_if_capabilities (u8 *host_if_name)
+{
+ struct ifreq ifr;
+ struct ethtool_value e; // { __u32 cmd; __u32 data; };
+ clib_error_t *err = 0;
+ int ctl_fd = -1;
+ u32 oflags = 0;
+
+ if ((ctl_fd = socket (AF_INET, SOCK_STREAM, 0)) == -1)
+ {
+ clib_warning ("Cannot open control socket");
+ goto done;
+ }
+
+ clib_memset (&ifr, 0, sizeof (ifr));
+ clib_memcpy (ifr.ifr_name, host_if_name,
+ strlen ((const char *) host_if_name));
+ ifr.ifr_data = (void *) &e;
+
+ e.cmd = ETHTOOL_GRXCSUM;
+ AF_PACKET_IOCTL (ctl_fd, SIOCETHTOOL, &ifr);
+ if (e.data)
+ oflags |= AF_PACKET_OFFLOAD_FLAG_RXCKSUM;
+
+ e.cmd = ETHTOOL_GTXCSUM;
+ AF_PACKET_IOCTL (ctl_fd, SIOCETHTOOL, &ifr);
+ if (e.data)
+ oflags |= AF_PACKET_OFFLOAD_FLAG_TXCKSUM;
+
+ e.cmd = ETHTOOL_GTSO;
+ AF_PACKET_IOCTL (ctl_fd, SIOCETHTOOL, &ifr);
+ if (e.data)
+ oflags |= AF_PACKET_OFFLOAD_FLAG_TSO;
+
+ e.cmd = ETHTOOL_GGSO;
+ AF_PACKET_IOCTL (ctl_fd, SIOCETHTOOL, &ifr);
+ if (e.data)
+ oflags |= AF_PACKET_OFFLOAD_FLAG_GSO;
+
+ e.cmd = ETHTOOL_GGRO;
+ AF_PACKET_IOCTL (ctl_fd, SIOCETHTOOL, &ifr);
+ if (e.data)
+ oflags |= AF_PACKET_OFFLOAD_FLAG_GRO;
+
+done:
+ if (ctl_fd != -1)
+ close (ctl_fd);
+
+ return oflags;
+}
+
+static clib_error_t *
+af_packet_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ u32 frame_size)
+{
+ clib_error_t *error, *rv;
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, hi->dev_instance);
+
+ error = vnet_netlink_set_link_mtu (apif->host_if_index,
+ frame_size + hi->frame_overhead);
+
+ if (error)
+ {
+ vlib_log_err (apm->log_class, "netlink failed to change MTU: %U",
+ format_clib_error, error);
+ rv = vnet_error (VNET_ERR_SYSCALL_ERROR_1, "netlink error: %U",
+ format_clib_error, error);
+ clib_error_free (error);
+ return rv;
+ }
+ else
+ apif->host_mtu = frame_size + hi->frame_overhead;
+ return 0;
+}
+
+static int
+af_packet_read_mtu (af_packet_if_t *apif)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ clib_error_t *error;
+ error = vnet_netlink_get_link_mtu (apif->host_if_index, &apif->host_mtu);
+ if (error)
+ {
+ vlib_log_err (apm->log_class, "netlink failed to get MTU: %U",
+ format_clib_error, error);
+ clib_error_free (error);
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ return 0;
+}
+
+static clib_error_t *
+af_packet_fd_read_ready (clib_file_t * uf)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ /* Schedule the rx node */
+ vnet_hw_if_rx_queue_set_int_pending (vnm, uf->private_data);
+ return 0;
+}
+
+static clib_error_t *
+af_packet_fd_error (clib_file_t *uf)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ clib_error_t *err = 0;
+  u64 val;
+
+  int ret = read (uf->file_descriptor, (char *) &val, sizeof (val));
+
+ if (ret < 0)
+ {
+      err = clib_error_return_unix (0, "read");
+ vlib_log_notice (apm->log_class, "fd %u %U", uf->file_descriptor,
+ format_clib_error, err);
+ clib_error_free (err);
+ }
+
+ return 0;
+}
+
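+/* Returns 0 if host_if_name is a linux bridge (i.e. the sysfs "bridge"
+ * directory exists for it), -1 otherwise -- note the inverted sense. */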
+static int
+is_bridge (const u8 * host_if_name)
+{
+ u8 *s;
+ DIR *dir = NULL;
+
+ s = format (0, "/sys/class/net/%s/bridge%c", host_if_name, 0);
+ dir = opendir ((char *) s);
+ vec_free (s);
+
+ if (dir)
+ {
+ closedir (dir);
+ return 0;
+ }
+
+ return -1;
+}
+
+static void
+af_packet_set_rx_queues (vlib_main_t *vm, af_packet_if_t *apif)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ af_packet_queue_t *rx_queue;
+
+ vnet_hw_if_set_input_node (vnm, apif->hw_if_index,
+ af_packet_input_node.index);
+
+ vec_foreach (rx_queue, apif->rx_queues)
+ {
+ rx_queue->queue_index = vnet_hw_if_register_rx_queue (
+ vnm, apif->hw_if_index, rx_queue->queue_id, VNET_HW_IF_RXQ_THREAD_ANY);
+
+ {
+ clib_file_t template = { 0 };
+ template.read_function = af_packet_fd_read_ready;
+ template.error_function = af_packet_fd_error;
+ template.file_descriptor = rx_queue->fd;
+ template.private_data = rx_queue->queue_index;
+ template.description =
+ format (0, "%U queue %u", format_af_packet_device_name,
+ apif->dev_instance, rx_queue->queue_id);
+ rx_queue->clib_file_index = clib_file_add (&file_main, &template);
+ }
+ vnet_hw_if_set_rx_queue_file_index (vnm, rx_queue->queue_index,
+ rx_queue->clib_file_index);
+ vnet_hw_if_set_rx_queue_mode (vnm, rx_queue->queue_index,
+ VNET_HW_IF_RX_MODE_INTERRUPT);
+ rx_queue->mode = VNET_HW_IF_RX_MODE_INTERRUPT;
+ }
+ vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index);
+}
+
+static void
+af_packet_set_tx_queues (vlib_main_t *vm, af_packet_if_t *apif)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_queue_t *tx_queue;
+
+ vec_foreach (tx_queue, apif->tx_queues)
+ {
+ tx_queue->queue_index = vnet_hw_if_register_tx_queue (
+ vnm, apif->hw_if_index, tx_queue->queue_id);
+ }
+
+ if (apif->num_txqs == 0)
+ {
+ vlib_log_err (apm->log_class, "Interface %U has 0 txq",
+ format_vnet_hw_if_index_name, vnm, apif->hw_if_index);
+ return;
+ }
+
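+  /* Assign tx queues to worker threads round-robin; with fewer queues
+   * than threads a queue is shared, which is why the tx path takes the
+   * per-queue spinlock. */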
+ for (u32 j = 0; j < vlib_get_n_threads (); j++)
+ {
+ u32 qi = apif->tx_queues[j % apif->num_txqs].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, j);
+ }
+
+ vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index);
+}
+
+static int
+create_packet_sock (int host_if_index, tpacket_req_u_t *rx_req,
+ tpacket_req_u_t *tx_req, int *fd, af_packet_ring_t *ring,
+ u32 fanout_id, af_packet_if_flags_t *flags, int ver)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ struct sockaddr_ll sll;
+ socklen_t req_sz = sizeof (tpacket_req3_t);
+ int ret;
+ u32 ring_sz = 0;
+
+ if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to create AF_PACKET socket: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+  /* bind before the rx ring is configured so we don't receive packets
+   * from other interfaces */
+ clib_memset (&sll, 0, sizeof (sll));
+ sll.sll_family = PF_PACKET;
+ sll.sll_protocol = htons (ETH_P_ALL);
+ sll.sll_ifindex = host_if_index;
+ if (bind (*fd, (struct sockaddr *) &sll, sizeof (sll)) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to bind rx packet socket: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ if (setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver)) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to set rx packet interface version: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ int opt = 1;
+ if (setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt)) < 0)
+ {
+ vlib_log_err (
+ apm->log_class,
+ "Failed to set packet tx ring error handling option: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ if (*flags & AF_PACKET_IF_FLAGS_CKSUM_GSO)
+ {
+ int opt2 = 1;
+ if (setsockopt (*fd, SOL_PACKET, PACKET_VNET_HDR, &opt2, sizeof (opt2)) <
+ 0)
+ {
+ // remove the flag
+ *flags &= ~AF_PACKET_IF_FLAGS_CKSUM_GSO;
+ vlib_log_debug (apm->log_class,
+ "Failed to set packet vnet hdr error handling "
+ "option: %s (errno %d)",
+ strerror (errno), errno);
+ }
+ }
+
+#if defined(PACKET_QDISC_BYPASS)
+ if (*flags & AF_PACKET_IF_FLAGS_QDISC_BYPASS)
+ /* Introduced with Linux 3.14 so the ifdef should eventually be removed */
+ if (setsockopt (*fd, SOL_PACKET, PACKET_QDISC_BYPASS, &opt, sizeof (opt)) <
+ 0)
+ {
+ // remove the flag
+ *flags &= ~AF_PACKET_IF_FLAGS_QDISC_BYPASS;
+ vlib_log_debug (apm->log_class,
+ "Failed to set qdisc bypass error "
+ "handling option: %s (errno %d)",
+ strerror (errno), errno);
+ }
+#endif
+
+ if (rx_req)
+ {
+ if (*flags & AF_PACKET_IF_FLAGS_FANOUT)
+ {
+ int fanout = ((fanout_id & 0xffff) | ((PACKET_FANOUT_HASH) << 16));
+ if (setsockopt (*fd, SOL_PACKET, PACKET_FANOUT, &fanout,
+ sizeof (fanout)) < 0)
+ {
+ // remove the flag
+ *flags &= ~AF_PACKET_IF_FLAGS_FANOUT;
+ vlib_log_err (apm->log_class,
+ "Failed to set fanout options: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+ }
+ if (ver == TPACKET_V2)
+ {
+ req_sz = sizeof (tpacket_req_t);
+ ring_sz += rx_req->req.tp_block_size * rx_req->req.tp_block_nr;
+ }
+ else
+ ring_sz += rx_req->req3.tp_block_size * rx_req->req3.tp_block_nr;
+ if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to set packet rx ring options: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+ }
+
+ if (tx_req)
+ {
+ if (ver == TPACKET_V2)
+ {
+ req_sz = sizeof (tpacket_req_t);
+ ring_sz += tx_req->req.tp_block_size * tx_req->req.tp_block_nr;
+ }
+ else
+ ring_sz += tx_req->req3.tp_block_size * tx_req->req3.tp_block_nr;
+ if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to set packet tx ring options: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+ }
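+  /* One mmap covers both rings: the kernel lays out the rx blocks first,
+   * then the tx blocks, hence ring_sz accumulating both requests above. */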
+ ring->ring_start_addr = mmap (NULL, ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED, *fd, 0);
+ if (ring->ring_start_addr == MAP_FAILED)
+ {
+ vlib_log_err (apm->log_class, "mmap failure: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ ring->ring_size = ring_sz;
+
+ return 0;
+error:
+ if (*fd >= 0)
+ {
+ close (*fd);
+ *fd = -1;
+ }
+ return ret;
+}
+
+int
+af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif,
+ af_packet_create_if_arg_t *arg,
+ af_packet_queue_t *rx_queue, af_packet_queue_t *tx_queue,
+ u8 queue_id)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ tpacket_req_u_t *rx_req = 0;
+ tpacket_req_u_t *tx_req = 0;
+ int ret, fd = -1;
+ af_packet_ring_t ring = { 0 };
+ u8 *ring_addr = 0;
+ u32 rx_frames_per_block, tx_frames_per_block;
+ u32 rx_frame_size, tx_frame_size;
+ u32 i = 0;
+
+ if (rx_queue)
+ {
+ rx_frames_per_block = arg->rx_frames_per_block ?
+ arg->rx_frames_per_block :
+ ((apif->version == TPACKET_V3) ?
+ AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK :
+ AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK_V2);
+
+ rx_frame_size =
+ arg->rx_frame_size ?
+ arg->rx_frame_size :
+ ((apif->version == TPACKET_V3) ? AF_PACKET_DEFAULT_RX_FRAME_SIZE :
+ AF_PACKET_DEFAULT_RX_FRAME_SIZE_V2);
+ vec_validate (rx_queue->rx_req, 0);
+ rx_queue->rx_req->req.tp_block_size =
+ rx_frame_size * rx_frames_per_block;
+ rx_queue->rx_req->req.tp_frame_size = rx_frame_size;
+ rx_queue->rx_req->req.tp_block_nr = (apif->version == TPACKET_V3) ?
+ AF_PACKET_RX_BLOCK_NR :
+ AF_PACKET_RX_BLOCK_NR_V2;
+ rx_queue->rx_req->req.tp_frame_nr =
+ rx_queue->rx_req->req.tp_block_nr * rx_frames_per_block;
+ if (apif->version == TPACKET_V3)
+ {
+	  rx_queue->rx_req->req3.tp_retire_blk_tov = 1; // 1 ms block timeout
+ rx_queue->rx_req->req3.tp_feature_req_word = 0;
+ rx_queue->rx_req->req3.tp_sizeof_priv = 0;
+ }
+ rx_req = rx_queue->rx_req;
+ }
+ if (tx_queue)
+ {
+ tx_frames_per_block = arg->tx_frames_per_block ?
+ arg->tx_frames_per_block :
+ AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK;
+ tx_frame_size = arg->tx_frame_size ? arg->tx_frame_size :
+ AF_PACKET_DEFAULT_TX_FRAME_SIZE;
+
+ vec_validate (tx_queue->tx_req, 0);
+ tx_queue->tx_req->req.tp_block_size =
+ tx_frame_size * tx_frames_per_block;
+ tx_queue->tx_req->req.tp_frame_size = tx_frame_size;
+ tx_queue->tx_req->req.tp_block_nr = AF_PACKET_TX_BLOCK_NR;
+ tx_queue->tx_req->req.tp_frame_nr =
+ AF_PACKET_TX_BLOCK_NR * tx_frames_per_block;
+ if (apif->version == TPACKET_V3)
+ {
+ tx_queue->tx_req->req3.tp_retire_blk_tov = 0;
+ tx_queue->tx_req->req3.tp_sizeof_priv = 0;
+ tx_queue->tx_req->req3.tp_feature_req_word = 0;
+ }
+ tx_req = tx_queue->tx_req;
+ }
+
+ if (rx_queue || tx_queue)
+ {
+ ret =
+ create_packet_sock (apif->host_if_index, rx_req, tx_req, &fd, &ring,
+ apif->dev_instance, &arg->flags, apif->version);
+
+ if (ret != 0)
+ goto error;
+
+ vec_add1 (apif->fds, fd);
+ vec_add1 (apif->rings, ring);
+ ring_addr = ring.ring_start_addr;
+ }
+
+ if (rx_queue)
+ {
+ rx_queue->fd = fd;
+ vec_validate (rx_queue->rx_ring, rx_queue->rx_req->req.tp_block_nr - 1);
+ vec_foreach_index (i, rx_queue->rx_ring)
+ {
+ rx_queue->rx_ring[i] =
+ ring_addr + i * rx_queue->rx_req->req.tp_block_size;
+ }
+
+ rx_queue->next_rx_block = 0;
+ rx_queue->queue_id = queue_id;
+ rx_queue->is_rx_pending = 0;
+ ring_addr = ring_addr + rx_queue->rx_req->req.tp_block_size *
+ rx_queue->rx_req->req.tp_block_nr;
+ }
+
+ if (tx_queue)
+ {
+ tx_queue->fd = fd;
+ vec_validate (tx_queue->tx_ring, tx_queue->tx_req->req.tp_block_nr - 1);
+ vec_foreach_index (i, tx_queue->tx_ring)
+ {
+ tx_queue->tx_ring[i] =
+ ring_addr + i * tx_queue->tx_req->req.tp_block_size;
+ }
+
+ tx_queue->next_tx_frame = 0;
+ tx_queue->queue_id = queue_id;
+ tx_queue->is_tx_pending = 0;
+ clib_spinlock_init (&tx_queue->lockp);
+ }
+
+ return 0;
+error:
+  vlib_log_err (apm->log_class, "Failed to set up queue %u", queue_id);
+ if (rx_queue)
+ vec_free (rx_queue->rx_req);
+ if (tx_queue)
+ vec_free (tx_queue->tx_req);
+ return ret;
+}
+
+int
+af_packet_device_init (vlib_main_t *vm, af_packet_if_t *apif,
+ af_packet_create_if_arg_t *args)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_queue_t *rx_queue = 0;
+ af_packet_queue_t *tx_queue = 0;
+ u16 nq = clib_min (args->num_rxqs, args->num_txqs);
+ u16 i = 0;
+ int ret = 0;
+
+ // enable fanout feature for multi-rxqs
+ if (args->num_rxqs > 1)
+ args->flags |= AF_PACKET_IF_FLAGS_FANOUT;
+
+ vec_validate (apif->rx_queues, args->num_rxqs - 1);
+ vec_validate (apif->tx_queues, args->num_txqs - 1);
+
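+  /* The first min(num_rxqs, num_txqs) queue pairs share one AF_PACKET
+   * socket; surplus rx-only or tx-only queues each get their own socket. */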
+ for (; i < nq; i++)
+ {
+ rx_queue = vec_elt_at_index (apif->rx_queues, i);
+ tx_queue = vec_elt_at_index (apif->tx_queues, i);
+ ret = af_packet_queue_init (vm, apif, args, rx_queue, tx_queue, i);
+ if (ret != 0)
+ goto error;
+ }
+
+ if (args->num_rxqs > args->num_txqs)
+ {
+ for (; i < args->num_rxqs; i++)
+ {
+ rx_queue = vec_elt_at_index (apif->rx_queues, i);
+ ret = af_packet_queue_init (vm, apif, args, rx_queue, 0, i);
+ if (ret != 0)
+ goto error;
+ }
+ }
+ else if (args->num_txqs > args->num_rxqs)
+ {
+ for (; i < args->num_txqs; i++)
+ {
+ tx_queue = vec_elt_at_index (apif->tx_queues, i);
+ ret = af_packet_queue_init (vm, apif, args, 0, tx_queue, i);
+ if (ret != 0)
+ goto error;
+ }
+ }
+
+ apif->num_rxqs = args->num_rxqs;
+ apif->num_txqs = args->num_txqs;
+
+ return 0;
+error:
+  vlib_log_err (apm->log_class, "Failed to initialize device");
+ return ret;
+}
+
+int
+af_packet_create_if (af_packet_create_if_arg_t *arg)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vlib_main_t *vm = vlib_get_main ();
+ int fd2 = -1;
+ struct ifreq ifr;
+ af_packet_if_t *apif = 0;
+ u8 hw_addr[6];
+ vnet_sw_interface_t *sw;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_if_caps_t caps = VNET_HW_IF_CAP_INT_MODE;
+ uword *p;
+ uword if_index;
+ u8 *host_if_name_dup = 0;
+ int host_if_index = -1;
+ int ret = 0;
+ u32 oflags = 0, i = 0;
+
+ p = mhash_get (&apm->if_index_by_host_if_name, arg->host_if_name);
+ if (p)
+ {
+ apif = vec_elt_at_index (apm->interfaces, p[0]);
+ arg->sw_if_index = apif->sw_if_index;
+ return VNET_API_ERROR_IF_ALREADY_EXISTS;
+ }
+
+ host_if_name_dup = vec_dup (arg->host_if_name);
+
+ /*
+ * make sure host side of interface is 'UP' before binding AF_PACKET
+ * socket on it.
+ */
+ if ((fd2 = socket (AF_UNIX, SOCK_DGRAM, 0)) < 0)
+ {
+ vlib_log_debug (apm->log_class,
+ "Failed to create AF_UNIX socket: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ clib_memcpy (ifr.ifr_name, (const char *) arg->host_if_name,
+ vec_len (arg->host_if_name));
+ if (ioctl (fd2, SIOCGIFINDEX, &ifr) < 0)
+ {
+ vlib_log_debug (
+ apm->log_class,
+ "Failed to retrieve the interface (%s) index: %s (errno %d)",
+ arg->host_if_name, strerror (errno), errno);
+ ret = VNET_API_ERROR_INVALID_INTERFACE;
+ goto error;
+ }
+
+ host_if_index = ifr.ifr_ifindex;
+ if (ioctl (fd2, SIOCGIFFLAGS, &ifr) < 0)
+ {
+ vlib_log_debug (apm->log_class,
+ "Failed to get the active flag: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ if (!(ifr.ifr_flags & IFF_UP))
+ {
+ ifr.ifr_flags |= IFF_UP;
+ if (ioctl (fd2, SIOCSIFFLAGS, &ifr) < 0)
+ {
+ vlib_log_debug (apm->log_class,
+ "Failed to set the active flag: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+ }
+
+ if (fd2 > -1)
+ {
+ close (fd2);
+ fd2 = -1;
+ }
+
+ // check the host interface capabilities
+ oflags = af_packet_get_if_capabilities (arg->host_if_name);
+
+ ret = is_bridge (arg->host_if_name);
+ if (ret == 0) /* is a bridge, ignore state */
+ host_if_index = -1;
+
+ /* So far everything looks good, let's create interface */
+ pool_get (apm->interfaces, apif);
+ if_index = apif - apm->interfaces;
+
+ apif->dev_instance = if_index;
+ apif->host_if_index = host_if_index;
+ apif->host_if_name = host_if_name_dup;
+ apif->per_interface_next_index = ~0;
+ apif->mode = arg->mode;
+ apif->host_interface_oflags = oflags;
+
+ if (arg->is_v2)
+ apif->version = TPACKET_V2;
+ else
+ apif->version = TPACKET_V3;
+
+ ret = af_packet_device_init (vm, apif, arg);
+ if (ret != 0)
+ goto error;
+
+ ret = af_packet_read_mtu (apif);
+ if (ret != 0)
+ goto error;
+
+ if (apif->mode != AF_PACKET_IF_MODE_IP)
+ {
+ vnet_eth_interface_registration_t eir = {};
+      /* use the configured MAC, or generate a random locally-administered
+       * one (02:fe:xx:xx:xx:xx) */
+ if (arg->hw_addr)
+ clib_memcpy (hw_addr, arg->hw_addr, 6);
+ else
+ {
+ f64 now = vlib_time_now (vm);
+ u32 rnd;
+ rnd = (u32) (now * 1e6);
+ rnd = random_u32 (&rnd);
+
+ clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
+ hw_addr[0] = 2;
+ hw_addr[1] = 0xfe;
+ }
+
+ eir.dev_class_index = af_packet_device_class.index;
+ eir.dev_instance = apif->dev_instance;
+ eir.address = hw_addr;
+ eir.cb.set_max_frame_size = af_packet_eth_set_max_frame_size;
+ apif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+ }
+ else
+ {
+ apif->hw_if_index = vnet_register_interface (
+ vnm, af_packet_device_class.index, apif->dev_instance,
+ af_packet_ip_device_hw_interface_class.index, apif->dev_instance);
+ }
+
+ sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
+ apif->sw_if_index = sw->sw_if_index;
+
+ af_packet_set_rx_queues (vm, apif);
+ af_packet_set_tx_queues (vm, apif);
+
+ if (arg->flags & AF_PACKET_IF_FLAGS_FANOUT)
+ apif->is_fanout_enabled = 1;
+
+ apif->is_qdisc_bypass_enabled =
+ (arg->flags & AF_PACKET_IF_FLAGS_QDISC_BYPASS);
+
+ if (arg->flags & AF_PACKET_IF_FLAGS_CKSUM_GSO)
+ {
+ if (apif->host_interface_oflags & AF_PACKET_OFFLOAD_FLAG_TXCKSUM)
+ {
+ apif->is_cksum_gso_enabled = 1;
+ caps |= VNET_HW_IF_CAP_TX_IP4_CKSUM | VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM;
+ }
+
+ if (apif->host_interface_oflags & AF_PACKET_OFFLOAD_FLAG_GSO)
+ {
+ apif->is_cksum_gso_enabled = 1;
+ caps |= VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_TX_IP4_CKSUM |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
+ }
+ }
+ vnet_hw_if_set_caps (vnm, apif->hw_if_index, caps);
+ vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
+ 0);
+ arg->sw_if_index = apif->sw_if_index;
+
+ return 0;
+
+error:
+ if (fd2 > -1)
+ {
+ close (fd2);
+ fd2 = -1;
+ }
+
+ vec_free (host_if_name_dup);
+
+ if (apif)
+ {
+ vec_foreach_index (i, apif->fds)
+ if (apif->fds[i] != -1)
+ close (apif->fds[i]);
+ vec_free (apif->fds);
+      clib_memset (apif, 0, sizeof (*apif));
+ pool_put (apm->interfaces, apif);
+ }
+ return ret;
+}
+
+static int
+af_packet_rx_queue_free (af_packet_if_t *apif, af_packet_queue_t *rx_queue)
+{
+ clib_file_del_by_index (&file_main, rx_queue->clib_file_index);
+ rx_queue->fd = -1;
+ rx_queue->rx_ring = NULL;
+ vec_free (rx_queue->rx_req);
+ rx_queue->rx_req = NULL;
+ return 0;
+}
+
+static int
+af_packet_tx_queue_free (af_packet_if_t *apif, af_packet_queue_t *tx_queue)
+{
+ tx_queue->fd = -1;
+ clib_spinlock_free (&tx_queue->lockp);
+ tx_queue->tx_ring = NULL;
+ vec_free (tx_queue->tx_req);
+ tx_queue->tx_req = NULL;
+ return 0;
+}
+
+static int
+af_packet_ring_free (af_packet_if_t *apif, af_packet_ring_t *ring)
+{
+ af_packet_main_t *apm = &af_packet_main;
+
+ if (ring)
+ {
+      /* unmap the shared ring memory */
+ if (munmap (ring->ring_start_addr, ring->ring_size))
+ vlib_log_warn (apm->log_class,
+ "Host interface %s could not free ring %p of size %u",
+ apif->host_if_name, ring->ring_start_addr,
+ ring->ring_size);
+ else
+ ring->ring_start_addr = 0;
+ }
+
+ return 0;
+}
+
+int
+af_packet_delete_if (u8 *host_if_name)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif;
+ af_packet_queue_t *rx_queue;
+ af_packet_queue_t *tx_queue;
+ af_packet_ring_t *ring;
+ uword *p;
+ u32 i = 0;
+
+ p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
+ if (p == NULL)
+ {
+ vlib_log_warn (apm->log_class, "Host interface %s does not exist",
+ host_if_name);
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ apif = pool_elt_at_index (apm->interfaces, p[0]);
+
+ /* bring down the interface */
+ vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0);
+ if (apif->mode != AF_PACKET_IF_MODE_IP)
+ ethernet_delete_interface (vnm, apif->hw_if_index);
+ else
+ vnet_delete_hw_interface (vnm, apif->hw_if_index);
+
+ /* clean up */
+ vec_foreach_index (i, apif->fds)
+ if (apif->fds[i] != -1)
+ close (apif->fds[i]);
+ vec_foreach (rx_queue, apif->rx_queues)
+ af_packet_rx_queue_free (apif, rx_queue);
+ vec_foreach (tx_queue, apif->tx_queues)
+ af_packet_tx_queue_free (apif, tx_queue);
+ vec_foreach (ring, apif->rings)
+ af_packet_ring_free (apif, ring);
+
+ vec_free (apif->fds);
+ apif->fds = NULL;
+ vec_free (apif->rx_queues);
+ apif->rx_queues = NULL;
+ vec_free (apif->tx_queues);
+ apif->tx_queues = NULL;
+ vec_free (apif->rings);
+ apif->rings = NULL;
+
+ vec_free (apif->host_if_name);
+ apif->host_if_name = NULL;
+ apif->host_if_index = -1;
+
+ mhash_unset (&apm->if_index_by_host_if_name, host_if_name, p);
+
+  clib_memset (apif, 0, sizeof (*apif));
+ pool_put (apm->interfaces, apif);
+
+ return 0;
+}
+
+int
+af_packet_enable_disable_qdisc_bypass (u32 sw_if_index, u8 enable_disable)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hw;
+ u32 i;
+
+ hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
+
+  if (hw == NULL || hw->dev_class_index != af_packet_device_class.index)
+    return VNET_API_ERROR_INVALID_INTERFACE;
+
+ apif = pool_elt_at_index (apm->interfaces, hw->dev_instance);
+
+#if defined(PACKET_QDISC_BYPASS)
+ vec_foreach_index (i, apif->fds)
+ {
+ if (enable_disable)
+ {
+ int opt = 1;
+
+ /* Introduced with Linux 3.14 so the ifdef should eventually be
+ * removed */
+ if (setsockopt (apif->fds[i], SOL_PACKET, PACKET_QDISC_BYPASS, &opt,
+ sizeof (opt)) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to enable qdisc bypass error "
+ "handling option: %s (errno %d)",
+ strerror (errno), errno);
+ }
+ apif->is_qdisc_bypass_enabled = 1;
+ }
+ else
+ {
+ int opt = 0;
+ if (setsockopt (apif->fds[i], SOL_PACKET, PACKET_QDISC_BYPASS, &opt,
+ sizeof (opt)) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to disable qdisc bypass error "
+ "handling option: %s (errno %d)",
+ strerror (errno), errno);
+ }
+ apif->is_qdisc_bypass_enabled = 0;
+ }
+ }
+
+#endif
+ return 0;
+}
+
+int
+af_packet_set_l4_cksum_offload (u32 sw_if_index, u8 set)
+{
+ // deprecated ...
+ return 0;
+}
+
+int
+af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif;
+ af_packet_if_detail_t *r_af_packet_ifs = NULL;
+ af_packet_if_detail_t *af_packet_if = NULL;
+
+ pool_foreach (apif, apm->interfaces)
+ {
+ vec_add2 (r_af_packet_ifs, af_packet_if, 1);
+ af_packet_if->sw_if_index = apif->sw_if_index;
+ if (apif->host_if_name)
+ {
+ clib_memcpy (af_packet_if->host_if_name, apif->host_if_name,
+ MIN (ARRAY_LEN (af_packet_if->host_if_name) - 1,
+ strlen ((const char *) apif->host_if_name)));
+ }
+ }
+
+ *out_af_packet_ifs = r_af_packet_ifs;
+
+ return 0;
+}
+
+static clib_error_t *
+af_packet_init (vlib_main_t * vm)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ clib_memset (apm, 0, sizeof (af_packet_main_t));
+
+ mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword));
+
+ vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ apm->log_class = vlib_log_register_class ("af_packet", 0);
+ vlib_log_debug (apm->log_class, "initialized");
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (af_packet_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/af_packet/af_packet.h b/src/plugins/af_packet/af_packet.h
new file mode 100644
index 00000000000..e66a1119ba1
--- /dev/null
+++ b/src/plugins/af_packet/af_packet.h
@@ -0,0 +1,182 @@
+/*
+ *------------------------------------------------------------------
+ * af_packet.h - linux kernel packet interface header file
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef included_af_packet_h
+#define included_af_packet_h
+
+#include <linux/if_packet.h>
+
+#include <vppinfra/lock.h>
+#include <vlib/log.h>
+
+typedef struct tpacket_block_desc block_desc_t;
+typedef struct tpacket_req tpacket_req_t;
+typedef struct tpacket_req3 tpacket_req3_t;
+typedef struct tpacket2_hdr tpacket2_hdr_t;
+typedef struct tpacket3_hdr tpacket3_hdr_t;
+
+typedef union _tpacket_req_u
+{
+ tpacket_req_t req;
+ tpacket_req3_t req3;
+} tpacket_req_u_t;
+
+typedef enum
+{
+ AF_PACKET_IF_MODE_ETHERNET = 1,
+ AF_PACKET_IF_MODE_IP = 2
+} af_packet_if_mode_t;
+
+typedef enum
+{
+ AF_PACKET_IF_FLAGS_QDISC_BYPASS = 1,
+ AF_PACKET_IF_FLAGS_CKSUM_GSO = 2,
+ AF_PACKET_IF_FLAGS_FANOUT = 4,
+ AF_PACKET_IF_FLAGS_VERSION_2 = 8,
+} af_packet_if_flags_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u8 host_if_name[64];
+} af_packet_if_detail_t;
+
+typedef struct
+{
+ u8 *ring_start_addr;
+ u32 ring_size;
+} af_packet_ring_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ clib_spinlock_t lockp;
+ int fd;
+ union
+ {
+ tpacket_req_u_t *rx_req;
+ tpacket_req_u_t *tx_req;
+ };
+
+ union
+ {
+ u8 **rx_ring;
+ u8 **tx_ring;
+ };
+
+ union
+ {
+ u32 next_rx_block;
+ u32 next_rx_frame;
+ u32 next_tx_frame;
+ };
+
+ u16 queue_id;
+ u32 queue_index;
+
+ u32 clib_file_index;
+
+ u32 rx_frame_offset;
+ u16 num_rx_pkts;
+ u8 is_rx_pending;
+ u8 is_tx_pending;
+ vnet_hw_if_rx_mode mode;
+} af_packet_queue_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u32 per_interface_next_index;
+ af_packet_if_mode_t mode;
+ u8 is_admin_up;
+ u8 is_cksum_gso_enabled;
+ u8 version;
+ af_packet_queue_t *rx_queues;
+ af_packet_queue_t *tx_queues;
+
+ u8 num_rxqs;
+ u8 num_txqs;
+
+ u8 *host_if_name;
+ int host_if_index;
+
+ u32 host_mtu;
+ u32 dev_instance;
+
+ af_packet_ring_t *rings;
+ u8 is_qdisc_bypass_enabled;
+ u8 is_fanout_enabled;
+ int *fds;
+ u32 host_interface_oflags;
+} af_packet_if_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ af_packet_if_t *interfaces;
+
+ u32 polling_count;
+ /* rx buffer cache */
+ u32 **rx_buffers;
+
+ /* hash of host interface names */
+ mhash_t if_index_by_host_if_name;
+
+ /** log class */
+ vlib_log_class_t log_class;
+} af_packet_main_t;
+
+typedef struct
+{
+ u8 *host_if_name;
+ u8 *hw_addr;
+ u32 rx_frame_size;
+ u32 tx_frame_size;
+ u32 rx_frames_per_block;
+ u32 tx_frames_per_block;
+ u8 num_rxqs;
+ u8 num_txqs;
+ u8 is_v2;
+ af_packet_if_mode_t mode;
+ af_packet_if_flags_t flags;
+
+ /* return */
+ u32 sw_if_index;
+} af_packet_create_if_arg_t;
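+
+/* Typical use (see cli.c): zero the struct, fill host_if_name, mode,
+ * num_rxqs/num_txqs and flags, then call af_packet_create_if(); on
+ * success the new sw_if_index is returned in the struct. */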
+
+extern af_packet_main_t af_packet_main;
+extern vnet_device_class_t af_packet_device_class;
+extern vlib_node_registration_t af_packet_input_node;
+
+int af_packet_create_if (af_packet_create_if_arg_t *arg);
+int af_packet_delete_if (u8 *host_if_name);
+int af_packet_set_l4_cksum_offload (u32 sw_if_index, u8 set);
+int af_packet_enable_disable_qdisc_bypass (u32 sw_if_index, u8 enable_disable);
+int af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs);
+
+format_function_t format_af_packet_device_name;
+
+#define MIN(x,y) (((x)<(y))?(x):(y))
+
+#endif /* included_af_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/af_packet/af_packet_api.c b/src/plugins/af_packet/af_packet_api.c
new file mode 100644
index 00000000000..ede057659e8
--- /dev/null
+++ b/src/plugins/af_packet/af_packet_api.c
@@ -0,0 +1,253 @@
+/*
+ *------------------------------------------------------------------
+ * af_packet_api.c - af-packet api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <af_packet/af_packet.h>
+
+#include <vnet/format_fns.h>
+#include <af_packet/af_packet.api_enum.h>
+#include <af_packet/af_packet.api_types.h>
+
+#define REPLY_MSG_ID_BASE msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static u16 msg_id_base;
+
+static void
+vl_api_af_packet_create_t_handler (vl_api_af_packet_create_t * mp)
+{
+ af_packet_create_if_arg_t _arg, *arg = &_arg;
+ vl_api_af_packet_create_reply_t *rmp;
+ int rv = 0;
+
+ clib_memset (arg, 0, sizeof (*arg));
+
+ arg->host_if_name = format (0, "%s", mp->host_if_name);
+ vec_add1 (arg->host_if_name, 0);
+
+ arg->hw_addr = mp->use_random_hw_addr ? 0 : mp->hw_addr;
+ arg->mode = AF_PACKET_IF_MODE_ETHERNET;
+ // Default flags
+ arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO;
+ rv = af_packet_create_if (arg);
+
+ vec_free (arg->host_if_name);
+
+ REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_REPLY, ({
+ rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index);
+ }));
+}
+
+static void
+vl_api_af_packet_create_v2_t_handler (vl_api_af_packet_create_v2_t *mp)
+{
+ af_packet_create_if_arg_t _arg, *arg = &_arg;
+ vl_api_af_packet_create_v2_reply_t *rmp;
+ int rv = 0;
+
+ clib_memset (arg, 0, sizeof (*arg));
+
+ arg->host_if_name = format (0, "%s", mp->host_if_name);
+ vec_add1 (arg->host_if_name, 0);
+
+ // Default number of rx/tx queue(s)
+ arg->num_rxqs = 1;
+ arg->num_txqs = 1;
+ arg->rx_frame_size = clib_net_to_host_u32 (mp->rx_frame_size);
+ arg->tx_frame_size = clib_net_to_host_u32 (mp->tx_frame_size);
+ arg->rx_frames_per_block = clib_net_to_host_u32 (mp->rx_frames_per_block);
+ arg->tx_frames_per_block = clib_net_to_host_u32 (mp->tx_frames_per_block);
+ arg->hw_addr = mp->use_random_hw_addr ? 0 : mp->hw_addr;
+ arg->mode = AF_PACKET_IF_MODE_ETHERNET;
+ // Default flags
+ arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO;
+
+  if (clib_net_to_host_u16 (mp->num_rx_queues) > 1)
+    arg->num_rxqs = clib_net_to_host_u16 (mp->num_rx_queues);
+
+ rv = af_packet_create_if (arg);
+
+ vec_free (arg->host_if_name);
+ REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_V2_REPLY, ({
+ rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index);
+ }));
+}
+
+static void
+vl_api_af_packet_create_v3_t_handler (vl_api_af_packet_create_v3_t *mp)
+{
+ af_packet_create_if_arg_t _arg, *arg = &_arg;
+ vl_api_af_packet_create_v3_reply_t *rmp;
+ int rv = 0;
+
+ clib_memset (arg, 0, sizeof (*arg));
+
+ arg->host_if_name = format (0, "%s", mp->host_if_name);
+ vec_add1 (arg->host_if_name, 0);
+
+ // Default number of rx/tx queue(s)
+ arg->num_rxqs = 1;
+ arg->num_txqs = 1;
+ arg->rx_frame_size = clib_net_to_host_u32 (mp->rx_frame_size);
+ arg->tx_frame_size = clib_net_to_host_u32 (mp->tx_frame_size);
+ arg->rx_frames_per_block = clib_net_to_host_u32 (mp->rx_frames_per_block);
+ arg->tx_frames_per_block = clib_net_to_host_u32 (mp->tx_frames_per_block);
+ arg->hw_addr = mp->use_random_hw_addr ? 0 : mp->hw_addr;
+
+ switch (clib_net_to_host_u32 (mp->mode))
+ {
+ case AF_PACKET_API_MODE_ETHERNET:
+ arg->mode = AF_PACKET_IF_MODE_ETHERNET;
+ break;
+ case AF_PACKET_API_MODE_IP:
+ arg->mode = AF_PACKET_IF_MODE_IP;
+ break;
+ default:
+ arg->sw_if_index = ~0;
+ rv = VNET_ERR_INVALID_VALUE;
+ goto error;
+ }
+
+ STATIC_ASSERT (((int) AF_PACKET_API_FLAG_QDISC_BYPASS ==
+ (int) AF_PACKET_IF_FLAGS_QDISC_BYPASS),
+ "af-packet qdisc-bypass api flag mismatch");
+ STATIC_ASSERT (
+ ((int) AF_PACKET_API_FLAG_CKSUM_GSO == (int) AF_PACKET_IF_FLAGS_CKSUM_GSO),
+ "af-packet checksum/gso offload api flag mismatch");
+
+ STATIC_ASSERT (
+ ((int) AF_PACKET_API_FLAG_VERSION_2 == (int) AF_PACKET_IF_FLAGS_VERSION_2),
+ "af-packet version 2 api flag mismatch");
+
+ // Default flags
+ arg->flags = clib_net_to_host_u32 (mp->flags);
+
+ if (clib_net_to_host_u16 (mp->num_rx_queues) > 1)
+ arg->num_rxqs = clib_net_to_host_u16 (mp->num_rx_queues);
+
+ if (clib_net_to_host_u16 (mp->num_tx_queues) > 1)
+ arg->num_txqs = clib_net_to_host_u16 (mp->num_tx_queues);
+
+ arg->is_v2 = (arg->flags & AF_PACKET_API_FLAG_VERSION_2) ? 1 : 0;
+ rv = af_packet_create_if (arg);
+
+error:
+ vec_free (arg->host_if_name);
+ REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_V3_REPLY, ({
+ rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index);
+ }));
+}
+
+static void
+vl_api_af_packet_delete_t_handler (vl_api_af_packet_delete_t * mp)
+{
+ vl_api_af_packet_delete_reply_t *rmp;
+ int rv = 0;
+ u8 *host_if_name = NULL;
+
+ host_if_name = format (0, "%s", mp->host_if_name);
+ vec_add1 (host_if_name, 0);
+
+ rv = af_packet_delete_if (host_if_name);
+
+ vec_free (host_if_name);
+
+ REPLY_MACRO (VL_API_AF_PACKET_DELETE_REPLY);
+}
+
+static void
+ vl_api_af_packet_set_l4_cksum_offload_t_handler
+ (vl_api_af_packet_set_l4_cksum_offload_t * mp)
+{
+  vl_api_af_packet_set_l4_cksum_offload_reply_t *rmp;
+ int rv = 0;
+
+ rv = af_packet_set_l4_cksum_offload (ntohl (mp->sw_if_index), mp->set);
+ REPLY_MACRO (VL_API_AF_PACKET_SET_L4_CKSUM_OFFLOAD_REPLY);
+}
+
+static void
+af_packet_send_details (vpe_api_main_t * am,
+ vl_api_registration_t * reg,
+ af_packet_if_detail_t * af_packet_if, u32 context)
+{
+ vl_api_af_packet_details_t *mp;
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = htons (REPLY_MSG_ID_BASE + VL_API_AF_PACKET_DETAILS);
+ mp->sw_if_index = htonl (af_packet_if->sw_if_index);
+ clib_memcpy (mp->host_if_name, af_packet_if->host_if_name,
+ MIN (ARRAY_LEN (mp->host_if_name) - 1,
+ strlen ((const char *) af_packet_if->host_if_name)));
+
+ mp->context = context;
+ vl_api_send_msg (reg, (u8 *) mp);
+}
+
+static void
+vl_api_af_packet_dump_t_handler (vl_api_af_packet_dump_t * mp)
+{
+ int rv;
+ vpe_api_main_t *am = &vpe_api_main;
+ vl_api_registration_t *reg;
+ af_packet_if_detail_t *out_af_packet_ifs = NULL;
+ af_packet_if_detail_t *af_packet_if = NULL;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ rv = af_packet_dump_ifs (&out_af_packet_ifs);
+ if (rv)
+ return;
+
+ vec_foreach (af_packet_if, out_af_packet_ifs)
+ {
+ af_packet_send_details (am, reg, af_packet_if, mp->context);
+ }
+
+ vec_free (out_af_packet_ifs);
+}
+
+#include <af_packet/af_packet.api.c>
+static clib_error_t *
+af_packet_api_hookup (vlib_main_t * vm)
+{
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (af_packet_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/af_packet/cli.c b/src/plugins/af_packet/cli.c
new file mode 100644
index 00000000000..2af3fb17ee5
--- /dev/null
+++ b/src/plugins/af_packet/cli.c
@@ -0,0 +1,341 @@
+/*
+ *------------------------------------------------------------------
+ * cli.c - linux kernel packet interface CLI
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <fcntl.h> /* for open */
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h> /* for iovec */
+#include <netinet/in.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <af_packet/af_packet.h>
+
+/**
+ * @file
+ * @brief CLI for Host Interface Device Driver.
+ *
+ * This file contains the source code for CLI for the host interface.
+ */
+
+static clib_error_t *
+af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ af_packet_create_if_arg_t _arg, *arg = &_arg;
+ clib_error_t *error = NULL;
+ u8 hwaddr[6];
+ int r;
+
+ clib_memset (arg, 0, sizeof (*arg));
+
+ // Default mode
+ arg->mode = AF_PACKET_IF_MODE_ETHERNET;
+
+ // Default number of rx/tx queue(s)
+ arg->num_rxqs = 1;
+ arg->num_txqs = 1;
+
+ // Default flags
+ arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &arg->host_if_name))
+ ;
+ else if (unformat (line_input, "rx-size %u", &arg->rx_frame_size))
+ ;
+ else if (unformat (line_input, "tx-size %u", &arg->tx_frame_size))
+ ;
+ else if (unformat (line_input, "rx-per-block %u",
+ &arg->rx_frames_per_block))
+ ;
+ else if (unformat (line_input, "tx-per-block %u",
+ &arg->tx_frames_per_block))
+ ;
+ else if (unformat (line_input, "num-rx-queues %u", &arg->num_rxqs))
+ ;
+ else if (unformat (line_input, "num-tx-queues %u", &arg->num_txqs))
+ ;
+ else if (unformat (line_input, "qdisc-bypass-disable"))
+ arg->flags &= ~AF_PACKET_IF_FLAGS_QDISC_BYPASS;
+ else if (unformat (line_input, "cksum-gso-disable"))
+ arg->flags &= ~AF_PACKET_IF_FLAGS_CKSUM_GSO;
+ else if (unformat (line_input, "mode ip"))
+ arg->mode = AF_PACKET_IF_MODE_IP;
+ else if (unformat (line_input, "v2"))
+ arg->is_v2 = 1;
+ else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address,
+ hwaddr))
+ arg->hw_addr = hwaddr;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (arg->host_if_name == NULL)
+ {
+ error = clib_error_return (0, "missing host interface name");
+ goto done;
+ }
+
+ r = af_packet_create_if (arg);
+
+ if (r == VNET_API_ERROR_SYSCALL_ERROR_1)
+ {
+ error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno);
+ goto done;
+ }
+
+ if (r == VNET_API_ERROR_INVALID_INTERFACE)
+ {
+ error = clib_error_return (0, "Invalid interface name");
+ goto done;
+ }
+
+  if (r == VNET_API_ERROR_IF_ALREADY_EXISTS)
+ {
+ error = clib_error_return (0, "Interface already exists");
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
+ arg->sw_if_index);
+
+done:
+ vec_free (arg->host_if_name);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Create a host interface that will attach to a linux AF_PACKET
+ * interface, one side of a veth pair. The veth pair must already
+ * exist. Once created, a new host interface will exist in VPP
+ * with the name '<em>host-<ifname></em>', where '<em><ifname></em>'
+ * is the name of the specified veth pair. Use the
+ * '<em>show interface</em>' command to display host interface details.
+ *
+ * This command has the following optional parameters:
+ *
+ * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either
+ * X:X:X:X:X:X unix or X.X.X cisco format.
+ * - <b>num-rx-queues <n></b> / <b>num-tx-queues <n></b> - Number of
+ * receive/transmit queues (default 1).
+ * - <b>mode ip</b> - Create the interface in IP mode instead of ethernet.
+ * - <b>v2</b> - Use TPACKET_V2 rings instead of the default TPACKET_V3.
+ * - <b>qdisc-bypass-disable</b> / <b>cksum-gso-disable</b> - Disable the
+ * qdisc bypass and checksum/GSO offload options, which are on by default.
+ *
+ * @cliexpar
+ * Example of how to create a host interface tied to one side of an
+ * existing linux veth pair named vpp1:
+ * @cliexstart{create host-interface name vpp1}
+ * host-vpp1
+ * @cliexend
+ * Once the host interface is created, enable the interface using:
+ * @cliexcmd{set interface state host-vpp1 up}
+?*/
+VLIB_CLI_COMMAND (af_packet_create_command, static) = {
+ .path = "create host-interface",
+ .short_help = "create host-interface [v2] name <ifname> [num-rx-queues <n>] "
+ "[num-tx-queues <n>] [hw-addr <mac-addr>] [mode ip] "
+ "[qdisc-bypass-disable] [cksum-gso-disable]",
+ .function = af_packet_create_command_fn,
+};
+
+static clib_error_t *
+af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *host_if_name = NULL;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &host_if_name))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (host_if_name == NULL)
+ {
+ error = clib_error_return (0, "missing host interface name");
+ goto done;
+ }
+
+ af_packet_delete_if (host_if_name);
+
+done:
+ vec_free (host_if_name);
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Delete a host interface. Use the linux interface name to identify
+ * the host interface to be deleted. In VPP, host interfaces are
+ * named as '<em>host-<ifname></em>', where '<em><ifname></em>'
+ * is the name of the linux interface.
+ *
+ * @cliexpar
+ * Example of how to delete a host interface named host-vpp1:
+ * @cliexcmd{delete host-interface name vpp1}
+?*/
+VLIB_CLI_COMMAND (af_packet_delete_command, static) = {
+ .path = "delete host-interface",
+ .short_help = "delete host-interface name <ifname>",
+ .function = af_packet_delete_command_fn,
+};
+
+static clib_error_t *
+af_packet_set_l4_cksum_offload_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 set = 0;
+ clib_error_t *error = NULL;
+ vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index = ~0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "on"))
+ set = 1;
+ else if (unformat (line_input, "off"))
+ set = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (af_packet_set_l4_cksum_offload (sw_if_index, set) < 0)
+ error = clib_error_return (0, "not an af_packet interface");
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+/*?
+ * Set TCP/UDP offload checksum calculation. Use interface
+ * name to identify the interface to set TCP/UDP offload checksum
+ * calculation.
+ *
+ * @cliexpar
+ * Example of how to set TCP/UDP offload checksum calculation on host-vpp0:
+ * @cliexcmd{set host-interface l4-cksum-offload host-vpp0 off}
+ * @cliexcmd{set host-interface l4-cksum-offload host-vpp0 on}
+?*/
+VLIB_CLI_COMMAND (af_packet_set_l4_cksum_offload_command, static) = {
+ .path = "set host-interface l4-cksum-offload",
+ .short_help = "set host-interface l4-cksum-offload <host-if-name> <on|off>",
+ .function = af_packet_set_l4_cksum_offload_command_fn,
+};
+
+static clib_error_t *
+af_packet_enable_disable_qdisc_bypass_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 enable_disable = 0;
+ clib_error_t *error = NULL;
+ vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index = ~0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "enable"))
+ enable_disable = 1;
+ else if (unformat (line_input, "disable"))
+ enable_disable = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (af_packet_enable_disable_qdisc_bypass (sw_if_index, enable_disable) < 0)
+ error = clib_error_return (0, "not an af_packet interface");
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
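+/*?
+ * Enable or disable the AF_PACKET qdisc bypass option on a host interface.
+ *
+ * @cliexpar
+ * Example:
+ * @cliexcmd{set host-interface qdisc-bypass host-vpp1 enable}
+?*/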
+VLIB_CLI_COMMAND (af_packet_enable_disable_qdisc_bypass_command, static) = {
+ .path = "set host-interface qdisc-bypass",
+ .short_help =
+ "set host-interface qdisc-bypass <host-if-name> <enable|disable>",
+ .function = af_packet_enable_disable_qdisc_bypass_command_fn,
+};
+
+clib_error_t *
+af_packet_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (af_packet_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/af_packet/device.c b/src/plugins/af_packet/device.c
new file mode 100644
index 00000000000..d76dad3dde0
--- /dev/null
+++ b/src/plugins/af_packet/device.c
@@ -0,0 +1,793 @@
+/*
+ *------------------------------------------------------------------
+ * device.c - linux kernel packet interface
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip_psh_cksum.h>
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/udp/udp_packet.h>
+
+#include <af_packet/af_packet.h>
+#include <vnet/devices/virtio/virtio_std.h>
+#include <vnet/devices/netlink.h>
+
+#define foreach_af_packet_tx_func_error \
+_(FRAME_NOT_READY, "tx frame not ready") \
+_(TXRING_EAGAIN, "tx sendto temporary failure") \
+_(TXRING_FATAL, "tx sendto fatal failure") \
+_(TXRING_OVERRUN, "tx ring overrun")
+
+typedef enum
+{
+#define _(f,s) AF_PACKET_TX_ERROR_##f,
+ foreach_af_packet_tx_func_error
+#undef _
+ AF_PACKET_TX_N_ERROR,
+} af_packet_tx_func_error_t;
+
+static char *af_packet_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_af_packet_tx_func_error
+#undef _
+};
+
+typedef struct
+{
+ u32 buffer_index;
+ u32 hw_if_index;
+ u16 queue_id;
+ u8 is_v2;
+ union
+ {
+ tpacket2_hdr_t tph2;
+ tpacket3_hdr_t tph3;
+ };
+ vnet_virtio_net_hdr_t vnet_hdr;
+ vlib_buffer_t buffer;
+} af_packet_tx_trace_t;
+
+#ifndef CLIB_MARCH_VARIANT
+u8 *
+format_af_packet_device_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, i);
+
+ s = format (s, "host-%s", apif->host_if_name);
+ return s;
+}
+#endif /* CLIB_MARCH_VARIANT */
+
+static u8 *
+format_af_packet_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ u32 indent = format_get_indent (s);
+ int __clib_unused verbose = va_arg (*args, int);
+
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, dev_instance);
+ af_packet_queue_t *rx_queue = 0;
+ af_packet_queue_t *tx_queue = 0;
+
+ s = format (s, "Linux PACKET socket interface %s",
+ (apif->version == TPACKET_V2) ? "v2" : "v3");
+ s = format (s, "\n%UFEATURES:", format_white_space, indent);
+ if (apif->is_qdisc_bypass_enabled)
+    s = format (s, "\n%Uqdisc-bypass-enabled", format_white_space, indent + 2);
+ if (apif->is_cksum_gso_enabled)
+ s = format (s, "\n%Ucksum-gso-enabled", format_white_space, indent + 2);
+ if (apif->is_fanout_enabled)
+ s = format (s, "\n%Ufanout-enabled", format_white_space, indent + 2);
+
+ vec_foreach (rx_queue, apif->rx_queues)
+ {
+ u32 rx_block_size = rx_queue->rx_req->req.tp_block_size;
+ u32 rx_frame_size = rx_queue->rx_req->req.tp_frame_size;
+ u32 rx_frame_nr = rx_queue->rx_req->req.tp_frame_nr;
+ u32 rx_block_nr = rx_queue->rx_req->req.tp_block_nr;
+
+ s = format (s, "\n%URX Queue %u:", format_white_space, indent,
+ rx_queue->queue_id);
+ s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d",
+ format_white_space, indent + 2, rx_block_size, rx_block_nr,
+ rx_frame_size, rx_frame_nr);
+ if (apif->version == TPACKET_V2)
+ s = format (s, " next frame:%d", rx_queue->next_rx_frame);
+ else
+ s = format (s, " next block:%d", rx_queue->next_rx_block);
+ if (rx_queue->is_rx_pending)
+ {
+ s = format (
+ s, "\n%UPending Request: num-rx-pkts:%d next-frame-offset:%d",
+ format_white_space, indent + 2, rx_queue->num_rx_pkts,
+ rx_queue->rx_frame_offset);
+ }
+ }
+
+ vec_foreach (tx_queue, apif->tx_queues)
+ {
+ clib_spinlock_lock (&tx_queue->lockp);
+ u32 tx_block_sz = tx_queue->tx_req->req.tp_block_size;
+ u32 tx_frame_sz = tx_queue->tx_req->req.tp_frame_size;
+ u32 tx_frame_nr = tx_queue->tx_req->req.tp_frame_nr;
+ u32 tx_block_nr = tx_queue->tx_req->req.tp_block_nr;
+ int block = 0;
+ int n_send_req = 0, n_avail = 0, n_sending = 0, n_tot = 0, n_wrong = 0;
+ u8 *tx_block_start = tx_queue->tx_ring[block];
+ u32 tx_frame = tx_queue->next_tx_frame;
+ tpacket3_hdr_t *tph3;
+ tpacket2_hdr_t *tph2;
+
+ s = format (s, "\n%UTX Queue %u:", format_white_space, indent,
+ tx_queue->queue_id);
+ s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d",
+ format_white_space, indent + 2, tx_block_sz, tx_block_nr,
+ tx_frame_sz, tx_frame_nr);
+ s = format (s, " next frame:%d", tx_queue->next_tx_frame);
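+      /* Walk the whole tx ring once, classifying each frame by its
+       * tp_status into available / send-requested / sending / other. */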
+    if (apif->version == TPACKET_V3)
+ do
+ {
+ tph3 =
+ (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz);
+ tx_frame = (tx_frame + 1) % tx_frame_nr;
+ if (tph3->tp_status == 0)
+ n_avail++;
+ else if (tph3->tp_status & TP_STATUS_SEND_REQUEST)
+ n_send_req++;
+ else if (tph3->tp_status & TP_STATUS_SENDING)
+ n_sending++;
+ else
+ n_wrong++;
+ n_tot++;
+ }
+ while (tx_frame != tx_queue->next_tx_frame);
+ else
+ do
+ {
+ tph2 =
+ (tpacket2_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz);
+ tx_frame = (tx_frame + 1) % tx_frame_nr;
+ if (tph2->tp_status == 0)
+ n_avail++;
+ else if (tph2->tp_status & TP_STATUS_SEND_REQUEST)
+ n_send_req++;
+ else if (tph2->tp_status & TP_STATUS_SENDING)
+ n_sending++;
+ else
+ n_wrong++;
+ n_tot++;
+ }
+ while (tx_frame != tx_queue->next_tx_frame);
+ s =
+ format (s, "\n%Uavailable:%d request:%d sending:%d wrong:%d total:%d",
+ format_white_space, indent + 2, n_avail, n_send_req, n_sending,
+ n_wrong, n_tot);
+ clib_spinlock_unlock (&tx_queue->lockp);
+ }
+ return s;
+}
+
+static u8 *
+format_af_packet_tx_trace (u8 *s, va_list *va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ af_packet_tx_trace_t *t = va_arg (*va, af_packet_tx_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "af_packet: hw_if_index %u tx-queue %u", t->hw_if_index,
+ t->queue_id);
+
+ if (t->is_v2)
+ {
+ s = format (
+ s,
+ "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
+ "\n%Usec 0x%x nsec 0x%x vlan %U"
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ " vlan_tpid %u"
+#endif
+ ,
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->tph2.tp_status, t->tph2.tp_len, t->tph2.tp_snaplen, t->tph2.tp_mac,
+ t->tph2.tp_net, format_white_space, indent + 4, t->tph2.tp_sec,
+ t->tph2.tp_nsec, format_ethernet_vlan_tci, t->tph2.tp_vlan_tci
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ ,
+ t->tph2.tp_vlan_tpid
+#endif
+ );
+ }
+ else
+ {
+ s = format (
+ s,
+ "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
+ "\n%Usec 0x%x nsec 0x%x vlan %U"
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ " vlan_tpid %u"
+#endif
+ ,
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->tph3.tp_status, t->tph3.tp_len, t->tph3.tp_snaplen, t->tph3.tp_mac,
+ t->tph3.tp_net, format_white_space, indent + 4, t->tph3.tp_sec,
+ t->tph3.tp_nsec, format_ethernet_vlan_tci, t->tph3.hv1.tp_vlan_tci
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ ,
+ t->tph3.hv1.tp_vlan_tpid
+#endif
+ );
+ }
+ s = format (s,
+ "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u"
+ "\n%Ugso_size %u csum_start %u csum_offset %u",
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->vnet_hdr.flags, t->vnet_hdr.gso_type, t->vnet_hdr.hdr_len,
+ format_white_space, indent + 4, t->vnet_hdr.gso_size,
+ t->vnet_hdr.csum_start, t->vnet_hdr.csum_offset);
+
+ s = format (s, "\n%Ubuffer 0x%x:\n%U%U", format_white_space, indent + 2,
+ t->buffer_index, format_white_space, indent + 4,
+ format_vnet_buffer_no_chain, &t->buffer);
+ s = format (s, "\n%U%U", format_white_space, indent + 2,
+ format_ethernet_header_with_length, t->buffer.pre_data,
+ sizeof (t->buffer.pre_data));
+ return s;
+}
+
+static void
+af_packet_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b0, u32 bi, void *tph,
+ vnet_virtio_net_hdr_t *vnet_hdr, u32 hw_if_index,
+ u16 queue_id, u8 is_v2)
+{
+ af_packet_tx_trace_t *t;
+ t = vlib_add_trace (vm, node, b0, sizeof (t[0]));
+ t->hw_if_index = hw_if_index;
+ t->queue_id = queue_id;
+ t->buffer_index = bi;
+ t->is_v2 = is_v2;
+
+ if (is_v2)
+ clib_memcpy_fast (&t->tph2, (tpacket2_hdr_t *) tph,
+ sizeof (tpacket2_hdr_t));
+ else
+ clib_memcpy_fast (&t->tph3, (tpacket3_hdr_t *) tph,
+ sizeof (tpacket3_hdr_t));
+ clib_memcpy_fast (&t->vnet_hdr, vnet_hdr, sizeof (*vnet_hdr));
+ clib_memcpy_fast (&t->buffer, b0, sizeof (*b0) - sizeof (b0->pre_data));
+ clib_memcpy_fast (t->buffer.pre_data, vlib_buffer_get_current (b0),
+ sizeof (t->buffer.pre_data));
+}
+
+static_always_inline void
+fill_gso_offload (vlib_buffer_t *b0, vnet_virtio_net_hdr_t *vnet_hdr)
+{
+ vnet_buffer_oflags_t oflags = vnet_buffer (b0)->oflags;
+ i16 l4_hdr_offset = vnet_buffer (b0)->l4_hdr_offset - b0->current_data;
+ if (b0->flags & VNET_BUFFER_F_IS_IP4)
+ {
+ ip4_header_t *ip4;
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ vnet_hdr->gso_size = vnet_buffer2 (b0)->gso_size;
+ vnet_hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b0)->gso_l4_hdr_sz;
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet_hdr->csum_start = l4_hdr_offset; /* eth (14) + ip4 (20) = 0x22 */
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ ip4->checksum = ip4_header_checksum (ip4);
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ tcp->checksum = ip4_pseudo_header_cksum (ip4);
+ }
+ else if (b0->flags & VNET_BUFFER_F_IS_IP6)
+ {
+ ip6_header_t *ip6;
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ vnet_hdr->gso_size = vnet_buffer2 (b0)->gso_size;
+ vnet_hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b0)->gso_l4_hdr_sz;
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet_hdr->csum_start = l4_hdr_offset; /* eth (14) + ip6 (40) = 0x36 */
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ tcp->checksum = ip6_pseudo_header_cksum (ip6);
+ }
+}
+
+static_always_inline void
+fill_cksum_offload (vlib_buffer_t *b0, vnet_virtio_net_hdr_t *vnet_hdr)
+{
+ vnet_buffer_oflags_t oflags = vnet_buffer (b0)->oflags;
+ i16 l4_hdr_offset = vnet_buffer (b0)->l4_hdr_offset - b0->current_data;
+ if (b0->flags & VNET_BUFFER_F_IS_IP4)
+ {
+ ip4_header_t *ip4;
+ ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ ip4->checksum = ip4_header_checksum (ip4);
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet_hdr->csum_start = l4_hdr_offset;
+ if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+ {
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ tcp->checksum = ip4_pseudo_header_cksum (ip4);
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ vnet_hdr->hdr_len = l4_hdr_offset + tcp_header_bytes (tcp);
+ }
+ else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
+ {
+ udp_header_t *udp =
+ (udp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ udp->checksum = ip4_pseudo_header_cksum (ip4);
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
+ vnet_hdr->hdr_len = l4_hdr_offset + sizeof (udp_header_t);
+ }
+ }
+ else if (b0->flags & VNET_BUFFER_F_IS_IP6)
+ {
+ ip6_header_t *ip6;
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet_hdr->csum_start = l4_hdr_offset;
+ ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+ {
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ tcp->checksum = ip6_pseudo_header_cksum (ip6);
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ vnet_hdr->hdr_len = l4_hdr_offset + tcp_header_bytes (tcp);
+ }
+ else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
+ {
+ udp_header_t *udp =
+ (udp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ udp->checksum = ip6_pseudo_header_cksum (ip6);
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
+ vnet_hdr->hdr_len = l4_hdr_offset + sizeof (udp_header_t);
+ }
+ }
+}
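+
+/*
+ * Worked example of the virtio header filled in above (and by
+ * fill_gso_offload) for an Ethernet + IPv4 + TCP packet without VLAN:
+ * l4_hdr_offset = 14 (eth) + 20 (ip4) = 34, so csum_start = 34 and
+ * csum_offset = 16 (offset of the checksum field inside the TCP header);
+ * the kernel completes the L4 checksum starting from the pseudo-header
+ * value seeded above. For UDP, csum_offset is 6. For GSO, hdr_len also
+ * covers the L4 header so headers can be replicated per segment.
+ */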
+
+VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
+ u32 *buffers = vlib_frame_vector_args (frame);
+ u32 n_left = frame->n_vectors;
+ u32 n_sent = 0;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, rd->dev_instance);
+ u16 queue_id = tf->queue_id;
+ af_packet_queue_t *tx_queue = vec_elt_at_index (apif->tx_queues, queue_id);
+ u32 block = 0, frame_size = 0, frame_num = 0, tx_frame = 0;
+ u8 *block_start = 0;
+ tpacket3_hdr_t *tph3 = 0;
+ tpacket2_hdr_t *tph2 = 0;
+ u32 frame_not_ready = 0;
+ u8 is_cksum_gso_enabled = (apif->is_cksum_gso_enabled == 1) ? 1 : 0;
+ u32 tpacket_align = 0;
+ u8 is_v2 = (apif->version == TPACKET_V2) ? 1 : 0;
+
+ if (tf->shared_queue)
+ clib_spinlock_lock (&tx_queue->lockp);
+
+ frame_size = tx_queue->tx_req->req.tp_frame_size;
+ frame_num = tx_queue->tx_req->req.tp_frame_nr;
+ block_start = tx_queue->tx_ring[block];
+ tx_frame = tx_queue->next_tx_frame;
+ if (is_v2)
+ {
+ tpacket_align = TPACKET_ALIGN (sizeof (tpacket2_hdr_t));
+ while (n_left)
+ {
+ u32 len;
+ vnet_virtio_net_hdr_t *vnet_hdr = 0;
+ u32 offset = 0;
+ vlib_buffer_t *b0 = 0, *b0_first = 0;
+ u32 bi, bi_first;
+
+ bi = bi_first = buffers[0];
+ n_left--;
+ buffers++;
+
+ tph2 = (tpacket2_hdr_t *) (block_start + tx_frame * frame_size);
+ if (PREDICT_FALSE (tph2->tp_status &
+ (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)))
+ {
+ frame_not_ready++;
+ goto nextv2;
+ }
+
+ b0_first = b0 = vlib_get_buffer (vm, bi);
+
+ if (PREDICT_TRUE (is_cksum_gso_enabled))
+ {
+ vnet_hdr =
+ (vnet_virtio_net_hdr_t *) ((u8 *) tph2 + tpacket_align);
+
+ clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t));
+ offset = sizeof (vnet_virtio_net_hdr_t);
+
+ if (b0->flags & VNET_BUFFER_F_GSO)
+ fill_gso_offload (b0, vnet_hdr);
+ else if (b0->flags & VNET_BUFFER_F_OFFLOAD)
+ fill_cksum_offload (b0, vnet_hdr);
+ }
+
+ len = b0->current_length;
+ clib_memcpy_fast ((u8 *) tph2 + tpacket_align + offset,
+ vlib_buffer_get_current (b0), len);
+ offset += len;
+
+ while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b0 = vlib_get_buffer (vm, b0->next_buffer);
+ len = b0->current_length;
+ clib_memcpy_fast ((u8 *) tph2 + tpacket_align + offset,
+ vlib_buffer_get_current (b0), len);
+ offset += len;
+ }
+
+ tph2->tp_len = tph2->tp_snaplen = offset;
+ tph2->tp_status = TP_STATUS_SEND_REQUEST;
+ n_sent++;
+
+ if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ if (PREDICT_TRUE (is_cksum_gso_enabled))
+ af_packet_tx_trace (vm, node, b0_first, bi_first, tph2,
+ vnet_hdr, apif->hw_if_index, queue_id, 1);
+ else
+ {
+ vnet_virtio_net_hdr_t vnet_hdr2 = {};
+ af_packet_tx_trace (vm, node, b0_first, bi_first, tph2,
+ &vnet_hdr2, apif->hw_if_index, queue_id,
+ 1);
+ }
+ }
+ tx_frame = (tx_frame + 1) % frame_num;
+
+ nextv2:
+ /* check if we've exhausted the ring */
+ if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
+ break;
+ }
+ }
+ else
+ {
+ tpacket_align = TPACKET_ALIGN (sizeof (tpacket3_hdr_t));
+
+ while (n_left)
+ {
+ u32 len;
+ vnet_virtio_net_hdr_t *vnet_hdr = 0;
+ u32 offset = 0;
+ vlib_buffer_t *b0 = 0, *b0_first = 0;
+ u32 bi, bi_first;
+
+ bi = bi_first = buffers[0];
+ n_left--;
+ buffers++;
+
+ tph3 = (tpacket3_hdr_t *) (block_start + tx_frame * frame_size);
+ if (PREDICT_FALSE (tph3->tp_status &
+ (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)))
+ {
+ frame_not_ready++;
+ goto nextv3;
+ }
+
+ b0_first = b0 = vlib_get_buffer (vm, bi);
+
+ if (PREDICT_TRUE (is_cksum_gso_enabled))
+ {
+ vnet_hdr =
+ (vnet_virtio_net_hdr_t *) ((u8 *) tph3 + tpacket_align);
+
+ clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t));
+ offset = sizeof (vnet_virtio_net_hdr_t);
+
+ if (b0->flags & VNET_BUFFER_F_GSO)
+ fill_gso_offload (b0, vnet_hdr);
+ else if (b0->flags & VNET_BUFFER_F_OFFLOAD)
+ fill_cksum_offload (b0, vnet_hdr);
+ }
+
+ len = b0->current_length;
+ clib_memcpy_fast ((u8 *) tph3 + tpacket_align + offset,
+ vlib_buffer_get_current (b0), len);
+ offset += len;
+
+ while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b0 = vlib_get_buffer (vm, b0->next_buffer);
+ len = b0->current_length;
+ clib_memcpy_fast ((u8 *) tph3 + tpacket_align + offset,
+ vlib_buffer_get_current (b0), len);
+ offset += len;
+ }
+
+ tph3->tp_len = tph3->tp_snaplen = offset;
+ tph3->tp_status = TP_STATUS_SEND_REQUEST;
+ n_sent++;
+
+ if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ if (PREDICT_TRUE (is_cksum_gso_enabled))
+ af_packet_tx_trace (vm, node, b0_first, bi_first, tph3,
+ vnet_hdr, apif->hw_if_index, queue_id, 0);
+ else
+ {
+ vnet_virtio_net_hdr_t vnet_hdr2 = {};
+ af_packet_tx_trace (vm, node, b0_first, bi_first, tph3,
+ &vnet_hdr2, apif->hw_if_index, queue_id,
+ 0);
+ }
+ }
+ tx_frame = (tx_frame + 1) % frame_num;
+
+ nextv3:
+ /* check if we've exhausted the ring */
+ if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
+ break;
+ }
+ }
+ CLIB_MEMORY_BARRIER ();
+
+ if (PREDICT_TRUE (n_sent || tx_queue->is_tx_pending))
+ {
+ tx_queue->next_tx_frame = tx_frame;
+ tx_queue->is_tx_pending = 0;
+
+ if (PREDICT_FALSE (
+ sendto (tx_queue->fd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1))
+ {
+ /* Uh-oh, drop & move on, but count whether it was fatal or not.
+ * Note that we have no reliable way to properly determine the
+ * disposition of the packets we just enqueued for delivery.
+ */
+ uword counter;
+
+ if (unix_error_is_fatal (errno))
+ {
+ counter = AF_PACKET_TX_ERROR_TXRING_FATAL;
+ }
+ else
+ {
+ counter = AF_PACKET_TX_ERROR_TXRING_EAGAIN;
+ /* non-fatal error: kick again next time.
+ * Note that a deadlock is still possible: if this tx node is never
+ * rescheduled to send new packets (i.e. nothing re-kicks the ring),
+ * e.g. because the peer is waiting for our unsent packets before
+ * replying while we are waiting for its reply, then neither side
+ * will ever talk first... */
+ tx_queue->is_tx_pending = 1;
+ }
+
+ vlib_error_count (vm, node->node_index, counter, 1);
+ }
+ }
+
+ if (tf->shared_queue)
+ clib_spinlock_unlock (&tx_queue->lockp);
+
+ if (PREDICT_FALSE (frame_not_ready))
+ vlib_error_count (vm, node->node_index,
+ AF_PACKET_TX_ERROR_FRAME_NOT_READY, frame_not_ready);
+
+ if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
+ vlib_error_count (vm, node->node_index, AF_PACKET_TX_ERROR_TXRING_OVERRUN,
+ n_left);
+
+ vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
+ return frame->n_vectors;
+}
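+
+/*
+ * TX path summary: each frame copied into the PACKET_TX_RING is marked
+ * TP_STATUS_SEND_REQUEST, then a single zero-length
+ * sendto (fd, NULL, 0, MSG_DONTWAIT, NULL, 0) asks the kernel to walk
+ * the ring and transmit every frame so marked; the kernel hands a slot
+ * back by resetting its tp_status to 0 (TP_STATUS_AVAILABLE). The memory
+ * barrier above orders the payload and status stores before the kick.
+ */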
+
+static void
+af_packet_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, hw->dev_instance);
+
+ /* Shut off redirection */
+ if (node_index == ~0)
+ {
+ apif->per_interface_next_index = node_index;
+ return;
+ }
+
+ apif->per_interface_next_index =
+ vlib_node_add_next (vlib_get_main (), af_packet_input_node.index,
+ node_index);
+}
+
+static void
+af_packet_clear_hw_interface_counters (u32 instance)
+{
+ /* Nothing for now */
+}
+
+static clib_error_t *
+af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
+ u32 flags)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, hw->dev_instance);
+ u32 hw_flags;
+
+ if (apif->host_if_index < 0)
+ return 0; /* no error */
+
+ apif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ if (apif->is_admin_up)
+ {
+ hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
+ vnet_netlink_set_link_state (apif->host_if_index, 1);
+ }
+ else
+ {
+ hw_flags = 0;
+ vnet_netlink_set_link_state (apif->host_if_index, 0);
+ }
+
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return 0; /* no error */
+}
+
+static clib_error_t *af_packet_set_mac_address_function
+ (struct vnet_hw_interface_t *hi, const u8 * old_address, const u8 * address)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, hi->dev_instance);
+ int rv, fd;
+ struct ifreq ifr;
+
+ if (apif->mode == AF_PACKET_IF_MODE_IP)
+ {
+ vlib_log_warn (apm->log_class, "af_packet_%s interface is in IP mode",
+ apif->host_if_name);
+ return clib_error_return (0,
+ " MAC update failed, interface is in IP mode");
+ }
+
+ fd = socket (AF_UNIX, SOCK_DGRAM, 0);
+ if (0 > fd)
+ {
+ vlib_log_warn (apm->log_class, "af_packet_%s could not open socket",
+ apif->host_if_name);
+ return 0;
+ }
+
+ /* if interface is a bridge ignore */
+ if (apif->host_if_index < 0)
+ goto error; /* no error */
+
+ /* use host_if_index in case host name has changed */
+ ifr.ifr_ifindex = apif->host_if_index;
+ if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0)
+ {
+ vlib_log_warn
+ (apm->log_class,
+ "af_packet_%s ioctl could not retrieve eth name, error: %d",
+ apif->host_if_name, rv);
+ goto error;
+ }
+
+ clib_memcpy (ifr.ifr_hwaddr.sa_data, address, 6);
+ ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+
+ if ((rv = ioctl (fd, SIOCSIFHWADDR, &ifr)) < 0)
+ {
+ vlib_log_warn (apm->log_class,
+ "af_packet_%s ioctl could not set mac, error: %d",
+ apif->host_if_name, rv);
+ goto error;
+ }
+
+error:
+
+ if (0 <= fd)
+ close (fd);
+
+ return 0; /* no error */
+}
+
+static clib_error_t *
+af_packet_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid,
+ vnet_hw_if_rx_mode mode)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ af_packet_if_t *apif;
+
+ apif = vec_elt_at_index (apm->interfaces, hw->dev_instance);
+
+ if (mode == VNET_HW_IF_RX_MODE_ADAPTIVE)
+ {
+ vlib_log_err (apm->log_class,
+ "af_packet_%s adaptive mode is not supported",
+ apif->host_if_name);
+ return clib_error_return (
+ 0, "af_packet_%s adaptive mode is not supported", apif->host_if_name);
+ }
+
+ af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, qid);
+
+ if (rx_queue->mode != mode)
+ {
+ rx_queue->mode = mode;
+
+ if (mode == VNET_HW_IF_RX_MODE_POLLING)
+ apm->polling_count++;
+ else if (mode == VNET_HW_IF_RX_MODE_INTERRUPT && apm->polling_count > 0)
+ apm->polling_count--;
+ }
+
+ return 0;
+}
+
+VNET_DEVICE_CLASS (af_packet_device_class) = {
+ .name = "af-packet",
+ .format_device_name = format_af_packet_device_name,
+ .format_device = format_af_packet_device,
+ .format_tx_trace = format_af_packet_tx_trace,
+ .tx_function_n_errors = AF_PACKET_TX_N_ERROR,
+ .tx_function_error_strings = af_packet_tx_func_error_strings,
+ .rx_redirect_to_node = af_packet_set_interface_next_node,
+ .clear_counters = af_packet_clear_hw_interface_counters,
+ .admin_up_down_function = af_packet_interface_admin_up_down,
+ .mac_addr_change_function = af_packet_set_mac_address_function,
+ .rx_mode_change_function = af_packet_interface_rx_mode_change,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/af_packet/dir.dox b/src/plugins/af_packet/dir.dox
new file mode 100644
index 00000000000..78991c6d97f
--- /dev/null
+++ b/src/plugins/af_packet/dir.dox
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir
+@brief Host Interface Implementation.
+
+This directory contains the source code for the Host Interface (af_packet)
+driver. The driver sends and receives packets through the Linux kernel
+AF_PACKET socket interface.
+
+*/
+/*? %%clicmd:group_label Host Interface %% ?*/
+/*? %%syscfg:group_label Host Interface %% ?*/
diff --git a/src/plugins/af_packet/node.c b/src/plugins/af_packet/node.c
new file mode 100644
index 00000000000..279f11c0183
--- /dev/null
+++ b/src/plugins/af_packet/node.c
@@ -0,0 +1,832 @@
+/*
+ *------------------------------------------------------------------
+ * node.c - linux kernel packet interface (RX node)
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <linux/if_packet.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ethernet/packet.h>
+
+#include <af_packet/af_packet.h>
+#include <vnet/devices/virtio/virtio_std.h>
+
+#define foreach_af_packet_input_error \
+ _ (PARTIAL_PKT, "partial packet") \
+ _ (TIMEDOUT_BLK, "timed out block") \
+ _ (TOTAL_RECV_BLK, "total received block")
+typedef enum
+{
+#define _(f,s) AF_PACKET_INPUT_ERROR_##f,
+ foreach_af_packet_input_error
+#undef _
+ AF_PACKET_INPUT_N_ERROR,
+} af_packet_input_error_t;
+
+static char *af_packet_input_error_strings[] = {
+#define _(n,s) s,
+ foreach_af_packet_input_error
+#undef _
+};
+
+typedef struct
+{
+ u32 next_index;
+ u32 hw_if_index;
+ u16 queue_id;
+ int block;
+ u32 pkt_num;
+ void *block_start;
+ block_desc_t bd;
+ union
+ {
+ tpacket3_hdr_t tph3;
+ tpacket2_hdr_t tph2;
+ };
+ vnet_virtio_net_hdr_t vnet_hdr;
+ u8 is_v3;
+} af_packet_input_trace_t;
+
+static u8 *
+format_af_packet_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "af_packet: hw_if_index %d rx-queue %u next-index %d",
+ t->hw_if_index, t->queue_id, t->next_index);
+
+ if (t->is_v3)
+ {
+ s = format (
+ s, "\n%Ublock %u:\n%Uaddress %p version %u seq_num %lu pkt_num %u",
+ format_white_space, indent + 2, t->block, format_white_space,
+ indent + 4, t->block_start, t->bd.version, t->bd.hdr.bh1.seq_num,
+ t->pkt_num);
+ s = format (
+ s,
+ "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
+ "\n%Usec 0x%x nsec 0x%x vlan %U"
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ " vlan_tpid %u"
+#endif
+ ,
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->tph3.tp_status, t->tph3.tp_len, t->tph3.tp_snaplen, t->tph3.tp_mac,
+ t->tph3.tp_net, format_white_space, indent + 4, t->tph3.tp_sec,
+ t->tph3.tp_nsec, format_ethernet_vlan_tci, t->tph3.hv1.tp_vlan_tci
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ ,
+ t->tph3.hv1.tp_vlan_tpid
+#endif
+ );
+ }
+ else
+ {
+ s = format (
+ s,
+ "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
+ "\n%Usec 0x%x nsec 0x%x vlan %U"
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ " vlan_tpid %u"
+#endif
+ ,
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->tph2.tp_status, t->tph2.tp_len, t->tph2.tp_snaplen, t->tph2.tp_mac,
+ t->tph2.tp_net, format_white_space, indent + 4, t->tph2.tp_sec,
+ t->tph2.tp_nsec, format_ethernet_vlan_tci, t->tph2.tp_vlan_tci
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ ,
+ t->tph2.tp_vlan_tpid
+#endif
+ );
+ }
+
+ s = format (s,
+ "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u"
+ "\n%Ugso_size %u csum_start %u csum_offset %u",
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->vnet_hdr.flags, t->vnet_hdr.gso_type, t->vnet_hdr.hdr_len,
+ format_white_space, indent + 4, t->vnet_hdr.gso_size,
+ t->vnet_hdr.csum_start, t->vnet_hdr.csum_offset);
+ return s;
+}
+
+always_inline void
+buffer_add_to_chain (vlib_buffer_t *b, vlib_buffer_t *first_b,
+ vlib_buffer_t *prev_b, u32 bi)
+{
+ /* update first buffer */
+ first_b->total_length_not_including_first_buffer += b->current_length;
+
+ /* update previous buffer */
+ prev_b->next_buffer = bi;
+ prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ /* update current buffer */
+ b->next_buffer = ~0;
+}
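+
+/*
+ * buffer_add_to_chain is used when a single AF_PACKET frame is larger
+ * than one vlib buffer (n_buffer_bytes): the payload is split across
+ * several buffers linked via next_buffer, and only the head buffer
+ * carries the total-length accounting for the rest of the chain.
+ */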
+
+static_always_inline void
+fill_gso_offload (vlib_buffer_t *b, u32 gso_size, u8 l4_hdr_sz)
+{
+ b->flags |= VNET_BUFFER_F_GSO;
+ vnet_buffer2 (b)->gso_size = gso_size;
+ vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_sz;
+}
+
+static_always_inline void
+fill_cksum_offload (vlib_buffer_t *b, u8 *l4_hdr_sz, u8 is_ip)
+{
+ vnet_buffer_oflags_t oflags = 0;
+ u16 l2hdr_sz = 0;
+ u16 ethertype = 0;
+ u8 l4_proto = 0;
+
+ if (is_ip)
+ {
+ switch (b->data[0] & 0xf0)
+ {
+ case 0x40:
+ ethertype = ETHERNET_TYPE_IP4;
+ break;
+ case 0x60:
+ ethertype = ETHERNET_TYPE_IP6;
+ break;
+ }
+ }
+ else
+ {
+ ethernet_header_t *eth = (ethernet_header_t *) b->data;
+ ethertype = clib_net_to_host_u16 (eth->type);
+ l2hdr_sz = sizeof (ethernet_header_t);
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eth + 1);
+
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ vlan++;
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ }
+ }
+ }
+
+ vnet_buffer (b)->l2_hdr_offset = 0;
+ vnet_buffer (b)->l3_hdr_offset = l2hdr_sz;
+
+ if (ethertype == ETHERNET_TYPE_IP4)
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) (b->data + l2hdr_sz);
+ vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
+ b->flags |= (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+
+ l4_proto = ip4->protocol;
+ }
+ else if (ethertype == ETHERNET_TYPE_IP6)
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) (b->data + l2hdr_sz);
+ b->flags |= (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+ u16 ip6_hdr_len = sizeof (ip6_header_t);
+
+ if (ip6_ext_hdr (ip6->protocol))
+ {
+ ip6_ext_header_t *p = (void *) (ip6 + 1);
+ ip6_hdr_len += ip6_ext_header_len (p);
+ while (ip6_ext_hdr (p->next_hdr))
+ {
+ p = ip6_ext_next_header (p);
+ ip6_hdr_len += ip6_ext_header_len (p);
+ }
+ l4_proto = p->next_hdr;
+ }
+ else
+ l4_proto = ip6->protocol;
+ vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip6_hdr_len;
+ }
+
+ if (l4_proto == IP_PROTOCOL_TCP)
+ {
+ oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
+ *l4_hdr_sz = tcp_header_bytes (tcp);
+ }
+ else if (l4_proto == IP_PROTOCOL_UDP)
+ {
+ oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+ *l4_hdr_sz = sizeof (udp_header_t);
+ }
+
+ if (oflags)
+ vnet_buffer_offload_flags_set (b, oflags);
+}
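+
+/*
+ * Example of the offsets computed above for a single-tagged VLAN IPv4/TCP
+ * packet received in Ethernet mode: l2_hdr_offset = 0,
+ * l3_hdr_offset = 14 + 4 = 18 and l4_hdr_offset = 18 + 20 = 38 (assuming
+ * no IPv4 options), with VNET_BUFFER_OFFLOAD_F_TCP_CKSUM set so the
+ * checksum is finished later in the forwarding path.
+ */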
+
+always_inline uword
+af_packet_v3_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, af_packet_if_t *apif,
+ u16 queue_id, u8 is_cksum_gso_enabled)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id);
+ tpacket3_hdr_t *tph;
+ u32 next_index;
+ u32 n_free_bufs;
+ u32 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u32 timedout_blk = 0;
+ u32 total = 0;
+ u32 *to_next = 0;
+ u32 block = rx_queue->next_rx_block;
+ u32 block_nr = rx_queue->rx_req->req3.tp_block_nr;
+ u8 *block_start = 0;
+ uword n_trace = vlib_get_trace_count (vm, node);
+ u32 thread_index = vm->thread_index;
+ u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
+ u32 min_bufs = rx_queue->rx_req->req3.tp_frame_size / n_buffer_bytes;
+ u32 num_pkts = 0;
+ u32 rx_frame_offset = 0;
+ block_desc_t *bd = 0;
+ u32 sw_if_index = apif->sw_if_index;
+ u8 is_ip = (apif->mode == AF_PACKET_IF_MODE_IP);
+
+ if (is_ip)
+ next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ else
+ next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+
+ if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block]))
+ ->hdr.bh1.block_status &
+ TP_STATUS_USER) != 0)
+ {
+ u32 n_required = 0;
+ bd = (block_desc_t *) block_start;
+
+ if (PREDICT_FALSE (rx_queue->is_rx_pending))
+ {
+ num_pkts = rx_queue->num_rx_pkts;
+ rx_frame_offset = rx_queue->rx_frame_offset;
+ rx_queue->is_rx_pending = 0;
+ }
+ else
+ {
+ num_pkts = bd->hdr.bh1.num_pkts;
+ rx_frame_offset = bd->hdr.bh1.offset_to_first_pkt;
+ total++;
+
+ if (TP_STATUS_BLK_TMO & bd->hdr.bh1.block_status)
+ timedout_blk++;
+ }
+
+ n_required = clib_max (num_pkts, VLIB_FRAME_SIZE);
+ n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
+ if (PREDICT_FALSE (n_free_bufs < n_required))
+ {
+ vec_validate (apm->rx_buffers[thread_index],
+ n_required + n_free_bufs - 1);
+ n_free_bufs += vlib_buffer_alloc (
+ vm, &apm->rx_buffers[thread_index][n_free_bufs], n_required);
+ vec_set_len (apm->rx_buffers[thread_index], n_free_bufs);
+ }
+
+ while (num_pkts && (n_free_bufs >= min_bufs))
+ {
+ u32 next0 = next_index;
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (num_pkts && n_left_to_next && (n_free_bufs >= min_bufs))
+ {
+ tph = (tpacket3_hdr_t *) (block_start + rx_frame_offset);
+
+ if (num_pkts > 1)
+ CLIB_PREFETCH (block_start + rx_frame_offset +
+ tph->tp_next_offset,
+ 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+
+ vlib_buffer_t *b0 = 0, *first_b0 = 0, *prev_b0 = 0;
+ vnet_virtio_net_hdr_t *vnet_hdr = 0;
+ u32 data_len = tph->tp_snaplen;
+ u32 offset = 0;
+ u32 bi0 = ~0, first_bi0 = ~0;
+ u8 l4_hdr_sz = 0;
+
+ if (is_cksum_gso_enabled)
+ vnet_hdr =
+ (vnet_virtio_net_hdr_t *) ((u8 *) tph + tph->tp_mac -
+ sizeof (vnet_virtio_net_hdr_t));
+
+ // save current state and return
+ if (PREDICT_FALSE (((data_len / n_buffer_bytes) + 1) >
+ vec_len (apm->rx_buffers[thread_index])))
+ {
+ rx_queue->rx_frame_offset = rx_frame_offset;
+ rx_queue->num_rx_pkts = num_pkts;
+ rx_queue->is_rx_pending = 1;
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ goto done;
+ }
+
+ while (data_len)
+ {
+ /* grab free buffer */
+ u32 last_empty_buffer =
+ vec_len (apm->rx_buffers[thread_index]) - 1;
+ bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
+ vec_set_len (apm->rx_buffers[thread_index],
+ last_empty_buffer);
+ n_free_bufs--;
+
+ /* copy data */
+ u32 bytes_to_copy =
+ data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
+ u32 vlan_len = 0;
+ u32 bytes_copied = 0;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->current_data = 0;
+
+ /* Kernel removes VLAN headers, so reconstruct VLAN */
+ if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
+ {
+ if (PREDICT_TRUE (offset == 0))
+ {
+ clib_memcpy_fast (vlib_buffer_get_current (b0),
+ (u8 *) tph + tph->tp_mac,
+ sizeof (ethernet_header_t));
+ ethernet_header_t *eth =
+ vlib_buffer_get_current (b0);
+ ethernet_vlan_header_t *vlan =
+ (ethernet_vlan_header_t *) (eth + 1);
+ vlan->priority_cfi_and_id =
+ clib_host_to_net_u16 (tph->hv1.tp_vlan_tci);
+ vlan->type = eth->type;
+ eth->type =
+ clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ vlan_len = sizeof (ethernet_vlan_header_t);
+ bytes_copied = sizeof (ethernet_header_t);
+ }
+ }
+ clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) +
+ bytes_copied + vlan_len,
+ (u8 *) tph + tph->tp_mac + offset +
+ bytes_copied,
+ (bytes_to_copy - bytes_copied));
+
+ /* fill buffer header */
+ b0->current_length = bytes_to_copy + vlan_len;
+
+ if (offset == 0)
+ {
+ b0->total_length_not_including_first_buffer = 0;
+ b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~0;
+ first_b0 = b0;
+ first_bi0 = bi0;
+ if (is_cksum_gso_enabled)
+ {
+ if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ fill_cksum_offload (first_b0, &l4_hdr_sz, is_ip);
+ if (vnet_hdr->gso_type & (VIRTIO_NET_HDR_GSO_TCPV4 |
+ VIRTIO_NET_HDR_GSO_TCPV6))
+ fill_gso_offload (first_b0, vnet_hdr->gso_size,
+ l4_hdr_sz);
+ }
+ }
+ else
+ buffer_add_to_chain (b0, first_b0, prev_b0, bi0);
+
+ prev_b0 = b0;
+ offset += bytes_to_copy;
+ data_len -= bytes_to_copy;
+ }
+ n_rx_packets++;
+ n_rx_bytes += tph->tp_snaplen;
+ to_next[0] = first_bi0;
+ to_next += 1;
+ n_left_to_next--;
+
+ /* drop partial packets */
+ if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen))
+ {
+ next0 = VNET_DEVICE_INPUT_NEXT_DROP;
+ first_b0->error =
+ node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT];
+ }
+ else
+ {
+ if (PREDICT_FALSE (is_ip))
+ {
+ switch (first_b0->data[0] & 0xf0)
+ {
+ case 0x40:
+ next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ break;
+ case 0x60:
+ next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+ break;
+ default:
+ next0 = VNET_DEVICE_INPUT_NEXT_DROP;
+ break;
+ }
+ if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
+ next0 = apif->per_interface_next_index;
+ }
+ else
+ {
+ next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
+ next0 = apif->per_interface_next_index;
+ /* redirect if feature path enabled */
+ vnet_feature_start_device_input (sw_if_index, &next0,
+ first_b0);
+ }
+ }
+
+ /* trace */
+ if (PREDICT_FALSE (n_trace > 0 &&
+ vlib_trace_buffer (vm, node, next0, first_b0,
+ /* follow_chain */ 0)))
+ {
+ af_packet_input_trace_t *tr;
+ vlib_set_trace_count (vm, node, --n_trace);
+ tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
+ tr->is_v3 = 1;
+ tr->next_index = next0;
+ tr->hw_if_index = apif->hw_if_index;
+ tr->queue_id = queue_id;
+ tr->block = block;
+ tr->block_start = bd;
+ tr->pkt_num = bd->hdr.bh1.num_pkts - num_pkts;
+ clib_memcpy_fast (&tr->bd, bd, sizeof (block_desc_t));
+ clib_memcpy_fast (&tr->tph3, tph, sizeof (tpacket3_hdr_t));
+ if (is_cksum_gso_enabled)
+ clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr,
+ sizeof (vnet_virtio_net_hdr_t));
+ else
+ clib_memset_u8 (&tr->vnet_hdr, 0,
+ sizeof (vnet_virtio_net_hdr_t));
+ }
+
+ /* enqueue and move to the next packet */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, first_bi0,
+ next0);
+
+ /* next packet */
+ num_pkts--;
+ rx_frame_offset += tph->tp_next_offset;
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ if (PREDICT_TRUE (num_pkts == 0))
+ {
+ bd->hdr.bh1.block_status = TP_STATUS_KERNEL;
+ block = (block + 1) % block_nr;
+ }
+ else
+ {
+ rx_queue->rx_frame_offset = rx_frame_offset;
+ rx_queue->num_rx_pkts = num_pkts;
+ rx_queue->is_rx_pending = 1;
+ }
+ }
+
+ rx_queue->next_rx_block = block;
+
+done:
+
+ if (apm->polling_count == 0)
+ {
+ if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block]))
+ ->hdr.bh1.block_status &
+ TP_STATUS_USER) != 0)
+ vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_POLLING);
+ else
+ vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_INTERRUPT);
+ }
+
+ vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TOTAL_RECV_BLK,
+ total);
+ vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TIMEDOUT_BLK,
+ timedout_blk);
+
+ vlib_increment_combined_counter
+ (vnet_get_main ()->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
+ return n_rx_packets;
+}
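+
+/*
+ * When no rx queue is in polling mode (apm->polling_count == 0), the
+ * node toggles its own scheduling state above: it stays in POLLING while
+ * the next block is already user-owned (more packets pending) and falls
+ * back to INTERRUPT otherwise, so cycles are only burnt while the kernel
+ * has data to hand over.
+ */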
+
+always_inline uword
+af_packet_v2_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, af_packet_if_t *apif,
+ u16 queue_id, u8 is_cksum_gso_enabled)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id);
+ tpacket2_hdr_t *tph;
+ u32 next_index;
+ u32 block = 0;
+ u32 rx_frame;
+ u32 n_free_bufs;
+ u32 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u32 *to_next = 0;
+ u32 frame_size = rx_queue->rx_req->req.tp_frame_size;
+ u32 frame_num = rx_queue->rx_req->req.tp_frame_nr;
+ u8 *block_start = rx_queue->rx_ring[block];
+ uword n_trace = vlib_get_trace_count (vm, node);
+ u32 thread_index = vm->thread_index;
+ u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
+ u32 min_bufs = rx_queue->rx_req->req.tp_frame_size / n_buffer_bytes;
+ u32 sw_if_index = apif->sw_if_index;
+ u8 is_ip = (apif->mode == AF_PACKET_IF_MODE_IP);
+
+ if (is_ip)
+ next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ else
+ next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+
+ n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
+ if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
+ {
+ vec_validate (apm->rx_buffers[thread_index],
+ VLIB_FRAME_SIZE + n_free_bufs - 1);
+ n_free_bufs += vlib_buffer_alloc (
+ vm, &apm->rx_buffers[thread_index][n_free_bufs], VLIB_FRAME_SIZE);
+ vec_set_len (apm->rx_buffers[thread_index], n_free_bufs);
+ }
+
+ rx_frame = rx_queue->next_rx_frame;
+ tph = (tpacket2_hdr_t *) (block_start + rx_frame * frame_size);
+ while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs))
+ {
+ vlib_buffer_t *b0 = 0, *first_b0 = 0, *prev_b0 = 0;
+ u32 next0 = next_index;
+
+ u32 n_left_to_next;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs) &&
+ n_left_to_next)
+ {
+ vnet_virtio_net_hdr_t *vnet_hdr = 0;
+ u32 data_len = tph->tp_snaplen;
+ u32 offset = 0;
+ u32 bi0 = 0, first_bi0 = 0;
+ u8 l4_hdr_sz = 0;
+
+ if (is_cksum_gso_enabled)
+ vnet_hdr =
+ (vnet_virtio_net_hdr_t *) ((u8 *) tph + tph->tp_mac -
+ sizeof (vnet_virtio_net_hdr_t));
+ while (data_len)
+ {
+ /* grab free buffer */
+ u32 last_empty_buffer =
+ vec_len (apm->rx_buffers[thread_index]) - 1;
+ bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
+ b0 = vlib_get_buffer (vm, bi0);
+ vec_set_len (apm->rx_buffers[thread_index], last_empty_buffer);
+ n_free_bufs--;
+
+ /* copy data */
+ u32 bytes_to_copy =
+ data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
+ u32 vlan_len = 0;
+ u32 bytes_copied = 0;
+ b0->current_data = 0;
+ /* Kernel removes VLAN headers, so reconstruct VLAN */
+ if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
+ {
+ if (PREDICT_TRUE (offset == 0))
+ {
+ clib_memcpy_fast (vlib_buffer_get_current (b0),
+ (u8 *) tph + tph->tp_mac,
+ sizeof (ethernet_header_t));
+ ethernet_header_t *eth = vlib_buffer_get_current (b0);
+ ethernet_vlan_header_t *vlan =
+ (ethernet_vlan_header_t *) (eth + 1);
+ vlan->priority_cfi_and_id =
+ clib_host_to_net_u16 (tph->tp_vlan_tci);
+ vlan->type = eth->type;
+ eth->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ vlan_len = sizeof (ethernet_vlan_header_t);
+ bytes_copied = sizeof (ethernet_header_t);
+ }
+ }
+ clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) +
+ bytes_copied + vlan_len,
+ (u8 *) tph + tph->tp_mac + offset +
+ bytes_copied,
+ (bytes_to_copy - bytes_copied));
+
+ /* fill buffer header */
+ b0->current_length = bytes_to_copy + vlan_len;
+
+ if (offset == 0)
+ {
+ b0->total_length_not_including_first_buffer = 0;
+ b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~0;
+ first_bi0 = bi0;
+ first_b0 = vlib_get_buffer (vm, first_bi0);
+
+ if (is_cksum_gso_enabled)
+ {
+ if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ fill_cksum_offload (first_b0, &l4_hdr_sz, is_ip);
+ if (vnet_hdr->gso_type & (VIRTIO_NET_HDR_GSO_TCPV4 |
+ VIRTIO_NET_HDR_GSO_TCPV6))
+ fill_gso_offload (first_b0, vnet_hdr->gso_size,
+ l4_hdr_sz);
+ }
+ }
+ else
+ buffer_add_to_chain (b0, first_b0, prev_b0, bi0);
+
+ prev_b0 = b0;
+ offset += bytes_to_copy;
+ data_len -= bytes_to_copy;
+ }
+ n_rx_packets++;
+ n_rx_bytes += tph->tp_snaplen;
+ to_next[0] = first_bi0;
+ to_next += 1;
+ n_left_to_next--;
+
+ /* drop partial packets */
+ if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen))
+ {
+ next0 = VNET_DEVICE_INPUT_NEXT_DROP;
+ first_b0->error =
+ node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT];
+ }
+ else
+ {
+ if (PREDICT_FALSE (is_ip))
+ {
+ switch (first_b0->data[0] & 0xf0)
+ {
+ case 0x40:
+ next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ break;
+ case 0x60:
+ next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+ break;
+ default:
+ next0 = VNET_DEVICE_INPUT_NEXT_DROP;
+ break;
+ }
+ if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
+ next0 = apif->per_interface_next_index;
+ }
+ else
+ {
+ next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
+ next0 = apif->per_interface_next_index;
+ /* redirect if feature path enabled */
+ vnet_feature_start_device_input (sw_if_index, &next0,
+ first_b0);
+ }
+ }
+
+ /* trace */
+ if (PREDICT_FALSE (n_trace > 0 &&
+ vlib_trace_buffer (vm, node, next0, first_b0,
+ /* follow_chain */ 0)))
+ {
+ af_packet_input_trace_t *tr;
+ vlib_set_trace_count (vm, node, --n_trace);
+ tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
+ tr->is_v3 = 0;
+ tr->next_index = next0;
+ tr->hw_if_index = apif->hw_if_index;
+ tr->queue_id = queue_id;
+ clib_memcpy_fast (&tr->tph2, tph, sizeof (struct tpacket2_hdr));
+ if (is_cksum_gso_enabled)
+ clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr,
+ sizeof (vnet_virtio_net_hdr_t));
+ else
+ clib_memset_u8 (&tr->vnet_hdr, 0,
+ sizeof (vnet_virtio_net_hdr_t));
+ }
+
+ /* enqueue and move to the next packet */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, first_bi0, next0);
+
+ /* next packet */
+ tph->tp_status = TP_STATUS_KERNEL;
+ rx_frame = (rx_frame + 1) % frame_num;
+ tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ rx_queue->next_rx_frame = rx_frame;
+
+ vlib_increment_combined_counter (
+ vnet_get_main ()->interface_main.combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
+ return n_rx_packets;
+}
+
+always_inline uword
+af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, af_packet_if_t *apif,
+ u16 queue_id, u8 is_cksum_gso_enabled)
+
+{
+ if (apif->version == TPACKET_V3)
+ return af_packet_v3_device_input_fn (vm, node, frame, apif, queue_id,
+ is_cksum_gso_enabled);
+ else
+ return af_packet_v2_device_input_fn (vm, node, frame, apif, queue_id,
+ is_cksum_gso_enabled);
+}
+
+VLIB_NODE_FN (af_packet_input_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_rx_packets = 0;
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_if_rxq_poll_vector_t *pv;
+ pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
+ for (int i = 0; i < vec_len (pv); i++)
+ {
+ af_packet_if_t *apif;
+ apif = vec_elt_at_index (apm->interfaces, pv[i].dev_instance);
+ if (apif->is_admin_up)
+ {
+ if (apif->is_cksum_gso_enabled)
+ n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif,
+ pv[i].queue_id, 1);
+ else
+ n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif,
+ pv[i].queue_id, 0);
+ }
+ }
+ return n_rx_packets;
+}
+
+VLIB_REGISTER_NODE (af_packet_input_node) = {
+ .name = "af-packet-input",
+ .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
+ .sibling_of = "device-input",
+ .format_trace = format_af_packet_input_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .n_errors = AF_PACKET_INPUT_N_ERROR,
+ .error_strings = af_packet_input_error_strings,
+};
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/af_packet/plugin.c b/src/plugins/af_packet/plugin.c
new file mode 100644
index 00000000000..ddad52404c7
--- /dev/null
+++ b/src/plugins/af_packet/plugin.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "af-packet",
+};
diff --git a/src/plugins/af_xdp/CMakeLists.txt b/src/plugins/af_xdp/CMakeLists.txt
index cbe96aa59dd..fd7ee4e835b 100644
--- a/src/plugins/af_xdp/CMakeLists.txt
+++ b/src/plugins/af_xdp/CMakeLists.txt
@@ -11,36 +11,37 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-vpp_find_path(BPF_INCLUDE_DIR NAMES bpf/xsk.h)
-if (NOT BPF_INCLUDE_DIR)
- message(WARNING "libbpf headers not found - af_xdp plugin disabled")
+vpp_find_path(XDP_INCLUDE_DIR NAMES xdp/xsk.h)
+if (NOT XDP_INCLUDE_DIR)
+ message(WARNING "libxdp headers not found - af_xdp plugin disabled")
return()
endif()
set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS TRUE)
+vpp_plugin_find_library(af_xdp XDP_LIB libxdp.a)
vpp_plugin_find_library(af_xdp BPF_LIB libbpf.a)
-vpp_plugin_find_library(af_xdp BPF_ELF_LIB elf)
-vpp_plugin_find_library(af_xdp BPF_Z_LIB z)
-if (NOT BPF_LIB OR NOT BPF_ELF_LIB OR NOT BPF_Z_LIB)
+vpp_plugin_find_library(af_xdp ELF_LIB elf)
+vpp_plugin_find_library(af_xdp Z_LIB z)
+if (NOT XDP_LIB OR NOT BPF_LIB OR NOT ELF_LIB OR NOT Z_LIB)
message(WARNING "af_xdp plugin - missing libraries - af_xdp plugin disabled")
return()
endif()
set(CMAKE_REQUIRED_FLAGS "-fPIC")
-set(CMAKE_REQUIRED_INCLUDES "${BPF_INCLUDE_DIR}")
-set(CMAKE_REQUIRED_LIBRARIES "${BPF_LIB}" "${BPF_ELF_LIB}" "${BPF_Z_LIB}")
+set(CMAKE_REQUIRED_INCLUDES "${XDP_INCLUDE_DIR}")
+set(CMAKE_REQUIRED_LIBRARIES "${XDP_LIB}" "${BPF_LIB}" "${ELF_LIB}" "${Z_LIB}")
CHECK_C_SOURCE_COMPILES("
-#include <bpf/xsk.h>
+#include <xdp/xsk.h>
int main(void)
{
return xsk_socket__create (0, 0, 0, 0, 0, 0, 0);
-}" BPF_COMPILES_CHECK)
-if (NOT BPF_COMPILES_CHECK)
- message(WARNING "af_xdp plugins - no working libbpf found - af_xdp plugin disabled")
+}" XDP_COMPILES_CHECK)
+if (NOT XDP_COMPILES_CHECK)
+  message(WARNING "af_xdp plugin - no working libxdp found - af_xdp plugin disabled")
return()
endif()
-include_directories(${BPF_INCLUDE_DIR})
+include_directories(${XDP_INCLUDE_DIR})
add_vpp_plugin(af_xdp
SOURCES
@@ -65,7 +66,10 @@ add_vpp_plugin(af_xdp
test_api.c
LINK_LIBRARIES
+ ${XDP_LIB}
${BPF_LIB}
- ${BPF_ELF_LIB}
- ${BPF_Z_LIB}
+ ${ELF_LIB}
+ ${Z_LIB}
+
+ SUPPORTED_OS_LIST Linux
)
diff --git a/src/plugins/af_xdp/af_xdp.api b/src/plugins/af_xdp/af_xdp.api
index c6716123703..4c2908e2037 100644
--- a/src/plugins/af_xdp/af_xdp.api
+++ b/src/plugins/af_xdp/af_xdp.api
@@ -15,7 +15,7 @@
*------------------------------------------------------------------
*/
-option version = "0.2.0";
+option version = "1.0.0";
import "vnet/interface_types.api";
enum af_xdp_mode
@@ -57,7 +57,39 @@ define af_xdp_create
vl_api_af_xdp_flag_t flags [default=0];
string prog[256];
option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [zero-copy|no-zero-copy] [no-syscall-lock]";
- option status="in_progress";
+ option deprecated;
+};
+
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param host_if - Linux netdev interface name
+ @param name - new af_xdp interface name (optional)
+ @param rxq_num - number of receive queues. 65535 can be used as a special value to request all available queues (optional)
+ @param rxq_size - receive queue size (optional)
+ @param txq_size - transmit queue size (optional)
+ @param mode - operation mode (optional)
+ @param flags - flags (optional)
+ @param prog - eBPF program path (optional)
+ @param namespace - netns of nic (optional)
+*/
+
+define af_xdp_create_v2
+{
+ u32 client_index;
+ u32 context;
+
+ string host_if[64];
+ string name[64];
+ u16 rxq_num [default=1];
+ u16 rxq_size [default=0];
+ u16 txq_size [default=0];
+ vl_api_af_xdp_mode_t mode [default=0];
+ vl_api_af_xdp_flag_t flags [default=0];
+ string prog[256];
+ string namespace[64];
+ option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [netns ns] [zero-copy|no-zero-copy] [no-syscall-lock]";
+ option deprecated;
};
/** \brief
@@ -71,7 +103,21 @@ define af_xdp_create_reply
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
- option status="in_progress";
+ option deprecated;
+};
+
+/** \brief
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param sw_if_index - software index for the new af_xdp interface
+*/
+
+define af_xdp_create_v2_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+ option deprecated;
};
/** \brief
@@ -80,6 +126,50 @@ define af_xdp_create_reply
@param sw_if_index - interface index
*/
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param host_if - Linux netdev interface name
+ @param name - new af_xdp interface name (optional)
+ @param rxq_num - number of receive queues. 65535 can be used as a special value to request all available queues (optional)
+ @param rxq_size - receive queue size (optional)
+ @param txq_size - transmit queue size (optional)
+ @param mode - operation mode (optional)
+ @param flags - flags (optional)
+ @param prog - eBPF program path (optional)
+ @param netns - netns of nic (optional)
+*/
+
+autoendian define af_xdp_create_v3
+{
+ u32 client_index;
+ u32 context;
+
+ string host_if[64];
+ string name[64];
+ u16 rxq_num [default=1];
+ u16 rxq_size [default=0];
+ u16 txq_size [default=0];
+ vl_api_af_xdp_mode_t mode [default=0];
+ vl_api_af_xdp_flag_t flags [default=0];
+ string prog[256];
+ string netns[64];
+ option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [netns ns] [zero-copy|no-zero-copy] [no-syscall-lock]";
+};
+
+/** \brief
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param sw_if_index - software index for the new af_xdp interface
+*/
+
+autoendian define af_xdp_create_v3_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
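+
+/* Illustrative debug-CLI equivalent of af_xdp_create_v3 (interface and
+ * netns names are examples only):
+ * create int af_xdp host-if eth0 name xdp0 num-rx-queues all netns ns1
+ */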
+
autoreply define af_xdp_delete
{
u32 client_index;
@@ -87,7 +177,6 @@ autoreply define af_xdp_delete
vl_api_interface_index_t sw_if_index;
option vat_help = "<sw_if_index index>";
- option status="in_progress";
};
/*
diff --git a/src/plugins/af_xdp/af_xdp.h b/src/plugins/af_xdp/af_xdp.h
index 825a3fb29fd..cf364fc86a8 100644
--- a/src/plugins/af_xdp/af_xdp.h
+++ b/src/plugins/af_xdp/af_xdp.h
@@ -20,7 +20,7 @@
#include <vlib/log.h>
#include <vnet/interface.h>
-#include <bpf/xsk.h>
+#include <xdp/xsk.h>
#define AF_XDP_NUM_RX_QUEUES_ALL ((u16)-1)
@@ -86,6 +86,10 @@ typedef struct
struct xsk_ring_prod tx;
struct xsk_ring_cons cq;
int xsk_fd;
+
+ /* fields below are accessed in control-plane only (cold) */
+
+ u32 queue_index;
} af_xdp_txq_t;
typedef struct
@@ -113,6 +117,8 @@ typedef struct
u8 rxq_num;
+ char *netns;
+
struct xsk_umem **umem;
struct xsk_socket **xsk;
@@ -149,6 +155,7 @@ typedef struct
char *linux_ifname;
char *name;
char *prog;
+ char *netns;
af_xdp_mode_t mode;
af_xdp_create_flag_t flags;
u32 rxq_size;
diff --git a/src/plugins/af_xdp/af_xdp_doc.md b/src/plugins/af_xdp/af_xdp_doc.md
deleted file mode 100644
index f5859dbb901..00000000000
--- a/src/plugins/af_xdp/af_xdp_doc.md
+++ /dev/null
@@ -1,129 +0,0 @@
-# AF_XDP Ethernet driver {#af_xdp_doc}
-
-This driver relies on Linux AF_XDP socket to rx/tx Ethernet packets.
-
-## Maturity level
-Under development: it should work, but has not been thoroughly tested.
-
-## Features
- - copy and zero-copy mode
- - multiqueue
- - API
- - custom eBPF program
- - polling, interrupt and adaptive mode
-
-## Known limitations
-
-### MTU
-Because of AF_XDP restrictions, the MTU is limited to below PAGE_SIZE
-(4096-bytes on most systems) minus 256-bytes, and they are additional
-limitations depending upon specific Linux device drivers.
-As a rule of thumb, a MTU of 3000-bytes or less should be safe.
-
-### Number of buffers
-Furthermore, upon UMEM creation, the kernel allocates a
-physically-contiguous structure, whose size is proportional to the number
-of 4KB pages contained in the UMEM. That allocation might fail when
-the number of buffers allocated by VPP is too high. That number can be
-controlled with the `buffers { buffers-per-numa }` configuration option.
-Finally, note that because of this limitation, this plugin is unlikely
-to be compatible with the use of 1GB hugepages.
-
-### Interrupt mode
-Interrupt and adaptive mode are supported but is limited by default to single
-threaded (no worker) configurations because of a kernel limitation prior to
-5.6. You can bypass the limitation at interface creation time by adding the
-`no-syscall-lock` parameter, but you must be sure that your kernel can
-support it, otherwise you will experience double-frees.
-See
-https://lore.kernel.org/bpf/BYAPR11MB365382C5DB1E5FCC53242609C1549@BYAPR11MB3653.namprd11.prod.outlook.com/
-for more details.
-
-### Mellanox
-When setting the number of queues on Mellanox NIC with `ethtool -L`, you must
-use twice the amount of configured queues: it looks like the Linux driver will
-create separate RX queues and TX queues (but all queues can be used for both
-RX and TX, the NIC will just not sent any packet on "pure" TX queues.
-Confused? So I am.). For example if you set `combined 2` you will effectively
-have to create 4 rx queues in AF_XDP if you want to be sure to receive all
-packets.
-
-## Requirements
-This drivers supports Linux kernel 5.4 and later. Kernels older than 5.4 are
-missing unaligned buffers support.
-
-The Linux kernel interface must be up and have enough queues before
-creating the VPP AF_XDP interface, otherwise Linux will deny creating
-the AF_XDP socket.
-The AF_XDP interface will claim NIC RX queue starting from 0, up to the
-requested number of RX queues (only 1 by default). It means all packets
-destined to NIC RX queue `[0, num_rx_queues[` will be received by the
-AF_XDP interface, and only them. Depending on your configuration, there
-will usually be several RX queues (typically 1 per core) and packets are
-spread accross queues by RSS. In order to receive consistent traffic,
-you **must** program the NIC dispatching accordingly. The simplest way
-to get all the packets is to specify `num-rx-queues all` to grab all
-available queues or to reconfigure the Linux kernel driver to use only
-`num_rx_queues` RX queues (ie all NIC queues will be associated with
-the AF_XDP socket):
-```
-~# ethtool -L <iface> combined <num_rx_queues>
-```
-Additionally, the VPP AF_XDP interface will use a MAC address generated at
-creation time instead of the Linux kernel interface MAC. As Linux kernel
-interface are not in promiscuous mode by default (see below) this will
-results in a useless configuration where the VPP AF_XDP interface only
-receives packets destined to the Linux kernel interface MAC just to drop
-them because the destination MAC does not match VPP AF_XDP interface MAC.
-If you want to use the Linux interface MAC for the VPP AF_XDP interface,
-you can change it afterwards in VPP:
-```
-~# vppctl set int mac address <iface> <mac>
-```
-Finally, if you wish to receive all packets and not only the packets
-destined to the Linux kernel interface MAC you need to set the Linux
-kernel interface in promiscuous mode:
-```
-~# ip link set dev <iface> promisc on
-```
-
-## Security considerations
-When creating an AF_XDP interface, it will receive all packets arriving
-to the NIC RX queue `[0, num_rx_queues[`. You need to configure the Linux
-kernel NIC driver properly to ensure that only intented packets will
-arrive in this queue. There is no way to filter the packets after-the-fact
-using eg. netfilter or eBPF.
-
-## Quickstart
-1. Put the Linux kernel interface up and in promiscuous mode:
-```
-~# ip l set dev enp216s0f0 promisc on up
-```
-2. Create the AF_XDP interface:
-```
-~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues all
-```
-3. Use the interface as usual, eg.:
-```
-~# vppctl set int ip addr enp216s0f0/0 1.1.1.1/24
-~# vppctl set int st enp216s0f0/0 up
-~# vppctl ping 1.1.1.100`
-```
-
-## Custom eBPF XDP program
-This driver relies on libbpf and as such relies on the `xsks_map` eBPF
-map. The default behavior is to use the XDP program already attached
-to the interface if any, otherwise load the default one.
-You can request to load a custom XDP program with the `prog` option when
-creating the interface in VPP:
-```
-~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues 4 prog extras/bpf/af_xdp.bpf.o
-```
-In that case it will replace any previously attached program. A custom
-XDP program example is provided in `extras/bpf/`.
-
-## Performance consideration
-AF_XDP relies on the Linux kernel NIC driver to rx/tx packets. To reach
-high-performance (10's MPPS), the Linux kernel NIC driver must support
-zero-copy mode and its RX path must run on a dedicated core in the NUMA
-where the NIC is physically connected.
diff --git a/src/plugins/af_xdp/af_xdp_doc.rst b/src/plugins/af_xdp/af_xdp_doc.rst
new file mode 100644
index 00000000000..de951340a2d
--- /dev/null
+++ b/src/plugins/af_xdp/af_xdp_doc.rst
@@ -0,0 +1,164 @@
+AF_XDP device driver
+====================
+
+This driver relies on Linux AF_XDP socket to rx/tx Ethernet packets.
+
+Maturity level
+--------------
+
+Under development: it should work, but has not been thoroughly tested.
+
+Features
+--------
+
+- copy and zero-copy mode
+- multiqueue
+- API
+- custom eBPF program
+- polling, interrupt and adaptive mode
+
+Known limitations
+-----------------
+
+MTU
+~~~
+
+Because of AF_XDP restrictions, the MTU is limited to below PAGE_SIZE
+(4096 bytes on most systems) minus 256 bytes, and there are additional
+limitations depending upon the specific Linux device driver. As a rule of
+thumb, an MTU of 3000 bytes or less should be safe.
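+
+For example, with a 4096-byte PAGE_SIZE the hard upper bound is
+4096 - 256 = 3840 bytes per frame; staying at or below 3000 bytes leaves
+headroom for driver-specific overhead.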
+
+Number of buffers
+~~~~~~~~~~~~~~~~~
+
+Furthermore, upon UMEM creation, the kernel allocates a
+physically-contiguous structure, whose size is proportional to the
+number of 4KB pages contained in the UMEM. That allocation might fail
+when the number of buffers allocated by VPP is too high. That number can
+be controlled with the ``buffers { buffers-per-numa }`` configuration
+option. Finally, note that because of this limitation, this plugin is
+unlikely to be compatible with the use of 1GB hugepages.
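+
+A minimal sketch of capping the buffer count in ``startup.conf`` (the
+value shown is illustrative):
+
+::
+
+   buffers {
+     buffers-per-numa 16384
+   }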
+
+Interrupt mode
+~~~~~~~~~~~~~~
+
+Interrupt and adaptive mode are supported but are limited by default to
+single-threaded (no worker) configurations because of a kernel
+limitation prior to 5.6. You can bypass the limitation at interface
+creation time by adding the ``no-syscall-lock`` parameter, but you must
+be sure that your kernel can support it, otherwise you will experience
+double-frees. See
+https://lore.kernel.org/bpf/BYAPR11MB365382C5DB1E5FCC53242609C1549@BYAPR11MB3653.namprd11.prod.outlook.com/
+for more details.
+
+Mellanox
+~~~~~~~~
+
+When setting the number of queues on a Mellanox NIC with ``ethtool
+-L``, you must create twice as many AF_XDP RX queues as the configured
+channel count: it looks like the Linux driver creates separate RX
+queues and TX queues (but all queues can be used for both RX and TX;
+the NIC will just not send any packet on “pure” TX queues. Confused? So
+am I.). For example, if you set ``combined 2`` you will effectively
+have to create 4 RX queues in AF_XDP if you want to be sure to receive
+all packets.
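+
+A sketch matching the ``combined 2`` example above (the interface name
+is an assumption):
+
+::
+
+   ~# ethtool -L enp216s0f0 combined 2
+   ~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues 4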
+
+Requirements
+------------
+
+This driver supports Linux kernel 5.4 and later. Kernels older than
+5.4 are missing unaligned buffer support.
+
+The Linux kernel interface must be up and have enough queues before
+creating the VPP AF_XDP interface, otherwise Linux will deny creating
+the AF_XDP socket. The AF_XDP interface will claim NIC RX queues
+starting from 0, up to the requested number of RX queues (only 1 by
+default). This means all packets destined to NIC RX queues ``[0,
+num_rx_queues[`` will be received by the AF_XDP interface, and only
+those. Depending on your
+configuration, there will usually be several RX queues (typically 1 per
+core) and packets are spread across queues by RSS. In order to receive
+consistent traffic, you **must** program the NIC dispatching
+accordingly. The simplest way to get all the packets is to specify
+``num-rx-queues all`` to grab all available queues or to reconfigure the
+Linux kernel driver to use only ``num_rx_queues`` RX queues (i.e. all NIC
+queues will be associated with the AF_XDP socket):
+
+::
+
+ ~# ethtool -L <iface> combined <num_rx_queues>
+
+Additionally, the VPP AF_XDP interface will use a MAC address generated
+at creation time instead of the Linux kernel interface MAC. As Linux
+kernel interfaces are not in promiscuous mode by default (see below),
+this results in a useless configuration where the VPP AF_XDP interface
+only receives packets destined to the Linux kernel interface MAC, just
+to drop them because the destination MAC does not match the VPP AF_XDP
+interface MAC. If you want to use the Linux interface MAC for the VPP
+AF_XDP interface, you can change it afterwards in VPP:
+
+::
+
+ ~# vppctl set int mac address <iface> <mac>
+
+Finally, if you wish to receive all packets and not only the packets
+destined to the Linux kernel interface MAC, you need to put the Linux
+kernel interface in promiscuous mode:
+
+::
+
+ ~# ip link set dev <iface> promisc on
+
+Security considerations
+-----------------------
+
+When creating an AF_XDP interface, it will receive all packets arriving
+on NIC RX queues ``[0, num_rx_queues[``. You need to configure the
+Linux kernel NIC driver properly to ensure that only intended packets
+arrive on these queues. There is no way to filter the packets
+after-the-fact using e.g. netfilter or eBPF.
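+
+For example, assuming the NIC supports ethtool ntuple filters, you can
+steer only the intended traffic (here, VXLAN on UDP port 4789) to queue
+0 before creating an AF_XDP interface with a single RX queue:
+
+::
+
+   ~# ethtool -K enp216s0f0 ntuple on
+   ~# ethtool -N enp216s0f0 flow-type udp4 dst-port 4789 action 0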
+
+Quickstart
+----------
+
+1. Put the Linux kernel interface up and in promiscuous mode:
+
+::
+
+ ~# ip l set dev enp216s0f0 promisc on up
+
+2. Create the AF_XDP interface:
+
+::
+
+ ~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues all
+
+3. Use the interface as usual, e.g.:
+
+::
+
+ ~# vppctl set int ip addr enp216s0f0/0 1.1.1.1/24
+ ~# vppctl set int st enp216s0f0/0 up
+ ~# vppctl ping 1.1.1.100
+
+Custom eBPF XDP program
+-----------------------
+
+This driver relies on libbpf and as such requires the ``xsks_map`` eBPF
+map. The default behavior is to use the XDP program already attached to
+the interface, if any, and otherwise to load the default one. You can
+request loading a custom XDP program with the ``prog`` option when
+creating the interface in VPP:
+
+::
+
+ ~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues 4 prog extras/bpf/af_xdp.bpf.o
+
+In that case it will replace any previously attached program. A custom
+XDP program example is provided in ``extras/bpf/``.
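+
+A minimal sketch of such a program is shown below, assuming libbpf
+BTF-style map definitions (the shipped example in ``extras/bpf/`` may
+differ). The only hard requirement is an eBPF map of type
+``BPF_MAP_TYPE_XSKMAP`` named ``xsks_map``:
+
+.. code:: c
+
+   #include <linux/bpf.h>
+   #include <bpf/bpf_helpers.h>
+
+   /* the map looked up by name so AF_XDP sockets can register in it;
+    * 64 entries is an arbitrary bound covering the rx queues */
+   struct {
+       __uint(type, BPF_MAP_TYPE_XSKMAP);
+       __uint(max_entries, 64);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+   } xsks_map SEC(".maps");
+
+   SEC("xdp")
+   int xdp_sock_prog(struct xdp_md *ctx)
+   {
+       /* redirect to the socket bound to this rx queue, falling back
+        * to the kernel stack (XDP_PASS) if none is bound */
+       return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
+   }
+
+   char _license[] SEC("license") = "GPL";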
+
+Performance consideration
+-------------------------
+
+AF_XDP relies on the Linux kernel NIC driver to rx/tx packets. To reach
+high performance (tens of Mpps), the Linux kernel NIC driver must
+support zero-copy mode and its RX path must run on a dedicated core of
+the NUMA node where the NIC is physically connected.
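+
+For example, a sketch pinning the kernel RX path of one NIC queue to a
+dedicated core (IRQ 128 and core 2 are placeholder assumptions; look up
+the actual IRQ numbers in ``/proc/interrupts``):
+
+::
+
+   ~# systemctl stop irqbalance
+   ~# echo 2 > /proc/irq/128/smp_affinity_list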
diff --git a/src/plugins/af_xdp/api.c b/src/plugins/af_xdp/api.c
index 1864c4c2ee9..3e9a3fe2578 100644
--- a/src/plugins/af_xdp/api.c
+++ b/src/plugins/af_xdp/api.c
@@ -27,6 +27,7 @@
#include <af_xdp/af_xdp.api_enum.h>
#include <af_xdp/af_xdp.api_types.h>
+#define REPLY_MSG_ID_BASE (rm->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static af_xdp_mode_t
@@ -78,12 +79,72 @@ vl_api_af_xdp_create_t_handler (vl_api_af_xdp_create_t * mp)
af_xdp_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_AF_XDP_CREATE_REPLY + rm->msg_id_base,
+ REPLY_MACRO2 (VL_API_AF_XDP_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
+}
+
+static void
+vl_api_af_xdp_create_v2_t_handler (vl_api_af_xdp_create_v2_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ af_xdp_main_t *rm = &af_xdp_main;
+ vl_api_af_xdp_create_v2_reply_t *rmp;
+ af_xdp_create_if_args_t args;
+ int rv;
+
+ clib_memset (&args, 0, sizeof (af_xdp_create_if_args_t));
+
+ args.linux_ifname = mp->host_if[0] ? (char *) mp->host_if : 0;
+ args.name = mp->name[0] ? (char *) mp->name : 0;
+ args.prog = mp->prog[0] ? (char *) mp->prog : 0;
+ args.netns = mp->namespace[0] ? (char *) mp->namespace : 0;
+ args.mode = af_xdp_api_mode (mp->mode);
+ args.flags = af_xdp_api_flags (mp->flags);
+ args.rxq_size = ntohs (mp->rxq_size);
+ args.txq_size = ntohs (mp->txq_size);
+ args.rxq_num = ntohs (mp->rxq_num);
+
+ af_xdp_create_if (vm, &args);
+ rv = args.rv;
+
+ /* clang-format off */
+ REPLY_MACRO2 (VL_API_AF_XDP_CREATE_V2_REPLY,
({
rmp->sw_if_index = ntohl (args.sw_if_index);
}));
- /* *INDENT-ON* */
+ /* clang-format on */
+}
+
+static void
+vl_api_af_xdp_create_v3_t_handler (vl_api_af_xdp_create_v3_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ af_xdp_main_t *rm = &af_xdp_main;
+ vl_api_af_xdp_create_v3_reply_t *rmp;
+ af_xdp_create_if_args_t args;
+ int rv;
+
+ clib_memset (&args, 0, sizeof (af_xdp_create_if_args_t));
+
+ args.linux_ifname = mp->host_if[0] ? (char *) mp->host_if : 0;
+ args.name = mp->name[0] ? (char *) mp->name : 0;
+ args.prog = mp->prog[0] ? (char *) mp->prog : 0;
+ args.netns = mp->netns[0] ? (char *) mp->netns : 0;
+ args.mode = af_xdp_api_mode (mp->mode);
+ args.flags = af_xdp_api_flags (mp->flags);
+ args.rxq_size = mp->rxq_size;
+ args.txq_size = mp->txq_size;
+ args.rxq_num = mp->rxq_num;
+
+ af_xdp_create_if (vm, &args);
+ rv = args.rv;
+
+ /* clang-format off */
+ REPLY_MACRO2_END (VL_API_AF_XDP_CREATE_V3_REPLY,
+ ({
+ rmp->sw_if_index = args.sw_if_index;
+ }));
+ /* clang-format on */
}
static void
@@ -111,7 +172,7 @@ vl_api_af_xdp_delete_t_handler (vl_api_af_xdp_delete_t * mp)
af_xdp_delete_if (vm, rd);
reply:
- REPLY_MACRO (VL_API_AF_XDP_DELETE_REPLY + rm->msg_id_base);
+ REPLY_MACRO (VL_API_AF_XDP_DELETE_REPLY);
}
/* set up the API message handling tables */
diff --git a/src/plugins/af_xdp/cli.c b/src/plugins/af_xdp/cli.c
index 2f3deffaaee..12d3b875a71 100644
--- a/src/plugins/af_xdp/cli.c
+++ b/src/plugins/af_xdp/cli.c
@@ -40,20 +40,20 @@ af_xdp_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
vec_free (args.linux_ifname);
vec_free (args.name);
+ vec_free (args.prog);
+ vec_free (args.netns);
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (af_xdp_create_command, static) = {
.path = "create interface af_xdp",
.short_help =
"create interface af_xdp <host-if linux-ifname> [name ifname] "
"[rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] "
- "[prog pathname] [zero-copy|no-zero-copy] [no-syscall-lock]",
+ "[prog pathname] [netns ns] [zero-copy|no-zero-copy] [no-syscall-lock]",
.function = af_xdp_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
af_xdp_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -98,14 +98,12 @@ af_xdp_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (af_xdp_delete_command, static) = {
.path = "delete interface af_xdp",
.short_help = "delete interface af_xdp "
"{<interface> | sw_if_index <sw_idx>}",
.function = af_xdp_delete_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
af_xdp_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/af_xdp/device.c b/src/plugins/af_xdp/device.c
index 7a10bce4290..63a276ce51e 100644
--- a/src/plugins/af_xdp/device.c
+++ b/src/plugins/af_xdp/device.c
@@ -17,17 +17,27 @@
#include <stdio.h>
#include <net/if.h>
+#include <sys/ioctl.h>
+#include <linux/ethtool.h>
#include <linux/if_link.h>
-#include <bpf/libbpf.h>
+#include <linux/sockios.h>
+#include <linux/limits.h>
+#include <bpf/bpf.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
+#include <vppinfra/linux/netns.h>
#include <vppinfra/linux/sysfs.h>
#include <vppinfra/unix.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
#include "af_xdp.h"
+#ifndef XDP_UMEM_MIN_CHUNK_SIZE
+#define XDP_UMEM_MIN_CHUNK_SIZE 2048
+#endif
+
af_xdp_main_t af_xdp_main;
typedef struct
@@ -62,6 +72,16 @@ af_xdp_mac_change (vnet_hw_interface_t * hw, const u8 * old, const u8 * new)
return 0;
}
+static clib_error_t *
+af_xdp_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+ u32 frame_size)
+{
+ af_xdp_main_t *am = &af_xdp_main;
+ af_xdp_device_t *ad = vec_elt_at_index (am->devices, hw->dev_instance);
+ af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "set mtu not supported yet");
+ return vnet_error (VNET_ERR_UNSUPPORTED, 0);
+}
+
static u32
af_xdp_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
{
@@ -77,15 +97,87 @@ af_xdp_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
af_xdp_log (VLIB_LOG_LEVEL_ERR, ad,
"set promiscuous not supported yet");
return ~0;
- case ETHERNET_INTERFACE_FLAG_MTU:
- af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "set mtu not supported yet");
- return ~0;
}
af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "unknown flag %x requested", flags);
return ~0;
}
+int
+af_xdp_enter_netns (char *netns, int *fds)
+{
+ *fds = *(fds + 1) = -1;
+ if (netns != NULL)
+ {
+ *fds = clib_netns_open (NULL /* self */);
+ if ((*(fds + 1) = clib_netns_open ((u8 *) netns)) == -1)
+ return VNET_API_ERROR_SYSCALL_ERROR_8;
+ if (clib_setns (*(fds + 1)) == -1)
+ return VNET_API_ERROR_SYSCALL_ERROR_9;
+ }
+ return 0;
+}
+
+void
+af_xdp_cleanup_netns (int *fds)
+{
+ if (*fds != -1)
+ close (*fds);
+
+ if (*(fds + 1) != -1)
+ close (*(fds + 1));
+
+ *fds = *(fds + 1) = -1;
+}
+
+int
+af_xdp_exit_netns (char *netns, int *fds)
+{
+ int ret = 0;
+ if (netns != NULL)
+ {
+ if (*fds != -1)
+ ret = clib_setns (*fds);
+
+ af_xdp_cleanup_netns (fds);
+ }
+
+ return ret;
+}
+
+static int
+af_xdp_remove_program (af_xdp_device_t *ad)
+{
+ u32 curr_prog_id = 0;
+ int ret;
+ int ns_fds[2];
+
+ af_xdp_enter_netns (ad->netns, ns_fds);
+ ret = bpf_xdp_query_id (ad->linux_ifindex, XDP_FLAGS_UPDATE_IF_NOEXIST,
+ &curr_prog_id);
+ if (ret != 0)
+ {
+ af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "bpf_xdp_query_id failed\n");
+ goto err0;
+ }
+
+ ret = bpf_xdp_detach (ad->linux_ifindex, XDP_FLAGS_UPDATE_IF_NOEXIST, NULL);
+ if (ret != 0)
+ {
+ af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "bpf_xdp_detach failed\n");
+ goto err0;
+ }
+ af_xdp_exit_netns (ad->netns, ns_fds);
+ if (ad->bpf_obj)
+ bpf_object__close (ad->bpf_obj);
+
+ return 0;
+
+err0:
+ af_xdp_exit_netns (ad->netns, ns_fds);
+ return ret;
+}
+
void
af_xdp_delete_if (vlib_main_t * vm, af_xdp_device_t * ad)
{
@@ -101,9 +193,6 @@ af_xdp_delete_if (vlib_main_t * vm, af_xdp_device_t * ad)
ethernet_delete_interface (vnm, ad->hw_if_index);
}
- for (i = 0; i < ad->rxq_num; i++)
- clib_file_del_by_index (&file_main, vec_elt (ad->rxqs, i).file_index);
-
for (i = 0; i < ad->txq_num; i++)
clib_spinlock_free (&vec_elt (ad->txqs, i).lock);
@@ -113,17 +202,20 @@ af_xdp_delete_if (vlib_main_t * vm, af_xdp_device_t * ad)
vec_foreach (umem, ad->umem)
xsk_umem__delete (*umem);
- if (ad->bpf_obj)
- {
- bpf_set_link_xdp_fd (ad->linux_ifindex, -1, 0);
- bpf_object__unload (ad->bpf_obj);
- }
+ for (i = 0; i < ad->rxq_num; i++)
+ clib_file_del_by_index (&file_main, vec_elt (ad->rxqs, i).file_index);
+
+ if (af_xdp_remove_program (ad) != 0)
+ af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "Error while removing XDP program.\n");
vec_free (ad->xsk);
vec_free (ad->umem);
vec_free (ad->buffer_template);
vec_free (ad->rxqs);
vec_free (ad->txqs);
+ vec_free (ad->name);
+ vec_free (ad->linux_ifname);
+ vec_free (ad->netns);
clib_error_free (ad->error);
pool_put (axm->devices, ad);
}
@@ -132,44 +224,49 @@ static int
af_xdp_load_program (af_xdp_create_if_args_t * args, af_xdp_device_t * ad)
{
int fd;
+ struct bpf_program *bpf_prog;
+ struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
- ad->linux_ifindex = if_nametoindex (ad->linux_ifname);
- if (!ad->linux_ifindex)
- {
- args->rv = VNET_API_ERROR_INVALID_VALUE;
- args->error =
- clib_error_return_unix (0, "if_nametoindex(%s) failed",
- ad->linux_ifname);
- goto err0;
- }
+ if (setrlimit (RLIMIT_MEMLOCK, &r))
+ af_xdp_log (VLIB_LOG_LEVEL_WARNING, ad,
+ "setrlimit(%s) failed: %s (errno %d)", ad->linux_ifname,
+ strerror (errno), errno);
- if (bpf_prog_load (args->prog, BPF_PROG_TYPE_XDP, &ad->bpf_obj, &fd))
+ ad->bpf_obj = bpf_object__open_file (args->prog, NULL);
+ if (libbpf_get_error (ad->bpf_obj))
{
args->rv = VNET_API_ERROR_SYSCALL_ERROR_5;
- args->error =
- clib_error_return_unix (0, "bpf_prog_load(%s) failed", args->prog);
+ args->error = clib_error_return_unix (
+ 0, "bpf_object__open_file(%s) failed", args->prog);
goto err0;
}
-#ifndef XDP_FLAGS_REPLACE
-#define XDP_FLAGS_REPLACE 0
-#endif
- if (bpf_set_link_xdp_fd (ad->linux_ifindex, fd, XDP_FLAGS_REPLACE))
+ bpf_prog = bpf_object__next_program (ad->bpf_obj, NULL);
+ if (!bpf_prog)
+ goto err1;
+
+ bpf_program__set_type (bpf_prog, BPF_PROG_TYPE_XDP);
+
+ if (bpf_object__load (ad->bpf_obj))
+ goto err1;
+
+ fd = bpf_program__fd (bpf_prog);
+
+ if (bpf_xdp_attach (ad->linux_ifindex, fd, XDP_FLAGS_UPDATE_IF_NOEXIST,
+ NULL))
{
args->rv = VNET_API_ERROR_SYSCALL_ERROR_6;
- args->error =
- clib_error_return_unix (0, "bpf_set_link_xdp_fd(%s) failed",
- ad->linux_ifname);
+ args->error = clib_error_return_unix (0, "bpf_xdp_attach(%s) failed",
+ ad->linux_ifname);
goto err1;
}
return 0;
err1:
- bpf_object__unload (ad->bpf_obj);
+ bpf_object__close (ad->bpf_obj);
ad->bpf_obj = 0;
err0:
- ad->linux_ifindex = ~0;
return -1;
}
@@ -188,16 +285,9 @@ af_xdp_create_queue (vlib_main_t *vm, af_xdp_create_if_args_t *args,
const int is_rx = qid < ad->rxq_num;
const int is_tx = qid < ad->txq_num;
- vec_validate_aligned (ad->umem, qid, CLIB_CACHE_LINE_BYTES);
umem = vec_elt_at_index (ad->umem, qid);
-
- vec_validate_aligned (ad->xsk, qid, CLIB_CACHE_LINE_BYTES);
xsk = vec_elt_at_index (ad->xsk, qid);
-
- vec_validate_aligned (ad->rxqs, qid, CLIB_CACHE_LINE_BYTES);
rxq = vec_elt_at_index (ad->rxqs, qid);
-
- vec_validate_aligned (ad->txqs, qid, CLIB_CACHE_LINE_BYTES);
txq = vec_elt_at_index (ad->txqs, qid);
/*
@@ -221,8 +311,18 @@ af_xdp_create_queue (vlib_main_t *vm, af_xdp_create_if_args_t *args,
(umem, uword_to_pointer (vm->buffer_main->buffer_mem_start, void *),
vm->buffer_main->buffer_mem_size, fq, cq, &umem_config))
{
+ uword sys_page_size = clib_mem_get_page_size ();
args->rv = VNET_API_ERROR_SYSCALL_ERROR_1;
args->error = clib_error_return_unix (0, "xsk_umem__create() failed");
+ /* this should mimic the Linux kernel net/xdp/xdp_umem.c:xdp_umem_reg()
+ * check */
+ if (umem_config.frame_size < XDP_UMEM_MIN_CHUNK_SIZE ||
+ umem_config.frame_size > sys_page_size)
+ args->error = clib_error_return (
+ args->error,
+ "(unsupported data-size? (should be between %d and %d))",
+ XDP_UMEM_MIN_CHUNK_SIZE - sizeof (vlib_buffer_t),
+ sys_page_size - sizeof (vlib_buffer_t));
goto err0;
}
@@ -241,6 +341,8 @@ af_xdp_create_queue (vlib_main_t *vm, af_xdp_create_if_args_t *args,
sock_config.bind_flags |= XDP_ZEROCOPY;
break;
}
+ if (args->prog)
+ sock_config.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
if (xsk_socket__create
(xsk, ad->linux_ifname, qid, *umem, rx, tx, &sock_config))
{
@@ -253,10 +355,27 @@ af_xdp_create_queue (vlib_main_t *vm, af_xdp_create_if_args_t *args,
}
fd = xsk_socket__fd (*xsk);
+ if (args->prog)
+ {
+ struct bpf_map *map =
+ bpf_object__find_map_by_name (ad->bpf_obj, "xsks_map");
+ int ret = xsk_socket__update_xskmap (*xsk, bpf_map__fd (map));
+ if (ret)
+ {
+ args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
+ args->error = clib_error_return_unix (
+ 0, "xsk_socket__update_xskmap %s qid %d return %d",
+ ad->linux_ifname, qid, ret);
+ goto err2;
+ }
+ }
optlen = sizeof (opt);
+#ifndef SOL_XDP
+#define SOL_XDP 283
+#endif
if (getsockopt (fd, SOL_XDP, XDP_OPTIONS, &opt, &optlen))
{
- args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
+ args->rv = VNET_API_ERROR_SYSCALL_ERROR_4;
args->error =
clib_error_return_unix (0, "getsockopt(XDP_OPTIONS) failed");
goto err2;
@@ -269,6 +388,7 @@ af_xdp_create_queue (vlib_main_t *vm, af_xdp_create_if_args_t *args,
if (is_tx)
{
txq->xsk_fd = fd;
+ clib_spinlock_init (&txq->lock);
if (is_rx && (ad->flags & AF_XDP_DEVICE_F_SYSCALL_LOCK))
{
/* This is a shared rx+tx queue and we need to lock before syscalls.
@@ -321,6 +441,31 @@ af_xdp_get_numa (const char *ifname)
return numa;
}
+static void
+af_xdp_get_q_count (const char *ifname, int *rxq_num, int *txq_num)
+{
+ struct ethtool_channels ec = { .cmd = ETHTOOL_GCHANNELS };
+ struct ifreq ifr = { .ifr_data = (void *) &ec };
+ int fd, err;
+
+ *rxq_num = *txq_num = 1;
+
+ fd = socket (AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return;
+
+ snprintf (ifr.ifr_name, sizeof (ifr.ifr_name), "%s", ifname);
+ err = ioctl (fd, SIOCETHTOOL, &ifr);
+
+ close (fd);
+
+ if (err)
+ return;
+
+ *rxq_num = clib_max (ec.combined_count, ec.rx_count);
+ *txq_num = clib_max (ec.combined_count, ec.tx_count);
+}
+
static clib_error_t *
af_xdp_device_rxq_read_ready (clib_file_t * f)
{
@@ -361,22 +506,88 @@ af_xdp_device_set_rxq_mode (const af_xdp_device_t *ad, af_xdp_rxq_t *rxq,
return 0;
}
+static u32
+af_xdp_find_rxq_for_thread (vnet_main_t *vnm, const af_xdp_device_t *ad,
+ const u32 thread)
+{
+ u32 i;
+ for (i = 0; i < ad->rxq_num; i++)
+ {
+ const u32 qid = vec_elt (ad->rxqs, i).queue_index;
+ const u32 tid = vnet_hw_if_get_rx_queue (vnm, qid)->thread_index;
+ if (tid == thread)
+ return i;
+ }
+ return ~0;
+}
+
+static clib_error_t *
+af_xdp_finalize_queues (vnet_main_t *vnm, af_xdp_device_t *ad,
+ const int n_vlib_mains)
+{
+ clib_error_t *err = 0;
+ int i;
+
+ for (i = 0; i < ad->rxq_num; i++)
+ {
+ af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, i);
+ rxq->queue_index = vnet_hw_if_register_rx_queue (
+ vnm, ad->hw_if_index, i, VNET_HW_IF_RXQ_THREAD_ANY);
+ u8 *desc = format (0, "%U rxq %d", format_af_xdp_device_name,
+ ad->dev_instance, i);
+ clib_file_t f = {
+ .file_descriptor = rxq->xsk_fd,
+ .private_data = rxq->queue_index,
+ .read_function = af_xdp_device_rxq_read_ready,
+ .description = desc,
+ };
+ rxq->file_index = clib_file_add (&file_main, &f);
+ vnet_hw_if_set_rx_queue_file_index (vnm, rxq->queue_index,
+ rxq->file_index);
+ err = af_xdp_device_set_rxq_mode (ad, rxq, AF_XDP_RXQ_MODE_POLLING);
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < ad->txq_num; i++)
+ vec_elt (ad->txqs, i).queue_index =
+ vnet_hw_if_register_tx_queue (vnm, ad->hw_if_index, i);
+
+ /* We set the rxq and txq of the same queue pair on the same thread
+ * by default to avoid locking because of the syscall lock. */
+ int last_qid = clib_min (ad->rxq_num, ad->txq_num - 1);
+ for (i = 0; i < n_vlib_mains; i++)
+ {
+ /* search for the 1st rxq assigned on this thread, if any */
+ u32 qid = af_xdp_find_rxq_for_thread (vnm, ad, i);
+ /* if this rxq is combined with a txq, use it. Otherwise, we'll
+ * assign txq in a round-robin fashion. We start from the 1st txq
+ * not shared with a rxq if possible... */
+ qid = qid < ad->txq_num ? qid : (last_qid++ % ad->txq_num);
+ vnet_hw_if_tx_queue_assign_thread (
+ vnm, vec_elt (ad->txqs, qid).queue_index, i);
+ }
+
+ vnet_hw_if_update_runtime_data (vnm, ad->hw_if_index);
+ return 0;
+}
+
void
af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
{
vnet_main_t *vnm = vnet_get_main ();
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vnet_eth_interface_registration_t eir = {};
af_xdp_main_t *am = &af_xdp_main;
af_xdp_device_t *ad;
vnet_sw_interface_t *sw;
- vnet_hw_interface_t *hw;
int rxq_num, txq_num, q_num;
- int i;
+ int ns_fds[2];
+ int i, ret;
args->rxq_size = args->rxq_size ? args->rxq_size : 2 * VLIB_FRAME_SIZE;
args->txq_size = args->txq_size ? args->txq_size : 2 * VLIB_FRAME_SIZE;
- rxq_num = args->rxq_num ? args->rxq_num : 1;
- txq_num = tm->n_vlib_mains;
+ args->rxq_num = args->rxq_num ? args->rxq_num : 1;
if (!args->linux_ifname)
{
@@ -397,6 +608,26 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
goto err0;
}
+ ret = af_xdp_enter_netns (args->netns, ns_fds);
+ if (ret)
+ {
+ args->rv = ret;
+ args->error = clib_error_return (0, "enter netns %s failed, ret %d",
+ args->netns, args->rv);
+ goto err0;
+ }
+
+ af_xdp_get_q_count (args->linux_ifname, &rxq_num, &txq_num);
+ if (args->rxq_num > rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != args->rxq_num)
+ {
+ args->rv = VNET_API_ERROR_INVALID_VALUE;
+ args->error = clib_error_create ("too many rxq requested (%d > %d)",
+ args->rxq_num, rxq_num);
+ goto err1;
+ }
+ rxq_num = clib_min (rxq_num, args->rxq_num);
+ txq_num = clib_min (txq_num, tm->n_vlib_mains);
+
pool_get_zero (am->devices, ad);
if (tm->n_vlib_mains > 1 &&
@@ -406,12 +637,32 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
ad->linux_ifname = (char *) format (0, "%s", args->linux_ifname);
vec_validate (ad->linux_ifname, IFNAMSIZ - 1); /* libbpf expects ifname to be at least IFNAMSIZ */
- if (args->prog && af_xdp_load_program (args, ad))
- goto err1;
+ if (args->netns)
+ ad->netns = (char *) format (0, "%s%c", args->netns, 0);
+
+ ad->linux_ifindex = if_nametoindex (ad->linux_ifname);
+ if (!ad->linux_ifindex)
+ {
+ args->rv = VNET_API_ERROR_INVALID_VALUE;
+ args->error = clib_error_return_unix (0, "if_nametoindex(%s) failed",
+ ad->linux_ifname);
+ ad->linux_ifindex = ~0;
+ goto err1;
+ }
+
+ if (args->prog &&
+ (af_xdp_remove_program (ad) || af_xdp_load_program (args, ad)))
+ goto err2;
q_num = clib_max (rxq_num, txq_num);
ad->rxq_num = rxq_num;
ad->txq_num = txq_num;
+
+ vec_validate_aligned (ad->umem, q_num - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ad->xsk, q_num - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ad->rxqs, q_num - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ad->txqs, q_num - 1, CLIB_CACHE_LINE_BYTES);
+
for (i = 0; i < q_num; i++)
{
if (af_xdp_create_queue (vm, args, ad, i))
@@ -423,6 +674,8 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
* requested 'max'
* we might create less tx queues than workers but this is ok
*/
+ af_xdp_log (VLIB_LOG_LEVEL_DEBUG, ad,
+ "create interface failed to create queue qid=%d", i);
/* fixup vectors length */
vec_set_len (ad->umem, i);
@@ -433,19 +686,14 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
ad->rxq_num = clib_min (i, rxq_num);
ad->txq_num = clib_min (i, txq_num);
- if (i < rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != rxq_num)
+ if (i == 0 ||
+ (i < rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != args->rxq_num))
{
ad->rxq_num = ad->txq_num = 0;
- goto err1; /* failed creating requested rxq: fatal error, bailing
+ goto err2; /* failed creating requested rxq: fatal error, bailing
out */
}
- if (i < txq_num)
- {
- /* we created less txq than threads not an error but initialize lock for shared txq */
- for (i = 0; i < ad->txq_num; i++)
- clib_spinlock_init (&vec_elt (ad->txqs, i).lock);
- }
args->rv = 0;
clib_error_free (args->error);
@@ -453,6 +701,13 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
}
}
+ if (af_xdp_exit_netns (args->netns, ns_fds))
+ {
+ args->rv = VNET_API_ERROR_SYSCALL_ERROR_10;
+ args->error = clib_error_return (0, "exit netns failed");
+ goto err2;
+ }
+
ad->dev_instance = ad - am->devices;
ad->per_interface_next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
ad->pool =
@@ -460,53 +715,43 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
af_xdp_get_numa
(ad->linux_ifname));
if (!args->name)
- ad->name =
- (char *) format (0, "%s/%d", ad->linux_ifname, ad->dev_instance);
+ {
+ char *ifname = ad->linux_ifname;
+ if (args->netns != NULL && strncmp (args->netns, "pid:", 4) == 0)
+ {
+ ad->name =
+ (char *) format (0, "%s/%u", ifname, atoi (args->netns + 4));
+ }
+ else
+ ad->name = (char *) format (0, "%s/%d", ifname, ad->dev_instance);
+ }
else
ad->name = (char *) format (0, "%s", args->name);
ethernet_mac_address_generate (ad->hwaddr);
/* create interface */
- if (ethernet_register_interface (vnm, af_xdp_device_class.index,
- ad->dev_instance, ad->hwaddr,
- &ad->hw_if_index, af_xdp_flag_change))
- {
- args->rv = VNET_API_ERROR_INVALID_INTERFACE;
- args->error =
- clib_error_return (0, "ethernet_register_interface() failed");
- goto err1;
- }
+ eir.dev_class_index = af_xdp_device_class.index;
+ eir.dev_instance = ad->dev_instance;
+ eir.address = ad->hwaddr;
+ eir.cb.flag_change = af_xdp_flag_change;
+ eir.cb.set_max_frame_size = af_xdp_set_max_frame_size;
+ ad->hw_if_index = vnet_eth_register_interface (vnm, &eir);
sw = vnet_get_hw_sw_interface (vnm, ad->hw_if_index);
- hw = vnet_get_hw_interface (vnm, ad->hw_if_index);
args->sw_if_index = ad->sw_if_index = sw->sw_if_index;
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+
+ vnet_hw_if_set_caps (vnm, ad->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
vnet_hw_if_set_input_node (vnm, ad->hw_if_index, af_xdp_input_node.index);
- for (i = 0; i < ad->rxq_num; i++)
+ args->error = af_xdp_finalize_queues (vnm, ad, tm->n_vlib_mains);
+ if (args->error)
{
- af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, i);
- rxq->queue_index = vnet_hw_if_register_rx_queue (
- vnm, ad->hw_if_index, i, VNET_HW_IF_RXQ_THREAD_ANY);
- u8 *desc = format (0, "%U rxq %d", format_af_xdp_device_name,
- ad->dev_instance, i);
- clib_file_t f = {
- .file_descriptor = rxq->xsk_fd,
- .private_data = rxq->queue_index,
- .read_function = af_xdp_device_rxq_read_ready,
- .description = desc,
- };
- rxq->file_index = clib_file_add (&file_main, &f);
- vnet_hw_if_set_rx_queue_file_index (vnm, rxq->queue_index,
- rxq->file_index);
- if (af_xdp_device_set_rxq_mode (ad, rxq, AF_XDP_RXQ_MODE_POLLING))
- goto err1;
+ args->rv = VNET_API_ERROR_SYSCALL_ERROR_7;
+ goto err2;
}
- vnet_hw_if_update_runtime_data (vnm, ad->hw_if_index);
-
/* buffer template */
vec_validate_aligned (ad->buffer_template, 1, CLIB_CACHE_LINE_BYTES);
ad->buffer_template->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -517,8 +762,10 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
return;
-err1:
+err2:
af_xdp_delete_if (vm, ad);
+err1:
+ af_xdp_cleanup_netns (ns_fds);
err0:
vlib_log_err (am->log_class, "%U", format_clib_error, args->error);
}
@@ -610,7 +857,6 @@ af_xdp_clear (u32 dev_instance)
clib_error_free (ad->error);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (af_xdp_device_class) = {
.name = "AF_XDP interface",
.format_device = format_af_xdp_device,
@@ -623,7 +869,6 @@ VNET_DEVICE_CLASS (af_xdp_device_class) = {
.mac_addr_change_function = af_xdp_mac_change,
.clear_counters = af_xdp_clear,
};
-/* *INDENT-ON* */
clib_error_t *
af_xdp_init (vlib_main_t * vm)
diff --git a/src/plugins/af_xdp/input.c b/src/plugins/af_xdp/input.c
index 4f3ac5725a4..9177b3ffc5b 100644
--- a/src/plugins/af_xdp/input.c
+++ b/src/plugins/af_xdp/input.c
@@ -15,7 +15,6 @@
*------------------------------------------------------------------
*/
-#include <poll.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
@@ -89,8 +88,7 @@ af_xdp_device_input_refill_db (vlib_main_t * vm,
if (clib_spinlock_trylock_if_init (&rxq->syscall_lock))
{
- struct pollfd fd = { .fd = rxq->xsk_fd, .events = POLLIN | POLLOUT };
- int ret = poll (&fd, 1, 0);
+ int ret = recvmsg (rxq->xsk_fd, 0, MSG_DONTWAIT);
clib_spinlock_unlock_if_init (&rxq->syscall_lock);
if (PREDICT_FALSE (ret < 0))
{
@@ -198,6 +196,7 @@ af_xdp_device_input_ethernet (vlib_main_t * vm, vlib_node_runtime_t * node,
ef = vlib_frame_scalar_args (f);
ef->sw_if_index = sw_if_index;
ef->hw_if_index = hw_if_index;
+ vlib_frame_no_append (f);
}
static_always_inline u32
@@ -297,7 +296,7 @@ af_xdp_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_copy_template (&bt, ad->buffer_template);
next_index = ad->per_interface_next_index;
if (PREDICT_FALSE (vnet_device_input_have_features (ad->sw_if_index)))
- vnet_feature_start_device_input_x1 (ad->sw_if_index, &next_index, &bt);
+ vnet_feature_start_device_input (ad->sw_if_index, &next_index, &bt);
vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -353,7 +352,6 @@ af_xdp_device_input_refill (af_xdp_device_t *ad)
}
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (af_xdp_input_node) = {
.name = "af_xdp-input",
.sibling_of = "device-input",
@@ -364,7 +362,6 @@ VLIB_REGISTER_NODE (af_xdp_input_node) = {
.error_strings = af_xdp_input_error_strings,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/af_xdp/output.c b/src/plugins/af_xdp/output.c
index 51a56ed866d..a59c01ca6e0 100644
--- a/src/plugins/af_xdp/output.c
+++ b/src/plugins/af_xdp/output.c
@@ -1,5 +1,5 @@
-#include <poll.h>
#include <string.h>
+#include <vppinfra/clib.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>
@@ -101,11 +101,19 @@ af_xdp_device_output_tx_db (vlib_main_t * vm,
if (xsk_ring_prod__needs_wakeup (&txq->tx))
{
- struct pollfd fd = { .fd = txq->xsk_fd, .events = POLLIN | POLLOUT };
- int ret = poll (&fd, 1, 0);
+ const struct msghdr msg = {};
+ int ret;
+ /* On tx, the xsk socket will only tx up to TX_BATCH_SIZE, as defined
+ * in kernel net/xdp/xsk.c. Unfortunately we do not know how big this
+ * is, so our only option is to retry until everything is sent... */
+ do
+ {
+ ret = sendmsg (txq->xsk_fd, &msg, MSG_DONTWAIT);
+ }
+ while (ret < 0 && EAGAIN == errno);
if (PREDICT_FALSE (ret < 0))
{
- /* something bad is happening */
+ /* not EAGAIN: something bad is happening */
vlib_error_count (vm, node->node_index,
AF_XDP_TX_ERROR_SYSCALL_FAILURES, 1);
af_xdp_device_error (ad, "tx poll() failed");
@@ -147,6 +155,14 @@ wrap_around:
while (n >= 8)
{
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT ||
+ b[1]->flags & VLIB_BUFFER_NEXT_PRESENT ||
+ b[2]->flags & VLIB_BUFFER_NEXT_PRESENT ||
+ b[3]->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ break;
+ }
+
vlib_prefetch_buffer_header (b[4], LOAD);
offset =
(sizeof (vlib_buffer_t) +
@@ -186,6 +202,17 @@ wrap_around:
while (n >= 1)
{
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ if (vlib_buffer_chain_linearize (vm, b[0]) != 1)
+ {
+ af_xdp_log (VLIB_LOG_LEVEL_ERR, ad,
+ "vlib_buffer_chain_linearize failed");
+ vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b[0]));
+ continue;
+ }
+ }
+
offset =
(sizeof (vlib_buffer_t) +
b[0]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
@@ -215,9 +242,9 @@ VNET_DEVICE_CLASS_TX_FN (af_xdp_device_class) (vlib_main_t * vm,
af_xdp_main_t *rm = &af_xdp_main;
vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
af_xdp_device_t *ad = pool_elt_at_index (rm->devices, ord->dev_instance);
- u32 thread_index = vm->thread_index;
- af_xdp_txq_t *txq =
- vec_elt_at_index (ad->txqs, (thread_index - 1) % ad->txq_num);
+ const vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
+ const int shared_queue = tf->shared_queue;
+ af_xdp_txq_t *txq = vec_elt_at_index (ad->txqs, tf->queue_id);
u32 *from;
u32 n, n_tx;
int i;
@@ -225,20 +252,22 @@ VNET_DEVICE_CLASS_TX_FN (af_xdp_device_class) (vlib_main_t * vm,
from = vlib_frame_vector_args (frame);
n_tx = frame->n_vectors;
- clib_spinlock_lock_if_init (&txq->lock);
+ if (shared_queue)
+ clib_spinlock_lock (&txq->lock);
for (i = 0, n = 0; i < AF_XDP_TX_RETRIES && n < n_tx; i++)
{
u32 n_enq;
af_xdp_device_output_free (vm, node, txq);
- n_enq = af_xdp_device_output_tx_try (vm, node, ad, txq, n_tx - n, from);
+ n_enq =
+ af_xdp_device_output_tx_try (vm, node, ad, txq, n_tx - n, from + n);
n += n_enq;
- from += n_enq;
}
af_xdp_device_output_tx_db (vm, node, ad, txq, n);
- clib_spinlock_unlock_if_init (&txq->lock);
+ if (shared_queue)
+ clib_spinlock_unlock (&txq->lock);
if (PREDICT_FALSE (n != n_tx))
{
diff --git a/src/plugins/af_xdp/plugin.c b/src/plugins/af_xdp/plugin.c
index 444ee553cbf..7be7afeac83 100644
--- a/src/plugins/af_xdp/plugin.c
+++ b/src/plugins/af_xdp/plugin.c
@@ -19,12 +19,10 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "AF_XDP Device Plugin",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/af_xdp/test_api.c b/src/plugins/af_xdp/test_api.c
index 6dffa29bdd1..581697e341d 100644
--- a/src/plugins/af_xdp/test_api.c
+++ b/src/plugins/af_xdp/test_api.c
@@ -91,6 +91,75 @@ api_af_xdp_create (vat_main_t * vam)
return ret;
}
+/* af_xdp create v2 API */
+static int
+api_af_xdp_create_v2 (vat_main_t *vam)
+{
+ vl_api_af_xdp_create_v2_t *mp;
+ af_xdp_create_if_args_t args;
+ int ret;
+
+ if (!unformat_user (vam->input, unformat_af_xdp_create_if_args, &args))
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error, vam->input);
+ return -99;
+ }
+
+ M (AF_XDP_CREATE_V2, mp);
+
+ snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s",
+ args.linux_ifname ?: "");
+ snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name ?: "");
+ snprintf ((char *) mp->namespace, sizeof (mp->namespace), "%s",
+ args.netns ?: "");
+ mp->rxq_num = clib_host_to_net_u16 (args.rxq_num);
+ mp->rxq_size = clib_host_to_net_u16 (args.rxq_size);
+ mp->txq_size = clib_host_to_net_u16 (args.txq_size);
+ mp->mode = api_af_xdp_mode (args.mode);
+ if (args.flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK)
+ mp->flags |= AF_XDP_API_FLAGS_NO_SYSCALL_LOCK;
+ snprintf ((char *) mp->prog, sizeof (mp->prog), "%s", args.prog ?: "");
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
+/* af_xdp create v3 API */
+static int
+api_af_xdp_create_v3 (vat_main_t *vam)
+{
+ vl_api_af_xdp_create_v3_t *mp;
+ af_xdp_create_if_args_t args;
+ int ret;
+
+ if (!unformat_user (vam->input, unformat_af_xdp_create_if_args, &args))
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error, vam->input);
+ return -99;
+ }
+
+ M (AF_XDP_CREATE_V3, mp);
+
+ snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s",
+ args.linux_ifname ?: "");
+ snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name ?: "");
+ snprintf ((char *) mp->netns, sizeof (mp->netns), "%s", args.netns ?: "");
+ mp->rxq_num = args.rxq_num;
+ mp->rxq_size = args.rxq_size;
+ mp->txq_size = args.txq_size;
+ mp->mode = api_af_xdp_mode (args.mode);
+ if (args.flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK)
+ mp->flags |= AF_XDP_API_FLAGS_NO_SYSCALL_LOCK;
+ snprintf ((char *) mp->prog, sizeof (mp->prog), "%s", args.prog ?: "");
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
/* af_xdp-create reply handler */
static void
vl_api_af_xdp_create_reply_t_handler (vl_api_af_xdp_create_reply_t * mp)
@@ -109,6 +178,42 @@ vl_api_af_xdp_create_reply_t_handler (vl_api_af_xdp_create_reply_t * mp)
vam->regenerate_interface_table = 1;
}
+/* af_xdp-create v2 reply handler */
+static void
+vl_api_af_xdp_create_v2_reply_t_handler (vl_api_af_xdp_create_v2_reply_t *mp)
+{
+ vat_main_t *vam = af_xdp_test_main.vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created af_xdp with sw_if_index %d\n",
+ ntohl (mp->sw_if_index));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vam->regenerate_interface_table = 1;
+}
+
+/* af_xdp-create v3 reply handler */
+static void
+vl_api_af_xdp_create_v3_reply_t_handler (vl_api_af_xdp_create_v3_reply_t *mp)
+{
+ vat_main_t *vam = af_xdp_test_main.vat_main;
+ i32 retval = mp->retval;
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created af_xdp with sw_if_index %d\n",
+ mp->sw_if_index);
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vam->regenerate_interface_table = 1;
+}
+
/* af_xdp delete API */
static int
api_af_xdp_delete (vat_main_t * vam)
diff --git a/src/plugins/af_xdp/unformat.c b/src/plugins/af_xdp/unformat.c
index bb4c3048d23..8c0482d83ff 100644
--- a/src/plugins/af_xdp/unformat.c
+++ b/src/plugins/af_xdp/unformat.c
@@ -46,6 +46,8 @@ unformat_af_xdp_create_if_args (unformat_input_t * input, va_list * vargs)
;
else if (unformat (line_input, "prog %s", &args->prog))
;
+ else if (unformat (line_input, "netns %s", &args->netns))
+ ;
else if (unformat (line_input, "no-zero-copy"))
args->mode = AF_XDP_MODE_COPY;
else if (unformat (line_input, "zero-copy"))
diff --git a/src/plugins/arping/arping.api b/src/plugins/arping/arping.api
index f797b8cf3aa..9ec01a9b55d 100644
--- a/src/plugins/arping/arping.api
+++ b/src/plugins/arping/arping.api
@@ -18,6 +18,7 @@
option version = "1.0.0";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
+import "vnet/ethernet/ethernet_types.api";
/** \brief
@param client_index - opaque cookie to identify the sender
@@ -55,6 +56,28 @@ define arping_reply
};
/*
+ * Address Conflict Detection
+ */
+define arping_acd
+{
+ u32 client_index;
+ u32 context;
+ vl_api_address_t address;
+ vl_api_interface_index_t sw_if_index;
+ bool is_garp;
+ u32 repeat [default=1];
+ f64 interval [default=1.0];
+};
+
+define arping_acd_reply
+{
+ u32 context;
+ i32 retval;
+ u32 reply_count;
+ vl_api_mac_address_t mac_address;
+};
+
+/*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
diff --git a/src/plugins/arping/arping.c b/src/plugins/arping/arping.c
index 14f92589ea7..11fb0704dd3 100644
--- a/src/plugins/arping/arping.c
+++ b/src/plugins/arping/arping.c
@@ -500,6 +500,7 @@ arping_neighbor_advertisement (vlib_main_t *vm, arping_args_t *args)
vlib_cli_output (vm, "Sending %u GARP to %U", send_count,
format_ip4_address, &args->address.ip.ip4);
ip4_neighbor_advertise (vm, vnm, args->sw_if_index,
+ vlib_get_thread_index (),
&args->address.ip.ip4);
}
else
@@ -509,6 +510,7 @@ arping_neighbor_advertisement (vlib_main_t *vm, arping_args_t *args)
send_count, format_ip6_address,
&args->address.ip.ip6);
ip6_neighbor_advertise (vm, vnm, args->sw_if_index,
+ vlib_get_thread_index (),
&args->address.ip.ip6);
}
args->repeat--;
@@ -554,7 +556,8 @@ arping_neighbor_probe_dst (vlib_main_t *vm, arping_args_t *args)
arping_intf_t aif;
/* Disallow multiple sends on the same interface for now. Who needs it? */
- if (am->interfaces && (am->interfaces[args->sw_if_index] != 0))
+ if ((vec_len (am->interfaces) > args->sw_if_index) &&
+ (am->interfaces[args->sw_if_index] != 0))
{
error = clib_error_return (
0, "arping command is in progress for the same interface. "
@@ -586,7 +589,8 @@ arping_neighbor_probe_dst (vlib_main_t *vm, arping_args_t *args)
if (args->silence == 0)
vlib_cli_output (vm, "Sending %u ARP Request to %U", send_count,
format_ip4_address, &args->address.ip.ip4);
- ip4_neighbor_probe_dst (args->sw_if_index, &args->address.ip.ip4);
+ ip4_neighbor_probe_dst (args->sw_if_index, vlib_get_thread_index (),
+ &args->address.ip.ip4);
}
else
{
@@ -594,7 +598,8 @@ arping_neighbor_probe_dst (vlib_main_t *vm, arping_args_t *args)
vlib_cli_output (vm, "Sending %u Neighbor Solicitation to %U",
send_count, format_ip6_address,
&args->address.ip.ip6);
- ip6_neighbor_probe_dst (args->sw_if_index, &args->address.ip.ip6);
+ ip6_neighbor_probe_dst (args->sw_if_index, vlib_get_thread_index (),
+ &args->address.ip.ip6);
}
args->repeat--;
if ((args->interval > 0.0) && (args->repeat > 0))
diff --git a/src/plugins/arping/arping_api.c b/src/plugins/arping/arping_api.c
index 015c6148f5e..1b3431f2f39 100644
--- a/src/plugins/arping/arping_api.c
+++ b/src/plugins/arping/arping_api.c
@@ -26,11 +26,13 @@
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
+#include <vnet/ethernet/ethernet_types_api.h>
/* define message IDs */
#include <arping/arping.api_enum.h>
#include <arping/arping.api_types.h>
+#define REPLY_MSG_ID_BASE (am->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static void
@@ -57,10 +59,40 @@ vl_api_arping_t_handler (vl_api_arping_t *mp)
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO2 (VL_API_ARPING_REPLY + am->msg_id_base,
+ REPLY_MACRO2 (VL_API_ARPING_REPLY,
({ rmp->reply_count = ntohl (args.reply_count); }));
}
+static void
+vl_api_arping_acd_t_handler (vl_api_arping_acd_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ arping_main_t *am = &arping_main;
+ vl_api_arping_acd_reply_t *rmp;
+ arping_args_t args = { 0 };
+ int rv;
+
+ if (mp->sw_if_index != ~0)
+ VALIDATE_SW_IF_INDEX (mp);
+
+ ip_address_decode2 (&mp->address, &args.address);
+ args.interval = clib_net_to_host_f64 (mp->interval);
+ args.repeat = ntohl (mp->repeat);
+ args.is_garp = mp->is_garp;
+ args.sw_if_index = ntohl (mp->sw_if_index);
+ args.silence = 1;
+
+ arping_run_command (vm, &args);
+ rv = args.rv;
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO2 (VL_API_ARPING_ACD_REPLY, ({
+ rmp->reply_count = ntohl (args.reply_count);
+ mac_address_encode (&args.recv.from4.mac, rmp->mac_address);
+ }));
+}
+
/* set up the API message handling tables */
#include <arping/arping.api.c>
clib_error_t *
@@ -73,7 +105,8 @@ arping_plugin_api_hookup (vlib_main_t *vm)
am->msg_id_base = setup_message_id_table ();
/* Mark API as mp safe */
- vam->is_mp_safe[am->msg_id_base + VL_API_ARPING] = 1;
+ vl_api_set_msg_thread_safe (vam, am->msg_id_base + VL_API_ARPING, 1);
+ vl_api_set_msg_thread_safe (vam, am->msg_id_base + VL_API_ARPING_ACD, 1);
return 0;
}
diff --git a/src/plugins/arping/arping_test.c b/src/plugins/arping/arping_test.c
index 9001b7098a7..7cd85912bd2 100644
--- a/src/plugins/arping/arping_test.c
+++ b/src/plugins/arping/arping_test.c
@@ -26,12 +26,12 @@
#define __plugin_msg_base arping_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
+#include <vlibmemory/vlib.api_types.h>
/* declare message IDs */
#include <vnet/format_fns.h>
#include <arping/arping.api_enum.h>
#include <arping/arping.api_types.h>
-#include <vpp/api/vpe.api_types.h>
#include <vnet/ip/ip_types_api.h>
typedef struct
@@ -52,7 +52,6 @@ api_arping (vat_main_t *vam)
arping_args_t args = { 0 };
int ret;
unformat_input_t *input = vam->input;
- vnet_main_t *vnm = vnet_get_main ();
f64 interval = ARPING_DEFAULT_INTERVAL;
vl_api_control_ping_t *mp_ping;
arping_test_main_t *atm = &arping_test_main;
@@ -76,8 +75,7 @@ api_arping (vat_main_t *vam)
return -99;
}
- if (!unformat_user (input, unformat_vnet_sw_interface, vnm,
- &args.sw_if_index))
+ if (!unformat_user (input, api_unformat_sw_if_index, vam, &args.sw_if_index))
{
errmsg ("unknown interface `%U'", format_unformat_error, input);
return -99;
@@ -156,6 +154,19 @@ vl_api_arping_reply_t_handler (vl_api_arping_reply_t *mp)
vam->result_ready = 1;
}
+static int
+api_arping_acd (vat_main_t *vam)
+{
+ // NOT YET IMPLEMENTED
+ return -99;
+}
+
+static void
+vl_api_arping_acd_reply_t_handler (vl_api_arping_reply_t *mp)
+{
+ // NOT YET IMPLEMENTED
+}
+
#include <arping/arping.api_test.c>
/*
diff --git a/src/plugins/avf/CMakeLists.txt b/src/plugins/avf/CMakeLists.txt
index f7900a64958..ca6f2cb6803 100644
--- a/src/plugins/avf/CMakeLists.txt
+++ b/src/plugins/avf/CMakeLists.txt
@@ -23,6 +23,7 @@ add_vpp_plugin(avf
avf_api.c
flow.c
avf_fdir_lib.c
+ avf_rss_lib.c
MULTIARCH_SOURCES
input.c
diff --git a/src/plugins/avf/README.md b/src/plugins/avf/README.md
deleted file mode 100644
index 7aa2661fbba..00000000000
--- a/src/plugins/avf/README.md
+++ /dev/null
@@ -1,107 +0,0 @@
-# Intel AVF device plugin for VPP {#avf_plugin_doc}
-
-##Overview
-This plugins provides native device support for intel Adaptive Virtual
-Function (AVF). AVF is driver specification for current and future
-Intel Virtual Function devices. AVF defines communication channel between
-Physical Functions (PF) and VF.
-In essence, today this driver can be used only with
-Intel XL710 / X710 / XXV710 adapters.
-
-##Prerequisites
- * Driver requires newer i40e PF linux driver to be installed on the system,
-which supports virtualchnl interface. This code is tested with i40e driver
-version 2.4.6.
-
-* Driver requires MSI-X interrupt support, which is not supported by
-uio_pci_generic driver, so vfio-pci needs to be used. On systems without IOMMU
-vfio driver can still be used with recent kernels which support no-iommu mode.
-
-##Known issues
-This driver is still in experimental phase, however it shows very good
-performance numbers.
-
-## Usage
-### System setup
-
-1. load VFIO driver
-```
-sudo modprobe vfio-pci
-```
-
-2. (systems without IOMMU only) enable unsafe NOIOMMU mode
-```
-echo Y | sudo tee /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
-```
-
-3. Create and bind SR-IOV virtual function(s)
-
-Following script creates VF, assigns MAC address and binds VF to vfio-pci
-```bash
-#!/bin/bash
-
-if [ $USER != "root" ] ; then
- echo "Restarting script with sudo..."
- sudo $0 ${*}
- exit
-fi
-
-setup () {
- cd /sys/bus/pci/devices/${1}
- driver=$(basename $(readlink driver))
- if [ "${driver}" != "i40e" ]; then
- echo ${1} | tee driver/unbind
- echo ${1} | tee /sys/bus/pci/drivers/i40e/bind
- fi
- ifname=$(basename net/*)
- echo 0 | tee sriov_numvfs > /dev/null
- echo 1 | tee sriov_numvfs > /dev/null
- ip link set dev ${ifname} vf 0 mac ${2}
- ip link show dev ${ifname}
- vf=$(basename $(readlink virtfn0))
- echo ${vf} | tee virtfn0/driver/unbind
- echo vfio-pci | tee virtfn0/driver_override
- echo ${vf} | sudo tee /sys/bus/pci/drivers/vfio-pci/bind
- echo | tee virtfn0/driver_override
-}
-
-# Setup one VF on PF 0000:3b:00.0 and assign MAC address
-setup 0000:3b:00.0 00:11:22:33:44:00
-# Setup one VF on PF 0000:3b:00.1 and assign MAC address
-setup 0000:3b:00.1 00:11:22:33:44:01
-```
-
-### Promisc mode
-In cases when interface is used in the L2 mode or promisc mode is needed for some other reason,
-trust needs to be set to "on" using the linux "ip link" utility.
-```
-ip link set dev <PF inteface name> vf <VF id> trust on
-```
-
-### L2 spoofing check
-By default Virtual Function is not allowed to send ethernet frames which
-have source MAC address different than address assigned to the VF.
-In some cases it is expected that VPP will send such frames (e.g. L2 bridging,
-bonding, l2 cross-connect) and in such cases spoof chack needs to be turned
-off by issuing following command:
-```
-ip link set dev <PF inteface name> vf <VF id> spoofchk off
-```
-
-### Interface Creation
-Interfaces can be dynamically created by using following CLI:
-```
-create interface avf 0000:3b:02.0
-set int state avf-0/3b/2/0 up
-```
-
-### Interface Deletion
-Interface can be deleted with following CLI:
-```
-delete interface avf <interface name>
-```
-
-### Interface Statistics
-Interface statistics can be displayed with `sh hardware-interface <if-name>`
-command.
-
diff --git a/src/plugins/avf/README.rst b/src/plugins/avf/README.rst
new file mode 100644
index 00000000000..339f5f13c3e
--- /dev/null
+++ b/src/plugins/avf/README.rst
@@ -0,0 +1,135 @@
+Intel AVF device driver
+=======================
+
+Overview
+--------
+
+This plugin provides native device support for the Intel Adaptive
+Virtual Function (AVF). AVF is a driver specification for current and
+future Intel Virtual Function devices. AVF defines a communication
+channel between Physical Functions (PF) and VFs. In essence, today this
+driver can be used only with Intel XL710 / X710 / XXV710 adapters.
+
+Prerequisites
+-------------
+
+- The driver requires a recent i40e PF Linux driver to be installed on
+  the system, one that supports the virtchnl interface. This code is
+  tested with i40e driver version 2.4.6.
+
+- The driver requires MSI-X interrupt support, which the
+  uio_pci_generic driver does not provide, so vfio-pci needs to be
+  used. On systems without an IOMMU, the vfio driver can still be used
+  with recent kernels that support no-iommu mode.
+
+Known issues
+------------
+
+This driver is still in an experimental phase; however, it shows very
+good performance numbers.
+
+Usage
+-----
+
+System setup
+~~~~~~~~~~~~
+
+1. load VFIO driver
+
+::
+
+ sudo modprobe vfio-pci
+
+2. (systems without IOMMU only) enable unsafe NOIOMMU mode
+
+::
+
+ echo Y | sudo tee /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
+
+3. Create and bind SR-IOV virtual function(s)
+
+The following script creates a VF, assigns a MAC address and binds the
+VF to vfio-pci:
+
+.. code:: bash
+
+ #!/bin/bash
+
+ if [ $USER != "root" ] ; then
+ echo "Restarting script with sudo..."
+ sudo $0 ${*}
+ exit
+ fi
+
+ setup () {
+ cd /sys/bus/pci/devices/${1}
+ driver=$(basename $(readlink driver))
+ if [ "${driver}" != "i40e" ]; then
+ echo ${1} | tee driver/unbind
+ echo ${1} | tee /sys/bus/pci/drivers/i40e/bind
+ fi
+ ifname=$(basename net/*)
+ echo 0 | tee sriov_numvfs > /dev/null
+ echo 1 | tee sriov_numvfs > /dev/null
+ ip link set dev ${ifname} vf 0 mac ${2}
+ ip link show dev ${ifname}
+ vf=$(basename $(readlink virtfn0))
+ echo ${vf} | tee virtfn0/driver/unbind
+ echo vfio-pci | tee virtfn0/driver_override
+ echo ${vf} | sudo tee /sys/bus/pci/drivers/vfio-pci/bind
+ echo | tee virtfn0/driver_override
+ }
+
+ # Setup one VF on PF 0000:3b:00.0 and assign MAC address
+ setup 0000:3b:00.0 00:11:22:33:44:00
+ # Setup one VF on PF 0000:3b:00.1 and assign MAC address
+ setup 0000:3b:00.1 00:11:22:33:44:01
+
+Promisc mode
+~~~~~~~~~~~~
+
+In cases when the interface is used in L2 mode, or promiscuous mode is
+needed for some other reason, trust needs to be set to “on” using the
+Linux “ip link” utility.
+
+::
+
+ ip link set dev <PF interface name> vf <VF id> trust on
+
+L2 spoofing check
+~~~~~~~~~~~~~~~~~
+
+By default, a Virtual Function is not allowed to send Ethernet frames
+with a source MAC address different from the address assigned to the
+VF. In some cases it is expected that VPP will send such frames
+(e.g. L2 bridging, bonding, L2 cross-connect) and in such cases the
+spoof check needs to be turned off by issuing the following command:
+
+::
+
+ ip link set dev <PF interface name> vf <VF id> spoofchk off
+
+Interface Creation
+~~~~~~~~~~~~~~~~~~
+
+Interfaces can be dynamically created by using the following CLI:
+
+::
+
+ create interface avf 0000:3b:02.0
+ set int state avf-0/3b/2/0 up
+
+Interface Deletion
+~~~~~~~~~~~~~~~~~~
+
+An interface can be deleted with the following CLI:
+
+::
+
+ delete interface avf <interface name>
+
+Interface Statistics
+~~~~~~~~~~~~~~~~~~~~
+
+Interface statistics can be displayed with the
+``sh hardware-interface <if-name>`` command.
diff --git a/src/plugins/avf/avf.h b/src/plugins/avf/avf.h
index a1da4c8866b..f6f79cf0e09 100644
--- a/src/plugins/avf/avf.h
+++ b/src/plugins/avf/avf.h
@@ -19,6 +19,7 @@
#define _AVF_H_
#include <avf/virtchnl.h>
+#include <avf/avf_advanced_flow.h>
#include <vppinfra/types.h>
#include <vppinfra/error_bootstrap.h>
@@ -37,6 +38,7 @@
#define AVF_AQ_ENQ_SUSPEND_TIME 50e-6
#define AVF_AQ_ENQ_MAX_WAIT_TIME 250e-3
+#define AVF_AQ_BUF_SIZE 4096
#define AVF_RESET_SUSPEND_TIME 20e-3
#define AVF_RESET_MAX_WAIT_TIME 1
@@ -202,7 +204,9 @@ typedef struct
{
u32 flow_index;
u32 mark;
+ u8 flow_type_flag;
struct avf_fdir_conf *rcfg;
+ struct virtchnl_rss_cfg *rss_cfg;
} avf_flow_entry_t;
typedef struct
@@ -291,6 +295,7 @@ typedef struct
u32 calling_process_index;
u8 eth_addr[6];
int is_add, is_enable;
+ enum virthnl_adv_ops vc_op;
/* below parameters are used for 'program flow' event */
u8 *rule;
@@ -349,7 +354,8 @@ extern vlib_node_registration_t avf_input_node;
extern vlib_node_registration_t avf_process_node;
extern vnet_device_class_t avf_device_class;
-clib_error_t *avf_program_flow (u32 dev_instance, int is_add, u8 *rule,
+clib_error_t *avf_program_flow (u32 dev_instance, int is_add,
+ enum virthnl_adv_ops vc_op, u8 *rule,
u32 rule_len, u8 *program_status,
u32 status_len);
@@ -422,7 +428,7 @@ avf_reg_write (avf_device_t * ad, u32 addr, u32 val)
{
if (ad->flags & AVF_DEVICE_F_ELOG)
avf_elog_reg (ad, addr, val, 0);
- *(volatile u32 *) ((u8 *) ad->bar0 + addr) = val;
+ __atomic_store_n ((u32 *) ((u8 *) ad->bar0 + addr), val, __ATOMIC_RELEASE);
}
static inline u32
diff --git a/src/plugins/avf/avf_advanced_flow.h b/src/plugins/avf/avf_advanced_flow.h
index 42288b7163b..685147a5ed4 100644
--- a/src/plugins/avf/avf_advanced_flow.h
+++ b/src/plugins/avf/avf_advanced_flow.h
@@ -45,6 +45,7 @@
#define AVF_ETHER_TYPE_IPV6 0x86DD /**< IPv6 Protocol. */
#define VIRTCHNL_MAX_NUM_PROTO_HDRS 32
+#define VIRTCHNL_MAX_SIZE_GEN_PACKET 1024
#define PROTO_HDR_SHIFT 5
#define PROTO_HDR_FIELD_START(proto_hdr_type) \
(proto_hdr_type << PROTO_HDR_SHIFT)
@@ -177,6 +178,82 @@
#define AVF_INSET_PFCP_S_FIELD (AVF_PROT_PFCP | AVF_PFCP_S_FIELD)
#define AVF_INSET_PFCP_SEID (AVF_PROT_PFCP | AVF_PFCP_S_FIELD | AVF_PFCP_SEID)
+#define AVF_ETH_RSS_IPV4 BIT_ULL (2)
+#define AVF_ETH_RSS_FRAG_IPV4 BIT_ULL (3)
+#define AVF_ETH_RSS_NONFRAG_IPV4_TCP BIT_ULL (4)
+#define AVF_ETH_RSS_NONFRAG_IPV4_UDP BIT_ULL (5)
+#define AVF_ETH_RSS_NONFRAG_IPV4_SCTP BIT_ULL (6)
+#define AVF_ETH_RSS_NONFRAG_IPV4_OTHER BIT_ULL (7)
+#define AVF_ETH_RSS_IPV6 BIT_ULL (8)
+#define AVF_ETH_RSS_FRAG_IPV6 BIT_ULL (9)
+#define AVF_ETH_RSS_NONFRAG_IPV6_TCP BIT_ULL (10)
+#define AVF_ETH_RSS_NONFRAG_IPV6_UDP BIT_ULL (11)
+#define AVF_ETH_RSS_NONFRAG_IPV6_SCTP BIT_ULL (12)
+#define AVF_ETH_RSS_NONFRAG_IPV6_OTHER BIT_ULL (13)
+#define AVF_ETH_RSS_L2_PAYLOAD BIT_ULL (14)
+#define AVF_ETH_RSS_IPV6_EX BIT_ULL (15)
+#define AVF_ETH_RSS_IPV6_TCP_EX BIT_ULL (16)
+#define AVF_ETH_RSS_IPV6_UDP_EX BIT_ULL (17)
+#define AVF_ETH_RSS_PORT BIT_ULL (18)
+#define AVF_ETH_RSS_VXLAN BIT_ULL (19)
+#define AVF_ETH_RSS_GENEVE BIT_ULL (20)
+#define AVF_ETH_RSS_NVGRE BIT_ULL (21)
+#define AVF_ETH_RSS_GTPU BIT_ULL (23)
+#define AVF_ETH_RSS_ETH BIT_ULL (24)
+#define AVF_ETH_RSS_S_VLAN BIT_ULL (25)
+#define AVF_ETH_RSS_C_VLAN BIT_ULL (26)
+#define AVF_ETH_RSS_ESP BIT_ULL (27)
+#define AVF_ETH_RSS_AH BIT_ULL (28)
+#define AVF_ETH_RSS_L2TPV3 BIT_ULL (29)
+#define AVF_ETH_RSS_PFCP BIT_ULL (30)
+#define AVF_ETH_RSS_PPPOE BIT_ULL (31)
+#define AVF_ETH_RSS_ECPRI BIT_ULL (32)
+#define AVF_ETH_RSS_MPLS BIT_ULL (33)
+#define AVF_ETH_RSS_IPV4_CHKSUM BIT_ULL (34)
+#define AVF_ETH_RSS_L4_CHKSUM BIT_ULL (35)
+#define AVF_ETH_RSS_L2TPV2 BIT_ULL (36)
+#define AVF_ETH_RSS_L3_SRC_ONLY BIT_ULL (63)
+#define AVF_ETH_RSS_L3_DST_ONLY BIT_ULL (62)
+#define AVF_ETH_RSS_L4_SRC_ONLY BIT_ULL (61)
+#define AVF_ETH_RSS_L4_DST_ONLY BIT_ULL (60)
+#define AVF_ETH_RSS_L2_SRC_ONLY BIT_ULL (59)
+#define AVF_ETH_RSS_L2_DST_ONLY BIT_ULL (58)
+#define AVF_ETH_RSS_L3_PRE32 BIT_ULL (57)
+#define AVF_ETH_RSS_L3_PRE40 BIT_ULL (56)
+#define AVF_ETH_RSS_L3_PRE48 BIT_ULL (55)
+#define AVF_ETH_RSS_L3_PRE56 BIT_ULL (54)
+#define AVF_ETH_RSS_L3_PRE64 BIT_ULL (53)
+#define AVF_ETH_RSS_L3_PRE96 BIT_ULL (52)
+
+#define foreach_avf_rss_hf \
+ _ (0, AVF_ETH_RSS_FRAG_IPV4, "ipv4-frag") \
+ _ (1, AVF_ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
+ _ (2, AVF_ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
+ _ (3, AVF_ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
+ _ (4, AVF_ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
+ _ (5, AVF_ETH_RSS_IPV4, "ipv4") \
+ _ (6, AVF_ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
+ _ (7, AVF_ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \
+ _ (8, AVF_ETH_RSS_FRAG_IPV6, "ipv6-frag") \
+ _ (9, AVF_ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
+ _ (10, AVF_ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
+ _ (11, AVF_ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
+ _ (12, AVF_ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
+ _ (13, AVF_ETH_RSS_IPV6_EX, "ipv6-ex") \
+ _ (14, AVF_ETH_RSS_IPV6, "ipv6") \
+ _ (15, AVF_ETH_RSS_L2_PAYLOAD, "l2-payload") \
+ _ (16, AVF_ETH_RSS_PORT, "port") \
+ _ (17, AVF_ETH_RSS_VXLAN, "vxlan") \
+ _ (18, AVF_ETH_RSS_GENEVE, "geneve") \
+ _ (19, AVF_ETH_RSS_NVGRE, "nvgre") \
+ _ (20, AVF_ETH_RSS_GTPU, "gtpu") \
+ _ (21, AVF_ETH_RSS_ESP, "esp") \
+ _ (22, AVF_ETH_RSS_L2TPV3, "l2tpv3") \
+ _ (60, AVF_ETH_RSS_L4_DST_ONLY, "l4-dst-only") \
+ _ (61, AVF_ETH_RSS_L4_SRC_ONLY, "l4-src-only") \
+ _ (62, AVF_ETH_RSS_L3_DST_ONLY, "l3-dst-only") \
+ _ (63, AVF_ETH_RSS_L3_SRC_ONLY, "l3-src-only")
+
/* Protocol header type within a packet segment. A segment consists of one or
* more protocol headers that make up a logical group of protocol headers. Each
* logical group of protocol headers encapsulates or is encapsulated using/by
@@ -202,6 +279,17 @@ enum virtchnl_proto_hdr_type
VIRTCHNL_PROTO_HDR_ESP,
VIRTCHNL_PROTO_HDR_AH,
VIRTCHNL_PROTO_HDR_PFCP,
+ VIRTCHNL_PROTO_HDR_GTPC,
+ VIRTCHNL_PROTO_HDR_ECPRI,
+ VIRTCHNL_PROTO_HDR_L2TPV2,
+ VIRTCHNL_PROTO_HDR_PPP,
+ /* IPv4 and IPv6 Fragment header types are only associated to
+ * VIRTCHNL_PROTO_HDR_IPV4 and VIRTCHNL_PROTO_HDR_IPV6 respectively,
+ * cannot be used independently.
+ */
+ VIRTCHNL_PROTO_HDR_IPV4_FRAG,
+ VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG,
+ VIRTCHNL_PROTO_HDR_GRE,
};
/* Protocol header field within a protocol header. */
@@ -224,6 +312,7 @@ enum virtchnl_proto_hdr_field
VIRTCHNL_PROTO_HDR_IPV4_DSCP,
VIRTCHNL_PROTO_HDR_IPV4_TTL,
VIRTCHNL_PROTO_HDR_IPV4_PROT,
+ VIRTCHNL_PROTO_HDR_IPV4_CHKSUM,
/* IPV6 */
VIRTCHNL_PROTO_HDR_IPV6_SRC =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_IPV6),
@@ -231,18 +320,34 @@ enum virtchnl_proto_hdr_field
VIRTCHNL_PROTO_HDR_IPV6_TC,
VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT,
VIRTCHNL_PROTO_HDR_IPV6_PROT,
+ /* IPV6 Prefix */
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX32_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX32_DST,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX40_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX40_DST,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX48_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX48_DST,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX56_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX56_DST,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_DST,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX96_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX96_DST,
/* TCP */
VIRTCHNL_PROTO_HDR_TCP_SRC_PORT =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_TCP),
VIRTCHNL_PROTO_HDR_TCP_DST_PORT,
+ VIRTCHNL_PROTO_HDR_TCP_CHKSUM,
/* UDP */
VIRTCHNL_PROTO_HDR_UDP_SRC_PORT =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_UDP),
VIRTCHNL_PROTO_HDR_UDP_DST_PORT,
+ VIRTCHNL_PROTO_HDR_UDP_CHKSUM,
/* SCTP */
VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_SCTP),
VIRTCHNL_PROTO_HDR_SCTP_DST_PORT,
+ VIRTCHNL_PROTO_HDR_SCTP_CHKSUM,
/* GTPU_IP */
VIRTCHNL_PROTO_HDR_GTPU_IP_TEID =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_GTPU_IP),
@@ -264,6 +369,28 @@ enum virtchnl_proto_hdr_field
VIRTCHNL_PROTO_HDR_PFCP_S_FIELD =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_PFCP),
VIRTCHNL_PROTO_HDR_PFCP_SEID,
+ /* GTPC */
+ VIRTCHNL_PROTO_HDR_GTPC_TEID =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_GTPC),
+ /* ECPRI */
+ VIRTCHNL_PROTO_HDR_ECPRI_MSG_TYPE =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_ECPRI),
+ VIRTCHNL_PROTO_HDR_ECPRI_PC_RTC_ID,
+ /* IPv4 Dummy Fragment */
+ VIRTCHNL_PROTO_HDR_IPV4_FRAG_PKID =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_IPV4_FRAG),
+ /* IPv6 Extension Fragment */
+ VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG_PKID =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG),
+ /* GTPU_DWN/UP */
+ VIRTCHNL_PROTO_HDR_GTPU_DWN_QFI =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN),
+ VIRTCHNL_PROTO_HDR_GTPU_UP_QFI =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP),
+ /* L2TPv2 */
+ VIRTCHNL_PROTO_HDR_L2TPV2_SESS_ID =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_L2TPV2),
+ VIRTCHNL_PROTO_HDR_L2TPV2_LEN_SESS_ID,
};
struct virtchnl_proto_hdr
@@ -284,14 +411,26 @@ struct virtchnl_proto_hdrs
{
u8 tunnel_level;
/**
- * specify where protocol header start from.
- * 0 - from the outer layer
- * 1 - from the first inner layer
- * 2 - from the second inner layer
+ * specify where the protocol header starts from. Must be 0 when sending a
+ * generic packet request.
+ * 0 - from the outer layer
+ * 1 - from the first inner layer
+ * 2 - from the second inner layer
* ....
**/
- int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */
- struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS];
+ int count;
+ /**
+ * the number of proto layers must be < VIRTCHNL_MAX_NUM_PROTO_HDRS.
+ * Must be 0 when sending a generic packet request.
+ **/
+ union
+ {
+ struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS];
+ struct
+ {
+ u16 pkt_len;
+ u8 spec[VIRTCHNL_MAX_SIZE_GEN_PACKET];
+ u8 mask[VIRTCHNL_MAX_SIZE_GEN_PACKET];
+ } raw;
+ };
};
VIRTCHNL_CHECK_STRUCT_LEN (2312, virtchnl_proto_hdrs);
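For illustration, a request using the new raw member skips the parsed header
list entirely; per the comments above, count and tunnel_level must then be 0.
A minimal sketch (pkt_buf, msk_buf and pkt_len are assumed to be supplied by
the caller):

    struct virtchnl_proto_hdrs hdrs = { 0 }; /* tunnel_level == count == 0 */
    hdrs.raw.pkt_len = pkt_len;
    clib_memcpy (hdrs.raw.spec, pkt_buf, pkt_len);
    clib_memcpy (hdrs.raw.mask, msk_buf, pkt_len);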
@@ -355,6 +494,140 @@ struct virtchnl_rss_cfg
VIRTCHNL_CHECK_STRUCT_LEN (2444, virtchnl_rss_cfg);
+struct avf_pattern_match_item
+{
+ enum avf_flow_item_type *pattern_list;
+ u64 input_set_mask;
+ void *meta;
+};
+
+enum avf_flow_item_type
+{
+ AVF_FLOW_ITEM_TYPE_END,
+ AVF_FLOW_ITEM_TYPE_VOID,
+ AVF_FLOW_ITEM_TYPE_INVERT,
+ AVF_FLOW_ITEM_TYPE_ANY,
+ AVF_FLOW_ITEM_TYPE_PORT_ID,
+ AVF_FLOW_ITEM_TYPE_RAW,
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_SCTP,
+ AVF_FLOW_ITEM_TYPE_VXLAN,
+ AVF_FLOW_ITEM_TYPE_E_TAG,
+ AVF_FLOW_ITEM_TYPE_NVGRE,
+ AVF_FLOW_ITEM_TYPE_MPLS,
+ AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_FUZZY,
+ AVF_FLOW_ITEM_TYPE_GTP,
+ AVF_FLOW_ITEM_TYPE_GTPC,
+ AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_ESP,
+ AVF_FLOW_ITEM_TYPE_GENEVE,
+ AVF_FLOW_ITEM_TYPE_VXLAN_GPE,
+ AVF_FLOW_ITEM_TYPE_ARP_ETH_IPV4,
+ AVF_FLOW_ITEM_TYPE_IPV6_EXT,
+ AVF_FLOW_ITEM_TYPE_ICMP6,
+ AVF_FLOW_ITEM_TYPE_ICMP6_ND_NS,
+ AVF_FLOW_ITEM_TYPE_ICMP6_ND_NA,
+ AVF_FLOW_ITEM_TYPE_ICMP6_ND_OPT,
+ AVF_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH,
+ AVF_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH,
+ AVF_FLOW_ITEM_TYPE_MARK,
+ AVF_FLOW_ITEM_TYPE_META,
+ AVF_FLOW_ITEM_TYPE_GRE_KEY,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC,
+ AVF_FLOW_ITEM_TYPE_PPPOES,
+ AVF_FLOW_ITEM_TYPE_PPPOED,
+ AVF_FLOW_ITEM_TYPE_PPPOE_PROTO_ID,
+ AVF_FLOW_ITEM_TYPE_NSH,
+ AVF_FLOW_ITEM_TYPE_IGMP,
+ AVF_FLOW_ITEM_TYPE_AH,
+ AVF_FLOW_ITEM_TYPE_HIGIG2,
+ AVF_FLOW_ITEM_TYPE_TAG,
+ AVF_FLOW_ITEM_TYPE_L2TPV3OIP,
+ AVF_FLOW_ITEM_TYPE_PFCP,
+ AVF_FLOW_ITEM_TYPE_ECPRI,
+ AVF_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
+ AVF_FLOW_ITEM_TYPE_GENEVE_OPT,
+ AVF_FLOW_ITEM_TYPE_INTEGRITY,
+ AVF_FLOW_ITEM_TYPE_CONNTRACK,
+ AVF_FLOW_ITEM_TYPE_PORT_REPRESENTOR,
+ AVF_FLOW_ITEM_TYPE_REPRESENTED_PORT,
+ AVF_FLOW_ITEM_TYPE_FLEX,
+ AVF_FLOW_ITEM_TYPE_L2TPV2,
+ AVF_FLOW_ITEM_TYPE_PPP,
+ AVF_FLOW_ITEM_TYPE_GRE_OPTION,
+ AVF_FLOW_ITEM_TYPE_MACSEC,
+ AVF_FLOW_ITEM_TYPE_METER_COLOR,
+};
+
+enum avf_flow_action_type
+{
+ AVF_FLOW_ACTION_TYPE_END,
+ AVF_FLOW_ACTION_TYPE_VOID,
+ AVF_FLOW_ACTION_TYPE_PASSTHRU,
+ AVF_FLOW_ACTION_TYPE_JUMP,
+ AVF_FLOW_ACTION_TYPE_MARK,
+ AVF_FLOW_ACTION_TYPE_FLAG,
+ AVF_FLOW_ACTION_TYPE_QUEUE,
+ AVF_FLOW_ACTION_TYPE_DROP,
+ AVF_FLOW_ACTION_TYPE_COUNT,
+ AVF_FLOW_ACTION_TYPE_RSS,
+ AVF_FLOW_ACTION_TYPE_PF,
+ AVF_FLOW_ACTION_TYPE_VF,
+ AVF_FLOW_ACTION_TYPE_PORT_ID,
+ AVF_FLOW_ACTION_TYPE_METER,
+ AVF_FLOW_ACTION_TYPE_SECURITY,
+ AVF_FLOW_ACTION_TYPE_OF_DEC_NW_TTL,
+ AVF_FLOW_ACTION_TYPE_OF_POP_VLAN,
+ AVF_FLOW_ACTION_TYPE_OF_PUSH_VLAN,
+ AVF_FLOW_ACTION_TYPE_OF_SET_VLAN_VID,
+ AVF_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP,
+ AVF_FLOW_ACTION_TYPE_OF_POP_MPLS,
+ AVF_FLOW_ACTION_TYPE_OF_PUSH_MPLS,
+ AVF_FLOW_ACTION_TYPE_VXLAN_ENCAP,
+ AVF_FLOW_ACTION_TYPE_VXLAN_DECAP,
+ AVF_FLOW_ACTION_TYPE_NVGRE_ENCAP,
+ AVF_FLOW_ACTION_TYPE_NVGRE_DECAP,
+ AVF_FLOW_ACTION_TYPE_RAW_ENCAP,
+ AVF_FLOW_ACTION_TYPE_RAW_DECAP,
+ AVF_FLOW_ACTION_TYPE_SET_IPV4_SRC,
+ AVF_FLOW_ACTION_TYPE_SET_IPV4_DST,
+ AVF_FLOW_ACTION_TYPE_SET_IPV6_SRC,
+ AVF_FLOW_ACTION_TYPE_SET_IPV6_DST,
+ AVF_FLOW_ACTION_TYPE_SET_TP_SRC,
+ AVF_FLOW_ACTION_TYPE_SET_TP_DST,
+ AVF_FLOW_ACTION_TYPE_MAC_SWAP,
+ AVF_FLOW_ACTION_TYPE_DEC_TTL,
+ AVF_FLOW_ACTION_TYPE_SET_TTL,
+ AVF_FLOW_ACTION_TYPE_SET_MAC_SRC,
+ AVF_FLOW_ACTION_TYPE_SET_MAC_DST,
+ AVF_FLOW_ACTION_TYPE_INC_TCP_SEQ,
+ AVF_FLOW_ACTION_TYPE_DEC_TCP_SEQ,
+ AVF_FLOW_ACTION_TYPE_INC_TCP_ACK,
+ AVF_FLOW_ACTION_TYPE_DEC_TCP_ACK,
+ AVF_FLOW_ACTION_TYPE_SET_TAG,
+ AVF_FLOW_ACTION_TYPE_SET_META,
+ AVF_FLOW_ACTION_TYPE_SET_IPV4_DSCP,
+ AVF_FLOW_ACTION_TYPE_SET_IPV6_DSCP,
+ AVF_FLOW_ACTION_TYPE_AGE,
+ AVF_FLOW_ACTION_TYPE_SAMPLE,
+ AVF_FLOW_ACTION_TYPE_SHARED,
+ AVF_FLOW_ACTION_TYPE_MODIFY_FIELD,
+ AVF_FLOW_ACTION_TYPE_INDIRECT,
+ AVF_FLOW_ACTION_TYPE_CONNTRACK,
+ AVF_FLOW_ACTION_TYPE_METER_COLOR,
+ AVF_FLOW_ACTION_TYPE_PORT_REPRESENTOR,
+ AVF_FLOW_ACTION_TYPE_REPRESENTED_PORT,
+ AVF_FLOW_ACTION_TYPE_METER_MARK,
+ AVF_FLOW_ACTION_TYPE_SEND_TO_KERNEL,
+};
+
enum virtchnl_action
{
/* action types */
@@ -756,15 +1029,16 @@ struct avf_flow_action_mark
struct avf_flow_action
{
- enum virtchnl_action type; /**< Action type. */
+ enum avf_flow_action_type type; /**< Action type. */
const void *conf; /**< Pointer to action configuration object. */
};
struct avf_flow_item
{
- enum virtchnl_proto_hdr_type type; /**< Item type. */
+ enum avf_flow_item_type type; /**< Item type. */
const void *spec; /**< Pointer to item specification structure. */
const void *mask; /**< Bit-mask applied to spec and last. */
+  int is_generic; /* indicates whether this item is a generic flow pattern. */
};
struct avf_fdir_conf
@@ -783,18 +1057,20 @@ enum virthnl_adv_ops
VIRTCHNL_ADV_OP_ADD_FDIR_FILTER = 0,
VIRTCHNL_ADV_OP_DEL_FDIR_FILTER,
VIRTCHNL_ADV_OP_QUERY_FDIR_FILTER,
+ VIRTCHNL_ADV_OP_ADD_RSS_CFG,
+ VIRTCHNL_ADV_OP_DEL_RSS_CFG,
VIRTCHNL_ADV_OP_MAX
};
/* virtual channel op handler */
-typedef int (*avf_fdir_vc_op_t) (void *vc_hdl, enum virthnl_adv_ops vc_op,
+typedef int (*avf_flow_vc_op_t) (void *vc_hdl, enum virthnl_adv_ops vc_op,
void *in, u32 in_len, void *out, u32 out_len);
/* virtual channel context object */
-struct avf_fdir_vc_ctx
+struct avf_flow_vc_ctx
{
void *vc_hdl; /* virtual channel handler */
- avf_fdir_vc_op_t vc_op;
+ avf_flow_vc_op_t vc_op;
};
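The renamed callback and context are shared by the fdir and the new RSS paths.
A minimal sketch of wiring one up (avf_my_vc_op and dev are hypothetical; only
the avf_flow_vc_op_t signature matters):

    static int
    avf_my_vc_op (void *vc_hdl, enum virthnl_adv_ops vc_op, void *in,
                  u32 in_len, void *out, u32 out_len)
    {
      /* forward the opaque request to the PF, e.g. over the admin queue */
      return 0;
    }

    struct avf_flow_vc_ctx ctx = { .vc_hdl = dev, .vc_op = avf_my_vc_op };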
/**
@@ -955,7 +1231,7 @@ int avf_fdir_rcfg_act_mark (struct avf_fdir_conf *rcfg, const u32 mark,
* 0 = successful.
* < 0 = failure.
*/
-int avf_fdir_rcfg_validate (struct avf_fdir_vc_ctx *ctx,
+int avf_fdir_rcfg_validate (struct avf_flow_vc_ctx *ctx,
struct avf_fdir_conf *rcfg);
/**
@@ -971,7 +1247,7 @@ int avf_fdir_rcfg_validate (struct avf_fdir_vc_ctx *ctx,
 * 0 = successful.
* < 0 = failure.
*/
-int avf_fdir_rule_create (struct avf_fdir_vc_ctx *ctx,
+int avf_fdir_rule_create (struct avf_flow_vc_ctx *ctx,
struct avf_fdir_conf *rcfg);
/**
@@ -986,7 +1262,7 @@ int avf_fdir_rule_create (struct avf_fdir_vc_ctx *ctx,
 * 0 = successful.
* < 0 = failure.
*/
-int avf_fdir_rule_destroy (struct avf_fdir_vc_ctx *ctx,
+int avf_fdir_rule_destroy (struct avf_flow_vc_ctx *ctx,
struct avf_fdir_conf *rcfg);
/*
@@ -1008,6 +1284,24 @@ int avf_fdir_parse_pattern (struct avf_fdir_conf *rcfg,
struct avf_flow_error *error);
/*
+ * Parse avf patterns for generic flow and set pattern fields.
+ *
+ * @param rcfg
+ * flow config
+ * @param avf_items
+ * pattern items
+ * @param error
+ * save error cause
+ *
+ * @return
+ * 0 = successful.
+ * < 0 = failure.
+ */
+int avf_fdir_parse_generic_pattern (struct avf_fdir_conf *rcfg,
+ struct avf_flow_item avf_items[],
+ struct avf_flow_error *error);
+
+/*
* Parse flow actions, set actions.
*
* @param actions
@@ -1025,6 +1319,92 @@ int avf_fdir_parse_action (const struct avf_flow_action actions[],
struct avf_fdir_conf *rcfg,
struct avf_flow_error *error);
+/*
+ * Parse flow patterns and RSS actions, and set the RSS config.
+ *
+ * @param avf_items
+ * flow pattern
+ * @param avf_actions
+ * flow actions
+ * @param rss_cfg
+ * rss config
+ * @param error
+ * save error cause
+ *
+ * @return
+ * 0 = successful.
+ * < 0 = failure.
+ */
+int avf_rss_parse_pattern_action (struct avf_flow_item avf_items[],
+ struct avf_flow_action avf_actions[],
+ struct virtchnl_rss_cfg *rss_cfg,
+ struct avf_flow_error *error);
+
+/**
+ * Create an RSS rule cfg object.
+ *
+ * @param rss_cfg
+ * created rule cfg object.
+ * @param tunnel_level
+ * tunnel level where the protocol header starts from:
+ * 0 - from the outermost layer.
+ * 1 - from the first inner layer.
+ * 2 - from the second inner layer.
+ * Must be 0 for generic flow.
+ *
+ * @return
+ * 0 = successful.
+ * < 0 = failure.
+ */
+int avf_rss_cfg_create (struct virtchnl_rss_cfg **rss_cfg, int tunnel_level);
+
+int avf_rss_rcfg_destroy (struct virtchnl_rss_cfg *rss_cfg);
+
+/**
+ * Create an RSS flow rule.
+ *
+ * @param ctx
+ * virtual channel context
+ * @param rss_cfg
+ * rule cfg object.
+ *
+ * @return
+ * 0 = successful.
+ * < 0 = failure.
+ */
+int avf_rss_rule_create (struct avf_flow_vc_ctx *ctx,
+ struct virtchnl_rss_cfg *rss_cfg);
+
+/**
+ * Destroy an RSS flow rule.
+ *
+ * @param ctx
+ * virtual channel context
+ * @param rss_cfg
+ * rule cfg object.
+ *
+ * @return
+ * 0 = successful.
+ * < 0 = failure.
+ */
+int avf_rss_rule_destroy (struct avf_flow_vc_ctx *ctx,
+ struct virtchnl_rss_cfg *rss_cfg);
+
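Taken together, the RSS entry points above are used roughly as follows (a
sketch; items, actions, error and ctx are assumed to be set up by the caller):

    struct virtchnl_rss_cfg *rss_cfg;
    int rv;

    rv = avf_rss_cfg_create (&rss_cfg, 0);
    if (!rv)
      rv = avf_rss_parse_pattern_action (items, actions, rss_cfg, &error);
    if (!rv)
      rv = avf_rss_rule_create (&ctx, rss_cfg);
    /* ... later, on flow deletion ... */
    avf_rss_rule_destroy (&ctx, rss_cfg);
    avf_rss_rcfg_destroy (rss_cfg);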
+/**
+ * Parse a generic flow pattern to get its spec and mask.
+ *
+ * @param item
+ * flow item
+ * @param pkt_buf
+ * spec buffer.
+ * @param msk_buf
+ * mask buffer.
+ * @param spec_len
+ * length of spec.
+ */
+void avf_parse_generic_pattern (struct avf_flow_item *item, u8 *pkt_buf,
+ u8 *msk_buf, u16 spec_len);
+
/**
* Initialize flow error structure.
*
diff --git a/src/plugins/avf/avf_api.c b/src/plugins/avf/avf_api.c
index 883b374331f..ee39c87e666 100644
--- a/src/plugins/avf/avf_api.c
+++ b/src/plugins/avf/avf_api.c
@@ -29,6 +29,7 @@
#include <avf/avf.api_enum.h>
#include <avf/avf.api_types.h>
+#define REPLY_MSG_ID_BASE (am->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static void
@@ -51,12 +52,8 @@ vl_api_avf_create_t_handler (vl_api_avf_create_t * mp)
avf_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_AVF_CREATE_REPLY + am->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_AVF_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -82,7 +79,7 @@ vl_api_avf_delete_t_handler (vl_api_avf_delete_t * mp)
AVF_PROCESS_EVENT_DELETE_IF, hw->dev_instance);
reply:
- REPLY_MACRO (VL_API_AVF_DELETE_REPLY + am->msg_id_base);
+ REPLY_MACRO (VL_API_AVF_DELETE_REPLY);
}
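Defining REPLY_MSG_ID_BASE before including api_helper_macros.h lets the reply
macros add the plugin's message-id offset themselves, which is why the explicit
"+ am->msg_id_base" disappears from the handlers above. A sketch of the effect
(per the REPLY_MACRO definition in api_helper_macros.h):

    /* REPLY_MACRO (VL_API_AVF_DELETE_REPLY) now stamps the reply with
       VL_API_AVF_DELETE_REPLY + REPLY_MSG_ID_BASE, i.e. the handler no
       longer adds the base by hand. */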
/* set up the API message handling tables */
@@ -96,7 +93,7 @@ avf_plugin_api_hookup (vlib_main_t * vm)
/* ask for a correctly-sized block of API message decode slots */
avm->msg_id_base = setup_message_id_table ();
- am->is_mp_safe[avm->msg_id_base + VL_API_AVF_DELETE] = 1;
+ vl_api_set_msg_thread_safe (am, avm->msg_id_base + VL_API_AVF_DELETE, 1);
return 0;
}
diff --git a/src/plugins/avf/avf_fdir_lib.c b/src/plugins/avf/avf_fdir_lib.c
index f38614e87ec..24b796dc91d 100644
--- a/src/plugins/avf/avf_fdir_lib.c
+++ b/src/plugins/avf/avf_fdir_lib.c
@@ -28,7 +28,7 @@
static inline int
fls_u32 (u32 x)
{
- return (x == 0) ? 0 : 32 - count_leading_zeros (x);
+ return (x == 0) ? 0 : 64 - count_leading_zeros (x);
}
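The 32 -> 64 change matches VPP's count_leading_zeros (), which operates on a
64-bit word (the u32 argument is promoted). For example:

    /* fls_u32 (0x1): count_leading_zeros (1) == 63, so 64 - 63 == 1;
       the previous "32 - ..." form returned -31 for the same input. */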
static inline int
@@ -100,7 +100,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
const struct avf_flow_eth_hdr *eth_spec, *eth_mask;
struct virtchnl_proto_hdr *hdr;
- enum virtchnl_proto_hdr_type type;
+ enum avf_flow_item_type type;
u16 ether_type;
int ret = 0;
@@ -112,7 +112,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
switch (type)
{
- case VIRTCHNL_PROTO_HDR_ETH:
+ case AVF_FLOW_ITEM_TYPE_ETH:
eth_spec = item->spec;
eth_mask = item->mask;
@@ -160,7 +160,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_IPV4:
+ case AVF_FLOW_ITEM_TYPE_IPV4:
ipv4_spec = item->spec;
ipv4_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_IPV4;
@@ -211,7 +211,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_IPV6:
+ case AVF_FLOW_ITEM_TYPE_IPV6:
ipv6_spec = item->spec;
ipv6_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_IPV6;
@@ -257,14 +257,14 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
{
rcfg->input_set |= AVF_INSET_IPV6_DST;
VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT (hdr, IPV6, DST);
-
- clib_memcpy (hdr->buffer, ipv6_spec, sizeof (*ipv6_spec));
}
+
+ clib_memcpy (hdr->buffer, ipv6_spec, sizeof (*ipv6_spec));
}
break;
- case VIRTCHNL_PROTO_HDR_UDP:
+ case AVF_FLOW_ITEM_TYPE_UDP:
udp_spec = item->spec;
udp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_UDP;
@@ -295,7 +295,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_TCP:
+ case AVF_FLOW_ITEM_TYPE_TCP:
tcp_spec = item->spec;
tcp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_TCP;
@@ -329,7 +329,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
break;
- case VIRTCHNL_PROTO_HDR_SCTP:
+ case AVF_FLOW_ITEM_TYPE_SCTP:
sctp_spec = item->spec;
sctp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_SCTP;
@@ -360,7 +360,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_GTPU_IP:
+ case AVF_FLOW_ITEM_TYPE_GTPU:
gtp_spec = item->spec;
gtp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_GTPU_IP;
@@ -387,7 +387,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
break;
- case VIRTCHNL_PROTO_HDR_GTPU_EH:
+ case AVF_FLOW_ITEM_TYPE_GTP_PSC:
gtp_psc_spec = item->spec;
gtp_psc_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_GTPU_EH;
@@ -405,7 +405,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
break;
- case VIRTCHNL_PROTO_HDR_L2TPV3:
+ case AVF_FLOW_ITEM_TYPE_L2TPV3OIP:
l2tpv3oip_spec = item->spec;
l2tpv3oip_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_L2TPV3;
@@ -422,7 +422,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_ESP:
+ case AVF_FLOW_ITEM_TYPE_ESP:
esp_spec = item->spec;
esp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_ESP;
@@ -439,7 +439,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_AH:
+ case AVF_FLOW_ITEM_TYPE_AH:
ah_spec = item->spec;
ah_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_AH;
@@ -456,7 +456,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_PFCP:
+ case AVF_FLOW_ITEM_TYPE_PFCP:
pfcp_spec = item->spec;
pfcp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_PFCP;
@@ -591,7 +591,7 @@ avf_fdir_rcfg_act_mark (struct avf_fdir_conf *rcfg, const u32 mark,
}
int
-avf_fdir_rcfg_validate (struct avf_fdir_vc_ctx *ctx,
+avf_fdir_rcfg_validate (struct avf_flow_vc_ctx *ctx,
struct avf_fdir_conf *rcfg)
{
int ret;
@@ -617,7 +617,7 @@ avf_fdir_rcfg_validate (struct avf_fdir_vc_ctx *ctx,
}
int
-avf_fdir_rule_create (struct avf_fdir_vc_ctx *ctx, struct avf_fdir_conf *rcfg)
+avf_fdir_rule_create (struct avf_flow_vc_ctx *ctx, struct avf_fdir_conf *rcfg)
{
int ret;
rcfg->add_fltr.vsi_id = rcfg->vsi;
@@ -644,7 +644,7 @@ avf_fdir_rule_create (struct avf_fdir_vc_ctx *ctx, struct avf_fdir_conf *rcfg)
}
int
-avf_fdir_rule_destroy (struct avf_fdir_vc_ctx *ctx, struct avf_fdir_conf *rcfg)
+avf_fdir_rule_destroy (struct avf_flow_vc_ctx *ctx, struct avf_fdir_conf *rcfg)
{
int ret;
struct virtchnl_fdir_del fdir_ret;
@@ -683,18 +683,18 @@ avf_fdir_parse_action (const struct avf_flow_action actions[],
struct virtchnl_fdir_rule *rule_cfg = &rcfg->add_fltr.rule_cfg;
- for (; actions->type != VIRTCHNL_ACTION_NONE; actions++, act_idx++)
+ for (; actions->type != AVF_FLOW_ACTION_TYPE_END; actions++, act_idx++)
{
switch (actions->type)
{
- case VIRTCHNL_ACTION_PASSTHRU:
+ case AVF_FLOW_ACTION_TYPE_PASSTHRU:
dest_num++;
filter_action = &rule_cfg->action_set.actions[act_idx];
filter_action->type = VIRTCHNL_ACTION_PASSTHRU;
rule_cfg->action_set.count++;
break;
- case VIRTCHNL_ACTION_DROP:
+ case AVF_FLOW_ACTION_TYPE_DROP:
dest_num++;
ret = avf_fdir_rcfg_act_drop (rcfg, act_idx);
if (ret)
@@ -703,7 +703,7 @@ avf_fdir_parse_action (const struct avf_flow_action actions[],
rule_cfg->action_set.count++;
break;
- case VIRTCHNL_ACTION_QUEUE:
+ case AVF_FLOW_ACTION_TYPE_QUEUE:
dest_num++;
act_q = actions->conf;
@@ -722,7 +722,7 @@ avf_fdir_parse_action (const struct avf_flow_action actions[],
rule_cfg->action_set.count++;
break;
- case VIRTCHNL_ACTION_Q_REGION:
+ case AVF_FLOW_ACTION_TYPE_RSS:
dest_num++;
filter_action = &rule_cfg->action_set.actions[act_idx];
ret = avf_fdir_parse_action_qregion (rcfg, actions, act_idx, error);
@@ -732,7 +732,7 @@ avf_fdir_parse_action (const struct avf_flow_action actions[],
rule_cfg->action_set.count++;
break;
- case VIRTCHNL_ACTION_MARK:
+ case AVF_FLOW_ACTION_TYPE_MARK:
mark_num++;
act_msk = actions->conf;
rcfg->mark_flag = 1;
@@ -786,6 +786,36 @@ avf_fdir_parse_action (const struct avf_flow_action actions[],
}
int
+avf_fdir_parse_generic_pattern (struct avf_fdir_conf *rcfg,
+ struct avf_flow_item avf_items[],
+ struct avf_flow_error *error)
+{
+ struct avf_flow_item *item = avf_items;
+ u8 *pkt_buf, *msk_buf;
+ u16 spec_len, pkt_len;
+
+ spec_len = clib_strnlen (item->spec, VIRTCHNL_MAX_SIZE_GEN_PACKET);
+ pkt_len = spec_len / 2;
+
+ pkt_buf = clib_mem_alloc (pkt_len);
+ msk_buf = clib_mem_alloc (pkt_len);
+
+ avf_parse_generic_pattern (item, pkt_buf, msk_buf, spec_len);
+
+ clib_memcpy (rcfg->add_fltr.rule_cfg.proto_hdrs.raw.spec, pkt_buf, pkt_len);
+ clib_memcpy (rcfg->add_fltr.rule_cfg.proto_hdrs.raw.mask, msk_buf, pkt_len);
+
+ rcfg->add_fltr.rule_cfg.proto_hdrs.count = 0;
+ rcfg->add_fltr.rule_cfg.proto_hdrs.tunnel_level = 0;
+ rcfg->add_fltr.rule_cfg.proto_hdrs.raw.pkt_len = pkt_len;
+
+ clib_mem_free (pkt_buf);
+ clib_mem_free (msk_buf);
+
+ return 0;
+}
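avf_fdir_parse_generic_pattern treats item->spec as a NUL-terminated hex
string, so half of its length is the packet length in bytes. A hedged usage
sketch (the pattern bytes are illustrative; rcfg and error are assumed to be
initialized):

    struct avf_flow_item item = {
      .type = AVF_FLOW_ITEM_TYPE_RAW,
      .spec = "45000014000000004011", /* two hex chars per packet byte */
      .mask = "ffffffffffffffffffff",
      .is_generic = 1,
    };
    int rv = avf_fdir_parse_generic_pattern (rcfg, &item, &error);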
+
+int
avf_fdir_parse_pattern (struct avf_fdir_conf *rcfg,
struct avf_flow_item avf_items[],
struct avf_flow_error *error)
@@ -794,7 +824,7 @@ avf_fdir_parse_pattern (struct avf_fdir_conf *rcfg,
int ret = 0;
struct avf_flow_item *item;
- for (item = avf_items; item->type != VIRTCHNL_PROTO_HDR_NONE; item++)
+ for (item = avf_items; item->type != AVF_FLOW_ITEM_TYPE_END; item++)
{
ret = avf_fdir_rcfg_set_field (rcfg, layer, item, error);
if (ret)
diff --git a/src/plugins/avf/avf_rss_lib.c b/src/plugins/avf/avf_rss_lib.c
new file mode 100644
index 00000000000..23b0b59b7cc
--- /dev/null
+++ b/src/plugins/avf/avf_rss_lib.c
@@ -0,0 +1,2690 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vppinfra/mem.h>
+#include "avf_advanced_flow.h"
+
+#define AVF_PHINT_NONE 0
+#define AVF_PHINT_GTPU BIT_ULL (0)
+#define AVF_PHINT_GTPU_EH BIT_ULL (1)
+#define AVF_PHINT_GTPU_EH_DWN BIT_ULL (2)
+#define AVF_PHINT_GTPU_EH_UP BIT_ULL (3)
+#define AVF_PHINT_OUTER_IPV4 BIT_ULL (4)
+#define AVF_PHINT_OUTER_IPV6 BIT_ULL (5)
+#define AVF_PHINT_GRE BIT_ULL (6)
+/* the second IP header of GTPoGRE */
+#define AVF_PHINT_MID_IPV4 BIT_ULL (7)
+#define AVF_PHINT_MID_IPV6 BIT_ULL (8)
+/* L2TPv2 */
+#define AVF_PHINT_L2TPV2 BIT_ULL (9)
+#define AVF_PHINT_L2TPV2_LEN BIT_ULL (10)
+/* Raw */
+#define AVF_PHINT_RAW BIT_ULL (11)
+
+#define AVF_PHINT_GTPU_MSK \
+ (AVF_PHINT_GTPU | AVF_PHINT_GTPU_EH | AVF_PHINT_GTPU_EH_DWN | \
+ AVF_PHINT_GTPU_EH_UP)
+
+#define AVF_PHINT_LAYERS_MSK (AVF_PHINT_OUTER_IPV4 | AVF_PHINT_OUTER_IPV6)
+
+#define AVF_GTPU_EH_DWNLINK 0
+#define AVF_GTPU_EH_UPLINK 1
+
+#define FIELD_SELECTOR(proto_hdr_field) \
+ (1UL << ((proto_hdr_field) & PROTO_HDR_FIELD_MASK))
+#define BUFF_NOUSED 0
+
+#define REFINE_PROTO_FLD(op, fld) \
+ VIRTCHNL_##op##_PROTO_HDR_FIELD (hdr, VIRTCHNL_PROTO_HDR_##fld)
+#define REPALCE_PROTO_FLD(fld_1, fld_2) \
+ do \
+ { \
+ REFINE_PROTO_FLD (DEL, fld_1); \
+ REFINE_PROTO_FLD (ADD, fld_2); \
+ } \
+ while (0)
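For reference, a sketch of what the refine/replace helpers expand to (they
implicitly reference a local struct virtchnl_proto_hdr *hdr, per the virtchnl
helper macros):

    /* REFINE_PROTO_FLD (ADD, IPV4_SRC) expands to
     *   VIRTCHNL_ADD_PROTO_HDR_FIELD (hdr, VIRTCHNL_PROTO_HDR_IPV4_SRC)
     * and REPALCE_PROTO_FLD (IPV4_SRC, IPV4_DST) drops the SRC field bit
     * and sets the DST field bit in the header's field selector. */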
+
+#define proto_hdr_eth \
+ { \
+ VIRTCHNL_PROTO_HDR_ETH, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_ETH_SRC) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_ETH_DST), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_svlan \
+ { \
+ VIRTCHNL_PROTO_HDR_S_VLAN, FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_S_VLAN_ID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_cvlan \
+ { \
+ VIRTCHNL_PROTO_HDR_C_VLAN, FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_C_VLAN_ID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ipv4 \
+ { \
+ VIRTCHNL_PROTO_HDR_IPV4, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV4_SRC) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV4_DST), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ipv4_with_prot \
+ { \
+ VIRTCHNL_PROTO_HDR_IPV4, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV4_SRC) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV4_DST) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV4_PROT), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ipv6 \
+ { \
+ VIRTCHNL_PROTO_HDR_IPV6, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_SRC) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_DST), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ipv6_frag \
+ { \
+ VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG_PKID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ipv6_with_prot \
+ { \
+ VIRTCHNL_PROTO_HDR_IPV6, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_SRC) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_DST) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_PROT), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_udp \
+ { \
+ VIRTCHNL_PROTO_HDR_UDP, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_UDP_DST_PORT), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_tcp \
+ { \
+ VIRTCHNL_PROTO_HDR_TCP, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_TCP_DST_PORT), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_sctp \
+ { \
+ VIRTCHNL_PROTO_HDR_SCTP, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_esp \
+ { \
+ VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_ESP_SPI), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ah \
+ { \
+ VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_AH_SPI), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_l2tpv3 \
+ { \
+ VIRTCHNL_PROTO_HDR_L2TPV3, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_pfcp \
+ { \
+ VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_PFCP_SEID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_gtpc \
+ { \
+ VIRTCHNL_PROTO_HDR_GTPC, 0, { BUFF_NOUSED } \
+ }
+
+#define proto_hdr_ecpri \
+ { \
+ VIRTCHNL_PROTO_HDR_ECPRI, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_ECPRI_PC_RTC_ID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_l2tpv2 \
+ { \
+ VIRTCHNL_PROTO_HDR_L2TPV2, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_L2TPV2_SESS_ID) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_L2TPV2_LEN_SESS_ID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ppp \
+ { \
+ VIRTCHNL_PROTO_HDR_PPP, 0, { BUFF_NOUSED } \
+ }
+
+#define TUNNEL_LEVEL_OUTER 0
+#define TUNNEL_LEVEL_INNER 1
+
+/* proto_hdrs templates */
+struct virtchnl_proto_hdrs outer_ipv4_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 4,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan, proto_hdr_ipv4 } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv4_udp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan,
+ proto_hdr_ipv4_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv4_tcp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan,
+ proto_hdr_ipv4_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv4_sctp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan, proto_hdr_ipv4,
+ proto_hdr_sctp } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv6_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 4,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan, proto_hdr_ipv6 } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv6_frag_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan, proto_hdr_ipv6,
+ proto_hdr_ipv6_frag } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv6_udp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan,
+ proto_hdr_ipv6_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv6_tcp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan,
+ proto_hdr_ipv6_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv6_sctp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan, proto_hdr_ipv6,
+ proto_hdr_sctp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv4_tmplt = { TUNNEL_LEVEL_INNER,
+ 1,
+ { { proto_hdr_ipv4 } } };
+
+struct virtchnl_proto_hdrs inner_ipv4_udp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv4_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv4_tcp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv4_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv4_tmplt = {
+ 2, 1, { { proto_hdr_ipv4 } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv4_udp_tmplt = {
+ 2, 2, { { proto_hdr_ipv4_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv4_tcp_tmplt = {
+ 2, 2, { { proto_hdr_ipv4_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv6_tmplt = {
+ 2, 1, { { proto_hdr_ipv6 } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv6_udp_tmplt = {
+ 2, 2, { { proto_hdr_ipv6_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv6_tcp_tmplt = {
+ 2, 2, { { proto_hdr_ipv6_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv4_sctp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv4, proto_hdr_sctp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv6_tmplt = { TUNNEL_LEVEL_INNER,
+ 1,
+ { { proto_hdr_ipv6 } } };
+
+struct virtchnl_proto_hdrs inner_ipv6_udp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv6_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv6_tcp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv6_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv6_sctp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv6, proto_hdr_sctp } }
+};
+
+struct virtchnl_proto_hdrs ipv4_esp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv4, proto_hdr_esp } }
+};
+
+struct virtchnl_proto_hdrs ipv4_udp_esp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 3, { { proto_hdr_ipv4, proto_hdr_udp, proto_hdr_esp } }
+};
+
+struct virtchnl_proto_hdrs ipv4_ah_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv4, proto_hdr_ah } }
+};
+
+struct virtchnl_proto_hdrs ipv6_esp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv6, proto_hdr_esp } }
+};
+
+struct virtchnl_proto_hdrs ipv6_udp_esp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 3, { { proto_hdr_ipv6, proto_hdr_udp, proto_hdr_esp } }
+};
+
+struct virtchnl_proto_hdrs ipv6_ah_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv6, proto_hdr_ah } }
+};
+
+struct virtchnl_proto_hdrs ipv4_l2tpv3_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv4, proto_hdr_l2tpv3 } }
+};
+
+struct virtchnl_proto_hdrs ipv6_l2tpv3_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv6, proto_hdr_l2tpv3 } }
+};
+
+struct virtchnl_proto_hdrs ipv4_pfcp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv4, proto_hdr_pfcp } }
+};
+
+struct virtchnl_proto_hdrs ipv6_pfcp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv6, proto_hdr_pfcp } }
+};
+
+struct virtchnl_proto_hdrs ipv4_udp_gtpc_tmplt = {
+ TUNNEL_LEVEL_OUTER, 3, { { proto_hdr_ipv4, proto_hdr_udp, proto_hdr_gtpc } }
+};
+
+struct virtchnl_proto_hdrs ipv6_udp_gtpc_tmplt = {
+ TUNNEL_LEVEL_OUTER, 3, { { proto_hdr_ipv6, proto_hdr_udp, proto_hdr_gtpc } }
+};
+
+struct virtchnl_proto_hdrs eth_ecpri_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_eth, proto_hdr_ecpri } }
+};
+
+struct virtchnl_proto_hdrs ipv4_ecpri_tmplt = {
+ TUNNEL_LEVEL_OUTER, 3, { { proto_hdr_ipv4, proto_hdr_udp, proto_hdr_ecpri } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv4_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 3,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv4 } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv6_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 3,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv6 } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv4_udp_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 4,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv4_with_prot,
+ proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv4_tcp_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 4,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv4_with_prot,
+ proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv6_udp_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 4,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv6_with_prot,
+ proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv6_tcp_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 4,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv6_with_prot,
+    proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs ipv4_l2tpv2_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 4,
+ { { proto_hdr_eth, proto_hdr_ipv4, proto_hdr_udp, proto_hdr_l2tpv2 } }
+};
+
+struct virtchnl_proto_hdrs ipv6_l2tpv2_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 4,
+ { { proto_hdr_eth, proto_hdr_ipv6, proto_hdr_udp, proto_hdr_l2tpv2 } }
+};
+
+struct virtchnl_proto_hdrs ipv4_l2tpv2_ppp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_ipv4, proto_hdr_udp, proto_hdr_l2tpv2,
+ proto_hdr_ppp } }
+};
+
+struct virtchnl_proto_hdrs ipv6_l2tpv2_ppp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_ipv6, proto_hdr_udp, proto_hdr_l2tpv2,
+ proto_hdr_ppp } }
+};
+
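Each template above pre-populates tunnel_level, the header count and the
per-header field selectors; the RSS parser then copies one and refines the
selected fields. A minimal sketch of the intended use (assuming rss_cfg came
from avf_rss_cfg_create and that virtchnl_rss_cfg embeds a proto_hdrs member,
as in the virtchnl definitions):

    rss_cfg->proto_hdrs = inner_ipv4_udp_tmplt; /* struct copy */
    /* then add/drop field bits to match the requested rss_type, e.g. keep
       only the UDP ports for AVF_ETH_RSS_L4_SRC_ONLY | L4_DST_ONLY */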
+/* RSS type superset */
+
+#define AVF_INSET_NONE 0ULL
+
+/* IPv4 outer */
+#define AVF_RSS_TYPE_OUTER_IPV4 \
+ (AVF_ETH_RSS_ETH | AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_FRAG_IPV4 | \
+ AVF_ETH_RSS_IPV4_CHKSUM)
+#define AVF_RSS_TYPE_OUTER_IPV4_UDP \
+ (AVF_RSS_TYPE_OUTER_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_UDP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+#define AVF_RSS_TYPE_OUTER_IPV4_TCP \
+ (AVF_RSS_TYPE_OUTER_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_TCP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+#define AVF_RSS_TYPE_OUTER_IPV4_SCTP \
+ (AVF_RSS_TYPE_OUTER_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_SCTP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+/* IPv6 outer */
+#define AVF_RSS_TYPE_OUTER_IPV6 (AVF_ETH_RSS_ETH | AVF_ETH_RSS_IPV6)
+#define AVF_RSS_TYPE_OUTER_IPV6_FRAG \
+ (AVF_RSS_TYPE_OUTER_IPV6 | AVF_ETH_RSS_FRAG_IPV6)
+#define AVF_RSS_TYPE_OUTER_IPV6_UDP \
+ (AVF_RSS_TYPE_OUTER_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_UDP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+#define AVF_RSS_TYPE_OUTER_IPV6_TCP \
+ (AVF_RSS_TYPE_OUTER_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_TCP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+#define AVF_RSS_TYPE_OUTER_IPV6_SCTP \
+ (AVF_RSS_TYPE_OUTER_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_SCTP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+/* VLAN IPV4 */
+#define AVF_RSS_TYPE_VLAN_IPV4 \
+ (AVF_RSS_TYPE_OUTER_IPV4 | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV4_UDP \
+ (AVF_RSS_TYPE_OUTER_IPV4_UDP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV4_TCP \
+ (AVF_RSS_TYPE_OUTER_IPV4_TCP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV4_SCTP \
+ (AVF_RSS_TYPE_OUTER_IPV4_SCTP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+/* VLAN IPv6 */
+#define AVF_RSS_TYPE_VLAN_IPV6 \
+ (AVF_RSS_TYPE_OUTER_IPV6 | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV6_FRAG \
+ (AVF_RSS_TYPE_OUTER_IPV6_FRAG | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV6_UDP \
+ (AVF_RSS_TYPE_OUTER_IPV6_UDP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV6_TCP \
+ (AVF_RSS_TYPE_OUTER_IPV6_TCP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV6_SCTP \
+ (AVF_RSS_TYPE_OUTER_IPV6_SCTP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+/* IPv4 inner */
+#define AVF_RSS_TYPE_INNER_IPV4 AVF_ETH_RSS_IPV4
+#define AVF_RSS_TYPE_INNER_IPV4_UDP \
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_UDP)
+#define AVF_RSS_TYPE_INNER_IPV4_TCP \
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_TCP)
+#define AVF_RSS_TYPE_INNER_IPV4_SCTP \
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_SCTP)
+/* IPv6 inner */
+#define AVF_RSS_TYPE_INNER_IPV6 AVF_ETH_RSS_IPV6
+#define AVF_RSS_TYPE_INNER_IPV6_UDP \
+ (AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_UDP)
+#define AVF_RSS_TYPE_INNER_IPV6_TCP \
+ (AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_TCP)
+#define AVF_RSS_TYPE_INNER_IPV6_SCTP \
+ (AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_SCTP)
+/* GTPU IPv4 */
+#define AVF_RSS_TYPE_GTPU_IPV4 (AVF_RSS_TYPE_INNER_IPV4 | AVF_ETH_RSS_GTPU)
+#define AVF_RSS_TYPE_GTPU_IPV4_UDP \
+ (AVF_RSS_TYPE_INNER_IPV4_UDP | AVF_ETH_RSS_GTPU)
+#define AVF_RSS_TYPE_GTPU_IPV4_TCP \
+ (AVF_RSS_TYPE_INNER_IPV4_TCP | AVF_ETH_RSS_GTPU)
+/* GTPU IPv6 */
+#define AVF_RSS_TYPE_GTPU_IPV6 (AVF_RSS_TYPE_INNER_IPV6 | AVF_ETH_RSS_GTPU)
+#define AVF_RSS_TYPE_GTPU_IPV6_UDP \
+ (AVF_RSS_TYPE_INNER_IPV6_UDP | AVF_ETH_RSS_GTPU)
+#define AVF_RSS_TYPE_GTPU_IPV6_TCP \
+ (AVF_RSS_TYPE_INNER_IPV6_TCP | AVF_ETH_RSS_GTPU)
+/* ESP, AH, L2TPV3 and PFCP */
+#define AVF_RSS_TYPE_IPV4_ESP (AVF_ETH_RSS_ESP | AVF_ETH_RSS_IPV4)
+#define AVF_RSS_TYPE_IPV4_AH (AVF_ETH_RSS_AH | AVF_ETH_RSS_IPV4)
+#define AVF_RSS_TYPE_IPV6_ESP (AVF_ETH_RSS_ESP | AVF_ETH_RSS_IPV6)
+#define AVF_RSS_TYPE_IPV6_AH (AVF_ETH_RSS_AH | AVF_ETH_RSS_IPV6)
+#define AVF_RSS_TYPE_IPV4_L2TPV3 (AVF_ETH_RSS_L2TPV3 | AVF_ETH_RSS_IPV4)
+#define AVF_RSS_TYPE_IPV6_L2TPV3 (AVF_ETH_RSS_L2TPV3 | AVF_ETH_RSS_IPV6)
+#define AVF_RSS_TYPE_IPV4_PFCP (AVF_ETH_RSS_PFCP | AVF_ETH_RSS_IPV4)
+#define AVF_RSS_TYPE_IPV6_PFCP (AVF_ETH_RSS_PFCP | AVF_ETH_RSS_IPV6)
+
+/* L2TPv2 */
+#define AVF_RSS_TYPE_ETH_L2TPV2 (AVF_ETH_RSS_ETH | AVF_ETH_RSS_L2TPV2)
+
+#define VALID_RSS_IPV4_L4 \
+ (AVF_ETH_RSS_NONFRAG_IPV4_UDP | AVF_ETH_RSS_NONFRAG_IPV4_TCP | \
+ AVF_ETH_RSS_NONFRAG_IPV4_SCTP)
+
+#define VALID_RSS_IPV6_L4 \
+ (AVF_ETH_RSS_NONFRAG_IPV6_UDP | AVF_ETH_RSS_NONFRAG_IPV6_TCP | \
+ AVF_ETH_RSS_NONFRAG_IPV6_SCTP)
+
+#define VALID_RSS_IPV4 \
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_FRAG_IPV4 | VALID_RSS_IPV4_L4)
+#define VALID_RSS_IPV6 \
+ (AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_FRAG_IPV6 | VALID_RSS_IPV6_L4)
+#define VALID_RSS_L3 (VALID_RSS_IPV4 | VALID_RSS_IPV6)
+#define VALID_RSS_L4 (VALID_RSS_IPV4_L4 | VALID_RSS_IPV6_L4)
+
+#define VALID_RSS_ATTR \
+ (AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY | \
+ AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY | \
+ AVF_ETH_RSS_L2_SRC_ONLY | AVF_ETH_RSS_L2_DST_ONLY | AVF_ETH_RSS_L3_PRE64)
+
+#define INVALID_RSS_ATTR \
+ (AVF_ETH_RSS_L3_PRE32 | AVF_ETH_RSS_L3_PRE40 | AVF_ETH_RSS_L3_PRE48 | \
+ AVF_ETH_RSS_L3_PRE56 | AVF_ETH_RSS_L3_PRE96)
+
+static u64 invalid_rss_comb[] = {
+ AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_UDP,
+ AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_TCP,
+ AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_UDP,
+ AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_TCP,
+ AVF_ETH_RSS_L3_PRE32 | AVF_ETH_RSS_L3_PRE40 | AVF_ETH_RSS_L3_PRE48 |
+ AVF_ETH_RSS_L3_PRE56 | AVF_ETH_RSS_L3_PRE96
+};
+
+struct rss_attr_type
+{
+ u64 attr;
+ u64 type;
+};
+
+static struct rss_attr_type rss_attr_to_valid_type[] = {
+ { AVF_ETH_RSS_L2_SRC_ONLY | AVF_ETH_RSS_L2_DST_ONLY, AVF_ETH_RSS_ETH },
+ { AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY, VALID_RSS_L3 },
+ { AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY, VALID_RSS_L4 },
+  /* currently, IPv6 prefix only supports 64 bits */
+ { AVF_ETH_RSS_L3_PRE64, VALID_RSS_IPV6 },
+ { INVALID_RSS_ATTR, 0 }
+};
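For illustration, a sketch of how these two tables are typically consulted
when validating a requested rss_type (ARRAY_LEN is vppinfra's array-size
helper):

    static int
    avf_rss_type_valid (u64 rss_type)
    {
      u32 i;
      for (i = 0; i < ARRAY_LEN (invalid_rss_comb); i++)
        if ((rss_type & invalid_rss_comb[i]) == invalid_rss_comb[i])
          return 0; /* forbidden combination requested */
      for (i = 0; i < ARRAY_LEN (rss_attr_to_valid_type); i++)
        if ((rss_type & rss_attr_to_valid_type[i].attr) &&
            !(rss_type & rss_attr_to_valid_type[i].type))
          return 0; /* attribute set without a matching valid type */
      return 1;
    }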
+
+/* raw */
+enum avf_flow_item_type avf_pattern_raw[] = {
+ AVF_FLOW_ITEM_TYPE_RAW,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* empty */
+enum avf_flow_item_type avf_pattern_empty[] = {
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* L2 */
+enum avf_flow_item_type avf_pattern_ethertype[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_ethertype_vlan[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_ethertype_qinq[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* ARP */
+enum avf_flow_item_type avf_pattern_eth_arp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_ARP_ETH_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* non-tunnel IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_SCTP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv4_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_SCTP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv4_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_SCTP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* non-tunnel IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_frag_ext[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6_frag_ext[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6_frag_ext[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_IPV6_FRAG_EXT, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_SCTP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_SCTP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_SCTP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_icmp6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ICMP6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6_icmp6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ICMP6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6_icmp6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_ICMP6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPv4 GTPC */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpc[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GTPU (EH) */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPv6 GTPC */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpc[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GTPU (EH) */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GTPU IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv4 UDP GTPU IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv4 UDP GTPU IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv6 UDP GTPU IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv6 UDP GTPU IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv4 UDP GTPU IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv4 UDP GTPU IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv6 UDP GTPU IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv6 UDP GTPU IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv4 UDP GTPU EH IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv4 UDP GTPU EH IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv6 UDP GTPU EH IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv6 UDP GTPU EH IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv4 UDP GTPU EH IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv4 UDP GTPU EH IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv6 UDP GTPU EH IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv6 UDP GTPU EH IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GTPU IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv6_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GTPU IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GTPU IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv6_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GTPU EH IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GTPU EH IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv6_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GTPU EH IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GTPU EH IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv6_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* ESP */
+enum avf_flow_item_type avf_pattern_eth_ipv4_esp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_ESP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_esp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_ESP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_esp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ESP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_esp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_ESP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* AH */
+enum avf_flow_item_type avf_pattern_eth_ipv4_ah[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_AH,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_ah[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_AH,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* L2TPV3 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_l2tpv3[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_L2TPV3OIP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_l2tpv3[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_L2TPV3OIP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* PFCP */
+enum avf_flow_item_type avf_pattern_eth_ipv4_pfcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_PFCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_pfcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_PFCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* ECPRI */
+enum avf_flow_item_type avf_pattern_eth_ecpri[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_ECPRI,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_ecpri[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_ECPRI, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* GRE */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* PPPoL2TPv2oUDP */
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
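+/*
+ * Each entry maps one supported item-type sequence to the RSS types that
+ * may be hashed on it (the allow mask used during validation) and to the
+ * virtchnl proto-header template copied as the starting point before
+ * refinement.
+ */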
+static struct avf_pattern_match_item avf_hash_pattern_list[] = {
+ /* IPv4 */
+ { avf_pattern_raw, AVF_INSET_NONE, NULL },
+ { avf_pattern_eth_ipv4, AVF_RSS_TYPE_OUTER_IPV4, &outer_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_udp, AVF_RSS_TYPE_OUTER_IPV4_UDP,
+ &outer_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_tcp, AVF_RSS_TYPE_OUTER_IPV4_TCP,
+ &outer_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_sctp, AVF_RSS_TYPE_OUTER_IPV4_SCTP,
+ &outer_ipv4_sctp_tmplt },
+ { avf_pattern_eth_vlan_ipv4, AVF_RSS_TYPE_VLAN_IPV4, &outer_ipv4_tmplt },
+ { avf_pattern_eth_vlan_ipv4_udp, AVF_RSS_TYPE_VLAN_IPV4_UDP,
+ &outer_ipv4_udp_tmplt },
+ { avf_pattern_eth_vlan_ipv4_tcp, AVF_RSS_TYPE_VLAN_IPV4_TCP,
+ &outer_ipv4_tcp_tmplt },
+ { avf_pattern_eth_vlan_ipv4_sctp, AVF_RSS_TYPE_VLAN_IPV4_SCTP,
+ &outer_ipv4_sctp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu, AVF_ETH_RSS_IPV4, &outer_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_esp, AVF_RSS_TYPE_IPV4_ESP, &ipv4_esp_tmplt },
+ { avf_pattern_eth_ipv4_udp_esp, AVF_RSS_TYPE_IPV4_ESP, &ipv4_udp_esp_tmplt },
+ { avf_pattern_eth_ipv4_ah, AVF_RSS_TYPE_IPV4_AH, &ipv4_ah_tmplt },
+ { avf_pattern_eth_ipv4_l2tpv3, AVF_RSS_TYPE_IPV4_L2TPV3,
+ &ipv4_l2tpv3_tmplt },
+ { avf_pattern_eth_ipv4_pfcp, AVF_RSS_TYPE_IPV4_PFCP, &ipv4_pfcp_tmplt },
+ { avf_pattern_eth_ipv4_gtpc, AVF_ETH_RSS_IPV4, &ipv4_udp_gtpc_tmplt },
+ { avf_pattern_eth_ecpri, AVF_ETH_RSS_ECPRI, &eth_ecpri_tmplt },
+ { avf_pattern_eth_ipv4_ecpri, AVF_ETH_RSS_ECPRI, &ipv4_ecpri_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4, AVF_RSS_TYPE_INNER_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4, AVF_RSS_TYPE_INNER_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_tcp, AVF_RSS_TYPE_INNER_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_tcp, AVF_RSS_TYPE_INNER_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_udp, AVF_RSS_TYPE_INNER_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_udp, AVF_RSS_TYPE_INNER_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2, AVF_RSS_TYPE_ETH_L2TPV2,
+ &ipv4_l2tpv2_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp, AVF_RSS_TYPE_ETH_L2TPV2,
+ &ipv4_l2tpv2_ppp_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4, AVF_RSS_TYPE_INNER_IPV4,
+ &udp_l2tpv2_ppp_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4_udp, AVF_RSS_TYPE_INNER_IPV4_UDP,
+ &udp_l2tpv2_ppp_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4_tcp, AVF_RSS_TYPE_INNER_IPV4_TCP,
+ &udp_l2tpv2_ppp_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4, AVF_RSS_TYPE_INNER_IPV4,
+ &udp_l2tpv2_ppp_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4_udp, AVF_RSS_TYPE_INNER_IPV4_UDP,
+ &udp_l2tpv2_ppp_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4_tcp, AVF_RSS_TYPE_INNER_IPV4_TCP,
+ &udp_l2tpv2_ppp_ipv4_tcp_tmplt },
+
+ /* IPv6 */
+ { avf_pattern_eth_ipv6, AVF_RSS_TYPE_OUTER_IPV6, &outer_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_frag_ext, AVF_RSS_TYPE_OUTER_IPV6_FRAG,
+ &outer_ipv6_frag_tmplt },
+ { avf_pattern_eth_ipv6_udp, AVF_RSS_TYPE_OUTER_IPV6_UDP,
+ &outer_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_tcp, AVF_RSS_TYPE_OUTER_IPV6_TCP,
+ &outer_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_sctp, AVF_RSS_TYPE_OUTER_IPV6_SCTP,
+ &outer_ipv6_sctp_tmplt },
+ { avf_pattern_eth_vlan_ipv6, AVF_RSS_TYPE_VLAN_IPV6, &outer_ipv6_tmplt },
+ { avf_pattern_eth_vlan_ipv6_frag_ext, AVF_RSS_TYPE_OUTER_IPV6_FRAG,
+ &outer_ipv6_frag_tmplt },
+ { avf_pattern_eth_vlan_ipv6_udp, AVF_RSS_TYPE_VLAN_IPV6_UDP,
+ &outer_ipv6_udp_tmplt },
+ { avf_pattern_eth_vlan_ipv6_tcp, AVF_RSS_TYPE_VLAN_IPV6_TCP,
+ &outer_ipv6_tcp_tmplt },
+ { avf_pattern_eth_vlan_ipv6_sctp, AVF_RSS_TYPE_VLAN_IPV6_SCTP,
+ &outer_ipv6_sctp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu, AVF_ETH_RSS_IPV6, &outer_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_esp, AVF_RSS_TYPE_IPV6_ESP, &ipv6_esp_tmplt },
+ { avf_pattern_eth_ipv6_udp_esp, AVF_RSS_TYPE_IPV6_ESP, &ipv6_udp_esp_tmplt },
+ { avf_pattern_eth_ipv6_ah, AVF_RSS_TYPE_IPV6_AH, &ipv6_ah_tmplt },
+ { avf_pattern_eth_ipv6_l2tpv3, AVF_RSS_TYPE_IPV6_L2TPV3,
+ &ipv6_l2tpv3_tmplt },
+ { avf_pattern_eth_ipv6_pfcp, AVF_RSS_TYPE_IPV6_PFCP, &ipv6_pfcp_tmplt },
+ { avf_pattern_eth_ipv6_gtpc, AVF_ETH_RSS_IPV6, &ipv6_udp_gtpc_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6, AVF_RSS_TYPE_INNER_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6, AVF_RSS_TYPE_INNER_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_tcp, AVF_RSS_TYPE_INNER_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_tcp, AVF_RSS_TYPE_INNER_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_udp, AVF_RSS_TYPE_INNER_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_udp, AVF_RSS_TYPE_INNER_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2, AVF_RSS_TYPE_ETH_L2TPV2,
+ &ipv6_l2tpv2_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp, AVF_RSS_TYPE_ETH_L2TPV2,
+ &ipv6_l2tpv2_ppp_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6, AVF_RSS_TYPE_INNER_IPV6,
+ &udp_l2tpv2_ppp_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6_udp, AVF_RSS_TYPE_INNER_IPV6_UDP,
+ &udp_l2tpv2_ppp_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6_tcp, AVF_RSS_TYPE_INNER_IPV6_TCP,
+ &udp_l2tpv2_ppp_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6, AVF_RSS_TYPE_INNER_IPV6,
+ &udp_l2tpv2_ppp_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6_udp, AVF_RSS_TYPE_INNER_IPV6_UDP,
+ &udp_l2tpv2_ppp_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6_tcp, AVF_RSS_TYPE_INNER_IPV6_TCP,
+ &udp_l2tpv2_ppp_ipv6_tcp_tmplt },
+};
+
+static inline u64
+avf_eth_rss_hf_refine (u64 rss_hf)
+{
+ if ((rss_hf & AVF_ETH_RSS_L3_SRC_ONLY) && (rss_hf & AVF_ETH_RSS_L3_DST_ONLY))
+ rss_hf &= ~(AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY);
+
+ if ((rss_hf & AVF_ETH_RSS_L4_SRC_ONLY) && (rss_hf & AVF_ETH_RSS_L4_DST_ONLY))
+ rss_hf &= ~(AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY);
+
+ return rss_hf;
+}
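+
+/*
+ * Illustrative example: requesting both SRC_ONLY and DST_ONLY at the same
+ * layer is contradictory, so both bits are cleared and the full src+dst
+ * tuple is hashed:
+ *
+ *   u64 hf = AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_L3_SRC_ONLY |
+ *            AVF_ETH_RSS_L3_DST_ONLY;
+ *   hf = avf_eth_rss_hf_refine (hf); // hf is now just AVF_ETH_RSS_IPV4
+ */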
+
+static int
+avf_any_invalid_rss_type (enum avf_eth_hash_function rss_func, u64 rss_type,
+ u64 allow_rss_type)
+{
+ u32 i;
+
+ /**
+   * L3/L4 SRC/DST_ONLY selectors cannot be combined with the
+   * SYMMETRIC_TOEPLITZ hash function.
+   */
+ if (rss_func == AVF_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ)
+ {
+ if (rss_type & (AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY |
+ AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY))
+ return 1;
+
+ if (!(rss_type &
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_IPV6 |
+ AVF_ETH_RSS_NONFRAG_IPV4_UDP | AVF_ETH_RSS_NONFRAG_IPV6_UDP |
+ AVF_ETH_RSS_NONFRAG_IPV4_TCP | AVF_ETH_RSS_NONFRAG_IPV6_TCP |
+ AVF_ETH_RSS_NONFRAG_IPV4_SCTP | AVF_ETH_RSS_NONFRAG_IPV6_SCTP)))
+ return 1;
+ }
+
+ /* check for invalid RSS bit combinations */
+ for (i = 0; i < _vec_len (invalid_rss_comb); i++)
+ {
+ if (__builtin_popcountll (rss_type & invalid_rss_comb[i]) > 1)
+ return 1;
+ }
+
+ /* check for invalid RSS attributes */
+ for (i = 0; i < _vec_len (rss_attr_to_valid_type); i++)
+ {
+ struct rss_attr_type *rat = &rss_attr_to_valid_type[i];
+
+ if (rat->attr & rss_type && !(rat->type & rss_type))
+ return 1;
+ }
+
+ /* mask off attribute bits, then reject RSS types outside the allowed set */
+ rss_type &= ~VALID_RSS_ATTR;
+
+ return ((rss_type & allow_rss_type) != rss_type);
+}
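+
+/*
+ * For example, a symmetric Toeplitz request combined with
+ * AVF_ETH_RSS_L3_SRC_ONLY is rejected, since hashing only one side of the
+ * tuple cannot be symmetric:
+ *
+ *   avf_any_invalid_rss_type (AVF_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ,
+ *                             AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_L3_SRC_ONLY,
+ *                             allow); // returns 1 for any 'allow' mask
+ */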
+
+int
+avf_rss_cfg_create (struct virtchnl_rss_cfg **rss_cfg, int tunnel_level)
+{
+ *rss_cfg = clib_mem_alloc (sizeof (**rss_cfg));
+
+ clib_memset (*rss_cfg, 0, sizeof (**rss_cfg));
+
+ (*rss_cfg)->proto_hdrs.tunnel_level = tunnel_level;
+
+ return 0;
+}
+
+int
+avf_rss_rcfg_destroy (struct virtchnl_rss_cfg *rss_cfg)
+{
+ clib_mem_free (rss_cfg);
+
+ return 0;
+}
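+
+/*
+ * Typical lifetime, as a minimal sketch: allocate a config, let the
+ * parsers below fill it, submit it with avf_rss_rule_create () and free it
+ * once the rule is no longer needed:
+ *
+ *   struct virtchnl_rss_cfg *cfg;
+ *   avf_rss_cfg_create (&cfg, TUNNEL_LEVEL_INNER);
+ *   // ... avf_rss_parse_pattern_action (items, actions, cfg, &err) ...
+ *   avf_rss_rcfg_destroy (cfg);
+ */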
+
+/* refine proto hdrs based on the GTPU RSS type */
+static void
+avf_refine_proto_hdrs_gtpu (struct virtchnl_proto_hdrs *proto_hdrs,
+ u64 rss_type)
+{
+ struct virtchnl_proto_hdr *hdr;
+ int i;
+
+ if (!(rss_type & AVF_ETH_RSS_GTPU))
+ return;
+
+ for (i = 0; i < proto_hdrs->count; i++)
+ {
+ hdr = &proto_hdrs->proto_hdr[i];
+ switch (hdr->type)
+ {
+ case VIRTCHNL_PROTO_HDR_GTPU_IP:
+ REFINE_PROTO_FLD (ADD, GTPU_IP_TEID);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void
+avf_hash_add_fragment_hdr (struct virtchnl_proto_hdrs *hdrs, int layer)
+{
+ struct virtchnl_proto_hdr *hdr1;
+ struct virtchnl_proto_hdr *hdr2;
+ int i;
+
+ if (layer < 0 || layer > hdrs->count)
+ return;
+
+ /* shift headers up one slot to make room at 'layer' */
+ for (i = hdrs->count; i >= layer; i--)
+ {
+ hdr1 = &hdrs->proto_hdr[i];
+ hdr2 = &hdrs->proto_hdr[i - 1];
+ *hdr1 = *hdr2;
+ }
+
+ /* insert a dummy fragment header */
+ hdr1 = &hdrs->proto_hdr[layer];
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, IPV4_FRAG);
+ hdrs->count = ++layer;
+}
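+
+/*
+ * Illustrative example: for { ETH, IPV4, UDP } and layer == 2, the headers
+ * from index 2 up are shifted one slot, a dummy IPV4_FRAG header is written
+ * at index 2, and count is set to layer + 1, so headers past the fragment
+ * header no longer contribute to the hash.
+ */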
+
+/* refine proto hdrs based on the L2, L3, L4 RSS type */
+static void
+avf_refine_proto_hdrs_l234 (struct virtchnl_proto_hdrs *proto_hdrs,
+ u64 rss_type)
+{
+ struct virtchnl_proto_hdr *hdr;
+ int i;
+
+ for (i = 0; i < proto_hdrs->count; i++)
+ {
+ hdr = &proto_hdrs->proto_hdr[i];
+ switch (hdr->type)
+ {
+ case VIRTCHNL_PROTO_HDR_ETH:
+ if (!(rss_type & AVF_ETH_RSS_ETH))
+ hdr->field_selector = 0;
+ else if (rss_type & AVF_ETH_RSS_L2_SRC_ONLY)
+ REFINE_PROTO_FLD (DEL, ETH_DST);
+ else if (rss_type & AVF_ETH_RSS_L2_DST_ONLY)
+ REFINE_PROTO_FLD (DEL, ETH_SRC);
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV4:
+ if (rss_type &
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_FRAG_IPV4 |
+ AVF_ETH_RSS_NONFRAG_IPV4_UDP | AVF_ETH_RSS_NONFRAG_IPV4_TCP |
+ AVF_ETH_RSS_NONFRAG_IPV4_SCTP))
+ {
+ if (rss_type & AVF_ETH_RSS_FRAG_IPV4)
+ {
+ avf_hash_add_fragment_hdr (proto_hdrs, i + 1);
+ }
+ else if (rss_type & AVF_ETH_RSS_L3_SRC_ONLY)
+ {
+ REFINE_PROTO_FLD (DEL, IPV4_DST);
+ }
+ else if (rss_type & AVF_ETH_RSS_L3_DST_ONLY)
+ {
+ REFINE_PROTO_FLD (DEL, IPV4_SRC);
+ }
+ else if (rss_type &
+ (AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY))
+ {
+ REFINE_PROTO_FLD (DEL, IPV4_DST);
+ REFINE_PROTO_FLD (DEL, IPV4_SRC);
+ }
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+
+ if (rss_type & AVF_ETH_RSS_IPV4_CHKSUM)
+ REFINE_PROTO_FLD (ADD, IPV4_CHKSUM);
+
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV4_FRAG:
+ if (rss_type &
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_FRAG_IPV4 |
+ AVF_ETH_RSS_NONFRAG_IPV4_UDP | AVF_ETH_RSS_NONFRAG_IPV4_TCP |
+ AVF_ETH_RSS_NONFRAG_IPV4_SCTP))
+ {
+ if (rss_type & AVF_ETH_RSS_FRAG_IPV4)
+ REFINE_PROTO_FLD (ADD, IPV4_FRAG_PKID);
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+
+ if (rss_type & AVF_ETH_RSS_IPV4_CHKSUM)
+ REFINE_PROTO_FLD (ADD, IPV4_CHKSUM);
+
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV6:
+ if (rss_type &
+ (AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_FRAG_IPV6 |
+ AVF_ETH_RSS_NONFRAG_IPV6_UDP | AVF_ETH_RSS_NONFRAG_IPV6_TCP |
+ AVF_ETH_RSS_NONFRAG_IPV6_SCTP))
+ {
+ if (rss_type & AVF_ETH_RSS_L3_SRC_ONLY)
+ {
+ REFINE_PROTO_FLD (DEL, IPV6_DST);
+ }
+ else if (rss_type & AVF_ETH_RSS_L3_DST_ONLY)
+ {
+ REFINE_PROTO_FLD (DEL, IPV6_SRC);
+ }
+ else if (rss_type &
+ (AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY))
+ {
+ REFINE_PROTO_FLD (DEL, IPV6_DST);
+ REFINE_PROTO_FLD (DEL, IPV6_SRC);
+ }
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+ if (rss_type & AVF_ETH_RSS_L3_PRE64)
+ {
+ if (REFINE_PROTO_FLD (TEST, IPV6_SRC))
+ REPALCE_PROTO_FLD (IPV6_SRC, IPV6_PREFIX64_SRC);
+ if (REFINE_PROTO_FLD (TEST, IPV6_DST))
+ REPALCE_PROTO_FLD (IPV6_DST, IPV6_PREFIX64_DST);
+ }
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG:
+ if (rss_type & AVF_ETH_RSS_FRAG_IPV6)
+ REFINE_PROTO_FLD (ADD, IPV6_EH_FRAG_PKID);
+ else
+ hdr->field_selector = 0;
+
+ break;
+ case VIRTCHNL_PROTO_HDR_UDP:
+ if (rss_type &
+ (AVF_ETH_RSS_NONFRAG_IPV4_UDP | AVF_ETH_RSS_NONFRAG_IPV6_UDP))
+ {
+ if (rss_type & AVF_ETH_RSS_L4_SRC_ONLY)
+ REFINE_PROTO_FLD (DEL, UDP_DST_PORT);
+ else if (rss_type & AVF_ETH_RSS_L4_DST_ONLY)
+ REFINE_PROTO_FLD (DEL, UDP_SRC_PORT);
+ else if (rss_type &
+ (AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY))
+ hdr->field_selector = 0;
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+
+ if (rss_type & AVF_ETH_RSS_L4_CHKSUM)
+ REFINE_PROTO_FLD (ADD, UDP_CHKSUM);
+ break;
+ case VIRTCHNL_PROTO_HDR_TCP:
+ if (rss_type &
+ (AVF_ETH_RSS_NONFRAG_IPV4_TCP | AVF_ETH_RSS_NONFRAG_IPV6_TCP))
+ {
+ if (rss_type & AVF_ETH_RSS_L4_SRC_ONLY)
+ REFINE_PROTO_FLD (DEL, TCP_DST_PORT);
+ else if (rss_type & AVF_ETH_RSS_L4_DST_ONLY)
+ REFINE_PROTO_FLD (DEL, TCP_SRC_PORT);
+ else if (rss_type &
+ (AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY))
+ hdr->field_selector = 0;
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+
+ if (rss_type & AVF_ETH_RSS_L4_CHKSUM)
+ REFINE_PROTO_FLD (ADD, TCP_CHKSUM);
+ break;
+ case VIRTCHNL_PROTO_HDR_SCTP:
+ if (rss_type &
+ (AVF_ETH_RSS_NONFRAG_IPV4_SCTP | AVF_ETH_RSS_NONFRAG_IPV6_SCTP))
+ {
+ if (rss_type & AVF_ETH_RSS_L4_SRC_ONLY)
+ REFINE_PROTO_FLD (DEL, SCTP_DST_PORT);
+ else if (rss_type & AVF_ETH_RSS_L4_DST_ONLY)
+ REFINE_PROTO_FLD (DEL, SCTP_SRC_PORT);
+ else if (rss_type &
+ (AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY))
+ hdr->field_selector = 0;
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+
+ if (rss_type & AVF_ETH_RSS_L4_CHKSUM)
+ REFINE_PROTO_FLD (ADD, SCTP_CHKSUM);
+ break;
+ case VIRTCHNL_PROTO_HDR_S_VLAN:
+ if (!(rss_type & AVF_ETH_RSS_S_VLAN))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_C_VLAN:
+ if (!(rss_type & AVF_ETH_RSS_C_VLAN))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_L2TPV3:
+ if (!(rss_type & AVF_ETH_RSS_L2TPV3))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_ESP:
+ if (!(rss_type & AVF_ETH_RSS_ESP))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_AH:
+ if (!(rss_type & AVF_ETH_RSS_AH))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_PFCP:
+ if (!(rss_type & AVF_ETH_RSS_PFCP))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_ECPRI:
+ if (!(rss_type & AVF_ETH_RSS_ECPRI))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_L2TPV2:
+ if (!(rss_type & AVF_ETH_RSS_L2TPV2))
+ hdr->field_selector = 0;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void
+avf_refine_proto_hdrs_by_pattern (struct virtchnl_proto_hdrs *proto_hdrs,
+ u64 phint)
+{
+ struct virtchnl_proto_hdr *hdr1;
+ struct virtchnl_proto_hdr *hdr2;
+ int i, shift_count = 1;
+ int tun_lvl = proto_hdrs->tunnel_level;
+
+ if (!(phint & AVF_PHINT_GTPU_MSK) && !(phint & AVF_PHINT_GRE))
+ return;
+
+ while (tun_lvl)
+ {
+ if (phint & AVF_PHINT_LAYERS_MSK)
+ shift_count = 2;
+
+ /* shift headers up by shift_count slots */
+ for (i = proto_hdrs->count - 1 + shift_count; i > shift_count - 1; i--)
+ {
+ hdr1 = &proto_hdrs->proto_hdr[i];
+ hdr2 = &proto_hdrs->proto_hdr[i - shift_count];
+ *hdr1 = *hdr2;
+ }
+
+ if (shift_count == 1)
+ {
+ /* add the tunnel header at layer 0 */
+ hdr1 = &proto_hdrs->proto_hdr[0];
+ }
+ else
+ {
+ /* add the tunnel header and an outer IP header */
+ hdr1 = &proto_hdrs->proto_hdr[1];
+ hdr2 = &proto_hdrs->proto_hdr[0];
+ hdr2->field_selector = 0;
+ proto_hdrs->count++;
+ tun_lvl--;
+
+ if (tun_lvl == TUNNEL_LEVEL_OUTER)
+ {
+ if (phint & AVF_PHINT_OUTER_IPV4)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr2, IPV4);
+ else if (phint & AVF_PHINT_OUTER_IPV6)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr2, IPV6);
+ }
+ else if (tun_lvl == TUNNEL_LEVEL_INNER)
+ {
+ if (phint & AVF_PHINT_MID_IPV4)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr2, IPV4);
+ else if (phint & AVF_PHINT_MID_IPV6)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr2, IPV6);
+ }
+ }
+
+ hdr1->field_selector = 0;
+ proto_hdrs->count++;
+
+ if (phint & AVF_PHINT_GTPU_EH_DWN)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GTPU_EH_PDU_DWN);
+ else if (phint & AVF_PHINT_GTPU_EH_UP)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GTPU_EH_PDU_UP);
+ else if (phint & AVF_PHINT_GTPU_EH)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GTPU_EH);
+ else if (phint & AVF_PHINT_GTPU)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GTPU_IP);
+
+ if (phint & AVF_PHINT_GRE)
+ {
+ if (phint & AVF_PHINT_GTPU)
+ {
+ /* if GTPoGRE, add GRE header at the outer tunnel */
+ if (tun_lvl == TUNNEL_LEVEL_OUTER)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GRE);
+ }
+ else
+ {
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GRE);
+ }
+ }
+ }
+ proto_hdrs->tunnel_level = tun_lvl;
+}
+
+static void
+avf_refine_proto_hdrs (struct virtchnl_proto_hdrs *proto_hdrs, u64 rss_type,
+ u64 phint)
+{
+ avf_refine_proto_hdrs_l234 (proto_hdrs, rss_type);
+ avf_refine_proto_hdrs_by_pattern (proto_hdrs, phint);
+ avf_refine_proto_hdrs_gtpu (proto_hdrs, rss_type);
+}
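+
+/*
+ * The order of the three passes matters: the L2/L3/L4 pass trims field
+ * selectors first, the pattern pass then prepends the tunnel (and, when two
+ * slots are shifted, outer IP) headers, and the GTPU pass finally adds the
+ * TEID on the completed header list.
+ */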
+
+static int
+avf_rss_parse_action (const struct avf_flow_action actions[],
+ struct virtchnl_rss_cfg *rss_cfg,
+ struct avf_pattern_match_item *match_item, u64 phint,
+ struct avf_flow_error *error)
+{
+ const struct avf_flow_action_rss *rss;
+ const struct avf_flow_action *action;
+ u64 rss_type;
+ int ret;
+
+ for (action = actions; action->type != AVF_FLOW_ACTION_TYPE_END; action++)
+ {
+ switch (action->type)
+ {
+ case AVF_FLOW_ACTION_TYPE_RSS:
+ rss = action->conf;
+
+ if (rss->func == AVF_ETH_HASH_FUNCTION_SIMPLE_XOR)
+ {
+ rss_cfg->rss_algorithm = VIRTCHNL_RSS_ALG_XOR_ASYMMETRIC;
+ ret = avf_flow_error_set (error, AVF_FAILURE,
+ AVF_FLOW_ERROR_TYPE_ACTION, actions,
+ "simple xor is not supported.");
+ return ret;
+ }
+ else if (rss->func == AVF_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ)
+ {
+ rss_cfg->rss_algorithm = VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC;
+ }
+ else
+ {
+ rss_cfg->rss_algorithm = VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC;
+ }
+
+ if (rss->level)
+ return avf_flow_error_set (
+ error, AVF_FAILURE, AVF_FLOW_ERROR_TYPE_ACTION, actions,
+ "a nonzero RSS encapsulation level is not supported");
+
+ if (rss->key_len)
+ return avf_flow_error_set (
+ error, AVF_FAILURE, AVF_FLOW_ERROR_TYPE_ACTION, actions,
+ "a nonzero RSS key_len is not supported");
+
+ if (rss->queue_num)
+ return avf_flow_error_set (
+ error, AVF_FAILURE, AVF_FLOW_ERROR_TYPE_ACTION, actions,
+          "a nonzero RSS queue number is not supported");
+
+ if (phint == AVF_PHINT_RAW)
+ break;
+
+ rss_type = avf_eth_rss_hf_refine (rss->types);
+
+ if (avf_any_invalid_rss_type (rss->func, rss_type,
+ match_item->input_set_mask))
+ return avf_flow_error_set (error, AVF_FAILURE,
+ AVF_FLOW_ERROR_TYPE_ACTION, actions,
+ "RSS type not supported");
+
+ memcpy (&rss_cfg->proto_hdrs, match_item->meta,
+ sizeof (struct virtchnl_proto_hdrs));
+
+ avf_refine_proto_hdrs (&rss_cfg->proto_hdrs, rss_type, phint);
+
+ break;
+
+ default:
+ return avf_flow_error_set (error, AVF_FAILURE,
+ AVF_FLOW_ERROR_TYPE_ACTION, actions,
+ "Invalid action.");
+ }
+ }
+
+ return 0;
+}
+
+static int
+avf_rss_parse_generic_pattern (struct virtchnl_rss_cfg *rss_cfg,
+ struct avf_flow_item avf_items[],
+ struct avf_flow_error *error)
+{
+ struct avf_flow_item *item = avf_items;
+ u8 *pkt_buf, *msk_buf;
+ u16 spec_len, pkt_len;
+
+ spec_len = clib_strnlen (item->spec, VIRTCHNL_MAX_SIZE_GEN_PACKET);
+ pkt_len = spec_len / 2;
+
+ pkt_buf = clib_mem_alloc (pkt_len);
+ msk_buf = clib_mem_alloc (pkt_len);
+
+ avf_parse_generic_pattern (item, pkt_buf, msk_buf, spec_len);
+
+ clib_memcpy (rss_cfg->proto_hdrs.raw.spec, pkt_buf, pkt_len);
+ clib_memcpy (rss_cfg->proto_hdrs.raw.mask, msk_buf, pkt_len);
+
+ rss_cfg->proto_hdrs.count = 0;
+ rss_cfg->proto_hdrs.tunnel_level = 0;
+ rss_cfg->proto_hdrs.raw.pkt_len = pkt_len;
+
+ clib_mem_free (pkt_buf);
+ clib_mem_free (msk_buf);
+
+ return 0;
+}
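+
+/*
+ * Note: the raw item's spec is treated as a hex string, two characters per
+ * packet byte, hence pkt_len = spec_len / 2; count and tunnel_level are
+ * zeroed because the raw buffer replaces the proto-header list.
+ */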
+
+/* Find the first VOID or non-VOID item pointer */
+static const struct avf_flow_item *
+avf_find_first_item (const struct avf_flow_item *item, int is_void)
+{
+ int is_find;
+
+ while (item->type != AVF_FLOW_ITEM_TYPE_END)
+ {
+ if (is_void)
+ is_find = item->type == AVF_FLOW_ITEM_TYPE_VOID;
+ else
+ is_find = item->type != AVF_FLOW_ITEM_TYPE_VOID;
+ if (is_find)
+ break;
+ item++;
+ }
+ return item;
+}
+
+/* Skip all VOID items of the pattern */
+static void
+avf_pattern_skip_void_item (struct avf_flow_item *items,
+ const struct avf_flow_item *pattern)
+{
+ u32 cpy_count = 0;
+ const struct avf_flow_item *pb = pattern, *pe = pattern;
+
+ for (;;)
+ {
+ /* Find a non-void item first */
+ pb = avf_find_first_item (pb, 0);
+ if (pb->type == AVF_FLOW_ITEM_TYPE_END)
+ {
+ pe = pb;
+ break;
+ }
+
+ /* Find a void item */
+ pe = avf_find_first_item (pb + 1, 1);
+
+ cpy_count = pe - pb;
+ clib_memcpy (items, pb, sizeof (struct avf_flow_item) * cpy_count);
+
+ items += cpy_count;
+
+ if (pe->type == AVF_FLOW_ITEM_TYPE_END)
+ break;
+
+ pb = pe + 1;
+ }
+ /* Copy the END item. */
+ clib_memcpy (items, pe, sizeof (struct avf_flow_item));
+}
+
+/* Check if the pattern matches a supported item type array */
+static int
+avf_match_pattern (enum avf_flow_item_type *item_array,
+ const struct avf_flow_item *pattern)
+{
+ const struct avf_flow_item *item = pattern;
+
+ while ((*item_array == item->type) &&
+ (*item_array != AVF_FLOW_ITEM_TYPE_END))
+ {
+ item_array++;
+ item++;
+ }
+
+ return (*item_array == AVF_FLOW_ITEM_TYPE_END &&
+ item->type == AVF_FLOW_ITEM_TYPE_END);
+}
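+
+/*
+ * For example, an item array of { ETH, IPV4, UDP, END } matches only a
+ * pattern whose VOID-stripped items are exactly ETH, IPV4, UDP: both lists
+ * must reach END together, so a prefix or an extension of one another fails
+ * to match.
+ */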
+
+static int
+avf_rss_search_pattern_match_item (const struct avf_flow_item pattern[],
+ struct avf_pattern_match_item **match_item,
+ struct avf_flow_error *error)
+{
+ u16 i = 0;
+ struct avf_pattern_match_item *array = avf_hash_pattern_list;
+ u32 array_len =
+ sizeof (avf_hash_pattern_list) / sizeof (avf_hash_pattern_list[0]);
+ /* allocated below; freed on every exit path */
+ struct avf_flow_item *items; /* the pattern with VOID items stripped */
+ u32 item_num = 0; /* number of non-VOID items */
+
+ /* count the non-VOID items; one more is added below for the END item */
+ while ((pattern + i)->type != AVF_FLOW_ITEM_TYPE_END)
+ {
+ if ((pattern + i)->type != AVF_FLOW_ITEM_TYPE_VOID)
+ item_num++;
+ i++;
+ }
+ item_num++;
+
+ items = clib_mem_alloc (item_num * sizeof (struct avf_flow_item));
+ avf_pattern_skip_void_item (items, pattern);
+
+ for (i = 0; i < array_len; i++)
+ if (avf_match_pattern (array[i].pattern_list, items))
+ {
+ *match_item = &array[i];
+ clib_mem_free (items);
+ return 0;
+ }
+ avf_flow_error_set (error, AVF_FAILURE, AVF_FLOW_ERROR_TYPE_ITEM, pattern,
+ "Unsupported pattern");
+
+ *match_item = NULL;
+ clib_mem_free (items);
+ return -1;
+}
+
+static void
+avf_rss_parse_pattern (const struct avf_flow_item pattern[], u64 *phint)
+{
+ const struct avf_flow_item *item = pattern;
+ const struct avf_gtp_psc_hdr *psc;
+
+ for (item = pattern; item->type != AVF_FLOW_ITEM_TYPE_END; item++)
+ {
+
+ switch (item->type)
+ {
+ case AVF_FLOW_ITEM_TYPE_RAW:
+ *phint |= AVF_PHINT_RAW;
+ break;
+ case AVF_FLOW_ITEM_TYPE_IPV4:
+ if (!(*phint & AVF_PHINT_GTPU_MSK) && !(*phint & AVF_PHINT_GRE) &&
+ !(*phint & AVF_PHINT_L2TPV2))
+ *phint |= AVF_PHINT_OUTER_IPV4;
+ if ((*phint & AVF_PHINT_GRE) && !(*phint & AVF_PHINT_GTPU_MSK))
+ *phint |= AVF_PHINT_MID_IPV4;
+ break;
+ case AVF_FLOW_ITEM_TYPE_IPV6:
+ if (!(*phint & AVF_PHINT_GTPU_MSK) && !(*phint & AVF_PHINT_GRE) &&
+ !(*phint & AVF_PHINT_L2TPV2))
+ *phint |= AVF_PHINT_OUTER_IPV6;
+ if ((*phint & AVF_PHINT_GRE) && !(*phint & AVF_PHINT_GTPU_MSK))
+ *phint |= AVF_PHINT_MID_IPV6;
+ break;
+ case AVF_FLOW_ITEM_TYPE_GTPU:
+ *phint |= AVF_PHINT_GTPU;
+ break;
+ case AVF_FLOW_ITEM_TYPE_GTP_PSC:
+ *phint |= AVF_PHINT_GTPU_EH;
+ psc = item->spec;
+ if (!psc)
+ break;
+ else if (psc->pdu_type == AVF_GTPU_EH_UPLINK)
+ *phint |= AVF_PHINT_GTPU_EH_UP;
+ else if (psc->pdu_type == AVF_GTPU_EH_DWNLINK)
+ *phint |= AVF_PHINT_GTPU_EH_DWN;
+ break;
+ case AVF_FLOW_ITEM_TYPE_GRE:
+ *phint |= AVF_PHINT_GRE;
+ break;
+ default:
+ break;
+ }
+ }
+}
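+
+/*
+ * Illustrative trace: for ETH / IPV4 / GRE / IPV4 / UDP / GTPU / IPV4 the
+ * first IPv4 sets AVF_PHINT_OUTER_IPV4, GRE sets AVF_PHINT_GRE, the second
+ * IPv4 (GRE seen, no GTPU yet) sets AVF_PHINT_MID_IPV4, GTPU sets
+ * AVF_PHINT_GTPU, and the innermost IPv4 adds no hint.
+ */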
+
+int
+avf_rss_parse_pattern_action (struct avf_flow_item avf_items[],
+ struct avf_flow_action avf_actions[],
+ struct virtchnl_rss_cfg *rss_cfg,
+ struct avf_flow_error *error)
+{
+ struct avf_pattern_match_item *match_item = NULL;
+ u64 pattern_hint = 0;
+ int ret = 0;
+
+ ret = avf_rss_search_pattern_match_item (avf_items, &match_item, error);
+ if (ret)
+ return ret;
+
+ avf_rss_parse_pattern (avf_items, &pattern_hint);
+
+ if (pattern_hint == AVF_PHINT_RAW)
+ {
+ ret = avf_rss_parse_generic_pattern (rss_cfg, avf_items, error);
+ if (ret)
+ return ret;
+ }
+
+ ret = avf_rss_parse_action (avf_actions, rss_cfg, match_item, pattern_hint,
+ error);
+ return ret;
+}
+
+int
+avf_rss_rule_create (struct avf_flow_vc_ctx *ctx,
+ struct virtchnl_rss_cfg *rss_cfg)
+{
+ int ret;
+
+ ret = ctx->vc_op (ctx->vc_hdl, VIRTCHNL_ADV_OP_ADD_RSS_CFG, rss_cfg,
+ sizeof (*rss_cfg), 0, 0);
+
+ return ret;
+}
+
+int
+avf_rss_rule_destroy (struct avf_flow_vc_ctx *ctx,
+ struct virtchnl_rss_cfg *rss_cfg)
+{
+ int ret;
+
+ ret = ctx->vc_op (ctx->vc_hdl, VIRTCHNL_ADV_OP_DEL_RSS_CFG, rss_cfg,
+ sizeof (*rss_cfg), 0, 0);
+
+ return ret;
+}
+
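Taken together, a caller drives these helpers through a virtual-channel context, as flow.c does later in this patch; a condensed sketch with error handling elided (items, actions and dev_instance as in avf_flow_add below):

    struct avf_flow_vc_ctx ctx = {
      .vc_hdl = &dev_instance,          /* u32 device instance */
      .vc_op = avf_flow_vc_op_callback, /* defined in flow.c below */
    };
    struct virtchnl_rss_cfg *cfg;
    struct avf_flow_error err;

    if (avf_rss_cfg_create (&cfg, 0) == 0 &&
        avf_rss_parse_pattern_action (items, actions, cfg, &err) == 0)
      avf_rss_rule_create (&ctx, cfg); /* sends VIRTCHNL_ADV_OP_ADD_RSS_CFG */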
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/avf/cli.c b/src/plugins/avf/cli.c
index 47b4b9236f2..391ff25567a 100644
--- a/src/plugins/avf/cli.c
+++ b/src/plugins/avf/cli.c
@@ -30,37 +30,31 @@ static clib_error_t *
avf_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- unformat_input_t _line_input, *line_input = &_line_input;
avf_create_if_args_t args;
u32 tmp;
clib_memset (&args, 0, sizeof (avf_create_if_args_t));
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "%U", unformat_vlib_pci_addr, &args.addr))
+ if (unformat (input, "%U", unformat_vlib_pci_addr, &args.addr))
;
- else if (unformat (line_input, "elog"))
+ else if (unformat (input, "elog"))
args.enable_elog = 1;
- else if (unformat (line_input, "rx-queue-size %u", &tmp))
+ else if (unformat (input, "rx-queue-size %u", &tmp))
args.rxq_size = tmp;
- else if (unformat (line_input, "tx-queue-size %u", &tmp))
+ else if (unformat (input, "tx-queue-size %u", &tmp))
args.txq_size = tmp;
- else if (unformat (line_input, "num-rx-queues %u", &tmp))
+ else if (unformat (input, "num-rx-queues %u", &tmp))
args.rxq_num = tmp;
- else if (unformat (line_input, "num-tx-queues %u", &tmp))
+ else if (unformat (input, "num-tx-queues %u", &tmp))
args.txq_num = tmp;
- else if (unformat (line_input, "name %s", &args.name))
+ else if (unformat (input, "name %s", &args.name))
;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
- unformat_free (line_input);
avf_create_if (vm, &args);
@@ -69,7 +63,6 @@ avf_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (avf_create_command, static) = {
.path = "create interface avf",
.short_help = "create interface avf <pci-address> "
@@ -77,33 +70,26 @@ VLIB_CLI_COMMAND (avf_create_command, static) = {
"[num-rx-queues <size>]",
.function = avf_create_command_fn,
};
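With the line-input wrapper gone, the command parses the remainder of the line directly; a typical invocation matching the short help above (PCI address and sizes are illustrative):

    vpp# create interface avf 0000:3b:02.0 rx-queue-size 1024 num-rx-queues 2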
-/* *INDENT-ON* */
static clib_error_t *
avf_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- unformat_input_t _line_input, *line_input = &_line_input;
u32 sw_if_index = ~0;
vnet_hw_interface_t *hw;
vnet_main_t *vnm = vnet_get_main ();
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ if (unformat (input, "sw_if_index %d", &sw_if_index))
;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
- vnm, &sw_if_index))
+ else if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
- unformat_free (line_input);
if (sw_if_index == ~0)
return clib_error_return (0,
@@ -119,7 +105,6 @@ avf_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (avf_delete_command, static) = {
.path = "delete interface avf",
.short_help = "delete interface avf "
@@ -127,41 +112,34 @@ VLIB_CLI_COMMAND (avf_delete_command, static) = {
.function = avf_delete_command_fn,
.is_mp_safe = 1,
};
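As with create, delete now consumes its arguments straight from the command line; an illustrative invocation by index:

    vpp# delete interface avf sw_if_index 1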
-/* *INDENT-ON* */
static clib_error_t *
avf_test_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- unformat_input_t _line_input, *line_input = &_line_input;
u32 sw_if_index = ~0;
vnet_hw_interface_t *hw;
avf_device_t *ad;
vnet_main_t *vnm = vnet_get_main ();
int test_irq = 0, enable_elog = 0, disable_elog = 0;
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ if (unformat (input, "sw_if_index %d", &sw_if_index))
;
- else if (unformat (line_input, "irq"))
+ else if (unformat (input, "irq"))
test_irq = 1;
- else if (unformat (line_input, "elog-on"))
+ else if (unformat (input, "elog-on"))
enable_elog = 1;
- else if (unformat (line_input, "elog-off"))
+ else if (unformat (input, "elog-off"))
disable_elog = 1;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
- vnm, &sw_if_index))
+ else if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
- unformat_free (line_input);
if (sw_if_index == ~0)
return clib_error_return (0,
@@ -185,14 +163,12 @@ avf_test_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (avf_test_command, static) = {
.path = "test avf",
.short_help = "test avf [<interface> | sw_if_index <sw_idx>] [irq] "
"[elog-on] [elog-off]",
.function = avf_test_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
avf_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/avf/device.c b/src/plugins/avf/device.c
index dbaf4a4a7f2..1618800c432 100644
--- a/src/plugins/avf/device.c
+++ b/src/plugins/avf/device.c
@@ -156,7 +156,6 @@ avf_aq_desc_enq (vlib_main_t * vm, avf_device_t * ad, avf_aq_desc_t * dt,
if (ad->flags & AVF_DEVICE_F_ELOG)
clib_memcpy_fast (&dc, d, sizeof (avf_aq_desc_t));
- CLIB_MEMORY_BARRIER ();
ad->atq_next_slot = (ad->atq_next_slot + 1) % AVF_MBOX_LEN;
avf_reg_write (ad, AVF_ATQT, ad->atq_next_slot);
avf_reg_flush (ad);
@@ -610,18 +609,25 @@ avf_op_config_rss_lut (vlib_main_t * vm, avf_device_t * ad)
clib_error_t *
avf_op_config_rss_key (vlib_main_t * vm, avf_device_t * ad)
{
+ /* from DPDK i40e... */
+ static uint32_t rss_key_default[] = { 0x6b793944, 0x23504cb5, 0x5bea75b6,
+ 0x309f4f12, 0x3dc0a2b8, 0x024ddcdf,
+ 0x339b8ca0, 0x4c4af64a, 0x34fac605,
+ 0x55d85839, 0x3a58997d, 0x2ec938e1,
+ 0x66031581 };
int msg_len = sizeof (virtchnl_rss_key_t) + ad->rss_key_size - 1;
- int i;
u8 msg[msg_len];
virtchnl_rss_key_t *rk;
+ if (sizeof (rss_key_default) != ad->rss_key_size)
+ return clib_error_create ("unsupported RSS key size (expected %d, got %d)",
+ sizeof (rss_key_default), ad->rss_key_size);
+
clib_memset (msg, 0, msg_len);
rk = (virtchnl_rss_key_t *) msg;
rk->vsi_id = ad->vsi_id;
rk->key_len = ad->rss_key_size;
- u32 seed = random_default_seed ();
- for (i = 0; i < ad->rss_key_size; i++)
- rk->key[i] = (u8) random_u32 (&seed);
+ memcpy_s (rk->key, rk->key_len, rss_key_default, sizeof (rss_key_default));
avf_log_debug (ad, "config_rss_key: vsi_id %u rss_key_size %u key 0x%U",
rk->vsi_id, rk->key_len, format_hex_bytes_no_wrap, rk->key,
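A brief consistency check on the guard above: the default key is 13 u32 words, i.e. 52 bytes, which is the RSS key size the i40e/iavf device family reports, so any other ad->rss_key_size is rejected rather than silently truncated or zero-padded.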
@@ -1031,6 +1037,12 @@ avf_device_init (vlib_main_t * vm, avf_main_t * am, avf_device_t * ad,
outer = vc.offloads.stripping_support.outer & mask;
inner = vc.offloads.stripping_support.inner & mask;
+ /* Check for ability to modify the VLAN setting */
+ outer =
+ vc.offloads.stripping_support.outer & VIRTCHNL_VLAN_TOGGLE ? outer : 0;
+ inner =
+ vc.offloads.stripping_support.inner & VIRTCHNL_VLAN_TOGGLE ? inner : 0;
+
if ((outer || inner) &&
(error = avf_op_disable_vlan_stripping_v2 (vm, ad, outer, inner)))
return error;
@@ -1119,7 +1131,6 @@ avf_process_one_device (vlib_main_t * vm, avf_device_t * ad, int is_irq)
if (is_irq == 0)
avf_op_get_stats (vm, ad, &ad->eth_stats);
- /* *INDENT-OFF* */
vec_foreach (e, ad->events)
{
avf_log_debug (ad, "event: %s (%u) sev %d",
@@ -1162,8 +1173,9 @@ avf_process_one_device (vlib_main_t * vm, avf_device_t * ad, int is_irq)
flags |= (VNET_HW_INTERFACE_FLAG_FULL_DUPLEX |
VNET_HW_INTERFACE_FLAG_LINK_UP);
vnet_hw_interface_set_flags (vnm, ad->hw_if_index, flags);
- vnet_hw_interface_set_link_speed (vnm, ad->hw_if_index,
- mbps * 1000);
+ vnet_hw_interface_set_link_speed (
+ vnm, ad->hw_if_index,
+ (mbps == UINT32_MAX) ? UINT32_MAX : mbps * 1000);
ad->link_speed = mbps;
}
else if (!link_up && (ad->flags & AVF_DEVICE_F_LINK_UP) != 0)
@@ -1214,7 +1226,6 @@ avf_process_one_device (vlib_main_t * vm, avf_device_t * ad, int is_irq)
}
}
}
- /* *INDENT-ON* */
vec_reset_length (ad->events);
return;
@@ -1227,16 +1238,32 @@ error:
clib_error_t *
avf_op_program_flow (vlib_main_t *vm, avf_device_t *ad, int is_create,
- u8 *rule, u32 rule_len, u8 *program_status,
- u32 status_len)
+ enum virthnl_adv_ops vc_op, u8 *rule, u32 rule_len,
+ u8 *program_status, u32 status_len)
{
+ virtchnl_ops_t op;
+
avf_log_debug (ad, "avf_op_program_flow: vsi_id %u is_create %u", ad->vsi_id,
is_create);
- return avf_send_to_pf (vm, ad,
- is_create ? VIRTCHNL_OP_ADD_FDIR_FILTER :
- VIRTCHNL_OP_DEL_FDIR_FILTER,
- rule, rule_len, program_status, status_len);
+ switch (vc_op)
+ {
+ case VIRTCHNL_ADV_OP_ADD_FDIR_FILTER:
+ case VIRTCHNL_ADV_OP_DEL_FDIR_FILTER:
+ op =
+ is_create ? VIRTCHNL_OP_ADD_FDIR_FILTER : VIRTCHNL_OP_DEL_FDIR_FILTER;
+ break;
+ case VIRTCHNL_ADV_OP_ADD_RSS_CFG:
+ case VIRTCHNL_ADV_OP_DEL_RSS_CFG:
+ op = is_create ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG;
+ break;
+ default:
+ return clib_error_return (0, "invalid virtchnl opcode");
+ }
+
+ return avf_send_to_pf (vm, ad, op, rule, rule_len, program_status,
+ status_len);
}
static void
@@ -1250,9 +1277,9 @@ avf_process_handle_request (vlib_main_t * vm, avf_process_req_t * req)
else if (req->type == AVF_PROCESS_REQ_CONFIG_PROMISC_MDDE)
req->error = avf_op_config_promisc_mode (vm, ad, req->is_enable);
else if (req->type == AVF_PROCESS_REQ_PROGRAM_FLOW)
- req->error =
- avf_op_program_flow (vm, ad, req->is_add, req->rule, req->rule_len,
- req->program_status, req->status_len);
+ req->error = avf_op_program_flow (vm, ad, req->is_add, req->vc_op,
+ req->rule, req->rule_len,
+ req->program_status, req->status_len);
else
clib_panic ("BUG: unknown avf process request type");
@@ -1374,7 +1401,6 @@ avf_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
/* create local list of device pointers as device pool may grow
* during suspend */
vec_reset_length (dev_pointers);
- /* *INDENT-OFF* */
pool_foreach_index (i, am->devices)
{
vec_add1 (dev_pointers, avf_get_device (i));
@@ -1384,19 +1410,16 @@ avf_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
{
avf_process_one_device (vm, dev_pointers[i], irq);
};
- /* *INDENT-ON* */
last_run_duration = vlib_time_now (vm) - last_periodic_time;
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (avf_process_node) = {
.function = avf_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "avf-process",
};
-/* *INDENT-ON* */
static void
avf_irq_0_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
@@ -1409,13 +1432,11 @@ avf_irq_0_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
if (ad->flags & AVF_DEVICE_F_ELOG)
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (el) =
{
.format = "avf[%d] irq 0: icr0 0x%x",
.format_args = "i4i4",
};
- /* *INDENT-ON* */
struct
{
u32 dev_instance;
@@ -1445,13 +1466,11 @@ avf_irq_n_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
if (ad->flags & AVF_DEVICE_F_ELOG)
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (el) =
{
.format = "avf[%d] irq %d: received",
.format_args = "i4i2",
};
- /* *INDENT-ON* */
struct
{
u32 dev_instance;
@@ -1497,7 +1516,6 @@ avf_delete_if (vlib_main_t * vm, avf_device_t * ad, int with_barrier)
vlib_physmem_free (vm, ad->atq_bufs);
vlib_physmem_free (vm, ad->arq_bufs);
- /* *INDENT-OFF* */
vec_foreach_index (i, ad->rxqs)
{
avf_rxq_t *rxq = vec_elt_at_index (ad->rxqs, i);
@@ -1507,10 +1525,8 @@ avf_delete_if (vlib_main_t * vm, avf_device_t * ad, int with_barrier)
rxq->n_enqueued);
vec_free (rxq->bufs);
}
- /* *INDENT-ON* */
vec_free (ad->rxqs);
- /* *INDENT-OFF* */
vec_foreach_index (i, ad->txqs)
{
avf_txq_t *txq = vec_elt_at_index (ad->txqs, i);
@@ -1530,7 +1546,6 @@ avf_delete_if (vlib_main_t * vm, avf_device_t * ad, int with_barrier)
vec_free (txq->tmp_descs);
clib_spinlock_free (&txq->lock);
}
- /* *INDENT-ON* */
vec_free (ad->txqs);
vec_free (ad->name);
@@ -1582,6 +1597,7 @@ void
avf_create_if (vlib_main_t * vm, avf_create_if_args_t * args)
{
vnet_main_t *vnm = vnet_get_main ();
+ vnet_eth_interface_registration_t eir = {};
avf_main_t *am = &avf_main;
avf_device_t *ad, **adp;
vlib_pci_dev_handle_t h;
@@ -1592,7 +1608,6 @@ avf_create_if (vlib_main_t * vm, avf_create_if_args_t * args)
if (avf_validate_queue_size (args) != 0)
return;
- /* *INDENT-OFF* */
pool_foreach (adp, am->devices) {
if ((*adp)->pci_addr.as_u32 == args->addr.as_u32)
{
@@ -1603,7 +1618,6 @@ avf_create_if (vlib_main_t * vm, avf_create_if_args_t * args)
return;
}
}
- /* *INDENT-ON* */
pool_get (am->devices, adp);
adp[0] = ad = clib_mem_alloc_aligned (sizeof (avf_device_t),
@@ -1715,29 +1729,25 @@ avf_create_if (vlib_main_t * vm, avf_create_if_args_t * args)
goto error;
/* create interface */
- error = ethernet_register_interface (vnm, avf_device_class.index,
- ad->dev_instance, ad->hwaddr,
- &ad->hw_if_index, avf_flag_change);
+ eir.dev_class_index = avf_device_class.index;
+ eir.dev_instance = ad->dev_instance;
+ eir.address = ad->hwaddr;
+ eir.cb.flag_change = avf_flag_change;
+ ad->hw_if_index = vnet_eth_register_interface (vnm, &eir);
- if (error)
- goto error;
-
- /* Indicate ability to support L3 DMAC filtering and
- * initialize interface to L3 non-promisc mode */
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ad->hw_if_index);
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER |
- VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO;
ethernet_set_flags (vnm, ad->hw_if_index,
ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, ad->hw_if_index);
args->sw_if_index = ad->sw_if_index = sw->sw_if_index;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, ad->hw_if_index);
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
vnet_hw_if_set_input_node (vnm, ad->hw_if_index, avf_input_node.index);
+ /* set hw interface caps */
+ vnet_hw_if_set_caps (vnm, ad->hw_if_index,
+ VNET_HW_IF_CAP_INT_MODE | VNET_HW_IF_CAP_MAC_FILTER |
+ VNET_HW_IF_CAP_TX_CKSUM | VNET_HW_IF_CAP_TCP_GSO);
+
for (i = 0; i < ad->n_rx_queues; i++)
{
u32 qi, fi;
@@ -1883,8 +1893,8 @@ avf_clear_hw_interface_counters (u32 instance)
}
clib_error_t *
-avf_program_flow (u32 dev_instance, int is_add, u8 *rule, u32 rule_len,
- u8 *program_status, u32 status_len)
+avf_program_flow (u32 dev_instance, int is_add, enum virthnl_adv_ops vc_op,
+ u8 *rule, u32 rule_len, u8 *program_status, u32 status_len)
{
vlib_main_t *vm = vlib_get_main ();
avf_process_req_t req;
@@ -1892,6 +1902,7 @@ avf_program_flow (u32 dev_instance, int is_add, u8 *rule, u32 rule_len,
req.dev_instance = dev_instance;
req.type = AVF_PROCESS_REQ_PROGRAM_FLOW;
req.is_add = is_add;
+ req.vc_op = vc_op;
req.rule = rule;
req.rule_len = rule_len;
req.program_status = program_status;
@@ -1900,7 +1911,6 @@ avf_program_flow (u32 dev_instance, int is_add, u8 *rule, u32 rule_len,
return avf_process_request (vm, &req);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (avf_device_class, ) = {
.name = "Adaptive Virtual Function (AVF) interface",
.clear_counters = avf_clear_hw_interface_counters,
@@ -1914,7 +1924,6 @@ VNET_DEVICE_CLASS (avf_device_class, ) = {
.tx_function_error_strings = avf_tx_func_error_strings,
.flow_ops_function = avf_flow_ops_fn,
};
-/* *INDENT-ON* */
clib_error_t *
avf_init (vlib_main_t * vm)
@@ -1928,17 +1937,4 @@ avf_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION (avf_init) =
-{
- .runs_after = VLIB_INITS ("pci_bus_init"),
-};
-/* *INDENT-OFF* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+VLIB_INIT_FUNCTION (avf_init);
diff --git a/src/plugins/avf/flow.c b/src/plugins/avf/flow.c
index e0d21cd96a2..91c5e018c71 100644
--- a/src/plugins/avf/flow.c
+++ b/src/plugins/avf/flow.c
@@ -44,6 +44,8 @@
(f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \
(f->type == VNET_FLOW_TYPE_IP6_VXLAN))
+#define FLOW_IS_GENERIC_CLASS(f) (f->type == VNET_FLOW_TYPE_GENERIC)
+
/* check if flow is L3 type */
#define FLOW_IS_L3_TYPE(f) \
((f->type == VNET_FLOW_TYPE_IP4) || (f->type == VNET_FLOW_TYPE_IP6))
@@ -62,8 +64,25 @@
(f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
(f->type == VNET_FLOW_TYPE_IP4_GTPU))
+static inline void
+avf_flow_convert_rss_types (u64 type, u64 *avf_rss_type)
+{
+#define BIT_IS_SET(v, b) ((v) & (u64) 1 << (b))
+
+ *avf_rss_type = 0;
+
+#undef _
+#define _(n, f, s) \
+ if (n != -1 && BIT_IS_SET (type, n)) \
+ *avf_rss_type |= f;
+
+ foreach_avf_rss_hf
+#undef _
+ return;
+}
+
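The foreach_avf_rss_hf table itself lives in avf_advanced_flow.h; a hypothetical two-row table of the same shape (names made up purely for illustration) shows what the `_ (n, f, s)` expansion above consumes:

    /* hypothetical rows, not the real table */
    #define foreach_example_rss_hf                                            \
      _ (0, EXAMPLE_ETH_RSS_IPV4, "ipv4")                                     \
      _ (1, EXAMPLE_ETH_RSS_IPV6, "ipv6")

    /* after expansion, bit 0 set in type means
     * *avf_rss_type |= EXAMPLE_ETH_RSS_IPV4 */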
int
-avf_fdir_vc_op_callback (void *vc_hdl, enum virthnl_adv_ops vc_op, void *in,
+avf_flow_vc_op_callback (void *vc_hdl, enum virthnl_adv_ops vc_op, void *in,
u32 in_len, void *out, u32 out_len)
{
u32 dev_instance = *(u32 *) vc_hdl;
@@ -79,9 +98,11 @@ avf_fdir_vc_op_callback (void *vc_hdl, enum virthnl_adv_ops vc_op, void *in,
switch (vc_op)
{
case VIRTCHNL_ADV_OP_ADD_FDIR_FILTER:
+ case VIRTCHNL_ADV_OP_ADD_RSS_CFG:
is_add = 1;
break;
case VIRTCHNL_ADV_OP_DEL_FDIR_FILTER:
+ case VIRTCHNL_ADV_OP_DEL_RSS_CFG:
is_add = 0;
break;
default:
@@ -90,18 +111,114 @@ avf_fdir_vc_op_callback (void *vc_hdl, enum virthnl_adv_ops vc_op, void *in,
return -1;
}
- err = avf_program_flow (dev_instance, is_add, in, in_len, out, out_len);
+ err =
+ avf_program_flow (dev_instance, is_add, vc_op, in, in_len, out, out_len);
if (err != 0)
{
- avf_log_err (ad, "avf fdir program failed: %U", format_clib_error, err);
+ avf_log_err (ad, "avf flow program failed: %U", format_clib_error, err);
clib_error_free (err);
return -1;
}
- avf_log_debug (ad, "avf fdir program success");
+ avf_log_debug (ad, "avf flow program success");
return 0;
}
+static inline enum avf_eth_hash_function
+avf_flow_convert_rss_func (vnet_rss_function_t func)
+{
+ enum avf_eth_hash_function rss_func;
+
+ switch (func)
+ {
+ case VNET_RSS_FUNC_DEFAULT:
+ rss_func = AVF_ETH_HASH_FUNCTION_DEFAULT;
+ break;
+ case VNET_RSS_FUNC_TOEPLITZ:
+ rss_func = AVF_ETH_HASH_FUNCTION_TOEPLITZ;
+ break;
+ case VNET_RSS_FUNC_SIMPLE_XOR:
+ rss_func = AVF_ETH_HASH_FUNCTION_SIMPLE_XOR;
+ break;
+ case VNET_RSS_FUNC_SYMMETRIC_TOEPLITZ:
+ rss_func = AVF_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ;
+ break;
+ default:
+ rss_func = AVF_ETH_HASH_FUNCTION_MAX;
+ break;
+ }
+
+ return rss_func;
+}
+
+/** Maximum number of queue indices in struct avf_flow_action_rss. */
+#define ACTION_RSS_QUEUE_NUM 128
+
+static inline void
+avf_flow_convert_rss_queues (u32 queue_index, u32 queue_num,
+ struct avf_flow_action_rss *act_rss)
+{
+ u16 *queues = clib_mem_alloc (sizeof (*queues) * ACTION_RSS_QUEUE_NUM);
+ int i;
+
+ for (i = 0; i < queue_num; i++)
+ queues[i] = queue_index++;
+
+ act_rss->queue_num = queue_num;
+ act_rss->queue = queues;
+
+ return;
+}
+
+void
+avf_parse_generic_pattern (struct avf_flow_item *item, u8 *pkt_buf,
+ u8 *msk_buf, u16 spec_len)
+{
+ u8 *raw_spec, *raw_mask;
+ u8 tmp_val = 0;
+ u8 tmp_c = 0;
+ int i, j;
+
+ raw_spec = (u8 *) item->spec;
+ raw_mask = (u8 *) item->mask;
+
+ /* convert the hex-character string into a byte array */
+ for (i = 0, j = 0; i < spec_len; i += 2, j++)
+ {
+ tmp_c = raw_spec[i];
+ if (tmp_c >= 'a' && tmp_c <= 'f')
+ tmp_val = tmp_c - 'a' + 10;
+ if (tmp_c >= 'A' && tmp_c <= 'F')
+ tmp_val = tmp_c - 'A' + 10;
+ if (tmp_c >= '0' && tmp_c <= '9')
+ tmp_val = tmp_c - '0';
+
+ tmp_c = raw_spec[i + 1];
+ if (tmp_c >= 'a' && tmp_c <= 'f')
+ pkt_buf[j] = tmp_val * 16 + tmp_c - 'a' + 10;
+ if (tmp_c >= 'A' && tmp_c <= 'F')
+ pkt_buf[j] = tmp_val * 16 + tmp_c - 'A' + 10;
+ if (tmp_c >= '0' && tmp_c <= '9')
+ pkt_buf[j] = tmp_val * 16 + tmp_c - '0';
+
+ tmp_c = raw_mask[i];
+ if (tmp_c >= 'a' && tmp_c <= 'f')
+ tmp_val = tmp_c - 0x57;
+ if (tmp_c >= 'A' && tmp_c <= 'F')
+ tmp_val = tmp_c - 0x37;
+ if (tmp_c >= '0' && tmp_c <= '9')
+ tmp_val = tmp_c - '0';
+
+ tmp_c = raw_mask[i + 1];
+ if (tmp_c >= 'a' && tmp_c <= 'f')
+ msk_buf[j] = tmp_val * 16 + tmp_c - 'a' + 10;
+ if (tmp_c >= 'A' && tmp_c <= 'F')
+ msk_buf[j] = tmp_val * 16 + tmp_c - 'A' + 10;
+ if (tmp_c >= '0' && tmp_c <= '9')
+ msk_buf[j] = tmp_val * 16 + tmp_c - '0';
+ }
+}
+
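A worked example of the decode loop above: for an item whose spec is the hex string "0806" with mask "ffff" (an ARP ethertype match, chosen here only for illustration), the conversion yields two bytes per buffer:

    struct avf_flow_item item = { .spec = (void *) "0806",
                                  .mask = (void *) "ffff" };
    u8 pkt[2], msk[2];

    avf_parse_generic_pattern (&item, pkt, msk, 4 /* hex chars in spec */);
    /* pkt == { 0x08, 0x06 }, msk == { 0xff, 0xff } */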
static int
avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
{
@@ -112,13 +229,15 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
u16 src_port_mask = 0, dst_port_mask = 0;
u8 protocol = IP_PROTOCOL_RESERVED;
bool fate = false;
+ bool is_fdir = true;
struct avf_flow_error error;
int layer = 0;
int action_count = 0;
- struct avf_fdir_vc_ctx vc_ctx;
+ struct avf_flow_vc_ctx vc_ctx;
struct avf_fdir_conf *filter;
+ struct virtchnl_rss_cfg *rss_cfg;
struct avf_flow_item avf_items[VIRTCHNL_MAX_NUM_PROTO_HDRS];
struct avf_flow_action avf_actions[VIRTCHNL_MAX_NUM_ACTIONS];
@@ -133,6 +252,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
struct avf_flow_action_queue act_q = {};
struct avf_flow_action_mark act_msk = {};
+ struct avf_flow_action_rss act_rss = {};
enum
{
@@ -140,6 +260,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
FLOW_ETHERNET_CLASS,
FLOW_IPV4_CLASS,
FLOW_IPV6_CLASS,
+ FLOW_GENERIC_CLASS,
} flow_class = FLOW_UNKNOWN_CLASS;
if (FLOW_IS_ETHERNET_CLASS (f))
@@ -148,6 +269,8 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
flow_class = FLOW_IPV4_CLASS;
else if (FLOW_IS_IPV6_CLASS (f))
flow_class = FLOW_IPV6_CLASS;
+ else if (FLOW_IS_GENERIC_CLASS (f))
+ flow_class = FLOW_GENERIC_CLASS;
else
return VNET_FLOW_ERROR_NOT_SUPPORTED;
@@ -158,15 +281,35 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
goto done;
}
+ ret = avf_rss_cfg_create (&rss_cfg, 0);
+ if (ret)
+ {
+ rv = VNET_FLOW_ERROR_INTERNAL;
+ goto done;
+ }
+
/* init a virtual channel context */
vc_ctx.vc_hdl = &dev_instance;
- vc_ctx.vc_op = avf_fdir_vc_op_callback;
+ vc_ctx.vc_op = avf_flow_vc_op_callback;
clib_memset (avf_items, 0, sizeof (avf_items));
clib_memset (avf_actions, 0, sizeof (avf_actions));
+ /* Handle generic flow first */
+ if (flow_class == FLOW_GENERIC_CLASS)
+ {
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_RAW;
+ avf_items[layer].is_generic = true;
+ avf_items[layer].spec = f->generic.pattern.spec;
+ avf_items[layer].mask = f->generic.pattern.mask;
+
+ layer++;
+
+ goto pattern_end;
+ }
+
/* Ethernet Layer */
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_ETH;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_ETH;
avf_items[layer].spec = NULL;
avf_items[layer].mask = NULL;
layer++;
@@ -176,7 +319,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
vnet_flow_ip4_t *ip4_ptr = &f->ip4;
/* IPv4 Layer */
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_IPV4;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_IPV4;
avf_items[layer].spec = &ip4_spec;
avf_items[layer].mask = &ip4_mask;
layer++;
@@ -215,7 +358,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
vnet_flow_ip6_t *ip6_ptr = &f->ip6;
/* IPv6 Layer */
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_IPV6;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_IPV6;
avf_items[layer].spec = &ip6_spec;
avf_items[layer].mask = &ip6_mask;
layer++;
@@ -260,7 +403,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
switch (protocol)
{
case IP_PROTOCOL_L2TP:
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_L2TPV3;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_L2TPV3OIP;
avf_items[layer].spec = &l2tpv3_spec;
avf_items[layer].mask = &l2tpv3_mask;
layer++;
@@ -271,7 +414,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
break;
case IP_PROTOCOL_IPSEC_ESP:
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_ESP;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_ESP;
avf_items[layer].spec = &esp_spec;
avf_items[layer].mask = &esp_mask;
layer++;
@@ -282,7 +425,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
break;
case IP_PROTOCOL_IPSEC_AH:
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_AH;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_AH;
avf_items[layer].spec = &ah_spec;
avf_items[layer].mask = &ah_mask;
layer++;
@@ -293,7 +436,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
break;
case IP_PROTOCOL_TCP:
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_TCP;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_TCP;
avf_items[layer].spec = &tcp_spec;
avf_items[layer].mask = &tcp_mask;
layer++;
@@ -311,7 +454,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
break;
case IP_PROTOCOL_UDP:
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_UDP;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_UDP;
avf_items[layer].spec = &udp_spec;
avf_items[layer].mask = &udp_mask;
layer++;
@@ -330,7 +473,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
/* handle the UDP tunnels */
if (f->type == VNET_FLOW_TYPE_IP4_GTPU)
{
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_GTPU_IP;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_GTPU;
avf_items[layer].spec = &gtp_spec;
avf_items[layer].mask = &gtp_mask;
layer++;
@@ -348,30 +491,56 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
pattern_end:
/* pattern end flag */
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_NONE;
- ret = avf_fdir_parse_pattern (filter, avf_items, &error);
- if (ret)
- {
- avf_log_err (ad, "avf fdir parse pattern failed: %s", error.message);
- rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
- goto done;
- }
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_END;
/* Action */
/* Only one 'fate' can be assigned */
+ if (f->actions & VNET_FLOW_ACTION_RSS)
+ {
+ is_fdir = false;
+ avf_actions[action_count].conf = &act_rss;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_RSS;
+
+ avf_flow_convert_rss_types (f->rss_types, &act_rss.types);
+
+ if ((act_rss.func = avf_flow_convert_rss_func (f->rss_fun)) ==
+ AVF_ETH_HASH_FUNCTION_MAX)
+ {
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto done;
+ }
+
+ if (f->queue_num)
+ {
+ /* convert rss queues to array */
+ avf_flow_convert_rss_queues (f->queue_index, f->queue_num, &act_rss);
+ is_fdir = true;
+ }
+
+ fate = true;
+ action_count++;
+ }
+
if (f->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
{
- avf_actions[action_count].type = VIRTCHNL_ACTION_QUEUE;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_QUEUE;
avf_actions[action_count].conf = &act_q;
act_q.index = f->redirect_queue;
- fate = true;
+ if (fate == true)
+ {
+ rv = VNET_FLOW_ERROR_INTERNAL;
+ goto done;
+ }
+ else
+ fate = true;
+
action_count++;
}
if (f->actions & VNET_FLOW_ACTION_DROP)
{
- avf_actions[action_count].type = VIRTCHNL_ACTION_DROP;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_DROP;
avf_actions[action_count].conf = NULL;
if (fate == true)
@@ -381,13 +550,12 @@ pattern_end:
}
else
fate = true;
-
action_count++;
}
if (fate == false)
{
- avf_actions[action_count].type = VIRTCHNL_ACTION_PASSTHRU;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_PASSTHRU;
avf_actions[action_count].conf = NULL;
fate = true;
@@ -396,7 +564,7 @@ pattern_end:
if (f->actions & VNET_FLOW_ACTION_MARK)
{
- avf_actions[action_count].type = VIRTCHNL_ACTION_MARK;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_MARK;
avf_actions[action_count].conf = &act_msk;
action_count++;
@@ -404,16 +572,41 @@ pattern_end:
}
/* action end flag */
- avf_actions[action_count].type = VIRTCHNL_ACTION_NONE;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_END;
- /* parse action */
- ret = avf_fdir_parse_action (avf_actions, filter, &error);
- if (ret)
+ /* parse pattern and actions */
+ if (is_fdir)
{
- avf_log_err (ad, "avf fdir parse action failed: %s", error.message);
- rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
- goto done;
- }
+ if (flow_class == FLOW_GENERIC_CLASS)
+ {
+ ret = avf_fdir_parse_generic_pattern (filter, avf_items, &error);
+ if (ret)
+ {
+ avf_log_err (ad, "avf fdir parse generic pattern failed: %s",
+ error.message);
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto done;
+ }
+ }
+ else
+ {
+ ret = avf_fdir_parse_pattern (filter, avf_items, &error);
+ if (ret)
+ {
+ avf_log_err (ad, "avf fdir parse pattern failed: %s",
+ error.message);
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto done;
+ }
+ }
+
+ ret = avf_fdir_parse_action (avf_actions, filter, &error);
+ if (ret)
+ {
+ avf_log_err (ad, "avf fdir parse action failed: %s", error.message);
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto done;
+ }
/* create flow rule, save rule */
ret = avf_fdir_rule_create (&vc_ctx, filter);
@@ -428,7 +621,36 @@ pattern_end:
else
{
fe->rcfg = filter;
+ fe->flow_type_flag = 1;
+ }
}
+ else
+ {
+ ret =
+ avf_rss_parse_pattern_action (avf_items, avf_actions, rss_cfg, &error);
+ if (ret)
+ {
+ avf_log_err (ad, "avf rss parse pattern action failed: %s",
+ error.message);
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto done;
+ }
+ /* create flow rule, save rule */
+ ret = avf_rss_rule_create (&vc_ctx, rss_cfg);
+
+ if (ret)
+ {
+ avf_log_err (ad, "avf rss rule create failed");
+ rv = VNET_FLOW_ERROR_INTERNAL;
+ goto done;
+ }
+ else
+ {
+ fe->rss_cfg = rss_cfg;
+ fe->flow_type_flag = 0;
+ }
+ }
+
done:
return rv;
@@ -495,6 +717,7 @@ avf_flow_ops_fn (vnet_main_t *vm, vnet_flow_dev_op_t op, u32 dev_instance,
case VNET_FLOW_TYPE_IP4_L2TPV3OIP:
case VNET_FLOW_TYPE_IP4_IPSEC_ESP:
case VNET_FLOW_TYPE_IP4_IPSEC_AH:
+ case VNET_FLOW_TYPE_GENERIC:
if ((rv = avf_flow_add (dev_instance, flow, fe)))
goto done;
break;
@@ -509,13 +732,22 @@ avf_flow_ops_fn (vnet_main_t *vm, vnet_flow_dev_op_t op, u32 dev_instance,
{
fe = vec_elt_at_index (ad->flow_entries, *private_data);
- struct avf_fdir_vc_ctx ctx;
+ struct avf_flow_vc_ctx ctx;
ctx.vc_hdl = &dev_instance;
- ctx.vc_op = avf_fdir_vc_op_callback;
+ ctx.vc_op = avf_flow_vc_op_callback;
- rv = avf_fdir_rule_destroy (&ctx, fe->rcfg);
- if (rv)
- return VNET_FLOW_ERROR_INTERNAL;
+ if (fe->flow_type_flag)
+ {
+ rv = avf_fdir_rule_destroy (&ctx, fe->rcfg);
+ if (rv)
+ return VNET_FLOW_ERROR_INTERNAL;
+ }
+ else
+ {
+ rv = avf_rss_rule_destroy (&ctx, fe->rss_cfg);
+ if (rv)
+ return VNET_FLOW_ERROR_INTERNAL;
+ }
if (fe->mark)
{
@@ -525,6 +757,7 @@ avf_flow_ops_fn (vnet_main_t *vm, vnet_flow_dev_op_t op, u32 dev_instance,
}
(void) avf_fdir_rcfg_destroy (fe->rcfg);
+ (void) avf_rss_rcfg_destroy (fe->rss_cfg);
clib_memset (fe, 0, sizeof (*fe));
pool_put (ad->flow_entries, fe);
goto disable_rx_offload;
diff --git a/src/plugins/avf/input.c b/src/plugins/avf/input.c
index 1406d789e0b..06007db540d 100644
--- a/src/plugins/avf/input.c
+++ b/src/plugins/avf/input.c
@@ -125,6 +125,9 @@ avf_rxq_refill (vlib_main_t * vm, vlib_node_runtime_t * node, avf_rxq_t * rxq,
n_alloc -= 8;
}
+ /* RXQ can be smaller than 256 packets, especially if jumbo. */
+ rxq->descs[slot].qword[1] = 0;
+
avf_tail_write (rxq->qrx_tail, slot);
}
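The descriptor clear added above pairs with the removal below in avf_device_input_inline: zeroing qword[1] of the first slot past the refilled range keeps the DD-bit scan from running into stale descriptors when the ring is only partially filled, so the per-dispatch "avoid eating our own tail" write is no longer needed.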
@@ -296,7 +299,7 @@ avf_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
next_index = ad->per_interface_next_index;
if (PREDICT_FALSE (vnet_device_input_have_features (ad->sw_if_index)))
- vnet_feature_start_device_input_x1 (ad->sw_if_index, &next_index, bt);
+ vnet_feature_start_device_input (ad->sw_if_index, &next_index, bt);
vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -423,9 +426,6 @@ no_more_desc:
rxq->next = next;
rxq->n_enqueued -= n_rx_packets + n_tail_desc;
- /* avoid eating our own tail */
- rxq->descs[(next + rxq->n_enqueued) & mask].qword[1] = 0;
-
#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
or_qw1 |= or_q1x4[0] | or_q1x4[1] | or_q1x4[2] | or_q1x4[3];
#endif
@@ -566,7 +566,6 @@ VLIB_NODE_FN (avf_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (avf_input_node) = {
.name = "avf-input",
.sibling_of = "device-input",
@@ -578,7 +577,6 @@ VLIB_REGISTER_NODE (avf_input_node) = {
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/avf/output.c b/src/plugins/avf/output.c
index 4cc9d5a49c1..daa86ae86b2 100644
--- a/src/plugins/avf/output.c
+++ b/src/plugins/avf/output.c
@@ -19,6 +19,7 @@
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vppinfra/ring.h>
+#include <vppinfra/vector/ip_csum.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip4_packet.h>
@@ -110,7 +111,7 @@ avf_tx_prepare_cksum (vlib_buffer_t * b, u8 is_tso)
is_tso ? 0 :
clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
(l4_hdr_offset - l3_hdr_offset));
- sum = ~ip_csum (&psh, sizeof (psh));
+ sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
}
else
{
@@ -119,11 +120,9 @@ avf_tx_prepare_cksum (vlib_buffer_t * b, u8 is_tso)
psh.dst = ip6->dst_address;
psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
psh.l4len = is_tso ? 0 : ip6->payload_length;
- sum = ~ip_csum (&psh, sizeof (psh));
+ sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
}
- /* ip_csum does a byte swap for some reason... */
- sum = clib_net_to_host_u16 (sum);
if (is_tcp)
tcp->checksum = sum;
else
@@ -231,7 +230,11 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq,
{
const u64 cmd_eop = AVF_TXD_CMD_EOP;
u16 n_free_desc, n_desc_left, n_packets_left = n_packets;
+#if defined CLIB_HAVE_VEC512
+ vlib_buffer_t *b[8];
+#else
vlib_buffer_t *b[4];
+#endif
avf_tx_desc_t *d = txq->tmp_descs;
u32 *tb = txq->tmp_bufs;
@@ -242,11 +245,30 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq,
while (n_packets_left && n_desc_left)
{
+#if defined CLIB_HAVE_VEC512
+ u32 flags;
+ u64x8 or_flags_vec512;
+ u64x8 flags_mask_vec512;
+#else
u32 flags, or_flags;
+#endif
+#if defined CLIB_HAVE_VEC512
+ if (n_packets_left < 8 || n_desc_left < 8)
+#else
if (n_packets_left < 8 || n_desc_left < 4)
+#endif
goto one_by_one;
+#if defined CLIB_HAVE_VEC512
+ u64x8 base_ptr = u64x8_splat (vm->buffer_main->buffer_mem_start);
+ u32x8 buf_indices = u32x8_load_unaligned (buffers);
+
+ *(u64x8 *) &b = base_ptr + u64x8_from_u32x8 (
+ buf_indices << CLIB_LOG2_CACHE_LINE_BYTES);
+
+ or_flags_vec512 = u64x8_i64gather (u64x8_load_unaligned (b), 0, 1);
+#else
vlib_prefetch_buffer_with_index (vm, buffers[4], LOAD);
vlib_prefetch_buffer_with_index (vm, buffers[5], LOAD);
vlib_prefetch_buffer_with_index (vm, buffers[6], LOAD);
@@ -258,12 +280,37 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq,
b[3] = vlib_get_buffer (vm, buffers[3]);
or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
+#endif
+#if defined CLIB_HAVE_VEC512
+ flags_mask_vec512 = u64x8_splat (
+ VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD | VNET_BUFFER_F_GSO);
+ if (PREDICT_FALSE (
+ !u64x8_is_all_zero (or_flags_vec512 & flags_mask_vec512)))
+#else
if (PREDICT_FALSE (or_flags &
(VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD |
VNET_BUFFER_F_GSO)))
+#endif
goto one_by_one;
+#if defined CLIB_HAVE_VEC512
+ vlib_buffer_copy_indices (tb, buffers, 8);
+ avf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 1, b[1], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 2, b[2], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 3, b[3], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 4, b[4], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 5, b[5], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 6, b[6], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 7, b[7], cmd_eop, use_va_dma);
+
+ buffers += 8;
+ n_packets_left -= 8;
+ n_desc_left -= 8;
+ d += 8;
+ tb += 8;
+#else
vlib_buffer_copy_indices (tb, buffers, 4);
avf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma);
@@ -276,6 +323,8 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq,
n_desc_left -= 4;
d += 4;
tb += 4;
+#endif
+
continue;
one_by_one:
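For readers not steeped in AVX-512 intrinsics, a scalar sketch of the u64x8 pointer computation above, which is what vlib_get_buffer does one index at a time:

    for (int i = 0; i < 8; i++)
      b[i] = (vlib_buffer_t *) (vm->buffer_main->buffer_mem_start +
                                ((u64) buffers[i]
                                 << CLIB_LOG2_CACHE_LINE_BYTES));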
diff --git a/src/plugins/avf/plugin.c b/src/plugins/avf/plugin.c
index b5123175fe7..928dee543a6 100644
--- a/src/plugins/avf/plugin.c
+++ b/src/plugins/avf/plugin.c
@@ -19,12 +19,10 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Intel Adaptive Virtual Function (AVF) Device Driver",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/avf/virtchnl.h b/src/plugins/avf/virtchnl.h
index ae4fe4a5e3c..98d6f4adf8d 100644
--- a/src/plugins/avf/virtchnl.h
+++ b/src/plugins/avf/virtchnl.h
@@ -97,6 +97,8 @@ enum
_ (31, DISABLE_CHANNELS) \
_ (32, ADD_CLOUD_FILTER) \
_ (33, DEL_CLOUD_FILTER) \
+ _ (45, ADD_RSS_CFG) \
+ _ (46, DEL_RSS_CFG) \
_ (47, ADD_FDIR_FILTER) \
_ (48, DEL_FDIR_FILTER) \
_ (49, QUERY_FDIR_FILTER) \
diff --git a/src/plugins/bpf_trace_filter/CMakeLists.txt b/src/plugins/bpf_trace_filter/CMakeLists.txt
new file mode 100644
index 00000000000..4268022c281
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/CMakeLists.txt
@@ -0,0 +1,45 @@
+# Copyright (c) 2023 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vpp_find_path(PCAP_INCLUDE_DIR NAMES pcap.h)
+if (NOT PCAP_INCLUDE_DIR)
+ message(WARNING "libpcap headers not found - bpf_trace_filter plugin disabled")
+ return()
+endif()
+
+vpp_plugin_find_library(bpf_trace_filter PCAP_LIB libpcap.so)
+
+if (NOT PCAP_LIB)
+ message(WARNING "libpcap library not found - bpf_trace_filter plugin disabled")
+ return()
+endif()
+
+set(CMAKE_REQUIRED_FLAGS "-fPIC")
+set(CMAKE_REQUIRED_INCLUDES "${PCAP_INCLUDE_DIR}")
+set(CMAKE_REQUIRED_LIBRARIES "${PCAP_LIB}")
+
+include_directories(${PCAP_INCLUDE_DIR})
+
+add_vpp_plugin(bpf_trace_filter
+ SOURCES
+ cli.c
+ plugin.c
+ bpf_trace_filter.c
+ api.c
+
+ API_FILES
+ bpf_trace_filter.api
+
+ LINK_LIBRARIES
+ ${PCAP_LIB}
+)
diff --git a/src/plugins/bpf_trace_filter/FEATURE.yaml b/src/plugins/bpf_trace_filter/FEATURE.yaml
new file mode 100644
index 00000000000..101572f731d
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/FEATURE.yaml
@@ -0,0 +1,8 @@
+---
+name: BPF Trace Filter
+maintainer: Mohammed Hawari <mohammed@hawari.fr>
+features:
+ - BPF Trace Filtering
+description: "BPF Trace Filtering"
+state: experimental
+properties: [CLI, API]
diff --git a/src/plugins/bpf_trace_filter/api.c b/src/plugins/bpf_trace_filter/api.c
new file mode 100644
index 00000000000..30beaddd201
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/api.c
@@ -0,0 +1,97 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <bpf_trace_filter/bpf_trace_filter.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <bpf_trace_filter/bpf_trace_filter.api_enum.h>
+#include <bpf_trace_filter/bpf_trace_filter.api_types.h>
+
+#define REPLY_MSG_ID_BASE (bm->msg_id_base)
+#include <vlibapi/api_helper_macros.h>
+
+static void
+vl_api_bpf_trace_filter_set_t_handler (vl_api_bpf_trace_filter_set_t *mp)
+{
+ bpf_trace_filter_main_t *bm = &bpf_trace_filter_main;
+ vl_api_bpf_trace_filter_set_reply_t *rmp;
+ clib_error_t *err = 0;
+ int rv = 0;
+ u8 is_del = !mp->is_add;
+ char *bpf_expr;
+
+ bpf_expr = vl_api_from_api_to_new_c_string (&mp->filter);
+ err = bpf_trace_filter_set_unset (bpf_expr, is_del, 0);
+
+ if (err)
+ {
+ rv = -1;
+ clib_error_report (err);
+ }
+ vec_free (bpf_expr);
+
+ REPLY_MACRO (VL_API_BPF_TRACE_FILTER_SET_REPLY);
+}
+
+static void
+vl_api_bpf_trace_filter_set_v2_t_handler (vl_api_bpf_trace_filter_set_v2_t *mp)
+{
+ bpf_trace_filter_main_t *bm = &bpf_trace_filter_main;
+ vl_api_bpf_trace_filter_set_v2_reply_t *rmp;
+ clib_error_t *err = 0;
+ int rv = 0;
+ u8 is_del = !mp->is_add;
+ u8 optimize = !!mp->optimize;
+ char *bpf_expr;
+
+ bpf_expr = vl_api_from_api_to_new_c_string (&mp->filter);
+ err = bpf_trace_filter_set_unset (bpf_expr, is_del, optimize);
+
+ if (err)
+ {
+ rv = -1;
+ clib_error_report (err);
+ }
+ vec_free (bpf_expr);
+
+ REPLY_MACRO (VL_API_BPF_TRACE_FILTER_SET_V2_REPLY);
+}
+
+#include <bpf_trace_filter/bpf_trace_filter.api.c>
+
+static clib_error_t *
+bpf_trace_filter_plugin_api_hookup (vlib_main_t *vm)
+{
+ bpf_trace_filter_main_t *bm = &bpf_trace_filter_main;
+
+ /* ask for a correctly-sized block of API message decode slots */
+ bm->msg_id_base = setup_message_id_table ();
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (bpf_trace_filter_plugin_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/plugins/bpf_trace_filter/bpf_trace_filter.api b/src/plugins/bpf_trace_filter/bpf_trace_filter.api
new file mode 100644
index 00000000000..c2d47c8b3bf
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/bpf_trace_filter.api
@@ -0,0 +1,35 @@
+/*
+ * bpf_trace_filter.api - BPF Trace filter API
+ *
+ * Copyright (c) 2023 Cisco and/or its affiliates
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ option version = "0.1.0";
+
+ autoreply define bpf_trace_filter_set
+ {
+ u32 client_index;
+ u32 context;
+ bool is_add [default = true];
+ string filter[];
+ };
+
+ autoreply define bpf_trace_filter_set_v2
+ {
+ u32 client_index;
+ u32 context;
+ bool is_add [default = true];
+ bool optimize [default = true];
+ string filter[];
+ }; \ No newline at end of file
diff --git a/src/plugins/bpf_trace_filter/bpf_trace_filter.c b/src/plugins/bpf_trace_filter/bpf_trace_filter.c
new file mode 100644
index 00000000000..9d86c8483a6
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/bpf_trace_filter.c
@@ -0,0 +1,112 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <bpf_trace_filter/bpf_trace_filter.h>
+
+clib_error_t *
+bpf_trace_filter_init (vlib_main_t *vm)
+{
+ bpf_trace_filter_main_t *btm = &bpf_trace_filter_main;
+ btm->pcap = pcap_open_dead (DLT_EN10MB, 65535);
+
+ return 0;
+}
+
+int vnet_is_packet_traced (vlib_buffer_t *b, u32 classify_table_index,
+ int func);
+
+u8 *
+format_bpf_trace_filter (u8 *s, va_list *a)
+{
+ bpf_trace_filter_main_t *btm = va_arg (*a, bpf_trace_filter_main_t *);
+ struct bpf_insn *insn;
+
+ if (!btm->prog_set)
+ return format (s, "bpf trace filter is not set");
+
+ insn = btm->prog.bf_insns;
+ for (int i = 0; i < btm->prog.bf_len; insn++, i++)
+ s = format (s, "%s\n", bpf_image (insn, i));
+
+ return s;
+}
+
+clib_error_t *
+bpf_trace_filter_set_unset (const char *bpf_expr, u8 is_del, u8 optimize)
+{
+ bpf_trace_filter_main_t *btm = &bpf_trace_filter_main;
+ if (is_del)
+ {
+ if (btm->prog_set)
+ {
+ btm->prog_set = 0;
+ pcap_freecode (&btm->prog);
+ }
+ }
+ else if (bpf_expr)
+ {
+ if (btm->prog_set)
+ pcap_freecode (&btm->prog);
+ btm->prog_set = 0;
+ if (pcap_compile (btm->pcap, &btm->prog, (char *) bpf_expr, optimize,
+ PCAP_NETMASK_UNKNOWN))
+ {
+ return clib_error_return (0, "Failed pcap_compile of %s", bpf_expr);
+ }
+ btm->prog_set = 1;
+ }
+ return 0;
+}
+
+int
+bpf_is_packet_traced (vlib_buffer_t *b, u32 classify_table_index, int func)
+{
+ bpf_trace_filter_main_t *bfm = &bpf_trace_filter_main;
+ struct pcap_pkthdr phdr = { 0 };
+ int res;
+ int res1;
+
+ if (classify_table_index != ~0 &&
+ (res1 = vnet_is_packet_traced (b, classify_table_index, 0)) != 1)
+ return res1;
+
+ if (!bfm->prog_set)
+ return 1;
+
+ phdr.caplen = b->current_length;
+ phdr.len = b->current_length;
+ res = pcap_offline_filter (&bfm->prog, &phdr, vlib_buffer_get_current (b));
+ return res != 0;
+}
+
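A standalone sketch of the libpcap calls used above, outside VPP; pcap_open_dead, pcap_compile, pcap_offline_filter and pcap_freecode are all standard libpcap APIs:

    #include <pcap.h>

    static int
    matches (const char *expr, const unsigned char *pkt, unsigned int len)
    {
      pcap_t *p = pcap_open_dead (DLT_EN10MB, 65535);
      struct bpf_program prog;
      struct pcap_pkthdr h = { .caplen = len, .len = len };
      int rv = -1;

      if (pcap_compile (p, &prog, expr, 1 /* optimize */,
                        PCAP_NETMASK_UNKNOWN) == 0)
        {
          rv = pcap_offline_filter (&prog, &h, pkt) != 0;
          pcap_freecode (&prog);
        }
      pcap_close (p);
      return rv;
    }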
+VLIB_REGISTER_TRACE_FILTER_FUNCTION (bpf_trace_filter_fn, static) = {
+ .name = "bpf_trace_filter",
+ .description = "bpf based trace filter",
+ .priority = 10,
+ .function = bpf_is_packet_traced
+};
+
+VLIB_INIT_FUNCTION (bpf_trace_filter_init);
+bpf_trace_filter_main_t bpf_trace_filter_main;
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/bpf_trace_filter/bpf_trace_filter.h b/src/plugins/bpf_trace_filter/bpf_trace_filter.h
new file mode 100644
index 00000000000..52413ebe0ad
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/bpf_trace_filter.h
@@ -0,0 +1,42 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _BPF_TRACE_FILTER_H_
+#define _BPF_TRACE_FILTER_H_
+#include <vlib/vlib.h>
+#include <pcap.h>
+typedef struct
+{
+ pcap_t *pcap;
+ u16 msg_id_base;
+ u8 prog_set;
+ struct bpf_program prog;
+} bpf_trace_filter_main_t;
+
+extern bpf_trace_filter_main_t bpf_trace_filter_main;
+clib_error_t *bpf_trace_filter_set_unset (const char *bpf_expr, u8 is_del,
+ u8 optimize);
+u8 *format_bpf_trace_filter (u8 *s, va_list *a);
+#endif /* _BPF_TRACE_FILTER_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/plugins/bpf_trace_filter/bpf_trace_filter.rst b/src/plugins/bpf_trace_filter/bpf_trace_filter.rst
new file mode 100644
index 00000000000..63deddbc5ab
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/bpf_trace_filter.rst
@@ -0,0 +1,4 @@
+BPF Trace Filter Function
+============================
+This plugin provides a trace filter function that relies on a BPF interpreter to select which packets
+must be traced. \ No newline at end of file
diff --git a/src/plugins/bpf_trace_filter/cli.c b/src/plugins/bpf_trace_filter/cli.c
new file mode 100644
index 00000000000..f340b1667e1
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/cli.c
@@ -0,0 +1,99 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <inttypes.h>
+
+#include <vlib/vlib.h>
+#include <bpf_trace_filter/bpf_trace_filter.h>
+
+static clib_error_t *
+set_bpf_trace_filter_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *bpf_expr = 0;
+ u8 is_del = 0;
+ u8 optimize = 1;
+ clib_error_t *err = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_del = 1;
+ else if (unformat (line_input, "no-optimize"))
+ optimize = 0;
+ else if (unformat (line_input, "%s", &bpf_expr))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+ unformat_free (line_input);
+
+ if (err != 0)
+ return err;
+
+ err = bpf_trace_filter_set_unset ((char *) bpf_expr, is_del, optimize);
+
+ return err;
+}
+
+VLIB_CLI_COMMAND (set_bpf_trace_filter, static) = {
+ .path = "set bpf trace filter",
+ .short_help = "set bpf trace filter [del] [no-optimize] {<pcap string>}",
+ .function = set_bpf_trace_filter_command_fn,
+};
+
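Illustrative invocations; note the expression is consumed with unformat "%s", so a single-token pcap primitive is the simplest case:

    vpp# set bpf trace filter icmp
    vpp# set bpf trace filter del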
+static clib_error_t *
+show_bpf_trace_filter_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ bpf_trace_filter_main_t *btm = &bpf_trace_filter_main;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ return (clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input));
+ }
+
+ vlib_cli_output (vm, "%U", format_bpf_trace_filter, btm);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_bpf_trace_filter, static) = {
+ .path = "show bpf trace filter",
+ .short_help = "show bpf trace filter",
+ .function = show_bpf_trace_filter_command_fn,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/bpf_trace_filter/plugin.c b/src/plugins/bpf_trace_filter/plugin.c
new file mode 100644
index 00000000000..db5d6111d85
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/plugin.c
@@ -0,0 +1,33 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "BPF Trace Filter Plugin",
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/bufmon/CMakeLists.txt b/src/plugins/bufmon/CMakeLists.txt
new file mode 100644
index 00000000000..b20ccff8075
--- /dev/null
+++ b/src/plugins/bufmon/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Copyright (c) 2020 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(bufmon
+ SOURCES
+ bufmon.c
+
+ COMPONENT
+ vpp-plugin-devtools
+)
diff --git a/src/plugins/bufmon/FEATURE.yaml b/src/plugins/bufmon/FEATURE.yaml
new file mode 100644
index 00000000000..81dc6e8fa5a
--- /dev/null
+++ b/src/plugins/bufmon/FEATURE.yaml
@@ -0,0 +1,8 @@
+---
+name: Buffers monitoring plugin
+maintainer: Benoît Ganne <bganne@cisco.com>
+features:
+ - monitor buffer utilization in VPP graph nodes
+description: "monitor buffer utilization in VPP graph nodes"
+state: production
+properties: [CLI, MULTITHREAD]
diff --git a/src/plugins/bufmon/bufmon.c b/src/plugins/bufmon/bufmon.c
new file mode 100644
index 00000000000..30cf6576e7a
--- /dev/null
+++ b/src/plugins/bufmon/bufmon.c
@@ -0,0 +1,314 @@
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+typedef struct
+{
+ u64 in;
+ u64 out;
+ u64 alloc;
+ u64 free;
+} bufmon_per_node_data_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ bufmon_per_node_data_t *pnd;
+ u32 cur_node;
+} bufmon_per_thread_data_t;
+
+typedef struct
+{
+ bufmon_per_thread_data_t *ptd;
+ int enabled;
+} bufmon_main_t;
+
+static bufmon_main_t bufmon_main;
+
+static u32
+bufmon_alloc_free_callback (vlib_main_t *vm, u32 n_buffers, const int is_free)
+{
+ bufmon_main_t *bm = &bufmon_main;
+ bufmon_per_thread_data_t *ptd;
+ bufmon_per_node_data_t *pnd;
+ u32 cur_node;
+
+ if (PREDICT_FALSE (vm->thread_index >= vec_len (bm->ptd)))
+ {
+ clib_warning ("bufmon: thread index %d unknown for buffer %s (%d)",
+ vm->thread_index, is_free ? "free" : "alloc", n_buffers);
+ return n_buffers;
+ }
+
+ ptd = vec_elt_at_index (bm->ptd, vm->thread_index);
+
+ cur_node = ptd->cur_node;
+ if (cur_node >= vec_len (ptd->pnd))
+ {
+ cur_node = vlib_get_current_process_node_index (vm);
+ vec_validate_aligned (ptd->pnd, cur_node, CLIB_CACHE_LINE_BYTES);
+ }
+
+ pnd = vec_elt_at_index (ptd->pnd, cur_node);
+
+ if (is_free)
+ pnd->free += n_buffers;
+ else
+ pnd->alloc += n_buffers;
+
+ return n_buffers;
+}
+
+static u32
+bufmon_alloc_callback (vlib_main_t *vm, u8 buffer_pool_index, u32 *buffers,
+ u32 n_buffers)
+{
+ return bufmon_alloc_free_callback (vm, n_buffers, 0 /* is_free */);
+}
+
+static u32
+bufmon_free_callback (vlib_main_t *vm, u8 buffer_pool_index, u32 *buffers,
+ u32 n_buffers)
+{
+ return bufmon_alloc_free_callback (vm, n_buffers, 1 /* is_free */);
+}
+
+static u32
+bufmon_count_buffers (vlib_main_t *vm, vlib_frame_t *frame)
+{
+ vlib_buffer_t *b[VLIB_FRAME_SIZE];
+ u32 *from = vlib_frame_vector_args (frame);
+ const u32 n = frame->n_vectors;
+ u32 nc = 0;
+ u32 i;
+
+ vlib_get_buffers (vm, from, b, n);
+
+ for (i = 0; i < n; i++)
+ {
+ const vlib_buffer_t *cb = b[i];
+ while (cb->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ nc++;
+ cb = vlib_get_buffer (vm, cb->next_buffer);
+ }
+ }
+
+ return n + nc;
+}
+
+static uword
+bufmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ bufmon_main_t *bm = &bufmon_main;
+ bufmon_per_thread_data_t *ptd;
+ bufmon_per_node_data_t *pnd;
+ int pending_frames;
+ uword rv;
+
+ ptd = vec_elt_at_index (bm->ptd, vm->thread_index);
+ vec_validate_aligned (ptd->pnd, node->node_index, CLIB_CACHE_LINE_BYTES);
+ pnd = vec_elt_at_index (ptd->pnd, node->node_index);
+
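+  /* buffers in the incoming frame count as this node's input */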
+ if (frame)
+ pnd->in += bufmon_count_buffers (vm, frame);
+
+ pending_frames = vec_len (nm->pending_frames);
+ ptd->cur_node = node->node_index;
+
+ rv = node->function (vm, node, frame);
+
+ ptd->cur_node = ~0;
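+  /* frames enqueued by the node during dispatch count as its output */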
+ for (; pending_frames < vec_len (nm->pending_frames); pending_frames++)
+ {
+ vlib_pending_frame_t *p =
+ vec_elt_at_index (nm->pending_frames, pending_frames);
+ pnd->out += bufmon_count_buffers (vm, vlib_get_frame (vm, p->frame));
+ }
+
+ return rv;
+}
+
+static void
+bufmon_unregister_callbacks (vlib_main_t *vm)
+{
+ vlib_buffer_set_alloc_free_callback (vm, 0, 0);
+ foreach_vlib_main ()
+ vlib_node_set_dispatch_wrapper (this_vlib_main, 0);
+}
+
+static clib_error_t *
+bufmon_register_callbacks (vlib_main_t *vm)
+{
+ if (vlib_buffer_set_alloc_free_callback (vm, bufmon_alloc_callback,
+ bufmon_free_callback))
+ goto err0;
+
+ foreach_vlib_main ()
+ if (vlib_node_set_dispatch_wrapper (this_vlib_main,
+ bufmon_dispatch_wrapper))
+ goto err1;
+
+ vec_validate_aligned (bufmon_main.ptd, vlib_thread_main.n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+ return 0;
+
+err1:
+ foreach_vlib_main ()
+ vlib_node_set_dispatch_wrapper (this_vlib_main, 0);
+err0:
+ vlib_buffer_set_alloc_free_callback (vm, 0, 0);
+ return clib_error_return (0, "failed to register callback");
+}
+
+static clib_error_t *
+bufmon_enable_disable (vlib_main_t *vm, int enable)
+{
+ bufmon_main_t *bm = &bufmon_main;
+
+ if (enable)
+ {
+ if (bm->enabled)
+ return 0;
+ clib_error_t *error = bufmon_register_callbacks (vm);
+ if (error)
+ return error;
+ bm->enabled = 1;
+ }
+ else
+ {
+ if (!bm->enabled)
+ return 0;
+ bufmon_unregister_callbacks (vm);
+ bm->enabled = 0;
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+set_buffer_traces (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ int on = 1;
+
+ if (unformat_user (input, unformat_line_input, line_input))
+ {
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "on"))
+ on = 1;
+ else if (unformat (line_input, "off"))
+ on = 0;
+ else
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ }
+ unformat_free (line_input);
+ }
+
+ return bufmon_enable_disable (vm, on);
+}
+
+VLIB_CLI_COMMAND (set_buffer_traces_command, static) = {
+ .path = "set buffer traces",
+ .short_help = "set buffer traces [on|off]",
+ .function = set_buffer_traces,
+};
+
+static clib_error_t *
+show_buffer_traces (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ const bufmon_main_t *bm = &bufmon_main;
+ const bufmon_per_thread_data_t *ptd;
+ const bufmon_per_node_data_t *pnd;
+ int verbose = 0;
+ int status = 0;
+
+ if (unformat_user (input, unformat_line_input, line_input))
+ {
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "verbose"))
+ verbose = 1;
+ else if (unformat (line_input, "status"))
+ status = 1;
+ else
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ }
+ unformat_free (line_input);
+ }
+
+ if (status)
+ {
+      vlib_cli_output (vm, "buffer tracing is %s",
+ bm->enabled ? "on" : "off");
+ return 0;
+ }
+
+ vlib_cli_output (vm, "%U\n\n", format_vlib_buffer_pool_all, vm);
+ vlib_cli_output (vm, "%30s%20s%20s%20s%20s%20s", "Node", "Allocated",
+ "Freed", "In", "Out", "Buffered");
+ vec_foreach (ptd, bm->ptd)
+ {
+ vec_foreach (pnd, ptd->pnd)
+ {
+ const u64 in = pnd->alloc + pnd->in;
+ const u64 out = pnd->free + pnd->out;
+ const i64 buffered = in - out;
+ if (0 == in && 0 == out)
+ continue; /* skip nodes w/o activity */
+ if (0 == buffered && !verbose)
+ continue; /* if not verbose, skip nodes w/o buffered buffers */
+ vlib_cli_output (vm, "%30U%20lu%20lu%20lu%20lu%20ld",
+ format_vlib_node_name, vm, pnd - ptd->pnd,
+ pnd->alloc, pnd->free, pnd->in, pnd->out, buffered);
+ }
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_buffer_traces_command, static) = {
+ .path = "show buffer traces",
+ .short_help = "show buffer traces [status|verbose]",
+ .function = show_buffer_traces,
+};
+
+static clib_error_t *
+clear_buffer_traces (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ const bufmon_main_t *bm = &bufmon_main;
+ const bufmon_per_thread_data_t *ptd;
+  bufmon_per_node_data_t *pnd;
+
+  vec_foreach (ptd, bm->ptd)
+    vec_foreach (pnd, ptd->pnd)
+      clib_memset (pnd, 0, sizeof (*pnd)); /* zero the per-node counters */
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (clear_buffers_trace_command, static) = {
+ .path = "clear buffer traces",
+ .short_help = "clear buffer traces",
+ .function = clear_buffer_traces,
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Buffers monitoring plugin",
+};
diff --git a/src/plugins/bufmon/bufmon_doc.rst b/src/plugins/bufmon/bufmon_doc.rst
new file mode 100644
index 00000000000..34d5bd35474
--- /dev/null
+++ b/src/plugins/bufmon/bufmon_doc.rst
@@ -0,0 +1,33 @@
+.. _bufmon_doc:
+
+Buffers monitoring plugin
+=========================
+
+This plugin tracks buffer utilization in the VPP graph nodes. Its main
+use is to detect buffer leakage. It works by keeping track of the number
+of buffer allocations and frees in each graph node, as well as the number
+of buffers received in input frames and sent in output frames. The number
+of “buffered” buffers in a node is then simply:
+
+::
+
+   #buffered = #alloc + #input - #free - #output
+
+Note: monitoring will impact performance.
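+
+A minimal sketch of the same computation over the plugin's per-node
+counters (mirroring what ``show buffer traces`` prints; ``pnd`` is the
+per-node data kept by the plugin):
+
+::
+
+   const u64 in = pnd->alloc + pnd->in;   /* allocated + received */
+   const u64 out = pnd->free + pnd->out;  /* freed + sent */
+   const i64 buffered = in - out;         /* > 0 may indicate a leak */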
+
+Basic usage
+-----------
+
+1. Turn buffer traces on:
+
+::
+
+ ~# vppctl set buffer traces on
+
+2. Monitor buffer usage:
+
+::
+
+ ~# vppctl show buffer traces verbose
+
+3. Turn buffer traces off:
+
+::
+
+ ~# vppctl set buffer traces off
diff --git a/src/plugins/builtinurl/builtins.c b/src/plugins/builtinurl/builtins.c
index 04567c0f8c5..b04e9dd5c7c 100644
--- a/src/plugins/builtinurl/builtins.c
+++ b/src/plugins/builtinurl/builtins.c
@@ -18,9 +18,8 @@
#include <http_static/http_static.h>
#include <vpp/app/version.h>
-int
-handle_get_version (http_builtin_method_type_t reqtype,
- u8 * request, http_session_t * hs)
+hss_url_handler_rc_t
+handle_get_version (hss_url_handler_args_t *args)
{
u8 *s = 0;
@@ -29,11 +28,10 @@ handle_get_version (http_builtin_method_type_t reqtype,
s = format (s, " \"version\": \"%s\",", VPP_BUILD_VER);
s = format (s, " \"build_date\": \"%s\"}}\r\n", VPP_BUILD_DATE);
- hs->data = s;
- hs->data_offset = 0;
- hs->cache_pool_index = ~0;
- hs->free_data = 1;
- return 0;
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ return HSS_URL_HANDLER_OK;
}
void
@@ -56,16 +54,15 @@ trim_path_from_request (u8 * s, char *path)
* like a c-string.
*/
*cp = 0;
- _vec_len (s) = cp - s;
+ vec_set_len (s, cp - s);
break;
}
cp++;
}
}
-int
-handle_get_interface_stats (http_builtin_method_type_t reqtype,
- u8 * request, http_session_t * hs)
+hss_url_handler_rc_t
+handle_get_interface_stats (hss_url_handler_args_t *args)
{
u8 *s = 0, *stats = 0;
uword *p;
@@ -81,16 +78,16 @@ handle_get_interface_stats (http_builtin_method_type_t reqtype,
vnet_interface_main_t *im = &vnm->interface_main;
/* Get stats for a single interface via http POST */
- if (reqtype == HTTP_BUILTIN_METHOD_POST)
+ if (args->reqtype == HTTP_REQ_POST)
{
- trim_path_from_request (request, "interface_stats.json");
+ trim_path_from_request (args->request, "interface_stats.json");
/* Find the sw_if_index */
- p = hash_get (im->hw_interface_by_name, request);
+ p = hash_get (im->hw_interface_by_name, args->request);
if (!p)
{
s = format (s, "{\"interface_stats\": {[\n");
- s = format (s, " \"name\": \"%s\",", request);
+ s = format (s, " \"name\": \"%s\",", args->request);
s = format (s, " \"error\": \"%s\"", "UnknownInterface");
s = format (s, "]}\n");
goto out;
@@ -100,12 +97,10 @@ handle_get_interface_stats (http_builtin_method_type_t reqtype,
}
else /* default, HTTP_BUILTIN_METHOD_GET */
{
- /* *INDENT-OFF* */
pool_foreach (hi, im->hw_interfaces)
{
vec_add1 (sw_if_indices, hi->sw_if_index);
}
- /* *INDENT-ON* */
}
s = format (s, "{%sinterface_stats%s: [\n", q, q);
@@ -133,18 +128,16 @@ handle_get_interface_stats (http_builtin_method_type_t reqtype,
s = format (s, "]}\n");
out:
- hs->data = s;
- hs->data_offset = 0;
- hs->cache_pool_index = ~0;
- hs->free_data = 1;
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
vec_free (sw_if_indices);
vec_free (stats);
- return 0;
+ return HSS_URL_HANDLER_OK;
}
-int
-handle_get_interface_list (http_builtin_method_type_t reqtype,
- u8 * request, http_session_t * hs)
+hss_url_handler_rc_t
+handle_get_interface_list (hss_url_handler_args_t *args)
{
u8 *s = 0;
int i;
@@ -155,14 +148,12 @@ handle_get_interface_list (http_builtin_method_type_t reqtype,
int need_comma = 0;
/* Construct vector of active hw_if_indexes ... */
- /* *INDENT-OFF* */
pool_foreach (hi, im->hw_interfaces)
{
/* No point in mentioning "local0"... */
if (hi - im->hw_interfaces)
vec_add1 (hw_if_indices, hi - im->hw_interfaces);
}
- /* *INDENT-ON* */
/* Build answer */
s = format (s, "{\"interface_list\": [\n");
@@ -177,25 +168,23 @@ handle_get_interface_list (http_builtin_method_type_t reqtype,
s = format (s, "]}\n");
vec_free (hw_if_indices);
- hs->data = s;
- hs->data_offset = 0;
- hs->cache_pool_index = ~0;
- hs->free_data = 1;
- return 0;
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ return HSS_URL_HANDLER_OK;
}
void
builtinurl_handler_init (builtinurl_main_t * bm)
{
- bm->register_handler (handle_get_version, "version.json",
- HTTP_BUILTIN_METHOD_GET);
+ bm->register_handler (handle_get_version, "version.json", HTTP_REQ_GET);
bm->register_handler (handle_get_interface_list, "interface_list.json",
- HTTP_BUILTIN_METHOD_GET);
- bm->register_handler (handle_get_interface_stats,
- "interface_stats.json", HTTP_BUILTIN_METHOD_GET);
- bm->register_handler (handle_get_interface_stats,
- "interface_stats.json", HTTP_BUILTIN_METHOD_POST);
+ HTTP_REQ_GET);
+ bm->register_handler (handle_get_interface_stats, "interface_stats.json",
+ HTTP_REQ_GET);
+ bm->register_handler (handle_get_interface_stats, "interface_stats.json",
+ HTTP_REQ_POST);
}
/*
diff --git a/src/plugins/builtinurl/builtinurl.c b/src/plugins/builtinurl/builtinurl.c
index 8782906a8d2..749a2c93b8a 100644
--- a/src/plugins/builtinurl/builtinurl.c
+++ b/src/plugins/builtinurl/builtinurl.c
@@ -85,14 +85,12 @@ builtinurl_enable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (builtinurl_enable_command, static) =
{
.path = "builtinurl enable",
.short_help = "Turn on builtin http/https GET and POST urls",
.function = builtinurl_enable_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_builtinurl_enable_t_handler
@@ -124,13 +122,11 @@ builtinurl_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (builtinurl_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "vpp built-in URL support",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/cdp/cdp.c b/src/plugins/cdp/cdp.c
index 1fe557fe82e..00784ccd0bc 100644
--- a/src/plugins/cdp/cdp.c
+++ b/src/plugins/cdp/cdp.c
@@ -86,14 +86,12 @@ cdp_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cdp_command, static) =
{
.path = "cdp",
.short_help = "cdp enable | disable",
.function = cdp_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_cdp_enable_disable_t_handler
@@ -124,13 +122,11 @@ cdp_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (cdp_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Cisco Discovery Protocol (CDP)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/cdp/cdp.pg b/src/plugins/cdp/cdp.pg
index b6ba18656c2..32700463ed0 100644
--- a/src/plugins/cdp/cdp.pg
+++ b/src/plugins/cdp/cdp.pg
@@ -1,7 +1,7 @@
-packet-generator new {
- name cdp
- limit 1
- node cdp-input
- size 374-374
- data { hex 0x02b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2043333735304520536f66747761726520284333373530452d554e4956455253414c2d4d292c2056657273696f6e2031322e32283335295345352c2052454c4541534520534f4654574152452028666331290a436f707972696768742028632920313938362d3230303720627920436973636f2053797374656d732c20496e632e0a436f6d70696c6564205468752031392d4a756c2d30372031363a3137206279206e616368656e00060018636973636f2057532d4333373530452d3234544400020011000000010101cc0004000000000003001b54656e4769676162697445746865726e6574312f302f3100040008000000280008002400000c011200000000ffffffff010221ff000000000000001e7a50f000ff000000090004000a00060001000b0005010012000500001300050000160011000000010101cc000400000000001a00100000000100000000ffffffff }
+packet-generator new { \
+ name cdp \
+ limit 1 \
+ node cdp-input \
+ size 374-374 \
+ data { hex 0x02b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2043333735304520536f66747761726520284333373530452d554e4956455253414c2d4d292c2056657273696f6e2031322e32283335295345352c2052454c4541534520534f4654574152452028666331290a436f707972696768742028632920313938362d3230303720627920436973636f2053797374656d732c20496e632e0a436f6d70696c6564205468752031392d4a756c2d30372031363a3137206279206e616368656e00060018636973636f2057532d4333373530452d3234544400020011000000010101cc0004000000000003001b54656e4769676162697445746865726e6574312f302f3100040008000000280008002400000c011200000000ffffffff010221ff000000000000001e7a50f000ff000000090004000a00060001000b0005010012000500001300050000160011000000010101cc000400000000001a00100000000100000000ffffffff } \
}
diff --git a/src/plugins/cdp/cdp_input.c b/src/plugins/cdp/cdp_input.c
index 76a3d70a292..914d4dec66f 100644
--- a/src/plugins/cdp/cdp_input.c
+++ b/src/plugins/cdp/cdp_input.c
@@ -167,25 +167,24 @@ _(version,DEBUG_TLV_DUMP) \
_(platform,DEBUG_TLV_DUMP) \
_(port_id,DEBUG_TLV_DUMP)
-#define _(z,dbg) \
-static \
-cdp_error_t process_##z##_tlv (cdp_main_t *cm, cdp_neighbor_t *n, \
- cdp_tlv_t *t) \
-{ \
- int i; \
- if (dbg) \
- fformat(stdout, "%U\n", format_text_tlv, t); \
- \
- if (n->z) \
- _vec_len(n->z) = 0; \
- \
- for (i = 0; i < (t->l - sizeof (*t)); i++) \
- vec_add1(n->z, t->v[i]); \
- \
- vec_add1(n->z, 0); \
- \
- return CDP_ERROR_NONE; \
-}
+#define _(z, dbg) \
+ static cdp_error_t process_##z##_tlv (cdp_main_t *cm, cdp_neighbor_t *n, \
+ cdp_tlv_t *t) \
+ { \
+ int i; \
+ if (dbg) \
+ fformat (stdout, "%U\n", format_text_tlv, t); \
+ \
+ if (n->z) \
+ vec_set_len (n->z, 0); \
+ \
+ for (i = 0; i < (t->l - sizeof (*t)); i++) \
+ vec_add1 (n->z, t->v[i]); \
+ \
+ vec_add1 (n->z, 0); \
+ \
+ return CDP_ERROR_NONE; \
+ }
foreach_text_to_struct_tlv
#undef _
@@ -354,7 +353,7 @@ cdp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
*/
if (n->last_rx_pkt)
- _vec_len (n->last_rx_pkt) = 0;
+ vec_set_len (n->last_rx_pkt, 0);
/* cdp disabled on this interface, we're done */
if (n->disabled)
@@ -417,12 +416,10 @@ cdp_input_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (cdp_input_init) =
{
.runs_after = VLIB_INITS("cdp_periodic_init"),
};
-/* *INDENT-ON* */
static u8 *
@@ -438,7 +435,6 @@ format_cdp_neighbors (u8 * s, va_list * va)
"%=25s %=25s %=25s %=10s\n",
"Our Port", "Peer System", "Peer Port", "Last Heard");
- /* *INDENT-OFF* */
pool_foreach (n, cm->neighbors)
{
hw = vnet_get_sup_hw_interface (vnm, n->sw_if_index);
@@ -448,7 +444,6 @@ format_cdp_neighbors (u8 * s, va_list * va)
hw->name, n->device_name, n->port_id,
n->last_heard);
}
- /* *INDENT-ON* */
return s;
}
@@ -466,13 +461,11 @@ show_cdp (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_cdp_command, static) = {
.path = "show cdp",
.short_help = "Show cdp command",
.function = show_cdp,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/cdp/cdp_node.c b/src/plugins/cdp/cdp_node.c
index f9ee251c022..49b1e3844e4 100644
--- a/src/plugins/cdp/cdp_node.c
+++ b/src/plugins/cdp/cdp_node.c
@@ -100,7 +100,6 @@ cdp_node_fn (vlib_main_t * vm,
/*
* cdp input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (cdp_input_node, static) = {
.function = cdp_node_fn,
.name = "cdp-input",
@@ -117,7 +116,6 @@ VLIB_REGISTER_NODE (cdp_input_node, static) = {
[CDP_INPUT_NEXT_NORMAL] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* cdp periodic function
diff --git a/src/plugins/cdp/cdp_periodic.c b/src/plugins/cdp/cdp_periodic.c
index c73d86be316..03a2de0d9ab 100644
--- a/src/plugins/cdp/cdp_periodic.c
+++ b/src/plugins/cdp/cdp_periodic.c
@@ -357,12 +357,10 @@ cdp_periodic (vlib_main_t * vm)
int i;
static cdp_neighbor_t **n_list = 0;
- /* *INDENT-OFF* */
pool_foreach (n, cm->neighbors)
{
vec_add1 (n_list, n);
}
- /* *INDENT-ON* */
/* Across all cdp neighbors known to the system */
for (i = 0; i < vec_len (n_list); i++)
@@ -394,9 +392,9 @@ cdp_periodic (vlib_main_t * vm)
delete_neighbor (cm, n, 1);
}
if (delete_list)
- _vec_len (delete_list) = 0;
+ vec_set_len (delete_list, 0);
if (n_list)
- _vec_len (n_list) = 0;
+ vec_set_len (n_list, 0);
}
static clib_error_t *
diff --git a/src/plugins/cnat/CMakeLists.txt b/src/plugins/cnat/CMakeLists.txt
index cfb55661a78..e99bf056a35 100644
--- a/src/plugins/cnat/CMakeLists.txt
+++ b/src/plugins/cnat/CMakeLists.txt
@@ -24,6 +24,7 @@ add_vpp_plugin(cnat
cnat_types.c
cnat_snat_policy.c
cnat_src_policy.c
+ cnat_maglev.c
API_FILES
cnat.api
diff --git a/src/plugins/cnat/FEATURE.yaml b/src/plugins/cnat/FEATURE.yaml
index 9deda2e94cc..880d713b63f 100644
--- a/src/plugins/cnat/FEATURE.yaml
+++ b/src/plugins/cnat/FEATURE.yaml
@@ -9,7 +9,7 @@ description: "This plugin is intended to complement the VPP's plugin_nat for
Cloud use-cases. It allows for source/destination address/port
translation based on multiple criterias. It is intended to be modular
enough so that one could write a use-case optimised translation function
- without having to deal with actually re-writing packets or maintining
+ without having to deal with actually re-writing packets or maintaining
sessions.
This plugin supports multithreading. Workers share a unique bihash where
sessions are stored."
diff --git a/src/plugins/cnat/cnat.api b/src/plugins/cnat/cnat.api
index e253084e74e..e6ad37dd6eb 100644
--- a/src/plugins/cnat/cnat.api
+++ b/src/plugins/cnat/cnat.api
@@ -1,6 +1,6 @@
/* Hey Emacs use -*- mode: C -*- */
/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2023 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -19,14 +19,16 @@
used to control the ABF plugin
*/
-option version = "0.2.0";
+option version = "0.3.0";
import "vnet/ip/ip_types.api";
import "vnet/fib/fib_types.api";
import "vnet/interface_types.api";
+import "vnet/ip/ip.api";
enum cnat_translation_flags:u8
{
CNAT_TRANSLATION_ALLOC_PORT = 1,
+ CNAT_TRANSLATION_NO_RETURN_SESSION = 4,
};
enum cnat_endpoint_tuple_flags:u8
@@ -70,6 +72,7 @@ typedef cnat_translation
u8 flags;
vl_api_cnat_lb_type_t lb_type;
u32 n_paths;
+ vl_api_ip_flow_hash_config_v2_t flow_hash_config;
vl_api_cnat_endpoint_tuple_t paths[n_paths];
};
@@ -172,6 +175,7 @@ enum cnat_snat_policy_table:u8
CNAT_POLICY_INCLUDE_V4 = 0,
CNAT_POLICY_INCLUDE_V6 = 1,
CNAT_POLICY_POD = 2,
+ CNAT_POLICY_HOST = 3,
};
autoreply define cnat_snat_policy_add_del_if
diff --git a/src/plugins/cnat/cnat.rst b/src/plugins/cnat/cnat.rst
index 8781f405a23..b0426f35373 100644
--- a/src/plugins/cnat/cnat.rst
+++ b/src/plugins/cnat/cnat.rst
@@ -9,7 +9,7 @@ Overview
________
This plugin covers specific NAT use-cases that come mostly
-from the container networking world. On the contraty of the
+from the container networking world. Unlike the
NAT concepts used for e.g. a home gateway, there is no notion
of 'outside' and 'inside'. We handle Virtual (or Real) IPs and
translations of the packets destined to them
@@ -33,9 +33,9 @@ that will store the packet rewrite to do and the one to undo
until the flow is reset or a timeout is reached
A ``session`` is a fully resolved 9-tuple of ``src_ip, src_port, dest_ip, dest_port, proto``
-to match incoming packets, and their new attributes ``new_src_ip, new_src_port, new_dest_ip, new_dest_port``. It allows for ``backend`` stickyness and a fast-path for established connections.
+to match incoming packets, and their new attributes ``new_src_ip, new_src_port, new_dest_ip, new_dest_port``. It allows for ``backend`` stickiness and a fast-path for established connections.
-These ``sessions`` expire after 30s for regular ``sessions`` and 1h for estabished
+These ``sessions`` expire after 30s for regular ``sessions`` and 1h for established
TCP connections. These can be changed in vpp's configuration file
.. code-block:: console
@@ -64,7 +64,7 @@ assigned to an interface
If ``30.0.0.2`` is the address of an interface, we can use the following
-to do the same translation, and additionnaly change the source.
+to do the same translation, and additionally change the source
address with ``1.2.3.4``
.. code-block:: console
@@ -75,17 +75,17 @@ To show existing translations and sessions you can use
.. code-block:: console
- cnat show session verbose
- cant show translation
+ show cnat session verbose
+ show cnat translation
SourceNATing outgoing traffic
-----------------------------
-A independant part of the plugin allows changing the source address
+An independent part of the plugin allows changing the source address
of outgoing traffic on a per-interface basis.
-In the following example, all traffic comming from ``tap0`` and NOT
+In the following example, all traffic coming from ``tap0`` and NOT
going to ``20.0.0.0/24`` will be source NAT-ed with ``30.0.0.1``.
On the way back the translation will be undone.
@@ -94,10 +94,18 @@ address assigned to an interface)
.. code-block:: console
- cnat snat with 30.0.0.1
- cnat snat exclude 20.0.0.0/24
+ set cnat snat-policy addr 30.0.0.1
+ set cnat snat-policy if-pfx
+ set cnat snat-policy if table include-v4 tap0
+ set cnat snat-policy prefix 20.0.0.0/24
set interface feature tap0 cnat-snat-ip4 arc ip4-unicast
+To show the enforced snat policies:
+
+.. code-block:: console
+
+ show cnat snat-policy
+
Other parameters
----------------
@@ -105,7 +113,7 @@ In vpp's startup file, you can also configure the bihash sizes for
* the translation bihash ``(proto, port) -> translation``
* the session bihash ``src_ip, src_port, dest_ip, dest_port, proto -> new_src_ip, new_src_port, new_dest_ip, new_dest_port``
-* the snat bihash for searching ``snat exclude`` prefixes
+* the snat bihash for searching ``snat-policy`` excluded prefixes
.. code-block:: console
@@ -126,19 +134,19 @@ This plugin is built to be extensible. For now two NAT types are defined, ``cnat
* Session lookup : ``rv`` will be set to ``0`` if a session was found
* Translation primitives ``cnat_translation_ip4`` based on sessions
* A session creation primitive ``cnat_session_create``
+* A reverse session creation primitive ``cnat_rsession_create``
-Creating a session will also create a reverse session (for matching return traffic),
-and call a NAT node back that will perform the translation.
+Creating a session will also create a reverse session matching the return traffic, unless told otherwise by setting ``CNAT_TR_FLAG_NO_RETURN_SESSION`` on the translation. The reverse session will invoke the NAT nodes on the return flow and perform the inverse translation.
Known limitations
_________________
-This plugin is still under developpment, it lacks the following features :
+This plugin is still under development; it lacks the following features:
* Load balancing doesn't support parametric probabilities
-* VRFs aren't supported. All rules apply to fib table 0 only
+* VRFs are not supported; all rules apply regardless of the FIB table.
* Programmatic session handling (deletion, lifetime updates) aren't supported
-* ICMP is not yet supported
-* Traffic matching is only done based on ``(proto, dst_addr, dst_port)`` source matching isn't supported
+* translations (i.e. rewriting the destination address) only match on the
+  three-tuple ``(proto, dst_addr, dst_port)``; other matches are not supported
* Statistics & session tracking are still rudimentary.
diff --git a/src/plugins/cnat/cnat_api.c b/src/plugins/cnat/cnat_api.c
index ea4b3aeaaef..c578e303499 100644
--- a/src/plugins/cnat/cnat_api.c
+++ b/src/plugins/cnat/cnat_api.c
@@ -81,7 +81,7 @@ cnat_endpoint_encode (const cnat_endpoint_t * in,
if (in->ce_flags & CNAT_EP_FLAG_RESOLVED)
ip_address_encode2 (&in->ce_ip, &out->addr);
else
- clib_memset ((void *) &in->ce_ip, 0, sizeof (in->ce_ip));
+ clib_memset (&out->addr, 0, sizeof (out->addr));
}
static void
@@ -97,6 +97,7 @@ vl_api_cnat_translation_update_t_handler (vl_api_cnat_translation_update_t
int rv = 0;
u32 pi, n_paths;
cnat_lb_type_t lb_type;
+ flow_hash_config_t flow_hash_config = 0;
rv = ip_proto_decode (mp->translation.ip_proto, &ip_proto);
@@ -123,7 +124,10 @@ vl_api_cnat_translation_update_t_handler (vl_api_cnat_translation_update_t
flags |= CNAT_FLAG_EXCLUSIVE;
lb_type = (cnat_lb_type_t) mp->translation.lb_type;
- id = cnat_translation_update (&vip, ip_proto, paths, flags, lb_type);
+ flow_hash_config = (flow_hash_config_t) clib_net_to_host_u32 (
+ mp->translation.flow_hash_config);
+ id = cnat_translation_update (&vip, ip_proto, paths, flags, lb_type,
+ flow_hash_config);
vec_free (paths);
diff --git a/src/plugins/cnat/cnat_bihash.h b/src/plugins/cnat/cnat_bihash.h
index c488e61a07d..75099f6bfdb 100644
--- a/src/plugins/cnat/cnat_bihash.h
+++ b/src/plugins/cnat/cnat_bihash.h
@@ -44,11 +44,16 @@ typedef struct
u64 value[7];
} clib_bihash_kv_40_56_t;
+static inline void
+clib_bihash_mark_free_40_56 (clib_bihash_kv_40_56_t *v)
+{
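+  /* sentinel marking the entry as free; is_free below checks this value */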
+ v->value[0] = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_40_56 (const clib_bihash_kv_40_56_t *v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value[0] == ~0ULL)
+ if (v->value[0] == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
diff --git a/src/plugins/cnat/cnat_client.c b/src/plugins/cnat/cnat_client.c
index b8fcb9add64..a28896a4c12 100644
--- a/src/plugins/cnat/cnat_client.c
+++ b/src/plugins/cnat/cnat_client.c
@@ -20,10 +20,9 @@
#include <cnat/cnat_translation.h>
cnat_client_t *cnat_client_pool;
-
cnat_client_db_t cnat_client_db;
-
dpo_type_t cnat_client_dpo;
+fib_source_t cnat_fib_source;
static_always_inline u8
cnat_client_is_clone (cnat_client_t * cc)
@@ -34,10 +33,42 @@ cnat_client_is_clone (cnat_client_t * cc)
static void
cnat_client_db_remove (cnat_client_t * cc)
{
+ clib_bihash_kv_16_8_t bkey;
+ if (ip_addr_version (&cc->cc_ip) == AF_IP4)
+ {
+ bkey.key[0] = ip_addr_v4 (&cc->cc_ip).as_u32;
+ bkey.key[1] = 0;
+ }
+ else
+ {
+ bkey.key[0] = ip_addr_v6 (&cc->cc_ip).as_u64[0];
+ bkey.key[1] = ip_addr_v6 (&cc->cc_ip).as_u64[1];
+ }
+
+ clib_bihash_add_del_16_8 (&cnat_client_db.cc_ip_id_hash, &bkey, 0 /* del */);
+}
+
+static void
+cnat_client_db_add (cnat_client_t *cc)
+{
+ index_t cci;
+
+ cci = cc - cnat_client_pool;
+
+ clib_bihash_kv_16_8_t bkey;
+ bkey.value = cci;
if (ip_addr_version (&cc->cc_ip) == AF_IP4)
- hash_unset (cnat_client_db.crd_cip4, ip_addr_v4 (&cc->cc_ip).as_u32);
+ {
+ bkey.key[0] = ip_addr_v4 (&cc->cc_ip).as_u32;
+ bkey.key[1] = 0;
+ }
else
- hash_unset_mem_free (&cnat_client_db.crd_cip6, &ip_addr_v6 (&cc->cc_ip));
+ {
+ bkey.key[0] = ip_addr_v6 (&cc->cc_ip).as_u64[0];
+ bkey.key[1] = ip_addr_v6 (&cc->cc_ip).as_u64[1];
+ }
+
+ clib_bihash_add_del_16_8 (&cnat_client_db.cc_ip_id_hash, &bkey, 1 /* add */);
}
static void
@@ -118,21 +149,6 @@ cnat_client_translation_deleted (index_t cci)
cnat_client_destroy (cc);
}
-static void
-cnat_client_db_add (cnat_client_t * cc)
-{
- index_t cci;
-
- cci = cc - cnat_client_pool;
-
- if (ip_addr_version (&cc->cc_ip) == AF_IP4)
- hash_set (cnat_client_db.crd_cip4, ip_addr_v4 (&cc->cc_ip).as_u32, cci);
- else
- hash_set_mem_alloc (&cnat_client_db.crd_cip6,
- &ip_addr_v6 (&cc->cc_ip), cci);
-}
-
-
index_t
cnat_client_add (const ip_address_t * ip, u8 flags)
{
@@ -228,12 +244,6 @@ int
cnat_client_purge (void)
{
int rv = 0, rrv = 0;
- if ((rv = hash_elts (cnat_client_db.crd_cip6)))
- clib_warning ("len(crd_cip6) isnt 0 but %d", rv);
- rrv |= rv;
- if ((rv = hash_elts (cnat_client_db.crd_cip4)))
- clib_warning ("len(crd_cip4) isnt 0 but %d", rv);
- rrv |= rv;
if ((rv = pool_elts (cnat_client_pool)))
clib_warning ("len(cnat_client_pool) isnt 0 but %d", rv);
rrv |= rv;
@@ -251,9 +261,9 @@ format_cnat_client (u8 * s, va_list * args)
cnat_client_t *cc = pool_elt_at_index (cnat_client_pool, cci);
- s = format (s, "[%d] cnat-client:[%U] tr:%d sess:%d", cci,
- format_ip_address, &cc->cc_ip,
- cc->tr_refcnt, cc->session_refcnt);
+ s = format (s, "[%d] cnat-client:[%U] tr:%d sess:%d locks:%u", cci,
+ format_ip_address, &cc->cc_ip, cc->tr_refcnt, cc->session_refcnt,
+ cc->cc_locks);
if (cc->flags & CNAT_FLAG_EXCLUSIVE)
s = format (s, " exclusive");
@@ -291,7 +301,6 @@ cnat_client_show (vlib_main_t * vm,
vlib_cli_output(vm, "%U", format_cnat_client, cci, 0);
vlib_cli_output (vm, "%d clients", pool_elts (cnat_client_pool));
- vlib_cli_output (vm, "%d timestamps", pool_elts (cnat_timestamps));
}
else
{
@@ -371,12 +380,15 @@ const static dpo_vft_t cnat_client_dpo_vft = {
static clib_error_t *
cnat_client_init (vlib_main_t * vm)
{
+ cnat_main_t *cm = &cnat_main;
cnat_client_dpo = dpo_register_new_type (&cnat_client_dpo_vft,
cnat_client_dpo_nodes);
- cnat_client_db.crd_cip6 = hash_create_mem (0,
- sizeof (ip6_address_t),
- sizeof (uword));
+ clib_bihash_init_16_8 (&cnat_client_db.cc_ip_id_hash, "CNat client DB",
+ cm->client_hash_buckets, cm->client_hash_memory);
+
+ cnat_fib_source = fib_source_allocate ("cnat", CNAT_FIB_SOURCE_PRIORITY,
+ FIB_SOURCE_BH_SIMPLE);
clib_spinlock_init (&cnat_client_db.throttle_lock);
cnat_client_db.throttle_mem =
diff --git a/src/plugins/cnat/cnat_client.h b/src/plugins/cnat/cnat_client.h
index d6e3631d868..4dc6b754b2f 100644
--- a/src/plugins/cnat/cnat_client.h
+++ b/src/plugins/cnat/cnat_client.h
@@ -17,6 +17,7 @@
#define __CNAT_CLIENT_H__
#include <cnat/cnat_types.h>
+#include <vppinfra/bihash_16_8.h>
/**
* A client is a representation of an IP address behind the NAT.
@@ -85,8 +86,6 @@ extern void cnat_client_free_by_ip (ip46_address_t * addr, u8 af);
extern cnat_client_t *cnat_client_pool;
extern dpo_type_t cnat_client_dpo;
-#define CC_INDEX_INVALID ((u32)(~0))
-
static_always_inline cnat_client_t *
cnat_client_get (index_t i)
{
@@ -132,8 +131,7 @@ extern void cnat_client_throttle_pool_process ();
*/
typedef struct cnat_client_db_t_
{
- uword *crd_cip4;
- uword *crd_cip6;
+ clib_bihash_16_8_t cc_ip_id_hash;
/* Pool of addresses that have been throttled
and need to be refcounted before calling
cnat_client_free_by_ip */
@@ -149,27 +147,15 @@ extern cnat_client_db_t cnat_client_db;
static_always_inline cnat_client_t *
cnat_client_ip4_find (const ip4_address_t * ip)
{
- uword *p;
-
- p = hash_get (cnat_client_db.crd_cip4, ip->as_u32);
-
- if (p)
- return (pool_elt_at_index (cnat_client_pool, p[0]));
-
- return (NULL);
-}
-
-static_always_inline u32
-cnat_client_ip4_find_index (const ip4_address_t * ip)
-{
- uword *p;
+ clib_bihash_kv_16_8_t bkey, bval;
- p = hash_get (cnat_client_db.crd_cip4, ip->as_u32);
+ bkey.key[0] = ip->as_u32;
+ bkey.key[1] = 0;
- if (p)
- return p[0];
+ if (clib_bihash_search_16_8 (&cnat_client_db.cc_ip_id_hash, &bkey, &bval))
+ return (NULL);
- return -1;
+ return (pool_elt_at_index (cnat_client_pool, bval.value));
}
/**
@@ -178,14 +164,15 @@ cnat_client_ip4_find_index (const ip4_address_t * ip)
static_always_inline cnat_client_t *
cnat_client_ip6_find (const ip6_address_t * ip)
{
- uword *p;
+ clib_bihash_kv_16_8_t bkey, bval;
- p = hash_get_mem (cnat_client_db.crd_cip6, ip);
+ bkey.key[0] = ip->as_u64[0];
+ bkey.key[1] = ip->as_u64[1];
- if (p)
- return (pool_elt_at_index (cnat_client_pool, p[0]));
+ if (clib_bihash_search_16_8 (&cnat_client_db.cc_ip_id_hash, &bkey, &bval))
+ return (NULL);
- return (NULL);
+ return (pool_elt_at_index (cnat_client_pool, bval.value));
}
/**
diff --git a/src/plugins/cnat/cnat_inline.h b/src/plugins/cnat/cnat_inline.h
index 5a55ecbf3c0..2986b3497a9 100644
--- a/src/plugins/cnat/cnat_inline.h
+++ b/src/plugins/cnat/cnat_inline.h
@@ -19,72 +19,122 @@
#include <cnat/cnat_types.h>
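+
+/*
+ * Timestamp indices pack a pool selector in the top CNAT_TS_MPOOL_BITS
+ * bits and the element index within that pool in the remaining bits.
+ */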
+always_inline int
+cnat_ts_is_free_index (u32 index)
+{
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ return pool_is_free_index (cnat_timestamps.ts_pools[pidx], index);
+}
+
+always_inline cnat_timestamp_t *
+cnat_timestamp_get (u32 index)
+{
+ /* 6 top bits for choosing pool */
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ return pool_elt_at_index (cnat_timestamps.ts_pools[pidx], index);
+}
+
+always_inline cnat_timestamp_t *
+cnat_timestamp_get_if_valid (u32 index)
+{
+ /* 6 top bits for choosing pool */
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ if (pidx >= cnat_timestamps.next_empty_pool_idx)
+ return (NULL);
+ if (pool_is_free_index (cnat_timestamps.ts_pools[pidx], index))
+ return (NULL);
+ return pool_elt_at_index (cnat_timestamps.ts_pools[pidx], index);
+}
+
+always_inline index_t
+cnat_timestamp_alloc ()
+{
+ cnat_timestamp_t *ts;
+ u32 index, pool_sz;
+ uword pidx;
+
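+  /* pools grow geometrically: pool i holds 1 << (CNAT_TS_BASE_SIZE + i)
+   * entries; the ts_free bitmap tracks pools with room left */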
+ clib_spinlock_lock (&cnat_timestamps.ts_lock);
+ pidx = clib_bitmap_first_set (cnat_timestamps.ts_free);
+ pool_sz = 1 << (CNAT_TS_BASE_SIZE + pidx);
+ ASSERT (pidx <= cnat_timestamps.next_empty_pool_idx);
+ if (pidx == cnat_timestamps.next_empty_pool_idx)
+ pool_init_fixed (
+ cnat_timestamps.ts_pools[cnat_timestamps.next_empty_pool_idx++],
+ pool_sz);
+ pool_get (cnat_timestamps.ts_pools[pidx], ts);
+ if (pool_elts (cnat_timestamps.ts_pools[pidx]) == pool_sz)
+ clib_bitmap_set (cnat_timestamps.ts_free, pidx, 0);
+ clib_spinlock_unlock (&cnat_timestamps.ts_lock);
+
+ index = (u32) pidx << (32 - CNAT_TS_MPOOL_BITS);
+ return index | (ts - cnat_timestamps.ts_pools[pidx]);
+}
+
+always_inline void
+cnat_timestamp_destroy (u32 index)
+{
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ clib_spinlock_lock (&cnat_timestamps.ts_lock);
+ pool_put_index (cnat_timestamps.ts_pools[pidx], index);
+ clib_bitmap_set (cnat_timestamps.ts_free, pidx, 1);
+ clib_spinlock_unlock (&cnat_timestamps.ts_lock);
+}
+
always_inline u32
cnat_timestamp_new (f64 t)
{
- u32 index;
- cnat_timestamp_t *ts;
- clib_rwlock_writer_lock (&cnat_main.ts_lock);
- pool_get (cnat_timestamps, ts);
+ index_t index = cnat_timestamp_alloc ();
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->last_seen = t;
ts->lifetime = cnat_main.session_max_age;
ts->refcnt = CNAT_TIMESTAMP_INIT_REFCNT;
- index = ts - cnat_timestamps;
- clib_rwlock_writer_unlock (&cnat_main.ts_lock);
return index;
}
always_inline void
cnat_timestamp_inc_refcnt (u32 index)
{
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
- ts->refcnt++;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
+ clib_atomic_add_fetch (&ts->refcnt, 1);
}
always_inline void
cnat_timestamp_update (u32 index, f64 t)
{
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->last_seen = t;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
}
always_inline void
cnat_timestamp_set_lifetime (u32 index, u16 lifetime)
{
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->lifetime = lifetime;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
}
always_inline f64
cnat_timestamp_exp (u32 index)
{
f64 t;
- if (INDEX_INVALID == index)
+ cnat_timestamp_t *ts = cnat_timestamp_get_if_valid (index);
+ if (NULL == ts)
return -1;
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
t = ts->last_seen + (f64) ts->lifetime;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
return t;
}
always_inline void
cnat_timestamp_free (u32 index)
{
- if (INDEX_INVALID == index)
+ cnat_timestamp_t *ts = cnat_timestamp_get_if_valid (index);
+ if (NULL == ts)
return;
- clib_rwlock_writer_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
- ts->refcnt--;
- if (0 == ts->refcnt)
- pool_put (cnat_timestamps, ts);
- clib_rwlock_writer_unlock (&cnat_main.ts_lock);
+ if (0 == clib_atomic_sub_fetch (&ts->refcnt, 1))
+ cnat_timestamp_destroy (index);
}
/*
diff --git a/src/plugins/cnat/cnat_maglev.c b/src/plugins/cnat/cnat_maglev.c
new file mode 100644
index 00000000000..2cdb868b3d7
--- /dev/null
+++ b/src/plugins/cnat/cnat_maglev.c
@@ -0,0 +1,379 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <cnat/cnat_maglev.h>
+
+static int
+cnat_maglev_perm_compare (void *_a, void *_b)
+{
+ return *(u64 *) _b - *(u64 *) _a;
+}
+
+/**
+ * Maglev algorithm implementation. This takes the permutations as input,
+ * i.e. the offset & skip values for each backend.
+ * It fills buckets according to the permutations, provided buckets
+ * already has length at least M.
+ */
+static void
+cnat_maglev_shuffle (cnat_maglev_perm_t *permutation, u32 *buckets)
+{
+ u32 N, M, i, done = 0;
+ u32 *next = 0;
+
+ N = vec_len (permutation);
+ if (N == 0)
+ return;
+
+ M = vec_len (buckets);
+ if (M == 0)
+ return;
+ vec_set (buckets, -1);
+
+ vec_validate (next, N - 1);
+ vec_zero (next);
+
+ while (1)
+ {
+ for (i = 0; i < N; i++)
+ {
+ u32 c = (permutation[i].offset + next[i] * permutation[i].skip) % M;
+ while (buckets[c] != (u32) -1)
+ {
+ next[i]++;
+ c = (permutation[i].offset + next[i] * permutation[i].skip) % M;
+ }
+
+ buckets[c] = permutation[i].index;
+ next[i]++;
+ done++;
+
+ if (done == M)
+ {
+ vec_free (next);
+ return;
+ }
+ }
+ }
+}
+
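+/*
+ * Illustrative example (not from the source): with M = 7 buckets, a
+ * backend with offset = 3 and skip = 4 proposes slots (3 + 4k) % 7 =
+ * 3, 0, 4, 1, 5, 2, 6; backends take turns claiming their first
+ * unclaimed slot, giving near-equal shares and minimal churn on change.
+ */
+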
+void
+cnat_translation_init_maglev (cnat_translation_t *ct)
+{
+ cnat_maglev_perm_t *permutations = NULL;
+ cnat_main_t *cm = &cnat_main;
+ cnat_ep_trk_t *trk;
+ u32 backend_index = 0;
+
+ if (vec_len (ct->ct_active_paths) == 0)
+ return;
+
+ vec_foreach (trk, ct->ct_active_paths)
+ {
+ cnat_maglev_perm_t permutation;
+ u32 h1, h2;
+
+ if (AF_IP4 == ip_addr_version (&trk->ct_ep[VLIB_TX].ce_ip))
+ {
+ u32 a, b, c;
+ a = ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32;
+ b = (u64) trk->ct_ep[VLIB_TX].ce_port;
+ c = 0;
+ hash_v3_mix32 (a, b, c);
+ hash_v3_finalize32 (a, b, c);
+ h1 = c;
+ h2 = b;
+ }
+ else
+ {
+ u64 a, b, c;
+ a = ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[0];
+ b = ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[1];
+ c = (u64) trk->ct_ep[VLIB_TX].ce_port;
+ hash_mix64 (a, b, c);
+ h1 = c;
+ h2 = b;
+ }
+
+ permutation.offset = h1 % cm->maglev_len;
+ permutation.skip = h2 % (cm->maglev_len - 1) + 1;
+ permutation.index = backend_index++;
+
+ if (trk->ct_flags & CNAT_TRK_FLAG_TEST_DISABLED)
+ continue;
+
+ vec_add1 (permutations, permutation);
+ }
+
+ vec_sort_with_function (permutations, cnat_maglev_perm_compare);
+
+ vec_validate (ct->lb_maglev, cm->maglev_len - 1);
+
+ cnat_maglev_shuffle (permutations, ct->lb_maglev);
+
+ vec_free (permutations);
+}
+
+static int
+cnat_u32_vec_contains (u32 *v, u32 e)
+{
+ int i;
+
+ vec_foreach_index (i, v)
+ if (v[i] == e)
+ return 1;
+
+ return 0;
+}
+
+static void
+cnat_maglev_print_changes (vlib_main_t *vm, u32 *changed_bk_indices,
+ u32 *old_maglev_lb, u32 *new_maglev_lb)
+{
+ u32 good_flow_buckets = 0, reset_flow_buckets = 0, stable_to_reset = 0;
+ u32 reset_to_stable = 0, switched_stable = 0;
+ if (vec_len (new_maglev_lb) == 0)
+ return;
+ for (u32 i = 0; i < vec_len (new_maglev_lb); i++)
+ {
+ u8 is_new_changed =
+ cnat_u32_vec_contains (changed_bk_indices, new_maglev_lb[i]);
+ u8 is_old_changed =
+ cnat_u32_vec_contains (changed_bk_indices, old_maglev_lb[i]);
+ if (new_maglev_lb[i] == old_maglev_lb[i])
+ {
+ if (is_new_changed)
+ reset_flow_buckets++;
+ else
+ good_flow_buckets++;
+ }
+ else
+ {
+ if (is_new_changed)
+ stable_to_reset++;
+ else if (is_old_changed)
+ reset_to_stable++;
+ else
+ switched_stable++;
+ }
+ }
+ vlib_cli_output (vm,
+ "good B->B:%d | lost A->A':%d A->B:%d ~%0.2f%% | bad "
+ "B->A':%d B->C:%d ~%0.2f%%",
+ good_flow_buckets, reset_flow_buckets, reset_to_stable,
+ (f64) (reset_flow_buckets + reset_to_stable) /
+ vec_len (new_maglev_lb) * 100.0,
+ stable_to_reset, switched_stable,
+ (f64) (stable_to_reset + switched_stable) /
+ vec_len (new_maglev_lb) * 100.0);
+}
+
+static u8 *
+format_cnat_maglev_buckets (u8 *s, va_list *args)
+{
+ u32 *buckets = va_arg (*args, u32 *);
+ u32 backend_idx = va_arg (*args, u32);
+ u32 count = va_arg (*args, u32);
+
+ for (u32 ii = 0; ii < vec_len (buckets); ii++)
+ if (buckets[ii] == backend_idx)
+ {
+ s = format (s, "%d,", ii);
+ if (--count == 0)
+ return (s);
+ }
+ return (s);
+}
+
+static clib_error_t *
+cnat_translation_test_init_maglev (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ cnat_translation_t *trs = 0, *ct;
+ u64 num_backends = 0, n_tests = 0;
+ cnat_main_t *cm = &cnat_main;
+ cnat_ep_trk_t *trk;
+ u32 rnd;
+ u32 n_changes = 0, n_remove = 0, verbose = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "tests %d", &n_tests))
+ ;
+ else if (unformat (input, "backends %d", &num_backends))
+ ;
+ else if (unformat (input, "len %d", &cm->maglev_len))
+ ;
+ else if (unformat (input, "change %d", &n_changes))
+ ;
+ else if (unformat (input, "rm %d", &n_remove))
+ ;
+ else if (unformat (input, "verbose %d", &verbose))
+ ;
+ else
+ return (clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input));
+ }
+
+ if (num_backends == 0 || n_tests == 0)
+ return (clib_error_return (0, "No backends / tests to run"));
+
+ vlib_cli_output (vm, "generating random backends...");
+ rnd = random_default_seed ();
+
+ vec_validate (trs, n_tests - 1);
+ vec_foreach (ct, trs)
+ {
+ vec_validate (ct->ct_active_paths, num_backends - 1);
+ vec_foreach (trk, ct->ct_active_paths)
+ {
+ trk->ct_flags = 0;
+ ip_addr_version (&trk->ct_ep[VLIB_TX].ce_ip) = AF_IP4;
+ ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32 = random_u32 (&rnd);
+ trk->ct_ep[VLIB_TX].ce_port = random_u32 (&rnd);
+ }
+ }
+
+ vlib_cli_output (vm, "testing...");
+ f64 start_time = vlib_time_now (vm);
+ vec_foreach (ct, trs)
+ cnat_translation_init_maglev (ct);
+ f64 d = vlib_time_now (vm) - start_time;
+
+  vlib_cli_output (vm, "Test took: %U", format_duration, d);
+  vlib_cli_output (vm, "Per pool: %U", format_duration, d / n_tests);
+
+ /* sanity checking of the output */
+ u32 *backend_freqs = 0;
+ vec_validate (backend_freqs, num_backends - 1);
+ vec_foreach (ct, trs)
+ {
+ if (vec_len (ct->lb_maglev) != cm->maglev_len)
+ vlib_cli_output (vm, "Unexpected bucket length %d",
+ vec_len (ct->lb_maglev));
+
+ vec_zero (backend_freqs);
+ for (u32 i = 0; i < vec_len (ct->lb_maglev); i++)
+ {
+ if (ct->lb_maglev[i] >= num_backends)
+ clib_warning ("out of bound backend");
+ backend_freqs[ct->lb_maglev[i]]++;
+ }
+ u32 fmin = ~0, fmax = 0;
+ for (u32 i = 0; i < num_backends; i++)
+ {
+ if (backend_freqs[i] > fmax)
+ fmax = backend_freqs[i];
+ if (backend_freqs[i] < fmin)
+ fmin = backend_freqs[i];
+ }
+ f64 fdiff = (fmax - fmin);
+      if (fdiff / vec_len (ct->lb_maglev) > 0.02)
+ vlib_cli_output (vm, "More than 2%% frequency diff (min %d max %d)",
+ fmin, fmax);
+ }
+ vec_free (backend_freqs);
+
+ int i = 0;
+ if (verbose)
+ vec_foreach (ct, trs)
+ {
+ vlib_cli_output (vm, "Translation %d", i++);
+ for (u32 i = 0; i < verbose; i++)
+ {
+ u32 j = random_u32 (&rnd) % vec_len (ct->ct_active_paths);
+ trk = &ct->ct_active_paths[j];
+ vlib_cli_output (
+ vm, "[%03d] %U:%d buckets:%U", j, format_ip_address,
+ &trk->ct_ep[VLIB_TX].ce_ip, trk->ct_ep[VLIB_TX].ce_port,
+ format_cnat_maglev_buckets, ct->lb_maglev, j, verbose);
+ }
+ }
+
+ if (n_remove != 0)
+ {
+ vlib_cli_output (
+      vm, "Removing %d entries (referred to as A), others (B,C) stay same",
+ n_remove);
+ vec_foreach (ct, trs)
+ {
+ u32 *old_maglev_lb = 0;
+ u32 *changed_bk_indices = 0;
+ if (vec_len (ct->lb_maglev) != cm->maglev_len)
+ vlib_cli_output (vm, "Unexpected bucket length %d",
+ vec_len (ct->lb_maglev));
+
+ vec_validate (changed_bk_indices, n_remove - 1);
+ for (u32 i = 0; i < n_remove; i++)
+ {
+ /* remove n_remove backends from the LB set */
+ changed_bk_indices[i] =
+ random_u32 (&rnd) % vec_len (ct->ct_active_paths);
+ trk = &ct->ct_active_paths[changed_bk_indices[i]];
+ trk->ct_flags |= CNAT_TRK_FLAG_TEST_DISABLED;
+ }
+
+ old_maglev_lb = vec_dup (ct->lb_maglev);
+ cnat_translation_init_maglev (ct);
+
+ cnat_maglev_print_changes (vm, changed_bk_indices, old_maglev_lb,
+ ct->lb_maglev);
+
+ vec_free (changed_bk_indices);
+ vec_free (old_maglev_lb);
+ }
+ }
+
+ /* Reshuffle and check changes */
+ if (n_changes != 0)
+ {
+ vlib_cli_output (
+ vm,
+      "Changing %d entries (referred to as A->A'), others (B,C) stay same",
+ n_changes);
+ vec_foreach (ct, trs)
+ {
+ if (vec_len (ct->lb_maglev) != cm->maglev_len)
+ vlib_cli_output (vm, "Unexpected bucket length %d",
+ vec_len (ct->lb_maglev));
+
+ u32 *old_maglev_lb = 0;
+ u32 *changed_bk_indices = 0;
+
+ vec_validate (changed_bk_indices, n_changes - 1);
+ for (u32 i = 0; i < n_changes; i++)
+ {
+ /* Change n_changes backends in the LB set */
+ changed_bk_indices[i] =
+ random_u32 (&rnd) % vec_len (ct->ct_active_paths);
+ trk = &ct->ct_active_paths[changed_bk_indices[i]];
+ ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32 =
+ random_u32 (&rnd);
+ trk->ct_ep[VLIB_TX].ce_port = random_u32 (&rnd) & 0xffff;
+ }
+ old_maglev_lb = vec_dup (ct->lb_maglev);
+
+ cnat_translation_init_maglev (ct);
+ cnat_maglev_print_changes (vm, changed_bk_indices, old_maglev_lb,
+ ct->lb_maglev);
+
+ vec_free (changed_bk_indices);
+ vec_free (old_maglev_lb);
+ }
+ }
+
+ vec_foreach (ct, trs)
+ vec_free (ct->ct_active_paths);
+ vec_free (trs);
+
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (cnat_translation_test_init_maglev_cmd, static) = {
+ .path = "test cnat maglev",
+  .short_help = "test cnat maglev tests [n_tests] backends [num_backends] len "
+                "[maglev_len] [change <n>] [rm <n>] [verbose <n>]",
+ .function = cnat_translation_test_init_maglev,
+};
diff --git a/src/plugins/cnat/cnat_maglev.h b/src/plugins/cnat/cnat_maglev.h
new file mode 100644
index 00000000000..a71dd3ce796
--- /dev/null
+++ b/src/plugins/cnat/cnat_maglev.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#ifndef __CNAT_MAGLEV_H__
+#define __CNAT_MAGLEV_H__
+
+#include <cnat/cnat_types.h>
+#include <cnat/cnat_translation.h>
+
+typedef struct
+{
+ /* offset & skip used for sorting, should be first */
+ u32 offset;
+ u32 skip;
+ u32 index;
+} cnat_maglev_perm_t;
+
+extern void cnat_translation_init_maglev (cnat_translation_t *ct);
+
+#endif
\ No newline at end of file
diff --git a/src/plugins/cnat/cnat_node.h b/src/plugins/cnat/cnat_node.h
index 246fdb8ba57..d81f6745bc4 100644
--- a/src/plugins/cnat/cnat_node.h
+++ b/src/plugins/cnat/cnat_node.h
@@ -19,6 +19,7 @@
#include <vlibmemory/api.h>
#include <vnet/dpo/load_balance.h>
#include <vnet/dpo/load_balance_map.h>
+#include <vnet/ip/ip_psh_cksum.h>
#include <cnat/cnat_session.h>
#include <cnat/cnat_client.h>
@@ -169,86 +170,92 @@ cmp_ip6_address (const ip6_address_t * a1, const ip6_address_t * a2)
* Inline translation functions
*/
-static_always_inline u8
-has_ip6_address (ip6_address_t * a)
+static_always_inline u16
+ip4_pseudo_header_cksum2 (ip4_header_t *ip4, ip4_address_t address[VLIB_N_DIR])
{
- return ((0 != a->as_u64[0]) || (0 != a->as_u64[1]));
+ ip4_psh_t psh = { 0 };
+ psh.src = address[VLIB_RX];
+ psh.dst = address[VLIB_TX];
+ psh.proto = ip4->protocol;
+ psh.l4len = clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
+ sizeof (ip4_header_t));
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t)));
}
static_always_inline void
-cnat_ip4_translate_l4 (ip4_header_t * ip4, udp_header_t * udp,
- ip_csum_t * sum,
+cnat_ip4_translate_l4 (ip4_header_t *ip4, udp_header_t *udp, ip_csum_t *sum,
ip4_address_t new_addr[VLIB_N_DIR],
- u16 new_port[VLIB_N_DIR])
+ u16 new_port[VLIB_N_DIR], u32 oflags)
{
u16 old_port[VLIB_N_DIR];
- ip4_address_t old_addr[VLIB_N_DIR];
+ old_port[VLIB_TX] = udp->dst_port;
+ old_port[VLIB_RX] = udp->src_port;
- /* Fastpath no checksum */
- if (PREDICT_TRUE (0 == *sum))
+ udp->dst_port = new_port[VLIB_TX];
+ udp->src_port = new_port[VLIB_RX];
+
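+  /* with L4 checksum offload, only the pseudo-header sum must be filled in */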
+ if (oflags &
+ (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
{
- udp->dst_port = new_port[VLIB_TX];
- udp->src_port = new_port[VLIB_RX];
+ *sum = ip4_pseudo_header_cksum2 (ip4, new_addr);
return;
}
- old_port[VLIB_TX] = udp->dst_port;
- old_port[VLIB_RX] = udp->src_port;
- old_addr[VLIB_TX] = ip4->dst_address;
- old_addr[VLIB_RX] = ip4->src_address;
+ *sum = ip_csum_update (*sum, ip4->dst_address.as_u32,
+ new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
+ *sum = ip_csum_update (*sum, ip4->src_address.as_u32,
+ new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
- if (new_addr[VLIB_TX].as_u32)
+ *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
+ udp_header_t, dst_port);
+ *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
+ udp_header_t, src_port);
+}
+
+static_always_inline void
+cnat_ip4_translate_sctp (ip4_header_t *ip4, sctp_header_t *sctp,
+ u16 new_port[VLIB_N_DIR])
+{
+ /* Fastpath no checksum */
+ if (PREDICT_TRUE (0 == sctp->checksum))
{
- *sum =
- ip_csum_update (*sum, old_addr[VLIB_TX].as_u32,
- new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
+ sctp->dst_port = new_port[VLIB_TX];
+ sctp->src_port = new_port[VLIB_RX];
+ return;
}
+
if (new_port[VLIB_TX])
- {
- udp->dst_port = new_port[VLIB_TX];
- *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- }
- if (new_addr[VLIB_RX].as_u32)
- {
- *sum =
- ip_csum_update (*sum, old_addr[VLIB_RX].as_u32,
- new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
- }
+ sctp->dst_port = new_port[VLIB_TX];
if (new_port[VLIB_RX])
- {
- udp->src_port = new_port[VLIB_RX];
- *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- }
+ sctp->src_port = new_port[VLIB_RX];
+
+ sctp->checksum = 0;
+ sctp->checksum = clib_host_to_little_u32 (~clib_crc32c_with_init (
+ (u8 *) sctp, ntohs (ip4->length) - sizeof (ip4_header_t),
+ ~0 /* init value */));
}
static_always_inline void
-cnat_ip4_translate_l3 (ip4_header_t * ip4, ip4_address_t new_addr[VLIB_N_DIR])
+cnat_ip4_translate_l3 (ip4_header_t *ip4, ip4_address_t new_addr[VLIB_N_DIR],
+ u32 oflags)
{
ip4_address_t old_addr[VLIB_N_DIR];
ip_csum_t sum;
-
old_addr[VLIB_TX] = ip4->dst_address;
old_addr[VLIB_RX] = ip4->src_address;
+ ip4->dst_address = new_addr[VLIB_TX];
+ ip4->src_address = new_addr[VLIB_RX];
+
+ // We always compute the IP checksum even if oflags &
+ // VNET_BUFFER_OFFLOAD_F_IP_CKSUM is set as this is relatively inexpensive
+  // and avoids issues in drivers that do not behave properly
+ // downstream.
sum = ip4->checksum;
- if (new_addr[VLIB_TX].as_u32)
- {
- ip4->dst_address = new_addr[VLIB_TX];
- sum =
- ip_csum_update (sum, old_addr[VLIB_TX].as_u32,
+ sum = ip_csum_update (sum, old_addr[VLIB_TX].as_u32,
new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
- }
- if (new_addr[VLIB_RX].as_u32)
- {
- ip4->src_address = new_addr[VLIB_RX];
- sum =
- ip_csum_update (sum, old_addr[VLIB_RX].as_u32,
+ sum = ip_csum_update (sum, old_addr[VLIB_RX].as_u32,
new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
- }
ip4->checksum = ip_csum_fold (sum);
}
@@ -257,48 +264,40 @@ cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index)
{
cnat_main_t *cm = &cnat_main;
if (PREDICT_FALSE (tcp_fin (tcp)))
- {
- cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
- }
+ cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
if (PREDICT_FALSE (tcp_rst (tcp)))
- {
- cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
- }
+ cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
if (PREDICT_FALSE (tcp_syn (tcp) && tcp_ack (tcp)))
- {
- cnat_timestamp_set_lifetime (index, cm->tcp_max_age);
- }
+ cnat_timestamp_set_lifetime (index, cm->tcp_max_age);
}
static_always_inline void
-cnat_translation_icmp4_echo (ip4_header_t * ip4, icmp46_header_t * icmp,
+cnat_translation_icmp4_echo (ip4_header_t *ip4, icmp46_header_t *icmp,
ip4_address_t new_addr[VLIB_N_DIR],
- u16 new_port[VLIB_N_DIR])
+ u16 new_port[VLIB_N_DIR], u32 oflags)
{
ip_csum_t sum;
u16 old_port;
cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
- cnat_ip4_translate_l3 (ip4, new_addr);
+ cnat_ip4_translate_l3 (ip4, new_addr, oflags);
old_port = echo->identifier;
echo->identifier = new_port[VLIB_RX];
sum = icmp->checksum;
- sum = ip_csum_update (sum, old_port, new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
+ sum =
+ ip_csum_update (sum, old_port, new_port[VLIB_RX], udp_header_t, src_port);
icmp->checksum = ip_csum_fold (sum);
}
static_always_inline void
-cnat_translation_icmp4_error (ip4_header_t * outer_ip4,
- icmp46_header_t * icmp,
+cnat_translation_icmp4_error (ip4_header_t *outer_ip4, icmp46_header_t *icmp,
ip4_address_t outer_new_addr[VLIB_N_DIR],
- u16 outer_new_port[VLIB_N_DIR],
- u8 snat_outer_ip)
+ u16 outer_new_port[VLIB_N_DIR], u8 snat_outer_ip,
+ u32 oflags)
{
ip4_address_t new_addr[VLIB_N_DIR];
ip4_address_t old_addr[VLIB_N_DIR];
@@ -327,18 +326,20 @@ cnat_translation_icmp4_error (ip4_header_t * outer_ip4,
/* translate outer ip. */
if (!snat_outer_ip)
outer_new_addr[VLIB_RX] = outer_ip4->src_address;
- cnat_ip4_translate_l3 (outer_ip4, outer_new_addr);
+ cnat_ip4_translate_l3 (outer_ip4, outer_new_addr, oflags);
if (ip4->protocol == IP_PROTOCOL_TCP)
{
inner_l4_old_sum = inner_l4_sum = tcp->checksum;
- cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
+ cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port,
+ 0 /* flags */);
tcp->checksum = ip_csum_fold (inner_l4_sum);
}
else if (ip4->protocol == IP_PROTOCOL_UDP)
{
inner_l4_old_sum = inner_l4_sum = udp->checksum;
- cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
+ cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port,
+ 0 /* flags */);
udp->checksum = ip_csum_fold (inner_l4_sum);
}
else
@@ -351,37 +352,30 @@ cnat_translation_icmp4_error (ip4_header_t * outer_ip4,
/* UDP/TCP Ports changed */
if (old_port[VLIB_TX] && new_port[VLIB_TX])
sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
+ udp_header_t, dst_port);
if (old_port[VLIB_RX] && new_port[VLIB_RX])
sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
-
+ udp_header_t, src_port);
- cnat_ip4_translate_l3 (ip4, new_addr);
+ cnat_ip4_translate_l3 (ip4, new_addr, 0 /* oflags */);
ip_csum_t new_ip_sum = ip4->checksum;
/* IP checksum changed */
sum = ip_csum_update (sum, old_ip_sum, new_ip_sum, ip4_header_t, checksum);
/* IP src/dst addr changed */
- if (new_addr[VLIB_TX].as_u32)
- sum =
- ip_csum_update (sum, old_addr[VLIB_TX].as_u32, new_addr[VLIB_TX].as_u32,
- ip4_header_t, dst_address);
+ sum = ip_csum_update (sum, old_addr[VLIB_TX].as_u32,
+ new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
- if (new_addr[VLIB_RX].as_u32)
- sum =
- ip_csum_update (sum, old_addr[VLIB_RX].as_u32, new_addr[VLIB_RX].as_u32,
- ip4_header_t, src_address);
+ sum = ip_csum_update (sum, old_addr[VLIB_RX].as_u32,
+ new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
icmp->checksum = ip_csum_fold (sum);
}
static_always_inline void
-cnat_translation_ip4 (const cnat_session_t * session,
- ip4_header_t * ip4, udp_header_t * udp)
+cnat_translation_ip4 (const cnat_session_t *session, ip4_header_t *ip4,
+ udp_header_t *udp, u32 oflags)
{
tcp_header_t *tcp = (tcp_header_t *) udp;
ip4_address_t new_addr[VLIB_N_DIR];
@@ -395,17 +389,23 @@ cnat_translation_ip4 (const cnat_session_t * session,
if (ip4->protocol == IP_PROTOCOL_TCP)
{
ip_csum_t sum = tcp->checksum;
- cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
+ cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port, oflags);
tcp->checksum = ip_csum_fold (sum);
- cnat_ip4_translate_l3 (ip4, new_addr);
+ cnat_ip4_translate_l3 (ip4, new_addr, oflags);
cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
}
else if (ip4->protocol == IP_PROTOCOL_UDP)
{
ip_csum_t sum = udp->checksum;
- cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
+ cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port, oflags);
udp->checksum = ip_csum_fold (sum);
- cnat_ip4_translate_l3 (ip4, new_addr);
+ cnat_ip4_translate_l3 (ip4, new_addr, oflags);
+ }
+ else if (ip4->protocol == IP_PROTOCOL_SCTP)
+ {
+ sctp_header_t *sctp = (sctp_header_t *) udp;
+ cnat_ip4_translate_sctp (ip4, sctp, new_port);
+ cnat_ip4_translate_l3 (ip4, new_addr, oflags);
}
else if (ip4->protocol == IP_PROTOCOL_ICMP)
{
@@ -417,74 +417,65 @@ cnat_translation_ip4 (const cnat_session_t * session,
(ip4->src_address.as_u32 ==
session->key.cs_ip[VLIB_RX].ip4.as_u32);
cnat_translation_icmp4_error (ip4, icmp, new_addr, new_port,
- snat_outer_ip);
+ snat_outer_ip, oflags);
}
else if (icmp_type_is_echo (icmp->type))
- cnat_translation_icmp4_echo (ip4, icmp, new_addr, new_port);
+ cnat_translation_icmp4_echo (ip4, icmp, new_addr, new_port, oflags);
}
}
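
The new SCTP branch reuses the session ports but cannot reuse the checksum machinery: SCTP's checksum is a CRC32c over the whole packet (RFC 4960), not a one's-complement sum, so it cannot be patched incrementally. A minimal sketch of what cnat_ip4_translate_sctp plausibly does (its body is not part of this diff; the CRC is presumably recomputed elsewhere or left to hardware offload):

    static_always_inline void
    cnat_ip4_translate_sctp_sketch (sctp_header_t *sctp,
                                    u16 new_port[VLIB_N_DIR])
    {
      /* rewrite ports only; the CRC32c cannot be fixed up incrementally */
      if (new_port[VLIB_TX])
        sctp->dst_port = new_port[VLIB_TX];
      if (new_port[VLIB_RX])
        sctp->src_port = new_port[VLIB_RX];
    }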
static_always_inline void
cnat_ip6_translate_l3 (ip6_header_t * ip6, ip6_address_t new_addr[VLIB_N_DIR])
{
- if (has_ip6_address (&new_addr[VLIB_TX]))
- ip6_address_copy (&ip6->dst_address, &new_addr[VLIB_TX]);
- if (has_ip6_address (&new_addr[VLIB_RX]))
- ip6_address_copy (&ip6->src_address, &new_addr[VLIB_RX]);
+ ip6_address_copy (&ip6->dst_address, &new_addr[VLIB_TX]);
+ ip6_address_copy (&ip6->src_address, &new_addr[VLIB_RX]);
+}
+
+static_always_inline u16
+ip6_pseudo_header_cksum2 (ip6_header_t *ip6, ip6_address_t address[VLIB_N_DIR])
+{
+ ip6_psh_t psh = { 0 };
+ psh.src = address[VLIB_RX];
+ psh.dst = address[VLIB_TX];
+ psh.l4len = ip6->payload_length;
+ psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t)));
}
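
ip6_pseudo_header_cksum2 exists for the new offload path: when the buffer carries VNET_BUFFER_OFFLOAD_F_TCP_CKSUM or VNET_BUFFER_OFFLOAD_F_UDP_CKSUM, the NIC computes the final L4 checksum, but the checksum field must be pre-seeded with the pseudo-header sum computed over the translated addresses, i.e.

    seed = ~fold (sum16 (src, dst, payload_length, next_header))

so in cnat_ip6_translate_l4 below, the offload branch replaces all of the incremental add/sub arithmetic with this single recomputation against new_addr.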
static_always_inline void
-cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp,
- ip_csum_t * sum,
+cnat_ip6_translate_l4 (ip6_header_t *ip6, udp_header_t *udp, ip_csum_t *sum,
ip6_address_t new_addr[VLIB_N_DIR],
- u16 new_port[VLIB_N_DIR])
+ u16 new_port[VLIB_N_DIR], u32 oflags)
{
u16 old_port[VLIB_N_DIR];
- ip6_address_t old_addr[VLIB_N_DIR];
+ old_port[VLIB_TX] = udp->dst_port;
+ old_port[VLIB_RX] = udp->src_port;
- /* Fastpath no checksum */
- if (PREDICT_TRUE (0 == *sum))
+ udp->dst_port = new_port[VLIB_TX];
+ udp->src_port = new_port[VLIB_RX];
+
+ if (oflags &
+ (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
{
- udp->dst_port = new_port[VLIB_TX];
- udp->src_port = new_port[VLIB_RX];
+ *sum = ip6_pseudo_header_cksum2 (ip6, new_addr);
return;
}
- old_port[VLIB_TX] = udp->dst_port;
- old_port[VLIB_RX] = udp->src_port;
- ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
- ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
+ *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[0]);
+ *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[1]);
+ *sum = ip_csum_sub_even (*sum, ip6->dst_address.as_u64[0]);
+ *sum = ip_csum_sub_even (*sum, ip6->dst_address.as_u64[1]);
- if (has_ip6_address (&new_addr[VLIB_TX]))
- {
- *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[0]);
- *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[1]);
- *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[0]);
- *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[1]);
- }
+ *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[0]);
+ *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[1]);
+ *sum = ip_csum_sub_even (*sum, ip6->src_address.as_u64[0]);
+ *sum = ip_csum_sub_even (*sum, ip6->src_address.as_u64[1]);
- if (new_port[VLIB_TX])
- {
- udp->dst_port = new_port[VLIB_TX];
- *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- }
- if (has_ip6_address (&new_addr[VLIB_RX]))
- {
- *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[0]);
- *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[1]);
- *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[0]);
- *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[1]);
- }
+ *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
+ udp_header_t, dst_port);
- if (new_port[VLIB_RX])
- {
- udp->src_port = new_port[VLIB_RX];
- *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- }
+ *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
+ udp_header_t, src_port);
}
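
In the non-offload branch the IPv6 address rewrite is folded into the existing L4 checksum by adding the two u64 halves of each new address and subtracting the old ones; ip_csum_add_even/ip_csum_sub_even accept any even-offset quantity, so summing an address as two 64-bit words is equivalent to summing its eight 16-bit words:

    sum' = sum + new[0] + new[1] - old[0] - old[1]   (one's-complement)

The has_ip6_address() guards are dropped throughout this file, presumably because sessions now always carry a fully populated rewrite tuple, which makes the unconditional update both correct and branch-free.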
static_always_inline void
@@ -503,26 +494,20 @@ cnat_translation_icmp6_echo (ip6_header_t * ip6, icmp46_header_t * icmp,
sum = icmp->checksum;
cnat_ip6_translate_l3 (ip6, new_addr);
- if (has_ip6_address (&new_addr[VLIB_TX]))
- {
- sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
- }
- if (has_ip6_address (&new_addr[VLIB_RX]))
- {
- sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
- }
+ sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
+
+ sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
echo->identifier = new_port[VLIB_RX];
- sum = ip_csum_update (sum, old_port, new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
+ sum =
+ ip_csum_update (sum, old_port, new_port[VLIB_RX], udp_header_t, src_port);
icmp->checksum = ip_csum_fold (sum);
}
@@ -566,79 +551,64 @@ cnat_translation_icmp6_error (ip6_header_t * outer_ip6,
if (!snat_outer_ip)
ip6_address_copy (&outer_new_addr[VLIB_RX], &outer_ip6->src_address);
cnat_ip6_translate_l3 (outer_ip6, outer_new_addr);
- if (has_ip6_address (&outer_new_addr[VLIB_TX]))
- {
- sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[1]);
- sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[1]);
- }
- if (has_ip6_address (&outer_new_addr[VLIB_RX]))
- {
- sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[1]);
- sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[1]);
- }
+ sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[1]);
+
+ sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[1]);
/* Translate inner TCP / UDP */
if (ip6->protocol == IP_PROTOCOL_TCP)
{
inner_l4_old_sum = inner_l4_sum = tcp->checksum;
- cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
+ cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port,
+ 0 /* oflags */);
tcp->checksum = ip_csum_fold (inner_l4_sum);
}
else if (ip6->protocol == IP_PROTOCOL_UDP)
{
inner_l4_old_sum = inner_l4_sum = udp->checksum;
- cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
+ cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port,
+ 0 /* oflags */);
udp->checksum = ip_csum_fold (inner_l4_sum);
}
else
return;
/* UDP/TCP checksum changed */
- sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum,
- ip4_header_t /* cheat */ ,
+ sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum, ip4_header_t,
checksum);
/* UDP/TCP Ports changed */
- if (old_port[VLIB_TX] && new_port[VLIB_TX])
- sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
-
- if (old_port[VLIB_RX] && new_port[VLIB_RX])
- sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
+ sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
+ udp_header_t, dst_port);
+ sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
+ udp_header_t, src_port);
cnat_ip6_translate_l3 (ip6, new_addr);
/* IP src/dst addr changed */
- if (has_ip6_address (&new_addr[VLIB_TX]))
- {
- sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
- }
+ sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
- if (has_ip6_address (&new_addr[VLIB_RX]))
- {
- sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
- }
+ sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
icmp->checksum = ip_csum_fold (sum);
}
static_always_inline void
-cnat_translation_ip6 (const cnat_session_t * session,
- ip6_header_t * ip6, udp_header_t * udp)
+cnat_translation_ip6 (const cnat_session_t *session, ip6_header_t *ip6,
+ udp_header_t *udp, u32 oflags)
{
tcp_header_t *tcp = (tcp_header_t *) udp;
ip6_address_t new_addr[VLIB_N_DIR];
@@ -652,7 +622,7 @@ cnat_translation_ip6 (const cnat_session_t * session,
if (ip6->protocol == IP_PROTOCOL_TCP)
{
ip_csum_t sum = tcp->checksum;
- cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
+ cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port, oflags);
tcp->checksum = ip_csum_fold (sum);
cnat_ip6_translate_l3 (ip6, new_addr);
cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
@@ -660,7 +630,7 @@ cnat_translation_ip6 (const cnat_session_t * session,
else if (ip6->protocol == IP_PROTOCOL_UDP)
{
ip_csum_t sum = udp->checksum;
- cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
+ cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port, oflags);
udp->checksum = ip_csum_fold (sum);
cnat_ip6_translate_l3 (ip6, new_addr);
}
@@ -743,6 +713,18 @@ cnat_session_make_key (vlib_buffer_t *b, ip_address_family_t af,
session->key.cs_port[VLIB_RX] = udp->src_port;
session->key.cs_port[VLIB_TX] = udp->dst_port;
}
+ else if (ip4->protocol == IP_PROTOCOL_SCTP)
+ {
+ sctp_header_t *sctp;
+ sctp = (sctp_header_t *) (ip4 + 1);
+ ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
+ &ip4->dst_address);
+ ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
+ &ip4->src_address);
+ session->key.cs_proto = ip4->protocol;
+ session->key.cs_port[VLIB_RX] = sctp->src_port;
+ session->key.cs_port[VLIB_TX] = sctp->dst_port;
+ }
else
goto error;
}
@@ -837,20 +819,74 @@ cnat_load_balance (const cnat_translation_t *ct, ip_address_family_t af,
* rsession_location is the location the (return) session will be
* matched at
*/
+
+static_always_inline void
+cnat_session_create (cnat_session_t *session, cnat_node_ctx_t *ctx)
+{
+ cnat_bihash_kv_t *bkey = (cnat_bihash_kv_t *) session;
+
+ session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
+ cnat_bihash_add_del (&cnat_session_db, bkey, 1);
+}
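+
+/* cnat_session_create is now reduced to timestamping and inserting the
+ * forward entry; the reverse entry moves to cnat_rsession_create below.
+ * The resulting call pattern in the dataplane nodes (see the
+ * cnat_node_feature.c hunks later in this diff) is:
+ *
+ *   cnat_session_create (session, ctx);
+ *   if (!(ct->flags & CNAT_TR_FLAG_NO_RETURN_SESSION))
+ *     cnat_rsession_create (session, ctx, CNAT_LOCATION_OUTPUT,
+ *                           rsession_flags);
+ *
+ * This is also why CNAT_TIMESTAMP_INIT_REFCNT drops from 2 to 1 in
+ * cnat_types.h: the timestamp starts with one reference and
+ * cnat_rsession_create takes the second via cnat_timestamp_inc_refcnt. */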
+
static_always_inline void
-cnat_session_create (cnat_session_t *session, cnat_node_ctx_t *ctx,
- cnat_session_location_t rsession_location,
- u8 rsession_flags)
+cnat_rsession_create (cnat_session_t *session, cnat_node_ctx_t *ctx,
+ cnat_session_location_t rsession_location,
+ cnat_session_flag_t rsession_flags)
{
cnat_client_t *cc;
cnat_bihash_kv_t rkey;
cnat_session_t *rsession = (cnat_session_t *) & rkey;
cnat_bihash_kv_t *bkey = (cnat_bihash_kv_t *) session;
- cnat_bihash_kv_t rvalue;
- int rv;
+ int rv, n_retries = 0;
+ static u32 sport_seed = 0;
- session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
- cnat_bihash_add_del (&cnat_session_db, bkey, 1);
+ cnat_timestamp_inc_refcnt (session->value.cs_ts_index);
+
+ /* First create the return session */
+ ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
+ &session->value.cs_ip[VLIB_TX]);
+ ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
+ &session->value.cs_ip[VLIB_RX]);
+ rsession->key.cs_proto = session->key.cs_proto;
+ rsession->key.cs_loc = rsession_location;
+ rsession->key.__cs_pad = 0;
+ rsession->key.cs_af = ctx->af;
+ rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
+ rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
+
+ ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
+ &session->key.cs_ip[VLIB_TX]);
+ ip46_address_copy (&rsession->value.cs_ip[VLIB_TX],
+ &session->key.cs_ip[VLIB_RX]);
+ rsession->value.cs_ts_index = session->value.cs_ts_index;
+ rsession->value.cs_lbi = INDEX_INVALID;
+ rsession->value.flags = rsession_flags | CNAT_SESSION_IS_RETURN;
+ rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
+ rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
+
+retry_add_rsession:
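+  /* On conflict (rv != 0 with mode 2, add-but-don't-overwrite) and when
+   * CNAT_SESSION_RETRY_SNAT is set, re-draw a random source port and
+   * retry; give up after ~100 attempts and translate statelessly. */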
+ rv = cnat_bihash_add_del (&cnat_session_db, &rkey,
+ 2 /* add but don't overwrite */);
+ if (rv)
+ {
+ if (!(rsession_flags & CNAT_SESSION_RETRY_SNAT))
+ return;
+
+      /* return session add failed, pick a new random src port */
+ rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX] =
+ random_u32 (&sport_seed);
+ if (n_retries++ < 100)
+	goto retry_add_rsession;
+ else
+ {
+ clib_warning ("Could not find a free port after 100 tries");
+ /* translate this packet, but don't create state */
+ return;
+ }
+ }
+
+ cnat_bihash_add_del (&cnat_session_db, bkey, 1 /* add */);
if (!(rsession_flags & CNAT_SESSION_FLAG_NO_CLIENT))
{
@@ -894,39 +930,6 @@ cnat_session_create (cnat_session_t *session, cnat_node_ctx_t *ctx,
}
}
- /* create the reverse flow key */
- ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
- &session->value.cs_ip[VLIB_TX]);
- ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
- &session->value.cs_ip[VLIB_RX]);
- rsession->key.cs_proto = session->key.cs_proto;
- rsession->key.cs_loc = rsession_location;
- rsession->key.__cs_pad = 0;
- rsession->key.cs_af = ctx->af;
- rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
- rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
-
- /* First search for existing reverse session */
- rv = cnat_bihash_search_i2 (&cnat_session_db, &rkey, &rvalue);
- if (!rv)
- {
- /* Reverse session already exists
- cleanup before creating for refcnts */
- cnat_session_t *found_rsession = (cnat_session_t *) & rvalue;
- cnat_session_free (found_rsession);
- }
- /* add the reverse flow */
- ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
- &session->key.cs_ip[VLIB_TX]);
- ip46_address_copy (&rsession->value.cs_ip[VLIB_TX],
- &session->key.cs_ip[VLIB_RX]);
- rsession->value.cs_ts_index = session->value.cs_ts_index;
- rsession->value.cs_lbi = INDEX_INVALID;
- rsession->value.flags = rsession_flags | CNAT_SESSION_IS_RETURN;
- rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
- rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
-
- cnat_bihash_add_del (&cnat_session_db, &rkey, 1);
}
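
The conflict handling above reads more easily as a loop; a behaviour-preserving sketch reusing the declarations in cnat_rsession_create (and assuming, per the comment, that cnat_bihash_add_del with is_add == 2 returns non-zero when the key already exists):

    for (rv = 1; rv && n_retries < 100; n_retries++)
      {
        rv = cnat_bihash_add_del (&cnat_session_db, &rkey,
                                  2 /* add, don't overwrite */);
        if (rv && !(rsession_flags & CNAT_SESSION_RETRY_SNAT))
          return;
        if (rv) /* conflict: re-draw the source port and try again */
          rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX] =
            (u16) random_u32 (&sport_seed);
      }
    if (rv)
      return; /* no free port found: translate, but keep no state */

Note that on give-up the forward entry is not inserted either, so the packet is translated without creating state in either direction.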
always_inline uword
diff --git a/src/plugins/cnat/cnat_node_feature.c b/src/plugins/cnat/cnat_node_feature.c
index aced4cd0a15..9b2c0c2fe06 100644
--- a/src/plugins/cnat/cnat_node_feature.c
+++ b/src/plugins/cnat/cnat_node_feature.c
@@ -143,7 +143,10 @@ cnat_input_feature_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
/* refcnt session in current client */
cnat_client_cnt_session (cc);
- cnat_session_create (session, ctx, CNAT_LOCATION_OUTPUT, rsession_flags);
+ cnat_session_create (session, ctx);
+ if (!(ct->flags & CNAT_TR_FLAG_NO_RETURN_SESSION))
+ cnat_rsession_create (session, ctx, CNAT_LOCATION_OUTPUT,
+ rsession_flags);
trace_flags |= CNAT_TRACE_SESSION_CREATED;
}
@@ -156,9 +159,9 @@ cnat_input_feature_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
}
if (AF_IP4 == ctx->af)
- cnat_translation_ip4 (session, ip4, udp0);
+ cnat_translation_ip4 (session, ip4, udp0, vnet_buffer (b)->oflags);
else
- cnat_translation_ip6 (session, ip6, udp0);
+ cnat_translation_ip6 (session, ip6, udp0, vnet_buffer (b)->oflags);
if (NULL != ct)
{
@@ -320,14 +323,17 @@ cnat_output_feature_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
CNAT_SESSION_FLAG_NO_CLIENT | CNAT_SESSION_FLAG_ALLOC_PORT;
trace_flags |= CNAT_TRACE_SESSION_CREATED;
- cnat_session_create (session, ctx, CNAT_LOCATION_INPUT,
- CNAT_SESSION_FLAG_NO_CLIENT);
+
+ cnat_session_create (session, ctx);
+ cnat_rsession_create (session, ctx, CNAT_LOCATION_INPUT,
+ CNAT_SESSION_FLAG_NO_CLIENT |
+ CNAT_SESSION_RETRY_SNAT);
}
if (AF_IP4 == ctx->af)
- cnat_translation_ip4 (session, ip4, udp0);
+ cnat_translation_ip4 (session, ip4, udp0, vnet_buffer (b)->oflags);
else
- cnat_translation_ip6 (session, ip6, udp0);
+ cnat_translation_ip6 (session, ip6, udp0, vnet_buffer (b)->oflags);
trace:
if (PREDICT_FALSE (ctx->do_trace))
diff --git a/src/plugins/cnat/cnat_node_snat.c b/src/plugins/cnat/cnat_node_snat.c
index 9212d67ead6..57530eb397d 100644
--- a/src/plugins/cnat/cnat_node_snat.c
+++ b/src/plugins/cnat/cnat_node_snat.c
@@ -129,15 +129,15 @@ cnat_snat_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
CNAT_SESSION_FLAG_NO_CLIENT | CNAT_SESSION_FLAG_ALLOC_PORT;
trace_flags |= CNAT_TRACE_SESSION_CREATED;
- cnat_session_create (session, ctx, CNAT_LOCATION_FIB,
- CNAT_SESSION_FLAG_HAS_SNAT);
+ cnat_session_create (session, ctx);
+ cnat_rsession_create (session, ctx, CNAT_LOCATION_FIB,
+ CNAT_SESSION_FLAG_HAS_SNAT);
}
-
if (AF_IP4 == ctx->af)
- cnat_translation_ip4 (session, ip4, udp0);
+ cnat_translation_ip4 (session, ip4, udp0, vnet_buffer (b)->oflags);
else
- cnat_translation_ip6 (session, ip6, udp0);
+ cnat_translation_ip6 (session, ip6, udp0, vnet_buffer (b)->oflags);
trace:
if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
diff --git a/src/plugins/cnat/cnat_node_vip.c b/src/plugins/cnat/cnat_node_vip.c
index f166bd4f194..d320746c5fa 100644
--- a/src/plugins/cnat/cnat_node_vip.c
+++ b/src/plugins/cnat/cnat_node_vip.c
@@ -168,7 +168,9 @@ cnat_vip_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b,
/* refcnt session in current client */
cnat_client_cnt_session (cc);
- cnat_session_create (session, ctx, CNAT_LOCATION_FIB, rsession_flags);
+ cnat_session_create (session, ctx);
+ if (!(ct->flags & CNAT_TR_FLAG_NO_RETURN_SESSION))
+ cnat_rsession_create (session, ctx, CNAT_LOCATION_FIB, rsession_flags);
trace_flags |= CNAT_TRACE_SESSION_CREATED;
next0 = ct->ct_lb.dpoi_next_node;
@@ -176,9 +178,9 @@ cnat_vip_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b,
}
if (AF_IP4 == ctx->af)
- cnat_translation_ip4 (session, ip4, udp0);
+ cnat_translation_ip4 (session, ip4, udp0, vnet_buffer (b)->oflags);
else
- cnat_translation_ip6 (session, ip6, udp0);
+ cnat_translation_ip6 (session, ip6, udp0, vnet_buffer (b)->oflags);
if (NULL != ct)
{
diff --git a/src/plugins/cnat/cnat_scanner.c b/src/plugins/cnat/cnat_scanner.c
index b3591f7e8b0..2f982711581 100644
--- a/src/plugins/cnat/cnat_scanner.c
+++ b/src/plugins/cnat/cnat_scanner.c
@@ -14,6 +14,7 @@
*/
#include <cnat/cnat_session.h>
+#include <vlibmemory/api.h>
#include <cnat/cnat_client.h>
static uword
diff --git a/src/plugins/cnat/cnat_session.c b/src/plugins/cnat/cnat_session.c
index 216d2575c37..0f1cd43f501 100644
--- a/src/plugins/cnat/cnat_session.c
+++ b/src/plugins/cnat/cnat_session.c
@@ -94,7 +94,8 @@ format_cnat_session (u8 * s, va_list * args)
cnat_session_t *sess = va_arg (*args, cnat_session_t *);
CLIB_UNUSED (int verbose) = va_arg (*args, int);
f64 ts = 0;
- if (!pool_is_free_index (cnat_timestamps, sess->value.cs_ts_index))
+
+ if (!cnat_ts_is_free_index (sess->value.cs_ts_index))
ts = cnat_timestamp_exp (sess->value.cs_ts_index);
s = format (
@@ -172,15 +173,43 @@ cnat_session_purge (void)
return (0);
}
+void
+cnat_reverse_session_free (cnat_session_t *session)
+{
+ cnat_bihash_kv_t bkey, bvalue;
+ cnat_session_t *rsession = (cnat_session_t *) &bkey;
+ int rv;
+
+ ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
+ &session->value.cs_ip[VLIB_TX]);
+ ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
+ &session->value.cs_ip[VLIB_RX]);
+ rsession->key.cs_proto = session->key.cs_proto;
+ rsession->key.cs_loc = session->key.cs_loc == CNAT_LOCATION_OUTPUT ?
+ CNAT_LOCATION_INPUT :
+ CNAT_LOCATION_OUTPUT;
+ rsession->key.__cs_pad = 0;
+ rsession->key.cs_af = session->key.cs_af;
+ rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
+ rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
+
+ rv = cnat_bihash_search_i2 (&cnat_session_db, &bkey, &bvalue);
+ if (!rv)
+ {
+ /* other session is in bihash */
+ cnat_session_t *rsession = (cnat_session_t *) &bvalue;
+ cnat_session_free (rsession);
+ }
+}
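+
+/* The pair lookup key is the session's translated tuple mirrored, with
+ * the location flipped; schematically:
+ *
+ *   session being freed                 key of its paired session
+ *   -------------------                 -------------------------
+ *   value.cs_ip/port[VLIB_TX]    ->     key.cs_ip/port[VLIB_RX]
+ *   value.cs_ip/port[VLIB_RX]    ->     key.cs_ip/port[VLIB_TX]
+ *   key.cs_loc                   ->     the opposite location
+ *
+ * This is the inverse of the construction in cnat_rsession_create, which
+ * is what lets the scanner free both directions of a flow together. */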
+
u64
cnat_session_scan (vlib_main_t * vm, f64 start_time, int i)
{
BVT (clib_bihash) * h = &cnat_session_db;
int j, k;
- /* Don't scan the l2 fib if it hasn't been instantiated yet */
if (alloc_arena (h) == 0)
- return 0.0;
+ return 0;
for ( /* caller saves starting point */ ; i < h->nbuckets; i++)
{
@@ -210,7 +239,7 @@ cnat_session_scan (vlib_main_t * vm, f64 start_time, int i)
{
for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
{
- if (v->kvp[k].key[0] == ~0ULL && v->kvp[k].value[0] == ~0ULL)
+ if (BV (clib_bihash_is_free) (&v->kvp[k]))
continue;
cnat_session_t *session = (cnat_session_t *) & v->kvp[k];
@@ -219,6 +248,9 @@ cnat_session_scan (vlib_main_t * vm, f64 start_time, int i)
cnat_timestamp_exp (session->value.cs_ts_index))
{
/* age it */
+ cnat_reverse_session_free (session);
+	      /* this must come last, as deleting the session memsets it
+	       * to 0xff */
cnat_session_free (session);
/*
@@ -248,6 +280,12 @@ cnat_session_init (vlib_main_t * vm)
cm->session_hash_memory);
BV (clib_bihash_set_kvp_format_fn) (&cnat_session_db, format_cnat_session);
+ cnat_timestamps.next_empty_pool_idx = 0;
+ clib_bitmap_alloc (cnat_timestamps.ts_free, 1 << CNAT_TS_MPOOL_BITS);
+ clib_bitmap_set_region (cnat_timestamps.ts_free, 0, 1,
+ 1 << CNAT_TS_MPOOL_BITS);
+ clib_spinlock_init (&cnat_timestamps.ts_lock);
+
return (NULL);
}
@@ -258,21 +296,38 @@ cnat_timestamp_show (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
cnat_timestamp_t *ts;
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- pool_foreach (ts, cnat_timestamps)
+ int ts_cnt = 0, cnt;
+ u8 verbose = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u",
- ts - cnat_timestamps, ts->last_seen, ts->lifetime,
- ts->refcnt);
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ return (clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input));
+ }
+
+ for (int i = 0; i < cnat_timestamps.next_empty_pool_idx; i++)
+ {
+ cnt = pool_elts (cnat_timestamps.ts_pools[i]);
+ ts_cnt += cnt;
+ vlib_cli_output (vm, "-- Pool %d [%d/%d]", i, cnt,
+ pool_header (cnat_timestamps.ts_pools[i])->max_elts);
+ if (!verbose)
+ continue;
+ pool_foreach (ts, cnat_timestamps.ts_pools[i])
+ vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u",
+ ts - cnat_timestamps.ts_pools[i], ts->last_seen,
+ ts->lifetime, ts->refcnt);
}
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
+ vlib_cli_output (vm, "Total timestamps %d", ts_cnt);
return (NULL);
}
VLIB_CLI_COMMAND (cnat_timestamp_show_cmd, static) = {
.path = "show cnat timestamp",
.function = cnat_timestamp_show,
- .short_help = "show cnat timestamp",
+ .short_help = "show cnat timestamp [verbose]",
.is_mp_safe = 1,
};
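
Example interaction (illustrative values; the output shape follows the format strings above, with the first pool holding 1 << CNAT_TS_BASE_SIZE = 256 entries):

    vpp# show cnat timestamp
    -- Pool 0 [42/256]
    Total timestamps 42
    vpp# show cnat timestamp verbose
    -- Pool 0 [42/256]
    [0] last_seen:1652.118000 lifetime:30 ref:2
    ...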
diff --git a/src/plugins/cnat/cnat_session.h b/src/plugins/cnat/cnat_session.h
index 072bb10f96f..a0a28c9a818 100644
--- a/src/plugins/cnat/cnat_session.h
+++ b/src/plugins/cnat/cnat_session.h
@@ -129,6 +129,11 @@ typedef enum cnat_session_flag_t_
/* Debug flag marking return sessions */
CNAT_SESSION_IS_RETURN = (1 << 4),
+
+ /** On conflicts when adding the return session, try to sNAT the
+ * forward session, and dNAT the return session with a random port */
+ CNAT_SESSION_RETRY_SNAT = (1 << 5),
+
} cnat_session_flag_t;
typedef enum cnat_session_location_t_
diff --git a/src/plugins/cnat/cnat_snat_policy.c b/src/plugins/cnat/cnat_snat_policy.c
index d59156f34c8..cd9bfef492a 100644
--- a/src/plugins/cnat/cnat_snat_policy.c
+++ b/src/plugins/cnat/cnat_snat_policy.c
@@ -29,6 +29,8 @@ unformat_cnat_snat_interface_map_type (unformat_input_t *input, va_list *args)
*a = CNAT_SNAT_IF_MAP_INCLUDE_V6;
else if (unformat (input, "k8s"))
*a = CNAT_SNAT_IF_MAP_INCLUDE_POD;
+ else if (unformat (input, "host"))
+ *a = CNAT_SNAT_IF_MAP_INCLUDE_HOST;
else
return 0;
return 1;
@@ -49,6 +51,9 @@ format_cnat_snat_interface_map_type (u8 *s, va_list *args)
case CNAT_SNAT_IF_MAP_INCLUDE_POD:
s = format (s, "k8s pod");
break;
+ case CNAT_SNAT_IF_MAP_INCLUDE_HOST:
+ s = format (s, "k8s host");
+ break;
default:
s = format (s, "(unknown)");
break;
@@ -108,7 +113,7 @@ cnat_snat_policy_add_del_if_command_fn (vlib_main_t *vm,
vnet_main_t *vnm = vnet_get_main ();
int is_add = 1;
u32 sw_if_index = ~0;
- u32 table;
+ u32 table = 0;
int rv;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
@@ -296,6 +301,14 @@ cnat_snat_policy_k8s (vlib_buffer_t *b, cnat_session_t *session)
u32 in_if = vnet_buffer (b)->sw_if_index[VLIB_RX];
u32 out_if = vnet_buffer (b)->sw_if_index[VLIB_TX];
+  /* never snat traffic that we punt to the host; pass traffic that
+   * is destined for us through unchanged */
+ if (clib_bitmap_get (cpm->interface_maps[CNAT_SNAT_IF_MAP_INCLUDE_HOST],
+ out_if))
+ {
+ return 0;
+ }
+
/* source nat for outgoing connections */
if (cnat_snat_policy_interface_enabled (in_if, af))
if (cnat_search_snat_prefix (dst_addr, af))
diff --git a/src/plugins/cnat/cnat_snat_policy.h b/src/plugins/cnat/cnat_snat_policy.h
index 987ae494e16..61c2382602f 100644
--- a/src/plugins/cnat/cnat_snat_policy.h
+++ b/src/plugins/cnat/cnat_snat_policy.h
@@ -45,6 +45,9 @@ typedef enum cnat_snat_interface_map_type_t_
CNAT_SNAT_IF_MAP_INCLUDE_V4 = AF_IP4,
CNAT_SNAT_IF_MAP_INCLUDE_V6 = AF_IP6,
CNAT_SNAT_IF_MAP_INCLUDE_POD,
+  /* CNAT_SNAT_IF_MAP_INCLUDE_HOST marks the interfaces used to punt
+     traffic to the host, replicating the uplink */
+ CNAT_SNAT_IF_MAP_INCLUDE_HOST,
CNAT_N_SNAT_IF_MAP,
} cnat_snat_interface_map_type_t;
diff --git a/src/plugins/cnat/cnat_src_policy.c b/src/plugins/cnat/cnat_src_policy.c
index cac24b7742c..8f3f3375148 100644
--- a/src/plugins/cnat/cnat_src_policy.c
+++ b/src/plugins/cnat/cnat_src_policy.c
@@ -59,8 +59,8 @@ cnat_vip_default_source_policy (vlib_main_t * vm,
u16 sport;
sport = udp0->src_port;
/* Allocate a port only if asked and if we actually sNATed */
- if ((ct->flags & CNAT_TRANSLATION_FLAG_ALLOCATE_PORT)
- && (*rsession_flags & CNAT_SESSION_FLAG_HAS_SNAT))
+ if ((ct->flags & CNAT_TR_FLAG_ALLOCATE_PORT) &&
+ (*rsession_flags & CNAT_SESSION_FLAG_HAS_SNAT))
{
sport = 0; /* force allocation */
session->value.flags |= CNAT_SESSION_FLAG_ALLOC_PORT;
diff --git a/src/plugins/cnat/cnat_translation.c b/src/plugins/cnat/cnat_translation.c
index 049809a8684..513cedf0446 100644
--- a/src/plugins/cnat/cnat_translation.c
+++ b/src/plugins/cnat/cnat_translation.c
@@ -18,8 +18,10 @@
#include <vnet/fib/fib_entry_track.h>
#include <vnet/dpo/load_balance.h>
#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/dpo.h>
#include <cnat/cnat_translation.h>
+#include <cnat/cnat_maglev.h>
#include <cnat/cnat_session.h>
#include <cnat/cnat_client.h>
@@ -82,6 +84,7 @@ cnat_tracker_release (cnat_ep_trk_t * trk)
/* We only track fully resolved endpoints */
if (!(trk->ct_flags & CNAT_TRK_ACTIVE))
return;
+ dpo_reset (&trk->ct_dpo); // undo fib_entry_contribute_forwarding
fib_entry_untrack (trk->ct_fei, trk->ct_sibling);
}
@@ -200,110 +203,7 @@ cnat_remove_translation_from_db (index_t cci, cnat_endpoint_t * vip,
clib_bihash_add_del_8_8 (&cnat_translation_db, &bkey, 0);
}
-typedef struct
-{
- cnat_ep_trk_t *trk;
- u32 index;
- u32 offset;
- u32 skip;
-} cnat_maglev_entry_t;
-static int
-cnat_maglev_entry_compare (void *_a, void *_b)
-{
- cnat_ep_trk_t *a = ((cnat_maglev_entry_t *) _a)->trk;
- cnat_ep_trk_t *b = ((cnat_maglev_entry_t *) _b)->trk;
- int rv = 0;
- if ((rv =
- ip_address_cmp (&a->ct_ep[VLIB_TX].ce_ip, &b->ct_ep[VLIB_TX].ce_ip)))
- return rv;
- if ((rv = a->ct_ep[VLIB_TX].ce_port - a->ct_ep[VLIB_TX].ce_port))
- return rv;
- if ((rv =
- ip_address_cmp (&a->ct_ep[VLIB_RX].ce_ip, &b->ct_ep[VLIB_RX].ce_ip)))
- return rv;
- if ((rv = a->ct_ep[VLIB_RX].ce_port - a->ct_ep[VLIB_RX].ce_port))
- return rv;
- return 0;
-}
-
-static void
-cnat_translation_init_maglev (cnat_translation_t *ct)
-{
- cnat_maglev_entry_t *backends = NULL, *bk;
- cnat_main_t *cm = &cnat_main;
- u32 done = 0;
- cnat_ep_trk_t *trk;
- int ep_idx = 0;
-
- vec_foreach (trk, ct->ct_active_paths)
- {
- cnat_maglev_entry_t bk;
- u32 h1, h2;
-
- if (AF_IP4 == ip_addr_version (&trk->ct_ep[VLIB_TX].ce_ip))
- {
- u32 a, b, c;
- a = ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32;
- b = (u64) trk->ct_ep[VLIB_TX].ce_port << 16 |
- (u64) trk->ct_ep[VLIB_RX].ce_port;
- c = ip_addr_v4 (&trk->ct_ep[VLIB_RX].ce_ip).data_u32;
- hash_v3_mix32 (a, b, c);
- hash_v3_finalize32 (a, b, c);
- h1 = c;
- h2 = b;
- }
- else
- {
- u64 a, b, c;
- a = ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[0] ^
- ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[1];
- b = (u64) trk->ct_ep[VLIB_TX].ce_port << 16 |
- (u64) trk->ct_ep[VLIB_RX].ce_port;
- c = ip_addr_v6 (&trk->ct_ep[VLIB_RX].ce_ip).as_u64[0] ^
- ip_addr_v6 (&trk->ct_ep[VLIB_RX].ce_ip).as_u64[1];
- hash_mix64 (a, b, c);
- h1 = c;
- h2 = b;
- }
-
- bk.offset = h1 % cm->maglev_len;
- bk.skip = h2 % (cm->maglev_len - 1) + 1;
- bk.index = ep_idx++;
- bk.trk = trk;
- vec_add1 (backends, bk);
- }
-
- if (0 == ep_idx)
- return;
-
- vec_sort_with_function (backends, cnat_maglev_entry_compare);
-
- /* Don't free if previous vector exists, just zero */
- vec_validate (ct->lb_maglev, cm->maglev_len);
- vec_set (ct->lb_maglev, -1);
-
- while (1)
- {
- vec_foreach (bk, backends)
- {
- u32 next = 0;
- u32 c = (bk->offset + next * bk->skip) % cm->maglev_len;
- while (ct->lb_maglev[c] != (u32) -1)
- {
- next++;
- c = (bk->offset + next * bk->skip) % cm->maglev_len;
- }
- ct->lb_maglev[c] = bk->index;
- done++;
- if (done == cm->maglev_len)
- goto finished;
- }
- }
-
-finished:
- vec_free (backends);
-}
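
This block is not lost: the #include <cnat/cnat_maglev.h> added at the top of this file indicates it moves out to a new cnat_maglev unit. For reference, the deleted fill loop implements Maglev's permutation-based table population: each backend i derives

    offset_i = h1(i) mod M        skip_i = h2(i) mod (M - 1) + 1

and, in sorted backend order, repeatedly claims the first unclaimed slot of the sequence c_j = (offset_i + j * skip_i) mod M until all M slots are taken (M = cm->maglev_len, which should be prime so that every skip value generates a full permutation of the table).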
static void
cnat_translation_stack (cnat_translation_t * ct)
@@ -323,8 +223,11 @@ cnat_translation_stack (cnat_translation_t * ct)
if (trk->ct_flags & CNAT_TRK_ACTIVE)
vec_add1 (ct->ct_active_paths, *trk);
+ flow_hash_config_t fhc = IP_FLOW_HASH_DEFAULT;
+ if (ct->fhc != 0)
+ fhc = ct->fhc;
lbi = load_balance_create (vec_len (ct->ct_active_paths),
- fib_proto_to_dpo (fproto), IP_FLOW_HASH_DEFAULT);
+ fib_proto_to_dpo (fproto), fhc);
ep_idx = 0;
vec_foreach (trk, ct->ct_active_paths)
@@ -335,7 +238,7 @@ cnat_translation_stack (cnat_translation_t * ct)
dpo_set (&ct->ct_lb, DPO_LOAD_BALANCE, dproto, lbi);
dpo_stack (cnat_client_dpo, dproto, &ct->ct_lb, &ct->ct_lb);
- ct->flags |= CNAT_TRANSLATION_STACKED;
+ ct->flags |= CNAT_TR_FLAG_STACKED;
}
int
@@ -365,8 +268,9 @@ cnat_translation_delete (u32 id)
u32
cnat_translation_update (cnat_endpoint_t *vip, ip_protocol_t proto,
cnat_endpoint_tuple_t *paths, u8 flags,
- cnat_lb_type_t lb_type)
+ cnat_lb_type_t lb_type, flow_hash_config_t fhc)
{
+ const dpo_id_t tmp = DPO_INVALID;
cnat_endpoint_tuple_t *path;
const cnat_client_t *cc;
cnat_translation_t *ct;
@@ -398,6 +302,7 @@ cnat_translation_update (cnat_endpoint_t *vip, ip_protocol_t proto,
ct->ct_cci = cci;
ct->index = ct - cnat_translation_pool;
ct->lb_type = lb_type;
+ ct->fhc = fhc;
cnat_add_translation_to_db (cci, vip, proto, ct->index);
cnat_client_translation_added (cci);
@@ -417,7 +322,7 @@ cnat_translation_update (cnat_endpoint_t *vip, ip_protocol_t proto,
}
vec_reset_length (ct->ct_paths);
- ct->flags &= ~CNAT_TRANSLATION_STACKED;
+ ct->flags &= ~CNAT_TR_FLAG_STACKED;
u64 path_idx = 0;
vec_foreach (path, paths)
@@ -438,6 +343,7 @@ cnat_translation_update (cnat_endpoint_t *vip, ip_protocol_t proto,
clib_memcpy (&trk->ct_ep[VLIB_RX], &path->src_ep,
sizeof (trk->ct_ep[VLIB_RX]));
trk->ct_flags = path->ep_flags;
+ trk->ct_dpo = tmp;
cnat_tracker_track (ct->index, trk);
}
@@ -486,6 +392,11 @@ format_cnat_translation (u8 * s, va_list * args)
format_ip_protocol, ct->ct_proto);
s = format (s, "lb:%U ", format_cnat_lb_type, ct->lb_type);
+ if ((ct->fhc == 0) || (ct->fhc == IP_FLOW_HASH_DEFAULT))
+ s = format (s, "fhc:0x%x(default)", IP_FLOW_HASH_DEFAULT);
+ else
+ s = format (s, "fhc:0x%x", ct->fhc);
+
vec_foreach (ck, ct->ct_paths)
s = format (s, "\n%U", format_cnat_ep_trk, ck, 2);
@@ -615,7 +526,7 @@ cnat_translation_back_walk_notify (fib_node_t * node,
/* If we have more than FIB_PATH_LIST_POPULAR paths
* we might get called during path tracking
* (cnat_tracker_track) */
- if (!(ct->flags & CNAT_TRANSLATION_STACKED))
+ if (!(ct->flags & CNAT_TR_FLAG_STACKED))
return (FIB_NODE_BACK_WALK_CONTINUE);
cnat_translation_stack (ct);
@@ -678,8 +589,9 @@ cnat_translation_cli_add_del (vlib_main_t * vm,
}
}
+ flow_hash_config_t fhc = 0;
if (INDEX_INVALID == del_index)
- cnat_translation_update (&vip, proto, paths, flags, lb_type);
+ cnat_translation_update (&vip, proto, paths, flags, lb_type, fhc);
else
cnat_translation_delete (del_index);
@@ -764,11 +676,11 @@ cnat_if_addr_add_del_backend_cb (addr_resolution_t * ar,
ep->ce_flags |= CNAT_EP_FLAG_RESOLVED;
}
- ct->flags &= ~CNAT_TRANSLATION_STACKED;
+ ct->flags &= ~CNAT_TR_FLAG_STACKED;
cnat_tracker_track (ar->cti, trk);
cnat_translation_stack (ct);
- ct->flags |= CNAT_TRANSLATION_STACKED;
+ ct->flags |= CNAT_TR_FLAG_STACKED;
}
static void
@@ -825,7 +737,7 @@ cnat_translation_init (vlib_main_t * vm)
ip6_main_t *i6m = &ip6_main;
cnat_main_t *cm = &cnat_main;
cnat_translation_fib_node_type =
- fib_node_register_new_type (&cnat_translation_vft);
+ fib_node_register_new_type ("cnat-translation", &cnat_translation_vft);
clib_bihash_init_8_8 (&cnat_translation_db, "CNat translation DB",
cm->translation_hash_buckets,
diff --git a/src/plugins/cnat/cnat_translation.h b/src/plugins/cnat/cnat_translation.h
index 97b0c908b42..9bb3455d9fe 100644
--- a/src/plugins/cnat/cnat_translation.h
+++ b/src/plugins/cnat/cnat_translation.h
@@ -60,12 +60,14 @@ typedef struct cnat_ep_trk_t_
typedef enum cnat_translation_flag_t_
{
/* Do allocate a source port */
- CNAT_TRANSLATION_FLAG_ALLOCATE_PORT = (1 << 0),
+ CNAT_TR_FLAG_ALLOCATE_PORT = (1 << 0),
  /* Has this translation been stacked?
   * this allows not being called twice when
   * there are more than FIB_PATH_LIST_POPULAR backends */
- CNAT_TRANSLATION_STACKED = (1 << 1),
-} cnat_translation_flag_t;
+ CNAT_TR_FLAG_STACKED = (1 << 1),
+ /* Do not create a return session */
+ CNAT_TR_FLAG_NO_RETURN_SESSION = (1 << 2),
+} __clib_packed cnat_translation_flag_t;
typedef enum
{
@@ -76,11 +78,11 @@ typedef enum
CNAT_ADDR_N_RESOLUTIONS,
} cnat_addr_resol_type_t;
-typedef enum __attribute__ ((__packed__))
+typedef enum
{
CNAT_LB_DEFAULT,
CNAT_LB_MAGLEV,
-} cnat_lb_type_t;
+} __clib_packed cnat_lb_type_t;
/**
* Entry used to account for a translation's backend
@@ -160,13 +162,18 @@ typedef struct cnat_translation_t_
/**
* Translation flags
*/
- u8 flags;
+ cnat_translation_flag_t flags;
/**
* Type of load balancing
*/
cnat_lb_type_t lb_type;
+ /**
+ * Type of flow hash config
+ */
+ flow_hash_config_t fhc;
+
union
{
u32 *lb_maglev;
@@ -189,7 +196,8 @@ extern u8 *format_cnat_translation (u8 * s, va_list * args);
extern u32 cnat_translation_update (cnat_endpoint_t *vip,
ip_protocol_t ip_proto,
cnat_endpoint_tuple_t *backends, u8 flags,
- cnat_lb_type_t lb_type);
+ cnat_lb_type_t lb_type,
+ flow_hash_config_t fhc);
/**
* Delete a translation
diff --git a/src/plugins/cnat/cnat_types.c b/src/plugins/cnat/cnat_types.c
index 9b164c6069d..084a03da968 100644
--- a/src/plugins/cnat/cnat_types.c
+++ b/src/plugins/cnat/cnat_types.c
@@ -16,8 +16,7 @@
#include <cnat/cnat_types.h>
cnat_main_t cnat_main;
-fib_source_t cnat_fib_source;
-cnat_timestamp_t *cnat_timestamps;
+cnat_timestamp_mpool_t cnat_timestamps;
char *cnat_error_strings[] = {
#define cnat_error(n,s) s,
@@ -152,19 +151,6 @@ format_cnat_endpoint (u8 * s, va_list * args)
return (s);
}
-static clib_error_t *
-cnat_types_init (vlib_main_t * vm)
-{
- cnat_fib_source = fib_source_allocate ("cnat",
- CNAT_FIB_SOURCE_PRIORITY,
- FIB_SOURCE_BH_SIMPLE);
-
-
- clib_rwlock_init (&cnat_main.ts_lock);
-
- return (NULL);
-}
-
void
cnat_enable_disable_scanner (cnat_scanner_cmd_t event_type)
{
@@ -191,6 +177,8 @@ cnat_config (vlib_main_t * vm, unformat_input_t * input)
cm->session_hash_buckets = CNAT_DEFAULT_SESSION_BUCKETS;
cm->translation_hash_memory = CNAT_DEFAULT_TRANSLATION_MEMORY;
cm->translation_hash_buckets = CNAT_DEFAULT_TRANSLATION_BUCKETS;
+ cm->client_hash_memory = CNAT_DEFAULT_CLIENT_MEMORY;
+ cm->client_hash_buckets = CNAT_DEFAULT_CLIENT_BUCKETS;
cm->snat_hash_memory = CNAT_DEFAULT_SNAT_MEMORY;
cm->snat_hash_buckets = CNAT_DEFAULT_SNAT_BUCKETS;
cm->snat_if_map_length = CNAT_DEFAULT_SNAT_IF_MAP_LEN;
@@ -215,6 +203,12 @@ cnat_config (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "translation-db-memory %U",
unformat_memory_size, &cm->translation_hash_memory))
;
+ else if (unformat (input, "client-db-buckets %u",
+ &cm->client_hash_buckets))
+ ;
+ else if (unformat (input, "client-db-memory %U", unformat_memory_size,
+ &cm->client_hash_memory))
+ ;
else if (unformat (input, "snat-db-buckets %u", &cm->snat_hash_buckets))
;
else if (unformat (input, "snat-if-map-len %u", &cm->snat_if_map_length))
@@ -250,7 +244,6 @@ cnat_get_main ()
}
VLIB_EARLY_CONFIG_FUNCTION (cnat_config, "cnat");
-VLIB_INIT_FUNCTION (cnat_types_init);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/cnat/cnat_types.h b/src/plugins/cnat/cnat_types.h
index c3ec74c345f..d229d21adae 100644
--- a/src/plugins/cnat/cnat_types.h
+++ b/src/plugins/cnat/cnat_types.h
@@ -36,12 +36,14 @@
#define CNAT_DEFAULT_SESSION_BUCKETS 1024
#define CNAT_DEFAULT_TRANSLATION_BUCKETS 1024
+#define CNAT_DEFAULT_CLIENT_BUCKETS 1024
#define CNAT_DEFAULT_SNAT_BUCKETS 1024
#define CNAT_DEFAULT_SNAT_IF_MAP_LEN 4096
#define CNAT_DEFAULT_SESSION_MEMORY (1 << 20)
#define CNAT_DEFAULT_TRANSLATION_MEMORY (256 << 10)
-#define CNAT_DEFAULT_SNAT_MEMORY (64 << 20)
+#define CNAT_DEFAULT_CLIENT_MEMORY (256 << 10)
+#define CNAT_DEFAULT_SNAT_MEMORY (64 << 10)
/* Should be prime >~ 100 * numBackends */
#define CNAT_DEFAULT_MAGLEV_LEN 1009
@@ -50,11 +52,24 @@
* from fib_source.h */
#define CNAT_FIB_SOURCE_PRIORITY 0x02
-/* Initial refcnt for timestamps (2 : session & rsession) */
-#define CNAT_TIMESTAMP_INIT_REFCNT 2
+/* Initial refcnt of a session's timestamp.
+ * It is incremented when the reverse session
+ * is added in cnat_rsession_create */
+#define CNAT_TIMESTAMP_INIT_REFCNT 1
#define MIN_SRC_PORT ((u16) 0xC000)
+typedef struct
+{
+ /* Source and destination port. */
+ u16 src_port, dst_port;
+
+ /* Random value to distinguish connections. */
+ u32 verification_tag;
+
+ u32 checksum;
+} sctp_header_t;
+
typedef enum cnat_trk_flag_t_
{
/* Endpoint is active (static or dhcp resolved) */
@@ -62,6 +77,8 @@ typedef enum cnat_trk_flag_t_
/* Don't translate this endpoint, but still
* forward. Used by maglev for DSR */
CNAT_TRK_FLAG_NO_NAT = (1 << 1),
+  /* Endpoint is disabled (only used by tests) */
+ CNAT_TRK_FLAG_TEST_DISABLED = (1 << 7),
} cnat_trk_flag_t;
typedef enum
@@ -105,6 +122,12 @@ typedef struct cnat_main_
/* Number of buckets of the translation bihash */
u32 translation_hash_buckets;
+ /* Memory size of the client bihash */
+ uword client_hash_memory;
+
+ /* Number of buckets of the client bihash */
+ u32 client_hash_buckets;
+
/* Memory size of the source NAT prefix bihash */
uword snat_hash_memory;
@@ -125,9 +148,6 @@ typedef struct cnat_main_
/* delay in seconds between two scans of session/clients tables */
f64 scanner_timeout;
- /* Lock for the timestamp pool */
- clib_rwlock_t ts_lock;
-
/* Index of the scanner process node */
uword scanner_node_index;
@@ -152,6 +172,23 @@ typedef struct cnat_timestamp_t_
u16 refcnt;
} cnat_timestamp_t;
+/* Create the first pool with 1 << CNAT_TS_BASE_SIZE elts */
+#define CNAT_TS_BASE_SIZE (8)
+/* reserve the top CNAT_TS_MPOOL_BITS bits for finding the pool */
+#define CNAT_TS_MPOOL_BITS (6)
+
+typedef struct cnat_timestamp_mpool_t_
+{
+ /* Increasing fixed size pools of timestamps */
+ cnat_timestamp_t *ts_pools[1 << CNAT_TS_MPOOL_BITS];
+ /* Bitmap of pools with free space */
+ uword *ts_free;
+ /* Index of next pool to init */
+ u8 next_empty_pool_idx;
+ /* ts creation lock */
+ clib_spinlock_t ts_lock;
+} cnat_timestamp_mpool_t;
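+
+/* The helpers that consume this structure live in cnat_session.h and are
+ * not part of this diff; a sketch of the implied index layout, assuming
+ * the top CNAT_TS_MPOOL_BITS bits of cs_ts_index select the pool and the
+ * remainder indexes into it:
+ *
+ *   static inline cnat_timestamp_t *
+ *   cnat_timestamp_get_sketch (cnat_timestamp_mpool_t *mp, u32 index)
+ *   {
+ *     u32 pool_idx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ *     u32 elt_idx = index & ((1u << (32 - CNAT_TS_MPOOL_BITS)) - 1);
+ *     return pool_elt_at_index (mp->ts_pools[pool_idx], elt_idx);
+ *   }
+ *
+ * Compared to the single rwlock-protected pool this replaces, the
+ * fixed-size sub-pools never reallocate, so readers can dereference
+ * timestamps without taking the deleted cnat_main.ts_lock; only
+ * creation serializes on the new spinlock. */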
+
typedef struct cnat_node_ctx_
{
f64 now;
@@ -165,8 +202,7 @@ extern u8 *format_cnat_endpoint (u8 * s, va_list * args);
extern uword unformat_cnat_ep_tuple (unformat_input_t * input,
va_list * args);
extern uword unformat_cnat_ep (unformat_input_t * input, va_list * args);
-extern cnat_timestamp_t *cnat_timestamps;
-extern fib_source_t cnat_fib_source;
+extern cnat_timestamp_mpool_t cnat_timestamps;
extern cnat_main_t cnat_main;
extern char *cnat_error_strings[];
diff --git a/src/plugins/crypto_ipsecmb/CMakeLists.txt b/src/plugins/crypto_ipsecmb/CMakeLists.txt
index 981a045262e..429343a9f3b 100644
--- a/src/plugins/crypto_ipsecmb/CMakeLists.txt
+++ b/src/plugins/crypto_ipsecmb/CMakeLists.txt
@@ -33,6 +33,16 @@ if(IPSECMB_INCLUDE_DIR AND IPSECMB_LIB)
${IPSECMB_LINK_FLAGS}
)
+ file(READ "${IPSECMB_INCLUDE_DIR}/intel-ipsec-mb.h" ipsecmb_header)
+ string(REGEX MATCH "IMB_VERSION_STR (\"+[0-9]+\\.[0-9]+\\.[0-9]+\")" _ ${ipsecmb_header})
+ string(REPLACE "\"" "" IPSECMB_VERSION ${CMAKE_MATCH_1})
+
+ if (${IPSECMB_VERSION} VERSION_GREATER "0.54.0")
+ add_definitions(-DHAVE_IPSECMB_CHACHA_POLY)
+ else()
+ message(STATUS "Intel IPSecMB ${IPSECMB_VERSION} does not support chacha20-poly1305. Disabled")
+ endif()
+
target_compile_options(crypto_ipsecmb_plugin PRIVATE "-march=silvermont" "-maes")
message(STATUS "Intel IPSecMB found: ${IPSECMB_INCLUDE_DIR}")
else()
diff --git a/src/plugins/crypto_ipsecmb/ipsecmb.c b/src/plugins/crypto_ipsecmb/ipsecmb.c
index ad5f7bfe006..064c129ba12 100644
--- a/src/plugins/crypto_ipsecmb/ipsecmb.c
+++ b/src/plugins/crypto_ipsecmb/ipsecmb.c
@@ -25,14 +25,16 @@
#include <vnet/crypto/crypto.h>
#include <vppinfra/cpu.h>
-#define HMAC_MAX_BLOCK_SIZE SHA_512_BLOCK_SIZE
+#define HMAC_MAX_BLOCK_SIZE IMB_SHA_512_BLOCK_SIZE
#define EXPANDED_KEY_N_BYTES (16 * 15)
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- MB_MGR *mgr;
- __m128i cbc_iv;
+ IMB_MGR *mgr;
+#if IMB_VERSION_NUM >= IMB_VERSION(1, 3, 0)
+ IMB_JOB burst_jobs[IMB_MAX_BURST_SIZE];
+#endif
} ipsecmb_per_thread_data_t;
typedef struct
@@ -60,11 +62,12 @@ typedef struct
static ipsecmb_main_t ipsecmb_main = { };
+/* clang-format off */
/*
* (Alg, JOB_HASH_ALG, fn, block-size-bytes, hash-size-bytes, digest-size-bytes)
*/
#define foreach_ipsecmb_hmac_op \
- _(SHA1, SHA1, sha1, 64, 20, 20) \
+ _(SHA1, SHA_1, sha1, 64, 20, 20) \
_(SHA224, SHA_224, sha224, 64, 32, 28) \
_(SHA256, SHA_256, sha256, 64, 32, 32) \
_(SHA384, SHA_384, sha384, 128, 64, 48) \
@@ -88,21 +91,21 @@ static ipsecmb_main_t ipsecmb_main = { };
_(AES_128_GCM, 128) \
_(AES_192_GCM, 192) \
_(AES_256_GCM, 256)
-
+/* clang-format on */
static_always_inline vnet_crypto_op_status_t
-ipsecmb_status_job (JOB_STS status)
+ipsecmb_status_job (IMB_STATUS status)
{
switch (status)
{
- case STS_COMPLETED:
+ case IMB_STATUS_COMPLETED:
return VNET_CRYPTO_OP_STATUS_COMPLETED;
- case STS_BEING_PROCESSED:
- case STS_COMPLETED_AES:
- case STS_COMPLETED_HMAC:
+ case IMB_STATUS_BEING_PROCESSED:
+ case IMB_STATUS_COMPLETED_CIPHER:
+ case IMB_STATUS_COMPLETED_AUTH:
return VNET_CRYPTO_OP_STATUS_WORK_IN_PROGRESS;
- case STS_INVALID_ARGS:
- case STS_INTERNAL_ERROR:
- case STS_ERROR:
+ case IMB_STATUS_INVALID_ARGS:
+ case IMB_STATUS_INTERNAL_ERROR:
+ case IMB_STATUS_ERROR:
return VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR;
}
ASSERT (0);
@@ -110,12 +113,12 @@ ipsecmb_status_job (JOB_STS status)
}
always_inline void
-ipsecmb_retire_hmac_job (JOB_AES_HMAC * job, u32 * n_fail, u32 digest_size)
+ipsecmb_retire_hmac_job (IMB_JOB *job, u32 *n_fail, u32 digest_size)
{
vnet_crypto_op_t *op = job->user_data;
u32 len = op->digest_len ? op->digest_len : digest_size;
- if (PREDICT_FALSE (STS_COMPLETED != job->status))
+ if (PREDICT_FALSE (IMB_STATUS_COMPLETED != job->status))
{
op->status = ipsecmb_status_job (job->status);
*n_fail = *n_fail + 1;
@@ -139,15 +142,71 @@ ipsecmb_retire_hmac_job (JOB_AES_HMAC * job, u32 * n_fail, u32 digest_size)
op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
}
+#if IMB_VERSION_NUM >= IMB_VERSION(1, 3, 0)
+static_always_inline u32
+ipsecmb_ops_hmac_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
+ u32 block_size, u32 hash_size, u32 digest_size,
+ IMB_HASH_ALG alg)
+{
+ ipsecmb_main_t *imbm = &ipsecmb_main;
+ ipsecmb_per_thread_data_t *ptd =
+ vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
+ IMB_JOB *job;
+ u32 i, n_fail = 0, ops_index = 0;
+ u8 scratch[n_ops][digest_size];
+ const u32 burst_sz =
+ (n_ops > IMB_MAX_BURST_SIZE) ? IMB_MAX_BURST_SIZE : n_ops;
+
+ while (n_ops)
+ {
+ const u32 n = (n_ops > burst_sz) ? burst_sz : n_ops;
+ /*
+ * configure all the jobs first ...
+ */
+ for (i = 0; i < n; i++, ops_index++)
+ {
+ vnet_crypto_op_t *op = ops[ops_index];
+ const u8 *kd = (u8 *) imbm->key_data[op->key_index];
+
+ job = &ptd->burst_jobs[i];
+
+ job->src = op->src;
+ job->hash_start_src_offset_in_bytes = 0;
+ job->msg_len_to_hash_in_bytes = op->len;
+ job->auth_tag_output_len_in_bytes = digest_size;
+ job->auth_tag_output = scratch[ops_index];
+
+ job->u.HMAC._hashed_auth_key_xor_ipad = kd;
+ job->u.HMAC._hashed_auth_key_xor_opad = kd + hash_size;
+ job->user_data = op;
+ }
+
+ /*
+ * submit all jobs to be processed and retire completed jobs
+ */
+ IMB_SUBMIT_HASH_BURST_NOCHECK (ptd->mgr, ptd->burst_jobs, n, alg);
+
+ for (i = 0; i < n; i++)
+ {
+ job = &ptd->burst_jobs[i];
+ ipsecmb_retire_hmac_job (job, &n_fail, digest_size);
+ }
+
+ n_ops -= n;
+ }
+
+ return ops_index - n_fail;
+}
+#else
static_always_inline u32
-ipsecmb_ops_hmac_inline (vlib_main_t * vm, vnet_crypto_op_t * ops[],
- u32 n_ops, u32 block_size, u32 hash_size,
- u32 digest_size, JOB_HASH_ALG alg)
+ipsecmb_ops_hmac_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
+ u32 block_size, u32 hash_size, u32 digest_size,
+ JOB_HASH_ALG alg)
{
ipsecmb_main_t *imbm = &ipsecmb_main;
- ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data,
- vm->thread_index);
- JOB_AES_HMAC *job;
+ ipsecmb_per_thread_data_t *ptd =
+ vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
+ IMB_JOB *job;
u32 i, n_fail = 0;
u8 scratch[n_ops][digest_size];
@@ -168,9 +227,9 @@ ipsecmb_ops_hmac_inline (vlib_main_t * vm, vnet_crypto_op_t * ops[],
job->auth_tag_output_len_in_bytes = digest_size;
job->auth_tag_output = scratch[i];
- job->cipher_mode = NULL_CIPHER;
- job->cipher_direction = DECRYPT;
- job->chain_order = HASH_CIPHER;
+ job->cipher_mode = IMB_CIPHER_NULL;
+ job->cipher_direction = IMB_DIR_DECRYPT;
+ job->chain_order = IMB_ORDER_HASH_CIPHER;
job->u.HMAC._hashed_auth_key_xor_ipad = kd;
job->u.HMAC._hashed_auth_key_xor_opad = kd + hash_size;
@@ -187,23 +246,27 @@ ipsecmb_ops_hmac_inline (vlib_main_t * vm, vnet_crypto_op_t * ops[],
return n_ops - n_fail;
}
+#endif
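
The two implementations are selected at compile time: with intel-ipsec-mb >= 1.3 the burst API is used, where IMB_SUBMIT_HASH_BURST_NOCHECK processes all n submitted jobs before returning (which is why the retire loop can walk burst_jobs immediately, with no flush phase), while older libraries keep the classic one-job-at-a-time submit/flush pipeline above. The _NOCHECK variant skips the library's per-job parameter validation, which is acceptable here because every job is fully initialized from the zeroed burst_jobs template set up in crypto_ipsecmb_init.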
+/* clang-format off */
#define _(a, b, c, d, e, f) \
static_always_inline u32 \
ipsecmb_ops_hmac_##a (vlib_main_t * vm, \
vnet_crypto_op_t * ops[], \
u32 n_ops) \
-{ return ipsecmb_ops_hmac_inline (vm, ops, n_ops, d, e, f, b); } \
+{ return ipsecmb_ops_hmac_inline (vm, ops, n_ops, d, e, f, \
+ IMB_AUTH_HMAC_##b); } \
foreach_ipsecmb_hmac_op;
#undef _
+/* clang-format on */
always_inline void
-ipsecmb_retire_cipher_job (JOB_AES_HMAC * job, u32 * n_fail)
+ipsecmb_retire_cipher_job (IMB_JOB *job, u32 *n_fail)
{
vnet_crypto_op_t *op = job->user_data;
- if (PREDICT_FALSE (STS_COMPLETED != job->status))
+ if (PREDICT_FALSE (IMB_STATUS_COMPLETED != job->status))
{
op->status = ipsecmb_status_job (job->status);
*n_fail = *n_fail + 1;
@@ -212,6 +275,62 @@ ipsecmb_retire_cipher_job (JOB_AES_HMAC * job, u32 * n_fail)
op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
}
+#if IMB_VERSION_NUM >= IMB_VERSION(1, 3, 0)
+static_always_inline u32
+ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ u32 n_ops, u32 key_len,
+ IMB_CIPHER_DIRECTION direction,
+ IMB_CIPHER_MODE cipher_mode)
+{
+ ipsecmb_main_t *imbm = &ipsecmb_main;
+ ipsecmb_per_thread_data_t *ptd =
+ vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
+ IMB_JOB *job;
+ u32 i, n_fail = 0, ops_index = 0;
+ const u32 burst_sz =
+ (n_ops > IMB_MAX_BURST_SIZE) ? IMB_MAX_BURST_SIZE : n_ops;
+
+ while (n_ops)
+ {
+ const u32 n = (n_ops > burst_sz) ? burst_sz : n_ops;
+
+ for (i = 0; i < n; i++)
+ {
+ ipsecmb_aes_key_data_t *kd;
+ vnet_crypto_op_t *op = ops[ops_index++];
+ kd = (ipsecmb_aes_key_data_t *) imbm->key_data[op->key_index];
+
+ job = &ptd->burst_jobs[i];
+
+ job->src = op->src;
+ job->dst = op->dst;
+ job->msg_len_to_cipher_in_bytes = op->len;
+ job->cipher_start_src_offset_in_bytes = 0;
+
+ job->hash_alg = IMB_AUTH_NULL;
+
+ job->enc_keys = kd->enc_key_exp;
+ job->dec_keys = kd->dec_key_exp;
+ job->iv = op->iv;
+ job->iv_len_in_bytes = IMB_AES_BLOCK_SIZE;
+
+ job->user_data = op;
+ }
+
+ IMB_SUBMIT_CIPHER_BURST_NOCHECK (ptd->mgr, ptd->burst_jobs, n,
+ cipher_mode, direction, key_len / 8);
+ for (i = 0; i < n; i++)
+ {
+ job = &ptd->burst_jobs[i];
+ ipsecmb_retire_cipher_job (job, &n_fail);
+ }
+
+ n_ops -= n;
+ }
+
+ return ops_index - n_fail;
+}
+#else
static_always_inline u32
ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
u32 n_ops, u32 key_len,
@@ -219,9 +338,9 @@ ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
JOB_CIPHER_MODE cipher_mode)
{
ipsecmb_main_t *imbm = &ipsecmb_main;
- ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data,
- vm->thread_index);
- JOB_AES_HMAC *job;
+ ipsecmb_per_thread_data_t *ptd =
+ vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
+ IMB_JOB *job;
u32 i, n_fail = 0;
for (i = 0; i < n_ops; i++)
@@ -229,7 +348,6 @@ ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
ipsecmb_aes_key_data_t *kd;
vnet_crypto_op_t *op = ops[i];
kd = (ipsecmb_aes_key_data_t *) imbm->key_data[op->key_index];
- __m128i iv;
job = IMB_GET_NEXT_JOB (ptd->mgr);
@@ -238,23 +356,18 @@ ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
job->msg_len_to_cipher_in_bytes = op->len;
job->cipher_start_src_offset_in_bytes = 0;
- job->hash_alg = NULL_HASH;
+ job->hash_alg = IMB_AUTH_NULL;
job->cipher_mode = cipher_mode;
job->cipher_direction = direction;
- job->chain_order = (direction == ENCRYPT ? CIPHER_HASH : HASH_CIPHER);
-
- if ((direction == ENCRYPT) && (op->flags & VNET_CRYPTO_OP_FLAG_INIT_IV))
- {
- iv = ptd->cbc_iv;
- _mm_storeu_si128 ((__m128i *) op->iv, iv);
- ptd->cbc_iv = _mm_aesenc_si128 (iv, iv);
- }
+ job->chain_order =
+ (direction == IMB_DIR_ENCRYPT ? IMB_ORDER_CIPHER_HASH :
+ IMB_ORDER_HASH_CIPHER);
job->aes_key_len_in_bytes = key_len / 8;
- job->aes_enc_key_expanded = kd->enc_key_exp;
- job->aes_dec_key_expanded = kd->dec_key_exp;
+ job->enc_keys = kd->enc_key_exp;
+ job->dec_keys = kd->dec_key_exp;
job->iv = op->iv;
- job->iv_len_in_bytes = AES_BLOCK_SIZE;
+ job->iv_len_in_bytes = IMB_AES_BLOCK_SIZE;
job->user_data = op;
@@ -269,18 +382,22 @@ ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
return n_ops - n_fail;
}
+#endif
+/* clang-format off */
#define _(a, b, c) \
static_always_inline u32 ipsecmb_ops_cipher_enc_##a ( \
vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \
{ \
- return ipsecmb_ops_aes_cipher_inline (vm, ops, n_ops, b, ENCRYPT, c); \
+ return ipsecmb_ops_aes_cipher_inline ( \
+ vm, ops, n_ops, b, IMB_DIR_ENCRYPT, IMB_CIPHER_##c); \
} \
\
static_always_inline u32 ipsecmb_ops_cipher_dec_##a ( \
vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \
{ \
- return ipsecmb_ops_aes_cipher_inline (vm, ops, n_ops, b, DECRYPT, c); \
+ return ipsecmb_ops_aes_cipher_inline ( \
+ vm, ops, n_ops, b, IMB_DIR_DECRYPT, IMB_CIPHER_##c); \
}
foreach_ipsecmb_cipher_op;
@@ -294,7 +411,7 @@ ipsecmb_ops_gcm_cipher_enc_##a##_chained (vlib_main_t * vm, \
ipsecmb_main_t *imbm = &ipsecmb_main; \
ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data, \
vm->thread_index); \
- MB_MGR *m = ptd->mgr; \
+ IMB_MGR *m = ptd->mgr; \
vnet_crypto_op_chunk_t *chp; \
u32 i, j; \
\
@@ -329,7 +446,7 @@ ipsecmb_ops_gcm_cipher_enc_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], \
ipsecmb_main_t *imbm = &ipsecmb_main; \
ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data, \
vm->thread_index); \
- MB_MGR *m = ptd->mgr; \
+ IMB_MGR *m = ptd->mgr; \
u32 i; \
\
for (i = 0; i < n_ops; i++) \
@@ -355,7 +472,7 @@ ipsecmb_ops_gcm_cipher_dec_##a##_chained (vlib_main_t * vm, \
ipsecmb_main_t *imbm = &ipsecmb_main; \
ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data, \
vm->thread_index); \
- MB_MGR *m = ptd->mgr; \
+ IMB_MGR *m = ptd->mgr; \
vnet_crypto_op_chunk_t *chp; \
u32 i, j, n_failed = 0; \
\
@@ -397,7 +514,7 @@ ipsecmb_ops_gcm_cipher_dec_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], \
ipsecmb_main_t *imbm = &ipsecmb_main; \
ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data, \
vm->thread_index); \
- MB_MGR *m = ptd->mgr; \
+ IMB_MGR *m = ptd->mgr; \
u32 i, n_failed = 0; \
\
for (i = 0; i < n_ops; i++) \
@@ -422,17 +539,18 @@ ipsecmb_ops_gcm_cipher_dec_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], \
\
return n_ops - n_failed; \
}
-
+/* clang-format on */
foreach_ipsecmb_gcm_cipher_op;
#undef _
+#ifdef HAVE_IPSECMB_CHACHA_POLY
always_inline void
-ipsecmb_retire_aead_job (JOB_AES_HMAC *job, u32 *n_fail)
+ipsecmb_retire_aead_job (IMB_JOB *job, u32 *n_fail)
{
vnet_crypto_op_t *op = job->user_data;
u32 len = op->tag_len;
- if (PREDICT_FALSE (STS_COMPLETED != job->status))
+ if (PREDICT_FALSE (IMB_STATUS_COMPLETED != job->status))
{
op->status = ipsecmb_status_job (job->status);
*n_fail = *n_fail + 1;
@@ -462,16 +580,14 @@ ipsecmb_ops_chacha_poly (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
ipsecmb_per_thread_data_t *ptd =
vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
struct IMB_JOB *job;
- MB_MGR *m = ptd->mgr;
+ IMB_MGR *m = ptd->mgr;
u32 i, n_fail = 0, last_key_index = ~0;
u8 scratch[VLIB_FRAME_SIZE][16];
- u8 iv_data[16];
u8 *key = 0;
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- __m128i iv;
job = IMB_GET_NEXT_JOB (m);
if (last_key_index != op->key_index)
@@ -494,15 +610,6 @@ ipsecmb_ops_chacha_poly (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
job->src = op->src;
job->dst = op->dst;
- if ((dir == IMB_DIR_ENCRYPT) &&
- (op->flags & VNET_CRYPTO_OP_FLAG_INIT_IV))
- {
- iv = ptd->cbc_iv;
- _mm_storeu_si128 ((__m128i *) iv_data, iv);
- clib_memcpy_fast (op->iv, iv_data, 12);
- ptd->cbc_iv = _mm_aesenc_si128 (iv, iv);
- }
-
job->iv = op->iv;
job->iv_len_in_bytes = 12;
job->msg_len_to_cipher_in_bytes = job->msg_len_to_hash_in_bytes =
@@ -550,9 +657,8 @@ ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[],
ipsecmb_main_t *imbm = &ipsecmb_main;
ipsecmb_per_thread_data_t *ptd =
vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
- MB_MGR *m = ptd->mgr;
+ IMB_MGR *m = ptd->mgr;
u32 i, n_fail = 0, last_key_index = ~0;
- u8 iv_data[16];
u8 *key = 0;
if (dir == IMB_DIR_ENCRYPT)
@@ -562,7 +668,6 @@ ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_t *op = ops[i];
struct chacha20_poly1305_context_data ctx;
vnet_crypto_op_chunk_t *chp;
- __m128i iv;
u32 j;
ASSERT (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS);
@@ -575,14 +680,6 @@ ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[],
last_key_index = op->key_index;
}
- if (op->flags & VNET_CRYPTO_OP_FLAG_INIT_IV)
- {
- iv = ptd->cbc_iv;
- _mm_storeu_si128 ((__m128i *) iv_data, iv);
- clib_memcpy_fast (op->iv, iv_data, 12);
- ptd->cbc_iv = _mm_aesenc_si128 (iv, iv);
- }
-
IMB_CHACHA20_POLY1305_INIT (m, key, &ctx, op->iv, op->aad,
op->aad_len);
@@ -662,30 +759,7 @@ ipsec_mb_ops_chacha_poly_dec_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[],
return ipsecmb_ops_chacha_poly_chained (vm, ops, chunks, n_ops,
IMB_DIR_DECRYPT);
}
-
-clib_error_t *
-crypto_ipsecmb_iv_init (ipsecmb_main_t * imbm)
-{
- ipsecmb_per_thread_data_t *ptd;
- clib_error_t *err = 0;
- int fd;
-
- if ((fd = open ("/dev/urandom", O_RDONLY)) < 0)
- return clib_error_return_unix (0, "failed to open '/dev/urandom'");
-
- vec_foreach (ptd, imbm->per_thread_data)
- {
- if (read (fd, &ptd->cbc_iv, sizeof (ptd->cbc_iv)) != sizeof (ptd->cbc_iv))
- {
- err = clib_error_return_unix (0, "'/dev/urandom' read failure");
- close (fd);
- return (err);
- }
- }
-
- close (fd);
- return (NULL);
-}
+#endif
static void
crypto_ipsecmb_key_handler (vlib_main_t * vm, vnet_crypto_key_op_t kop,
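The deleted crypto_ipsecmb_iv_init() existed only to seed a per-thread IV generator used with the now-removed VNET_CRYPTO_OP_FLAG_INIT_IV path; ops now arrive with op->iv already filled by the caller. For reference, the removed generator amounted to an AES-based PRNG seeded from /dev/urandom, roughly (sketch of the deleted scheme, not current plugin code):

/* Each call emits 16 bytes (the first 12 were used as the IV) and
   advances the state with one AES round; requires <immintrin.h> and
   AES-NI. */
static inline __m128i
next_iv_sketch (__m128i *state)
{
  __m128i out = *state;
  *state = _mm_aesenc_si128 (out, out);
  return out;
}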
@@ -773,8 +847,7 @@ crypto_ipsecmb_init (vlib_main_t * vm)
ipsecmb_alg_data_t *ad;
ipsecmb_per_thread_data_t *ptd;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- clib_error_t *error;
- MB_MGR *m = 0;
+ IMB_MGR *m = 0;
u32 eidx;
u8 *name;
@@ -791,13 +864,16 @@ crypto_ipsecmb_init (vlib_main_t * vm)
vec_validate_aligned (imbm->per_thread_data, tm->n_vlib_mains - 1,
CLIB_CACHE_LINE_BYTES);
- /* *INDENT-OFF* */
vec_foreach (ptd, imbm->per_thread_data)
{
ptd->mgr = alloc_mb_mgr (0);
- if (clib_cpu_supports_avx512f ())
+#if IMB_VERSION_NUM >= IMB_VERSION(1, 3, 0)
+ clib_memset_u8 (ptd->burst_jobs, 0,
+ sizeof (IMB_JOB) * IMB_MAX_BURST_SIZE);
+#endif
+ if (clib_cpu_supports_avx512f ())
init_mb_mgr_avx512 (ptd->mgr);
- else if (clib_cpu_supports_avx2 ())
+ else if (clib_cpu_supports_avx2 () && clib_cpu_supports_bmi2 ())
init_mb_mgr_avx2 (ptd->mgr);
else
init_mb_mgr_sse (ptd->mgr);
@@ -805,10 +881,6 @@ crypto_ipsecmb_init (vlib_main_t * vm)
if (ptd == imbm->per_thread_data)
m = ptd->mgr;
}
- /* *INDENT-ON* */
-
- if (clib_cpu_supports_x86_aes () && (error = crypto_ipsecmb_iv_init (imbm)))
- return (error);
#define _(a, b, c, d, e, f) \
vnet_crypto_register_ops_handler (vm, eidx, VNET_CRYPTO_OP_##a##_HMAC, \
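Manager initialization now also requires BMI2 before choosing the AVX2 code paths, since the library's AVX2 kernels may use BMI2 bit-manipulation instructions. The dispatch decision above, restated in isolation (function name illustrative; clib_cpu_supports_* test CPUID bits at runtime):

static void
init_mgr_for_cpu (IMB_MGR *m)
{
  if (clib_cpu_supports_avx512f ())
    init_mb_mgr_avx512 (m);
  else if (clib_cpu_supports_avx2 () && clib_cpu_supports_bmi2 ())
    init_mb_mgr_avx2 (m);
  else
    init_mb_mgr_sse (m);
}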
@@ -850,6 +922,7 @@ crypto_ipsecmb_init (vlib_main_t * vm)
foreach_ipsecmb_gcm_cipher_op;
#undef _
+#ifdef HAVE_IPSECMB_CHACHA_POLY
vnet_crypto_register_ops_handler (vm, eidx,
VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC,
ipsecmb_ops_chacha_poly_enc);
@@ -864,25 +937,22 @@ crypto_ipsecmb_init (vlib_main_t * vm)
ipsec_mb_ops_chacha_poly_dec_chained);
ad = imbm->alg_data + VNET_CRYPTO_ALG_CHACHA20_POLY1305;
ad->data_size = 0;
+#endif
vnet_crypto_register_key_handler (vm, eidx, crypto_ipsecmb_key_handler);
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (crypto_ipsecmb_init) =
{
.runs_after = VLIB_INITS ("vnet_crypto_init"),
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Intel IPSEC Multi-buffer Crypto Engine",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/crypto_native/CMakeLists.txt b/src/plugins/crypto_native/CMakeLists.txt
index 688a8c95baf..5499ed4608a 100644
--- a/src/plugins/crypto_native/CMakeLists.txt
+++ b/src/plugins/crypto_native/CMakeLists.txt
@@ -12,24 +12,26 @@
# limitations under the License.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
- list(APPEND VARIANTS "slm\;-march=silvermont")
- list(APPEND VARIANTS "hsw\;-march=haswell")
+ list(APPEND VARIANTS "slm\;-march=silvermont -maes")
+ list(APPEND VARIANTS "hsw\;-march=haswell -maes")
if(compiler_flag_march_skylake_avx512 AND compiler_flag_mprefer_vector_width_256)
list(APPEND VARIANTS "skx\;-march=skylake-avx512 -mprefer-vector-width=256")
endif()
if(compiler_flag_march_icelake_client AND compiler_flag_mprefer_vector_width_512)
list(APPEND VARIANTS "icl\;-march=icelake-client -mprefer-vector-width=512")
endif()
- set (COMPILE_FILES aes_cbc.c aes_gcm.c)
- set (COMPILE_OPTS -Wall -fno-common -maes)
+ if(compiler_flag_march_alderlake)
+ list(APPEND VARIANTS "adl\;-march=alderlake -mprefer-vector-width=256")
+ endif()
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
list(APPEND VARIANTS "armv8\;-march=armv8.1-a+crc+crypto")
- set (COMPILE_FILES aes_cbc.c aes_gcm.c)
- set (COMPILE_OPTS -Wall -fno-common)
endif()
+set (COMPILE_FILES aes_cbc.c aes_gcm.c aes_ctr.c sha2.c)
+set (COMPILE_OPTS -Wall -fno-common)
+
if (NOT VARIANTS)
return()
endif()
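Each entry in VARIANTS compiles the same COMPILE_FILES once per microarchitecture with the per-variant -march flags shown; the variants can coexist in one plugin because external symbols carry a per-variant suffix (VPP's CLIB_MARCH_SFX machinery). A hedged sketch of the usual token-pasting scheme, with illustrative names:

/* One translation unit, compiled N times, e.g. with
   -DMARCH_VARIANT=skx -march=skylake-avx512, producing foo_skx etc. */
#define SFX2(a, b) a##_##b
#define SFX1(a, b) SFX2 (a, b)
#define MARCH_FN(name) SFX1 (name, MARCH_VARIANT)

unsigned int
MARCH_FN (encrypt_many) (unsigned char *data, unsigned int len)
{
  /* arch-tuned body */
  return len;
}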
diff --git a/src/plugins/crypto_native/FEATURE.yaml b/src/plugins/crypto_native/FEATURE.yaml
index 206caceb2d4..06f26d4a8cf 100644
--- a/src/plugins/crypto_native/FEATURE.yaml
+++ b/src/plugins/crypto_native/FEATURE.yaml
@@ -5,6 +5,6 @@ features:
- CBC(128, 192, 256)
- GCM(128, 192, 256)
-description: "An implentation of a native crypto-engine"
+description: "An implementation of a native crypto-engine"
state: production
properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/crypto_native/aes.h b/src/plugins/crypto_native/aes.h
deleted file mode 100644
index 762d528d064..00000000000
--- a/src/plugins/crypto_native/aes.h
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2020 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#ifndef __aesni_h__
-#define __aesni_h__
-
-typedef enum
-{
- AES_KEY_128 = 0,
- AES_KEY_192 = 1,
- AES_KEY_256 = 2,
-} aes_key_size_t;
-
-#define AES_KEY_ROUNDS(x) (10 + x * 2)
-#define AES_KEY_BYTES(x) (16 + x * 8)
-
-static const u8x16 byte_mask_scale = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
-};
-
-static_always_inline u8x16
-aes_block_load (u8 * p)
-{
- return *(u8x16u *) p;
-}
-
-static_always_inline u8x16
-aes_enc_round (u8x16 a, u8x16 k)
-{
-#if defined (__AES__)
- return (u8x16) _mm_aesenc_si128 ((__m128i) a, (__m128i) k);
-#elif defined (__ARM_FEATURE_CRYPTO)
- return vaesmcq_u8 (vaeseq_u8 (a, u8x16_splat (0))) ^ k;
-#endif
-}
-
-#if defined (__VAES__)
-static_always_inline u8x64
-aes_enc_round_x4 (u8x64 a, u8x64 k)
-{
- return (u8x64) _mm512_aesenc_epi128 ((__m512i) a, (__m512i) k);
-}
-
-static_always_inline u8x64
-aes_enc_last_round_x4 (u8x64 a, u8x64 k)
-{
- return (u8x64) _mm512_aesenclast_epi128 ((__m512i) a, (__m512i) k);
-}
-
-static_always_inline u8x64
-aes_dec_round_x4 (u8x64 a, u8x64 k)
-{
- return (u8x64) _mm512_aesdec_epi128 ((__m512i) a, (__m512i) k);
-}
-
-static_always_inline u8x64
-aes_dec_last_round_x4 (u8x64 a, u8x64 k)
-{
- return (u8x64) _mm512_aesdeclast_epi128 ((__m512i) a, (__m512i) k);
-}
-#endif
-
-static_always_inline u8x16
-aes_enc_last_round (u8x16 a, u8x16 k)
-{
-#if defined (__AES__)
- return (u8x16) _mm_aesenclast_si128 ((__m128i) a, (__m128i) k);
-#elif defined (__ARM_FEATURE_CRYPTO)
- return vaeseq_u8 (a, u8x16_splat (0)) ^ k;
-#endif
-}
-
-#ifdef __x86_64__
-
-static_always_inline u8x16
-aes_dec_round (u8x16 a, u8x16 k)
-{
- return (u8x16) _mm_aesdec_si128 ((__m128i) a, (__m128i) k);
-}
-
-static_always_inline u8x16
-aes_dec_last_round (u8x16 a, u8x16 k)
-{
- return (u8x16) _mm_aesdeclast_si128 ((__m128i) a, (__m128i) k);
-}
-#endif
-
-static_always_inline void
-aes_block_store (u8 * p, u8x16 r)
-{
- *(u8x16u *) p = r;
-}
-
-static_always_inline u8x16
-aes_byte_mask (u8x16 x, u8 n_bytes)
-{
- return x & u8x16_is_greater (u8x16_splat (n_bytes), byte_mask_scale);
-}
-
-static_always_inline u8x16
-aes_load_partial (u8x16u * p, int n_bytes)
-{
- ASSERT (n_bytes <= 16);
-#ifdef __AVX512F__
- __m128i zero = { };
- return (u8x16) _mm_mask_loadu_epi8 (zero, (1 << n_bytes) - 1, p);
-#else
- return aes_byte_mask (CLIB_MEM_OVERFLOW_LOAD (*, p), n_bytes);
-#endif
-}
-
-static_always_inline void
-aes_store_partial (void *p, u8x16 r, int n_bytes)
-{
-#if __aarch64__
- clib_memcpy_fast (p, &r, n_bytes);
-#else
-#ifdef __AVX512F__
- _mm_mask_storeu_epi8 (p, (1 << n_bytes) - 1, (__m128i) r);
-#else
- u8x16 mask = u8x16_is_greater (u8x16_splat (n_bytes), byte_mask_scale);
- _mm_maskmoveu_si128 ((__m128i) r, (__m128i) mask, p);
-#endif
-#endif
-}
-
-
-static_always_inline u8x16
-aes_encrypt_block (u8x16 block, const u8x16 * round_keys, aes_key_size_t ks)
-{
- int rounds = AES_KEY_ROUNDS (ks);
- block ^= round_keys[0];
- for (int i = 1; i < rounds; i += 1)
- block = aes_enc_round (block, round_keys[i]);
- return aes_enc_last_round (block, round_keys[rounds]);
-}
-
-static_always_inline u8x16
-aes_inv_mix_column (u8x16 a)
-{
-#if defined (__AES__)
- return (u8x16) _mm_aesimc_si128 ((__m128i) a);
-#elif defined (__ARM_FEATURE_CRYPTO)
- return vaesimcq_u8 (a);
-#endif
-}
-
-#ifdef __x86_64__
-#define aes_keygen_assist(a, b) \
- (u8x16) _mm_aeskeygenassist_si128((__m128i) a, b)
-
-/* AES-NI based AES key expansion based on code samples from
- Intel(r) Advanced Encryption Standard (AES) New Instructions White Paper
- (323641-001) */
-
-static_always_inline void
-aes128_key_assist (u8x16 * rk, u8x16 r)
-{
- u8x16 t = rk[-1];
- t ^= u8x16_word_shift_left (t, 4);
- t ^= u8x16_word_shift_left (t, 4);
- t ^= u8x16_word_shift_left (t, 4);
- rk[0] = t ^ (u8x16) u32x4_shuffle ((u32x4) r, 3, 3, 3, 3);
-}
-
-static_always_inline void
-aes128_key_expand (u8x16 * rk, u8x16 const *k)
-{
- rk[0] = k[0];
- aes128_key_assist (rk + 1, aes_keygen_assist (rk[0], 0x01));
- aes128_key_assist (rk + 2, aes_keygen_assist (rk[1], 0x02));
- aes128_key_assist (rk + 3, aes_keygen_assist (rk[2], 0x04));
- aes128_key_assist (rk + 4, aes_keygen_assist (rk[3], 0x08));
- aes128_key_assist (rk + 5, aes_keygen_assist (rk[4], 0x10));
- aes128_key_assist (rk + 6, aes_keygen_assist (rk[5], 0x20));
- aes128_key_assist (rk + 7, aes_keygen_assist (rk[6], 0x40));
- aes128_key_assist (rk + 8, aes_keygen_assist (rk[7], 0x80));
- aes128_key_assist (rk + 9, aes_keygen_assist (rk[8], 0x1b));
- aes128_key_assist (rk + 10, aes_keygen_assist (rk[9], 0x36));
-}
-
-static_always_inline void
-aes192_key_assist (u8x16 * r1, u8x16 * r2, u8x16 key_assist)
-{
- u8x16 t;
- r1[0] ^= t = u8x16_word_shift_left (r1[0], 4);
- r1[0] ^= t = u8x16_word_shift_left (t, 4);
- r1[0] ^= u8x16_word_shift_left (t, 4);
- r1[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) key_assist, 0x55);
- r2[0] ^= u8x16_word_shift_left (r2[0], 4);
- r2[0] ^= (u8x16) _mm_shuffle_epi32 ((__m128i) r1[0], 0xff);
-}
-
-static_always_inline void
-aes192_key_expand (u8x16 * rk, u8x16u const *k)
-{
- u8x16 r1, r2;
-
- rk[0] = r1 = k[0];
- /* *INDENT-OFF* */
- rk[1] = r2 = (u8x16) (u64x2) { *(u64 *) (k + 1), 0 };
- /* *INDENT-ON* */
-
- aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x1));
- rk[1] = (u8x16) _mm_shuffle_pd ((__m128d) rk[1], (__m128d) r1, 0);
- rk[2] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
-
- aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x2));
- rk[3] = r1;
- rk[4] = r2;
-
- aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x4));
- rk[4] = (u8x16) _mm_shuffle_pd ((__m128d) rk[4], (__m128d) r1, 0);
- rk[5] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
-
- aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x8));
- rk[6] = r1;
- rk[7] = r2;
-
- aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x10));
- rk[7] = (u8x16) _mm_shuffle_pd ((__m128d) rk[7], (__m128d) r1, 0);
- rk[8] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
-
- aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x20));
- rk[9] = r1;
- rk[10] = r2;
-
- aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x40));
- rk[10] = (u8x16) _mm_shuffle_pd ((__m128d) rk[10], (__m128d) r1, 0);
- rk[11] = (u8x16) _mm_shuffle_pd ((__m128d) r1, (__m128d) r2, 1);
-
- aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x80));
- rk[12] = r1;
-}
-
-static_always_inline void
-aes256_key_assist (u8x16 * rk, int i, u8x16 key_assist)
-{
- u8x16 r, t;
- rk += i;
- r = rk[-2];
- r ^= t = u8x16_word_shift_left (r, 4);
- r ^= t = u8x16_word_shift_left (t, 4);
- r ^= u8x16_word_shift_left (t, 4);
- r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 3, 3, 3, 3);
- rk[0] = r;
-
- if (i >= 14)
- return;
-
- key_assist = aes_keygen_assist (rk[0], 0x0);
- r = rk[-1];
- r ^= t = u8x16_word_shift_left (r, 4);
- r ^= t = u8x16_word_shift_left (t, 4);
- r ^= u8x16_word_shift_left (t, 4);
- r ^= (u8x16) u32x4_shuffle ((u32x4) key_assist, 2, 2, 2, 2);
- rk[1] = r;
-}
-
-static_always_inline void
-aes256_key_expand (u8x16 * rk, u8x16u const *k)
-{
- rk[0] = k[0];
- rk[1] = k[1];
- aes256_key_assist (rk, 2, aes_keygen_assist (rk[1], 0x01));
- aes256_key_assist (rk, 4, aes_keygen_assist (rk[3], 0x02));
- aes256_key_assist (rk, 6, aes_keygen_assist (rk[5], 0x04));
- aes256_key_assist (rk, 8, aes_keygen_assist (rk[7], 0x08));
- aes256_key_assist (rk, 10, aes_keygen_assist (rk[9], 0x10));
- aes256_key_assist (rk, 12, aes_keygen_assist (rk[11], 0x20));
- aes256_key_assist (rk, 14, aes_keygen_assist (rk[13], 0x40));
-}
-#endif
-
-#ifdef __aarch64__
-
-static const u8x16 aese_prep_mask1 =
- { 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12 };
-static const u8x16 aese_prep_mask2 =
- { 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15, 12, 13, 14, 15 };
-
-static_always_inline void
-aes128_key_expand_round_neon (u8x16 * rk, u32 rcon)
-{
- u8x16 r, t, last_round = rk[-1], z = { };
- r = vqtbl1q_u8 (last_round, aese_prep_mask1);
- r = vaeseq_u8 (r, z);
- r ^= (u8x16) vdupq_n_u32 (rcon);
- r ^= last_round;
- r ^= t = vextq_u8 (z, last_round, 12);
- r ^= t = vextq_u8 (z, t, 12);
- r ^= vextq_u8 (z, t, 12);
- rk[0] = r;
-}
-
-static_always_inline void
-aes128_key_expand (u8x16 * rk, const u8x16 * k)
-{
- rk[0] = k[0];
- aes128_key_expand_round_neon (rk + 1, 0x01);
- aes128_key_expand_round_neon (rk + 2, 0x02);
- aes128_key_expand_round_neon (rk + 3, 0x04);
- aes128_key_expand_round_neon (rk + 4, 0x08);
- aes128_key_expand_round_neon (rk + 5, 0x10);
- aes128_key_expand_round_neon (rk + 6, 0x20);
- aes128_key_expand_round_neon (rk + 7, 0x40);
- aes128_key_expand_round_neon (rk + 8, 0x80);
- aes128_key_expand_round_neon (rk + 9, 0x1b);
- aes128_key_expand_round_neon (rk + 10, 0x36);
-}
-
-static_always_inline void
-aes192_key_expand_round_neon (u8x8 * rk, u32 rcon)
-{
- u8x8 r, last_round = rk[-1], z = { };
- u8x16 r2, z2 = { };
-
- r2 = (u8x16) vdupq_lane_u64 ((uint64x1_t) last_round, 0);
- r2 = vqtbl1q_u8 (r2, aese_prep_mask1);
- r2 = vaeseq_u8 (r2, z2);
- r2 ^= (u8x16) vdupq_n_u32 (rcon);
-
- r = (u8x8) vdup_laneq_u64 ((u64x2) r2, 0);
- r ^= rk[-3];
- r ^= vext_u8 (z, rk[-3], 4);
- rk[0] = r;
-
- r = rk[-2] ^ vext_u8 (r, z, 4);
- r ^= vext_u8 (z, r, 4);
- rk[1] = r;
-
- if (rcon == 0x80)
- return;
-
- r = rk[-1] ^ vext_u8 (r, z, 4);
- r ^= vext_u8 (z, r, 4);
- rk[2] = r;
-}
-
-static_always_inline void
-aes192_key_expand (u8x16 * ek, const u8x16u * k)
-{
- u8x8 *rk = (u8x8 *) ek;
- ek[0] = k[0];
- rk[2] = *(u8x8u *) (k + 1);
- aes192_key_expand_round_neon (rk + 3, 0x01);
- aes192_key_expand_round_neon (rk + 6, 0x02);
- aes192_key_expand_round_neon (rk + 9, 0x04);
- aes192_key_expand_round_neon (rk + 12, 0x08);
- aes192_key_expand_round_neon (rk + 15, 0x10);
- aes192_key_expand_round_neon (rk + 18, 0x20);
- aes192_key_expand_round_neon (rk + 21, 0x40);
- aes192_key_expand_round_neon (rk + 24, 0x80);
-}
-
-
-static_always_inline void
-aes256_key_expand_round_neon (u8x16 * rk, u32 rcon)
-{
- u8x16 r, t, z = { };
-
- r = vqtbl1q_u8 (rk[-1], rcon ? aese_prep_mask1 : aese_prep_mask2);
- r = vaeseq_u8 (r, z);
- if (rcon)
- r ^= (u8x16) vdupq_n_u32 (rcon);
- r ^= rk[-2];
- r ^= t = vextq_u8 (z, rk[-2], 12);
- r ^= t = vextq_u8 (z, t, 12);
- r ^= vextq_u8 (z, t, 12);
- rk[0] = r;
-}
-
-static_always_inline void
-aes256_key_expand (u8x16 * rk, u8x16 const *k)
-{
- rk[0] = k[0];
- rk[1] = k[1];
- aes256_key_expand_round_neon (rk + 2, 0x01);
- aes256_key_expand_round_neon (rk + 3, 0);
- aes256_key_expand_round_neon (rk + 4, 0x02);
- aes256_key_expand_round_neon (rk + 5, 0);
- aes256_key_expand_round_neon (rk + 6, 0x04);
- aes256_key_expand_round_neon (rk + 7, 0);
- aes256_key_expand_round_neon (rk + 8, 0x08);
- aes256_key_expand_round_neon (rk + 9, 0);
- aes256_key_expand_round_neon (rk + 10, 0x10);
- aes256_key_expand_round_neon (rk + 11, 0);
- aes256_key_expand_round_neon (rk + 12, 0x20);
- aes256_key_expand_round_neon (rk + 13, 0);
- aes256_key_expand_round_neon (rk + 14, 0x40);
-}
-
-#endif
-
-static_always_inline void
-aes_key_expand (u8x16 * key_schedule, u8 const *key, aes_key_size_t ks)
-{
- switch (ks)
- {
- case AES_KEY_128:
- aes128_key_expand (key_schedule, (u8x16u const *) key);
- break;
- case AES_KEY_192:
- aes192_key_expand (key_schedule, (u8x16u const *) key);
- break;
- case AES_KEY_256:
- aes256_key_expand (key_schedule, (u8x16u const *) key);
- break;
- }
-}
-
-static_always_inline void
-aes_key_enc_to_dec (u8x16 * ke, u8x16 * kd, aes_key_size_t ks)
-{
- int rounds = AES_KEY_ROUNDS (ks);
-
- kd[rounds] = ke[0];
- kd[0] = ke[rounds];
-
- for (int i = 1; i < (rounds / 2); i++)
- {
- kd[rounds - i] = aes_inv_mix_column (ke[i]);
- kd[i] = aes_inv_mix_column (ke[rounds - i]);
- }
-
- kd[rounds / 2] = aes_inv_mix_column (ke[rounds / 2]);
-}
-
-#endif /* __aesni_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
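These inline AES primitives were not dropped: as the aes_cbc.c hunk below shows, they moved into vppinfra (<vppinfra/crypto/aes_cbc.h> and friends) so other components can share them. A usage sketch per the deleted declarations, assuming the relocated helpers keep the signatures shown above (function name illustrative):

/* Expand an AES-128 key schedule and encrypt one block. */
static u8x16
encrypt_one_block_sketch (u8x16 pt, const u8 *key)
{
  u8x16 rk[AES_KEY_ROUNDS (AES_KEY_128) + 1];	/* 10 rounds + 1 = 11 keys */
  aes_key_expand (rk, key, AES_KEY_128);
  return aes_encrypt_block (pt, rk, AES_KEY_128);
}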
diff --git a/src/plugins/crypto_native/aes_cbc.c b/src/plugins/crypto_native/aes_cbc.c
index c8ec37d152d..dd7ca3f1cf1 100644
--- a/src/plugins/crypto_native/aes_cbc.c
+++ b/src/plugins/crypto_native/aes_cbc.c
@@ -19,214 +19,30 @@
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
#include <crypto_native/crypto_native.h>
-#include <crypto_native/aes.h>
+#include <vppinfra/crypto/aes_cbc.h>
#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
#pragma GCC optimize ("O3")
#endif
-typedef struct
-{
- u8x16 encrypt_key[15];
-#if __VAES__
- u8x64 decrypt_key[15];
-#else
- u8x16 decrypt_key[15];
-#endif
-} aes_cbc_key_data_t;
-
-
-static_always_inline void __clib_unused
-aes_cbc_dec (u8x16 * k, u8x16u * src, u8x16u * dst, u8x16u * iv, int count,
- int rounds)
-{
- u8x16 r[4], c[4], f;
-
- f = iv[0];
- while (count >= 64)
- {
- clib_prefetch_load (src + 8);
- clib_prefetch_load (dst + 8);
-
- c[0] = r[0] = src[0];
- c[1] = r[1] = src[1];
- c[2] = r[2] = src[2];
- c[3] = r[3] = src[3];
-
-#if __x86_64__
- r[0] ^= k[0];
- r[1] ^= k[0];
- r[2] ^= k[0];
- r[3] ^= k[0];
-
- for (int i = 1; i < rounds; i++)
- {
- r[0] = aes_dec_round (r[0], k[i]);
- r[1] = aes_dec_round (r[1], k[i]);
- r[2] = aes_dec_round (r[2], k[i]);
- r[3] = aes_dec_round (r[3], k[i]);
- }
-
- r[0] = aes_dec_last_round (r[0], k[rounds]);
- r[1] = aes_dec_last_round (r[1], k[rounds]);
- r[2] = aes_dec_last_round (r[2], k[rounds]);
- r[3] = aes_dec_last_round (r[3], k[rounds]);
-#else
- for (int i = 0; i < rounds - 1; i++)
- {
- r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
- r[1] = vaesimcq_u8 (vaesdq_u8 (r[1], k[i]));
- r[2] = vaesimcq_u8 (vaesdq_u8 (r[2], k[i]));
- r[3] = vaesimcq_u8 (vaesdq_u8 (r[3], k[i]));
- }
- r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
- r[1] = vaesdq_u8 (r[1], k[rounds - 1]) ^ k[rounds];
- r[2] = vaesdq_u8 (r[2], k[rounds - 1]) ^ k[rounds];
- r[3] = vaesdq_u8 (r[3], k[rounds - 1]) ^ k[rounds];
-#endif
- dst[0] = r[0] ^ f;
- dst[1] = r[1] ^ c[0];
- dst[2] = r[2] ^ c[1];
- dst[3] = r[3] ^ c[2];
- f = c[3];
-
- count -= 64;
- src += 4;
- dst += 4;
- }
-
- while (count > 0)
- {
- c[0] = r[0] = src[0];
-#if __x86_64__
- r[0] ^= k[0];
- for (int i = 1; i < rounds; i++)
- r[0] = aes_dec_round (r[0], k[i]);
- r[0] = aes_dec_last_round (r[0], k[rounds]);
-#else
- c[0] = r[0] = src[0];
- for (int i = 0; i < rounds - 1; i++)
- r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
- r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
-#endif
- dst[0] = r[0] ^ f;
- f = c[0];
-
- count -= 16;
- src += 1;
- dst += 1;
- }
-}
-
-#if __x86_64__
-#ifdef __VAES__
-
-static_always_inline u8x64
-aes_block_load_x4 (u8 * src[], int i)
-{
- u8x64 r = { };
- r = u8x64_insert_u8x16 (r, aes_block_load (src[0] + i), 0);
- r = u8x64_insert_u8x16 (r, aes_block_load (src[1] + i), 1);
- r = u8x64_insert_u8x16 (r, aes_block_load (src[2] + i), 2);
- r = u8x64_insert_u8x16 (r, aes_block_load (src[3] + i), 3);
- return r;
-}
-
-static_always_inline void
-aes_block_store_x4 (u8 * dst[], int i, u8x64 r)
-{
- aes_block_store (dst[0] + i, u8x64_extract_u8x16 (r, 0));
- aes_block_store (dst[1] + i, u8x64_extract_u8x16 (r, 1));
- aes_block_store (dst[2] + i, u8x64_extract_u8x16 (r, 2));
- aes_block_store (dst[3] + i, u8x64_extract_u8x16 (r, 3));
-}
-
-static_always_inline u8x64
-aes_cbc_dec_permute (u8x64 a, u8x64 b)
-{
- __m512i perm = { 6, 7, 8, 9, 10, 11, 12, 13 };
- return (u8x64) _mm512_permutex2var_epi64 ((__m512i) a, perm, (__m512i) b);
-}
-
-static_always_inline void
-vaes_cbc_dec (u8x64 * k, u8x64u * src, u8x64u * dst, u8x16 * iv, int count,
- aes_key_size_t rounds)
-{
- u8x64 f, r[4], c[4] = { };
- __mmask8 m;
- int i, n_blocks = count >> 4;
-
- f = (u8x64) _mm512_mask_loadu_epi64 (_mm512_setzero_si512 (), 0xc0,
- (__m512i *) (iv - 3));
-
- while (n_blocks >= 16)
- {
- c[0] = src[0];
- c[1] = src[1];
- c[2] = src[2];
- c[3] = src[3];
-
- r[0] = c[0] ^ k[0];
- r[1] = c[1] ^ k[0];
- r[2] = c[2] ^ k[0];
- r[3] = c[3] ^ k[0];
-
- for (i = 1; i < rounds; i++)
- {
- r[0] = aes_dec_round_x4 (r[0], k[i]);
- r[1] = aes_dec_round_x4 (r[1], k[i]);
- r[2] = aes_dec_round_x4 (r[2], k[i]);
- r[3] = aes_dec_round_x4 (r[3], k[i]);
- }
-
- r[0] = aes_dec_last_round_x4 (r[0], k[i]);
- r[1] = aes_dec_last_round_x4 (r[1], k[i]);
- r[2] = aes_dec_last_round_x4 (r[2], k[i]);
- r[3] = aes_dec_last_round_x4 (r[3], k[i]);
-
- dst[0] = r[0] ^= aes_cbc_dec_permute (f, c[0]);
- dst[1] = r[1] ^= aes_cbc_dec_permute (c[0], c[1]);
- dst[2] = r[2] ^= aes_cbc_dec_permute (c[1], c[2]);
- dst[3] = r[3] ^= aes_cbc_dec_permute (c[2], c[3]);
- f = c[3];
-
- n_blocks -= 16;
- src += 4;
- dst += 4;
- }
-
- while (n_blocks > 0)
- {
- m = (1 << (n_blocks * 2)) - 1;
- c[0] = (u8x64) _mm512_mask_loadu_epi64 ((__m512i) c[0], m,
- (__m512i *) src);
- f = aes_cbc_dec_permute (f, c[0]);
- r[0] = c[0] ^ k[0];
- for (i = 1; i < rounds; i++)
- r[0] = aes_dec_round_x4 (r[0], k[i]);
- r[0] = aes_dec_last_round_x4 (r[0], k[i]);
- _mm512_mask_storeu_epi64 ((__m512i *) dst, m, (__m512i) (r[0] ^ f));
- f = c[0];
- n_blocks -= 4;
- src += 1;
- dst += 1;
- }
-}
-#endif
-#endif
-
-#ifdef __VAES__
-#define N 16
-#define u32xN u32x16
-#define u32xN_min_scalar u32x16_min_scalar
+#if defined(__VAES__) && defined(__AVX512F__)
+#define u8xN u8x64
+#define u32xN u32x16
+#define u32xN_min_scalar u32x16_min_scalar
#define u32xN_is_all_zero u32x16_is_all_zero
-#define u32xN_splat u32x16_splat
+#define u32xN_splat u32x16_splat
+#elif defined(__VAES__)
+#define u8xN u8x32
+#define u32xN u32x8
+#define u32xN_min_scalar u32x8_min_scalar
+#define u32xN_is_all_zero u32x8_is_all_zero
+#define u32xN_splat u32x8_splat
#else
-#define N 4
-#define u32xN u32x4
-#define u32xN_min_scalar u32x4_min_scalar
+#define u8xN u8x16
+#define u32xN u32x4
+#define u32xN_min_scalar u32x4_min_scalar
#define u32xN_is_all_zero u32x4_is_all_zero
-#define u32xN_splat u32x4_splat
+#define u32xN_splat u32x4_splat
#endif
static_always_inline u32
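The new u8xN family widens the whole encrypt loop generically: with VAES+AVX512F each vector carries four AES blocks, with VAES alone two, otherwise one, and the per-op arrays later in the file are sized 4 * N_AES_LANES accordingly (N_AES_LANES is presumably defined alongside the vppinfra AES helpers). A sketch of the relationship, with an illustrative name:

enum { LANES_SKETCH = sizeof (u8xN) / sizeof (u8x16) };
/* u8x64 -> 4 lanes (16 parallel CBC streams),
   u8x32 -> 2 lanes ( 8 streams),
   u8x16 -> 1 lane  ( 4 streams). */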
@@ -234,30 +50,22 @@ aes_ops_enc_aes_cbc (vlib_main_t * vm, vnet_crypto_op_t * ops[],
u32 n_ops, aes_key_size_t ks)
{
crypto_native_main_t *cm = &crypto_native_main;
- crypto_native_per_thread_data_t *ptd =
- vec_elt_at_index (cm->per_thread_data, vm->thread_index);
int rounds = AES_KEY_ROUNDS (ks);
u8 placeholder[8192];
u32 i, j, count, n_left = n_ops;
u32xN placeholder_mask = { };
u32xN len = { };
- vnet_crypto_key_index_t key_index[N];
- u8 *src[N] = { };
- u8 *dst[N] = { };
-#if __VAES__
- u8x64 r[N / 4] = { };
- u8x64 k[15][N / 4] = { };
- u8x16 *kq, *rq = (u8x16 *) r;
-#else
- u8x16 r[N] = { };
- u8x16 k[15][N] = { };
-#endif
+ vnet_crypto_key_index_t key_index[4 * N_AES_LANES];
+ u8 *src[4 * N_AES_LANES] = {};
+ u8 *dst[4 * N_AES_LANES] = {};
+ u8xN r[4] = {};
+ u8xN k[15][4] = {};
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4 * N_AES_LANES; i++)
key_index[i] = ~0;
more:
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4 * N_AES_LANES; i++)
if (len[i] == 0)
{
if (n_left == 0)
@@ -269,20 +77,8 @@ more:
}
else
{
- u8x16 t;
- if (ops[0]->flags & VNET_CRYPTO_OP_FLAG_INIT_IV)
- {
- t = ptd->cbc_iv[i];
- *(u8x16u *) ops[0]->iv = t;
- ptd->cbc_iv[i] = aes_enc_round (t, t);
- }
- else
- t = aes_block_load (ops[0]->iv);
-#if __VAES__
- rq[i] = t;
-#else
- r[i] = t;
-#endif
+ u8x16 t = aes_block_load (ops[0]->iv);
+ ((u8x16 *) r)[i] = t;
src[i] = ops[0]->src;
dst[i] = ops[0]->dst;
@@ -294,14 +90,7 @@ more:
key_index[i] = ops[0]->key_index;
kd = (aes_cbc_key_data_t *) cm->key_data[key_index[i]];
for (j = 0; j < rounds + 1; j++)
- {
-#if __VAES__
- kq = (u8x16 *) k[j];
- kq[i] = kd->encrypt_key[j];
-#else
- k[j][i] = kd->encrypt_key[j];
-#endif
- }
+ ((u8x16 *) k[j])[i] = kd->encrypt_key[j];
}
ops[0]->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
n_left--;
@@ -315,11 +104,11 @@ more:
for (i = 0; i < count; i += 16)
{
-#ifdef __VAES__
+#if defined(__VAES__) && defined(__AVX512F__)
r[0] = u8x64_xor3 (r[0], aes_block_load_x4 (src, i), k[0][0]);
- r[1] = u8x64_xor3 (r[1], aes_block_load_x4 (src, i), k[0][1]);
- r[2] = u8x64_xor3 (r[2], aes_block_load_x4 (src, i), k[0][2]);
- r[3] = u8x64_xor3 (r[3], aes_block_load_x4 (src, i), k[0][3]);
+ r[1] = u8x64_xor3 (r[1], aes_block_load_x4 (src + 4, i), k[0][1]);
+ r[2] = u8x64_xor3 (r[2], aes_block_load_x4 (src + 8, i), k[0][2]);
+ r[3] = u8x64_xor3 (r[3], aes_block_load_x4 (src + 12, i), k[0][3]);
for (j = 1; j < rounds; j++)
{
@@ -337,6 +126,28 @@ more:
aes_block_store_x4 (dst + 4, i, r[1]);
aes_block_store_x4 (dst + 8, i, r[2]);
aes_block_store_x4 (dst + 12, i, r[3]);
+#elif defined(__VAES__)
+ r[0] = u8x32_xor3 (r[0], aes_block_load_x2 (src, i), k[0][0]);
+ r[1] = u8x32_xor3 (r[1], aes_block_load_x2 (src + 2, i), k[0][1]);
+ r[2] = u8x32_xor3 (r[2], aes_block_load_x2 (src + 4, i), k[0][2]);
+ r[3] = u8x32_xor3 (r[3], aes_block_load_x2 (src + 6, i), k[0][3]);
+
+ for (j = 1; j < rounds; j++)
+ {
+ r[0] = aes_enc_round_x2 (r[0], k[j][0]);
+ r[1] = aes_enc_round_x2 (r[1], k[j][1]);
+ r[2] = aes_enc_round_x2 (r[2], k[j][2]);
+ r[3] = aes_enc_round_x2 (r[3], k[j][3]);
+ }
+ r[0] = aes_enc_last_round_x2 (r[0], k[j][0]);
+ r[1] = aes_enc_last_round_x2 (r[1], k[j][1]);
+ r[2] = aes_enc_last_round_x2 (r[2], k[j][2]);
+ r[3] = aes_enc_last_round_x2 (r[3], k[j][3]);
+
+ aes_block_store_x2 (dst, i, r[0]);
+ aes_block_store_x2 (dst + 2, i, r[1]);
+ aes_block_store_x2 (dst + 4, i, r[2]);
+ aes_block_store_x2 (dst + 6, i, r[3]);
#else
#if __x86_64__
r[0] = u8x16_xor3 (r[0], aes_block_load (src[0] + i), k[0][0]);
@@ -346,16 +157,16 @@ more:
for (j = 1; j < rounds; j++)
{
- r[0] = aes_enc_round (r[0], k[j][0]);
- r[1] = aes_enc_round (r[1], k[j][1]);
- r[2] = aes_enc_round (r[2], k[j][2]);
- r[3] = aes_enc_round (r[3], k[j][3]);
+ r[0] = aes_enc_round_x1 (r[0], k[j][0]);
+ r[1] = aes_enc_round_x1 (r[1], k[j][1]);
+ r[2] = aes_enc_round_x1 (r[2], k[j][2]);
+ r[3] = aes_enc_round_x1 (r[3], k[j][3]);
}
- r[0] = aes_enc_last_round (r[0], k[j][0]);
- r[1] = aes_enc_last_round (r[1], k[j][1]);
- r[2] = aes_enc_last_round (r[2], k[j][2]);
- r[3] = aes_enc_last_round (r[3], k[j][3]);
+ r[0] = aes_enc_last_round_x1 (r[0], k[j][0]);
+ r[1] = aes_enc_last_round_x1 (r[1], k[j][1]);
+ r[2] = aes_enc_last_round_x1 (r[2], k[j][2]);
+ r[3] = aes_enc_last_round_x1 (r[3], k[j][3]);
aes_block_store (dst[0] + i, r[0]);
aes_block_store (dst[1] + i, r[1]);
@@ -387,7 +198,7 @@ more:
len -= u32xN_splat (count);
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4 * N_AES_LANES; i++)
{
src[i] += count;
dst[i] += count;
@@ -416,8 +227,11 @@ aes_ops_dec_aes_cbc (vlib_main_t * vm, vnet_crypto_op_t * ops[],
ASSERT (n_ops >= 1);
decrypt:
-#ifdef __VAES__
- vaes_cbc_dec (kd->decrypt_key, (u8x64u *) op->src, (u8x64u *) op->dst,
+#if defined(__VAES__) && defined(__AVX512F__)
+ aes4_cbc_dec (kd->decrypt_key, (u8x64u *) op->src, (u8x64u *) op->dst,
+ (u8x16u *) op->iv, op->len, rounds);
+#elif defined(__VAES__)
+ aes2_cbc_dec (kd->decrypt_key, (u8x32u *) op->src, (u8x32u *) op->dst,
(u8x16u *) op->iv, op->len, rounds);
#else
aes_cbc_dec (kd->decrypt_key, (u8x16u *) op->src, (u8x16u *) op->dst,
@@ -435,99 +249,91 @@ decrypt:
return n_ops;
}
-static_always_inline void *
-aes_cbc_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks)
+static int
+aes_cbc_cpu_probe ()
+{
+#if defined(__VAES__) && defined(__AVX512F__)
+ if (clib_cpu_supports_vaes () && clib_cpu_supports_avx512f ())
+ return 50;
+#elif defined(__VAES__)
+ if (clib_cpu_supports_vaes ())
+ return 40;
+#elif defined(__AVX512F__)
+ if (clib_cpu_supports_avx512f ())
+ return 30;
+#elif defined(__AVX2__)
+ if (clib_cpu_supports_avx2 ())
+ return 20;
+#elif __AES__
+ if (clib_cpu_supports_aes ())
+ return 10;
+#elif __aarch64__
+ if (clib_cpu_supports_aarch64_aes ())
+ return 10;
+#endif
+ return -1;
+}
+
+static void *
+aes_cbc_key_exp_128 (vnet_crypto_key_t *key)
{
- u8x16 e[15], d[15];
aes_cbc_key_data_t *kd;
kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
- aes_key_expand (e, key->data, ks);
- aes_key_enc_to_dec (e, d, ks);
- for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
- {
-#if __VAES__
- kd->decrypt_key[i] = (u8x64) _mm512_broadcast_i64x2 ((__m128i) d[i]);
-#else
- kd->decrypt_key[i] = d[i];
-#endif
- kd->encrypt_key[i] = e[i];
- }
+ clib_aes128_cbc_key_expand (kd, key->data);
return kd;
}
-#define foreach_aes_cbc_handler_type _(128) _(192) _(256)
-
-#define _(x) \
-static u32 aes_ops_dec_aes_cbc_##x \
-(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aes_ops_dec_aes_cbc (vm, ops, n_ops, AES_KEY_##x); } \
-static u32 aes_ops_enc_aes_cbc_##x \
-(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aes_ops_enc_aes_cbc (vm, ops, n_ops, AES_KEY_##x); } \
-static void * aes_cbc_key_exp_##x (vnet_crypto_key_t *key) \
-{ return aes_cbc_key_exp (key, AES_KEY_##x); }
-
-foreach_aes_cbc_handler_type;
-#undef _
-
-#include <fcntl.h>
+static void *
+aes_cbc_key_exp_192 (vnet_crypto_key_t *key)
+{
+ aes_cbc_key_data_t *kd;
+ kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
+ clib_aes192_cbc_key_expand (kd, key->data);
+ return kd;
+}
-clib_error_t *
-#ifdef __VAES__
-crypto_native_aes_cbc_init_icl (vlib_main_t * vm)
-#elif __AVX512F__
-crypto_native_aes_cbc_init_skx (vlib_main_t * vm)
-#elif __aarch64__
-crypto_native_aes_cbc_init_neon (vlib_main_t * vm)
-#elif __AVX2__
-crypto_native_aes_cbc_init_hsw (vlib_main_t * vm)
-#else
-crypto_native_aes_cbc_init_slm (vlib_main_t * vm)
-#endif
+static void *
+aes_cbc_key_exp_256 (vnet_crypto_key_t *key)
{
- crypto_native_main_t *cm = &crypto_native_main;
- crypto_native_per_thread_data_t *ptd;
- clib_error_t *err = 0;
- int fd;
+ aes_cbc_key_data_t *kd;
+ kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
+ clib_aes256_cbc_key_expand (kd, key->data);
+ return kd;
+}
- if ((fd = open ("/dev/urandom", O_RDONLY)) < 0)
- return clib_error_return_unix (0, "failed to open '/dev/urandom'");
+#define foreach_aes_cbc_handler_type _ (128) _ (192) _ (256)
+
+#define _(x) \
+ static u32 aes_ops_enc_aes_cbc_##x (vlib_main_t *vm, \
+ vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return aes_ops_enc_aes_cbc (vm, ops, n_ops, AES_KEY_##x); \
+ } \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##x##_cbc_enc) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##x##_CBC_ENC, \
+ .fn = aes_ops_enc_aes_cbc_##x, \
+ .probe = aes_cbc_cpu_probe, \
+ }; \
+ \
+ static u32 aes_ops_dec_aes_cbc_##x (vlib_main_t *vm, \
+ vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return aes_ops_dec_aes_cbc (vm, ops, n_ops, AES_KEY_##x); \
+ } \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##x##_cbc_dec) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##x##_CBC_DEC, \
+ .fn = aes_ops_dec_aes_cbc_##x, \
+ .probe = aes_cbc_cpu_probe, \
+ }; \
+ \
+ CRYPTO_NATIVE_KEY_HANDLER (aes_##x##_cbc) = { \
+ .alg_id = VNET_CRYPTO_ALG_AES_##x##_CBC, \
+ .key_fn = aes_cbc_key_exp_##x, \
+ .probe = aes_cbc_cpu_probe, \
+ };
- /* *INDENT-OFF* */
- vec_foreach (ptd, cm->per_thread_data)
- {
- for (int i = 0; i < 4; i++)
- {
- if (read(fd, ptd->cbc_iv, sizeof (ptd->cbc_iv)) !=
- sizeof (ptd->cbc_iv))
- {
- err = clib_error_return_unix (0, "'/dev/urandom' read failure");
- goto error;
- }
- }
- }
- /* *INDENT-ON* */
-
-#define _(x) \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_CBC_ENC, \
- aes_ops_enc_aes_cbc_##x); \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_CBC_DEC, \
- aes_ops_dec_aes_cbc_##x); \
- cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_CBC] = aes_cbc_key_exp_##x;
- foreach_aes_cbc_handler_type;
+foreach_aes_cbc_handler_type;
#undef _
-error:
- close (fd);
- return err;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
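The per-arch crypto_native_aes_cbc_init_* entry points are gone; each variant now self-registers a descriptor, and the engine picks, per op id, the handler whose probe() reports the highest non-negative priority on the running CPU. A hedged sketch of that selection with illustrative names (the real CRYPTO_NATIVE_OP_HANDLER machinery lives in crypto_native.h):

typedef struct
{
  int (*probe) (void);					/* negative = unusable */
  u32 (*fn) (vlib_main_t *, vnet_crypto_op_t *[], u32);
} variant_sketch_t;

static variant_sketch_t *
pick_best (variant_sketch_t *v, int n)
{
  variant_sketch_t *best = 0;
  for (int i = 0, p, best_p = -1; i < n; i++)
    if ((p = v[i].probe ()) > best_p)
      best_p = p, best = &v[i];
  return best;
}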
diff --git a/src/plugins/crypto_native/aes_ctr.c b/src/plugins/crypto_native/aes_ctr.c
new file mode 100644
index 00000000000..d02a7b69b9d
--- /dev/null
+++ b/src/plugins/crypto_native/aes_ctr.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/crypto/crypto.h>
+#include <crypto_native/crypto_native.h>
+#include <vppinfra/crypto/aes_ctr.h>
+
+#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
+#pragma GCC optimize("O3")
+#endif
+
+static_always_inline u32
+aes_ops_aes_ctr (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
+ vnet_crypto_op_chunk_t *chunks, aes_key_size_t ks,
+ int maybe_chained)
+{
+ crypto_native_main_t *cm = &crypto_native_main;
+ vnet_crypto_op_t *op = ops[0];
+ aes_ctr_key_data_t *kd;
+ aes_ctr_ctx_t ctx;
+ u32 n_left = n_ops;
+
+next:
+ kd = (aes_ctr_key_data_t *) cm->key_data[op->key_index];
+
+ clib_aes_ctr_init (&ctx, kd, op->iv, ks);
+ if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
+ {
+ vnet_crypto_op_chunk_t *chp = chunks + op->chunk_index;
+ for (int j = 0; j < op->n_chunks; j++, chp++)
+ clib_aes_ctr_transform (&ctx, chp->src, chp->dst, chp->len, ks);
+ }
+ else
+ clib_aes_ctr_transform (&ctx, op->src, op->dst, op->len, ks);
+
+ op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
+
+ if (--n_left)
+ {
+ op += 1;
+ goto next;
+ }
+
+ return n_ops;
+}
+
+static_always_inline void *
+aes_ctr_key_exp (vnet_crypto_key_t *key, aes_key_size_t ks)
+{
+ aes_ctr_key_data_t *kd;
+
+ kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
+
+ clib_aes_ctr_key_expand (kd, key->data, ks);
+
+ return kd;
+}
+
+#define foreach_aes_ctr_handler_type _ (128) _ (192) _ (256)
+
+#define _(x) \
+ static u32 aes_ops_aes_ctr_##x (vlib_main_t *vm, vnet_crypto_op_t *ops[], \
+ u32 n_ops) \
+ { \
+ return aes_ops_aes_ctr (vm, ops, n_ops, 0, AES_KEY_##x, 0); \
+ } \
+ static u32 aes_ops_aes_ctr_##x##_chained ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ return aes_ops_aes_ctr (vm, ops, n_ops, chunks, AES_KEY_##x, 1); \
+ } \
+ static void *aes_ctr_key_exp_##x (vnet_crypto_key_t *key) \
+ { \
+ return aes_ctr_key_exp (key, AES_KEY_##x); \
+ }
+
+foreach_aes_ctr_handler_type;
+#undef _
+
+static int
+probe ()
+{
+#if defined(__VAES__) && defined(__AVX512F__)
+ if (clib_cpu_supports_vaes () && clib_cpu_supports_avx512f ())
+ return 50;
+#elif defined(__VAES__)
+ if (clib_cpu_supports_vaes ())
+ return 40;
+#elif defined(__AVX512F__)
+ if (clib_cpu_supports_avx512f ())
+ return 30;
+#elif defined(__AVX2__)
+ if (clib_cpu_supports_avx2 ())
+ return 20;
+#elif __AES__
+ if (clib_cpu_supports_aes ())
+ return 10;
+#elif __aarch64__
+ if (clib_cpu_supports_aarch64_aes ())
+ return 10;
+#endif
+ return -1;
+}
+
+#define _(b) \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_ctr_enc) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_CTR_ENC, \
+ .fn = aes_ops_aes_ctr_##b, \
+ .cfn = aes_ops_aes_ctr_##b##_chained, \
+ .probe = probe, \
+ }; \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_ctr_dec) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_CTR_DEC, \
+ .fn = aes_ops_aes_ctr_##b, \
+ .cfn = aes_ops_aes_ctr_##b##_chained, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_KEY_HANDLER (aes_##b##_ctr) = { \
+ .alg_id = VNET_CRYPTO_ALG_AES_##b##_CTR, \
+ .key_fn = aes_ctr_key_exp_##b, \
+ .probe = probe, \
+ };
+
+_ (128) _ (192) _ (256)
+#undef _
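Note that one handler serves both the _ENC and _DEC op ids -- CTR mode is its own inverse -- and the heavy lifting is delegated to the shared vppinfra helpers. A single-buffer usage sketch of those helpers as called above (function name illustrative):

/* AES-128, one contiguous buffer; iv points at the initial counter
   block. Encrypt and decrypt are the same transform. */
static void
ctr_once_sketch (const u8 *key, u8 *iv, u8 *src, u8 *dst, u32 len)
{
  aes_ctr_key_data_t kd;
  aes_ctr_ctx_t ctx;

  clib_aes_ctr_key_expand (&kd, key, AES_KEY_128);
  clib_aes_ctr_init (&ctx, &kd, iv, AES_KEY_128);
  clib_aes_ctr_transform (&ctx, src, dst, len, AES_KEY_128);
}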
diff --git a/src/plugins/crypto_native/aes_gcm.c b/src/plugins/crypto_native/aes_gcm.c
index e0c1e6c12c3..220788d4e97 100644
--- a/src/plugins/crypto_native/aes_gcm.c
+++ b/src/plugins/crypto_native/aes_gcm.c
@@ -19,1100 +19,26 @@
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
#include <crypto_native/crypto_native.h>
-#include <crypto_native/aes.h>
-#include <crypto_native/ghash.h>
+#include <vppinfra/crypto/aes_gcm.h>
-#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
-#pragma GCC optimize ("O3")
+#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
+#pragma GCC optimize("O3")
#endif
-#ifdef __VAES__
-#define NUM_HI 32
-#else
-#define NUM_HI 8
-#endif
-
-typedef struct
-{
- /* pre-calculated hash key values */
- const u8x16 Hi[NUM_HI];
- /* extracted AES key */
- const u8x16 Ke[15];
-#ifdef __VAES__
- const u8x64 Ke4[15];
-#endif
-} aes_gcm_key_data_t;
-
-typedef struct
-{
- u32 counter;
- union
- {
- u32x4 Y;
- u32x16 Y4;
- };
-} aes_gcm_counter_t;
-
-typedef enum
-{
- AES_GCM_F_WITH_GHASH = (1 << 0),
- AES_GCM_F_LAST_ROUND = (1 << 1),
- AES_GCM_F_ENCRYPT = (1 << 2),
- AES_GCM_F_DECRYPT = (1 << 3),
-} aes_gcm_flags_t;
-
-static const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
-
-#ifndef __VAES__
-static_always_inline void
-aes_gcm_enc_first_round (u8x16 * r, aes_gcm_counter_t * ctr, u8x16 k,
- int n_blocks)
-{
- if (PREDICT_TRUE ((u8) ctr->counter < (256 - 2 * n_blocks)))
- {
- for (int i = 0; i < n_blocks; i++)
- {
- r[i] = k ^ (u8x16) ctr->Y;
- ctr->Y += ctr_inv_1;
- }
- ctr->counter += n_blocks;
- }
- else
- {
- for (int i = 0; i < n_blocks; i++)
- {
- r[i] = k ^ (u8x16) ctr->Y;
- ctr->counter++;
- ctr->Y[3] = clib_host_to_net_u32 (ctr->counter + 1);
- }
- }
-}
-
-static_always_inline void
-aes_gcm_enc_round (u8x16 * r, u8x16 k, int n_blocks)
-{
- for (int i = 0; i < n_blocks; i++)
- r[i] = aes_enc_round (r[i], k);
-}
-
-static_always_inline void
-aes_gcm_enc_last_round (u8x16 * r, u8x16 * d, u8x16 const *k,
- int rounds, int n_blocks)
-{
-
- /* additional rounds for AES-192 and AES-256 */
- for (int i = 10; i < rounds; i++)
- aes_gcm_enc_round (r, k[i], n_blocks);
-
- for (int i = 0; i < n_blocks; i++)
- d[i] ^= aes_enc_last_round (r[i], k[rounds]);
-}
-#endif
-
-static_always_inline u8x16
-aes_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd,
- u8x16u * in, int n_blocks)
-{
- ghash_data_t _gd, *gd = &_gd;
- u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
- ghash_mul_first (gd, u8x16_reflect (in[0]) ^ T, Hi[0]);
- for (int i = 1; i < n_blocks; i++)
- ghash_mul_next (gd, u8x16_reflect ((in[i])), Hi[i]);
- ghash_reduce (gd);
- ghash_reduce2 (gd);
- return ghash_final (gd);
-}
-
-static_always_inline u8x16
-aes_gcm_ghash (u8x16 T, aes_gcm_key_data_t * kd, u8x16u * in, u32 n_left)
-{
-
- while (n_left >= 128)
- {
- T = aes_gcm_ghash_blocks (T, kd, in, 8);
- n_left -= 128;
- in += 8;
- }
-
- if (n_left >= 64)
- {
- T = aes_gcm_ghash_blocks (T, kd, in, 4);
- n_left -= 64;
- in += 4;
- }
-
- if (n_left >= 32)
- {
- T = aes_gcm_ghash_blocks (T, kd, in, 2);
- n_left -= 32;
- in += 2;
- }
-
- if (n_left >= 16)
- {
- T = aes_gcm_ghash_blocks (T, kd, in, 1);
- n_left -= 16;
- in += 1;
- }
-
- if (n_left)
- {
- u8x16 r = aes_load_partial (in, n_left);
- T = ghash_mul (u8x16_reflect (r) ^ T, kd->Hi[NUM_HI - 1]);
- }
- return T;
-}
-
-#ifndef __VAES__
-static_always_inline u8x16
-aes_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
- aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
- int rounds, int n, int last_block_bytes, aes_gcm_flags_t f)
-{
- u8x16 r[n];
- ghash_data_t _gd = { }, *gd = &_gd;
- const u8x16 *rk = (u8x16 *) kd->Ke;
- int ghash_blocks = (f & AES_GCM_F_ENCRYPT) ? 4 : n, gc = 1;
- u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - ghash_blocks;
-
- clib_prefetch_load (inv + 4);
-
- /* AES rounds 0 and 1 */
- aes_gcm_enc_first_round (r, ctr, rk[0], n);
- aes_gcm_enc_round (r, rk[1], n);
-
- /* load data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- {
- for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- d[i] = inv[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes);
- }
-
- /* GHASH multiply block 1 */
- if (f & AES_GCM_F_WITH_GHASH)
- ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
-
- /* AES rounds 2 and 3 */
- aes_gcm_enc_round (r, rk[2], n);
- aes_gcm_enc_round (r, rk[3], n);
-
- /* GHASH multiply block 2 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
-
- /* AES rounds 4 and 5 */
- aes_gcm_enc_round (r, rk[4], n);
- aes_gcm_enc_round (r, rk[5], n);
-
- /* GHASH multiply block 3 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
-
- /* AES rounds 6 and 7 */
- aes_gcm_enc_round (r, rk[6], n);
- aes_gcm_enc_round (r, rk[7], n);
-
- /* GHASH multiply block 4 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
-
- /* AES rounds 8 and 9 */
- aes_gcm_enc_round (r, rk[8], n);
- aes_gcm_enc_round (r, rk[9], n);
-
- /* GHASH reduce 1st step */
- if (f & AES_GCM_F_WITH_GHASH)
- ghash_reduce (gd);
-
- /* load data - encrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- {
- for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- d[i] = inv[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes);
- }
-
- /* GHASH reduce 2nd step */
- if (f & AES_GCM_F_WITH_GHASH)
- ghash_reduce2 (gd);
-
- /* AES last round(s) */
- aes_gcm_enc_last_round (r, d, rk, rounds, n);
-
- /* store data */
- for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- outv[i] = d[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- aes_store_partial (outv + n - 1, d[n - 1], last_block_bytes);
-
- /* GHASH final step */
- if (f & AES_GCM_F_WITH_GHASH)
- T = ghash_final (gd);
-
- return T;
-}
-
-static_always_inline u8x16
-aes_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
- aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
- int rounds, aes_gcm_flags_t f)
-{
- u8x16 r[4];
- ghash_data_t _gd, *gd = &_gd;
- const u8x16 *rk = (u8x16 *) kd->Ke;
- u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - 8;
-
- /* AES rounds 0 and 1 */
- aes_gcm_enc_first_round (r, ctr, rk[0], 4);
- aes_gcm_enc_round (r, rk[1], 4);
-
- /* load 4 blocks of data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- {
- d[0] = inv[0];
- d[1] = inv[1];
- d[2] = inv[2];
- d[3] = inv[3];
- }
-
- /* GHASH multiply block 0 */
- ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
-
- /* AES rounds 2 and 3 */
- aes_gcm_enc_round (r, rk[2], 4);
- aes_gcm_enc_round (r, rk[3], 4);
-
- /* GHASH multiply block 1 */
- ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
-
- /* AES rounds 4 and 5 */
- aes_gcm_enc_round (r, rk[4], 4);
- aes_gcm_enc_round (r, rk[5], 4);
-
- /* GHASH multiply block 2 */
- ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
-
- /* AES rounds 6 and 7 */
- aes_gcm_enc_round (r, rk[6], 4);
- aes_gcm_enc_round (r, rk[7], 4);
-
- /* GHASH multiply block 3 */
- ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
-
- /* AES rounds 8 and 9 */
- aes_gcm_enc_round (r, rk[8], 4);
- aes_gcm_enc_round (r, rk[9], 4);
-
- /* load 4 blocks of data - encrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- {
- d[0] = inv[0];
- d[1] = inv[1];
- d[2] = inv[2];
- d[3] = inv[3];
- }
-
- /* AES last round(s) */
- aes_gcm_enc_last_round (r, d, rk, rounds, 4);
-
- /* store 4 blocks of data */
- outv[0] = d[0];
- outv[1] = d[1];
- outv[2] = d[2];
- outv[3] = d[3];
-
- /* load next 4 blocks of data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- {
- d[0] = inv[4];
- d[1] = inv[5];
- d[2] = inv[6];
- d[3] = inv[7];
- }
-
- /* GHASH multiply block 4 */
- ghash_mul_next (gd, u8x16_reflect (d[0]), Hi[4]);
-
- /* AES rounds 0, 1 and 2 */
- aes_gcm_enc_first_round (r, ctr, rk[0], 4);
- aes_gcm_enc_round (r, rk[1], 4);
- aes_gcm_enc_round (r, rk[2], 4);
-
- /* GHASH multiply block 5 */
- ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[5]);
-
- /* AES rounds 3 and 4 */
- aes_gcm_enc_round (r, rk[3], 4);
- aes_gcm_enc_round (r, rk[4], 4);
-
- /* GHASH multiply block 6 */
- ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[6]);
-
- /* AES rounds 5 and 6 */
- aes_gcm_enc_round (r, rk[5], 4);
- aes_gcm_enc_round (r, rk[6], 4);
-
- /* GHASH multiply block 7 */
- ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[7]);
-
- /* AES rounds 7 and 8 */
- aes_gcm_enc_round (r, rk[7], 4);
- aes_gcm_enc_round (r, rk[8], 4);
-
- /* GHASH reduce 1st step */
- ghash_reduce (gd);
-
- /* AES round 9 */
- aes_gcm_enc_round (r, rk[9], 4);
-
- /* load data - encrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- {
- d[0] = inv[4];
- d[1] = inv[5];
- d[2] = inv[6];
- d[3] = inv[7];
- }
-
- /* GHASH reduce 2nd step */
- ghash_reduce2 (gd);
-
- /* AES last round(s) */
- aes_gcm_enc_last_round (r, d, rk, rounds, 4);
-
- /* store data */
- outv[4] = d[0];
- outv[5] = d[1];
- outv[6] = d[2];
- outv[7] = d[3];
-
- /* GHASH final step */
- return ghash_final (gd);
-}
-
-static_always_inline u8x16
-aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
- int n_blocks, int n_bytes)
-{
- ghash_data_t _gd, *gd = &_gd;
- u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
-
- if (n_bytes)
- d[n_blocks - 1] = aes_byte_mask (d[n_blocks - 1], n_bytes);
-
- ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
- if (n_blocks > 1)
- ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
- if (n_blocks > 2)
- ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
- if (n_blocks > 3)
- ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
- ghash_reduce (gd);
- ghash_reduce2 (gd);
- return ghash_final (gd);
-}
-#endif
-
-#ifdef __VAES__
-static const u32x16 ctr_inv_1234 = {
- 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
-};
-
-static const u32x16 ctr_inv_4444 = {
- 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24
-};
-
-static const u32x16 ctr_1234 = {
- 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0,
-};
-
-static_always_inline void
-aes4_gcm_enc_first_round (u8x64 * r, aes_gcm_counter_t * ctr, u8x64 k, int n)
-{
- u8 last_byte = (u8) ctr->counter;
- int i = 0;
-
- /* As the counter is stored in network byte order for performance reasons,
- we increment the least significant byte only, except in the case where it
- overflows. As we process four 512-bit blocks in parallel except in the
- last round, overflow can happen only when n == 4 */
-
- if (n == 4)
- for (; i < 2; i++)
- {
- r[i] = k ^ (u8x64) ctr->Y4;
- ctr->Y4 += ctr_inv_4444;
- }
-
- if (n == 4 && PREDICT_TRUE (last_byte == 241))
- {
- u32x16 Yc, Yr = (u32x16) u8x64_reflect_u8x16 ((u8x64) ctr->Y4);
-
- for (; i < n; i++)
- {
- r[i] = k ^ (u8x64) ctr->Y4;
- Yc = u32x16_splat (ctr->counter + 4 * (i + 1)) + ctr_1234;
- Yr = (u32x16) u32x16_mask_blend (Yr, Yc, 0x1111);
- ctr->Y4 = (u32x16) u8x64_reflect_u8x16 ((u8x64) Yr);
- }
- }
- else
- {
- for (; i < n; i++)
- {
- r[i] = k ^ (u8x64) ctr->Y4;
- ctr->Y4 += ctr_inv_4444;
- }
- }
- ctr->counter += n * 4;
-}
-
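The overflow comment above is the crux of this helper: the 32-bit block counter is kept in network byte order inside each 128-bit lane, so the fast path bumps it by adding into the top byte lane (the 4 << 24 in ctr_inv_4444) without any byte swap, and only the rare wrap case pays for a full recompute. For comparison, a byte-order-correct scalar increment would cost two swaps per step (function name illustrative):

static inline u32
be_ctr_add (u32 be_ctr, u32 inc)
{
  return clib_host_to_net_u32 (clib_net_to_host_u32 (be_ctr) + inc);
}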
-static_always_inline void
-aes4_gcm_enc_round (u8x64 * r, u8x64 k, int n_blocks)
-{
- for (int i = 0; i < n_blocks; i++)
- r[i] = aes_enc_round_x4 (r[i], k);
-}
-
-static_always_inline void
-aes4_gcm_enc_last_round (u8x64 * r, u8x64 * d, u8x64 const *k,
- int rounds, int n_blocks)
-{
-
- /* additional rounds for AES-192 and AES-256 */
- for (int i = 10; i < rounds; i++)
- aes4_gcm_enc_round (r, k[i], n_blocks);
-
- for (int i = 0; i < n_blocks; i++)
- d[i] ^= aes_enc_last_round_x4 (r[i], k[rounds]);
-}
-
-static_always_inline u8x16
-aes4_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
- aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out,
- int rounds, int n, int last_4block_bytes, aes_gcm_flags_t f)
-{
- ghash4_data_t _gd, *gd = &_gd;
- const u8x64 *rk = (u8x64 *) kd->Ke4;
- int i, ghash_blocks, gc = 1;
- u8x64u *Hi4, *inv = (u8x64u *) in, *outv = (u8x64u *) out;
- u8x64 r[4];
- u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
-
- if (f & AES_GCM_F_ENCRYPT)
- {
- /* during encryption we either hash four 512-bit blocks from previous
- round or we don't hash at all */
- ghash_blocks = 4;
- Hi4 = (u8x64u *) (kd->Hi + NUM_HI - ghash_blocks * 4);
- }
- else
- {
- /* during decryption we hash 1..4 512-bit blocks from the current round */
- ghash_blocks = n;
- int n_128bit_blocks = n * 4;
- /* if this is last round of decryption, we may have less than 4
- 128-bit blocks in the last 512-bit data block, so we need to adjust
- Hi4 pointer accordingly */
- if (f & AES_GCM_F_LAST_ROUND)
- n_128bit_blocks += ((last_4block_bytes + 15) >> 4) - 4;
- Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
- }
-
- /* AES rounds 0 and 1 */
- aes4_gcm_enc_first_round (r, ctr, rk[0], n);
- aes4_gcm_enc_round (r, rk[1], n);
-
- /* load 4 blocks of data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- {
- for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- d[i] = inv[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);
- }
-
- /* GHASH multiply block 0 */
- if (f & AES_GCM_F_WITH_GHASH)
- ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
- u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);
-
- /* AES rounds 2 and 3 */
- aes4_gcm_enc_round (r, rk[2], n);
- aes4_gcm_enc_round (r, rk[3], n);
-
- /* GHASH multiply block 1 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);
-
- /* AES rounds 4 and 5 */
- aes4_gcm_enc_round (r, rk[4], n);
- aes4_gcm_enc_round (r, rk[5], n);
-
- /* GHASH multiply block 2 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);
-
- /* AES rounds 6 and 7 */
- aes4_gcm_enc_round (r, rk[6], n);
- aes4_gcm_enc_round (r, rk[7], n);
-
- /* GHASH multiply block 3 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);
-
- /* load 4 blocks of data - encrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- {
- for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- d[i] = inv[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);
- }
-
- /* AES rounds 8 and 9 */
- aes4_gcm_enc_round (r, rk[8], n);
- aes4_gcm_enc_round (r, rk[9], n);
-
- /* AES last round(s) */
- aes4_gcm_enc_last_round (r, d, rk, rounds, n);
-
- /* store 4 blocks of data */
- for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- outv[i] = d[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- u8x64_mask_store (d[i], outv + i, byte_mask);
-
- /* GHASH reduce 1st step */
- ghash4_reduce (gd);
-
- /* GHASH reduce 2nd step */
- ghash4_reduce2 (gd);
-
- /* GHASH final step */
- return ghash4_final (gd);
-}
-
-static_always_inline u8x16
-aes4_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
- aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out,
- int rounds, aes_gcm_flags_t f)
-{
- u8x64 r[4];
- ghash4_data_t _gd, *gd = &_gd;
- const u8x64 *rk = (u8x64 *) kd->Ke4;
- u8x64 *Hi4 = (u8x64 *) (kd->Hi + NUM_HI - 32);
- u8x64u *inv = (u8x64u *) in, *outv = (u8x64u *) out;
-
- /* AES rounds 0 and 1 */
- aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
- aes4_gcm_enc_round (r, rk[1], 4);
-
- /* load 4 blocks of data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- for (int i = 0; i < 4; i++)
- d[i] = inv[i];
-
- /* GHASH multiply block 0 */
- ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
- u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);
-
- /* AES rounds 2 and 3 */
- aes4_gcm_enc_round (r, rk[2], 4);
- aes4_gcm_enc_round (r, rk[3], 4);
-
- /* GHASH multiply block 1 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);
-
- /* AES rounds 4 and 5 */
- aes4_gcm_enc_round (r, rk[4], 4);
- aes4_gcm_enc_round (r, rk[5], 4);
-
- /* GHASH multiply block 2 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);
-
- /* AES rounds 6 and 7 */
- aes4_gcm_enc_round (r, rk[6], 4);
- aes4_gcm_enc_round (r, rk[7], 4);
-
- /* GHASH multiply block 3 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);
-
- /* AES rounds 8 and 9 */
- aes4_gcm_enc_round (r, rk[8], 4);
- aes4_gcm_enc_round (r, rk[9], 4);
-
- /* load 4 blocks of data - encrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- for (int i = 0; i < 4; i++)
- d[i] = inv[i];
-
- /* AES last round(s) */
- aes4_gcm_enc_last_round (r, d, rk, rounds, 4);
-
- /* store 4 blocks of data */
- for (int i = 0; i < 4; i++)
- outv[i] = d[i];
-
- /* load 4 blocks of data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- for (int i = 0; i < 4; i++)
- d[i] = inv[i + 4];
-
- /* GHASH multiply block 4 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[0]), Hi4[4]);
-
- /* AES rounds 0 and 1 */
- aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
- aes4_gcm_enc_round (r, rk[1], 4);
-
- /* GHASH multiply block 5 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[5]);
-
- /* AES rounds 2 and 3 */
- aes4_gcm_enc_round (r, rk[2], 4);
- aes4_gcm_enc_round (r, rk[3], 4);
-
- /* GHASH multiply block 6 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[6]);
-
- /* AES rounds 4 and 5 */
- aes4_gcm_enc_round (r, rk[4], 4);
- aes4_gcm_enc_round (r, rk[5], 4);
-
- /* GHASH multiply block 7 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[7]);
-
- /* AES rounds 6 and 7 */
- aes4_gcm_enc_round (r, rk[6], 4);
- aes4_gcm_enc_round (r, rk[7], 4);
-
- /* GHASH reduce 1st step */
- ghash4_reduce (gd);
-
- /* AES rounds 8 and 9 */
- aes4_gcm_enc_round (r, rk[8], 4);
- aes4_gcm_enc_round (r, rk[9], 4);
-
- /* GHASH reduce 2nd step */
- ghash4_reduce2 (gd);
-
- /* load 4 blocks of data - encrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- for (int i = 0; i < 4; i++)
- d[i] = inv[i + 4];
-
- /* AES last round(s) */
- aes4_gcm_enc_last_round (r, d, rk, rounds, 4);
-
- /* store 4 blocks of data */
- for (int i = 0; i < 4; i++)
- outv[i + 4] = d[i];
-
- /* GHASH final step */
- return ghash4_final (gd);
-}
-
-static_always_inline u8x16
-aes4_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
- int n, int last_4block_bytes)
-{
- ghash4_data_t _gd, *gd = &_gd;
- u8x64u *Hi4;
- int n_128bit_blocks;
- u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
- n_128bit_blocks = (n - 1) * 4 + ((last_4block_bytes + 15) >> 4);
- Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
-
- d[n - 1] = u8x64_mask_blend (u8x64_splat (0), d[n - 1], byte_mask);
- ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
- u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);
- if (n > 1)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);
- if (n > 2)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);
- if (n > 3)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);
- ghash4_reduce (gd);
- ghash4_reduce2 (gd);
- return ghash4_final (gd);
-}
-#endif
-
-static_always_inline u8x16
-aes_gcm_enc (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
- u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
-{
- aes_gcm_flags_t f = AES_GCM_F_ENCRYPT;
-
- if (n_left == 0)
- return T;
-
-#if __VAES__
- u8x64 d4[4];
- if (n_left < 256)
- {
- f |= AES_GCM_F_LAST_ROUND;
- if (n_left > 192)
- {
- n_left -= 192;
- aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
- }
- else if (n_left > 128)
- {
- n_left -= 128;
- aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
- }
- else if (n_left > 64)
- {
- n_left -= 64;
- aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
- }
- else
- {
- aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);
- }
- }
-
- aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 256;
- outv += 16;
- inv += 16;
-
- f |= AES_GCM_F_WITH_GHASH;
-
- while (n_left >= 512)
- {
- T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);
-
- /* next */
- n_left -= 512;
- outv += 32;
- inv += 32;
- }
-
- while (n_left >= 256)
- {
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 256;
- outv += 16;
- inv += 16;
- }
-
- if (n_left == 0)
- return aes4_gcm_ghash_last (T, kd, d4, 4, 64);
-
- f |= AES_GCM_F_LAST_ROUND;
-
- if (n_left > 192)
- {
- n_left -= 192;
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
- }
-
- if (n_left > 128)
- {
- n_left -= 128;
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
- }
-
- if (n_left > 64)
- {
- n_left -= 64;
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
- }
-
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);
-#else
- u8x16 d[4];
- if (n_left < 64)
- {
- f |= AES_GCM_F_LAST_ROUND;
- if (n_left > 48)
- {
- n_left -= 48;
- aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 4, n_left);
- }
- else if (n_left > 32)
- {
- n_left -= 32;
- aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 3, n_left);
- }
- else if (n_left > 16)
- {
- n_left -= 16;
- aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 2, n_left);
- }
- else
- {
- aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 1, n_left);
- }
- }
-
- aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 64;
- outv += 4;
- inv += 4;
-
- f |= AES_GCM_F_WITH_GHASH;
-
- while (n_left >= 128)
- {
- T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f);
-
- /* next */
- n_left -= 128;
- outv += 8;
- inv += 8;
- }
-
- if (n_left >= 64)
- {
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 64;
- outv += 4;
- inv += 4;
- }
-
- if (n_left == 0)
- return aes_gcm_ghash_last (T, kd, d, 4, 0);
-
- f |= AES_GCM_F_LAST_ROUND;
-
- if (n_left > 48)
- {
- n_left -= 48;
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 4, n_left);
- }
-
- if (n_left > 32)
- {
- n_left -= 32;
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 3, n_left);
- }
-
- if (n_left > 16)
- {
- n_left -= 16;
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 2, n_left);
- }
-
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 1, n_left);
-#endif
-}
-
-static_always_inline u8x16
-aes_gcm_dec (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
- u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
-{
- aes_gcm_flags_t f = AES_GCM_F_WITH_GHASH | AES_GCM_F_DECRYPT;
-#ifdef __VAES__
- u8x64 d4[4] = { };
-
- while (n_left >= 512)
- {
- T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);
-
- /* next */
- n_left -= 512;
- outv += 32;
- inv += 32;
- }
-
- while (n_left >= 256)
- {
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 256;
- outv += 16;
- inv += 16;
- }
-
- if (n_left == 0)
- return T;
-
- f |= AES_GCM_F_LAST_ROUND;
-
- if (n_left > 192)
- return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4,
- n_left - 192, f);
- if (n_left > 128)
- return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3,
- n_left - 128, f);
- if (n_left > 64)
- return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2,
- n_left - 64, f);
- return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
-#else
- u8x16 d[4];
- while (n_left >= 128)
- {
- T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f);
-
- /* next */
- n_left -= 128;
- outv += 8;
- inv += 8;
- }
-
- if (n_left >= 64)
- {
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 64;
- outv += 4;
- inv += 4;
- }
-
- if (n_left == 0)
- return T;
-
- f |= AES_GCM_F_LAST_ROUND;
-
- if (n_left > 48)
- return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left - 48, f);
-
- if (n_left > 32)
- return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left - 32, f);
-
- if (n_left > 16)
- return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left - 16, f);
-
- return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
-#endif
-}
-
-static_always_inline int
-aes_gcm (u8x16u * in, u8x16u * out, u8x16u * addt, u8x16u * iv, u8x16u * tag,
- u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t * kd,
- int aes_rounds, int is_encrypt)
-{
- int i;
- u8x16 r, T = { };
- u32x4 Y0;
- ghash_data_t _gd, *gd = &_gd;
- aes_gcm_counter_t _ctr, *ctr = &_ctr;
-
- clib_prefetch_load (iv);
- clib_prefetch_load (in);
- clib_prefetch_load (in + 4);
-
- /* calculate ghash for AAD - optimized for common IPsec cases */
- if (aad_bytes == 8)
- T = aes_gcm_ghash (T, kd, addt, 8);
- else if (aad_bytes == 12)
- T = aes_gcm_ghash (T, kd, addt, 12);
- else
- T = aes_gcm_ghash (T, kd, addt, aad_bytes);
-
- /* initialize counter */
- ctr->counter = 1;
- Y0 = (u32x4) aes_load_partial (iv, 12) + ctr_inv_1;
-#ifdef __VAES__
- ctr->Y4 = u32x16_splat_u32x4 (Y0) + ctr_inv_1234;
-#else
- ctr->Y = Y0 + ctr_inv_1;
-#endif
-
- /* ghash and encrypt/decrypt */
- if (is_encrypt)
- T = aes_gcm_enc (T, kd, ctr, in, out, data_bytes, aes_rounds);
- else
- T = aes_gcm_dec (T, kd, ctr, in, out, data_bytes, aes_rounds);
-
- clib_prefetch_load (tag);
-
- /* Finalize ghash - data bytes and aad bytes converted to bits */
- /* *INDENT-OFF* */
- r = (u8x16) ((u64x2) {data_bytes, aad_bytes} << 3);
- /* *INDENT-ON* */
-
- /* interleaved computation of final ghash and E(Y0, k) */
- ghash_mul_first (gd, r ^ T, kd->Hi[NUM_HI - 1]);
- r = kd->Ke[0] ^ (u8x16) Y0;
- for (i = 1; i < 5; i += 1)
- r = aes_enc_round (r, kd->Ke[i]);
- ghash_reduce (gd);
- ghash_reduce2 (gd);
- for (; i < 9; i += 1)
- r = aes_enc_round (r, kd->Ke[i]);
- T = ghash_final (gd);
- for (; i < aes_rounds; i += 1)
- r = aes_enc_round (r, kd->Ke[i]);
- r = aes_enc_last_round (r, kd->Ke[aes_rounds]);
- T = u8x16_reflect (T) ^ r;
-
- /* tag_len 16 -> 0 */
- tag_len &= 0xf;
-
- if (is_encrypt)
- {
- /* store tag */
- if (tag_len)
- aes_store_partial (tag, T, tag_len);
- else
- tag[0] = T;
- }
- else
- {
- /* check tag */
- u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
- if ((u8x16_msb_mask (tag[0] == T) & tag_mask) != tag_mask)
- return 0;
- }
- return 1;
-}
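
/* A minimal sketch (illustration only, not part of this patch) of the
 * truncated-tag check performed above: tag_len is masked to 0..15, a 16-bit
 * lane mask selects the first tag_len bytes of the 16-byte comparison
 * result, and a full 16-byte tag (masked to 0) compares all lanes. */
static inline int
aes_gcm_tag_matches_sketch (u8x16 computed, u8x16 received, u8 tag_len)
{
  tag_len &= 0xf; /* 16-byte tag maps to 0, meaning "all lanes" */
  u16 mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
  return (u8x16_msb_mask (computed == received) & mask) == mask;
}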
-
static_always_inline u32
-aes_ops_enc_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[],
- u32 n_ops, aes_key_size_t ks)
+aes_ops_enc_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
+ aes_key_size_t ks)
{
crypto_native_main_t *cm = &crypto_native_main;
vnet_crypto_op_t *op = ops[0];
aes_gcm_key_data_t *kd;
u32 n_left = n_ops;
-
next:
kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index];
- aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad,
- (u8x16u *) op->iv, (u8x16u *) op->tag, op->len, op->aad_len,
- op->tag_len, kd, AES_KEY_ROUNDS (ks), /* is_encrypt */ 1);
+ aes_gcm (op->src, op->dst, op->aad, (u8 *) op->iv, op->tag, op->len,
+ op->aad_len, op->tag_len, kd, AES_KEY_ROUNDS (ks),
+ AES_GCM_OP_ENCRYPT);
op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
if (--n_left)
@@ -1125,7 +51,7 @@ next:
}
static_always_inline u32
-aes_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops,
+aes_ops_dec_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
aes_key_size_t ks)
{
crypto_native_main_t *cm = &crypto_native_main;
@@ -1136,10 +62,9 @@ aes_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops,
next:
kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index];
- rv = aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad,
- (u8x16u *) op->iv, (u8x16u *) op->tag, op->len,
+ rv = aes_gcm (op->src, op->dst, op->aad, (u8 *) op->iv, op->tag, op->len,
op->aad_len, op->tag_len, kd, AES_KEY_ROUNDS (ks),
- /* is_encrypt */ 0);
+ AES_GCM_OP_DECRYPT);
if (rv)
{
@@ -1161,75 +86,81 @@ next:
}
static_always_inline void *
-aes_gcm_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks)
+aes_gcm_key_exp (vnet_crypto_key_t *key, aes_key_size_t ks)
{
aes_gcm_key_data_t *kd;
- u8x16 H;
kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
- /* expand AES key */
- aes_key_expand ((u8x16 *) kd->Ke, key->data, ks);
+ clib_aes_gcm_key_expand (kd, key->data, ks);
- /* pre-calculate H */
- H = aes_encrypt_block (u8x16_splat (0), kd->Ke, ks);
- H = u8x16_reflect (H);
- ghash_precompute (H, (u8x16 *) kd->Hi, NUM_HI);
-#ifdef __VAES__
- u8x64 *Ke4 = (u8x64 *) kd->Ke4;
- for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
- Ke4[i] = u8x64_splat_u8x16 (kd->Ke[i]);
-#endif
return kd;
}
-#define foreach_aes_gcm_handler_type _(128) _(192) _(256)
-
-#define _(x) \
-static u32 aes_ops_dec_aes_gcm_##x \
-(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
-static u32 aes_ops_enc_aes_gcm_##x \
-(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
-static void * aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \
-{ return aes_gcm_key_exp (key, AES_KEY_##x); }
+#define foreach_aes_gcm_handler_type _ (128) _ (192) _ (256)
+
+#define _(x) \
+ static u32 aes_ops_dec_aes_gcm_##x (vlib_main_t *vm, \
+ vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); \
+ } \
+ static u32 aes_ops_enc_aes_gcm_##x (vlib_main_t *vm, \
+ vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); \
+ } \
+ static void *aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \
+ { \
+ return aes_gcm_key_exp (key, AES_KEY_##x); \
+ }
foreach_aes_gcm_handler_type;
#undef _
-clib_error_t *
-#ifdef __VAES__
-crypto_native_aes_gcm_init_icl (vlib_main_t * vm)
-#elif __AVX512F__
-crypto_native_aes_gcm_init_skx (vlib_main_t * vm)
-#elif __AVX2__
-crypto_native_aes_gcm_init_hsw (vlib_main_t * vm)
+static int
+probe ()
+{
+#if defined(__VAES__) && defined(__AVX512F__)
+ if (clib_cpu_supports_vpclmulqdq () && clib_cpu_supports_vaes () &&
+ clib_cpu_supports_avx512f ())
+ return 50;
+#elif defined(__VAES__)
+ if (clib_cpu_supports_vpclmulqdq () && clib_cpu_supports_vaes ())
+ return 40;
+#elif defined(__AVX512F__)
+ if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_avx512f ())
+ return 30;
+#elif defined(__AVX2__)
+ if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_avx2 ())
+ return 20;
+#elif __AES__
+ if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_aes ())
+ return 10;
#elif __aarch64__
-crypto_native_aes_gcm_init_neon (vlib_main_t * vm)
-#else
-crypto_native_aes_gcm_init_slm (vlib_main_t * vm)
+ if (clib_cpu_supports_aarch64_aes ())
+ return 10;
#endif
-{
- crypto_native_main_t *cm = &crypto_native_main;
+ return -1;
+}
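
/* A minimal sketch (not part of this patch) of the probe contract: return a
 * non-negative priority when this march variant can run on the current CPU,
 * -1 otherwise; crypto_native_init() later keeps the highest priority per
 * op id. The branch below mirrors the real __AVX2__ case above. */
static int
probe_sketch (void)
{
#if defined(__AVX2__)
  if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_avx2 ())
    return 20; /* outranks the plain AES-NI variant (10) */
#endif
  return -1; /* variant unusable on this CPU */
}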
+
+#define _(b) \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_enc) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_GCM_ENC, \
+ .fn = aes_ops_enc_aes_gcm_##b, \
+ .probe = probe, \
+ }; \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_dec) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_GCM_DEC, \
+ .fn = aes_ops_dec_aes_gcm_##b, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_KEY_HANDLER (aes_##b##_gcm) = { \
+ .alg_id = VNET_CRYPTO_ALG_AES_##b##_GCM, \
+ .key_fn = aes_gcm_key_exp_##b, \
+ .probe = probe, \
+ };
-#define _(x) \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \
- aes_ops_enc_aes_gcm_##x); \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \
- aes_ops_dec_aes_gcm_##x); \
- cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x;
- foreach_aes_gcm_handler_type;
+_ (128) _ (192) _ (256)
#undef _
- return 0;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/crypto_native/crypto_native.h b/src/plugins/crypto_native/crypto_native.h
index d5c33daa1a6..3d18e8cabd0 100644
--- a/src/plugins/crypto_native/crypto_native.h
+++ b/src/plugins/crypto_native/crypto_native.h
@@ -19,38 +19,66 @@
#define __crypto_native_h__
typedef void *(crypto_native_key_fn_t) (vnet_crypto_key_t * key);
+typedef int (crypto_native_variant_probe_t) ();
-typedef struct
+typedef struct crypto_native_op_handler
+{
+ struct crypto_native_op_handler *next;
+ vnet_crypto_op_id_t op_id;
+ vnet_crypto_ops_handler_t *fn;
+ vnet_crypto_chained_ops_handler_t *cfn;
+ crypto_native_variant_probe_t *probe;
+ int priority;
+} crypto_native_op_handler_t;
+
+typedef struct crypto_native_key_handler
{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u8x16 cbc_iv[16];
-} crypto_native_per_thread_data_t;
+ struct crypto_native_key_handler *next;
+ vnet_crypto_alg_t alg_id;
+ crypto_native_key_fn_t *key_fn;
+ crypto_native_variant_probe_t *probe;
+ int priority;
+} crypto_native_key_handler_t;
typedef struct
{
u32 crypto_engine_index;
- crypto_native_per_thread_data_t *per_thread_data;
crypto_native_key_fn_t *key_fn[VNET_CRYPTO_N_ALGS];
void **key_data;
+ crypto_native_op_handler_t *op_handlers;
+ crypto_native_key_handler_t *key_handlers;
} crypto_native_main_t;
extern crypto_native_main_t crypto_native_main;
-#define foreach_crypto_native_march_variant _(slm) _(hsw) _(skx) _(icl) _(neon)
-
-#define _(v) \
-clib_error_t __clib_weak *crypto_native_aes_cbc_init_##v (vlib_main_t * vm); \
-clib_error_t __clib_weak *crypto_native_aes_gcm_init_##v (vlib_main_t * vm); \
-
-foreach_crypto_native_march_variant;
-#undef _
+#define CRYPTO_NATIVE_OP_HANDLER(x) \
+ static crypto_native_op_handler_t __crypto_native_op_handler_##x; \
+ static void __clib_constructor __crypto_native_op_handler_cb_##x (void) \
+ { \
+ crypto_native_main_t *cm = &crypto_native_main; \
+ int priority = __crypto_native_op_handler_##x.probe (); \
+ if (priority >= 0) \
+ { \
+ __crypto_native_op_handler_##x.priority = priority; \
+ __crypto_native_op_handler_##x.next = cm->op_handlers; \
+ cm->op_handlers = &__crypto_native_op_handler_##x; \
+ } \
+ } \
+ static crypto_native_op_handler_t __crypto_native_op_handler_##x
+#define CRYPTO_NATIVE_KEY_HANDLER(x) \
+ static crypto_native_key_handler_t __crypto_native_key_handler_##x; \
+ static void __clib_constructor __crypto_native_key_handler_cb_##x (void) \
+ { \
+ crypto_native_main_t *cm = &crypto_native_main; \
+ int priority = __crypto_native_key_handler_##x.probe (); \
+ if (priority >= 0) \
+ { \
+ __crypto_native_key_handler_##x.priority = priority; \
+ __crypto_native_key_handler_##x.next = cm->key_handlers; \
+ cm->key_handlers = &__crypto_native_key_handler_##x; \
+ } \
+ } \
+ static crypto_native_key_handler_t __crypto_native_key_handler_##x
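
/* A usage sketch (illustration only): expanding the macro declares a static
 * handler struct plus a __clib_constructor function that runs at plugin
 * load, calls probe(), and on a non-negative priority links the handler
 * into crypto_native_main for crypto_native_init() to rank. The fn and
 * probe symbols below are placeholders, not real VPP functions. */
CRYPTO_NATIVE_OP_HANDLER (example_enc) = {
  .op_id = VNET_CRYPTO_OP_AES_128_GCM_ENC,
  .fn = example_enc_fn,	  /* placeholder ops handler */
  .probe = example_probe, /* placeholder probe */
};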
#endif /* __crypto_native_h__ */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/crypto_native/ghash.h b/src/plugins/crypto_native/ghash.h
deleted file mode 100644
index f389d11cfe7..00000000000
--- a/src/plugins/crypto_native/ghash.h
+++ /dev/null
@@ -1,419 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-/*
- *------------------------------------------------------------------
- * Copyright(c) 2018, Intel Corporation All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *------------------------------------------------------------------
- */
-
-/*
- * Based on work by: Shay Gueron, Michael E. Kounavis, Erdinc Ozturk,
- * Vinodh Gopal, James Guilford, Tomasz Kantecki
- *
- * References:
- * [1] Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation on
- * Intel Architecture Processors. August, 2010
- * [2] Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode on
- * Intel Architecture Processors. October, 2012.
- * [3] intel-ipsec-mb library, https://github.com/01org/intel-ipsec-mb.git
- *
- * Definitions:
- * GF Galois Extension Field GF(2^128) - finite field where elements are
- * represented as polynomials with coefficients in GF(2) with the
- * highest degree of 127. Polynomials are represented as 128-bit binary
- * numbers where each bit represents one coefficient.
- * e.g. polynomial x^5 + x^3 + x + 1 is represented in binary 101011.
- * H hash key (128 bit)
- * POLY irreducible polynomial x^127 + x^7 + x^2 + x + 1
- * RPOLY irreducible polynomial x^128 + x^127 + x^126 + x^121 + 1
- * + addition in GF, which equals to XOR operation
- * * multiplication in GF
- *
- * GF multiplication consists of 2 steps:
- * - carry-less multiplication of two 128-bit operands into 256-bit result
- * - reduction of 256-bit result into 128-bit with modulo POLY
- *
- * GHash is calculated on 128-bit blocks of data according to the following
- * formula:
- * GH = (GH + data) * hash_key
- *
- * To avoid bit-reflection of data, this code uses GF multiplication
- * with reversed polynomial:
- * a * b * x^-127 mod RPOLY
- *
- * To improve computation speed, table Hi is precomputed with powers of H'
- * (Hi[0] holding the highest power), where H' is calculated as
- * H<<1 mod RPOLY. This allows us to improve performance by deferring
- * reduction. For example, to calculate the ghash of 4 128-bit blocks of
- * data (b0, b1, b2, b3), we do:
- *
- * __i128 Hi[4];
- * ghash_precompute (H, Hi, 4);
- *
- * ghash_data_t _gd, *gd = &_gd;
- * ghash_mul_first (gd, GH ^ b0, Hi[0]);
- * ghash_mul_next (gd, b1, Hi[1]);
- * ghash_mul_next (gd, b2, Hi[2]);
- * ghash_mul_next (gd, b3, Hi[3]);
- * ghash_reduce (gd);
- * ghash_reduce2 (gd);
- * GH = ghash_final (gd);
- *
- * The reduction step is split into 3 functions so it can be better
- * interleaved with other code (i.e. with AES computation).
- */
-
-#ifndef __ghash_h__
-#define __ghash_h__
-
-static_always_inline u8x16
-gmul_lo_lo (u8x16 a, u8x16 b)
-{
-#if defined (__PCLMUL__)
- return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x00);
-#elif defined (__ARM_FEATURE_CRYPTO)
- return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a),
- (poly64_t) vget_low_p64 ((poly64x2_t) b));
-#endif
-}
-
-static_always_inline u8x16
-gmul_hi_lo (u8x16 a, u8x16 b)
-{
-#if defined (__PCLMUL__)
- return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x01);
-#elif defined (__ARM_FEATURE_CRYPTO)
- return (u8x16) vmull_p64 ((poly64_t) vget_high_p64 ((poly64x2_t) a),
- (poly64_t) vget_low_p64 ((poly64x2_t) b));
-#endif
-}
-
-static_always_inline u8x16
-gmul_lo_hi (u8x16 a, u8x16 b)
-{
-#if defined (__PCLMUL__)
- return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x10);
-#elif defined (__ARM_FEATURE_CRYPTO)
- return (u8x16) vmull_p64 ((poly64_t) vget_low_p64 ((poly64x2_t) a),
- (poly64_t) vget_high_p64 ((poly64x2_t) b));
-#endif
-}
-
-static_always_inline u8x16
-gmul_hi_hi (u8x16 a, u8x16 b)
-{
-#if defined (__PCLMUL__)
- return (u8x16) _mm_clmulepi64_si128 ((__m128i) a, (__m128i) b, 0x11);
-#elif defined (__ARM_FEATURE_CRYPTO)
- return (u8x16) vmull_high_p64 ((poly64x2_t) a, (poly64x2_t) b);
-#endif
-}
-
-typedef struct
-{
- u8x16 mid, hi, lo, tmp_lo, tmp_hi;
- int pending;
-} ghash_data_t;
-
-static const u8x16 ghash_poly = {
- 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
-};
-
-static const u8x16 ghash_poly2 = {
- 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2
-};
-
-static_always_inline void
-ghash_mul_first (ghash_data_t * gd, u8x16 a, u8x16 b)
-{
- /* a1 * b1 */
- gd->hi = gmul_hi_hi (a, b);
- /* a0 * b0 */
- gd->lo = gmul_lo_lo (a, b);
- /* a0 * b1 ^ a1 * b0 */
- gd->mid = (gmul_hi_lo (a, b) ^ gmul_lo_hi (a, b));
-
- /* set gd->pending to 0 so the next invocation of ghash_mul_next(...) knows
- that there is no pending data in tmp_lo and tmp_hi */
- gd->pending = 0;
-}
-
-static_always_inline void
-ghash_mul_next (ghash_data_t * gd, u8x16 a, u8x16 b)
-{
- /* a1 * b1 */
- u8x16 hi = gmul_hi_hi (a, b);
- /* a0 * b0 */
- u8x16 lo = gmul_lo_lo (a, b);
-
- /* this branch will be optimized out by the compiler, and it allows us to
- reduce the number of XOR operations by using ternary logic */
- if (gd->pending)
- {
- /* there is pending data from the previous invocation so we can XOR */
- gd->hi = u8x16_xor3 (gd->hi, gd->tmp_hi, hi);
- gd->lo = u8x16_xor3 (gd->lo, gd->tmp_lo, lo);
- gd->pending = 0;
- }
- else
- {
- /* there is no pending data from the previous invocation so we postpone XOR */
- gd->tmp_hi = hi;
- gd->tmp_lo = lo;
- gd->pending = 1;
- }
-
- /* gd->mid ^= a0 * b1 ^ a1 * b0 */
- gd->mid = u8x16_xor3 (gd->mid, gmul_hi_lo (a, b), gmul_lo_hi (a, b));
-}
-
-static_always_inline void
-ghash_reduce (ghash_data_t * gd)
-{
- u8x16 r;
-
- /* Final combination:
- gd->lo ^= gd->mid << 64
- gd->hi ^= gd->mid >> 64 */
- u8x16 midl = u8x16_word_shift_left (gd->mid, 8);
- u8x16 midr = u8x16_word_shift_right (gd->mid, 8);
-
- if (gd->pending)
- {
- gd->lo = u8x16_xor3 (gd->lo, gd->tmp_lo, midl);
- gd->hi = u8x16_xor3 (gd->hi, gd->tmp_hi, midr);
- }
- else
- {
- gd->lo ^= midl;
- gd->hi ^= midr;
- }
- r = gmul_hi_lo (ghash_poly2, gd->lo);
- gd->lo ^= u8x16_word_shift_left (r, 8);
-}
-
-static_always_inline void
-ghash_reduce2 (ghash_data_t * gd)
-{
- gd->tmp_lo = gmul_lo_lo (ghash_poly2, gd->lo);
- gd->tmp_hi = gmul_lo_hi (ghash_poly2, gd->lo);
-}
-
-static_always_inline u8x16
-ghash_final (ghash_data_t * gd)
-{
- return u8x16_xor3 (gd->hi, u8x16_word_shift_right (gd->tmp_lo, 4),
- u8x16_word_shift_left (gd->tmp_hi, 4));
-}
-
-static_always_inline u8x16
-ghash_mul (u8x16 a, u8x16 b)
-{
- ghash_data_t _gd, *gd = &_gd;
- ghash_mul_first (gd, a, b);
- ghash_reduce (gd);
- ghash_reduce2 (gd);
- return ghash_final (gd);
-}
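
/* A minimal sketch (not part of this patch) of a single GHASH step from the
 * formula GH = (GH + data) * hash_key; '+' in GF(2^128) is XOR, and H must
 * already be pre-shifted as H<<1 mod RPOLY (see ghash_precompute below). */
static inline u8x16
ghash_step_sketch (u8x16 GH, u8x16 data, u8x16 H_shifted)
{
  return ghash_mul (GH ^ data, H_shifted);
}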
-
-#ifdef __VPCLMULQDQ__
-
-static const u8x64 ghash4_poly2 = {
- 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
- 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
- 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
- 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
-};
-
-typedef struct
-{
- u8x64 hi, lo, mid, tmp_lo, tmp_hi;
- int pending;
-} ghash4_data_t;
-
-static_always_inline u8x64
-gmul4_lo_lo (u8x64 a, u8x64 b)
-{
- return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x00);
-}
-
-static_always_inline u8x64
-gmul4_hi_lo (u8x64 a, u8x64 b)
-{
- return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x01);
-}
-
-static_always_inline u8x64
-gmul4_lo_hi (u8x64 a, u8x64 b)
-{
- return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x10);
-}
-
-static_always_inline u8x64
-gmul4_hi_hi (u8x64 a, u8x64 b)
-{
- return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x11);
-}
-
-
-static_always_inline void
-ghash4_mul_first (ghash4_data_t * gd, u8x64 a, u8x64 b)
-{
- gd->hi = gmul4_hi_hi (a, b);
- gd->lo = gmul4_lo_lo (a, b);
- gd->mid = (gmul4_hi_lo (a, b) ^ gmul4_lo_hi (a, b));
- gd->pending = 0;
-}
-
-static_always_inline void
-ghash4_mul_next (ghash4_data_t * gd, u8x64 a, u8x64 b)
-{
- u8x64 hi = gmul4_hi_hi (a, b);
- u8x64 lo = gmul4_lo_lo (a, b);
-
- if (gd->pending)
- {
- /* there is pending data from the previous invocation so we can XOR */
- gd->hi = u8x64_xor3 (gd->hi, gd->tmp_hi, hi);
- gd->lo = u8x64_xor3 (gd->lo, gd->tmp_lo, lo);
- gd->pending = 0;
- }
- else
- {
- /* there is no pending data from the previous invocation so we postpone XOR */
- gd->tmp_hi = hi;
- gd->tmp_lo = lo;
- gd->pending = 1;
- }
- gd->mid = u8x64_xor3 (gd->mid, gmul4_hi_lo (a, b), gmul4_lo_hi (a, b));
-}
-
-static_always_inline void
-ghash4_reduce (ghash4_data_t * gd)
-{
- u8x64 r;
-
- /* Final combination:
- gd->lo ^= gd->mid << 64
- gd->hi ^= gd->mid >> 64 */
-
- u8x64 midl = u8x64_word_shift_left (gd->mid, 8);
- u8x64 midr = u8x64_word_shift_right (gd->mid, 8);
-
- if (gd->pending)
- {
- gd->lo = u8x64_xor3 (gd->lo, gd->tmp_lo, midl);
- gd->hi = u8x64_xor3 (gd->hi, gd->tmp_hi, midr);
- }
- else
- {
- gd->lo ^= midl;
- gd->hi ^= midr;
- }
-
- r = gmul4_hi_lo (ghash4_poly2, gd->lo);
- gd->lo ^= u8x64_word_shift_left (r, 8);
-
-}
-
-static_always_inline void
-ghash4_reduce2 (ghash4_data_t * gd)
-{
- gd->tmp_lo = gmul4_lo_lo (ghash4_poly2, gd->lo);
- gd->tmp_hi = gmul4_lo_hi (ghash4_poly2, gd->lo);
-}
-
-static_always_inline u8x16
-ghash4_final (ghash4_data_t * gd)
-{
- u8x64 r;
- u8x32 t;
-
- r = u8x64_xor3 (gd->hi, u8x64_word_shift_right (gd->tmp_lo, 4),
- u8x64_word_shift_left (gd->tmp_hi, 4));
-
- /* horizontal XOR of 4 128-bit lanes */
- t = u8x64_extract_lo (r) ^ u8x64_extract_hi (r);
- return u8x32_extract_hi (t) ^ u8x32_extract_lo (t);
-}
-#endif
-
-static_always_inline void
-ghash_precompute (u8x16 H, u8x16 * Hi, int n)
-{
- u8x16 r8;
- u32x4 r32;
- /* calculate H<<1 mod poly from the hash key */
- r8 = (u8x16) ((u64x2) H >> 63);
- H = (u8x16) ((u64x2) H << 1);
- H |= u8x16_word_shift_left (r8, 8);
- r32 = (u32x4) u8x16_word_shift_right (r8, 8);
-#ifdef __SSE2__
- r32 = u32x4_shuffle (r32, 0, 1, 2, 0);
-#else
- r32[3] = r32[0];
-#endif
- /* *INDENT-OFF* */
- r32 = r32 == (u32x4) {1, 0, 0, 1};
- /* *INDENT-ON* */
- Hi[n - 1] = H = H ^ ((u8x16) r32 & ghash_poly);
-
- /* calculate H^(i + 1) */
- for (int i = n - 2; i >= 0; i--)
- Hi[i] = ghash_mul (H, Hi[i + 1]);
-}
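
/* A usage sketch (illustration only, not part of this patch) combining
 * precompute with deferred reduction for two blocks; per ghash_precompute
 * above, Hi[0] holds the highest power, so the first block pairs with
 * Hi[0]. */
static inline u8x16
ghash_two_blocks_sketch (u8x16 GH, u8x16 H, u8x16 b0, u8x16 b1)
{
  u8x16 Hi[2];
  ghash_data_t _gd, *gd = &_gd;

  ghash_precompute (H, Hi, 2); /* Hi[0] = H'^2, Hi[1] = H' */
  ghash_mul_first (gd, GH ^ b0, Hi[0]);
  ghash_mul_next (gd, b1, Hi[1]);
  ghash_reduce (gd);
  ghash_reduce2 (gd);
  return ghash_final (gd);
}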
-
-#endif /* __ghash_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/crypto_native/main.c b/src/plugins/crypto_native/main.c
index 32bbbb13652..2bc0d98f196 100644
--- a/src/plugins/crypto_native/main.c
+++ b/src/plugins/crypto_native/main.c
@@ -63,100 +63,66 @@ clib_error_t *
crypto_native_init (vlib_main_t * vm)
{
crypto_native_main_t *cm = &crypto_native_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- clib_error_t *error = 0;
- if (clib_cpu_supports_x86_aes () == 0 &&
- clib_cpu_supports_aarch64_aes () == 0)
+ if (cm->op_handlers == 0)
return 0;
- vec_validate_aligned (cm->per_thread_data, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
-
cm->crypto_engine_index =
vnet_crypto_register_engine (vm, "native", 100,
"Native ISA Optimized Crypto");
- if (0);
-#if __x86_64__
- else if (crypto_native_aes_cbc_init_icl && clib_cpu_supports_vaes ())
- error = crypto_native_aes_cbc_init_icl (vm);
- else if (crypto_native_aes_cbc_init_skx && clib_cpu_supports_avx512f ())
- error = crypto_native_aes_cbc_init_skx (vm);
- else if (crypto_native_aes_cbc_init_hsw && clib_cpu_supports_avx2 ())
- error = crypto_native_aes_cbc_init_hsw (vm);
- else if (crypto_native_aes_cbc_init_slm)
- error = crypto_native_aes_cbc_init_slm (vm);
-#endif
-#if __aarch64__
- else if (crypto_native_aes_cbc_init_neon)
- error = crypto_native_aes_cbc_init_neon (vm);
-#endif
- else
- error = clib_error_return (0, "No AES CBC implemenation available");
-
- if (error)
- goto error;
-
-#if __x86_64__
- if (clib_cpu_supports_pclmulqdq ())
+ crypto_native_op_handler_t *oh = cm->op_handlers;
+ crypto_native_key_handler_t *kh = cm->key_handlers;
+ crypto_native_op_handler_t **best_by_op_id = 0;
+ crypto_native_key_handler_t **best_by_alg_id = 0;
+
+ while (oh)
{
- if (crypto_native_aes_gcm_init_icl && clib_cpu_supports_vaes ())
- error = crypto_native_aes_gcm_init_icl (vm);
- else if (crypto_native_aes_gcm_init_skx && clib_cpu_supports_avx512f ())
- error = crypto_native_aes_gcm_init_skx (vm);
- else if (crypto_native_aes_gcm_init_hsw && clib_cpu_supports_avx2 ())
- error = crypto_native_aes_gcm_init_hsw (vm);
- else if (crypto_native_aes_gcm_init_slm)
- error = crypto_native_aes_gcm_init_slm (vm);
- else
- error = clib_error_return (0, "No AES GCM implemenation available");
-
- if (error)
- goto error;
+ vec_validate (best_by_op_id, oh->op_id);
+
+ if (best_by_op_id[oh->op_id] == 0 ||
+ best_by_op_id[oh->op_id]->priority < oh->priority)
+ best_by_op_id[oh->op_id] = oh;
+
+ oh = oh->next;
}
-#endif
-#if __aarch64__
- if (crypto_native_aes_gcm_init_neon)
- error = crypto_native_aes_gcm_init_neon (vm);
- else
- error = clib_error_return (0, "No AES GCM implemenation available");
- if (error)
- goto error;
-#endif
+ while (kh)
+ {
+ vec_validate (best_by_alg_id, kh->alg_id);
- vnet_crypto_register_key_handler (vm, cm->crypto_engine_index,
- crypto_native_key_handler);
+ if (best_by_alg_id[kh->alg_id] == 0 ||
+ best_by_alg_id[kh->alg_id]->priority < kh->priority)
+ best_by_alg_id[kh->alg_id] = kh;
+
+ kh = kh->next;
+ }
+
+ vec_foreach_pointer (oh, best_by_op_id)
+ if (oh)
+ vnet_crypto_register_ops_handlers (vm, cm->crypto_engine_index,
+ oh->op_id, oh->fn, oh->cfn);
+ vec_foreach_pointer (kh, best_by_alg_id)
+ if (kh)
+ cm->key_fn[kh->alg_id] = kh->key_fn;
-error:
- if (error)
- vec_free (cm->per_thread_data);
+ vec_free (best_by_op_id);
+ vec_free (best_by_alg_id);
- return error;
+ vnet_crypto_register_key_handler (vm, cm->crypto_engine_index,
+ crypto_native_key_handler);
+ return 0;
}
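
/* The selection rule applied above, restated as a stand-alone sketch (not
 * part of this patch): among all constructor-registered handlers for one
 * op id, keep the one whose probe() returned the highest priority. */
static crypto_native_op_handler_t *
best_op_handler_sketch (crypto_native_op_handler_t *list,
			vnet_crypto_op_id_t id)
{
  crypto_native_op_handler_t *best = 0;
  for (; list; list = list->next)
    if (list->op_id == id && (!best || best->priority < list->priority))
      best = list;
  return best;
}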
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (crypto_native_init) =
{
.runs_after = VLIB_INITS ("vnet_crypto_init"),
};
-/* *INDENT-ON* */
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
- .description = "Intel IA32 Software Crypto Engine",
+ .description = "Native Crypto Engine",
};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/crypto_native/sha2.c b/src/plugins/crypto_native/sha2.c
new file mode 100644
index 00000000000..459ce6d8e79
--- /dev/null
+++ b/src/plugins/crypto_native/sha2.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/crypto/crypto.h>
+#include <crypto_native/crypto_native.h>
+#include <vppinfra/crypto/sha2.h>
+
+static_always_inline u32
+crypto_native_ops_hash_sha2 (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ u32 n_ops, vnet_crypto_op_chunk_t *chunks,
+ clib_sha2_type_t type, int maybe_chained)
+{
+ vnet_crypto_op_t *op = ops[0];
+ clib_sha2_ctx_t ctx;
+ u32 n_left = n_ops;
+
+next:
+ if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
+ {
+ vnet_crypto_op_chunk_t *chp = chunks + op->chunk_index;
+ clib_sha2_init (&ctx, type);
+ for (int j = 0; j < op->n_chunks; j++, chp++)
+ clib_sha2_update (&ctx, chp->src, chp->len);
+ clib_sha2_final (&ctx, op->digest);
+ }
+ else
+ clib_sha2 (type, op->src, op->len, op->digest);
+
+ op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
+
+ if (--n_left)
+ {
+ op += 1;
+ goto next;
+ }
+
+ return n_ops;
+}
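
/* A minimal sketch (not part of this patch) of the chained-buffer path
 * above using the clib_sha2 streaming API: init once, update per chunk,
 * final once. */
static inline void
sha256_chunks_sketch (vnet_crypto_op_chunk_t *chunks, int n_chunks,
		      u8 *digest)
{
  clib_sha2_ctx_t ctx;

  clib_sha2_init (&ctx, CLIB_SHA2_256);
  for (int i = 0; i < n_chunks; i++)
    clib_sha2_update (&ctx, chunks[i].src, chunks[i].len);
  clib_sha2_final (&ctx, digest);
}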
+
+static_always_inline u32
+crypto_native_ops_hmac_sha2 (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ u32 n_ops, vnet_crypto_op_chunk_t *chunks,
+ clib_sha2_type_t type)
+{
+ crypto_native_main_t *cm = &crypto_native_main;
+ vnet_crypto_op_t *op = ops[0];
+ u32 n_left = n_ops;
+ clib_sha2_hmac_ctx_t ctx;
+ u8 buffer[64];
+ u32 sz, n_fail = 0;
+
+ for (; n_left; n_left--, op++)
+ {
+ clib_sha2_hmac_init (
+ &ctx, type, (clib_sha2_hmac_key_data_t *) cm->key_data[op->key_index]);
+ if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
+ {
+ vnet_crypto_op_chunk_t *chp = chunks + op->chunk_index;
+ for (int j = 0; j < op->n_chunks; j++, chp++)
+ clib_sha2_hmac_update (&ctx, chp->src, chp->len);
+ }
+ else
+ clib_sha2_hmac_update (&ctx, op->src, op->len);
+
+ clib_sha2_hmac_final (&ctx, buffer);
+
+ if (op->digest_len)
+ {
+ sz = op->digest_len;
+ if (op->flags & VNET_CRYPTO_OP_FLAG_HMAC_CHECK)
+ {
+ if ((memcmp (op->digest, buffer, sz)))
+ {
+ n_fail++;
+ op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
+ continue;
+ }
+ }
+ else
+ clib_memcpy_fast (op->digest, buffer, sz);
+ }
+ else
+ {
+ sz = clib_sha2_variants[type].digest_size;
+ if (op->flags & VNET_CRYPTO_OP_FLAG_HMAC_CHECK)
+ {
+ if ((memcmp (op->digest, buffer, sz)))
+ {
+ n_fail++;
+ op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
+ continue;
+ }
+ }
+ else
+ clib_memcpy_fast (op->digest, buffer, sz);
+ }
+
+ op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
+ }
+
+ return n_ops - n_fail;
+}
+
+static void *
+sha2_key_add (vnet_crypto_key_t *key, clib_sha2_type_t type)
+{
+ clib_sha2_hmac_key_data_t *kd;
+
+ kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
+ clib_sha2_hmac_key_data (type, key->data, vec_len (key->data), kd);
+
+ return kd;
+}
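
/* A minimal sketch (not part of this patch) of the clib_sha2 HMAC calls the
 * handlers above rely on: the key schedule is derived once at key-add time
 * and reused for every op. */
static inline void
hmac_sha256_sketch (const u8 *key, u32 key_len, const u8 *msg, u32 msg_len,
		    u8 digest[32])
{
  clib_sha2_hmac_key_data_t kd;
  clib_sha2_hmac_ctx_t ctx;

  clib_sha2_hmac_key_data (CLIB_SHA2_256, key, key_len, &kd);
  clib_sha2_hmac_init (&ctx, CLIB_SHA2_256, &kd);
  clib_sha2_hmac_update (&ctx, msg, msg_len);
  clib_sha2_hmac_final (&ctx, digest);
}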
+
+static int
+probe ()
+{
+#if defined(__SHA__) && defined(__x86_64__)
+ if (clib_cpu_supports_sha ())
+ return 50;
+#elif defined(__ARM_FEATURE_SHA2)
+ if (clib_cpu_supports_sha2 ())
+ return 10;
+#endif
+ return -1;
+}
+
+#define _(b) \
+ static u32 crypto_native_ops_hash_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return crypto_native_ops_hash_sha2 (vm, ops, n_ops, 0, CLIB_SHA2_##b, 0); \
+ } \
+ \
+ static u32 crypto_native_ops_chained_hash_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ return crypto_native_ops_hash_sha2 (vm, ops, n_ops, chunks, \
+ CLIB_SHA2_##b, 1); \
+ } \
+ \
+ static u32 crypto_native_ops_hmac_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return crypto_native_ops_hmac_sha2 (vm, ops, n_ops, 0, CLIB_SHA2_##b); \
+ } \
+ \
+ static u32 crypto_native_ops_chained_hmac_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ return crypto_native_ops_hmac_sha2 (vm, ops, n_ops, chunks, \
+ CLIB_SHA2_##b); \
+ } \
+ \
+ static void *sha2_##b##_key_add (vnet_crypto_key_t *k) \
+ { \
+ return sha2_key_add (k, CLIB_SHA2_##b); \
+ } \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (crypto_native_hash_sha##b) = { \
+ .op_id = VNET_CRYPTO_OP_SHA##b##_HASH, \
+ .fn = crypto_native_ops_hash_sha##b, \
+ .cfn = crypto_native_ops_chained_hash_sha##b, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_OP_HANDLER (crypto_native_hmac_sha##b) = { \
+ .op_id = VNET_CRYPTO_OP_SHA##b##_HMAC, \
+ .fn = crypto_native_ops_hmac_sha##b, \
+ .cfn = crypto_native_ops_chained_hmac_sha##b, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_KEY_HANDLER (crypto_native_hmac_sha##b) = { \
+ .alg_id = VNET_CRYPTO_ALG_HMAC_SHA##b, \
+ .key_fn = sha2_##b##_key_add, \
+ .probe = probe, \
+ };
+
+_ (224)
+_ (256)
+
+#undef _
diff --git a/src/plugins/crypto_openssl/CMakeLists.txt b/src/plugins/crypto_openssl/CMakeLists.txt
index d014144eca8..472b0ef3243 100644
--- a/src/plugins/crypto_openssl/CMakeLists.txt
+++ b/src/plugins/crypto_openssl/CMakeLists.txt
@@ -16,11 +16,12 @@ if(NOT OPENSSL_FOUND)
endif()
include_directories(${OPENSSL_INCLUDE_DIR})
+add_compile_definitions(OPENSSL_SUPPRESS_DEPRECATED)
add_vpp_plugin(crypto_openssl
SOURCES
main.c
LINK_LIBRARIES
- ${OPENSSL_LIBRARIES}
+ ${OPENSSL_CRYPTO_LIBRARIES}
)
diff --git a/src/plugins/crypto_openssl/crypto_openssl.h b/src/plugins/crypto_openssl/crypto_openssl.h
new file mode 100644
index 00000000000..e16429fb5dd
--- /dev/null
+++ b/src/plugins/crypto_openssl/crypto_openssl.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 ARM Ltd and/or its affiliates.
+ */
+
+#ifndef __crypto_openssl_h__
+#define __crypto_openssl_h__
+
+typedef void *(crypto_openssl_ctx_fn_t) (vnet_crypto_key_t *key,
+ vnet_crypto_key_op_t kop,
+ vnet_crypto_key_index_t idx);
+
+typedef struct
+{
+ u32 crypto_engine_index;
+ crypto_openssl_ctx_fn_t *ctx_fn[VNET_CRYPTO_N_ALGS];
+} crypto_openssl_main_t;
+
+extern crypto_openssl_main_t crypto_openssl_main;
+
+#endif /* __crypto_openssl_h__ */
diff --git a/src/plugins/crypto_openssl/main.c b/src/plugins/crypto_openssl/main.c
index 48846b14483..b070cf336a5 100644
--- a/src/plugins/crypto_openssl/main.c
+++ b/src/plugins/crypto_openssl/main.c
@@ -15,6 +15,8 @@
*------------------------------------------------------------------
*/
+#include <sys/syscall.h>
+
#include <openssl/evp.h>
#include <openssl/hmac.h>
#include <openssl/rand.h>
@@ -24,12 +26,14 @@
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
#include <vpp/app/version.h>
+#include <crypto_openssl/crypto_openssl.h>
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- EVP_CIPHER_CTX *evp_cipher_ctx;
- HMAC_CTX *hmac_ctx;
+ EVP_CIPHER_CTX **evp_cipher_enc_ctx;
+ EVP_CIPHER_CTX **evp_cipher_dec_ctx;
+ HMAC_CTX **hmac_ctx;
EVP_MD_CTX *hash_ctx;
#if OPENSSL_VERSION_NUMBER < 0x10100000L
HMAC_CTX _hmac_ctx;
@@ -49,7 +53,10 @@ static openssl_per_thread_data_t *per_thread_data = 0;
_ (gcm, AES_256_GCM, EVP_aes_256_gcm, 8) \
_ (cbc, AES_128_CTR, EVP_aes_128_ctr, 8) \
_ (cbc, AES_192_CTR, EVP_aes_192_ctr, 8) \
- _ (cbc, AES_256_CTR, EVP_aes_256_ctr, 8)
+ _ (cbc, AES_256_CTR, EVP_aes_256_ctr, 8) \
+ _ (null_gmac, AES_128_NULL_GMAC, EVP_aes_128_gcm, 8) \
+ _ (null_gmac, AES_192_NULL_GMAC, EVP_aes_192_gcm, 8) \
+ _ (null_gmac, AES_256_NULL_GMAC, EVP_aes_256_gcm, 8)
#define foreach_openssl_chacha20_evp_op \
_ (chacha20_poly1305, CHACHA20_POLY1305, EVP_chacha20_poly1305, 8)
@@ -84,6 +91,8 @@ static openssl_per_thread_data_t *per_thread_data = 0;
_(SHA384, EVP_sha384) \
_(SHA512, EVP_sha512)
+crypto_openssl_main_t crypto_openssl_main;
+
static_always_inline u32
openssl_ops_enc_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_chunk_t *chunks, u32 n_ops,
@@ -91,7 +100,7 @@ openssl_ops_enc_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
{
openssl_per_thread_data_t *ptd = vec_elt_at_index (per_thread_data,
vm->thread_index);
- EVP_CIPHER_CTX *ctx = ptd->evp_cipher_ctx;
+ EVP_CIPHER_CTX *ctx;
vnet_crypto_op_chunk_t *chp;
u32 i, j, curr_len = 0;
u8 out_buf[VLIB_BUFFER_DEFAULT_DATA_SIZE * 5];
@@ -99,16 +108,10 @@ openssl_ops_enc_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- vnet_crypto_key_t *key = vnet_crypto_get_key (op->key_index);
int out_len = 0;
- if (op->flags & VNET_CRYPTO_OP_FLAG_INIT_IV)
- RAND_bytes (op->iv, iv_len);
-
- EVP_EncryptInit_ex (ctx, cipher, NULL, key->data, op->iv);
-
- if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
- EVP_CIPHER_CTX_set_padding (ctx, 0);
+ ctx = ptd->evp_cipher_enc_ctx[op->key_index];
+ EVP_EncryptInit_ex (ctx, NULL, NULL, NULL, op->iv);
if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
{
@@ -152,7 +155,7 @@ openssl_ops_dec_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
{
openssl_per_thread_data_t *ptd = vec_elt_at_index (per_thread_data,
vm->thread_index);
- EVP_CIPHER_CTX *ctx = ptd->evp_cipher_ctx;
+ EVP_CIPHER_CTX *ctx;
vnet_crypto_op_chunk_t *chp;
u32 i, j, curr_len = 0;
u8 out_buf[VLIB_BUFFER_DEFAULT_DATA_SIZE * 5];
@@ -160,13 +163,10 @@ openssl_ops_dec_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- vnet_crypto_key_t *key = vnet_crypto_get_key (op->key_index);
int out_len = 0;
- EVP_DecryptInit_ex (ctx, cipher, NULL, key->data, op->iv);
-
- if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
- EVP_CIPHER_CTX_set_padding (ctx, 0);
+ ctx = ptd->evp_cipher_dec_ctx[op->key_index];
+ EVP_DecryptInit_ex (ctx, NULL, NULL, NULL, op->iv);
if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
{
@@ -206,26 +206,21 @@ openssl_ops_dec_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
static_always_inline u32
openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_chunk_t *chunks, u32 n_ops,
- const EVP_CIPHER *cipher, int is_gcm, const int iv_len)
+ const EVP_CIPHER *cipher, int is_gcm, int is_gmac,
+ const int iv_len)
{
openssl_per_thread_data_t *ptd = vec_elt_at_index (per_thread_data,
vm->thread_index);
- EVP_CIPHER_CTX *ctx = ptd->evp_cipher_ctx;
+ EVP_CIPHER_CTX *ctx;
vnet_crypto_op_chunk_t *chp;
u32 i, j;
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- vnet_crypto_key_t *key = vnet_crypto_get_key (op->key_index);
int len = 0;
- if (op->flags & VNET_CRYPTO_OP_FLAG_INIT_IV)
- RAND_bytes (op->iv, 8);
-
- EVP_EncryptInit_ex (ctx, cipher, 0, 0, 0);
- if (is_gcm)
- EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, NULL);
- EVP_EncryptInit_ex (ctx, 0, 0, key->data, op->iv);
+ ctx = ptd->evp_cipher_enc_ctx[op->key_index];
+ EVP_EncryptInit_ex (ctx, 0, 0, NULL, op->iv);
if (op->aad_len)
EVP_EncryptUpdate (ctx, NULL, &len, op->aad, op->aad_len);
if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
@@ -233,13 +228,14 @@ openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
chp = chunks + op->chunk_index;
for (j = 0; j < op->n_chunks; j++)
{
- EVP_EncryptUpdate (ctx, chp->dst, &len, chp->src, chp->len);
+ EVP_EncryptUpdate (ctx, is_gmac ? 0 : chp->dst, &len, chp->src,
+ chp->len);
chp += 1;
}
}
else
- EVP_EncryptUpdate (ctx, op->dst, &len, op->src, op->len);
- EVP_EncryptFinal_ex (ctx, op->dst + len, &len);
+ EVP_EncryptUpdate (ctx, is_gmac ? 0 : op->dst, &len, op->src, op->len);
+ EVP_EncryptFinal_ex (ctx, is_gmac ? 0 : op->dst + len, &len);
EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_AEAD_GET_TAG, op->tag_len, op->tag);
op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
}
@@ -247,12 +243,21 @@ openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
}
static_always_inline u32
+openssl_ops_enc_null_gmac (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ vnet_crypto_op_chunk_t *chunks, u32 n_ops,
+ const EVP_CIPHER *cipher, const int iv_len)
+{
+ return openssl_ops_enc_aead (vm, ops, chunks, n_ops, cipher,
+ /* is_gcm */ 1, /* is_gmac */ 1, iv_len);
+}
+
+static_always_inline u32
openssl_ops_enc_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_chunk_t *chunks, u32 n_ops,
const EVP_CIPHER *cipher, const int iv_len)
{
return openssl_ops_enc_aead (vm, ops, chunks, n_ops, cipher,
- /* is_gcm */ 1, iv_len);
+ /* is_gcm */ 1, /* is_gmac */ 0, iv_len);
}
static_always_inline __clib_unused u32
@@ -261,29 +266,27 @@ openssl_ops_enc_chacha20_poly1305 (vlib_main_t *vm, vnet_crypto_op_t *ops[],
const EVP_CIPHER *cipher, const int iv_len)
{
return openssl_ops_enc_aead (vm, ops, chunks, n_ops, cipher,
- /* is_gcm */ 0, iv_len);
+ /* is_gcm */ 0, /* is_gmac */ 0, iv_len);
}
static_always_inline u32
openssl_ops_dec_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_chunk_t *chunks, u32 n_ops,
- const EVP_CIPHER *cipher, int is_gcm, const int iv_len)
+ const EVP_CIPHER *cipher, int is_gcm, int is_gmac,
+ const int iv_len)
{
openssl_per_thread_data_t *ptd = vec_elt_at_index (per_thread_data,
vm->thread_index);
- EVP_CIPHER_CTX *ctx = ptd->evp_cipher_ctx;
+ EVP_CIPHER_CTX *ctx;
vnet_crypto_op_chunk_t *chp;
u32 i, j, n_fail = 0;
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- vnet_crypto_key_t *key = vnet_crypto_get_key (op->key_index);
int len = 0;
- EVP_DecryptInit_ex (ctx, cipher, 0, 0, 0);
- if (is_gcm)
- EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, 0);
- EVP_DecryptInit_ex (ctx, 0, 0, key->data, op->iv);
+ ctx = ptd->evp_cipher_dec_ctx[op->key_index];
+ EVP_DecryptInit_ex (ctx, 0, 0, NULL, op->iv);
if (op->aad_len)
EVP_DecryptUpdate (ctx, 0, &len, op->aad, op->aad_len);
if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
@@ -291,15 +294,19 @@ openssl_ops_dec_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
chp = chunks + op->chunk_index;
for (j = 0; j < op->n_chunks; j++)
{
- EVP_DecryptUpdate (ctx, chp->dst, &len, chp->src, chp->len);
+ EVP_DecryptUpdate (ctx, is_gmac ? 0 : chp->dst, &len, chp->src,
+ chp->len);
chp += 1;
}
}
else
- EVP_DecryptUpdate (ctx, op->dst, &len, op->src, op->len);
+ {
+ EVP_DecryptUpdate (ctx, is_gmac ? 0 : op->dst, &len, op->src,
+ op->len);
+ }
EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_AEAD_SET_TAG, op->tag_len, op->tag);
- if (EVP_DecryptFinal_ex (ctx, op->dst + len, &len) > 0)
+ if (EVP_DecryptFinal_ex (ctx, is_gmac ? 0 : op->dst + len, &len) > 0)
op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
else
{
@@ -311,12 +318,21 @@ openssl_ops_dec_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
}
static_always_inline u32
+openssl_ops_dec_null_gmac (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ vnet_crypto_op_chunk_t *chunks, u32 n_ops,
+ const EVP_CIPHER *cipher, const int iv_len)
+{
+ return openssl_ops_dec_aead (vm, ops, chunks, n_ops, cipher,
+ /* is_gcm */ 1, /* is_gmac */ 1, iv_len);
+}
+
+static_always_inline u32
openssl_ops_dec_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_chunk_t *chunks, u32 n_ops,
const EVP_CIPHER *cipher, const int iv_len)
{
return openssl_ops_dec_aead (vm, ops, chunks, n_ops, cipher,
- /* is_gcm */ 1, iv_len);
+ /* is_gcm */ 1, /* is_gmac */ 0, iv_len);
}
static_always_inline __clib_unused u32
@@ -325,7 +341,7 @@ openssl_ops_dec_chacha20_poly1305 (vlib_main_t *vm, vnet_crypto_op_t *ops[],
const EVP_CIPHER *cipher, const int iv_len)
{
return openssl_ops_dec_aead (vm, ops, chunks, n_ops, cipher,
- /* is_gcm */ 0, iv_len);
+ /* is_gcm */ 0, /* is_gmac */ 0, iv_len);
}
static_always_inline u32
@@ -370,17 +386,17 @@ openssl_ops_hmac (vlib_main_t * vm, vnet_crypto_op_t * ops[],
u8 buffer[64];
openssl_per_thread_data_t *ptd = vec_elt_at_index (per_thread_data,
vm->thread_index);
- HMAC_CTX *ctx = ptd->hmac_ctx;
+ HMAC_CTX *ctx;
vnet_crypto_op_chunk_t *chp;
u32 i, j, n_fail = 0;
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- vnet_crypto_key_t *key = vnet_crypto_get_key (op->key_index);
unsigned int out_len = 0;
size_t sz = op->digest_len ? op->digest_len : EVP_MD_size (md);
- HMAC_Init_ex (ctx, key->data, vec_len (key->data), md, NULL);
+ ctx = ptd->hmac_ctx[op->key_index];
+ HMAC_Init_ex (ctx, NULL, 0, NULL, NULL);
if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
{
chp = chunks + op->chunk_index;
@@ -410,6 +426,131 @@ openssl_ops_hmac (vlib_main_t * vm, vnet_crypto_op_t * ops[],
return n_ops - n_fail;
}
+static_always_inline void *
+openssl_ctx_cipher (vnet_crypto_key_t *key, vnet_crypto_key_op_t kop,
+ vnet_crypto_key_index_t idx, const EVP_CIPHER *cipher,
+ int is_gcm)
+{
+ EVP_CIPHER_CTX *ctx;
+ openssl_per_thread_data_t *ptd;
+
+ if (VNET_CRYPTO_KEY_OP_ADD == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ vec_validate_aligned (ptd->evp_cipher_enc_ctx, idx,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ptd->evp_cipher_dec_ctx, idx,
+ CLIB_CACHE_LINE_BYTES);
+
+ ctx = EVP_CIPHER_CTX_new ();
+ EVP_CIPHER_CTX_set_padding (ctx, 0);
+ EVP_EncryptInit_ex (ctx, cipher, NULL, NULL, NULL);
+ if (is_gcm)
+ EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, NULL);
+ EVP_EncryptInit_ex (ctx, 0, 0, key->data, 0);
+ ptd->evp_cipher_enc_ctx[idx] = ctx;
+
+ ctx = EVP_CIPHER_CTX_new ();
+ EVP_CIPHER_CTX_set_padding (ctx, 0);
+ EVP_DecryptInit_ex (ctx, cipher, 0, 0, 0);
+ if (is_gcm)
+ EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, 0);
+ EVP_DecryptInit_ex (ctx, 0, 0, key->data, 0);
+ ptd->evp_cipher_dec_ctx[idx] = ctx;
+ }
+ }
+ else if (VNET_CRYPTO_KEY_OP_MODIFY == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ ctx = ptd->evp_cipher_enc_ctx[idx];
+ EVP_EncryptInit_ex (ctx, cipher, NULL, NULL, NULL);
+ if (is_gcm)
+ EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, NULL);
+ EVP_EncryptInit_ex (ctx, 0, 0, key->data, 0);
+
+ ctx = ptd->evp_cipher_dec_ctx[idx];
+ EVP_DecryptInit_ex (ctx, cipher, 0, 0, 0);
+ if (is_gcm)
+ EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, 0);
+ EVP_DecryptInit_ex (ctx, 0, 0, key->data, 0);
+ }
+ }
+ else if (VNET_CRYPTO_KEY_OP_DEL == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ ctx = ptd->evp_cipher_enc_ctx[idx];
+ EVP_CIPHER_CTX_free (ctx);
+ ptd->evp_cipher_enc_ctx[idx] = NULL;
+
+ ctx = ptd->evp_cipher_dec_ctx[idx];
+ EVP_CIPHER_CTX_free (ctx);
+ ptd->evp_cipher_dec_ctx[idx] = NULL;
+ }
+ }
+ return NULL;
+}
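
/* A minimal sketch (not part of this patch) of the per-packet fast path
 * this change enables: cipher, key and IV length are already programmed
 * into the cached per-thread context at key-add time, so the datapath only
 * re-arms the IV before each operation. */
static inline int
cached_ctx_encrypt_sketch (EVP_CIPHER_CTX *ctx, const u8 *iv, const u8 *src,
			   int len, u8 *dst)
{
  int out_len = 0, fin_len = 0;
  EVP_EncryptInit_ex (ctx, NULL, NULL, NULL, iv); /* reuse cipher + key */
  EVP_EncryptUpdate (ctx, dst, &out_len, src, len);
  EVP_EncryptFinal_ex (ctx, dst + out_len, &fin_len);
  return out_len + fin_len;
}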
+
+static_always_inline void *
+openssl_ctx_hmac (vnet_crypto_key_t *key, vnet_crypto_key_op_t kop,
+ vnet_crypto_key_index_t idx, const EVP_MD *md)
+{
+ HMAC_CTX *ctx;
+ openssl_per_thread_data_t *ptd;
+ if (VNET_CRYPTO_KEY_OP_ADD == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ vec_validate_aligned (ptd->hmac_ctx, idx, CLIB_CACHE_LINE_BYTES);
+#if OPENSSL_VERSION_NUMBER >= 0x10100000L
+ ctx = HMAC_CTX_new ();
+ HMAC_Init_ex (ctx, key->data, vec_len (key->data), md, NULL);
+ ptd->hmac_ctx[idx] = ctx;
+#else
+ HMAC_CTX_init (&(ptd->_hmac_ctx));
+ ptd->hmac_ctx[idx] = &ptd->_hmac_ctx;
+#endif
+ }
+ }
+ else if (VNET_CRYPTO_KEY_OP_MODIFY == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ ctx = ptd->hmac_ctx[idx];
+ HMAC_Init_ex (ctx, key->data, vec_len (key->data), md, NULL);
+ }
+ }
+ else if (VNET_CRYPTO_KEY_OP_DEL == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ ctx = ptd->hmac_ctx[idx];
+ HMAC_CTX_free (ctx);
+ ptd->hmac_ctx[idx] = NULL;
+ }
+ }
+ return NULL;
+}
+
+static void
+crypto_openssl_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop,
+ vnet_crypto_key_index_t idx)
+{
+ vnet_crypto_key_t *key = vnet_crypto_get_key (idx);
+ crypto_openssl_main_t *cm = &crypto_openssl_main;
+
+ /** TODO: add linked alg support **/
+ if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
+ return;
+
+ if (cm->ctx_fn[key->alg] == 0)
+ return;
+
+ cm->ctx_fn[key->alg](key, kop, idx);
+}
+
#define _(m, a, b, iv) \
static u32 openssl_ops_enc_##a (vlib_main_t *vm, vnet_crypto_op_t *ops[], \
u32 n_ops) \
@@ -435,6 +576,16 @@ openssl_ops_hmac (vlib_main_t * vm, vnet_crypto_op_t * ops[],
u32 n_ops) \
{ \
return openssl_ops_dec_##m (vm, ops, chunks, n_ops, b (), iv); \
+ } \
+ static void *openssl_ctx_##a (vnet_crypto_key_t *key, \
+ vnet_crypto_key_op_t kop, \
+ vnet_crypto_key_index_t idx) \
+ { \
+ int is_gcm = ((VNET_CRYPTO_ALG_AES_128_GCM <= key->alg) && \
+ (VNET_CRYPTO_ALG_AES_256_NULL_GMAC >= key->alg)) ? \
+ 1 : \
+ 0; \
+ return openssl_ctx_cipher (key, kop, idx, b (), is_gcm); \
}
foreach_openssl_evp_op;
@@ -456,29 +607,43 @@ foreach_openssl_evp_op;
foreach_openssl_hash_op;
#undef _
-#define _(a, b) \
-static u32 \
-openssl_ops_hmac_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return openssl_ops_hmac (vm, ops, 0, n_ops, b ()); } \
-static u32 \
-openssl_ops_hmac_chained_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], \
- vnet_crypto_op_chunk_t *chunks, u32 n_ops) \
-{ return openssl_ops_hmac (vm, ops, chunks, n_ops, b ()); } \
+#define _(a, b) \
+ static u32 openssl_ops_hmac_##a (vlib_main_t *vm, vnet_crypto_op_t *ops[], \
+ u32 n_ops) \
+ { \
+ return openssl_ops_hmac (vm, ops, 0, n_ops, b ()); \
+ } \
+ static u32 openssl_ops_hmac_chained_##a ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ return openssl_ops_hmac (vm, ops, chunks, n_ops, b ()); \
+ } \
+ static void *openssl_ctx_hmac_##a (vnet_crypto_key_t *key, \
+ vnet_crypto_key_op_t kop, \
+ vnet_crypto_key_index_t idx) \
+ { \
+ return openssl_ctx_hmac (key, kop, idx, b ()); \
+ }
foreach_openssl_hmac_op;
#undef _
-
clib_error_t *
crypto_openssl_init (vlib_main_t * vm)
{
+ crypto_openssl_main_t *cm = &crypto_openssl_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
openssl_per_thread_data_t *ptd;
- u8 *seed_data = 0;
- time_t t;
- pid_t pid;
+ u8 seed[32];
+
+ if (syscall (SYS_getrandom, &seed, sizeof (seed), 0) != sizeof (seed))
+ return clib_error_return_unix (0, "getrandom() failed");
+
+ RAND_seed (seed, sizeof (seed));
u32 eidx = vnet_crypto_register_engine (vm, "openssl", 50, "OpenSSL");
+ cm->crypto_engine_index = eidx;
#define _(m, a, b, iv) \
vnet_crypto_register_ops_handlers (vm, eidx, VNET_CRYPTO_OP_##a##_ENC, \
@@ -486,15 +651,17 @@ crypto_openssl_init (vlib_main_t * vm)
openssl_ops_enc_chained_##a); \
vnet_crypto_register_ops_handlers (vm, eidx, VNET_CRYPTO_OP_##a##_DEC, \
openssl_ops_dec_##a, \
- openssl_ops_dec_chained_##a);
+ openssl_ops_dec_chained_##a); \
+ cm->ctx_fn[VNET_CRYPTO_ALG_##a] = openssl_ctx_##a;
foreach_openssl_evp_op;
#undef _
-#define _(a, b) \
- vnet_crypto_register_ops_handlers (vm, eidx, VNET_CRYPTO_OP_##a##_HMAC, \
- openssl_ops_hmac_##a, \
- openssl_ops_hmac_chained_##a); \
+#define _(a, b) \
+ vnet_crypto_register_ops_handlers (vm, eidx, VNET_CRYPTO_OP_##a##_HMAC, \
+ openssl_ops_hmac_##a, \
+ openssl_ops_hmac_chained_##a); \
+ cm->ctx_fn[VNET_CRYPTO_ALG_HMAC_##a] = openssl_ctx_hmac_##a;
foreach_openssl_hmac_op;
#undef _
@@ -512,43 +679,25 @@ crypto_openssl_init (vlib_main_t * vm)
vec_foreach (ptd, per_thread_data)
{
- ptd->evp_cipher_ctx = EVP_CIPHER_CTX_new ();
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
- ptd->hmac_ctx = HMAC_CTX_new ();
ptd->hash_ctx = EVP_MD_CTX_create ();
-#else
- HMAC_CTX_init (&(ptd->_hmac_ctx));
- ptd->hmac_ctx = &ptd->_hmac_ctx;
#endif
}
-
- t = time (NULL);
- pid = getpid ();
- vec_add (seed_data, &t, sizeof (t));
- vec_add (seed_data, &pid, sizeof (pid));
- vec_add (seed_data, seed_data, sizeof (seed_data));
-
- RAND_seed ((const void *) seed_data, vec_len (seed_data));
-
- vec_free (seed_data);
-
+ vnet_crypto_register_key_handler (vm, cm->crypto_engine_index,
+ crypto_openssl_key_handler);
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (crypto_openssl_init) =
{
.runs_after = VLIB_INITS ("vnet_crypto_init"),
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "OpenSSL Crypto Engine",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.api b/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.api
index f1741286d73..8ee8a15f48b 100644
--- a/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.api
+++ b/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.api
@@ -19,7 +19,7 @@
used to control the crypto SW scheduler plugin
*/
-option version = "0.1.0";
+option version = "1.1.0";
/** \brief crypto sw scheduler: Enable or disable workers
diff --git a/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.h b/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.h
index 50dd6c11830..e74dfdd2c2a 100644
--- a/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.h
+++ b/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.h
@@ -21,18 +21,32 @@
#define CRYPTO_SW_SCHEDULER_QUEUE_SIZE 64
#define CRYPTO_SW_SCHEDULER_QUEUE_MASK (CRYPTO_SW_SCHEDULER_QUEUE_SIZE - 1)
+STATIC_ASSERT ((0 == (CRYPTO_SW_SCHEDULER_QUEUE_SIZE &
+ (CRYPTO_SW_SCHEDULER_QUEUE_SIZE - 1))),
+ "CRYPTO_SW_SCHEDULER_QUEUE_SIZE is not pow2");
+
+typedef enum crypto_sw_scheduler_queue_type_t_
+{
+ CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT = 0,
+ CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT,
+ CRYPTO_SW_SCHED_QUEUE_N_TYPES
+} crypto_sw_scheduler_queue_type_t;
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 head;
u32 tail;
- vnet_crypto_async_frame_t *jobs[0];
+ vnet_crypto_async_frame_t **jobs;
} crypto_sw_scheduler_queue_t;
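
/* A minimal sketch (not part of this patch) of the power-of-two ring
 * indexing this queue uses: head and tail grow monotonically and are masked
 * on access, so a non-NULL slot at (head & mask) means the ring is full;
 * the STATIC_ASSERT above keeps the mask arithmetic valid. */
static inline int
ring_enqueue_sketch (vnet_crypto_async_frame_t **jobs, u32 *head,
		     vnet_crypto_async_frame_t *f)
{
  if (jobs[*head & CRYPTO_SW_SCHEDULER_QUEUE_MASK])
    return -1; /* slot not yet consumed: queue full */
  jobs[*head & CRYPTO_SW_SCHEDULER_QUEUE_MASK] = f;
  CLIB_MEMORY_STORE_BARRIER (); /* publish the frame before moving head */
  *head += 1;
  return 0;
}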
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- crypto_sw_scheduler_queue_t *queues[VNET_CRYPTO_ASYNC_OP_N_IDS];
+ crypto_sw_scheduler_queue_t queue[CRYPTO_SW_SCHED_QUEUE_N_TYPES];
+ u32 last_serve_lcore_id;
+ u8 last_serve_encrypt;
+ u8 last_return_queue;
vnet_crypto_op_t *crypto_ops;
vnet_crypto_op_t *integ_ops;
vnet_crypto_op_t *chained_crypto_ops;
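
The STATIC_ASSERT added above guards the queue's indexing scheme: head and
tail are free-running counters, and because CRYPTO_SW_SCHEDULER_QUEUE_SIZE
is a power of two, "counter & MASK" yields the slot index without a modulo.
A standalone sketch with hypothetical names (the real enqueue additionally
issues CLIB_MEMORY_STORE_BARRIER before publishing the new head):

#define RING_SIZE 64 /* must be a power of two */
#define RING_MASK (RING_SIZE - 1)

typedef struct
{
  unsigned int head; /* next slot to write; wraps naturally */
  unsigned int tail; /* next slot to read */
  void *jobs[RING_SIZE];
} ring_t;

static int
ring_push (ring_t *r, void *job)
{
  if (r->jobs[r->head & RING_MASK]) /* slot still occupied -> ring full */
    return -1;
  r->jobs[r->head & RING_MASK] = job;
  r->head++;
  return 0;
}

static void *
ring_pop (ring_t *r)
{
  void *job = r->jobs[r->tail & RING_MASK];
  if (job)
    {
      r->jobs[r->tail & RING_MASK] = 0;
      r->tail++;
    }
  return job;
}
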
diff --git a/src/plugins/crypto_sw_scheduler/main.c b/src/plugins/crypto_sw_scheduler/main.c
index b0548fa297a..73a158e86b2 100644
--- a/src/plugins/crypto_sw_scheduler/main.c
+++ b/src/plugins/crypto_sw_scheduler/main.c
@@ -25,14 +25,14 @@ crypto_sw_scheduler_set_worker_crypto (u32 worker_idx, u8 enabled)
crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
crypto_sw_scheduler_per_thread_data_t *ptd = 0;
- u32 count = 0, i = vlib_num_workers () > 0;
+ u32 count = 0, i;
if (worker_idx >= vlib_num_workers ())
{
return VNET_API_ERROR_INVALID_VALUE;
}
- for (; i < tm->n_vlib_mains; i++)
+ for (i = 0; i < tm->n_vlib_mains; i++)
{
ptd = cm->per_thread_data + i;
count += ptd->self_crypto_enabled;
@@ -74,68 +74,45 @@ crypto_sw_scheduler_key_handler (vlib_main_t * vm, vnet_crypto_key_op_t kop,
}
static int
-crypto_sw_scheduler_frame_enqueue (vlib_main_t * vm,
- vnet_crypto_async_frame_t * frame)
+crypto_sw_scheduler_frame_enqueue (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame, u8 is_enc)
{
crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main;
- crypto_sw_scheduler_per_thread_data_t *ptd
- = vec_elt_at_index (cm->per_thread_data, vm->thread_index);
- crypto_sw_scheduler_queue_t *q = ptd->queues[frame->op];
- u64 head = q->head;
-
- if (q->jobs[head & CRYPTO_SW_SCHEDULER_QUEUE_MASK])
+ crypto_sw_scheduler_per_thread_data_t *ptd =
+ vec_elt_at_index (cm->per_thread_data, vm->thread_index);
+ crypto_sw_scheduler_queue_t *current_queue =
+ is_enc ? &ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT] :
+ &ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT];
+ u64 head = current_queue->head;
+
+ if (current_queue->jobs[head & CRYPTO_SW_SCHEDULER_QUEUE_MASK])
{
u32 n_elts = frame->n_elts, i;
for (i = 0; i < n_elts; i++)
frame->elts[i].status = VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR;
return -1;
}
- q->jobs[head & CRYPTO_SW_SCHEDULER_QUEUE_MASK] = frame;
+
+ current_queue->jobs[head & CRYPTO_SW_SCHEDULER_QUEUE_MASK] = frame;
head += 1;
CLIB_MEMORY_STORE_BARRIER ();
- q->head = head;
+ current_queue->head = head;
return 0;
}
-static_always_inline vnet_crypto_async_frame_t *
-crypto_sw_scheduler_get_pending_frame (crypto_sw_scheduler_queue_t * q)
+static int
+crypto_sw_scheduler_frame_enqueue_decrypt (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
{
- vnet_crypto_async_frame_t *f;
- u32 i;
- u32 tail = q->tail;
- u32 head = q->head;
-
- for (i = tail; i < head; i++)
- {
- f = q->jobs[i & CRYPTO_SW_SCHEDULER_QUEUE_MASK];
- if (!f)
- continue;
- if (clib_atomic_bool_cmp_and_swap
- (&f->state, VNET_CRYPTO_FRAME_STATE_PENDING,
- VNET_CRYPTO_FRAME_STATE_WORK_IN_PROGRESS))
- {
- return f;
- }
+ return crypto_sw_scheduler_frame_enqueue (vm, frame, 0);
}
- return NULL;
-}
-
-static_always_inline vnet_crypto_async_frame_t *
-crypto_sw_scheduler_get_completed_frame (crypto_sw_scheduler_queue_t * q)
-{
- vnet_crypto_async_frame_t *f = 0;
- if (q->jobs[q->tail & CRYPTO_SW_SCHEDULER_QUEUE_MASK]
- && q->jobs[q->tail & CRYPTO_SW_SCHEDULER_QUEUE_MASK]->state
- >= VNET_CRYPTO_FRAME_STATE_SUCCESS)
+ static int
+ crypto_sw_scheduler_frame_enqueue_encrypt (
+ vlib_main_t *vm, vnet_crypto_async_frame_t *frame)
{
- u32 tail = q->tail;
- CLIB_MEMORY_STORE_BARRIER ();
- q->tail++;
- f = q->jobs[tail & CRYPTO_SW_SCHEDULER_QUEUE_MASK];
- q->jobs[tail & CRYPTO_SW_SCHEDULER_QUEUE_MASK] = 0;
+
+ return crypto_sw_scheduler_frame_enqueue (vm, frame, 1);
}
- return f;
-}
static_always_inline void
cryptodev_sw_scheduler_sgl (vlib_main_t *vm,
@@ -267,7 +244,7 @@ crypto_sw_scheduler_convert_link_crypto (vlib_main_t * vm,
integ_op->digest = fe->digest;
integ_op->digest_len = digest_len;
integ_op->key_index = key->index_integ;
- integ_op->flags = fe->flags & ~VNET_CRYPTO_OP_FLAG_INIT_IV;
+ integ_op->flags = fe->flags;
crypto_op->user_data = integ_op->user_data = index;
}
@@ -283,17 +260,22 @@ process_ops (vlib_main_t * vm, vnet_crypto_async_frame_t * f,
n_fail = n_ops - vnet_crypto_process_ops (vm, op, n_ops);
- while (n_fail)
+ /*
+ * If we had a failure in the ops then we need to walk all the ops
+ * and set the status in the corresponding frame. This status is
+ * not set in the case with no failures, as in that case the overall
+ * frame status is success.
+ */
+ if (n_fail)
{
- ASSERT (op - ops < n_ops);
-
- if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ for (int i = 0; i < n_ops; i++)
{
+ ASSERT (op - ops < n_ops);
+
f->elts[op->user_data].status = op->status;
- *state = VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
- n_fail--;
+ op++;
}
- op++;
+ *state = VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
}
}
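
Both process_ops() and process_chained_ops() now share this error path: on
full success nothing is written back per element, while any failure triggers
a walk over all ops that copies each status into the frame element it came
from (op->user_data carries the element index). A condensed sketch of the
pattern, with hypothetical types standing in for the vnet_crypto ones:

typedef struct { int status; unsigned int user_data; } op_t;
typedef struct { int status; } elt_t;

static void
propagate_failures (elt_t *elts, op_t *ops, int n_ops, int n_fail,
                    int *frame_state)
{
  if (!n_fail)
    return; /* frame-level SUCCESS already implies per-element success */

  /* copy every status back, including the successful ones */
  for (int i = 0; i < n_ops; i++)
    elts[ops[i].user_data].status = ops[i].status;

  *frame_state = -1; /* stand-in for VNET_CRYPTO_FRAME_STATE_ELT_ERROR */
}
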
@@ -310,170 +292,287 @@ process_chained_ops (vlib_main_t * vm, vnet_crypto_async_frame_t * f,
n_fail = n_ops - vnet_crypto_process_chained_ops (vm, op, chunks, n_ops);
- while (n_fail)
+ /*
+ * If we had a failure in the ops then we need to walk all the ops
+ * and set the status in the corresponding frame. This status is
+ * not set in the case with no failures, as in that case the overall
+ * frame status is success.
+ */
+ if (n_fail)
{
- ASSERT (op - ops < n_ops);
-
- if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ for (int i = 0; i < n_ops; i++)
{
+ ASSERT (op - ops < n_ops);
+
f->elts[op->user_data].status = op->status;
- *state = VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
- n_fail--;
+ op++;
}
- op++;
+ *state = VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
}
}
-static_always_inline vnet_crypto_async_frame_t *
-crypto_sw_scheduler_dequeue_aead (vlib_main_t * vm,
- vnet_crypto_async_op_id_t async_op_id,
- vnet_crypto_op_id_t sync_op_id, u8 tag_len,
- u8 aad_len, u32 * nb_elts_processed,
- u32 * enqueue_thread_idx)
+static_always_inline void
+crypto_sw_scheduler_process_aead (vlib_main_t *vm,
+ crypto_sw_scheduler_per_thread_data_t *ptd,
+ vnet_crypto_async_frame_t *f, u32 aead_op,
+ u32 aad_len, u32 digest_len)
{
- crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main;
- crypto_sw_scheduler_per_thread_data_t *ptd = 0;
- crypto_sw_scheduler_queue_t *q = 0;
- vnet_crypto_async_frame_t *f = 0;
vnet_crypto_async_frame_elt_t *fe;
u32 *bi;
- u32 n_elts;
- int i = 0;
+ u32 n_elts = f->n_elts;
u8 state = VNET_CRYPTO_FRAME_STATE_SUCCESS;
- if (cm->per_thread_data[vm->thread_index].self_crypto_enabled)
- {
- /* *INDENT-OFF* */
- vec_foreach_index (i, cm->per_thread_data)
- {
- ptd = cm->per_thread_data + i;
- q = ptd->queues[async_op_id];
- f = crypto_sw_scheduler_get_pending_frame (q);
- if (f)
- break;
- }
- /* *INDENT-ON* */
- }
+ vec_reset_length (ptd->crypto_ops);
+ vec_reset_length (ptd->integ_ops);
+ vec_reset_length (ptd->chained_crypto_ops);
+ vec_reset_length (ptd->chained_integ_ops);
+ vec_reset_length (ptd->chunks);
- ptd = cm->per_thread_data + vm->thread_index;
+ fe = f->elts;
+ bi = f->buffer_indices;
- if (f)
+ while (n_elts--)
{
- *nb_elts_processed = n_elts = f->n_elts;
- fe = f->elts;
- bi = f->buffer_indices;
-
- vec_reset_length (ptd->crypto_ops);
- vec_reset_length (ptd->chained_crypto_ops);
- vec_reset_length (ptd->chunks);
-
- while (n_elts--)
- {
- if (n_elts > 1)
- clib_prefetch_load (fe + 1);
+ if (n_elts > 1)
+ clib_prefetch_load (fe + 1);
- crypto_sw_scheduler_convert_aead (vm, ptd, fe, fe - f->elts, bi[0],
- sync_op_id, aad_len, tag_len);
- bi++;
- fe++;
- }
+ crypto_sw_scheduler_convert_aead (vm, ptd, fe, fe - f->elts, bi[0],
+ aead_op, aad_len, digest_len);
+ bi++;
+ fe++;
+ }
process_ops (vm, f, ptd->crypto_ops, &state);
process_chained_ops (vm, f, ptd->chained_crypto_ops, ptd->chunks,
&state);
f->state = state;
- *enqueue_thread_idx = f->enqueue_thread_index;
- }
-
- return crypto_sw_scheduler_get_completed_frame (ptd->queues[async_op_id]);
}
-static_always_inline vnet_crypto_async_frame_t *
-crypto_sw_scheduler_dequeue_link (vlib_main_t * vm,
- vnet_crypto_async_op_id_t async_op_id,
- vnet_crypto_op_id_t sync_crypto_op_id,
- vnet_crypto_op_id_t sync_integ_op_id,
- u16 digest_len, u8 is_enc,
- u32 * nb_elts_processed,
- u32 * enqueue_thread_idx)
+static_always_inline void
+crypto_sw_scheduler_process_link (vlib_main_t *vm,
+ crypto_sw_scheduler_main_t *cm,
+ crypto_sw_scheduler_per_thread_data_t *ptd,
+ vnet_crypto_async_frame_t *f, u32 crypto_op,
+ u32 auth_op, u16 digest_len, u8 is_enc)
{
- crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main;
- crypto_sw_scheduler_per_thread_data_t *ptd = 0;
- crypto_sw_scheduler_queue_t *q = 0;
- vnet_crypto_async_frame_t *f = 0;
vnet_crypto_async_frame_elt_t *fe;
u32 *bi;
- u32 n_elts;
- int i = 0;
+ u32 n_elts = f->n_elts;
u8 state = VNET_CRYPTO_FRAME_STATE_SUCCESS;
- if (cm->per_thread_data[vm->thread_index].self_crypto_enabled)
+ vec_reset_length (ptd->crypto_ops);
+ vec_reset_length (ptd->integ_ops);
+ vec_reset_length (ptd->chained_crypto_ops);
+ vec_reset_length (ptd->chained_integ_ops);
+ vec_reset_length (ptd->chunks);
+ fe = f->elts;
+ bi = f->buffer_indices;
+
+ while (n_elts--)
+ {
+ if (n_elts > 1)
+ clib_prefetch_load (fe + 1);
+
+ crypto_sw_scheduler_convert_link_crypto (
+ vm, ptd, cm->keys + fe->key_index, fe, fe - f->elts, bi[0], crypto_op,
+ auth_op, digest_len, is_enc);
+ bi++;
+ fe++;
+ }
+
+ if (is_enc)
+ {
+ process_ops (vm, f, ptd->crypto_ops, &state);
+ process_chained_ops (vm, f, ptd->chained_crypto_ops, ptd->chunks,
+ &state);
+ process_ops (vm, f, ptd->integ_ops, &state);
+ process_chained_ops (vm, f, ptd->chained_integ_ops, ptd->chunks, &state);
+ }
+ else
{
- /* *INDENT-OFF* */
- vec_foreach_index (i, cm->per_thread_data)
- {
- ptd = cm->per_thread_data + i;
- q = ptd->queues[async_op_id];
- f = crypto_sw_scheduler_get_pending_frame (q);
- if (f)
- break;
- }
- /* *INDENT-ON* */
+ process_ops (vm, f, ptd->integ_ops, &state);
+ process_chained_ops (vm, f, ptd->chained_integ_ops, ptd->chunks, &state);
+ process_ops (vm, f, ptd->crypto_ops, &state);
+ process_chained_ops (vm, f, ptd->chained_crypto_ops, ptd->chunks,
+ &state);
}
- ptd = cm->per_thread_data + vm->thread_index;
+ f->state = state;
+}
- if (f)
+static_always_inline int
+convert_async_crypto_id (vnet_crypto_async_op_id_t async_op_id, u32 *crypto_op,
+ u32 *auth_op_or_aad_len, u16 *digest_len, u8 *is_enc)
+{
+ switch (async_op_id)
{
- vec_reset_length (ptd->crypto_ops);
- vec_reset_length (ptd->integ_ops);
- vec_reset_length (ptd->chained_crypto_ops);
- vec_reset_length (ptd->chained_integ_ops);
- vec_reset_length (ptd->chunks);
+#define _(n, s, k, t, a) \
+ case VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC: \
+ *crypto_op = VNET_CRYPTO_OP_##n##_ENC; \
+ *auth_op_or_aad_len = a; \
+ *digest_len = t; \
+ *is_enc = 1; \
+ return 1; \
+ case VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC: \
+ *crypto_op = VNET_CRYPTO_OP_##n##_DEC; \
+ *auth_op_or_aad_len = a; \
+ *digest_len = t; \
+ *is_enc = 0; \
+ return 1;
+ foreach_crypto_aead_async_alg
+#undef _
- *nb_elts_processed = n_elts = f->n_elts;
- fe = f->elts;
- bi = f->buffer_indices;
+#define _(c, h, s, k, d) \
+ case VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC: \
+ *crypto_op = VNET_CRYPTO_OP_##c##_ENC; \
+ *auth_op_or_aad_len = VNET_CRYPTO_OP_##h##_HMAC; \
+ *digest_len = d; \
+ *is_enc = 1; \
+ return 0; \
+ case VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC: \
+ *crypto_op = VNET_CRYPTO_OP_##c##_DEC; \
+ *auth_op_or_aad_len = VNET_CRYPTO_OP_##h##_HMAC; \
+ *digest_len = d; \
+ *is_enc = 0; \
+ return 0;
+ foreach_crypto_link_async_alg
+#undef _
- while (n_elts--)
- {
- if (n_elts > 1)
- clib_prefetch_load (fe + 1);
-
- crypto_sw_scheduler_convert_link_crypto (vm, ptd,
- cm->keys + fe->key_index,
- fe, fe - f->elts, bi[0],
- sync_crypto_op_id,
- sync_integ_op_id,
- digest_len, is_enc);
- bi++;
- fe++;
- }
+ default : return -1;
+ }
- if (is_enc)
- {
- process_ops (vm, f, ptd->crypto_ops, &state);
- process_chained_ops (vm, f, ptd->chained_crypto_ops, ptd->chunks,
- &state);
- process_ops (vm, f, ptd->integ_ops, &state);
- process_chained_ops (vm, f, ptd->chained_integ_ops, ptd->chunks,
- &state);
- }
- else
+ return -1;
+}
+
+static_always_inline vnet_crypto_async_frame_t *
+crypto_sw_scheduler_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
+ u32 *enqueue_thread_idx)
+{
+ crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main;
+ crypto_sw_scheduler_per_thread_data_t *ptd =
+ cm->per_thread_data + vm->thread_index;
+ vnet_crypto_async_frame_t *f = 0;
+ crypto_sw_scheduler_queue_t *current_queue = 0;
+ u32 tail, head;
+ u8 found = 0;
+ u8 recheck_queues = 1;
+
+run_next_queues:
+ /* get a pending frame to process */
+ if (ptd->self_crypto_enabled)
+ {
+ u32 i = ptd->last_serve_lcore_id + 1;
+
+ while (1)
{
- process_ops (vm, f, ptd->integ_ops, &state);
- process_chained_ops (vm, f, ptd->chained_integ_ops, ptd->chunks,
- &state);
- process_ops (vm, f, ptd->crypto_ops, &state);
- process_chained_ops (vm, f, ptd->chained_crypto_ops, ptd->chunks,
- &state);
+ crypto_sw_scheduler_per_thread_data_t *st;
+ u32 j;
+
+ if (i >= vec_len (cm->per_thread_data))
+ i = 0;
+
+ st = cm->per_thread_data + i;
+
+ if (ptd->last_serve_encrypt)
+ current_queue = &st->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT];
+ else
+ current_queue = &st->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT];
+
+ tail = current_queue->tail;
+ head = current_queue->head;
+
+ /* Skip this queue unless tail < head or head has overflowed
+ * and tail has not. At the point where tail overflows (== 0),
+ * the largest possible value of head is (queue size - 1).
+ * Prior to that, the largest possible value of head is
+ * (queue size - 2).
+ */
+ if ((tail > head) && (head >= CRYPTO_SW_SCHEDULER_QUEUE_MASK))
+ goto skip_queue;
+
+ for (j = tail; j != head; j++)
+ {
+
+ f = current_queue->jobs[j & CRYPTO_SW_SCHEDULER_QUEUE_MASK];
+
+ if (!f)
+ continue;
+
+ if (clib_atomic_bool_cmp_and_swap (
+ &f->state, VNET_CRYPTO_FRAME_STATE_PENDING,
+ VNET_CRYPTO_FRAME_STATE_WORK_IN_PROGRESS))
+ {
+ found = 1;
+ break;
+ }
+ }
+
+ skip_queue:
+ if (found || i == ptd->last_serve_lcore_id)
+ {
+ CLIB_MEMORY_STORE_BARRIER ();
+ ptd->last_serve_encrypt = !ptd->last_serve_encrypt;
+ break;
+ }
+
+ i++;
}
- f->state = state;
+ ptd->last_serve_lcore_id = i;
+ }
+
+ if (found)
+ {
+ u32 crypto_op, auth_op_or_aad_len;
+ u16 digest_len;
+ u8 is_enc;
+ int ret;
+
+ ret = convert_async_crypto_id (f->op, &crypto_op, &auth_op_or_aad_len,
+ &digest_len, &is_enc);
+
+ if (ret == 1)
+ crypto_sw_scheduler_process_aead (vm, ptd, f, crypto_op,
+ auth_op_or_aad_len, digest_len);
+ else if (ret == 0)
+ crypto_sw_scheduler_process_link (
+ vm, cm, ptd, f, crypto_op, auth_op_or_aad_len, digest_len, is_enc);
+
*enqueue_thread_idx = f->enqueue_thread_index;
+ *nb_elts_processed = f->n_elts;
+ }
+
+ if (ptd->last_return_queue)
+ {
+ current_queue = &ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT];
+ ptd->last_return_queue = 0;
+ }
+ else
+ {
+ current_queue = &ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT];
+ ptd->last_return_queue = 1;
}
- return crypto_sw_scheduler_get_completed_frame (ptd->queues[async_op_id]);
+ tail = current_queue->tail & CRYPTO_SW_SCHEDULER_QUEUE_MASK;
+
+ if (current_queue->jobs[tail] &&
+ current_queue->jobs[tail]->state >= VNET_CRYPTO_FRAME_STATE_SUCCESS)
+ {
+
+ CLIB_MEMORY_STORE_BARRIER ();
+ current_queue->tail++;
+ f = current_queue->jobs[tail];
+ current_queue->jobs[tail] = 0;
+
+ return f;
+ }
+
+ if (!found && recheck_queues)
+ {
+ recheck_queues = 0;
+ goto run_next_queues;
+ }
+ return 0;
}
static clib_error_t *
@@ -533,14 +632,12 @@ sw_scheduler_set_worker_crypto (vlib_main_t * vm, unformat_input_t * input,
* @cliexstart{set sw_scheduler worker 0 crypto off}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_sw_scheduler_worker_crypto, static) = {
.path = "set sw_scheduler",
.short_help = "set sw_scheduler worker <idx> crypto <on|off>",
.function = sw_scheduler_set_worker_crypto,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
sw_scheduler_show_workers (vlib_main_t * vm, unformat_input_t * input,
@@ -569,14 +666,12 @@ sw_scheduler_show_workers (vlib_main_t * vm, unformat_input_t * input,
* @cliexstart{show sw_scheduler workers}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_show_sw_scheduler_workers, static) = {
.path = "show sw_scheduler workers",
.short_help = "show sw_scheduler workers",
.function = sw_scheduler_show_workers,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
sw_scheduler_cli_init (vlib_main_t * vm)
@@ -586,50 +681,6 @@ sw_scheduler_cli_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (sw_scheduler_cli_init);
-/* *INDENT-OFF* */
-#define _(n, s, k, t, a) \
- static vnet_crypto_async_frame_t \
- *crypto_sw_scheduler_frame_dequeue_##n##_TAG_##t##_AAD_##a##_enc ( \
- vlib_main_t *vm, u32 *nb_elts_processed, u32 * thread_idx) \
- { \
- return crypto_sw_scheduler_dequeue_aead ( \
- vm, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \
- VNET_CRYPTO_OP_##n##_ENC, t, a, nb_elts_processed, thread_idx); \
- } \
- static vnet_crypto_async_frame_t \
- *crypto_sw_scheduler_frame_dequeue_##n##_TAG_##t##_AAD_##a##_dec ( \
- vlib_main_t *vm, u32 *nb_elts_processed, u32 * thread_idx) \
- { \
- return crypto_sw_scheduler_dequeue_aead ( \
- vm, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, \
- VNET_CRYPTO_OP_##n##_DEC, t, a, nb_elts_processed, thread_idx); \
- }
-foreach_crypto_aead_async_alg
-#undef _
-
-#define _(c, h, s, k, d) \
- static vnet_crypto_async_frame_t \
- *crypto_sw_scheduler_frame_dequeue_##c##_##h##_TAG##d##_enc ( \
- vlib_main_t *vm, u32 *nb_elts_processed, u32 * thread_idx) \
- { \
- return crypto_sw_scheduler_dequeue_link ( \
- vm, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \
- VNET_CRYPTO_OP_##c##_ENC, VNET_CRYPTO_OP_##h##_HMAC, d, 1, \
- nb_elts_processed, thread_idx); \
- } \
- static vnet_crypto_async_frame_t \
- *crypto_sw_scheduler_frame_dequeue_##c##_##h##_TAG##d##_dec ( \
- vlib_main_t *vm, u32 *nb_elts_processed, u32 * thread_idx) \
- { \
- return crypto_sw_scheduler_dequeue_link ( \
- vm, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, \
- VNET_CRYPTO_OP_##c##_DEC, VNET_CRYPTO_OP_##h##_HMAC, d, 0, \
- nb_elts_processed, thread_idx); \
- }
- foreach_crypto_link_async_alg
-#undef _
- /* *INDENT-ON* */
-
crypto_sw_scheduler_main_t crypto_sw_scheduler_main;
clib_error_t *
crypto_sw_scheduler_init (vlib_main_t * vm)
@@ -638,26 +689,33 @@ crypto_sw_scheduler_init (vlib_main_t * vm)
vlib_thread_main_t *tm = vlib_get_thread_main ();
clib_error_t *error = 0;
crypto_sw_scheduler_per_thread_data_t *ptd;
-
- u32 queue_size = CRYPTO_SW_SCHEDULER_QUEUE_SIZE * sizeof (void *)
- + sizeof (crypto_sw_scheduler_queue_t);
+ u32 i;
vec_validate_aligned (cm->per_thread_data, tm->n_vlib_mains - 1,
CLIB_CACHE_LINE_BYTES);
- vec_foreach (ptd, cm->per_thread_data)
- {
- ptd->self_crypto_enabled = 1;
- u32 i;
- for (i = 0; i < VNET_CRYPTO_ASYNC_OP_N_IDS; i++)
- {
- crypto_sw_scheduler_queue_t *q
- = clib_mem_alloc_aligned (queue_size, CLIB_CACHE_LINE_BYTES);
- ASSERT (q != 0);
- ptd->queues[i] = q;
- clib_memset_u8 (q, 0, queue_size);
- }
- }
+ for (i = 0; i < tm->n_vlib_mains; i++)
+ {
+ ptd = cm->per_thread_data + i;
+ ptd->self_crypto_enabled = i > 0 || vlib_num_workers () < 1;
+
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT].head = 0;
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT].tail = 0;
+
+ vec_validate_aligned (
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT].jobs,
+ CRYPTO_SW_SCHEDULER_QUEUE_SIZE - 1, CLIB_CACHE_LINE_BYTES);
+
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT].head = 0;
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT].tail = 0;
+
+ ptd->last_serve_encrypt = 0;
+ ptd->last_return_queue = 0;
+
+ vec_validate_aligned (
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT].jobs,
+ CRYPTO_SW_SCHEDULER_QUEUE_SIZE - 1, CLIB_CACHE_LINE_BYTES);
+ }
cm->crypto_engine_index =
vnet_crypto_register_engine (vm, "sw_scheduler", 100,
@@ -668,33 +726,28 @@ crypto_sw_scheduler_init (vlib_main_t * vm)
crypto_sw_scheduler_api_init (vm);
- /* *INDENT-OFF* */
#define _(n, s, k, t, a) \
- vnet_crypto_register_async_handler ( \
- vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \
- crypto_sw_scheduler_frame_enqueue, \
- crypto_sw_scheduler_frame_dequeue_##n##_TAG_##t##_AAD_##a##_enc); \
- vnet_crypto_register_async_handler ( \
- vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, \
- crypto_sw_scheduler_frame_enqueue, \
- crypto_sw_scheduler_frame_dequeue_##n##_TAG_##t##_AAD_##a##_dec);
+ vnet_crypto_register_enqueue_handler ( \
+ vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \
+ crypto_sw_scheduler_frame_enqueue_encrypt); \
+ vnet_crypto_register_enqueue_handler ( \
+ vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, \
+ crypto_sw_scheduler_frame_enqueue_decrypt);
foreach_crypto_aead_async_alg
#undef _
#define _(c, h, s, k, d) \
- vnet_crypto_register_async_handler ( \
- vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \
- crypto_sw_scheduler_frame_enqueue, \
- crypto_sw_scheduler_frame_dequeue_##c##_##h##_TAG##d##_enc); \
- vnet_crypto_register_async_handler ( \
- vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, \
- crypto_sw_scheduler_frame_enqueue, \
- crypto_sw_scheduler_frame_dequeue_##c##_##h##_TAG##d##_dec);
- foreach_crypto_link_async_alg
+ vnet_crypto_register_enqueue_handler ( \
+ vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \
+ crypto_sw_scheduler_frame_enqueue_encrypt); \
+ vnet_crypto_register_enqueue_handler ( \
+ vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, \
+ crypto_sw_scheduler_frame_enqueue_decrypt);
+ foreach_crypto_link_async_alg
#undef _
- /* *INDENT-ON* */
+
+ vnet_crypto_register_dequeue_handler (vm, cm->crypto_engine_index,
+ crypto_sw_scheduler_dequeue);
if (error)
vec_free (cm->per_thread_data);
@@ -702,7 +755,6 @@ crypto_sw_scheduler_init (vlib_main_t * vm)
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (crypto_sw_scheduler_init) = {
.runs_after = VLIB_INITS ("vnet_crypto_init"),
};
@@ -711,7 +763,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "SW Scheduler Crypto Async Engine plugin",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ct6/ct6.c b/src/plugins/ct6/ct6.c
index 205cd3f50ef..e5c69be2c9d 100644
--- a/src/plugins/ct6/ct6.c
+++ b/src/plugins/ct6/ct6.c
@@ -153,7 +153,6 @@ set_ct6_enable_disable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ct6_command, static) =
{
.path = "set ct6",
@@ -161,7 +160,6 @@ VLIB_CLI_COMMAND (set_ct6_command, static) =
"set ct6 [inside|outside] <interface-name> [disable]",
.function = set_ct6_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_ct6_enable_disable_t_handler
@@ -216,30 +214,24 @@ ct6_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (ct6_init);
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ct6out2in, static) =
{
.arc_name = "ip6-unicast",
.node_name = "ct6-out2in",
.runs_before = VNET_FEATURES ("ip6-lookup"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ct6in2out, static) = {
.arc_name = "interface-output",
.node_name = "ct6-in2out",
.runs_before = VNET_FEATURES ("interface-output-arc-end"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "IPv6 Connection Tracker",
};
-/* *INDENT-ON* */
u8 *
format_ct6_session (u8 * s, va_list * args)
@@ -320,26 +312,22 @@ show_ct6_command_fn_command_fn (vlib_main_t * vm,
format (s, "%U", format_ct6_session, cmp,
0 /* pool */ , 0 /* header */ , verbose);
- /* *INDENT-OFF* */
pool_foreach (s0, cmp->sessions[i])
{
s = format (s, "%U", format_ct6_session, cmp, i, s0, verbose);
}
- /* *INDENT-ON* */
}
vlib_cli_output (cmp->vlib_main, "%v", s);
vec_free (s);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ct6_command_fn_command, static) =
{
.path = "show ip6 connection-tracker",
.short_help = "show ip6 connection-tracker",
.function = show_ct6_command_fn_command_fn,
};
-/* *INDENT-ON* */
static void
increment_v6_address (ip6_address_t * a)
@@ -429,12 +417,10 @@ test_ct6_command_fn_command_fn (vlib_main_t * vm,
created = 0;
}
- /* *INDENT-OFF* */
pool_foreach (s0, cmp->sessions[0])
{
s = format (s, "%U", format_ct6_session, cmp, 0, s0, 1 /* verbose */);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "\nEnd state: first index %d last index %d\n%v",
cmp->first_index[0], cmp->last_index[0], s);
@@ -449,12 +435,10 @@ test_ct6_command_fn_command_fn (vlib_main_t * vm,
ct6_update_session_hit (cmp, s0, 234.0);
- /* *INDENT-OFF* */
pool_foreach (s0, cmp->sessions[0])
{
s = format (s, "%U", format_ct6_session, cmp, 0, s0, 1 /* verbose */);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "\nEnd state: first index %d last index %d\n%v",
cmp->first_index[0], cmp->last_index[0], s);
@@ -464,14 +448,12 @@ test_ct6_command_fn_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_ct6_command_fn_command, static) =
{
.path = "test ip6 connection-tracker",
.short_help = "test ip6 connection-tracker",
.function = test_ct6_command_fn_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ct6_config (vlib_main_t * vm, unformat_input_t * input)
diff --git a/src/plugins/ct6/ct6.h b/src/plugins/ct6/ct6.h
index 534534f5c99..0b7deb07839 100644
--- a/src/plugins/ct6/ct6.h
+++ b/src/plugins/ct6/ct6.h
@@ -26,7 +26,6 @@
#include <vppinfra/hash.h>
#include <vppinfra/error.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
union
@@ -43,7 +42,6 @@ typedef CLIB_PACKED (struct
u64 as_u64[6];
};
}) ct6_session_key_t;
-/* *INDENT-ON* */
typedef struct
{
diff --git a/src/plugins/ct6/ct6_in2out.c b/src/plugins/ct6/ct6_in2out.c
index b8bda18370c..c5d26c8caa7 100644
--- a/src/plugins/ct6/ct6_in2out.c
+++ b/src/plugins/ct6/ct6_in2out.c
@@ -344,7 +344,6 @@ VLIB_NODE_FN (ct6_in2out_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return ct6_in2out_inline (vm, node, frame, 0 /* is_trace */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (ct6_in2out_node) =
{
@@ -365,7 +364,6 @@ VLIB_REGISTER_NODE (ct6_in2out_node) =
.unformat_buffer = unformat_ethernet_header,
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ct6/ct6_out2in.c b/src/plugins/ct6/ct6_out2in.c
index ebb6da56134..a94ae38f0c5 100644
--- a/src/plugins/ct6/ct6_out2in.c
+++ b/src/plugins/ct6/ct6_out2in.c
@@ -246,7 +246,6 @@ VLIB_NODE_FN (ct6_out2in_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return ct6_out2in_inline (vm, node, frame, 0 /* is_trace */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (ct6_out2in_node) =
{
@@ -266,7 +265,6 @@ VLIB_REGISTER_NODE (ct6_out2in_node) =
},
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dev_ena/CMakeLists.txt b/src/plugins/dev_ena/CMakeLists.txt
new file mode 100644
index 00000000000..d9224d6fd9b
--- /dev/null
+++ b/src/plugins/dev_ena/CMakeLists.txt
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2022 Cisco Systems, Inc.
+
+add_vpp_plugin(dev_ena
+ SOURCES
+ aq.c
+ aenq.c
+ ena.c
+ format.c
+ format_aq.c
+ port.c
+ queue.c
+ rx_node.c
+ tx_node.c
+ reg.c
+
+ MULTIARCH_SOURCES
+ rx_node.c
+ tx_node.c
+)
+
diff --git a/src/plugins/dev_ena/aenq.c b/src/plugins/dev_ena/aenq.c
new file mode 100644
index 00000000000..64be3c4af3a
--- /dev/null
+++ b/src/plugins/dev_ena/aenq.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+#define ENA_AENQ_POLL_INTERVAL 0.2
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "aenq",
+};
+
+void
+ena_aenq_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+
+ log_debug (dev, "");
+
+ ASSERT (ed->aenq_started == 0);
+
+ vnet_dev_dma_mem_free (vm, dev, ed->aenq.entries);
+ ed->aenq.entries = 0;
+ ed->aenq.depth = 0;
+}
+
+vnet_dev_rv_t
+ena_aenq_olloc (vlib_main_t *vm, vnet_dev_t *dev, u16 depth)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u32 alloc_sz = sizeof (ena_aenq_entry_t) * depth;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "");
+
+ ASSERT (ed->aenq.entries == 0);
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, alloc_sz, 0,
+ (void **) &ed->aenq.entries)))
+ goto err;
+
+ ed->aenq.depth = depth;
+
+ return VNET_DEV_OK;
+err:
+ ena_aenq_free (vm, dev);
+ return rv;
+}
+
+static ena_aenq_entry_t *
+ena_get_next_aenq_entry (vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u16 index = ed->aenq.head & pow2_mask (ENA_ASYNC_QUEUE_LOG2_DEPTH);
+ u16 phase = 1 & (ed->aenq.head >> ENA_ASYNC_QUEUE_LOG2_DEPTH);
+ ena_aenq_entry_t *e = ed->aenq.entries + index;
+
+ if (e->phase != phase)
+ return 0;
+
+ ed->aenq.head++;
+
+ return e;
+}
+
+static void
+ena_aenq_poll (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_aenq_entry_t *ae;
+
+ while ((ae = ena_get_next_aenq_entry (dev)))
+ {
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ vnet_dev_port_state_changes_t changes = {};
+
+ log_debug (dev, "aenq: group %u syndrome %u phase %u timestamp %lu",
+ ae->group, ae->syndrome, ae->phase, ae->timestamp);
+
+ switch (ae->group)
+ {
+ case ENA_AENQ_GROUP_LINK_CHANGE:
+ log_debug (dev, "link_change: status %u",
+ ae->link_change.link_status);
+ changes.link_state = 1;
+ changes.change.link_state = 1;
+ foreach_vnet_dev_port (p, dev)
+ vnet_dev_port_state_change (vm, p, changes);
+ break;
+
+ case ENA_AENQ_GROUP_NOTIFICATION:
+ log_warn (dev, "unhandled AENQ notification received [syndrome %u]",
+ ae->syndrome);
+ break;
+
+ case ENA_AENQ_GROUP_KEEP_ALIVE:
+ if (ae->keep_alive.rx_drops || ae->keep_alive.tx_drops)
+ log_debug (dev, "keep_alive: rx_drops %lu tx_drops %lu",
+ ae->keep_alive.rx_drops, ae->keep_alive.tx_drops);
+ ed->aenq.rx_drops = ae->keep_alive.rx_drops - ed->aenq.rx_drops0;
+ ed->aenq.tx_drops = ae->keep_alive.tx_drops - ed->aenq.tx_drops0;
+ ed->aenq.last_keepalive = vlib_time_now (vm);
+ break;
+
+ default:
+ log_debug (dev, "unknown aenq entry (group %u) %U", ae->group,
+ format_hexdump, ae, sizeof (*ae));
+ };
+ }
+}
+
+vnet_dev_rv_t
+ena_aenq_start (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u16 depth = ed->aenq.depth;
+ u32 alloc_sz = sizeof (ena_aenq_entry_t) * depth;
+
+ ASSERT (ed->aenq_started == 0);
+ ASSERT (ed->aq_started == 1);
+
+ ena_reg_aenq_caps_t aenq_caps = {
+ .depth = depth,
+ .entry_size = sizeof (ena_aenq_entry_t),
+ };
+
+ if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG))
+ {
+ ena_aq_feat_aenq_config_t aenq;
+ vnet_dev_rv_t rv;
+
+ if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG,
+ &aenq)))
+ {
+	  log_err (dev, "aenq_start: get_feature(AENQ_CONFIG) failed");
+ return rv;
+ }
+
+ aenq.enabled_groups.link_change = 1;
+ aenq.enabled_groups.fatal_error = 1;
+ aenq.enabled_groups.warning = 1;
+ aenq.enabled_groups.notification = 1;
+ aenq.enabled_groups.keep_alive = 1;
+ aenq.enabled_groups.as_u32 &= aenq.supported_groups.as_u32;
+ aenq.supported_groups.as_u32 = 0;
+
+ if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG,
+ &aenq)))
+ {
+	  log_err (dev, "aenq_start: set_feature(AENQ_CONFIG) failed");
+ return rv;
+ }
+ }
+
+ clib_memset (ed->aenq.entries, 0, alloc_sz);
+ ed->aenq.head = depth;
+
+ ena_reg_set_dma_addr (vm, dev, ENA_REG_AENQ_BASE_LO, ENA_REG_AENQ_BASE_HI,
+ ed->aenq.entries);
+
+ ena_reg_write (dev, ENA_REG_AENQ_CAPS, &aenq_caps);
+ ena_reg_write (dev, ENA_REG_AENQ_HEAD_DB, &(u32){ depth });
+
+ ed->aenq_started = 1;
+
+ vnet_dev_poll_dev_add (vm, dev, ENA_AENQ_POLL_INTERVAL, ena_aenq_poll);
+
+ return VNET_DEV_OK;
+}
+
+void
+ena_aenq_stop (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ if (ed->aenq_started == 1)
+ {
+ ena_reg_aenq_caps_t aenq_caps = {};
+ vnet_dev_poll_dev_remove (vm, dev, ena_aenq_poll);
+ ena_reg_write (dev, ENA_REG_AENQ_CAPS, &aenq_caps);
+ ed->aenq_started = 0;
+ }
+}
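
ena_get_next_aenq_entry() above implements the ENA phase-bit convention: the
consumer keeps a free-running head counter, the expected phase flips on each
lap of the ring, and a slot is fresh exactly when the phase the device wrote
matches the phase implied by the counter. A standalone sketch with
hypothetical names, sized like ENA_ASYNC_QUEUE_LOG2_DEPTH:

#define LOG2_DEPTH 5
#define DEPTH (1 << LOG2_DEPTH)

typedef struct { unsigned int phase : 1; /* + payload */ } entry_t;

static entry_t *
next_entry (entry_t *ring, unsigned int *head)
{
  unsigned int index = *head & (DEPTH - 1);
  unsigned int phase = 1 & (*head >> LOG2_DEPTH);

  if (ring[index].phase != phase)
    return 0; /* device has not written this slot on this lap yet */

  (*head)++;
  return ring + index;
}
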
diff --git a/src/plugins/dev_ena/aq.c b/src/plugins/dev_ena/aq.c
new file mode 100644
index 00000000000..290d5bd52c6
--- /dev/null
+++ b/src/plugins/dev_ena/aq.c
@@ -0,0 +1,359 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "admin",
+};
+
+VLIB_REGISTER_LOG_CLASS (ena_stats_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "admin-stats",
+};
+
+ena_aq_feat_info_t feat_info[] = {
+#define _(v, ver, gt, st, n, s) \
+ [v] = { .name = #n, \
+ .version = (ver), \
+ .data_sz = sizeof (s), \
+ .get = (gt), \
+ .set = (st) },
+ foreach_ena_aq_feature_id
+#undef _
+};
+
+ena_aq_feat_info_t *
+ena_aq_get_feat_info (ena_aq_feature_id_t id)
+{
+ if (id >= ARRAY_LEN (feat_info) || feat_info[id].data_sz == 0)
+ return 0;
+
+ return feat_info + id;
+}
+
+void
+ena_aq_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ vnet_dev_dma_mem_free (vm, dev, ed->aq.cq_entries);
+ vnet_dev_dma_mem_free (vm, dev, ed->aq.sq_entries);
+ ed->aq.depth = 0;
+}
+
+vnet_dev_rv_t
+ena_aq_olloc (vlib_main_t *vm, vnet_dev_t *dev, u16 depth)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ vnet_dev_dma_mem_free (vm, dev, ed->aq.cq_entries);
+ vnet_dev_dma_mem_free (vm, dev, ed->aq.sq_entries);
+ u32 sq_alloc_sz = sizeof (ena_aq_sq_entry_t) * depth;
+ u32 cq_alloc_sz = sizeof (ena_aq_cq_entry_t) * depth;
+ vnet_dev_rv_t rv;
+
+ ASSERT (ed->aq.sq_entries == 0);
+ ASSERT (ed->aq.cq_entries == 0);
+
+ rv = vnet_dev_dma_mem_alloc (vm, dev, sq_alloc_sz, 0,
+ (void **) &ed->aq.sq_entries);
+ if (rv != VNET_DEV_OK)
+ goto err;
+
+ rv = vnet_dev_dma_mem_alloc (vm, dev, cq_alloc_sz, 0,
+ (void **) &ed->aq.cq_entries);
+ if (rv != VNET_DEV_OK)
+ goto err;
+
+ ed->aq.depth = depth;
+
+ return VNET_DEV_OK;
+err:
+ ena_aq_free (vm, dev);
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_start (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u16 depth = ed->aq.depth;
+ u32 sq_alloc_sz = sizeof (ena_aq_sq_entry_t) * depth;
+ u32 cq_alloc_sz = sizeof (ena_aq_cq_entry_t) * depth;
+
+ ASSERT (ed->aq_started == 0);
+
+ ena_reg_aq_caps_t aq_caps = {
+ .depth = depth,
+ .entry_size = sizeof (ena_aq_sq_entry_t),
+ };
+
+ ena_reg_acq_caps_t acq_caps = {
+ .depth = depth,
+ .entry_size = sizeof (ena_aq_cq_entry_t),
+ };
+
+ clib_memset (ed->aq.sq_entries, 0, sq_alloc_sz);
+ clib_memset (ed->aq.cq_entries, 0, cq_alloc_sz);
+
+ ed->aq.sq_next = 0;
+ ed->aq.cq_head = 0;
+
+ ena_reg_set_dma_addr (vm, dev, ENA_REG_AQ_BASE_LO, ENA_REG_AQ_BASE_HI,
+ ed->aq.sq_entries);
+ ena_reg_set_dma_addr (vm, dev, ENA_REG_ACQ_BASE_LO, ENA_REG_ACQ_BASE_HI,
+ ed->aq.cq_entries);
+
+ ena_reg_write (dev, ENA_REG_AQ_CAPS, &aq_caps);
+ ena_reg_write (dev, ENA_REG_ACQ_CAPS, &acq_caps);
+
+ ed->aq_started = 1;
+
+ return VNET_DEV_OK;
+}
+
+void
+ena_aq_stop (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ ena_reg_aq_caps_t aq_caps = {};
+ ena_reg_acq_caps_t acq_caps = {};
+
+ if (ed->aq_started)
+ {
+ ena_reg_write (dev, ENA_REG_AQ_CAPS, &aq_caps);
+ ena_reg_write (dev, ENA_REG_ACQ_CAPS, &acq_caps);
+ ed->aq_started = 0;
+ }
+}
+
+vnet_dev_rv_t
+ena_aq_req (vlib_main_t *vm, vnet_dev_t *dev, ena_aq_opcode_t opcode,
+ void *sqe_data, u8 sqe_data_sz, void *cqe_data, u8 cqe_data_sz)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u32 next = ed->aq.sq_next++;
+ u32 index = next & pow2_mask (ENA_ADMIN_QUEUE_LOG2_DEPTH);
+ u8 phase = 1 & (~(next >> ENA_ADMIN_QUEUE_LOG2_DEPTH));
+ ena_aq_sq_entry_t *sqe = ed->aq.sq_entries + index;
+ ena_aq_cq_entry_t *cqe = ed->aq.cq_entries + index;
+ f64 suspend_time = 1e-6;
+
+ clib_memcpy_fast (&sqe->data, sqe_data, sqe_data_sz);
+ sqe->opcode = opcode;
+ sqe->command_id = index;
+ sqe->phase = phase;
+
+ ena_reg_write (dev, ENA_REG_AQ_DB, &ed->aq.sq_next);
+
+ while (cqe->phase != phase)
+ {
+ vlib_process_suspend (vm, suspend_time);
+ suspend_time *= 2;
+ if (suspend_time > 1e-3)
+ {
+ log_err (dev, "admin queue timeout (opcode %U)",
+ format_ena_aq_opcode, opcode);
+ return VNET_DEV_ERR_TIMEOUT;
+ }
+ }
+
+ if (cqe->status != ENA_ADMIN_COMPL_STATUS_SUCCESS)
+ {
+ log_err (dev,
+ "cqe[%u]: opcode %U status %U ext_status %u sq_head_idx %u",
+ cqe - ed->aq.cq_entries, format_ena_aq_opcode, opcode,
+ format_ena_aq_status, cqe->status, cqe->extended_status,
+ cqe->sq_head_indx);
+ return VNET_DEV_ERR_DEVICE_NO_REPLY;
+ }
+
+ log_debug (dev, "cqe: status %u ext_status %u sq_head_idx %u", cqe->status,
+ cqe->extended_status, cqe->sq_head_indx);
+
+ if (cqe_data && cqe_data_sz)
+ clib_memcpy_fast (cqe_data, &cqe->data, cqe_data_sz);
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+ena_aq_set_feature (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_feature_id_t feat_id, void *data)
+{
+ vnet_dev_rv_t rv;
+
+ struct
+ {
+ ena_aq_aq_ctrl_buff_info_t control_buffer;
+ ena_aq_get_set_feature_common_desc_t feat_common;
+ u32 data[11];
+ } fd = {
+ .feat_common.feature_id = feat_id,
+ .feat_common.feature_version = feat_info[feat_id].version,
+ };
+
+ log_debug (dev, "set_feature(%s):\n %U", feat_info[feat_id].name,
+ format_ena_aq_feat_desc, feat_id, data);
+
+ ASSERT (feat_info[feat_id].data_sz > 1);
+ clib_memcpy (&fd.data, data, feat_info[feat_id].data_sz);
+
+ rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_SET_FEATURE, &fd, sizeof (fd), 0, 0);
+
+ if (rv != VNET_DEV_OK)
+    log_err (dev, "set_feature(%U) failed", format_ena_aq_feat_name, feat_id);
+
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_get_feature (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_feature_id_t feat_id, void *data)
+{
+ vnet_dev_rv_t rv;
+
+ struct
+ {
+ ena_aq_aq_ctrl_buff_info_t control_buffer;
+ ena_aq_get_set_feature_common_desc_t feat_common;
+ u32 data[11];
+ } fd = {
+ .feat_common.feature_id = feat_id,
+ .feat_common.feature_version = feat_info[feat_id].version,
+ };
+
+ rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_GET_FEATURE, &fd, sizeof (fd), data,
+ feat_info[feat_id].data_sz);
+
+ if (rv != VNET_DEV_OK)
+ {
+ log_err (dev, "get_feature(%U) failed", format_ena_aq_feat_name,
+ feat_id);
+ return rv;
+ }
+
+ ASSERT (feat_info[feat_id].data_sz > 1);
+
+ log_debug (dev, "get_feature(%s):\n %U", feat_info[feat_id].name,
+ format_ena_aq_feat_desc, feat_id, data);
+
+ return 0;
+}
+
+vnet_dev_rv_t
+ena_aq_create_sq (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_create_sq_cmd_t *cmd, ena_aq_create_sq_resp_t *resp)
+{
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "create_sq_cmd_req:\n %U", format_ena_aq_create_sq_cmd,
+ cmd);
+
+ rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_CREATE_SQ, cmd, sizeof (*cmd), resp,
+ sizeof (*resp));
+
+ if (rv != VNET_DEV_OK)
+ log_debug (dev, "create_sq_cmd_resp:\n %U", format_ena_aq_create_sq_resp,
+ resp);
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_create_cq (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_create_cq_cmd_t *cmd, ena_aq_create_cq_resp_t *resp)
+{
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "create_cq_cmd_req:\n %U", format_ena_aq_create_cq_cmd,
+ cmd);
+
+ rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_CREATE_CQ, cmd, sizeof (*cmd), resp,
+ sizeof (*resp));
+
+ if (rv != VNET_DEV_OK)
+ log_debug (dev, "create_cq_cmd_resp:\n %U", format_ena_aq_create_cq_resp,
+ resp);
+
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_destroy_sq (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_destroy_sq_cmd_t *cmd)
+{
+ log_debug (dev, "destroy_sq_cmd_req:\n %U", format_ena_aq_destroy_sq_cmd,
+ cmd);
+
+ return ena_aq_req (vm, dev, ENA_AQ_OPCODE_DESTROY_SQ, cmd, sizeof (*cmd), 0,
+ 0);
+}
+
+vnet_dev_rv_t
+ena_aq_destroy_cq (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_destroy_cq_cmd_t *cmd)
+{
+ log_debug (dev, "destroy_cq_cmd_req:\n %U", format_ena_aq_destroy_cq_cmd,
+ cmd);
+
+ return ena_aq_req (vm, dev, ENA_AQ_OPCODE_DESTROY_CQ, cmd, sizeof (*cmd), 0,
+ 0);
+}
+
+vnet_dev_rv_t
+ena_aq_get_stats (vlib_main_t *vm, vnet_dev_t *dev, ena_aq_stats_type_t type,
+ ena_aq_stats_scope_t scope, u16 queue_idx, void *data)
+{
+ vnet_dev_rv_t rv;
+ format_function_t *ff = 0;
+ u8 data_sz[] = {
+ [ENA_ADMIN_STATS_TYPE_BASIC] = sizeof (ena_aq_basic_stats_t),
+ [ENA_ADMIN_STATS_TYPE_EXTENDED] = 0,
+ [ENA_ADMIN_STATS_TYPE_ENI] = sizeof (ena_aq_eni_stats_t),
+ };
+
+ char *type_str[] = {
+#define _(n, s) [n] = #s,
+ foreach_ena_aq_stats_type
+#undef _
+ };
+
+ char *scope_str[] = {
+#define _(n, s) [n] = #s,
+ foreach_ena_aq_stats_scope
+#undef _
+ };
+
+ ena_aq_get_stats_cmd_t cmd = {
+ .type = type,
+ .scope = scope,
+ .queue_idx = scope == ENA_ADMIN_STATS_SCOPE_SPECIFIC_QUEUE ? queue_idx : 0,
+ .device_id = 0xffff,
+ };
+
+ if ((rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_GET_STATS, &cmd, sizeof (cmd),
+ data, data_sz[type])))
+ {
+ ena_stats_log_err (dev, "get_stats(%s, %s) failed", type_str[type],
+ scope_str[scope]);
+ return rv;
+ }
+
+ if (type == ENA_ADMIN_STATS_TYPE_BASIC)
+ ff = format_ena_aq_basic_stats;
+ else if (type == ENA_ADMIN_STATS_TYPE_ENI)
+ ff = format_ena_aq_eni_stats;
+
+ if (ff)
+ ena_stats_log_debug (dev, "get_stats(%s, %s, %u):\n %U", type_str[type],
+ scope_str[scope], queue_idx, ff, data);
+ else
+ ena_stats_log_debug (dev, "get_stats(%s, %s, %u): unknown data",
+ type_str[type], scope_str[scope], queue_idx);
+
+ return VNET_DEV_OK;
+}
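
ena_aq_req() above polls the completion phase bit with an exponentially
growing suspend interval and gives up once the interval passes 1 ms. A
standalone sketch of that loop, with plain usleep() standing in for
vlib_process_suspend():

#include <unistd.h>

static int
wait_for_phase (volatile unsigned char *cqe_phase, unsigned char phase)
{
  double suspend_time = 1e-6; /* seconds, doubled on every retry */

  while (*cqe_phase != phase)
    {
      usleep ((useconds_t) (suspend_time * 1e6));
      suspend_time *= 2;
      if (suspend_time > 1e-3)
	return -1; /* timeout, mirrors VNET_DEV_ERR_TIMEOUT */
    }
  return 0;
}
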
diff --git a/src/plugins/dev_ena/ena.c b/src/plugins/dev_ena/ena.c
new file mode 100644
index 00000000000..ead090839c7
--- /dev/null
+++ b/src/plugins/dev_ena/ena.c
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+static ena_aq_host_info_t host_info = {
+ .os_type = 3, /* DPDK */
+ .kernel_ver_str = VPP_BUILD_VER,
+ .os_dist_str = VPP_BUILD_VER,
+ .driver_version = {
+ .major = 16,
+ .minor = 0,
+ .sub_minor = 0,
+ },
+ .ena_spec_version = {
+ .major = 2,
+ .minor = 0,
+ },
+ .driver_supported_features = {
+ .rx_offset = 1,
+ .rss_configurable_function_key = 1,
+ }
+};
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "init",
+};
+
+#define _(f, n, s, d) \
+ { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s },
+
+static vlib_error_desc_t ena_rx_node_counters[] = {
+ foreach_ena_rx_node_counter
+};
+static vlib_error_desc_t ena_tx_node_counters[] = {
+ foreach_ena_tx_node_counter
+};
+#undef _
+
+vnet_dev_node_t ena_rx_node = {
+ .error_counters = ena_rx_node_counters,
+ .n_error_counters = ARRAY_LEN (ena_rx_node_counters),
+ .format_trace = format_ena_rx_trace,
+};
+
+vnet_dev_node_t ena_tx_node = {
+ .error_counters = ena_tx_node_counters,
+ .n_error_counters = ARRAY_LEN (ena_tx_node_counters),
+};
+
+static void
+ena_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_aenq_stop (vm, dev);
+ ena_aq_stop (vm, dev);
+}
+
+static vnet_dev_rv_t
+ena_alloc (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ vnet_dev_rv_t rv;
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, 4096, 4096,
+ (void **) &ed->host_info)))
+ return rv;
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (ena_mmio_resp_t), 0,
+ (void **) &ed->mmio_resp)))
+ return rv;
+
+ if ((rv = ena_aq_olloc (vm, dev, ENA_ADMIN_QUEUE_DEPTH)))
+ return rv;
+
+ if ((rv = ena_aenq_olloc (vm, dev, ENA_ASYNC_QUEUE_DEPTH)))
+ return rv;
+
+ return VNET_DEV_OK;
+}
+
+static void
+ena_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+
+ ena_aenq_free (vm, dev);
+ ena_aq_free (vm, dev);
+
+ vnet_dev_dma_mem_free (vm, dev, ed->host_info);
+ vnet_dev_dma_mem_free (vm, dev, ed->mmio_resp);
+}
+
+static vnet_dev_rv_t
+ena_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ ena_aq_feat_host_attr_config_t host_attr = {};
+ vlib_pci_config_hdr_t pci_cfg_hdr;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ vnet_dev_port_add_args_t port = {
+ .port = {
+ .attr = {
+ .type = VNET_DEV_PORT_TYPE_ETHERNET,
+ },
+ .ops = {
+ .init = ena_port_init,
+ .start = ena_port_start,
+ .stop = ena_port_stop,
+ .config_change = ena_port_cfg_change,
+ .config_change_validate = ena_port_cfg_change_validate,
+ },
+ .data_size = sizeof (ena_port_t),
+ },
+ .rx_node = &ena_rx_node,
+ .tx_node = &ena_tx_node,
+ .rx_queue = {
+ .config = {
+ .data_size = sizeof (ena_rxq_t),
+ .default_size = 512,
+ .min_size = 32,
+ .size_is_power_of_two = 1,
+ },
+ .ops = {
+ .alloc = ena_rx_queue_alloc,
+ .start = ena_rx_queue_start,
+ .stop = ena_rx_queue_stop,
+ .free = ena_rx_queue_free,
+ },
+ },
+ .tx_queue = {
+ .config = {
+ .data_size = sizeof (ena_txq_t),
+ .default_size = 512,
+ .min_size = 32,
+ .size_is_power_of_two = 1,
+ },
+ .ops = {
+ .alloc = ena_tx_queue_alloc,
+ .start = ena_tx_queue_start,
+ .stop = ena_tx_queue_stop,
+ .free = ena_tx_queue_free,
+ },
+ },
+ };
+
+ if ((rv = vnet_dev_pci_read_config_header (vm, dev, &pci_cfg_hdr)))
+ goto err;
+
+ log_debug (dev, "revision_id 0x%x", pci_cfg_hdr.revision_id);
+
+ ed->readless = (pci_cfg_hdr.revision_id & 1) == 0;
+
+ if ((rv = vnet_dev_pci_map_region (vm, dev, 0, &ed->reg_bar)))
+ goto err;
+
+ if ((rv = ena_reg_reset (vm, dev, ENA_RESET_REASON_NORMAL)))
+ goto err;
+
+ if ((rv = ena_aq_start (vm, dev)))
+ goto err;
+
+ *ed->host_info = host_info;
+ ed->host_info->num_cpus = vlib_get_n_threads ();
+ ena_set_mem_addr (vm, dev, &host_attr.os_info_ba, ed->host_info);
+
+ if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_HOST_ATTR_CONFIG,
+ &host_attr)))
+ return rv;
+
+ if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_DEVICE_ATTRIBUTES,
+ &ed->dev_attr)))
+ return rv;
+
+ if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT))
+ {
+ ena_aq_feat_max_queue_ext_t max_q_ext;
+ if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT,
+ &max_q_ext)))
+ goto err;
+ port.port.attr.max_rx_queues =
+ clib_min (max_q_ext.max_rx_cq_num, max_q_ext.max_rx_sq_num);
+ port.port.attr.max_tx_queues =
+ clib_min (max_q_ext.max_tx_cq_num, max_q_ext.max_tx_sq_num);
+ port.rx_queue.config.max_size =
+ clib_min (max_q_ext.max_rx_cq_depth, max_q_ext.max_rx_sq_depth);
+ port.tx_queue.config.max_size =
+ clib_min (max_q_ext.max_tx_cq_depth, max_q_ext.max_tx_sq_depth);
+ }
+ else
+ {
+ log_err (dev, "device doesn't support MAX_QUEUES_EXT");
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE_VER;
+ }
+
+ if ((rv = ena_aenq_start (vm, dev)))
+ goto err;
+
+ port.port.attr.max_supported_rx_frame_size = ed->dev_attr.max_mtu;
+
+ if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MTU))
+ port.port.attr.caps.change_max_rx_frame_size = 1;
+
+ vnet_dev_set_hw_addr_eth_mac (&port.port.attr.hw_addr,
+ ed->dev_attr.mac_addr);
+
+ return vnet_dev_port_add (vm, dev, 0, &port);
+
+err:
+ ena_free (vm, dev);
+ return rv;
+}
+
+static u8 *
+ena_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info)
+{
+ vnet_dev_bus_pci_device_info_t *di = dev_info;
+ const struct
+ {
+ u16 device_id;
+ char *description;
+ } ena_dev_types[] = {
+ { .device_id = 0x0ec2, .description = "Elastic Network Adapter (ENA) PF" },
+ { .device_id = 0xec20, .description = "Elastic Network Adapter (ENA) VF" },
+ };
+
+ if (di->vendor_id != 0x1d0f) /* AMAZON */
+ return 0;
+
+ FOREACH_ARRAY_ELT (dt, ena_dev_types)
+ {
+ if (dt->device_id == di->device_id)
+ return format (0, "%s", dt->description);
+ }
+
+ return 0;
+}
+
+VNET_DEV_REGISTER_DRIVER (ena) = {
+ .name = "ena",
+ .bus = "pci",
+ .device_data_sz = sizeof (ena_device_t),
+ .ops = {
+ .alloc = ena_alloc,
+ .init = ena_init,
+ .deinit = ena_deinit,
+ .free = ena_free,
+ .format_info = format_ena_dev_info,
+ .probe = ena_probe,
+ },
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "dev_ena",
+};
diff --git a/src/plugins/dev_ena/ena.h b/src/plugins/dev_ena/ena.h
new file mode 100644
index 00000000000..4acb8d9625a
--- /dev/null
+++ b/src/plugins/dev_ena/ena.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_H_
+#define _ENA_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+#include <dev_ena/ena_defs.h>
+
+#define ENA_ADMIN_QUEUE_LOG2_DEPTH 2
+#define ENA_ASYNC_QUEUE_LOG2_DEPTH 5
+#define ENA_ADMIN_QUEUE_DEPTH (1 << ENA_ADMIN_QUEUE_LOG2_DEPTH)
+#define ENA_ASYNC_QUEUE_DEPTH (1 << ENA_ASYNC_QUEUE_LOG2_DEPTH)
+
+typedef struct
+{
+ u8 readless : 1;
+ u8 aq_started : 1;
+ u8 aenq_started : 1;
+ u8 llq : 1;
+
+ void *reg_bar;
+
+ /* mmio */
+ ena_mmio_resp_t *mmio_resp;
+
+ /* admin queue */
+ struct
+ {
+ ena_aq_sq_entry_t *sq_entries;
+ ena_aq_cq_entry_t *cq_entries;
+ u16 sq_next;
+ u16 cq_head;
+ u16 depth;
+ } aq;
+
+ /* host info */
+ ena_aq_host_info_t *host_info;
+
+ /* device info */
+ ena_aq_feat_device_attr_t dev_attr;
+
+ /* async event notification */
+ struct
+ {
+ ena_aenq_entry_t *entries;
+ u16 head;
+ u16 depth;
+ f64 last_keepalive;
+ u64 tx_drops, tx_drops0;
+ u64 rx_drops, rx_drops0;
+ } aenq;
+
+} ena_device_t;
+
+typedef struct
+{
+} ena_port_t;
+
+typedef struct
+{
+ u32 *buffer_indices;
+ u16 *compl_sqe_indices;
+ ena_rx_desc_t *sqes;
+ ena_rx_cdesc_t *cqes;
+ u32 *sq_db;
+ u32 sq_next;
+ u32 cq_next;
+ u16 cq_idx;
+ u16 sq_idx;
+ u16 n_compl_sqes;
+ u8 cq_created : 1;
+ u8 sq_created : 1;
+} ena_rxq_t;
+
+typedef struct
+{
+ u32 *buffer_indices;
+ ena_tx_desc_t *sqes;
+ ena_tx_llq_desc128_t *llq_descs;
+ ena_tx_cdesc_t *cqes;
+ u64 *sqe_templates;
+ u32 *sq_db;
+ u32 sq_tail;
+ u32 sq_head;
+ u32 cq_next;
+ u16 cq_idx;
+ u16 sq_idx;
+ u8 cq_created : 1;
+ u8 sq_created : 1;
+ u8 llq : 1;
+} ena_txq_t;
+
+typedef struct
+{
+ u16 qid;
+ u16 next_index;
+ u32 hw_if_index;
+ ena_rx_cdesc_status_t status;
+ u16 length;
+ u16 n_desc;
+ u16 req_id;
+} ena_rx_trace_t;
+
+/* admin.c */
+typedef struct
+{
+ char *name;
+ u8 version;
+ u8 data_sz;
+ u8 get;
+ u8 set;
+} ena_aq_feat_info_t;
+
+ena_aq_feat_info_t *ena_aq_get_feat_info (ena_aq_feature_id_t);
+vnet_dev_rv_t ena_aq_olloc (vlib_main_t *, vnet_dev_t *, u16);
+vnet_dev_rv_t ena_aq_start (vlib_main_t *, vnet_dev_t *);
+void ena_aq_stop (vlib_main_t *, vnet_dev_t *);
+void ena_aq_free (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t ena_aq_create_sq (vlib_main_t *, vnet_dev_t *,
+ ena_aq_create_sq_cmd_t *,
+ ena_aq_create_sq_resp_t *);
+vnet_dev_rv_t ena_aq_create_cq (vlib_main_t *, vnet_dev_t *,
+ ena_aq_create_cq_cmd_t *,
+ ena_aq_create_cq_resp_t *);
+vnet_dev_rv_t ena_aq_destroy_sq (vlib_main_t *, vnet_dev_t *,
+ ena_aq_destroy_sq_cmd_t *);
+vnet_dev_rv_t ena_aq_destroy_cq (vlib_main_t *, vnet_dev_t *,
+ ena_aq_destroy_cq_cmd_t *);
+vnet_dev_rv_t ena_aq_set_feature (vlib_main_t *, vnet_dev_t *,
+ ena_aq_feature_id_t, void *);
+vnet_dev_rv_t ena_aq_get_feature (vlib_main_t *, vnet_dev_t *,
+ ena_aq_feature_id_t, void *);
+vnet_dev_rv_t ena_aq_get_stats (vlib_main_t *, vnet_dev_t *,
+ ena_aq_stats_type_t, ena_aq_stats_scope_t, u16,
+ void *);
+
+/* aenq.c */
+vnet_dev_rv_t ena_aenq_olloc (vlib_main_t *, vnet_dev_t *, u16);
+vnet_dev_rv_t ena_aenq_start (vlib_main_t *, vnet_dev_t *);
+void ena_aenq_stop (vlib_main_t *, vnet_dev_t *);
+void ena_aenq_free (vlib_main_t *, vnet_dev_t *);
+
+/* reg.c */
+void ena_reg_write (vnet_dev_t *, ena_reg_t, void *);
+void ena_reg_read (vnet_dev_t *, ena_reg_t, const void *);
+void ena_reg_set_dma_addr (vlib_main_t *, vnet_dev_t *, u32, u32, void *);
+vnet_dev_rv_t ena_reg_reset (vlib_main_t *, vnet_dev_t *, ena_reset_reason_t);
+
+/* port.c */
+vnet_dev_rv_t ena_port_init (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t ena_port_start (vlib_main_t *, vnet_dev_port_t *);
+void ena_port_stop (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t ena_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t ena_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+
+/* queue.c */
+vnet_dev_rv_t ena_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t ena_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *);
+void ena_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void ena_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t ena_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t ena_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *);
+void ena_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *);
+void ena_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* format.c */
+format_function_t format_ena_dev_info;
+format_function_t format_ena_mem_addr;
+format_function_t format_ena_tx_desc;
+format_function_t format_ena_rx_trace;
+
+/* format_admin.c */
+format_function_t format_ena_aq_feat_desc;
+format_function_t format_ena_aq_feat_name;
+format_function_t format_ena_aq_opcode;
+format_function_t format_ena_aq_status;
+format_function_t format_ena_aq_feat_id_bitmap;
+format_function_t format_ena_aq_create_sq_cmd;
+format_function_t format_ena_aq_create_cq_cmd;
+format_function_t format_ena_aq_create_sq_resp;
+format_function_t format_ena_aq_create_cq_resp;
+format_function_t format_ena_aq_destroy_sq_cmd;
+format_function_t format_ena_aq_destroy_cq_cmd;
+format_function_t format_ena_aq_basic_stats;
+format_function_t format_ena_aq_eni_stats;
+
+#define foreach_ena_rx_node_counter \
+ _ (BUFFER_ALLOC, buffer_alloc, ERROR, "buffer alloc error")
+
+typedef enum
+{
+#define _(f, lf, t, s) ENA_RX_NODE_CTR_##f,
+ foreach_ena_rx_node_counter
+#undef _
+ ENA_RX_NODE_N_CTRS,
+} ena_rx_node_ctr_t;
+
+#define foreach_ena_tx_node_counter \
+ _ (CHAIN_TOO_LONG, chain_too_long, ERROR, "buffer chain too long") \
+ _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots")
+
+typedef enum
+{
+#define _(f, lf, t, s) ENA_TX_NODE_CTR_##f,
+ foreach_ena_tx_node_counter
+#undef _
+ ENA_TX_NODE_N_CTRS,
+} ena_tx_node_ctr_t;
+
+#define log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, ena_log.class, "%U" f, format_vnet_dev_log, \
+ (dev), clib_string_skip_prefix (__func__, "ena_"), ##__VA_ARGS__)
+#define log_info(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_INFO, ena_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_notice(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_NOTICE, ena_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, ena_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, ena_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+
+#endif /* _ENA_H_ */
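
The node-counter and format tables in this header rely on the X-macro idiom
used throughout the driver: one foreach list expands into an enum and again
into a matching descriptor table, so the two cannot drift apart. A generic,
runnable sketch of the idiom (all names illustrative):

#include <stdio.h>

#define foreach_color \
  _ (RED, "red")      \
  _ (GREEN, "green")  \
  _ (BLUE, "blue")

typedef enum
{
#define _(n, s) COLOR_##n,
  foreach_color
#undef _
    COLOR_N,
} color_t;

static const char *color_names[] = {
#define _(n, s) [COLOR_##n] = s,
  foreach_color
#undef _
};

int
main (void)
{
  for (int i = 0; i < COLOR_N; i++)
    printf ("%d: %s\n", i, color_names[i]);
  return 0;
}
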
diff --git a/src/plugins/dev_ena/ena_admin_defs.h b/src/plugins/dev_ena/ena_admin_defs.h
new file mode 100644
index 00000000000..6433a1563b8
--- /dev/null
+++ b/src/plugins/dev_ena/ena_admin_defs.h
@@ -0,0 +1,685 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_ADMIN_DEFS_H_
+#define _ENA_ADMIN_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+
+#define foreach_ena_aq_opcode \
+ _ (1, CREATE_SQ) \
+ _ (2, DESTROY_SQ) \
+ _ (3, CREATE_CQ) \
+ _ (4, DESTROY_CQ) \
+ _ (8, GET_FEATURE) \
+ _ (9, SET_FEATURE) \
+ _ (11, GET_STATS)
+
+typedef enum
+{
+#define _(v, n) ENA_AQ_OPCODE_##n = (v),
+ foreach_ena_aq_opcode
+#undef _
+} __clib_packed ena_aq_opcode_t;
+
+#define foreach_ena_aq_compl_status \
+ _ (0, SUCCESS) \
+ _ (1, RESOURCE_ALLOCATION_FAILURE) \
+ _ (2, BAD_OPCODE) \
+ _ (3, UNSUPPORTED_OPCODE) \
+ _ (4, MALFORMED_REQUEST) \
+ _ (5, ILLEGAL_PARAMETER) \
+ _ (6, UNKNOWN_ERROR) \
+ _ (7, RESOURCE_BUSY)
+
+typedef enum
+{
+#define _(v, n) ENA_ADMIN_COMPL_STATUS_##n = (v),
+ foreach_ena_aq_compl_status
+#undef _
+} __clib_packed ena_aq_compl_status_t;
+
+/* id, version, get, set, name, struct */
+#define foreach_ena_aq_feature_id \
+ _ (1, 0, 1, 0, DEVICE_ATTRIBUTES, ena_aq_feat_device_attr_t) \
+ _ (2, 0, 1, 0, MAX_QUEUES_NUM, ena_aq_feat_max_queue_num_t) \
+ _ (3, 0, 1, 0, HW_HINTS, ena_aq_feat_hw_hints_t) \
+ _ (4, 0, 1, 1, LLQ, ena_aq_feat_llq_t) \
+ _ (5, 0, 1, 0, EXTRA_PROPERTIES_STRINGS, \
+ ena_aq_feat_extra_properties_strings_t) \
+ _ (6, 0, 1, 0, EXTRA_PROPERTIES_FLAGS, \
+ ena_aq_feat_extra_properties_flags_t) \
+ _ (7, 1, 1, 0, MAX_QUEUES_EXT, ena_aq_feat_max_queue_ext_t) \
+ _ (10, 0, 1, 1, RSS_HASH_FUNCTION, ena_aq_feat_rss_hash_function_t) \
+ _ (11, 0, 1, 0, STATELESS_OFFLOAD_CONFIG, \
+ ena_aq_feat_stateless_offload_config_t) \
+ _ (12, 0, 1, 1, RSS_INDIRECTION_TABLE_CONFIG, \
+ ena_aq_feat_rss_ind_table_config_t) \
+ _ (14, 0, 0, 1, MTU, ena_aq_feat_mtu_t) \
+ _ (18, 0, 1, 1, RSS_HASH_INPUT, ena_aq_feat_rss_hash_input_t) \
+ _ (20, 0, 1, 0, INTERRUPT_MODERATION, ena_aq_feat_intr_moder_t) \
+ _ (26, 0, 1, 1, AENQ_CONFIG, ena_aq_feat_aenq_config_t) \
+ _ (27, 0, 1, 0, LINK_CONFIG, ena_aq_feat_link_config_t) \
+ _ (28, 0, 0, 1, HOST_ATTR_CONFIG, ena_aq_feat_host_attr_config_t) \
+ _ (29, 0, 1, 1, PHC_CONFIG, ena_aq_feat_phc_config_t)
+
+typedef enum
+{
+#define _(v, ver, r, w, n, s) ENA_ADMIN_FEAT_ID_##n = (v),
+ foreach_ena_aq_feature_id
+#undef _
+} __clib_packed ena_aq_feature_id_t;
+
+#define foreach_ena_aq_stats_type \
+ _ (0, BASIC) \
+ _ (1, EXTENDED) \
+ _ (2, ENI)
+
+#define foreach_ena_aq_stats_scope \
+ _ (0, SPECIFIC_QUEUE) \
+ _ (1, ETH_TRAFFIC)
+
+typedef enum
+{
+#define _(v, n) ENA_ADMIN_STATS_TYPE_##n = (v),
+ foreach_ena_aq_stats_type
+#undef _
+} __clib_packed ena_aq_stats_type_t;
+
+typedef enum
+{
+#define _(v, n) ENA_ADMIN_STATS_SCOPE_##n = (v),
+ foreach_ena_aq_stats_scope
+#undef _
+} __clib_packed ena_aq_stats_scope_t;
+
+typedef struct
+{
+ u32 addr_lo;
+ u16 addr_hi;
+ u16 _reserved_16;
+} ena_mem_addr_t;
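+
+/* e.g. DMA address 0x12345678abcd is stored as addr_lo = 0x5678abcd,
+ * addr_hi = 0x1234 (see ena_set_mem_addr () in ena_inlines.h) */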
+
+#define foreach_ena_aq_aenq_groups \
+ _ (link_change) \
+ _ (fatal_error) \
+ _ (warning) \
+ _ (notification) \
+ _ (keep_alive) \
+ _ (refresh_capabilities) \
+ _ (conf_notifications)
+
+typedef union
+{
+ struct
+ {
+#define _(g) u32 g : 1;
+ foreach_ena_aq_aenq_groups
+#undef _
+ };
+ u32 as_u32;
+} ena_aq_aenq_groups_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_aenq_groups_t, 4);
+
+typedef struct
+{
+ u32 length;
+ ena_mem_addr_t addr;
+} ena_aq_aq_ctrl_buff_info_t;
+
+typedef struct
+{
+ u32 impl_id;
+ u32 device_version;
+ u32 supported_features;
+ u32 _reserved3;
+ u32 phys_addr_width;
+ u32 virt_addr_width;
+ u8 mac_addr[6];
+ u8 _reserved7[2];
+ u32 max_mtu;
+} ena_aq_feat_device_attr_t;
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u16 l3_sort : 1;
+ u16 l4_sort : 1;
+ };
+ u16 supported_input_sort;
+ };
+ union
+ {
+ struct
+ {
+ u16 enable_l3_sort : 1;
+ u16 enable_l4_sort : 1;
+ };
+ u16 enabled_input_sort;
+ };
+} ena_aq_feat_rss_hash_input_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_feat_rss_hash_input_t, 4);
+
+typedef struct
+{
+ u16 intr_delay_resolution;
+ u16 reserved;
+} ena_aq_feat_intr_moder_t;
+
+typedef struct
+{
+ ena_aq_aenq_groups_t supported_groups;
+ ena_aq_aenq_groups_t enabled_groups;
+} ena_aq_feat_aenq_config_t;
+
+#define foreach_ena_aq_link_types \
+ _ (0, 1000, 1G) \
+ _ (1, 2500, 2_5G) \
+ _ (2, 5000, 5G) \
+ _ (3, 10000, 10G) \
+ _ (4, 25000, 25G) \
+ _ (5, 40000, 40G) \
+ _ (6, 50000, 50G) \
+ _ (7, 100000, 100G) \
+ _ (8, 200000, 200G) \
+ _ (9, 400000, 400G)
+
+typedef enum
+{
+#define _(b, v, n) ENA_ADMIN_LINK_TYPE_##n = (1U << b),
+ foreach_ena_aq_link_types
+#undef _
+} ena_aq_link_types_t;
+
+typedef struct
+{
+ u32 speed;
+ ena_aq_link_types_t supported;
+ u32 autoneg : 1;
+ u32 duplex : 1;
+} ena_aq_feat_link_config_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_feat_link_config_t, 12);
+
+typedef struct
+{
+ u32 tx;
+ u32 rx_supported;
+ u32 rx_enabled;
+} ena_aq_feat_stateless_offload_config_t;
+
+typedef struct
+{
+ u16 cq_idx;
+ u16 reserved;
+} ena_aq_feat_rss_ind_table_entry_t;
+
+typedef struct
+{
+ u16 min_size;
+ u16 max_size;
+ u16 size;
+ u8 one_entry_update : 1;
+ u8 reserved;
+ u32 inline_index;
+ ena_aq_feat_rss_ind_table_entry_t inline_entry;
+} ena_aq_feat_rss_ind_table_config_t;
+
+typedef struct
+{
+ u32 mtu;
+} ena_aq_feat_mtu_t;
+
+typedef struct
+{
+ u32 count;
+} ena_aq_feat_extra_properties_strings_t;
+
+typedef struct
+{
+ u32 flags;
+} ena_aq_feat_extra_properties_flags_t;
+
+typedef struct
+{
+ u32 max_sq_num;
+ u32 max_sq_depth;
+ u32 max_cq_num;
+ u32 max_cq_depth;
+ u32 max_legacy_llq_num;
+ u32 max_legacy_llq_depth;
+ u32 max_header_size;
+ u16 max_packet_tx_descs;
+ u16 max_packet_rx_descs;
+} ena_aq_feat_max_queue_num_t;
+
+typedef struct
+{
+ u16 mmio_read_timeout;
+ u16 driver_watchdog_timeout;
+ u16 missing_tx_completion_timeout;
+ u16 missed_tx_completion_count_threshold_to_reset;
+ u16 admin_completion_tx_timeout;
+ u16 netdev_wd_timeout;
+ u16 max_tx_sgl_size;
+ u16 max_rx_sgl_size;
+ u16 reserved[8];
+} ena_aq_feat_hw_hints_t;
+
+typedef struct
+{
+ u8 version;
+ u8 _reserved1[3];
+ u32 max_tx_sq_num;
+ u32 max_tx_cq_num;
+ u32 max_rx_sq_num;
+ u32 max_rx_cq_num;
+ u32 max_tx_sq_depth;
+ u32 max_tx_cq_depth;
+ u32 max_rx_sq_depth;
+ u32 max_rx_cq_depth;
+ u32 max_tx_header_size;
+ u16 max_per_packet_tx_descs;
+ u16 max_per_packet_rx_descs;
+} ena_aq_feat_max_queue_ext_t;
+
+typedef struct
+{
+ u32 supported_func;
+ u32 selected_func;
+ u32 init_val;
+} ena_aq_feat_rss_hash_function_t;
+
+typedef struct
+{
+ ena_mem_addr_t os_info_ba;
+ ena_mem_addr_t debug_ba;
+ u32 debug_area_size;
+} ena_aq_feat_host_attr_config_t;
+
+typedef struct
+{
+ u8 type;
+ u8 reserved1[3];
+ u32 doorbell_offset;
+ u32 expire_timeout_usec;
+ u32 block_timeout_usec;
+ ena_mem_addr_t output_address;
+ u32 output_length;
+} ena_aq_feat_phc_config_t;
+
+typedef struct
+{
+ u32 max_llq_num;
+ u32 max_llq_depth;
+ u16 header_location_ctrl_supported;
+ u16 header_location_ctrl_enabled;
+ u16 entry_size_ctrl_supported;
+ u16 entry_size_ctrl_enabled;
+ u16 desc_num_before_header_supported;
+ u16 desc_num_before_header_enabled;
+ u16 descriptors_stride_ctrl_supported;
+ u16 descriptors_stride_ctrl_enabled;
+ union
+ {
+ struct
+ {
+ u16 supported_flags;
+ u16 max_tx_burst_size;
+ } get;
+ struct
+ {
+ u16 enabled_flags;
+ } set;
+ } accel_mode;
+} ena_aq_feat_llq_t;
+
+typedef struct
+{
+ /* feat common */
+ u8 flags;
+ ena_aq_feature_id_t feature_id;
+ u8 feature_version;
+ u8 _reserved;
+} ena_aq_get_set_feature_common_desc_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_get_set_feature_common_desc_t, 4);
+
+typedef struct
+{
+ ena_aq_aq_ctrl_buff_info_t control_buffer;
+ ena_aq_stats_type_t type;
+ ena_aq_stats_scope_t scope;
+ u16 _reserved3;
+ u16 queue_idx;
+ u16 device_id;
+} ena_aq_get_stats_cmd_t;
+STATIC_ASSERT_SIZEOF (ena_aq_get_stats_cmd_t, 20);
+
+typedef enum
+{
+ ENA_ADMIN_SQ_DIRECTION_TX = 1,
+ ENA_ADMIN_SQ_DIRECTION_RX = 2,
+} ena_aq_sq_direction_t;
+
+typedef enum
+{
+ ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST = 1,
+ ENA_ADMIN_SQ_PLACEMENT_POLICY_DEVICE = 3,
+} ena_aq_sq_placement_policy_t;
+
+typedef enum
+{
+ ENA_ADMIN_SQ_COMPLETION_POLICY_DESC = 0,
+ ENA_ADMIN_SQ_COMPLETION_POLICY_DESC_ON_DEMAND = 1,
+ ENA_ADMIN_SQ_COMPLETION_POLICY_HEAD_ON_DEMAND = 2,
+ ENA_ADMIN_SQ_COMPLETION_POLICY_HEAD = 3,
+} ena_aq_completion_policy_t;
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u8 _reserved0_0 : 5;
+ u8 sq_direction : 3; /* ena_aq_sq_direction_t */
+ };
+ u8 sq_identity;
+ };
+
+ u8 _reserved1;
+
+ union
+ {
+ struct
+ {
+ u8 placement_policy : 4; /* ena_aq_sq_placement_policy_t */
+ u8 completion_policy : 3; /* ena_aq_completion_policy_t */
+ u8 _reserved2_7 : 1;
+ };
+ u8 sq_caps_2;
+ };
+
+ union
+ {
+ struct
+ {
+ u8 is_physically_contiguous : 1;
+ u8 _reserved3_1 : 7;
+ };
+ u8 sq_caps_3;
+ };
+
+ u16 cq_idx;
+ u16 sq_depth;
+ ena_mem_addr_t sq_ba;
+ ena_mem_addr_t sq_head_writeback; /* used if completion_policy is 2 or 3 */
+ u32 _reserved0_w7;
+ u32 _reserved0_w8;
+} ena_aq_create_sq_cmd_t;
+
+typedef struct
+{
+ u16 sq_idx;
+ u16 _reserved;
+  u32 sq_doorbell_offset; /* REG BAR offset of queue doorbell */
+ u32 llq_descriptors_offset; /* LLQ MEM BAR offset of descriptors */
+ u32 llq_headers_offset; /* LLQ MEM BAR offset of headers mem */
+} ena_aq_create_sq_resp_t;
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u8 _reserved0_0 : 5;
+ u8 interrupt_mode_enabled : 1;
+ u8 _reserved0_6 : 2;
+ };
+ u8 cq_caps_1;
+ };
+
+ union
+ {
+ struct
+ {
+ u8 cq_entry_size_words : 4;
+ u8 _reserved1_4 : 4;
+ };
+ u8 cq_caps_2;
+ };
+
+ u16 cq_depth;
+ u32 msix_vector;
+ ena_mem_addr_t cq_ba;
+} ena_aq_create_cq_cmd_t;
+
+typedef struct
+{
+ u16 cq_idx;
+ u16 cq_actual_depth;
+ u32 numa_node_register_offset;
+ u32 cq_head_db_register_offset;
+ u32 cq_interrupt_unmask_register_offset;
+} ena_aq_create_cq_resp_t;
+
+typedef struct
+{
+ u16 sq_idx;
+ union
+ {
+ struct
+ {
+ u8 _reserved : 5;
+ u8 sq_direction : 3; /* ena_aq_sq_direction_t */
+ };
+ u8 sq_identity;
+ };
+ u8 _reserved1;
+} ena_aq_destroy_sq_cmd_t;
+
+typedef struct
+{
+ u16 cq_idx;
+ u16 _reserved1;
+} ena_aq_destroy_cq_cmd_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_create_sq_cmd_t, 32);
+STATIC_ASSERT_SIZEOF (ena_aq_create_sq_resp_t, 16);
+STATIC_ASSERT_SIZEOF (ena_aq_create_cq_cmd_t, 16);
+STATIC_ASSERT_SIZEOF (ena_aq_create_cq_resp_t, 16);
+STATIC_ASSERT_SIZEOF (ena_aq_destroy_sq_cmd_t, 4);
+STATIC_ASSERT_SIZEOF (ena_aq_destroy_cq_cmd_t, 4);
+
+typedef struct
+{
+ /* common desc */
+ u16 command_id;
+ ena_aq_opcode_t opcode;
+
+ union
+ {
+ struct
+ {
+ u8 phase : 1;
+ u8 ctrl_data : 1;
+ u8 ctrl_data_indirect : 1;
+ u8 _reserved_3_3 : 5;
+ };
+ u8 flags;
+ };
+
+ u32 data[15];
+} ena_aq_sq_entry_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_sq_entry_t, 64);
+
+typedef struct
+{
+ u32 os_type;
+ u8 os_dist_str[128];
+ u32 os_dist;
+ u8 kernel_ver_str[32];
+ u32 kernel_ver;
+
+ struct
+ {
+ u8 major;
+ u8 minor;
+ u8 sub_minor;
+ u8 module_type;
+ } driver_version;
+
+ u32 supported_network_features[2];
+
+ struct
+ {
+ u16 minor : 8;
+ u16 major : 8;
+ } ena_spec_version;
+
+ struct
+ {
+ u16 function : 3;
+ u16 device : 5;
+ u16 bus : 8;
+ } bdf;
+
+ u16 num_cpus;
+ u16 _reserved;
+
+ union
+ {
+ struct
+ {
+ u32 _reserved0 : 1;
+ u32 rx_offset : 1;
+ u32 interrupt_moderation : 1;
+ u32 rx_buf_mirroring : 1;
+ u32 rss_configurable_function_key : 1;
+ u32 _reserved5 : 1;
+ u32 rx_page_reuse : 1;
+ u32 tx_ipv6_csum_offload : 1;
+ u32 _reserved8 : 24;
+ };
+ u32 as_u32;
+ } driver_supported_features;
+
+} ena_aq_host_info_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_host_info_t, 196);
+
+typedef struct
+{
+ union
+ {
+ u64 tx_bytes;
+ struct
+ {
+ u32 tx_bytes_low;
+ u32 tx_bytes_high;
+ };
+ };
+ union
+ {
+ u64 tx_pkts;
+ struct
+ {
+ u32 tx_pkts_low;
+ u32 tx_pkts_high;
+ };
+ };
+ union
+ {
+ u64 rx_bytes;
+ struct
+ {
+ u32 rx_bytes_low;
+ u32 rx_bytes_high;
+ };
+ };
+ union
+ {
+ u64 rx_pkts;
+ struct
+ {
+ u32 rx_pkts_low;
+ u32 rx_pkts_high;
+ };
+ };
+ union
+ {
+ u64 rx_drops;
+ struct
+ {
+ u32 rx_drops_low;
+ u32 rx_drops_high;
+ };
+ };
+ union
+ {
+ u64 tx_drops;
+ struct
+ {
+ u32 tx_drops_low;
+ u32 tx_drops_high;
+ };
+ };
+} ena_aq_basic_stats_t;
+
+#define foreach_ena_aq_basic_counter \
+ _ (rx_pkts, "RX Packets") \
+ _ (tx_pkts, "TX Packets") \
+ _ (rx_bytes, "RX Bytes") \
+ _ (tx_bytes, "TX Bytes") \
+ _ (rx_drops, "RX Packet Drops") \
+ _ (tx_drops, "TX Packet Drops")
+
+typedef struct
+{
+ u64 bw_in_allowance_exceeded;
+ u64 bw_out_allowance_exceeded;
+ u64 pps_allowance_exceeded;
+ u64 conntrack_allowance_exceeded;
+ u64 linklocal_allowance_exceeded;
+} ena_aq_eni_stats_t;
+
+#define foreach_ena_aq_eni_counter \
+ _ (bw_in_allowance_exceeded, "Input BW Allowance Exceeded") \
+ _ (bw_out_allowance_exceeded, "Output BW Allowance Exceeded") \
+ _ (pps_allowance_exceeded, "PPS Allowance Exceeded") \
+ _ (conntrack_allowance_exceeded, "ConnTrack Allowance Exceeded") \
+ _ (linklocal_allowance_exceeded, "LinkLocal Allowance Exceeded")
+
+typedef struct
+{
+ /* common desc */
+ u16 command;
+ ena_aq_compl_status_t status;
+ union
+ {
+ struct
+ {
+ u8 phase : 1;
+ u8 _reserved3_1 : 7;
+ };
+ u8 flags;
+ };
+ u16 extended_status;
+ u16 sq_head_indx;
+
+ u32 data[14];
+} ena_aq_cq_entry_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_cq_entry_t, 64);
+
+#endif /* _ENA_ADMIN_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_aenq_defs.h b/src/plugins/dev_ena/ena_aenq_defs.h
new file mode 100644
index 00000000000..4530f5e7a42
--- /dev/null
+++ b/src/plugins/dev_ena/ena_aenq_defs.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_AENQ_DEFS_H_
+#define _ENA_AENQ_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+
+#define foreach_aenq_group \
+ _ (0, LINK_CHANGE) \
+ _ (1, FATAL_ERROR) \
+ _ (2, WARNING) \
+ _ (3, NOTIFICATION) \
+ _ (4, KEEP_ALIVE) \
+ _ (5, REFRESH_CAPABILITIES) \
+ _ (6, CONF_NOTIFICATIONS)
+
+#define foreach_aenq_syndrome \
+ _ (0, SUSPEND) \
+ _ (1, RESUME) \
+ _ (2, UPDATE_HINTS)
+
+typedef enum
+{
+#define _(v, n) ENA_AENQ_GROUP_##n = (v),
+ foreach_aenq_group
+#undef _
+} ena_aenq_group_t;
+
+typedef enum
+{
+#define _(v, n) ENA_AENQ_SYNDROME_##n = (v),
+ foreach_aenq_syndrome
+#undef _
+} ena_aenq_syndrome_t;
+
+typedef struct
+{
+ ena_aenq_group_t group : 16;
+ ena_aenq_syndrome_t syndrome : 16;
+
+ union
+ {
+ struct
+ {
+ u8 phase : 1;
+ };
+ u8 flags;
+ };
+ u8 reserved1[3];
+
+ union
+ {
+ u64 timestamp;
+ struct
+ {
+ u32 timestamp_low;
+ u32 timestamp_high;
+ };
+ };
+
+ union
+ {
+ u32 data[12];
+
+ struct
+ {
+ union
+ {
+ struct
+ {
+ u32 link_status : 1;
+ };
+ u32 flags;
+ };
+ } link_change;
+
+ struct
+ {
+ union
+ {
+ u64 rx_drops;
+ struct
+ {
+ u32 rx_drops_low;
+ u32 rx_drops_high;
+ };
+ };
+
+ union
+ {
+ u64 tx_drops;
+ struct
+ {
+ u32 tx_drops_low;
+ u32 tx_drops_high;
+ };
+ };
+ } keep_alive;
+ };
+} __clib_packed ena_aenq_entry_t;
+
+STATIC_ASSERT_SIZEOF (ena_aenq_entry_t, 64);
+
+#endif /* _ENA_AENQ_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_defs.h b/src/plugins/dev_ena/ena_defs.h
new file mode 100644
index 00000000000..1e52ed4e05b
--- /dev/null
+++ b/src/plugins/dev_ena/ena_defs.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_DEFS_H_
+#define _ENA_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <dev_ena/ena_reg_defs.h>
+#include <dev_ena/ena_admin_defs.h>
+#include <dev_ena/ena_aenq_defs.h>
+#include <dev_ena/ena_io_defs.h>
+
+/*
+ * MMIO Response
+ */
+typedef struct
+{
+ u16 req_id;
+ u16 reg_off;
+ u32 reg_val;
+} ena_mmio_resp_t;
+
+#endif /* _ENA_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_inlines.h b/src/plugins/dev_ena/ena_inlines.h
new file mode 100644
index 00000000000..106bd5eaa21
--- /dev/null
+++ b/src/plugins/dev_ena/ena_inlines.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_INLINES_H_
+#define _ENA_INLINES_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <dev_ena/ena.h>
+
+#define ena_log_is_debug() \
+ vlib_log_is_enabled (VLIB_LOG_LEVEL_DEBUG, ena_log.class)
+
+#define ena_stats_log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, ena_stats_log.class, "%U: " f, \
+ format_vnet_dev_addr, dev, ##__VA_ARGS__)
+
+#define ena_stats_log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, ena_stats_log.class, "%U: " f, \
+ format_vnet_dev_addr, dev, ##__VA_ARGS__)
+
+#define ena_stats_log_is_debug() \
+ vlib_log_is_enabled (VLIB_LOG_LEVEL_DEBUG, ena_stats_log.class)
+
+static_always_inline void
+ena_set_mem_addr (vlib_main_t *vm, vnet_dev_t *dev, ena_mem_addr_t *m, void *p)
+{
+ u64 pa = vnet_dev_get_dma_addr (vm, dev, p);
+ *m = (ena_mem_addr_t){ .addr_lo = (u32) pa, .addr_hi = (u16) (pa >> 32) };
+}
+
+static_always_inline int
+ena_aq_feature_is_supported (vnet_dev_t *dev, ena_aq_feature_id_t feat_id)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ return (ed->dev_attr.supported_features & (1U << feat_id)) != 0;
+}
+
+#endif /* _ENA_INLINES_H_ */
diff --git a/src/plugins/dev_ena/ena_io_defs.h b/src/plugins/dev_ena/ena_io_defs.h
new file mode 100644
index 00000000000..89ca2ac6498
--- /dev/null
+++ b/src/plugins/dev_ena/ena_io_defs.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_IO_DEFS_H_
+#define _ENA_IO_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/vector.h>
+
+typedef struct
+{
+ u16 length; /* 0 = 64K */
+ u8 reserved2;
+ union
+ {
+ struct
+ {
+ u8 phase : 1;
+ u8 reserved1 : 1;
+ u8 first : 1; /* first descriptor in transaction */
+ u8 last : 1; /* last descriptor in transaction */
+ u8 comp_req : 1; /* should completion be posted? */
+ u8 reserved5 : 1;
+ u8 reserved67 : 2;
+ };
+ u8 ctrl;
+ };
+ u16 req_id;
+ u16 reserved6;
+} ena_rx_desc_lo_t;
+
+STATIC_ASSERT_SIZEOF (ena_rx_desc_lo_t, 8);
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ ena_rx_desc_lo_t lo;
+ u32 buff_addr_lo;
+ u16 buff_addr_hi;
+ u16 reserved16_w3;
+ };
+ u64x2 as_u64x2;
+ };
+} ena_rx_desc_t;
+
+STATIC_ASSERT_SIZEOF (ena_rx_desc_t, 16);
+
+#define foreach_ena_rx_cdesc_status \
+ _ (5, l3_proto_idx) \
+ _ (2, src_vlan_cnt) \
+ _ (1, _reserved7) \
+ _ (5, l4_proto_idx) \
+ _ (1, l3_csum_err) \
+ _ (1, l4_csum_err) \
+ _ (1, ipv4_frag) \
+ _ (1, l4_csum_checked) \
+ _ (7, _reserved17) \
+ _ (1, phase) \
+ _ (1, l3_csum2) \
+ _ (1, first) \
+ _ (1, last) \
+ _ (2, _reserved28) \
+ _ (1, buffer) \
+ _ (1, _reserved31)
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+#define _(b, n) u32 n : (b);
+ foreach_ena_rx_cdesc_status
+#undef _
+ };
+ u32 as_u32;
+ };
+} ena_rx_cdesc_status_t;
+
+typedef struct
+{
+ ena_rx_cdesc_status_t status;
+ u16 length;
+ u16 req_id;
+ u32 hash;
+ u16 sub_qid;
+ u8 offset;
+ u8 reserved;
+} ena_rx_cdesc_t;
+
+STATIC_ASSERT_SIZEOF (ena_rx_cdesc_t, 16);
+
+#define foreach_ena_tx_desc \
+ /* len_ctrl */ \
+ _ (16, length) \
+ _ (6, req_id_hi) \
+ _ (1, _reserved0_22) \
+ _ (1, meta_desc) \
+ _ (1, phase) \
+ _ (1, _reserved0_25) \
+ _ (1, first) \
+ _ (1, last) \
+ _ (1, comp_req) \
+ _ (2, _reserved0_29) \
+ _ (1, _reserved0_31) \
+ /* meta_ctrl */ \
+ _ (4, l3_proto_idx) \
+ _ (1, df) \
+ _ (2, _reserved1_5) \
+ _ (1, tso_en) \
+ _ (5, l4_proto_idx) \
+ _ (1, l3_csum_en) \
+ _ (1, l4_csum_en) \
+ _ (1, ethernet_fcs_dis) \
+ _ (1, _reserved1_16) \
+ _ (1, l4_csum_partial) \
+ _ (3, _reserved_1_18) \
+ _ (1, _reserved_1_21) \
+ _ (10, req_id_lo)
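+
+/* note: the 16-bit request id is split into req_id_lo (10 bits) and
+ * req_id_hi (6 bits); consumers reassemble it as
+ * req_id_lo | req_id_hi << 10 (see format_ena_tx_desc ()) */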
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+#define _(b, n) u32 n : (b);
+ foreach_ena_tx_desc
+#undef _
+ u32 buff_addr_lo;
+ u16 buff_addr_hi;
+ u8 _reserved3_16;
+ u8 header_length;
+ };
+
+ u16x8 as_u16x8;
+ u32x4 as_u32x4;
+ u64x2 as_u64x2;
+ };
+} ena_tx_desc_t;
+
+STATIC_ASSERT_SIZEOF (ena_tx_desc_t, 16);
+
+typedef struct
+{
+ ena_tx_desc_t desc[2];
+ u8 data[96];
+} __clib_aligned (128)
+ena_tx_llq_desc128_t;
+STATIC_ASSERT_SIZEOF (ena_tx_llq_desc128_t, 128);
+
+typedef union
+{
+ struct
+ {
+ u16 req_id;
+ u8 status;
+ union
+ {
+ struct
+ {
+ u8 phase : 1;
+ };
+ u8 flags;
+ };
+ u16 sub_qid;
+ u16 sq_head_idx;
+ };
+ u64 as_u64;
+} ena_tx_cdesc_t;
+
+STATIC_ASSERT_SIZEOF (ena_tx_cdesc_t, 8);
+
+#endif /* _ENA_IO_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_reg_defs.h b/src/plugins/dev_ena/ena_reg_defs.h
new file mode 100644
index 00000000000..11d458e21ac
--- /dev/null
+++ b/src/plugins/dev_ena/ena_reg_defs.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_REG_DEFS_H_
+#define _ENA_REG_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+
+#define ena_reg_version_t_fields \
+ __ (8, minor) \
+ __ (8, major)
+
+#define ena_reg_controller_version_t_fields \
+ __ (8, subminor) \
+ __ (8, minor) \
+ __ (8, major) \
+ __ (8, impl_id)
+
+#define ena_reg_caps_t_fields \
+ __ (1, contiguous_queue_required) \
+ __ (5, reset_timeout) \
+ __ (2, _unused) \
+ __ (8, dma_addr_width) \
+ __ (4, admin_cmd_to)
+
+#define ena_reg_aq_caps_t_fields \
+ __ (16, depth) \
+ __ (16, entry_size)
+
+#define ena_reg_acq_caps_t_fields \
+ __ (16, depth) \
+ __ (16, entry_size)
+
+#define ena_reg_aenq_caps_t_fields \
+ __ (16, depth) \
+ __ (16, entry_size)
+
+#define ena_reg_dev_ctl_t_fields \
+ __ (1, dev_reset) \
+ __ (1, aq_restart) \
+ __ (1, quiescent) \
+ __ (1, io_resume) \
+ __ (24, _unused) \
+ __ (4, reset_reason)
+
+#define ena_reg_dev_sts_t_fields \
+ __ (1, ready) \
+ __ (1, aq_restart_in_progress) \
+ __ (1, aq_restart_finished) \
+ __ (1, reset_in_progress) \
+ __ (1, reset_finished) \
+ __ (1, fatal_error) \
+ __ (1, quiescent_state_in_progress) \
+ __ (1, quiescent_state_achieved)
+
+#define ena_reg_mmio_reg_read_t_fields \
+ __ (16, req_id) \
+ __ (16, reg_off)
+
+#define ena_reg_rss_ind_entry_update_t_fields \
+ __ (16, index) \
+ __ (16, cx_idx)
+
+#define __(l, f) u32 f : l;
+#define _(n) \
+ typedef union \
+ { \
+ struct \
+ { \
+ n##_fields; \
+ }; \
+ u32 as_u32; \
+ } n;
+
+_ (ena_reg_version_t)
+_ (ena_reg_controller_version_t)
+_ (ena_reg_caps_t)
+_ (ena_reg_aq_caps_t)
+_ (ena_reg_acq_caps_t)
+_ (ena_reg_aenq_caps_t)
+_ (ena_reg_dev_ctl_t)
+_ (ena_reg_dev_sts_t)
+_ (ena_reg_mmio_reg_read_t)
+_ (ena_reg_rss_ind_entry_update_t)
+#undef _
+#undef __
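+
+/* for illustration, the first expansion above is equivalent to:
+ *
+ *   typedef union
+ *   {
+ *     struct
+ *     {
+ *       u32 minor : 8;
+ *       u32 major : 8;
+ *     };
+ *     u32 as_u32;
+ *   } ena_reg_version_t;
+ */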
+
+#define foreach_ena_reg \
+ _ (0x00, 1, VERSION, ena_reg_version_t_fields) \
+ _ (0x04, 1, CONTROLLER_VERSION, ena_reg_controller_version_t_fields) \
+ _ (0x08, 1, CAPS, ena_reg_caps_t_fields) \
+ _ (0x0c, 1, EXT_CAPS, ) \
+ _ (0x10, 1, AQ_BASE_LO, ) \
+ _ (0x14, 1, AQ_BASE_HI, ) \
+ _ (0x18, 1, AQ_CAPS, ena_reg_aq_caps_t_fields) \
+ _ (0x20, 1, ACQ_BASE_LO, ) \
+ _ (0x24, 1, ACQ_BASE_HI, ) \
+ _ (0x28, 1, ACQ_CAPS, ena_reg_acq_caps_t_fields) \
+ _ (0x2c, 0, AQ_DB, ) \
+ _ (0x30, 0, ACQ_TAIL, ) \
+ _ (0x34, 1, AENQ_CAPS, ena_reg_aenq_caps_t_fields) \
+ _ (0x38, 0, AENQ_BASE_LO, ) \
+ _ (0x3c, 0, AENQ_BASE_HI, ) \
+ _ (0x40, 0, AENQ_HEAD_DB, ) \
+ _ (0x44, 0, AENQ_TAIL, ) \
+ _ (0x4c, 1, INTR_MASK, ) \
+ _ (0x54, 0, DEV_CTL, ena_reg_dev_ctl_t_fields) \
+ _ (0x58, 1, DEV_STS, ena_reg_dev_sts_t_fields) \
+ _ (0x5c, 0, MMIO_REG_READ, ena_reg_mmio_reg_read_t_fields) \
+ _ (0x60, 0, MMIO_RESP_LO, ) \
+ _ (0x64, 0, MMIO_RESP_HI, ) \
+ _ (0x68, 0, RSS_IND_ENTRY_UPDATE, ena_reg_rss_ind_entry_update_t_fields)
+
+typedef enum
+{
+#define _(o, r, n, f) ENA_REG_##n = o,
+ foreach_ena_reg
+#undef _
+} ena_reg_t;
+
+#define foreach_ena_reset_reason \
+ _ (0, NORMAL) \
+ _ (1, KEEP_ALIVE_TO) \
+ _ (2, ADMIN_TO) \
+ _ (3, MISS_TX_CMPL) \
+ _ (4, INV_RX_REQ_ID) \
+ _ (5, INV_TX_REQ_ID) \
+ _ (6, TOO_MANY_RX_DESCS) \
+ _ (7, INIT_ERR) \
+ _ (8, DRIVER_INVALID_STATE) \
+ _ (9, OS_TRIGGER) \
+ _ (10, OS_NETDEV_WD) \
+ _ (11, SHUTDOWN) \
+ _ (12, USER_TRIGGER) \
+ _ (13, GENERIC) \
+ _ (14, MISS_INTERRUPT) \
+ _ (15, SUSPECTED_POLL_STARVATION) \
+ _ (16, RX_DESCRIPTOR_MALFORMED) \
+ _ (17, TX_DESCRIPTOR_MALFORMED)
+
+typedef enum
+{
+#define _(o, n) ENA_RESET_REASON_##n = o,
+ foreach_ena_reset_reason
+#undef _
+} ena_reset_reason_t;
+
+#endif /* _ENA_REG_DEFS_H_ */
diff --git a/src/plugins/dev_ena/format.c b/src/plugins/dev_ena/format.c
new file mode 100644
index 00000000000..2db52b50f66
--- /dev/null
+++ b/src/plugins/dev_ena/format.c
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vlib/pci/pci.h"
+#include "vnet/error.h"
+#include "vppinfra/error.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_defs.h>
+
+u8 *
+format_ena_dev_info (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u32 indent = format_get_indent (s) + 2;
+
+ format (s, "Elastic Network Adapter:");
+ format (s, "\n%UDevice version is %u, implementation id is %u",
+ format_white_space, indent, ed->dev_attr.device_version,
+ ed->dev_attr.impl_id);
+ format (s, "\n%Urx drops %lu, tx drops %lu", format_white_space, indent,
+ ed->aenq.rx_drops, ed->aenq.tx_drops);
+ format (s, "\n%ULast keepalive arrived ", format_white_space, indent);
+ if (ed->aenq.last_keepalive != 0.0)
+ format (s, "%.2f seconds ago",
+ vlib_time_now (vm) - ed->aenq.last_keepalive);
+ else
+ format (s, "never");
+ return s;
+}
+
+u8 *
+format_ena_mem_addr (u8 *s, va_list *args)
+{
+ ena_mem_addr_t *ema = va_arg (*args, ena_mem_addr_t *);
+ return format (s, "0x%lx", (u64) ema->addr_hi << 32 | ema->addr_lo);
+}
+
+u8 *
+format_ena_tx_desc (u8 *s, va_list *args)
+{
+ ena_tx_desc_t *d = va_arg (*args, ena_tx_desc_t *);
+ s =
+ format (s, "addr 0x%012lx", (u64) d->buff_addr_hi << 32 | d->buff_addr_lo);
+ s = format (s, " len %u", d->length);
+ s = format (s, " req_id 0x%x", d->req_id_lo | d->req_id_hi << 10);
+ if (d->header_length)
+ s = format (s, " hdr_len %u", d->header_length);
+#define _(v, n) \
+ if ((v) < 6 && #n[0] != '_' && d->n) \
+ s = format (s, " " #n " %u", d->n);
+ foreach_ena_tx_desc
+#undef _
+ return s;
+}
+
+u8 *
+format_ena_rx_desc_status (u8 *s, va_list *args)
+{
+ ena_rx_cdesc_status_t st = va_arg (*args, ena_rx_cdesc_status_t);
+ s = format (s, "0x%x", st.as_u32);
+ if (st.as_u32 != 0)
+ {
+ int not_first_line = 0;
+ s = format (s, " -> ");
+#define _(b, n) \
+ if (st.n) \
+ s = format (s, "%s%s %u", not_first_line++ ? ", " : "", #n, st.n);
+ foreach_ena_rx_cdesc_status
+#undef _
+ }
+ return s;
+}
+
+u8 *
+format_ena_rx_trace (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ vlib_node_t *node = va_arg (*args, vlib_node_t *);
+ ena_rx_trace_t *t = va_arg (*args, ena_rx_trace_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
+ u32 indent = format_get_indent (s);
+
+ s = format (
+ s, "ena: %v (%d) qid %u next-node %U length %u req-id 0x%x n-desc %u",
+ hi->name, t->hw_if_index, t->qid, format_vlib_next_node_name, vm,
+ node->index, t->next_index, t->length, t->req_id, t->n_desc);
+ s = format (s, "\n%Ustatus: %U", format_white_space, indent + 2,
+ format_ena_rx_desc_status, t->status);
+ return s;
+}
+
+u8 *
+format_ena_regs (u8 *s, va_list *args)
+{
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ int offset = va_arg (*args, int);
+ u32 indent = format_get_indent (s);
+ u32 rv = 0, f, v;
+ u8 *s2 = 0;
+
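+  /* the _() macro below expands once per readable register (reading and
+   * printing its value); the nested __() macro expands once per bitfield
+   * within that register */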
+#define _(o, r, rn, m) \
+ if ((offset == -1 || offset == o) && r == 1) \
+ { \
+ s = format (s, "\n%U", format_white_space, indent); \
+ vec_reset_length (s2); \
+ s2 = format (s2, "[0x%02x] %s:", o, #rn); \
+ ena_reg_read (dev, o, &rv); \
+ s = format (s, "%-34v = 0x%08x", s2, rv); \
+ f = 0; \
+ m \
+ }
+
+#define __(l, fn) \
+ if (#fn[0] != '_') \
+ { \
+ vec_reset_length (s2); \
+ s2 = format (s2, "\n%U", format_white_space, indent); \
+ s2 = format (s2, " [%2u:%2u] %s", f + l - 1, f, #fn); \
+ s = format (s, " %-35v = ", s2); \
+ v = (rv >> f) & pow2_mask (l); \
+ if (l < 3) \
+ s = format (s, "%u", v); \
+ else if (l <= 8) \
+ s = format (s, "0x%02x (%u)", v, v); \
+ else if (l <= 16) \
+ s = format (s, "0x%04x", v); \
+ else \
+ s = format (s, "0x%08x", v); \
+ } \
+ f += l;
+
+ foreach_ena_reg;
+#undef _
+#undef __
+
+ vec_free (s2);
+
+ return s;
+}
diff --git a/src/plugins/dev_ena/format_aq.c b/src/plugins/dev_ena/format_aq.c
new file mode 100644
index 00000000000..18bad1e050b
--- /dev/null
+++ b/src/plugins/dev_ena/format_aq.c
@@ -0,0 +1,412 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+
+static char *opcode_names[] = {
+#define _(v, s) [v] = #s,
+ foreach_ena_aq_opcode
+#undef _
+};
+
+static char *status_names[] = {
+#define _(v, s) [v] = #s,
+ foreach_ena_aq_compl_status
+#undef _
+};
+
+#define __maxval(s, f) (u64) (((typeof ((s)[0])){ .f = -1LL }).f)
+
+#define __name(s, n) \
+ { \
+ s = format (s, "%s%U%-32s: ", line ? "\n" : "", format_white_space, \
+ line ? indent : 0, #n); \
+ line++; \
+ }
+
+#define _format_number(s, d, n, ...) \
+ { \
+ __name (s, n); \
+ if (d->n < 10) \
+ s = format (s, "%u", d->n); \
+ else if (__maxval (d, n) <= 255) \
+ s = format (s, "0x%02x (%u)", d->n, d->n); \
+ else if (__maxval (d, n) <= 65535) \
+ s = format (s, "0x%04x (%u)", d->n, d->n); \
+ else \
+ s = format (s, "0x%08x (%u)", d->n, d->n); \
+ }
+
+#define _format_with_fn_and_ptr(s, c, n, f) \
+ { \
+ __name (s, n); \
+ s = format (s, "%U", f, &((c)->n)); \
+ }
+
+#define _format_with_fn_and_val(s, c, n, f) \
+ { \
+ __name (s, n); \
+ s = format (s, "%U", f, (c)->n); \
+ }
+#define _format_ena_memory(s, c, n) \
+ _format_with_fn_and_ptr (s, c, n, format_ena_mem_addr)
+
+u8 *
+format_ena_aq_opcode (u8 *s, va_list *args)
+{
+ u32 opcode = va_arg (*args, u32);
+
+ if (opcode >= ARRAY_LEN (opcode_names) || opcode_names[opcode] == 0)
+ return format (s, "UNKNOWN(%u)", opcode);
+ return format (s, "%s", opcode_names[opcode]);
+}
+
+u8 *
+format_ena_aq_status (u8 *s, va_list *args)
+{
+ u32 status = va_arg (*args, u32);
+
+ if (status >= ARRAY_LEN (status_names) || status_names[status] == 0)
+ return format (s, "UNKNOWN(%u)", status);
+ return format (s, "%s", status_names[status]);
+}
+
+u8 *
+format_ena_aq_aenq_groups (u8 *s, va_list *args)
+{
+ ena_aq_aenq_groups_t g = va_arg (*args, ena_aq_aenq_groups_t);
+ u32 i, not_first = 0;
+ u32 indent = format_get_indent (s);
+
+#define _(x) \
+ if (g.x) \
+ { \
+ if (format_get_indent (s) > 80) \
+ s = format (s, "\n%U", format_white_space, indent); \
+ s = format (s, "%s%s", not_first++ ? " " : "", #x); \
+ g.x = 0; \
+ }
+ foreach_ena_aq_aenq_groups;
+#undef _
+
+ foreach_set_bit_index (i, g.as_u32)
+ s = format (s, "%sunknown-%u", not_first++ ? " " : "", i);
+
+ return s;
+}
+
+u8 *
+format_ena_aq_feat_id_bitmap (u8 *s, va_list *args)
+{
+ u32 bmp = va_arg (*args, u32);
+ int i, line = 0;
+ u32 indent = format_get_indent (s);
+
+ foreach_set_bit_index (i, bmp)
+ {
+ ena_aq_feat_info_t *info = ena_aq_get_feat_info (i);
+ if (line++)
+ s = format (s, ", ");
+ if (format_get_indent (s) > 80)
+ s = format (s, "\n%U", format_white_space, indent);
+ if (info)
+ s = format (s, "%s", info->name);
+ else
+ s = format (s, "unknown-%u", i);
+ }
+
+ return s;
+}
+
+u8 *
+format_ena_aq_feat_name (u8 *s, va_list *args)
+{
+ ena_aq_feature_id_t feat_id = va_arg (*args, int);
+ char *feat_names[] = {
+#define _(v, r, gt, st, s, u) [v] = #s,
+ foreach_ena_aq_feature_id
+#undef _
+ };
+
+ if (feat_id >= ARRAY_LEN (feat_names) || feat_names[feat_id] == 0)
+ return format (s, "UNKNOWN(%u)", feat_id);
+ return format (s, "%s", feat_names[feat_id]);
+}
+
+u8 *
+format_ena_aq_feat_desc (u8 *s, va_list *args)
+{
+ ena_aq_feature_id_t feat_id = va_arg (*args, int);
+ void *data = va_arg (*args, void *);
+ ena_aq_feat_info_t *info = ena_aq_get_feat_info (feat_id);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ switch (feat_id)
+ {
+ case ENA_ADMIN_FEAT_ID_DEVICE_ATTRIBUTES:
+ {
+ ena_aq_feat_device_attr_t *d = data;
+ _format_number (s, d, impl_id);
+ _format_number (s, d, device_version);
+ _format_number (s, d, phys_addr_width);
+ _format_number (s, d, virt_addr_width);
+ _format_with_fn_and_val (s, d, mac_addr, format_ethernet_address);
+ _format_number (s, d, max_mtu);
+ _format_with_fn_and_val (s, d, supported_features,
+ format_ena_aq_feat_id_bitmap);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_AENQ_CONFIG:
+ {
+ ena_aq_feat_aenq_config_t *d = data;
+ _format_with_fn_and_val (s, d, supported_groups,
+ format_ena_aq_aenq_groups);
+ _format_with_fn_and_val (s, d, enabled_groups,
+ format_ena_aq_aenq_groups);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_INTERRUPT_MODERATION:
+ {
+ ena_aq_feat_intr_moder_t *d = data;
+ _format_number (s, d, intr_delay_resolution);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_STATELESS_OFFLOAD_CONFIG:
+ {
+ ena_aq_feat_stateless_offload_config_t *d = data;
+ _format_number (s, d, rx_supported);
+ _format_number (s, d, rx_enabled);
+ _format_number (s, d, tx);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_RSS_INDIRECTION_TABLE_CONFIG:
+ {
+ ena_aq_feat_rss_ind_table_config_t *d = data;
+ _format_number (s, d, min_size);
+ _format_number (s, d, max_size);
+ _format_number (s, d, size);
+ _format_number (s, d, one_entry_update);
+ _format_number (s, d, inline_index);
+ _format_number (s, d, inline_entry.cq_idx);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_MAX_QUEUES_NUM:
+ {
+ ena_aq_feat_max_queue_num_t *d = data;
+ _format_number (s, d, max_sq_num);
+ _format_number (s, d, max_sq_depth);
+ _format_number (s, d, max_cq_num);
+ _format_number (s, d, max_cq_depth);
+ _format_number (s, d, max_legacy_llq_num);
+ _format_number (s, d, max_legacy_llq_depth);
+ _format_number (s, d, max_header_size);
+ _format_number (s, d, max_packet_tx_descs);
+ _format_number (s, d, max_packet_rx_descs);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT:
+ {
+ ena_aq_feat_max_queue_ext_t *d = data;
+ _format_number (s, d, max_rx_sq_num);
+ _format_number (s, d, max_rx_cq_num);
+ _format_number (s, d, max_tx_sq_num);
+ _format_number (s, d, max_tx_cq_num);
+ _format_number (s, d, max_rx_sq_depth);
+ _format_number (s, d, max_rx_cq_depth);
+ _format_number (s, d, max_tx_sq_depth);
+ _format_number (s, d, max_tx_cq_depth);
+ _format_number (s, d, version);
+ _format_number (s, d, max_tx_header_size);
+ _format_number (s, d, max_per_packet_tx_descs);
+ _format_number (s, d, max_per_packet_rx_descs);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_RSS_HASH_FUNCTION:
+ {
+ ena_aq_feat_rss_hash_function_t *d = data;
+ _format_number (s, d, supported_func);
+ _format_number (s, d, selected_func);
+ _format_number (s, d, init_val);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_LLQ:
+ {
+ ena_aq_feat_llq_t *d = data;
+ _format_number (s, d, max_llq_num);
+ _format_number (s, d, max_llq_depth);
+ _format_number (s, d, header_location_ctrl_supported);
+ _format_number (s, d, header_location_ctrl_enabled);
+ _format_number (s, d, entry_size_ctrl_supported);
+ _format_number (s, d, entry_size_ctrl_enabled);
+ _format_number (s, d, desc_num_before_header_supported);
+ _format_number (s, d, desc_num_before_header_enabled);
+ _format_number (s, d, descriptors_stride_ctrl_supported);
+ _format_number (s, d, descriptors_stride_ctrl_enabled);
+ _format_number (s, d, accel_mode.get.supported_flags);
+ _format_number (s, d, accel_mode.get.max_tx_burst_size);
+ _format_number (s, d, accel_mode.set.enabled_flags);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_EXTRA_PROPERTIES_STRINGS:
+ {
+ ena_aq_feat_extra_properties_strings_t *d = data;
+ _format_number (s, d, count);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_EXTRA_PROPERTIES_FLAGS:
+ {
+ ena_aq_feat_extra_properties_flags_t *d = data;
+ _format_number (s, d, flags);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_HOST_ATTR_CONFIG:
+ {
+ ena_aq_feat_host_attr_config_t *d = data;
+ _format_ena_memory (s, d, os_info_ba);
+ _format_ena_memory (s, d, debug_ba);
+ _format_number (s, d, debug_area_size);
+ }
+ break;
+
+ default:
+ if (info)
+ s = format (s, "%U", format_hexdump, data, info->data_sz);
+ break;
+ }
+
+ return s;
+}
+
+u8 *
+format_ena_aq_create_sq_cmd (u8 *s, va_list *args)
+{
+ ena_aq_create_sq_cmd_t *cmd = va_arg (*args, ena_aq_create_sq_cmd_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, cmd, sq_direction);
+ _format_number (s, cmd, placement_policy);
+ _format_number (s, cmd, completion_policy);
+ _format_number (s, cmd, is_physically_contiguous);
+ _format_number (s, cmd, cq_idx);
+ _format_number (s, cmd, sq_depth);
+ _format_ena_memory (s, cmd, sq_ba);
+ _format_ena_memory (s, cmd, sq_head_writeback);
+ return s;
+}
+
+u8 *
+format_ena_aq_create_cq_cmd (u8 *s, va_list *args)
+{
+ ena_aq_create_cq_cmd_t *cmd = va_arg (*args, ena_aq_create_cq_cmd_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, cmd, interrupt_mode_enabled);
+ _format_number (s, cmd, cq_entry_size_words);
+ _format_number (s, cmd, cq_depth);
+ _format_number (s, cmd, msix_vector);
+ _format_ena_memory (s, cmd, cq_ba);
+ return s;
+}
+
+u8 *
+format_ena_aq_create_sq_resp (u8 *s, va_list *args)
+{
+ ena_aq_create_sq_resp_t *resp = va_arg (*args, ena_aq_create_sq_resp_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, resp, sq_idx);
+ _format_number (s, resp, sq_doorbell_offset);
+ _format_number (s, resp, llq_descriptors_offset);
+ _format_number (s, resp, llq_headers_offset);
+ return s;
+}
+
+u8 *
+format_ena_aq_create_cq_resp (u8 *s, va_list *args)
+{
+ ena_aq_create_cq_resp_t *resp = va_arg (*args, ena_aq_create_cq_resp_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, resp, cq_idx);
+ _format_number (s, resp, cq_actual_depth);
+ _format_number (s, resp, numa_node_register_offset);
+ _format_number (s, resp, cq_head_db_register_offset);
+ _format_number (s, resp, cq_interrupt_unmask_register_offset);
+ return s;
+}
+
+u8 *
+format_ena_aq_destroy_sq_cmd (u8 *s, va_list *args)
+{
+ ena_aq_destroy_sq_cmd_t *cmd = va_arg (*args, ena_aq_destroy_sq_cmd_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, cmd, sq_idx);
+ _format_number (s, cmd, sq_direction);
+ return s;
+}
+
+u8 *
+format_ena_aq_destroy_cq_cmd (u8 *s, va_list *args)
+{
+ ena_aq_destroy_cq_cmd_t *cmd = va_arg (*args, ena_aq_destroy_cq_cmd_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, cmd, cq_idx);
+ return s;
+}
+
+u8 *
+format_ena_aq_basic_stats (u8 *s, va_list *args)
+{
+ ena_aq_basic_stats_t *st = va_arg (*args, ena_aq_basic_stats_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, st, tx_bytes);
+ _format_number (s, st, tx_pkts);
+ _format_number (s, st, rx_bytes);
+ _format_number (s, st, rx_pkts);
+ _format_number (s, st, rx_drops);
+ _format_number (s, st, tx_drops);
+ return s;
+}
+
+u8 *
+format_ena_aq_eni_stats (u8 *s, va_list *args)
+{
+ ena_aq_eni_stats_t *st = va_arg (*args, ena_aq_eni_stats_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, st, bw_in_allowance_exceeded);
+ _format_number (s, st, bw_out_allowance_exceeded);
+ _format_number (s, st, pps_allowance_exceeded);
+ _format_number (s, st, conntrack_allowance_exceeded);
+ _format_number (s, st, linklocal_allowance_exceeded);
+ return s;
+}
diff --git a/src/plugins/dev_ena/port.c b/src/plugins/dev_ena/port.c
new file mode 100644
index 00000000000..2b26fefc5e3
--- /dev/null
+++ b/src/plugins/dev_ena/port.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "port",
+};
+
+vnet_dev_rv_t
+ena_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+
+ log_debug (dev, "port %u", port->port_id);
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+ena_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "port start: port %u", port->port_id);
+
+ if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MTU))
+ {
+ ena_aq_feat_mtu_t mtu = { .mtu = port->max_rx_frame_size };
+
+ if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_MTU, &mtu)))
+ return rv;
+ }
+
+ if ((rv = vnet_dev_port_start_all_rx_queues (vm, port)))
+ return rv;
+
+ if ((rv = vnet_dev_port_start_all_tx_queues (vm, port)))
+ return rv;
+
+ return VNET_DEV_OK;
+}
+
+void
+ena_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ log_debug (port->dev, "port stop: port %u", port->port_id);
+}
+
+vnet_dev_rv_t
+ena_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ if (port->started)
+ rv = VNET_DEV_ERR_PORT_STARTED;
+ break;
+
+ default:
+ rv = VNET_DEV_ERR_NOT_SUPPORTED;
+ };
+
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ break;
+
+ default:
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ };
+
+ return rv;
+}
diff --git a/src/plugins/dev_ena/queue.c b/src/plugins/dev_ena/queue.c
new file mode 100644
index 00000000000..08c763c8461
--- /dev/null
+++ b/src/plugins/dev_ena/queue.c
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "queue",
+};
+
+void
+ena_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+
+ ASSERT (rxq->started == 0);
+ ASSERT (eq->cq_created == 0);
+ ASSERT (eq->sq_created == 0);
+
+ log_debug (dev, "queue %u", rxq->queue_id);
+
+ foreach_pointer (p, eq->buffer_indices, eq->compl_sqe_indices)
+ if (p)
+ clib_mem_free (p);
+
+ foreach_pointer (p, eq->cqes, eq->sqes)
+ vnet_dev_dma_mem_free (vm, dev, p);
+}
+
+vnet_dev_rv_t
+ena_rx_queue_alloc (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+ ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+ u16 size = rxq->size;
+ vnet_dev_rv_t rv;
+
+ ASSERT (eq->buffer_indices == 0);
+ ASSERT (eq->compl_sqe_indices == 0);
+ ASSERT (eq->cqes == 0);
+ ASSERT (eq->sqes == 0);
+
+ log_debug (dev, "queue %u", rxq->queue_id);
+
+ eq->buffer_indices = clib_mem_alloc_aligned (
+ sizeof (eq->buffer_indices[0]) * size, CLIB_CACHE_LINE_BYTES);
+
+ eq->compl_sqe_indices = clib_mem_alloc_aligned (
+ sizeof (eq->compl_sqe_indices[0]) * size, CLIB_CACHE_LINE_BYTES);
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->cqes[0]) * size, 0,
+ (void **) &eq->cqes)))
+ goto err;
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->sqes[0]) * size, 0,
+ (void **) &eq->sqes)))
+ goto err;
+
+ return VNET_DEV_OK;
+
+err:
+ ena_rx_queue_free (vm, rxq);
+ return rv;
+}
+
+void
+ena_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+
+ ASSERT (txq->started == 0);
+
+ log_debug (dev, "queue %u", txq->queue_id);
+
+ foreach_pointer (p, eq->buffer_indices, eq->sqe_templates)
+ if (p)
+ clib_mem_free (p);
+
+ foreach_pointer (p, eq->cqes, eq->sqes)
+ vnet_dev_dma_mem_free (vm, dev, p);
+}
+
+vnet_dev_rv_t
+ena_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+ ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+ u16 size = txq->size;
+ vnet_dev_rv_t rv;
+
+ ASSERT (eq->buffer_indices == 0);
+ ASSERT (eq->sqe_templates == 0);
+ ASSERT (eq->cqes == 0);
+ ASSERT (eq->sqes == 0);
+
+ log_debug (dev, "queue %u", txq->queue_id);
+
+ eq->buffer_indices = clib_mem_alloc_aligned (
+ sizeof (eq->buffer_indices[0]) * size, CLIB_CACHE_LINE_BYTES);
+ eq->sqe_templates = clib_mem_alloc_aligned (
+ sizeof (eq->sqe_templates[0]) * size, CLIB_CACHE_LINE_BYTES);
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->cqes[0]) * size, 0,
+ (void **) &eq->cqes)))
+ goto err;
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->sqes[0]) * size, 0,
+ (void **) &eq->sqes)))
+ goto err;
+
+ return VNET_DEV_OK;
+
+err:
+ ena_tx_queue_free (vm, txq);
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u16 buffer_size = vnet_dev_get_rx_queue_buffer_data_size (vm, rxq);
+ u16 size = rxq->size;
+ vnet_dev_rv_t rv;
+
+ /* Create Completion Queue */
+ ena_aq_create_cq_resp_t cqresp;
+ ena_aq_create_cq_cmd_t cqcmd = {
+ .interrupt_mode_enabled = 1,
+ .cq_entry_size_words = sizeof (ena_rx_cdesc_t) / 4,
+ .cq_depth = size,
+ .msix_vector = ~0,
+ };
+
+ ena_set_mem_addr (vm, dev, &cqcmd.cq_ba, eq->cqes);
+ if ((rv = ena_aq_create_cq (vm, dev, &cqcmd, &cqresp)))
+ {
+ log_err (dev, "queue %u cq creation failed", rxq->queue_id);
+ goto error;
+ }
+
+ eq->cq_idx = cqresp.cq_idx;
+ eq->cq_created = 1;
+
+ log_debug (dev, "queue %u cq %u created", rxq->queue_id, eq->cq_idx);
+
+ /* Create Submission Queue */
+ ena_aq_create_sq_resp_t sqresp;
+ ena_aq_create_sq_cmd_t sqcmd = {
+ .sq_direction = ENA_ADMIN_SQ_DIRECTION_RX,
+ .placement_policy = ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST,
+ .completion_policy = ENA_ADMIN_SQ_COMPLETION_POLICY_DESC,
+ .is_physically_contiguous = 1,
+ .sq_depth = size,
+ .cq_idx = cqresp.cq_idx,
+ };
+
+ ena_set_mem_addr (vm, dev, &sqcmd.sq_ba, eq->sqes);
+ if ((rv = ena_aq_create_sq (vm, dev, &sqcmd, &sqresp)))
+ {
+ log_err (dev, "queue %u sq creation failed", rxq->queue_id);
+ goto error;
+ }
+
+ eq->sq_idx = sqresp.sq_idx;
+ eq->sq_db = (u32 *) ((u8 *) ed->reg_bar + sqresp.sq_doorbell_offset);
+ eq->sq_created = 1;
+
+ log_debug (dev, "queue %u sq %u created, sq_db %p", rxq->queue_id,
+ eq->sq_idx, eq->sq_db);
+
+ for (int i = 0; i < size; i++)
+ {
+ eq->sqes[i] = (ena_rx_desc_t){
+ .lo = {
+ .length = buffer_size,
+ .comp_req = 1,
+ .first = 1,
+ .last = 1,
+ .reserved5 = 1, /* ena_com says MBO */
+ .req_id = i,
+ },
+ };
+ eq->buffer_indices[i] = VLIB_BUFFER_INVALID_INDEX;
+ eq->compl_sqe_indices[i] = i;
+ }
+
+ eq->sq_next = 0;
+ eq->n_compl_sqes = size;
+
+ return VNET_DEV_OK;
+
+error:
+ ena_rx_queue_stop (vm, rxq);
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u16 size = txq->size;
+ vnet_dev_rv_t rv;
+
+ /* Create Completion Queue */
+ ena_aq_create_cq_resp_t cqresp;
+ ena_aq_create_cq_cmd_t cqcmd = {
+ .interrupt_mode_enabled = 1,
+ .cq_entry_size_words = sizeof (ena_tx_cdesc_t) / 4,
+ .cq_depth = size,
+ .msix_vector = ~0,
+ };
+
+ ena_set_mem_addr (vm, dev, &cqcmd.cq_ba, eq->cqes);
+ if ((rv = ena_aq_create_cq (vm, dev, &cqcmd, &cqresp)))
+ {
+ log_err (dev, "queue %u cq creation failed", txq->queue_id);
+ goto error;
+ }
+
+ eq->cq_idx = cqresp.cq_idx;
+ eq->cq_created = 1;
+
+ log_debug (dev, "queue %u cq %u created", txq->queue_id, eq->cq_idx);
+
+ /* Create Submission Queue */
+ ena_aq_create_sq_resp_t sqresp;
+ ena_aq_create_sq_cmd_t sqcmd = {
+ .sq_direction = ENA_ADMIN_SQ_DIRECTION_TX,
+ .placement_policy = eq->llq ? ENA_ADMIN_SQ_PLACEMENT_POLICY_DEVICE :
+ ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST,
+ .completion_policy = ENA_ADMIN_SQ_COMPLETION_POLICY_DESC,
+ .is_physically_contiguous = 1,
+ .sq_depth = size,
+ .cq_idx = cqresp.cq_idx,
+ };
+
+ if (eq->llq == 0)
+ ena_set_mem_addr (vm, dev, &sqcmd.sq_ba, eq->sqes);
+ if ((rv = ena_aq_create_sq (vm, dev, &sqcmd, &sqresp)))
+ {
+ log_err (dev, "queue %u sq creation failed", txq->queue_id);
+ goto error;
+ }
+
+ eq->sq_idx = sqresp.sq_idx;
+ eq->sq_db = (u32 *) ((u8 *) ed->reg_bar + sqresp.sq_doorbell_offset);
+ eq->sq_created = 1;
+
+ log_debug (dev, "queue %u sq %u created, sq_db %p", txq->queue_id,
+ eq->sq_idx, eq->sq_db);
+
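+  /* pre-build one tx SQE template per slot with the split request id and
+   * comp_req set, which the transmit path can reuse */
+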
+ for (u32 i = 0; i < size; i++)
+ {
+ eq->sqe_templates[i] =
+ (ena_tx_desc_t){ .req_id_lo = i, .req_id_hi = i >> 10, .comp_req = 1 }
+ .as_u64x2[0];
+
+ eq->buffer_indices[i] = VLIB_BUFFER_INVALID_INDEX;
+ }
+
+ eq->sq_head = 0;
+ eq->sq_tail = 0;
+ eq->cq_next = 0;
+
+#if 0
+  if (eq->llq)
+    eq->llq_descs =
+      (ena_tx_llq_desc128_t *) ((u8 *) ed->mem_bar +
+				sqresp.llq_descriptors_offset);
+#endif
+
+ log_debug (dev, "queue %u sq %u created, sq_db %p llq_desc %p",
+ txq->queue_id, eq->sq_idx, eq->sq_db,
+ eq->llq ? eq->llq_descs : 0);
+ return VNET_DEV_OK;
+
+error:
+ ena_tx_queue_stop (vm, txq);
+ return rv;
+}
+
+static void
+ena_free_sq_buffer_indices (vlib_main_t *vm, u32 *sq_buffer_indices,
+ u32 n_desc)
+{
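+  /* compact the still-valid buffer indices to the front of the array,
+   * then free them with a single vlib_buffer_free () call */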
+ u32 *to = sq_buffer_indices;
+
+ for (u32 *from = to; from < sq_buffer_indices + n_desc; from++)
+ if (from[0] != VLIB_BUFFER_INVALID_INDEX)
+ to++[0] = from[0];
+
+ if (to - sq_buffer_indices > 0)
+ vlib_buffer_free (vm, sq_buffer_indices, to - sq_buffer_indices);
+}
+
+void
+ena_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_t *dev = rxq->port->dev;
+ vnet_dev_rv_t rv;
+
+ if (eq->sq_created)
+ {
+ ena_aq_destroy_sq_cmd_t cmd = {
+ .sq_idx = eq->sq_idx,
+	.sq_direction = ENA_ADMIN_SQ_DIRECTION_RX,
+ };
+
+ if ((rv = ena_aq_destroy_sq (vm, dev, &cmd)))
+ log_err (dev, "queue %u failed to destroy sq %u", rxq->queue_id,
+ eq->sq_idx);
+ eq->sq_created = 0;
+ };
+
+ if (eq->cq_created)
+ {
+ ena_aq_destroy_cq_cmd_t cmd = {
+ .cq_idx = eq->cq_idx,
+ };
+
+ if ((rv = ena_aq_destroy_cq (vm, dev, &cmd)))
+ log_err (dev, "queue %u failed to destroy cq %u", rxq->queue_id,
+ eq->cq_idx);
+ eq->cq_created = 0;
+ };
+
+ if (eq->n_compl_sqes < rxq->size)
+ ena_free_sq_buffer_indices (vm, eq->buffer_indices, rxq->size);
+}
+
+void
+ena_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_t *dev = txq->port->dev;
+ vnet_dev_rv_t rv;
+
+ if (eq->sq_created)
+ {
+ ena_aq_destroy_sq_cmd_t cmd = {
+ .sq_idx = eq->sq_idx,
+ .sq_direction = ENA_ADMIN_SQ_DIRECTION_TX,
+ };
+
+ if ((rv = ena_aq_destroy_sq (vm, dev, &cmd)))
+ log_err (dev, "queue %u failed to destroy sq %u", txq->queue_id,
+ eq->sq_idx);
+ eq->sq_created = 0;
+ };
+
+ if (eq->cq_created)
+ {
+ ena_aq_destroy_cq_cmd_t cmd = {
+ .cq_idx = eq->cq_idx,
+ };
+
+ if ((rv = ena_aq_destroy_cq (vm, dev, &cmd)))
+ log_err (dev, "queue %u failed to destroy cq %u", txq->queue_id,
+ eq->cq_idx);
+ eq->cq_created = 0;
+ };
+
+ if (eq->sq_head != eq->sq_tail)
+ ena_free_sq_buffer_indices (vm, eq->buffer_indices, txq->size);
+}
diff --git a/src/plugins/dev_ena/reg.c b/src/plugins/dev_ena/reg.c
new file mode 100644
index 00000000000..7f2cc0f8aba
--- /dev/null
+++ b/src/plugins/dev_ena/reg.c
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "reg",
+};
+
+static vnet_dev_rv_t
+ena_err (vnet_dev_t *dev, vnet_dev_rv_t rv, char *fmt, ...)
+{
+ va_list va;
+ u8 *s;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+ log_err (dev, "%v", s);
+ vec_free (s);
+ return rv;
+}
+
+static u8 *
+format_ena_reg_name (u8 *s, va_list *args)
+{
+ int offset = va_arg (*args, int);
+
+ char *reg_names[] = {
+#define _(o, r, rn, m) [(o) >> 2] = #rn,
+ foreach_ena_reg
+#undef _
+ };
+
+ offset >>= 2;
+
+ if (offset < 0 || offset >= ARRAY_LEN (reg_names) || reg_names[offset] == 0)
+ return format (s, "(unknown)");
+ return format (s, "%s", reg_names[offset]);
+}
+
+void
+ena_reg_write (vnet_dev_t *dev, ena_reg_t reg, void *v)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u32 *p = (u32 *) ((u8 *) ed->reg_bar + reg);
+ u32 val = *(u32 *) v;
+ log_debug (dev, "%s: reg %U (0x%02x) value 0x%08x", __func__,
+ format_ena_reg_name, reg, reg, val);
+ __atomic_store_n (p, val, __ATOMIC_RELEASE);
+}
+
+void
+ena_reg_set_dma_addr (vlib_main_t *vm, vnet_dev_t *dev, u32 rlo, u32 rhi,
+ void *p)
+{
+ uword pa = vnet_dev_get_dma_addr (vm, dev, p);
+ u32 reg = (u32) pa;
+ ena_reg_write (dev, rlo, &reg);
+ reg = pa >> 32;
+ ena_reg_write (dev, rhi, &reg);
+}
+
+void
+ena_reg_read (vnet_dev_t *dev, ena_reg_t reg, const void *v)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ vlib_main_t *vm = vlib_get_main ();
+ u32 rv;
+ f64 dt = 0, t0;
+
+ if (ed->readless == 0)
+ {
+ rv =
+ __atomic_load_n ((u32 *) ((u8 *) ed->reg_bar + reg), __ATOMIC_SEQ_CST);
+ }
+ else
+ {
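+      /* "readless" mode: BAR reads are not supported, so the register
+       * offset is written to the MMIO_REG_READ register and the value is
+       * polled (up to 200 ms) from the DMA-mapped response buffer */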
+ u32 *p = (u32 *) ((u8 *) ed->reg_bar + ENA_REG_MMIO_REG_READ);
+
+ ena_reg_mmio_reg_read_t rr = { .reg_off = reg, .req_id = 1 };
+ ed->mmio_resp->req_id = 0;
+ ed->mmio_resp->reg_val = ~0;
+
+ __atomic_store_n (p, rr.as_u32, __ATOMIC_RELEASE);
+
+ t0 = vlib_time_now (vm);
+ while (ed->mmio_resp->req_id == 0 && dt < 0.2)
+ {
+ CLIB_PAUSE ();
+ dt = vlib_time_now (vm) - t0;
+ }
+
+ rv = ed->mmio_resp->reg_val;
+ }
+
+ log_debug (dev, "%s: reg %U (0x%02x) value 0x%08x dt %.3fs", __func__,
+ format_ena_reg_name, reg, reg, rv, dt);
+ *(u32 *) v = rv;
+}
+
+vnet_dev_rv_t
+ena_reg_reset (vlib_main_t *vm, vnet_dev_t *dev, ena_reset_reason_t reason)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ ena_reg_version_t ver;
+ ena_reg_controller_version_t ctrl_ver;
+ ena_reg_caps_t caps = {};
+ ena_reg_dev_sts_t dev_sts = {};
+ ena_reg_dev_ctl_t reset_start = { .dev_reset = 1, .reset_reason = reason };
+
+ if (ed->readless)
+ ena_reg_set_dma_addr (vm, dev, ENA_REG_MMIO_RESP_LO, ENA_REG_MMIO_RESP_HI,
+ ed->mmio_resp);
+
+ ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts);
+ ena_reg_read (dev, ENA_REG_CAPS, &caps);
+
+ if (caps.as_u32 == ~0 && dev_sts.as_u32 == ~0)
+ return ena_err (dev, VNET_DEV_ERR_BUS, "failed to read regs");
+
+ if (dev_sts.ready == 0)
+ return VNET_DEV_ERR_NOT_READY;
+
+ log_debug (dev, "reg_reset: reset timeout is %u", caps.reset_timeout);
+
+ ena_reg_write (dev, ENA_REG_DEV_CTL, &reset_start);
+
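+  /* re-arm the DMA address of the MMIO response buffer, which the device
+   * reset presumably clears */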
+ if (ed->readless)
+ ena_reg_set_dma_addr (vm, dev, ENA_REG_MMIO_RESP_LO, ENA_REG_MMIO_RESP_HI,
+ ed->mmio_resp);
+
+  for (int i = 0; ; i++)
+    {
+      ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts);
+      if (dev_sts.reset_in_progress)
+	break;
+      if (i == 20)
+	return ena_err (dev, VNET_DEV_ERR_BUS, "failed to initiate reset");
+      vlib_process_suspend (vm, 0.001);
+    }
+
+ ena_reg_write (dev, ENA_REG_DEV_CTL, &(ena_reg_dev_ctl_t){});
+
+  for (int i = 0; ; i++)
+    {
+      ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts);
+      if (dev_sts.reset_in_progress == 0)
+	break;
+      if (i == 20)
+	return ena_err (dev, VNET_DEV_ERR_BUS, "failed to complete reset");
+      vlib_process_suspend (vm, 0.001);
+    }
+
+ ena_reg_read (dev, ENA_REG_VERSION, &ver);
+ ena_reg_read (dev, ENA_REG_CONTROLLER_VERSION, &ctrl_ver);
+
+  log_info (dev, "version %u.%u controller_version %u.%u.%u impl_id %u",
+ ver.major, ver.minor, ctrl_ver.major, ctrl_ver.minor,
+ ctrl_ver.subminor, ctrl_ver.impl_id);
+
+ return 0;
+}
diff --git a/src/plugins/dev_ena/rx_node.c b/src/plugins/dev_ena/rx_node.c
new file mode 100644
index 00000000000..41fc5b8c943
--- /dev/null
+++ b/src/plugins/dev_ena/rx_node.c
@@ -0,0 +1,457 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/vector/mask_compare.h>
+#include <vppinfra/vector/compress.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+#define ENA_RX_REFILL_BATCH 32
+
+typedef struct
+{
+ u16 phase_bit;
+ u16 size;
+ u32 mask;
+ ena_rx_cdesc_status_t st_or;
+ ena_rx_cdesc_status_t st_and;
+ u16 *comp_sqe_indices;
+ u32 *sq_buffer_indices;
+} ena_rx_ctx_t;
+
+static_always_inline void
+ena_device_input_status_to_flags (ena_rx_cdesc_status_t *statuses, u32 *flags,
+ u32 n_desc, vlib_frame_bitmap_t first_bmp,
+ int maybe_chained)
+{
+ const ena_rx_cdesc_status_t mask_first = { .first = 1 },
+ match_first1 = { .first = 1 };
+
+ const ena_rx_cdesc_status_t mask_last = { .last = 1 },
+ match_last0 = { .last = 0 };
+
+ const ena_rx_cdesc_status_t mask_l4_csum = { .ipv4_frag = 1,
+ .l4_csum_checked = 1,
+ .l4_csum_err = 1 },
+ match_l4_csum_ok = { .l4_csum_checked = 1 };
+
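+  /* statuses are classified with (status & mask) == match tests shared by
+   * the scalar and vector paths; the tail of the array is zeroed first so
+   * the vector loop can safely overread past n_desc */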
+ clib_memset_u32 (statuses + n_desc, 0, 8);
+#if defined(CLIB_HAVE_VEC128)
+
+#if defined(CxLIB_HAVE_VEC512)
+#define N 16
+#define u32xN u32x16
+#define u32xNu u32x16u
+#define u32xN_splat u32x16_splat
+#elif defined(CxLIB_HAVE_VEC256)
+#define N 8
+#define u32xN u32x8
+#define u32xNu u32x8u
+#define u32xN_splat u32x8_splat
+#else
+#define N 4
+#define u32xN u32x4
+#define u32xNu u32x4u
+#define u32xN_splat u32x4_splat
+#endif
+
+ const u32xN st_mask_first = u32xN_splat (mask_first.as_u32);
+ const u32xN st_match_first1 = u32xN_splat (match_first1.as_u32);
+ const u32xN st_mask_last = u32xN_splat (mask_last.as_u32);
+ const u32xN st_match_last0 = u32xN_splat (match_last0.as_u32);
+ const u32xN st_mask_l4_csum = u32xN_splat (mask_l4_csum.as_u32);
+ const u32xN st_match_l4_csum_ok = u32xN_splat (match_l4_csum_ok.as_u32);
+ const u32xN f_total_len_valid = u32xN_splat (VLIB_BUFFER_TOTAL_LENGTH_VALID);
+ const u32xN f_next_preset = u32xN_splat (VLIB_BUFFER_NEXT_PRESENT);
+ const u32xN f_l4_csum = u32xN_splat (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
+ VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
+
+ for (u32 i = 0; i < round_pow2 (n_desc, 2 * N); i += 2 * N)
+ {
+ uword msk = 0;
+ u32xN f0, f1, r0, r1;
+ u32xN s0 = ((u32xNu *) (statuses + i))[0];
+ u32xN s1 = ((u32xNu *) (statuses + i))[1];
+
+ r0 = (s0 & st_mask_first) == st_match_first1;
+ r1 = (s1 & st_mask_first) == st_match_first1;
+ f0 = r0 & f_total_len_valid;
+ f1 = r1 & f_total_len_valid;
+
+ if (maybe_chained)
+ {
+#if defined(CLIB_HAVE_VEC512)
+ u64 msb_mask = 0x1111111111111111;
+ msk = bit_extract_u64 (u8x64_msb_mask ((u8x64) r0), msb_mask);
+ msk |= bit_extract_u64 (u8x64_msb_mask ((u8x64) r1), msb_mask) << 16;
+#elif defined(CLIB_HAVE_VEC256)
+ msk = u8x32_msb_mask ((u8x32) r0);
+ msk |= (u64) u8x32_msb_mask ((u8x32) r1) << 32;
+ msk = bit_extract_u64 (msk, 0x1111111111111111);
+#else
+ msk = u8x16_msb_mask ((u8x16) r0);
+ msk |= (u32) u8x16_msb_mask ((u8x16) r1) << 16;
+ msk = bit_extract_u32 (msk, 0x11111111);
+#endif
+ first_bmp[i / uword_bits] |= msk << (i % uword_bits);
+ }
+
+ f0 |= ((s0 & st_mask_last) == st_match_last0) & f_next_preset;
+ f1 |= ((s1 & st_mask_last) == st_match_last0) & f_next_preset;
+
+ f0 |= ((s0 & st_mask_l4_csum) == st_match_l4_csum_ok) & f_l4_csum;
+ f1 |= ((s1 & st_mask_l4_csum) == st_match_l4_csum_ok) & f_l4_csum;
+
+ ((u32xNu *) (flags + i))[0] = f0;
+ ((u32xNu *) (flags + i))[1] = f1;
+ }
+#else
+  u32 i = 0, n_left = n_desc;
+
+  while (n_left)
+    {
+      u32 f = 0;
+      ena_rx_cdesc_status_t st = statuses++[0];
+
+      if ((st.as_u32 & mask_first.as_u32) == match_first1.as_u32)
+	{
+	  f |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+	  if (maybe_chained)
+	    first_bmp[i / uword_bits] |= (uword) 1 << (i % uword_bits);
+	}
+
+      if ((st.as_u32 & mask_last.as_u32) == match_last0.as_u32)
+	f |= VLIB_BUFFER_NEXT_PRESENT;
+
+      if ((st.as_u32 & mask_l4_csum.as_u32) == match_l4_csum_ok.as_u32)
+	f |= VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
+	     VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
+
+      flags++[0] = f;
+      i++;
+      n_left--;
+    }
+#endif
+}
+
+static_always_inline u16
+ena_device_input_cq_dequeue_no_wrap (ena_rx_ctx_t *ctx, ena_rxq_t *q,
+ ena_rx_cdesc_status_t *statuses,
+ u16 *lengths, u16 *csi)
+{
+ u32 next = q->cq_next;
+ ena_rx_cdesc_t *cqes = q->cqes;
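+  /* completion entry ownership is tracked with a phase bit which flips on
+   * each pass through the ring; an entry is valid while its phase differs
+   * from the value expected at the current position */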
+ u32 phase = (next & ctx->size << 1) != 0;
+ u16 index = next & ctx->mask;
+ ena_rx_cdesc_t *cd = cqes + index;
+ ena_rx_cdesc_status_t st;
+ u32 n_to_check, i = 0;
+
+ st = cd->status;
+ if (st.phase == phase)
+ return 0;
+
+ n_to_check = clib_min (VLIB_FRAME_SIZE, ctx->size - index);
+
+ ctx->st_or.as_u32 |= st.as_u32;
+ ctx->st_and.as_u32 &= st.as_u32;
+ statuses[i] = st;
+ lengths[i] = cd->length;
+ csi[i] = cd->req_id;
+ i++;
+ cd++;
+
+more:
+ for (st = cd->status; i < n_to_check && st.phase != phase;
+ i++, st = (++cd)->status)
+ {
+ ctx->st_or.as_u32 |= st.as_u32;
+ ctx->st_and.as_u32 &= st.as_u32;
+ statuses[i] = st;
+ lengths[i] = cd->length;
+ csi[i] = cd->req_id;
+ }
+
+ if (i == n_to_check)
+ {
+ n_to_check = VLIB_FRAME_SIZE - n_to_check;
+ if (n_to_check)
+ {
+ phase ^= 1;
+ cd = cqes;
+ goto more;
+ }
+ }
+
+ /* revert incomplete */
+ if (PREDICT_FALSE (statuses[i - 1].last == 0))
+ {
+ i--;
+ while (i && statuses[i - 1].last == 0)
+ i--;
+ }
+
+ return i;
+}
+
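+/* Refill the RX submission queue with fresh buffers for all completed SQEs:
+ * buffers are allocated in batches of ENA_RX_REFILL_BATCH, each rewritten
+ * SQE gets its phase bit flipped, and the doorbell is rung once at the end. */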
+static_always_inline void
+ena_device_input_refill (vlib_main_t *vm, ena_rx_ctx_t *ctx,
+ vnet_dev_rx_queue_t *rxq, int use_va)
+{
+ ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq);
+ const u64x2 flip_phase = (ena_rx_desc_t){ .lo.phase = 1 }.as_u64x2;
+ u32 buffer_indices[ENA_RX_REFILL_BATCH];
+ uword dma_addr[ENA_RX_REFILL_BATCH];
+ u32 n_alloc, n_compl_sqes = q->n_compl_sqes;
+ u16 *csi = ctx->comp_sqe_indices;
+ ena_rx_desc_t *sqes = q->sqes;
+
+ while (n_compl_sqes > 0)
+ {
+ n_alloc = vlib_buffer_alloc_from_pool (
+ vm, buffer_indices, clib_min (ENA_RX_REFILL_BATCH, n_compl_sqes),
+ vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+
+ if (PREDICT_FALSE (n_alloc == 0))
+ break;
+
+ vlib_get_buffers_with_offset (vm, buffer_indices, (void **) dma_addr,
+ ENA_RX_REFILL_BATCH,
+ STRUCT_OFFSET_OF (vlib_buffer_t, data));
+
+ if (!use_va)
+ for (u32 i = 0; i < n_alloc; i++)
+ dma_addr[i] = vlib_physmem_get_pa (vm, (void *) dma_addr[i]);
+
+ for (u32 i = 0; i < n_alloc; i++)
+ {
+ u16 slot = csi[i];
+ u64x2 r = sqes[slot].as_u64x2 ^ flip_phase;
+ ctx->sq_buffer_indices[slot] = buffer_indices[i];
+ r[1] = dma_addr[i];
+ sqes[slot].as_u64x2 = r; /* write SQE as single 16-byte store */
+ }
+
+ csi += n_alloc;
+ n_compl_sqes -= n_alloc;
+ }
+
+ if (n_compl_sqes == q->n_compl_sqes)
+ return;
+
+ q->sq_next += q->n_compl_sqes - n_compl_sqes;
+ __atomic_store_n (q->sq_db, q->sq_next, __ATOMIC_RELEASE);
+
+ if (PREDICT_FALSE (n_compl_sqes))
+ clib_memmove (ctx->comp_sqe_indices, csi, n_compl_sqes * sizeof (csi[0]));
+
+ q->n_compl_sqes = n_compl_sqes;
+}
+
+static_always_inline uword
+ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_dev_rx_queue_t *rxq)
+{
+ ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_port_t *port = rxq->port;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_buffer_t *buffers[VLIB_FRAME_SIZE], **b;
+ ena_rx_cdesc_status_t statuses[VLIB_FRAME_SIZE + 8];
+ u16 lengths[VLIB_FRAME_SIZE + 8], *l;
+ u32 flags[VLIB_FRAME_SIZE + 8], *f;
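+  /* the status/length/flags arrays carry 8 entries of padding so the vector
+   * loops in ena_device_input_status_to_flags may safely overshoot */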
+ u16 *csi;
+ uword n_rx_packets = 0, n_rx_bytes = 0;
+ vlib_frame_bitmap_t head_bmp = {};
+ u32 sw_if_index = port->intf.sw_if_index;
+ u32 hw_if_index = port->intf.hw_if_index;
+ u32 n_trace, n_deq, n_left;
+ u32 cq_next = q->cq_next;
+ u32 next_index = rxq->next_index;
+ vlib_frame_t *next_frame;
+ vlib_buffer_template_t bt = rxq->buffer_template;
+ u32 *bi;
+ int maybe_chained;
+
+ ASSERT (count_set_bits (rxq->size) == 1);
+ ena_rx_ctx_t ctx = {
+ .size = rxq->size,
+ .mask = rxq->size - 1,
+ .st_and.as_u32 = ~0,
+ .comp_sqe_indices = q->compl_sqe_indices,
+ .sq_buffer_indices = q->buffer_indices,
+ };
+
+ /* we may have completed SQE indices from previous run */
+ csi = ctx.comp_sqe_indices + q->n_compl_sqes;
+
+ n_deq =
+ ena_device_input_cq_dequeue_no_wrap (&ctx, q, statuses, lengths, csi);
+
+ if (n_deq == 0)
+ goto refill;
+
+ q->n_compl_sqes += n_deq;
+
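+  /* if every dequeued status had both 'first' and 'last' set, no packet
+   * spans multiple descriptors and the cheaper non-chained path suffices */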
+ maybe_chained = ctx.st_and.first && ctx.st_and.last ? 0 : 1;
+
+ next_frame =
+ vlib_get_next_frame_internal (vm, node, next_index, /* new frame */ 1);
+ bi = vlib_frame_vector_args (next_frame);
+
+ /* move buffer indices from the ring */
+ for (u32 i = 0; i < n_deq; i++)
+ {
+ u32 slot = csi[i];
+ bi[i] = ctx.sq_buffer_indices[slot];
+ ctx.sq_buffer_indices[slot] = VLIB_BUFFER_INVALID_INDEX;
+ }
+
+ vlib_get_buffers (vm, bi, buffers, n_deq);
+
+ if (PREDICT_FALSE (maybe_chained))
+ ena_device_input_status_to_flags (statuses, flags, n_deq, head_bmp, 1);
+ else
+ ena_device_input_status_to_flags (statuses, flags, n_deq, head_bmp, 0);
+
+ for (b = buffers, l = lengths, f = flags, n_left = n_deq; n_left >= 8;
+ b += 4, f += 4, l += 4, n_left -= 4)
+ {
+ clib_prefetch_store (b[4]);
+ clib_prefetch_store (b[5]);
+ clib_prefetch_store (b[6]);
+ clib_prefetch_store (b[7]);
+ b[0]->template = bt;
+ n_rx_bytes += b[0]->current_length = l[0];
+ b[0]->flags = f[0];
+ b[1]->template = bt;
+ n_rx_bytes += b[1]->current_length = l[1];
+ b[1]->flags = f[1];
+ b[2]->template = bt;
+ n_rx_bytes += b[2]->current_length = l[2];
+ b[2]->flags = f[2];
+ b[3]->template = bt;
+ n_rx_bytes += b[3]->current_length = l[3];
+ b[3]->flags = f[3];
+ }
+
+ for (; n_left > 0; b += 1, f += 1, l += 1, n_left -= 1)
+ {
+ b[0]->template = bt;
+ n_rx_bytes += b[0]->current_length = l[0];
+ b[0]->flags = f[0];
+ }
+
+ if (maybe_chained)
+ {
+ vlib_buffer_t *hb = 0;
+ vlib_frame_bitmap_t tail_buf_bmp = {};
+ u32 i, total_len = 0, head_flags = 0, tail_flags = 0;
+ n_rx_packets = vlib_frame_bitmap_count_set_bits (head_bmp);
+
+ vlib_frame_bitmap_init (tail_buf_bmp, n_deq);
+ vlib_frame_bitmap_xor (tail_buf_bmp, head_bmp);
+
+ foreach_vlib_frame_bitmap_set_bit_index (i, tail_buf_bmp)
+ {
+ vlib_buffer_t *pb = buffers[i - 1];
+	  /* only store operations here */
+ pb->next_buffer = bi[i];
+ if (vlib_frame_bitmap_is_bit_set (tail_buf_bmp, i - 1) == 0)
+ {
+ if (hb)
+ {
+ hb->total_length_not_including_first_buffer = total_len;
+ /* tail descriptor contains protocol info so we need to
+ * combine head and tail buffer flags */
+ hb->flags = head_flags | tail_flags;
+ }
+ head_flags = flags[i - 1];
+ total_len = 0;
+ hb = pb;
+ }
+ total_len += lengths[i];
+ tail_flags = flags[i];
+ }
+
+ hb->total_length_not_including_first_buffer = total_len;
+ hb->flags = head_flags | tail_flags;
+ }
+ else
+ n_rx_packets = n_deq;
+
+ /* packet tracing */
+ if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
+ {
+ u32 i;
+ if (!maybe_chained)
+ vlib_frame_bitmap_init (head_bmp, n_deq);
+ foreach_vlib_frame_bitmap_set_bit_index (i, head_bmp)
+ {
+ vlib_buffer_t *b = buffers[i];
+ if (vlib_trace_buffer (vm, node, next_index, b, 0))
+ {
+ u32 j = i;
+ ena_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+ tr->next_index = next_index;
+ tr->qid = rxq->queue_id;
+ tr->hw_if_index = hw_if_index;
+ tr->n_desc = 1;
+ tr->length = lengths[i];
+ tr->req_id = csi[i];
+ tr->status = statuses[i];
+ while (statuses[j].last == 0)
+ {
+ j++;
+ tr->n_desc++;
+ tr->length += lengths[j];
+ }
+ tr->status = statuses[j];
+
+	      if (--n_trace == 0)
+ goto trace_done;
+ }
+ }
+ trace_done:
+ vlib_set_trace_count (vm, node, n_trace);
+ }
+
+ if (PREDICT_FALSE (maybe_chained))
+ clib_compress_u32 (bi, bi, head_bmp, n_deq);
+
+ if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
+ {
+ ethernet_input_frame_t *ef;
+ next_frame->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (next_frame);
+ ef->sw_if_index = sw_if_index;
+ ef->hw_if_index = hw_if_index;
+
+ if (ctx.st_or.l3_csum_err == 0)
+ next_frame->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
+ vlib_frame_no_append (next_frame);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, VLIB_FRAME_SIZE - n_rx_packets);
+
+ vlib_increment_combined_counter (
+ vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ vm->thread_index, hw_if_index, n_rx_packets, n_rx_bytes);
+
+ q->cq_next = cq_next + n_deq;
+
+refill:
+ if (rxq->port->dev->va_dma)
+ ena_device_input_refill (vm, &ctx, rxq, 1);
+ else
+ ena_device_input_refill (vm, &ctx, rxq, 0);
+
+ return n_rx_packets;
+}
+
+VNET_DEV_NODE_FN (ena_rx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ u32 n_rx = 0;
+ foreach_vnet_dev_rx_queue_runtime (rxq, node)
+ n_rx += ena_device_input_inline (vm, node, rxq);
+ return n_rx;
+}
diff --git a/src/plugins/dev_ena/tx_node.c b/src/plugins/dev_ena/tx_node.c
new file mode 100644
index 00000000000..ae1b852c036
--- /dev/null
+++ b/src/plugins/dev_ena/tx_node.c
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_ena/ena.h>
+#include <vnet/ethernet/ethernet.h>
+#include <dev_ena/ena_inlines.h>
+
+#define ENA_TX_ENQ_BATCH_SZ 64
+#define ENA_MAX_LOG2_TXQ_SIZE 11
+#define ENA_TX_MAX_TAIL_LEN 5
+
+typedef struct
+{
+ u32 n_bytes;
+ ena_device_t *ed;
+ u16 n_desc;
+ u32 mask;
+ u16 n_packets_left;
+ u16 n_free_slots;
+ u32 *from;
+ u32 *sq_buffer_indices;
+ u32 tmp_bi[VLIB_FRAME_SIZE];
+ ena_tx_desc_t *sqes;
+ u64 *sqe_templates;
+ u16 n_dropped_chain_too_long;
+ u8 llq;
+ void *bd;
+} ena_tx_ctx_t;
+
+/* bits inside req_id which represent SQE index */
+static const u16 reqid_sqe_idx_mask = (1U << ENA_MAX_LOG2_TXQ_SIZE) - 1;
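+/* bits above ENA_MAX_LOG2_TXQ_SIZE carry the number of descriptors in the
+ * chain, so a single completion tells how many SQ slots to release */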
+
+static_always_inline void
+ena_txq_adv_sq_tail (ena_tx_ctx_t *ctx, ena_txq_t *eq)
+{
+  /* CQEs can arrive out of order, so we cannot blindly advance the SQ tail
+   * by the number of completions; instead we check whether a slot still
+   * holds a valid buffer index before walking past it */
+
+ u32 sq_head = eq->sq_head;
+ u32 sq_tail = eq->sq_tail;
+ u16 n, offset = sq_tail & ctx->mask;
+ u32 *bi = ctx->sq_buffer_indices + offset;
+ u16 n_to_check = clib_min (sq_head - sq_tail, ctx->n_desc - offset);
+
+advance_sq_tail:
+ n = n_to_check;
+
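+  /* scan forward for the first slot still holding a valid buffer index;
+   * the vector paths test 8 or 4 slots per iteration before falling back
+   * to a scalar scan */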
+#ifdef CLIB_HAVE_VEC256
+ for (; n >= 8; n -= 8, bi += 8)
+ if (!u32x8_is_all_equal (*(u32x8u *) bi, VLIB_BUFFER_INVALID_INDEX))
+ break;
+#elif defined(CLIB_HAVE_VEC128)
+ for (; n >= 4; n -= 4, bi += 4)
+ if (!u32x4_is_all_equal (*(u32x4u *) bi, VLIB_BUFFER_INVALID_INDEX))
+ break;
+#endif
+
+ for (; n > 0; n -= 1, bi += 1)
+ if (bi[0] != VLIB_BUFFER_INVALID_INDEX)
+ break;
+
+ sq_tail += n_to_check - n;
+
+ if (n == 0 && sq_tail < sq_head)
+ {
+ n_to_check = sq_head - sq_tail;
+ bi = ctx->sq_buffer_indices;
+ goto advance_sq_tail;
+ }
+
+ eq->sq_tail = sq_tail;
+}
+
+static_always_inline void
+ena_txq_deq (vlib_main_t *vm, ena_tx_ctx_t *ctx, ena_txq_t *txq)
+{
+ /* dequeue CQ, extract SQ slot and number of chained buffers from
+ * req_id, move completed buffer indices to temp array */
+ const ena_tx_cdesc_t mask_phase = { .phase = 1 };
+ ena_tx_cdesc_t *cqes = txq->cqes, *cd, match_phase = {};
+ u32 cq_next = txq->cq_next;
+ u32 offset, n = 0;
+ u32 n_to_check;
+ u32 *buffers_to_free = ctx->tmp_bi;
+ u32 n_buffers_to_free = 0;
+
+ offset = cq_next & ctx->mask;
+ cd = cqes + offset;
+ n_to_check = ctx->n_desc - offset;
+  match_phase.phase = (cq_next & (ctx->n_desc << 1)) == 0;
+
+#ifdef CLIB_HAVE_VEC256
+ const u16 reqid_nic1 = 1U << ENA_MAX_LOG2_TXQ_SIZE;
+ const ena_tx_cdesc_t mask_reqid = { .req_id = reqid_sqe_idx_mask },
+ match_ph0_nic1 = { .req_id = reqid_nic1, .phase = 0 },
+ match_ph1_nic1 = { .req_id = reqid_nic1, .phase = 1 },
+ mask_ph_nic = { .req_id = ~reqid_sqe_idx_mask,
+ .phase = 1 };
+ /* both phase and req_id are in lower 32 bits */
+ u32x8 mask_ph_nic_x8 = u32x8_splat (mask_ph_nic.as_u64);
+ u32x8 mask_reqid_x8 = u32x8_splat (mask_reqid.as_u64);
+ u32x8 match_ph_nic1_x8 = u32x8_splat (
+ match_phase.phase ? match_ph1_nic1.as_u64 : match_ph0_nic1.as_u64);
+ u32x8 buf_inv_idx_x8 = u32x8_splat (VLIB_BUFFER_INVALID_INDEX);
+#endif
+
+more:
+ while (n < n_to_check)
+ {
+ u16 req_id, n_in_chain;
+
+#ifdef CLIB_HAVE_VEC256
+ while (n + 7 < n_to_check)
+ {
+ u32x8 r, v;
+
+ /* load lower 32-bits of 8 CQEs in 256-bit register */
+ r = u32x8_shuffle2 (*(u32x8u *) cd, *(u32x8u *) (cd + 4), 0, 2, 4, 6,
+ 8, 10, 12, 14);
+
+ /* check if all 8 CQEs are completed and there is no chained bufs */
+ if (u32x8_is_equal (r & mask_ph_nic_x8, match_ph_nic1_x8) == 0)
+ goto one_by_one;
+
+ r &= mask_reqid_x8;
+
+ /* take consumed buffer indices from ring */
+ v = u32x8_gather_u32 (ctx->sq_buffer_indices, r,
+ sizeof (ctx->sq_buffer_indices[0]));
+ u32x8_scatter_u32 (ctx->sq_buffer_indices, r, buf_inv_idx_x8,
+ sizeof (ctx->sq_buffer_indices[0]));
+ *(u32x8u *) (buffers_to_free + n_buffers_to_free) = v;
+ n_buffers_to_free += 8;
+
+ n += 8;
+ cd += 8;
+ continue;
+ }
+ one_by_one:
+#endif
+
+ if ((cd->as_u64 & mask_phase.as_u64) != match_phase.as_u64)
+ goto done;
+
+ req_id = cd->req_id;
+ n_in_chain = req_id >> ENA_MAX_LOG2_TXQ_SIZE;
+ req_id &= reqid_sqe_idx_mask;
+
+ buffers_to_free[n_buffers_to_free++] = ctx->sq_buffer_indices[req_id];
+ ctx->sq_buffer_indices[req_id] = VLIB_BUFFER_INVALID_INDEX;
+
+ if (PREDICT_FALSE (n_in_chain > 1))
+ while (n_in_chain-- > 1)
+ {
+ req_id = (req_id + 1) & ctx->mask;
+ buffers_to_free[n_buffers_to_free++] =
+ ctx->sq_buffer_indices[req_id];
+ ctx->sq_buffer_indices[req_id] = VLIB_BUFFER_INVALID_INDEX;
+ }
+
+ n++;
+ cd++;
+ }
+
+ if (PREDICT_FALSE (n == n_to_check))
+ {
+ cq_next += n;
+ n = 0;
+ cd = cqes;
+ match_phase.phase ^= 1;
+#ifdef CLIB_HAVE_VEC256
+ match_ph_nic1_x8 ^= u32x8_splat (mask_phase.as_u64);
+#endif
+ n_to_check = ctx->n_desc;
+ goto more;
+ }
+
+done:
+
+ if (n_buffers_to_free)
+ {
+ cq_next += n;
+
+ /* part two - free buffers stored in temporary array */
+ vlib_buffer_free_no_next (vm, buffers_to_free, n_buffers_to_free);
+ txq->cq_next = cq_next;
+
+ ena_txq_adv_sq_tail (ctx, txq);
+ }
+}
+
+static_always_inline u16
+ena_txq_wr_sqe (vlib_main_t *vm, vlib_buffer_t *b, int use_iova,
+ ena_tx_desc_t *dp, u32 n_in_chain, ena_tx_desc_t desc)
+{
+ uword dma_addr = use_iova ? vlib_buffer_get_current_va (b) :
+ vlib_buffer_get_current_pa (vm, b);
+ u16 len = b->current_length;
+
+ desc.req_id_hi = n_in_chain << (ENA_MAX_LOG2_TXQ_SIZE - 10);
+ desc.as_u16x8[0] = len;
+ ASSERT (dma_addr < 0xffffffffffff); /* > 48bit - should never happen */
+ desc.as_u64x2[1] = dma_addr; /* this also overwrites header_length */
+
+ /* write descriptor as single 128-bit store */
+ dp->as_u64x2 = desc.as_u64x2;
+ return len;
+}
+
+static_always_inline void
+ena_txq_copy_sqes (ena_tx_ctx_t *ctx, u32 off, ena_tx_desc_t *s, u32 n_desc)
+{
+ const u64 temp_phase_xor = (ena_tx_desc_t){ .phase = 1 }.as_u64x2[0];
+ u32 n = 0;
+
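+  /* each SQ slot keeps a pre-built template (phase plus static fields) in
+   * host memory; the final SQE is that template OR-ed with the per-packet
+   * descriptor, and the stored template phase is flipped for the next pass */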
+ if (ctx->llq)
+ {
+ ena_tx_llq_desc128_t *llq_descs = (ena_tx_llq_desc128_t *) ctx->sqes;
+ for (; n < n_desc; n += 1, s += 1, off += 1)
+ {
+ ena_tx_llq_desc128_t td = {};
+ u64 t = ctx->sqe_templates[off];
+ u64x2 v = { t, 0 };
+ ctx->sqe_templates[off] = t ^ temp_phase_xor;
+ td.desc[0].as_u64x2 = v | s->as_u64x2;
+ td.desc[0].phase = 1;
+ td.desc[0].header_length = 96;
+ td.desc[0].length -= 96;
+ td.desc[0].buff_addr_lo += 96;
+ vlib_buffer_t *b =
+ vlib_get_buffer (vlib_get_main (), ctx->sq_buffer_indices[off]);
+ clib_memcpy_fast (td.data, vlib_buffer_get_current (b), 96);
+ clib_memcpy_fast (llq_descs + off, &td, 128);
+ }
+ return;
+ }
+
+#ifdef CLIB_HAVE_VEC512
+ u64x8 temp_phase_xor_x8 = u64x8_splat (temp_phase_xor);
+ for (; n + 7 < n_desc; n += 8, s += 8, off += 8)
+ {
+ u64x8 t8 = *(u64x8u *) (ctx->sqe_templates + off);
+ *(u64x8u *) (ctx->sqe_templates + off) = t8 ^ temp_phase_xor_x8;
+ u64x8 r0 = *(u64x8u *) s;
+ u64x8 r1 = *(u64x8u *) (s + 4);
+ r0 |= u64x8_shuffle2 (t8, (u64x8){}, 0, 9, 1, 11, 2, 13, 3, 15);
+ r1 |= u64x8_shuffle2 (t8, (u64x8){}, 4, 9, 5, 11, 6, 13, 7, 15);
+ *((u64x8u *) (ctx->sqes + off)) = r0;
+ *((u64x8u *) (ctx->sqes + off + 4)) = r1;
+ }
+#elif defined(CLIB_HAVE_VEC256)
+ u64x4 temp_phase_xor_x4 = u64x4_splat (temp_phase_xor);
+ for (; n + 3 < n_desc; n += 4, s += 4, off += 4)
+ {
+ u64x4 t4 = *(u64x4u *) (ctx->sqe_templates + off);
+ *(u64x4u *) (ctx->sqe_templates + off) = t4 ^ temp_phase_xor_x4;
+ u64x4 r0 = *(u64x4u *) s;
+ u64x4 r1 = *(u64x4u *) (s + 2);
+ r0 |= u64x4_shuffle2 (t4, (u64x4){}, 0, 5, 1, 7);
+ r1 |= u64x4_shuffle2 (t4, (u64x4){}, 2, 5, 3, 7);
+ *((u64x4u *) (ctx->sqes + off)) = r0;
+ *((u64x4u *) (ctx->sqes + off + 2)) = r1;
+ }
+#endif
+
+ for (; n < n_desc; n += 1, s += 1, off += 1)
+ {
+ u64 t = ctx->sqe_templates[off];
+ u64x2 v = { t, 0 };
+ ctx->sqe_templates[off] = t ^ temp_phase_xor;
+ ctx->sqes[off].as_u64x2 = v | s->as_u64x2;
+ }
+}
+
+static_always_inline u32
+ena_txq_enq_one (vlib_main_t *vm, ena_tx_ctx_t *ctx, vlib_buffer_t *b0,
+ ena_tx_desc_t *d, u16 n_free_desc, u32 *f, int use_iova)
+{
+ const ena_tx_desc_t single = { .first = 1, .last = 1 };
+ vlib_buffer_t *b;
+ u32 i, n;
+
+ /* non-chained buffer */
+ if ((b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)
+ {
+ ctx->n_bytes += ena_txq_wr_sqe (vm, b0, use_iova, d, 1, single);
+ f[0] = ctx->from[0];
+ ctx->from += 1;
+ ctx->n_packets_left -= 1;
+ return 1;
+ }
+
+ /* count number of buffers in chain */
+ for (n = 1, b = b0; b->flags & VLIB_BUFFER_NEXT_PRESENT; n++)
+ b = vlib_get_buffer (vm, b->next_buffer);
+
+ /* if chain is too long, drop packet */
+ if (n > ENA_TX_MAX_TAIL_LEN + 1)
+ {
+ vlib_buffer_free_one (vm, ctx->from[0]);
+ ctx->from += 1;
+ ctx->n_packets_left -= 1;
+ ctx->n_dropped_chain_too_long++;
+ return 0;
+ }
+
+  /* not enough descriptors to accommodate the chain? */
+ if (n > n_free_desc)
+ return 0;
+
+ /* first */
+ f++[0] = ctx->from[0];
+ ctx->from += 1;
+ ctx->n_packets_left -= 1;
+ ctx->n_bytes +=
+ ena_txq_wr_sqe (vm, b0, use_iova, d++, n, (ena_tx_desc_t){ .first = 1 });
+
+ /* mid */
+ for (i = 1, b = b0; i < n - 1; i++)
+ {
+ f++[0] = b->next_buffer;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ ctx->n_bytes +=
+ ena_txq_wr_sqe (vm, b, use_iova, d++, 0, (ena_tx_desc_t){});
+ }
+
+ /* last */
+ f[0] = b->next_buffer;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ ctx->n_bytes +=
+ ena_txq_wr_sqe (vm, b, use_iova, d, 0, (ena_tx_desc_t){ .last = 1 });
+
+ return n;
+}
+
+static_always_inline uword
+ena_txq_enq (vlib_main_t *vm, ena_tx_ctx_t *ctx, ena_txq_t *txq, int use_iova)
+{
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 *f = ctx->tmp_bi;
+ ena_tx_desc_t desc[ENA_TX_ENQ_BATCH_SZ], *d = desc;
+ const ena_tx_desc_t single = { .first = 1, .last = 1 };
+ u32 n_desc_left, n;
+
+ if (ctx->n_packets_left == 0)
+ return 0;
+
+ if (ctx->n_free_slots == 0)
+ return 0;
+
+ n_desc_left = clib_min (ENA_TX_ENQ_BATCH_SZ, ctx->n_free_slots);
+
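+  /* fast path: handle four single-buffer packets per iteration while
+   * prefetching the next four buffer headers */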
+ while (n_desc_left >= 4 && ctx->n_packets_left >= 8)
+ {
+ clib_prefetch_load (vlib_get_buffer (vm, ctx->from[4]));
+ b0 = vlib_get_buffer (vm, ctx->from[0]);
+ clib_prefetch_load (vlib_get_buffer (vm, ctx->from[5]));
+ b1 = vlib_get_buffer (vm, ctx->from[1]);
+ clib_prefetch_load (vlib_get_buffer (vm, ctx->from[6]));
+ b2 = vlib_get_buffer (vm, ctx->from[2]);
+ clib_prefetch_load (vlib_get_buffer (vm, ctx->from[7]));
+ b3 = vlib_get_buffer (vm, ctx->from[3]);
+
+      if (PREDICT_TRUE (((b0->flags | b1->flags | b2->flags | b3->flags) &
+			 VLIB_BUFFER_NEXT_PRESENT) == 0))
+ {
+ ctx->n_bytes += ena_txq_wr_sqe (vm, b0, use_iova, d++, 1, single);
+ ctx->n_bytes += ena_txq_wr_sqe (vm, b1, use_iova, d++, 1, single);
+ ctx->n_bytes += ena_txq_wr_sqe (vm, b2, use_iova, d++, 1, single);
+ ctx->n_bytes += ena_txq_wr_sqe (vm, b3, use_iova, d++, 1, single);
+ vlib_buffer_copy_indices (f, ctx->from, 4);
+ ctx->from += 4;
+ ctx->n_packets_left -= 4;
+
+ n_desc_left -= 4;
+ f += 4;
+ }
+ else
+ {
+ n = ena_txq_enq_one (vm, ctx, b0, d, n_desc_left, f, use_iova);
+ if (n == 0)
+ break;
+ n_desc_left -= n;
+ f += n;
+ d += n;
+ }
+ }
+
+ while (n_desc_left > 0 && ctx->n_packets_left > 0)
+ {
+ vlib_buffer_t *b0;
+
+ b0 = vlib_get_buffer (vm, ctx->from[0]);
+ n = ena_txq_enq_one (vm, ctx, b0, d, n_desc_left, f, use_iova);
+ if (n == 0)
+ break;
+ n_desc_left -= n;
+ f += n;
+ d += n;
+ }
+
+ n = d - desc;
+
+ if (n)
+ {
+ u32 head = txq->sq_head;
+ u32 offset = head & ctx->mask;
+ u32 n_before_wrap = ctx->n_desc - offset;
+ u32 n_copy;
+
+ d = desc;
+ f = ctx->tmp_bi;
+
+ if (n_before_wrap >= n)
+ {
+ n_copy = n;
+ vlib_buffer_copy_indices (ctx->sq_buffer_indices + offset, f,
+ n_copy);
+ ena_txq_copy_sqes (ctx, offset, d, n_copy);
+ }
+ else
+ {
+ n_copy = n_before_wrap;
+ vlib_buffer_copy_indices (ctx->sq_buffer_indices + offset, f,
+ n_copy);
+ ena_txq_copy_sqes (ctx, offset, d, n_copy);
+
+ n_copy = n - n_before_wrap;
+ vlib_buffer_copy_indices (ctx->sq_buffer_indices, f + n_before_wrap,
+ n_copy);
+ ena_txq_copy_sqes (ctx, 0, d + n_before_wrap, n_copy);
+ }
+
+ head += n;
+ __atomic_store_n (txq->sq_db, head, __ATOMIC_RELEASE);
+ txq->sq_head = head;
+ ctx->n_free_slots -= n;
+
+ return n;
+ }
+ return 0;
+}
+
+VNET_DEV_NODE_FN (ena_tx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ vnet_dev_tx_node_runtime_t *tnr = vnet_dev_get_tx_node_runtime (node);
+ vnet_dev_tx_queue_t *txq = tnr->tx_queue;
+ vnet_dev_t *dev = txq->port->dev;
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+ u32 n_pkts = 0;
+
+ ena_tx_ctx_t ctx = { .mask = txq->size - 1,
+ .n_desc = txq->size,
+ .n_packets_left = frame->n_vectors,
+ .from = vlib_frame_vector_args (frame),
+ .sqe_templates = eq->sqe_templates,
+ .sqes = eq->sqes,
+ .sq_buffer_indices = eq->buffer_indices,
+ .llq = ed->llq };
+
+ vnet_dev_tx_queue_lock_if_needed (txq);
+
+  /* try up to 3 times to enqueue packets, first freeing buffers already
+   * consumed by the device and then enqueueing as many as possible */
+ for (int i = 0; i < 3; i++)
+ {
+ /* free buffers consumed by ENA */
+ if (eq->sq_head != eq->sq_tail)
+ ena_txq_deq (vm, &ctx, eq);
+
+ /* enqueue new buffers, try until last attempt enqueues 0 packets */
+ ctx.n_free_slots = ctx.n_desc - (eq->sq_head - eq->sq_tail);
+
+ if (dev->va_dma)
+ while (ena_txq_enq (vm, &ctx, eq, /* va */ 1) > 0)
+ ;
+ else
+ while (ena_txq_enq (vm, &ctx, eq, /* va */ 0) > 0)
+ ;
+
+ if (ctx.n_packets_left == 0)
+ break;
+ }
+
+ vnet_dev_tx_queue_unlock_if_needed (txq);
+
+ if (ctx.n_dropped_chain_too_long)
+ vlib_error_count (vm, node->node_index, ENA_TX_NODE_CTR_CHAIN_TOO_LONG,
+ ctx.n_dropped_chain_too_long);
+
+ n_pkts = frame->n_vectors - ctx.n_packets_left;
+ vlib_increment_combined_counter (
+ vnet_get_main ()->interface_main.combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX,
+ vm->thread_index, tnr->hw_if_index, n_pkts, ctx.n_bytes);
+
+ if (ctx.n_packets_left)
+ {
+ vlib_buffer_free (vm, ctx.from, ctx.n_packets_left);
+ vlib_error_count (vm, node->node_index, ENA_TX_NODE_CTR_NO_FREE_SLOTS,
+ ctx.n_packets_left);
+ }
+
+ return n_pkts;
+}
diff --git a/src/plugins/dev_iavf/CMakeLists.txt b/src/plugins/dev_iavf/CMakeLists.txt
new file mode 100644
index 00000000000..8fa89b7a677
--- /dev/null
+++ b/src/plugins/dev_iavf/CMakeLists.txt
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2022 Cisco Systems, Inc.
+
+add_vpp_plugin(dev_iavf
+ SOURCES
+ adminq.c
+ counters.c
+ format.c
+ iavf.c
+ port.c
+ queue.c
+ rx_node.c
+ tx_node.c
+ virtchnl.c
+
+ MULTIARCH_SOURCES
+ rx_node.c
+ tx_node.c
+)
+
diff --git a/src/plugins/dev_iavf/adminq.c b/src/plugins/dev_iavf/adminq.c
new file mode 100644
index 00000000000..c12dc8aa2f6
--- /dev/null
+++ b/src/plugins/dev_iavf/adminq.c
@@ -0,0 +1,485 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <ctype.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/iavf_regs.h>
+#include <dev_iavf/virtchnl.h>
+#include <vnet/ethernet/ethernet.h>
+
+#define IIAVF_AQ_LARGE_BUF 512
+#define IIAVF_AQ_ATQ_LEN 4
+#define IIAVF_AQ_ARQ_LEN 16
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "adminq",
+};
+
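+/* single DMA allocation holding both admin queue descriptor rings and
+ * their data buffers */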
+struct iavf_adminq_dma_mem
+{
+ iavf_aq_desc_t atq[IIAVF_AQ_ATQ_LEN];
+ iavf_aq_desc_t arq[IIAVF_AQ_ARQ_LEN];
+ struct
+ {
+ u8 data[IIAVF_AQ_BUF_SIZE];
+ } atq_bufs[IIAVF_AQ_ATQ_LEN];
+ struct
+ {
+ u8 data[IIAVF_AQ_BUF_SIZE];
+ } arq_bufs[IIAVF_AQ_ARQ_LEN];
+};
+
+static const iavf_dyn_ctl dyn_ctl0_disable = {
+ .itr_indx = 3,
+};
+
+static const iavf_dyn_ctl dyn_ctl0_enable = {
+ .intena = 1,
+ .clearpba = 1,
+ .itr_indx = 3,
+};
+
+static const iavf_vfint_icr0_ena1 icr0_ena1_aq_enable = {
+ .adminq = 1,
+};
+
+static inline void
+iavf_irq_0_disable (iavf_device_t *ad)
+{
+ iavf_reg_write (ad, IAVF_VFINT_ICR0_ENA1, 0);
+ iavf_reg_write (ad, IAVF_VFINT_DYN_CTL0, dyn_ctl0_disable.as_u32);
+ iavf_reg_flush (ad);
+}
+
+static inline void
+iavf_irq_0_enable (iavf_device_t *ad)
+{
+ iavf_reg_write (ad, IAVF_VFINT_ICR0_ENA1, icr0_ena1_aq_enable.as_u32);
+ iavf_reg_write (ad, IAVF_VFINT_DYN_CTL0, dyn_ctl0_enable.as_u32);
+ iavf_reg_flush (ad);
+}
+
+static_always_inline int
+iavf_aq_desc_is_done (iavf_aq_desc_t *d)
+{
+ iavf_aq_desc_flags_t flags;
+ flags.as_u16 = __atomic_load_n (&d->flags.as_u16, __ATOMIC_ACQUIRE);
+ return flags.dd;
+}
+
+static u8 *
+format_iavf_aq_desc_flags (u8 *s, va_list *args)
+{
+ iavf_aq_desc_flags_t f = va_arg (*args, iavf_aq_desc_flags_t);
+ int i = 0;
+
+#define _(n, v) \
+ if (f.v) \
+ { \
+ char str[] = #v, *sp = str; \
+ if (i++) \
+ { \
+ vec_add1 (s, ','); \
+ vec_add1 (s, ' '); \
+ } \
+ while (sp[0]) \
+ vec_add1 (s, (u8) toupper (sp++[0])); \
+ }
+ foreach_iavf_aq_desc_flag
+#undef _
+ return s;
+}
+
+static u8 *
+format_iavf_aq_desc_retval (u8 *s, va_list *args)
+{
+ iavf_aq_desc_retval_t rv = va_arg (*args, u32);
+
+ char *retvals[] = {
+#define _(a, b) [a] = #b,
+ foreach_iavf_aq_desc_retval
+#undef _
+ };
+
+ if (rv >= ARRAY_LEN (retvals) || retvals[rv] == 0)
+ return format (s, "UNKNOWN(%d)", rv);
+
+ return format (s, "%s", retvals[rv]);
+}
+
+static u8 *
+format_iavf_aq_desc (u8 *s, va_list *args)
+{
+ iavf_aq_desc_t *d = va_arg (*args, iavf_aq_desc_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "opcode 0x%04x datalen %u retval %U (%u) flags %U", d->opcode,
+ d->datalen, format_iavf_aq_desc_retval, d->retval, d->retval,
+ format_iavf_aq_desc_flags, d->flags);
+
+ if (d->opcode == IIAVF_AQ_DESC_OP_SEND_TO_PF ||
+ d->opcode == IIAVF_AQ_DESC_OP_MESSAGE_FROM_PF)
+ {
+ s =
+ format (s, "\n%Uv_opcode %U (%u) v_retval %U (%d) buf_dma_addr 0x%lx",
+ format_white_space, indent, format_virtchnl_op_name,
+ d->v_opcode, d->v_opcode, format_virtchnl_status, d->v_retval,
+ d->v_retval, (uword) d->param2 << 32 | d->param3);
+ }
+ else
+ {
+ s = format (
+ s, "\n%Ucookie_hi 0x%x cookie_lo 0x%x params %08x %08x %08x %08x",
+ format_white_space, indent, d->cookie_hi, d->cookie_lo, d->param0,
+ d->param1, d->param2, d->param3);
+ }
+ return s;
+}
+
+vnet_dev_rv_t
+iavf_aq_alloc (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ return vnet_dev_dma_mem_alloc (vm, dev, sizeof (iavf_adminq_dma_mem_t), 0,
+ (void **) &ad->aq_mem);
+}
+
+void
+iavf_aq_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ vnet_dev_dma_mem_free (vm, dev, ad->aq_mem);
+}
+
+static void
+iavf_aq_arq_slot_init (vlib_main_t *vm, vnet_dev_t *dev, u16 slot)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ u64 pa = vnet_dev_get_dma_addr (vm, dev, ad->aq_mem->arq_bufs + slot);
+ ad->aq_mem->arq[slot] = (iavf_aq_desc_t){
+ .flags.buf = 1,
+ .flags.lb = IIAVF_AQ_BUF_SIZE > IIAVF_AQ_LARGE_BUF,
+ .datalen = sizeof (ad->aq_mem->arq_bufs[0].data),
+ .addr_hi = (u32) (pa >> 32),
+ .addr_lo = (u32) pa,
+ };
+}
+
+static void
+iavf_aq_poll (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_aq_desc_t *d;
+ u8 *b;
+
+ while (iavf_aq_arq_next_acq (vm, dev, &d, &b, 0))
+ {
+
+ log_debug (dev, "poll[%u] flags %x %U op %u v_op %u", ad->arq_next_slot,
+ d->flags.as_u16, format_iavf_aq_desc_flags, d->flags,
+ d->opcode, d->v_opcode);
+ if ((d->datalen != sizeof (virtchnl_pf_event_t)) ||
+ ((d->flags.buf) == 0))
+ {
+ log_err (dev, "event message error");
+ }
+
+ vec_add1 (ad->events, *(virtchnl_pf_event_t *) b);
+ iavf_aq_arq_next_rel (vm, dev);
+ }
+
+ if (vec_len (ad->events))
+ {
+ virtchnl_pf_event_t *e;
+ char *virtchnl_event_names[] = {
+#define _(v, n) [v] = #n,
+ foreach_virtchnl_event_code
+#undef _
+ };
+
+ vec_foreach (e, ad->events)
+ {
+ log_debug (dev, "event %s (%u) sev %d",
+ virtchnl_event_names[e->event], e->event, e->severity);
+
+ if (e->event == VIRTCHNL_EVENT_LINK_CHANGE)
+ {
+ vnet_dev_port_state_changes_t changes = {};
+ vnet_dev_port_t *port = vnet_dev_get_port_by_id (dev, 0);
+
+ if (port)
+ {
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ int link_up;
+ u32 speed = 0;
+
+ if (ap->vf_cap_flags & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
+ {
+ link_up = e->event_data.link_event_adv.link_status;
+ speed = e->event_data.link_event_adv.link_speed;
+ }
+ else
+ {
+ const u32 speed_table[8] = { 100, 1000, 10000, 40000,
+ 20000, 25000, 2500, 5000 };
+
+ link_up = e->event_data.link_event.link_status;
+ speed = e->event_data.link_event.link_speed;
+
+		  if (count_set_bits (speed) == 1 &&
+		      (speed & pow2_mask (8)))
+ speed = speed_table[get_lowest_set_bit_index (speed)];
+ else
+ {
+ if (link_up)
+ log_warn (dev,
+ "unsupported link speed value "
+ "received (0x%x)",
+ speed);
+ speed = 0;
+ }
+ }
+
+ log_debug (dev, "LINK_CHANGE speed %u state %u", speed,
+ link_up);
+
+ if (port->link_up != link_up)
+ {
+ changes.change.link_state = 1;
+ changes.link_state = link_up;
+ log_debug (dev, "link state changed to %s",
+ link_up ? "up" : "down");
+ }
+
+ if (port->speed != speed * 1000)
+ {
+ changes.change.link_speed = 1;
+ changes.link_speed = speed * 1000;
+ log_debug (dev, "link speed changed to %u Mbps", speed);
+ }
+
+ if (changes.change.any)
+ vnet_dev_port_state_change (vm, port, changes);
+ }
+ }
+ }
+ vec_reset_length (ad->events);
+ }
+}
+
+static void
+iavf_adminq_msix_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 line)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_reg_write (ad, IAVF_VFINT_DYN_CTL0, dyn_ctl0_enable.as_u32);
+ log_debug (dev, "MSI-X interrupt %u received", line);
+ vnet_dev_process_call_op_no_wait (vm, dev, iavf_aq_poll);
+}
+
+static void
+iavf_adminq_intx_handler (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_adminq_msix_handler (vm, dev, 0);
+}
+
+void
+iavf_aq_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ uword pa;
+ u32 len;
+
+ /* disable both tx and rx adminq queue */
+ iavf_reg_write (ad, IAVF_ATQLEN, 0);
+ iavf_reg_write (ad, IAVF_ARQLEN, 0);
+
+ len = IIAVF_AQ_ATQ_LEN;
+ pa = vnet_dev_get_dma_addr (vm, dev, &ad->aq_mem->atq);
+ iavf_reg_write (ad, IAVF_ATQT, 0); /* Tail */
+ iavf_reg_write (ad, IAVF_ATQH, 0); /* Head */
+ iavf_reg_write (ad, IAVF_ATQBAL, (u32) pa); /* Base Address Low */
+ iavf_reg_write (ad, IAVF_ATQBAH, (u32) (pa >> 32)); /* Base Address High */
+  iavf_reg_write (ad, IAVF_ATQLEN, len | (1U << 31)); /* len & ena */
+
+ len = IIAVF_AQ_ARQ_LEN;
+ pa = vnet_dev_get_dma_addr (vm, dev, ad->aq_mem->arq);
+ iavf_reg_write (ad, IAVF_ARQT, 0); /* Tail */
+ iavf_reg_write (ad, IAVF_ARQH, 0); /* Head */
+ iavf_reg_write (ad, IAVF_ARQBAL, (u32) pa); /* Base Address Low */
+ iavf_reg_write (ad, IAVF_ARQBAH, (u32) (pa >> 32)); /* Base Address High */
+  iavf_reg_write (ad, IAVF_ARQLEN, len | (1U << 31)); /* len & ena */
+
+ for (int i = 0; i < len; i++)
+ iavf_aq_arq_slot_init (vm, dev, i);
+ iavf_reg_write (ad, IAVF_ARQT, len - 1); /* Tail */
+
+ ad->atq_next_slot = 0;
+ ad->arq_next_slot = 0;
+ ad->adminq_active = 1;
+}
+
+void
+iavf_aq_poll_on (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+
+ vnet_dev_poll_dev_add (vm, dev, IIAVF_AQ_POLL_INTERVAL, iavf_aq_poll);
+
+ if (vnet_dev_get_pci_n_msix_interrupts (dev) > 0)
+ {
+ vnet_dev_pci_msix_add_handler (vm, dev, iavf_adminq_msix_handler, 0, 1);
+ vnet_dev_pci_msix_enable (vm, dev, 0, 1);
+ }
+ else
+ vnet_dev_pci_intx_add_handler (vm, dev, iavf_adminq_intx_handler);
+
+ iavf_irq_0_enable (ad);
+}
+
+void
+iavf_aq_poll_off (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+
+ iavf_irq_0_disable (ad);
+
+ vnet_dev_poll_dev_remove (vm, dev, iavf_aq_poll);
+
+ if (vnet_dev_get_pci_n_msix_interrupts (dev) > 0)
+ {
+ vnet_dev_pci_msix_disable (vm, dev, 0, 1);
+ vnet_dev_pci_msix_remove_handler (vm, dev, 0, 1);
+ }
+ else
+ vnet_dev_pci_intx_remove_handler (vm, dev);
+}
+
+vnet_dev_rv_t
+iavf_aq_atq_enq (vlib_main_t *vm, vnet_dev_t *dev, iavf_aq_desc_t *desc,
+ const u8 *data, u16 len, f64 timeout)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_aq_desc_t *d = ad->aq_mem->atq + ad->atq_next_slot;
+ u8 *buf = ad->aq_mem->atq_bufs[ad->atq_next_slot].data;
+
+ ASSERT (len <= IIAVF_AQ_BUF_SIZE);
+
+ *d = *desc;
+
+ if (len)
+ {
+ u64 pa = vnet_dev_get_dma_addr (vm, dev, buf);
+ d->datalen = len;
+ d->addr_hi = (u32) (pa >> 32);
+ d->addr_lo = (u32) pa;
+ d->flags.buf = 1;
+ d->flags.rd = 1;
+ d->flags.lb = len > IIAVF_AQ_LARGE_BUF;
+ clib_memcpy_fast (buf, data, len);
+ }
+
+ log_debug (dev, "slot %u\n %U", ad->atq_next_slot, format_iavf_aq_desc, d);
+
+ ad->atq_next_slot = (ad->atq_next_slot + 1) % IIAVF_AQ_ATQ_LEN;
+ iavf_reg_write (ad, IAVF_ATQT, ad->atq_next_slot);
+ iavf_reg_flush (ad);
+
+ if (timeout > 0)
+ {
+ f64 suspend_time = timeout / 62;
+ f64 t0 = vlib_time_now (vm);
+ iavf_aq_desc_flags_t flags;
+
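+      /* poll with exponential backoff; successive suspends sum to
+       * (2^k - 1) * timeout / 62, so about six rounds reach the timeout */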
+ while (1)
+ {
+ flags.as_u16 = __atomic_load_n (&d->flags.as_u16, __ATOMIC_ACQUIRE);
+
+ if (flags.err)
+ {
+ log_err (dev, "adminq enqueue error [opcode 0x%x, retval %d]",
+ d->opcode, d->retval);
+ return VNET_DEV_ERR_BUG;
+ }
+
+ if (flags.dd && flags.cmp)
+ return VNET_DEV_OK;
+
+ if (vlib_time_now (vm) - t0 > timeout)
+ {
+ log_err (dev, "adminq enqueue timeout [opcode 0x%x]", d->opcode);
+ return VNET_DEV_ERR_TIMEOUT;
+ }
+
+ vlib_process_suspend (vm, suspend_time);
+ suspend_time *= 2;
+ }
+ }
+
+ return VNET_DEV_OK;
+}
+
+void
+iavf_aq_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ if (ad->adminq_active)
+ {
+ iavf_aq_desc_t d = {
+ .opcode = IIAVF_AQ_DESC_OP_QUEUE_SHUTDOWN,
+ .driver_unloading = 1,
+ .flags = { .si = 1 },
+ };
+ log_debug (dev, "adminq queue shutdown");
+ iavf_aq_atq_enq (vm, dev, &d, 0, 0, 0);
+ ad->adminq_active = 0;
+ }
+}
+
+int
+iavf_aq_arq_next_acq (vlib_main_t *vm, vnet_dev_t *dev, iavf_aq_desc_t **dp,
+ u8 **bp, f64 timeout)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_aq_desc_t *d = ad->aq_mem->arq + ad->arq_next_slot;
+
+ if (timeout)
+ {
+ f64 suspend_time = timeout / 62;
+ f64 t0 = vlib_time_now (vm);
+
+ while (!iavf_aq_desc_is_done (d))
+ {
+ if (vlib_time_now (vm) - t0 > timeout)
+ return 0;
+
+ vlib_process_suspend (vm, suspend_time);
+
+ suspend_time *= 2;
+ }
+ }
+ else if (!iavf_aq_desc_is_done (d))
+ return 0;
+
+ log_debug (dev, "arq desc acquired in slot %u\n %U", ad->arq_next_slot,
+ format_iavf_aq_desc, d);
+ *dp = d;
+ *bp = ad->aq_mem->arq_bufs[ad->arq_next_slot].data;
+ return 1;
+}
+
+void
+iavf_aq_arq_next_rel (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ ASSERT (iavf_aq_desc_is_done (ad->aq_mem->arq + ad->arq_next_slot));
+ iavf_aq_arq_slot_init (vm, dev, ad->arq_next_slot);
+ iavf_reg_write (ad, IAVF_ARQT, ad->arq_next_slot);
+ iavf_reg_flush (ad);
+ ad->arq_next_slot = (ad->arq_next_slot + 1) % IIAVF_AQ_ARQ_LEN;
+}
diff --git a/src/plugins/dev_iavf/counters.c b/src/plugins/dev_iavf/counters.c
new file mode 100644
index 00000000000..6dcd01141f0
--- /dev/null
+++ b/src/plugins/dev_iavf/counters.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "counters",
+};
+
+typedef enum
+{
+ IIAVF_PORT_CTR_RX_BYTES,
+ IIAVF_PORT_CTR_TX_BYTES,
+ IIAVF_PORT_CTR_RX_PACKETS,
+ IIAVF_PORT_CTR_TX_PACKETS,
+ IIAVF_PORT_CTR_RX_DROPS,
+ IIAVF_PORT_CTR_TX_DROPS,
+ IIAVF_PORT_CTR_RX_UCAST,
+ IIAVF_PORT_CTR_TX_UCAST,
+ IIAVF_PORT_CTR_RX_MCAST,
+ IIAVF_PORT_CTR_TX_MCAST,
+ IIAVF_PORT_CTR_RX_BCAST,
+ IIAVF_PORT_CTR_TX_BCAST,
+ IIAVF_PORT_CTR_RX_UNKNOWN_PROTOCOL,
+ IIAVF_PORT_CTR_TX_ERRORS,
+} iavf_port_counter_id_t;
+
+vnet_dev_counter_t iavf_port_counters[] = {
+ VNET_DEV_CTR_RX_BYTES (IIAVF_PORT_CTR_RX_BYTES),
+ VNET_DEV_CTR_RX_PACKETS (IIAVF_PORT_CTR_RX_PACKETS),
+ VNET_DEV_CTR_RX_DROPS (IIAVF_PORT_CTR_RX_DROPS),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_UCAST, RX, PACKETS, "unicast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_MCAST, RX, PACKETS, "multicast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_BCAST, RX, PACKETS, "broadcast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_UNKNOWN_PROTOCOL, RX, PACKETS,
+ "unknown protocol"),
+
+ VNET_DEV_CTR_TX_BYTES (IIAVF_PORT_CTR_TX_BYTES),
+ VNET_DEV_CTR_TX_PACKETS (IIAVF_PORT_CTR_TX_PACKETS),
+ VNET_DEV_CTR_TX_DROPS (IIAVF_PORT_CTR_TX_DROPS),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_UCAST, TX, PACKETS, "unicast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_MCAST, TX, PACKETS, "multicast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_BCAST, TX, PACKETS, "broadcast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_ERRORS, TX, PACKETS, "errors"),
+};
+
+void
+iavf_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_port_add_counters (vm, port, iavf_port_counters,
+ ARRAY_LEN (iavf_port_counters));
+}
+
+void
+iavf_port_poll_stats (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_rv_t rv;
+ vnet_dev_t *dev = port->dev;
+ virtchnl_eth_stats_t stats;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ virtchnl_queue_select_t qs = { .vsi_id = ap->vsi_id };
+
+ rv = iavf_vc_op_get_stats (vm, dev, &qs, &stats);
+
+ if (rv != VNET_DEV_OK)
+ return;
+
+ foreach_vnet_dev_counter (c, port->counter_main)
+ {
+ switch (c->user_data)
+ {
+ case IIAVF_PORT_CTR_RX_BYTES:
+ vnet_dev_counter_value_update (vm, c, stats.rx_bytes);
+ break;
+ case IIAVF_PORT_CTR_TX_BYTES:
+ vnet_dev_counter_value_update (vm, c, stats.tx_bytes);
+ break;
+ case IIAVF_PORT_CTR_RX_PACKETS:
+ vnet_dev_counter_value_update (
+ vm, c, stats.rx_unicast + stats.rx_broadcast + stats.rx_multicast);
+ break;
+ case IIAVF_PORT_CTR_TX_PACKETS:
+ vnet_dev_counter_value_update (
+ vm, c, stats.tx_unicast + stats.tx_broadcast + stats.tx_multicast);
+ break;
+ case IIAVF_PORT_CTR_RX_DROPS:
+ vnet_dev_counter_value_update (vm, c, stats.rx_discards);
+ break;
+ case IIAVF_PORT_CTR_TX_DROPS:
+ vnet_dev_counter_value_update (vm, c, stats.tx_discards);
+ break;
+ case IIAVF_PORT_CTR_RX_UCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_unicast);
+ break;
+ case IIAVF_PORT_CTR_TX_UCAST:
+ vnet_dev_counter_value_update (vm, c, stats.tx_unicast);
+ break;
+ case IIAVF_PORT_CTR_RX_MCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_multicast);
+ break;
+ case IIAVF_PORT_CTR_TX_MCAST:
+ vnet_dev_counter_value_update (vm, c, stats.tx_multicast);
+ break;
+ case IIAVF_PORT_CTR_RX_BCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_broadcast);
+ break;
+ case IIAVF_PORT_CTR_TX_BCAST:
+ vnet_dev_counter_value_update (vm, c, stats.tx_broadcast);
+ break;
+ case IIAVF_PORT_CTR_RX_UNKNOWN_PROTOCOL:
+ vnet_dev_counter_value_update (vm, c, stats.rx_unknown_protocol);
+ break;
+ case IIAVF_PORT_CTR_TX_ERRORS:
+ vnet_dev_counter_value_update (vm, c, stats.tx_errors);
+ break;
+ default:
+ ASSERT (0);
+ }
+ }
+}
diff --git a/src/plugins/dev_iavf/format.c b/src/plugins/dev_iavf/format.c
new file mode 100644
index 00000000000..9a3dde47ee9
--- /dev/null
+++ b/src/plugins/dev_iavf/format.c
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+
+u8 *
+format_iavf_vf_cap_flags (u8 *s, va_list *args)
+{
+ u32 flags = va_arg (*args, u32);
+ int not_first = 0;
+
+ char *strs[32] = {
+#define _(a, b, c) [a] = c,
+ foreach_iavf_vf_cap_flag
+#undef _
+ };
+
+ for (int i = 0; i < 32; i++)
+ {
+ if ((flags & (1 << i)) == 0)
+ continue;
+ if (not_first)
+ s = format (s, " ");
+ if (strs[i])
+ s = format (s, "%s", strs[i]);
+ else
+ s = format (s, "unknown(%u)", i);
+ not_first = 1;
+ }
+ return s;
+}
+
+u8 *
+format_iavf_rx_desc_qw1 (u8 *s, va_list *args)
+{
+ iavf_rx_desc_qw1_t *qw1 = va_arg (*args, iavf_rx_desc_qw1_t *);
+ s = format (s, "len %u ptype %u ubmcast %u fltstat %u flags", qw1->length,
+ qw1->ptype, qw1->ubmcast, qw1->fltstat);
+
+#define _(f) \
+ if (qw1->f) \
+ s = format (s, " " #f)
+
+ _ (dd);
+ _ (eop);
+ _ (l2tag1p);
+ _ (l3l4p);
+ _ (crcp);
+ _ (flm);
+ _ (lpbk);
+ _ (ipv6exadd);
+ _ (int_udp_0);
+ _ (ipe);
+ _ (l4e);
+ _ (oversize);
+#undef _
+ return s;
+}
+
+u8 *
+format_iavf_rx_trace (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ vlib_node_t *node = va_arg (*args, vlib_node_t *);
+ iavf_rx_trace_t *t = va_arg (*args, iavf_rx_trace_t *);
+ iavf_rx_desc_qw1_t *qw1;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
+ u32 indent = format_get_indent (s);
+ int i = 0;
+
+ s = format (s, "avf: %v (%d) qid %u next-node %U flow-id %u", hi->name,
+ t->hw_if_index, t->qid, format_vlib_next_node_name, vm,
+ node->index, t->next_index, t->flow_id);
+
+ qw1 = (iavf_rx_desc_qw1_t *) t->qw1s;
+
+ do
+ s = format (s, "\n%Udesc %u: %U", format_white_space, indent + 2, i,
+ format_iavf_rx_desc_qw1, qw1 + i);
+ while ((qw1[i++].eop) == 0 && i < IAVF_RX_MAX_DESC_IN_CHAIN);
+
+ return s;
+}
+
+u8 *
+format_iavf_port_status (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "caps: %U", format_iavf_vf_cap_flags, ap->vf_cap_flags);
+ s = format (s, "\n%Uvsi is %u, RSS key size is %u, RSS lut size is %u",
+ format_white_space, indent, ap->vsi_id, ap->rss_key_size,
+ ap->rss_lut_size);
+ s = format (s, "\n%Uflow offload ", format_white_space, indent);
+ if (ap->flow_offload)
+ s = format (s, "enabled, %u flows configured",
+ vec_len (ap->flow_lookup_entries));
+ else
+ s = format (s, "disabled");
+ return s;
+}
diff --git a/src/plugins/dev_iavf/iavf.c b/src/plugins/dev_iavf/iavf.c
new file mode 100644
index 00000000000..d1c2b9edc63
--- /dev/null
+++ b/src/plugins/dev_iavf/iavf.c
@@ -0,0 +1,307 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <vppinfra/ring.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "init",
+};
+
+#define IAVF_MAX_QPAIRS 32
+
+static const u32 driver_cap_flags =
+ /**/ VIRTCHNL_VF_CAP_ADV_LINK_SPEED |
+ /**/ VIRTCHNL_VF_LARGE_NUM_QPAIRS |
+ /**/ VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF |
+ /**/ VIRTCHNL_VF_OFFLOAD_FDIR_PF |
+ /**/ VIRTCHNL_VF_OFFLOAD_L2 |
+ /**/ VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
+ /**/ VIRTCHNL_VF_OFFLOAD_RSS_PF |
+ /**/ VIRTCHNL_VF_OFFLOAD_RX_POLLING |
+ /**/ VIRTCHNL_VF_OFFLOAD_VLAN |
+ /**/ VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
+ /**/ VIRTCHNL_VF_OFFLOAD_WB_ON_ITR |
+ /**/ 0;
+
+static const virtchnl_version_info_t driver_virtchnl_version = {
+ .major = VIRTCHNL_VERSION_MAJOR,
+ .minor = VIRTCHNL_VERSION_MINOR,
+};
+
+#define _(f, n, s, d) \
+ { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s },
+
+vlib_error_desc_t iavf_rx_node_counters[] = { foreach_iavf_rx_node_counter };
+vlib_error_desc_t iavf_tx_node_counters[] = { foreach_iavf_tx_node_counter };
+#undef _
+
+vnet_dev_node_t iavf_rx_node = {
+ .error_counters = iavf_rx_node_counters,
+ .n_error_counters = ARRAY_LEN (iavf_rx_node_counters),
+ .format_trace = format_iavf_rx_trace,
+};
+
+vnet_dev_node_t iavf_tx_node = {
+ .error_counters = iavf_tx_node_counters,
+ .n_error_counters = ARRAY_LEN (iavf_tx_node_counters),
+};
+
+static struct
+{
+ u16 device_id;
+ char *desc;
+} iavf_dev_types[] = {
+ { 0x1889, "Intel(R) Adaptive Virtual Function" },
+ { 0x154c, "Intel(R) X710 Virtual Function" },
+ { 0x37cd, "Intel(R) X722 Virtual Function" },
+};
+
+static u8 *
+iavf_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info)
+{
+ vnet_dev_bus_pci_device_info_t *di = dev_info;
+
+ if (di->vendor_id != 0x8086)
+ return 0;
+
+ FOREACH_ARRAY_ELT (dt, iavf_dev_types)
+ {
+ if (dt->device_id == di->device_id)
+ return format (0, "%s", dt->desc);
+ }
+
+ return 0;
+}
+
+static vnet_dev_rv_t
+iavf_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ u32 n_tries = 50;
+
+ iavf_aq_init (vm, dev);
+ iavf_vc_op_reset_vf (vm, dev);
+
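+  /* wait for VFGEN_RSTAT to report reset completion; low two bits equal to
+   * 2 (presumably VIRTCHNL_VFR_VFACTIVE) means the VF is active again */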
+ do
+ {
+ if (n_tries-- == 0)
+ return VNET_DEV_ERR_TIMEOUT;
+ vlib_process_suspend (vm, 0.02);
+ }
+ while ((iavf_reg_read (ad, IAVF_VFGEN_RSTAT) & 3) != 2);
+
+ iavf_aq_init (vm, dev);
+ iavf_aq_poll_on (vm, dev);
+  return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+iavf_alloc (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ log_debug (dev, "alloc");
+ return iavf_aq_alloc (vm, dev);
+}
+
+static vnet_dev_rv_t
+iavf_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ virtchnl_version_info_t ver;
+ virtchnl_vf_resource_t res;
+ u32 n_threads = vlib_get_n_threads ();
+ u16 max_frame_sz;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "init");
+
+ if ((rv = vnet_dev_pci_map_region (vm, dev, 0, &ad->bar0)))
+ return rv;
+
+ if ((rv = vnet_dev_pci_bus_master_enable (vm, dev)))
+ return rv;
+
+ if ((rv = iavf_reset (vm, dev)))
+ return rv;
+
+ if ((rv = iavf_vc_op_version (vm, dev, &driver_virtchnl_version, &ver)))
+ return rv;
+
+ if (ver.major != driver_virtchnl_version.major ||
+ ver.minor != driver_virtchnl_version.minor)
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE_VER;
+
+ if ((rv = iavf_vc_op_get_vf_resources (vm, dev, &driver_cap_flags, &res)))
+ return rv;
+
+ if (res.num_vsis != 1 || res.vsi_res[0].vsi_type != VIRTCHNL_VSI_SRIOV)
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+
+ if (res.max_mtu == 0)
+ {
+      log_warn (dev, "PF driver reports invalid max_mtu value of 0, "
+		     "consider upgrading the PF driver");
+ max_frame_sz = ETHERNET_MAX_PACKET_BYTES;
+ }
+ else
+ /* reverse of PF driver MTU calculation */
+ max_frame_sz = res.max_mtu + 14 /* ethernet header */ + 4 /* FCS */ +
+ 2 * 4 /* two VLAN tags */;
+
+ iavf_port_t iavf_port = {
+ .vf_cap_flags = res.vf_cap_flags,
+ .rss_key_size = res.rss_key_size,
+ .rss_lut_size = res.rss_lut_size,
+ .max_vectors = res.max_vectors,
+ .vsi_id = res.vsi_res[0].vsi_id,
+ .num_qp = clib_min (IAVF_MAX_QPAIRS, res.vsi_res[0].num_queue_pairs),
+ };
+
+ vnet_dev_port_add_args_t port_add_args = {
+ .port = {
+ .attr = {
+ .type = VNET_DEV_PORT_TYPE_ETHERNET,
+ .max_rx_queues = clib_min (IAVF_MAX_QPAIRS, res.num_queue_pairs),
+ .max_tx_queues = clib_min (IAVF_MAX_QPAIRS, res.num_queue_pairs),
+ .max_supported_rx_frame_size = max_frame_sz,
+ .caps = {
+ .change_max_rx_frame_size = 1,
+ .interrupt_mode = 1,
+ .rss = 1,
+ .mac_filter = 1,
+ },
+ .rx_offloads = {
+ .ip4_cksum = 1,
+ },
+ .tx_offloads = {
+ .ip4_cksum = 1,
+ .tcp_gso = 1,
+ },
+ },
+ .ops = {
+ .init = iavf_port_init,
+ .start = iavf_port_start,
+ .stop = iavf_port_stop,
+ .config_change = iavf_port_cfg_change,
+ .config_change_validate = iavf_port_cfg_change_validate,
+ .format_status = format_iavf_port_status,
+ },
+ .data_size = sizeof (iavf_port_t),
+ .initial_data = &iavf_port,
+ },
+ .rx_node = &iavf_rx_node,
+ .tx_node = &iavf_tx_node,
+ .rx_queue = {
+ .config = {
+ .data_size = sizeof (iavf_rxq_t),
+ .default_size = 512,
+ .multiplier = 32,
+ .min_size = 32,
+ .max_size = 4096,
+ .size_is_power_of_two = 1,
+ },
+ .ops = {
+ .alloc = iavf_rx_queue_alloc,
+ .free = iavf_rx_queue_free,
+ },
+ },
+ .tx_queue = {
+ .config = {
+ .data_size = sizeof (iavf_txq_t),
+ .default_size = 512,
+ .multiplier = 32,
+ .min_size = 32,
+ .max_size = 4096,
+ .size_is_power_of_two = 1,
+ },
+ .ops = {
+ .alloc = iavf_tx_queue_alloc,
+ .free = iavf_tx_queue_free,
+ },
+ },
+ };
+
+ vnet_dev_set_hw_addr_eth_mac (&port_add_args.port.attr.hw_addr,
+ res.vsi_res[0].default_mac_addr);
+
+ log_info (dev, "MAC address is %U", format_ethernet_address,
+ res.vsi_res[0].default_mac_addr);
+
+ if (n_threads <= vnet_dev_get_pci_n_msix_interrupts (dev) - 1)
+ {
+ port_add_args.port.attr.caps.interrupt_mode = 1;
+ iavf_port.n_rx_vectors = n_threads;
+ }
+ else
+ {
+ log_notice (
+ dev,
+	"number of threads (%u) exceeds the number of interrupt lines "
+ "(%u), interrupt mode disabled",
+ vlib_get_n_threads (), res.max_vectors);
+ iavf_port.n_rx_vectors = 1;
+ }
+
+ if (res.vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF)
+ {
+ if (res.rss_key_size < IAVF_MAX_RSS_KEY_SIZE)
+ {
+ log_notice (
+ dev, "unsupported RSS config provided by device, RSS disabled");
+ }
+ else
+ {
+ port_add_args.port.attr.caps.rss = 1;
+ if (res.rss_lut_size > IAVF_MAX_RSS_LUT_SIZE)
+ log_notice (dev, "device supports bigger RSS LUT than driver");
+ }
+ }
+
+ return vnet_dev_port_add (vm, dev, 0, &port_add_args);
+}
+
+static void
+iavf_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ log_debug (dev, "deinit");
+ iavf_aq_poll_off (vm, dev);
+ iavf_aq_deinit (vm, dev);
+ iavf_aq_free (vm, dev);
+}
+
+static void
+iavf_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ log_debug (dev, "free");
+ iavf_aq_free (vm, dev);
+}
+
+VNET_DEV_REGISTER_DRIVER (avf) = {
+ .name = "iavf",
+ .bus = "pci",
+ .device_data_sz = sizeof (iavf_device_t),
+ .runtime_temp_space_sz = sizeof (iavf_rt_data_t),
+ .ops = {
+ .alloc = iavf_alloc,
+ .init = iavf_init,
+ .deinit = iavf_deinit,
+ .free = iavf_free,
+ .probe = iavf_probe,
+ },
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "dev_iavf",
+};
diff --git a/src/plugins/dev_iavf/iavf.h b/src/plugins/dev_iavf/iavf.h
new file mode 100644
index 00000000000..39f92741a63
--- /dev/null
+++ b/src/plugins/dev_iavf/iavf.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_H_
+#define _IIAVF_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_iavf/iavf_desc.h>
+#include <dev_iavf/virtchnl.h>
+
+#define IAVF_ITR_INT 250
+#define IAVF_RX_MAX_DESC_IN_CHAIN 5
+#define IAVF_MAX_RSS_KEY_SIZE 52
+#define IAVF_MAX_RSS_LUT_SIZE 64
+#define IIAVF_AQ_POLL_INTERVAL 0.2
+#define IIAVF_AQ_BUF_SIZE 4096
+
+typedef struct iavf_adminq_dma_mem iavf_adminq_dma_mem_t;
+
+typedef struct
+{
+ u8 adminq_active : 1;
+ void *bar0;
+
+ /* Admin queues */
+ iavf_adminq_dma_mem_t *aq_mem;
+ u16 atq_next_slot;
+ u16 arq_next_slot;
+ virtchnl_pf_event_t *events;
+} iavf_device_t;
+
+typedef struct
+{
+ u32 flow_id;
+ u16 next_index;
+ i16 buffer_advance;
+} iavf_flow_lookup_entry_t;
+
+typedef struct
+{
+ u8 admin_up : 1;
+ u8 flow_offload : 1;
+ iavf_flow_lookup_entry_t *flow_lookup_entries;
+ u64 intr_mode_per_rxq_bitmap;
+ u32 vf_cap_flags;
+ u16 vsi_id;
+ u16 rss_key_size;
+ u16 rss_lut_size;
+ u16 num_qp;
+ u16 max_vectors;
+ u16 n_rx_vectors;
+} iavf_port_t;
+
+typedef struct
+{
+ u32 *qtx_tail;
+ u32 *buffer_indices;
+ iavf_tx_desc_t *descs;
+ u16 next;
+ u16 n_enqueued;
+ u16 *rs_slots;
+ iavf_tx_desc_t *tmp_descs;
+ u32 *tmp_bufs;
+ u32 *ph_bufs;
+} iavf_txq_t;
+
+typedef struct
+{
+ u32 *qrx_tail;
+ u32 *buffer_indices;
+ iavf_rx_desc_t *descs;
+ u16 next;
+ u16 n_enqueued;
+} iavf_rxq_t;
+
+typedef struct
+{
+ u16 qid;
+ u16 next_index;
+ u32 hw_if_index;
+ u32 flow_id;
+ u64 qw1s[IAVF_RX_MAX_DESC_IN_CHAIN];
+} iavf_rx_trace_t;
+
+/* adminq.c */
+vnet_dev_rv_t iavf_aq_alloc (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_init (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_poll_on (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_poll_off (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_deinit (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_free (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t iavf_aq_atq_enq (vlib_main_t *, vnet_dev_t *, iavf_aq_desc_t *,
+ const u8 *, u16, f64);
+int iavf_aq_arq_next_acq (vlib_main_t *, vnet_dev_t *, iavf_aq_desc_t **,
+ u8 **, f64);
+void iavf_aq_arq_next_rel (vlib_main_t *, vnet_dev_t *);
+format_function_t format_virtchnl_op_name;
+format_function_t format_virtchnl_status;
+
+/* format.c */
+format_function_t format_iavf_vf_cap_flags;
+format_function_t format_iavf_rx_trace;
+format_function_t format_iavf_port_status;
+
+/* port.c */
+vnet_dev_rv_t iavf_port_init (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t iavf_port_start (vlib_main_t *, vnet_dev_port_t *);
+void iavf_port_stop (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t iavf_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t iavf_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+
+/* queue.c */
+vnet_dev_rv_t iavf_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t iavf_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t iavf_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t iavf_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *);
+void iavf_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *);
+void iavf_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *);
+void iavf_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void iavf_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* counter.c */
+void iavf_port_poll_stats (vlib_main_t *, vnet_dev_port_t *);
+void iavf_port_add_counters (vlib_main_t *, vnet_dev_port_t *);
+
+/* inline funcs */
+
+static inline u32
+iavf_get_u32 (void *start, int offset)
+{
+ return *(u32 *) (((u8 *) start) + offset);
+}
+
+static inline void
+iavf_reg_write (iavf_device_t *ad, u32 addr, u32 val)
+{
+ __atomic_store_n ((u32 *) ((u8 *) ad->bar0 + addr), val, __ATOMIC_RELEASE);
+}
+
+static inline u32
+iavf_reg_read (iavf_device_t *ad, u32 addr)
+{
+  return __atomic_load_n ((u32 *) ((u8 *) ad->bar0 + addr), __ATOMIC_RELAXED);
+}
+
+static inline void
+iavf_reg_flush (iavf_device_t *ad)
+{
+ iavf_reg_read (ad, IAVF_VFGEN_RSTAT);
+ asm volatile("" ::: "memory");
+}
+
+#define log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, iavf_log.class, "%U" f, \
+ format_vnet_dev_log, (dev), \
+ clib_string_skip_prefix (__func__, "iavf_"), ##__VA_ARGS__)
+#define log_info(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_INFO, iavf_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_notice(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_NOTICE, iavf_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, iavf_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, iavf_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+
+/* temp */
+#define IAVF_RX_VECTOR_SZ VLIB_FRAME_SIZE
+
+typedef struct
+{
+ u64 qw1s[IAVF_RX_MAX_DESC_IN_CHAIN - 1];
+ u32 buffers[IAVF_RX_MAX_DESC_IN_CHAIN - 1];
+} iavf_rx_tail_t;
+
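+/* per-packet tail state for chained (multi-descriptor) packets: qword 1
+   and buffer index of each descriptor beyond the head */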
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vlib_buffer_t *bufs[IAVF_RX_VECTOR_SZ];
+ u16 next[IAVF_RX_VECTOR_SZ];
+ u64 qw1s[IAVF_RX_VECTOR_SZ];
+ u32 flow_ids[IAVF_RX_VECTOR_SZ];
+ iavf_rx_tail_t tails[IAVF_RX_VECTOR_SZ];
+} iavf_rt_data_t;
+
+#define foreach_iavf_tx_node_counter \
+ _ (SEG_SZ_EXCEEDED, seg_sz_exceeded, ERROR, "segment size exceeded") \
+ _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots")
+
+typedef enum
+{
+#define _(f, n, s, d) IAVF_TX_NODE_CTR_##f,
+ foreach_iavf_tx_node_counter
+#undef _
+} iavf_tx_node_counter_t;
+
+#define foreach_iavf_rx_node_counter \
+ _ (BUFFER_ALLOC, buffer_alloc, ERROR, "buffer alloc error")
+
+typedef enum
+{
+#define _(f, n, s, d) IAVF_RX_NODE_CTR_##f,
+ foreach_iavf_rx_node_counter
+#undef _
+} iavf_rx_node_counter_t;
+
+#endif /* _IIAVF_H_ */
diff --git a/src/plugins/dev_iavf/iavf_desc.h b/src/plugins/dev_iavf/iavf_desc.h
new file mode 100644
index 00000000000..053013ed9b0
--- /dev/null
+++ b/src/plugins/dev_iavf/iavf_desc.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_DESC_H_
+#define _IIAVF_DESC_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_iavf/virtchnl.h>
+
+#define IAVF_RX_MAX_DESC_IN_CHAIN 5
+
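+/* tx data descriptor helpers: command bits start at bit 4 of qword[1];
+   MACLEN/IPLEN/L4LEN offsets start at bit 16 and are expressed in 2-byte
+   words (MACLEN) or 4-byte dwords (IPLEN, L4LEN), hence the factor */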
+#define IAVF_TXD_CMD(x) (1 << (x + 4))
+#define IAVF_TXD_CMD_EXT(x, val) ((u64) val << (x + 4))
+#define IAVF_TXD_CMD_EOP IAVF_TXD_CMD (0)
+#define IAVF_TXD_CMD_RS IAVF_TXD_CMD (1)
+#define IAVF_TXD_CMD_RSV IAVF_TXD_CMD (2)
+#define IAVF_TXD_CMD_IIPT_NONE IAVF_TXD_CMD_EXT (5, 0)
+#define IAVF_TXD_CMD_IIPT_IPV6 IAVF_TXD_CMD_EXT (5, 1)
+#define IAVF_TXD_CMD_IIPT_IPV4_NO_CSUM IAVF_TXD_CMD_EXT (5, 2)
+#define IAVF_TXD_CMD_IIPT_IPV4 IAVF_TXD_CMD_EXT (5, 3)
+#define IAVF_TXD_CMD_L4T_UNKNOWN IAVF_TXD_CMD_EXT (8, 0)
+#define IAVF_TXD_CMD_L4T_TCP IAVF_TXD_CMD_EXT (8, 1)
+#define IAVF_TXD_CMD_L4T_SCTP IAVF_TXD_CMD_EXT (8, 2)
+#define IAVF_TXD_CMD_L4T_UDP IAVF_TXD_CMD_EXT (8, 3)
+#define IAVF_TXD_OFFSET(x, factor, val) \
+ (((u64) val / (u64) factor) << (16 + x))
+#define IAVF_TXD_OFFSET_MACLEN(val) IAVF_TXD_OFFSET (0, 2, val)
+#define IAVF_TXD_OFFSET_IPLEN(val) IAVF_TXD_OFFSET (7, 4, val)
+#define IAVF_TXD_OFFSET_L4LEN(val) IAVF_TXD_OFFSET (14, 4, val)
+#define IAVF_TXD_DTYP_CTX 0x1ULL
+#define IAVF_TXD_CTX_CMD_TSO IAVF_TXD_CMD (0)
+#define IAVF_TXD_CTX_SEG(val, x) (((u64) val) << (30 + x))
+#define IAVF_TXD_CTX_SEG_TLEN(val) IAVF_TXD_CTX_SEG (val, 0)
+#define IAVF_TXD_CTX_SEG_MSS(val) IAVF_TXD_CTX_SEG (val, 20)
+
+typedef union
+{
+ struct
+ {
+ u32 mirr : 13;
+ u32 _reserved1 : 3;
+ u32 l2tag1 : 16;
+ u32 filter_status;
+ };
+ u64 as_u64;
+} iavf_rx_desc_qw0_t;
+
+typedef union
+{
+ struct
+ {
+ /* status */
+ u64 dd : 1;
+ u64 eop : 1;
+ u64 l2tag1p : 1;
+ u64 l3l4p : 1;
+ u64 crcp : 1;
+ u64 _reserved2 : 4;
+ u64 ubmcast : 2;
+ u64 flm : 1;
+ u64 fltstat : 2;
+ u64 lpbk : 1;
+ u64 ipv6exadd : 1;
+ u64 _reserved3 : 2;
+ u64 int_udp_0 : 1;
+
+ /* error */
+ u64 _reserved_err0 : 3;
+ u64 ipe : 1;
+ u64 l4e : 1;
+ u64 _reserved_err5 : 1;
+ u64 oversize : 1;
+ u64 _reserved_err7 : 1;
+
+ u64 rsv2 : 3;
+ u64 ptype : 8;
+ u64 length : 26;
+ };
+ u64 as_u64;
+} iavf_rx_desc_qw1_t;
+
+STATIC_ASSERT_SIZEOF (iavf_rx_desc_qw0_t, 8);
+STATIC_ASSERT_SIZEOF (iavf_rx_desc_qw1_t, 8);
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ iavf_rx_desc_qw0_t qw0;
+ iavf_rx_desc_qw1_t qw1;
+ u64 rsv3 : 64;
+ u32 flex_lo;
+ u32 fdid_flex_hi;
+ };
+ u64 qword[4];
+ u64 addr;
+#ifdef CLIB_HAVE_VEC256
+ u64x4 as_u64x4;
+#endif
+ };
+} iavf_rx_desc_t;
+
+STATIC_ASSERT_SIZEOF (iavf_rx_desc_t, 32);
+
+typedef struct
+{
+ union
+ {
+ u64 qword[2];
+#ifdef CLIB_HAVE_VEC128
+ u64x2 as_u64x2;
+#endif
+ };
+} iavf_tx_desc_t;
+
+STATIC_ASSERT_SIZEOF (iavf_tx_desc_t, 16);
+
+#endif /* _IIAVF_DESC_H_ */
diff --git a/src/plugins/dev_iavf/iavf_regs.h b/src/plugins/dev_iavf/iavf_regs.h
new file mode 100644
index 00000000000..be3070b05e5
--- /dev/null
+++ b/src/plugins/dev_iavf/iavf_regs.h
@@ -0,0 +1,364 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_REGS_H_
+#define _IIAVF_REGS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+
+#define iavf_reg_ctrl_t_fields \
+ __ (1, full_duplex) \
+ __ (1, _reserved1) \
+ __ (1, gio_master_disable) \
+ __ (3, _reserved3) \
+ __ (1, set_link_up) \
+ __ (9, _reserved7) \
+ __ (1, sdp0_gpien) \
+ __ (1, sdp1_gpien) \
+ __ (1, sdp0_data) \
+ __ (1, sdp1_data) \
+ __ (1, adww3wuc) \
+ __ (1, sdp0_wde) \
+ __ (1, sdp0_iodir) \
+ __ (1, sdp1_iodir) \
+ __ (2, _reserved24) \
+ __ (1, port_sw_reset) \
+ __ (1, rx_flow_ctl_en) \
+ __ (1, tx_flow_ctl_en) \
+ __ (1, device_reset) \
+ __ (1, vlan_mode_enable) \
+ __ (1, phy_reset)
+
+#define iavf_reg_status_t_fields \
+ __ (1, full_duplex) \
+ __ (1, link_up) \
+ __ (2, _reserved2) \
+ __ (1, tx_off) \
+ __ (1, _reserved5) \
+ __ (2, speed) \
+ __ (2, asdv) \
+ __ (1, phy_reset_asserted) \
+ __ (8, _reserved11) \
+ __ (1, gio_master_en_sts) \
+ __ (1, dev_rst_set) \
+ __ (1, rst_done) \
+ __ (1, speed_2p5) \
+ __ (7, _reserved23) \
+ __ (1, lpi_ignore) \
+ __ (1, _reserved31)
+
+#define iavf_reg_ctrl_ext_t_fields \
+ __ (2, _reserved0) \
+ __ (1, sdp2_gpien) \
+ __ (1, sdp3_gpien) \
+ __ (2, _reserved4) \
+ __ (1, sdp2_data) \
+ __ (1, sdp3_data) \
+ __ (2, _reserved8) \
+ __ (1, sdp2_iodir) \
+ __ (1, sdp3_iodir) \
+ __ (1, _reserved12) \
+ __ (1, eeprom_block_rst) \
+ __ (2, _reserved14) \
+ __ (1, no_snoop_dis) \
+ __ (1, relaxed_ordering_dis) \
+ __ (2, _reserved18) \
+ __ (1, phy_power_down_ena) \
+ __ (5, _reserved21) \
+ __ (1, ext_vlan_ena) \
+ __ (1, _reserved27) \
+ __ (1, driver_loaded) \
+ __ (3, _reserved29)
+
+#define iavf_reg_mdic_t_fields \
+ __ (16, data) \
+ __ (5, regadd) \
+ __ (5, _reserved21) \
+ __ (2, opcode) \
+ __ (1, ready) \
+ __ (1, mid_ie) \
+ __ (1, mid_err) \
+ __ (1, _reserved31)
+
+#define iavf_reg_rctl_t_fields \
+ __ (1, _reserved0) \
+ __ (1, rx_enable) \
+ __ (1, store_bad_packets) \
+ __ (1, uc_promisc_ena) \
+ __ (1, mc_promisc_ena) \
+ __ (1, long_pkt_reception_ena) \
+ __ (2, loopback_mode) \
+ __ (2, hash_select) \
+ __ (2, _reserved10) \
+ __ (2, mc_uc_tbl_off) \
+ __ (1, _reserved14) \
+ __ (1, bcast_accept_mode) \
+ __ (2, rx_buf_sz) \
+ __ (1, vlan_filter_ena) \
+ __ (1, canonical_form_ind_ena) \
+ __ (1, canonical_form_ind_bit_val) \
+ __ (1, pad_small_rx_pkts) \
+ __ (1, discard_pause_frames) \
+ __ (1, pass_mac_ctrl_frames) \
+ __ (2, _reserved24) \
+ __ (1, strip_eth_crc) \
+ __ (5, _reserved26)
+
+#define iavf_reg_tctl_t_fields \
+ __ (1, _reserved0) \
+ __ (1, tx_enable) \
+ __ (1, _reserved2) \
+ __ (1, pad_short_pkts) \
+ __ (8, collision_threshold) \
+ __ (10, backoff_slot_time) \
+ __ (1, sw_xoff_tx) \
+ __ (1, _reserved23) \
+ __ (1, retransmit_on_late_collision) \
+ __ (7, _reserved25)
+
+#define iavf_reg_phpm_t_fields \
+ __ (1, _reserved0) \
+ __ (1, restart_autoneg) \
+ __ (1, _reserved2) \
+ __ (1, dis_1000_in_non_d0a) \
+ __ (1, link_energy_detect) \
+ __ (1, go_link_disc) \
+ __ (1, disable_1000) \
+ __ (1, spd_b2b_en) \
+ __ (1, rst_compl) \
+ __ (1, dis_100_in_non_d0a) \
+ __ (1, ulp_req) \
+ __ (1, disable_2500) \
+ __ (1, dis_2500_in_non_d0a) \
+ __ (1, ulp_trig) \
+ __ (2, ulp_delay) \
+ __ (1, link_enery_en) \
+ __ (1, dev_off_en) \
+ __ (1, dev_off_state) \
+ __ (1, ulp_en) \
+ __ (12, _reserved20)
+
+#define iavf_reg_manc_t_fields \
+ __ (1, flow_ctrl_discard) \
+ __ (1, ncsi_discard) \
+ __ (12, _reserved2) \
+ __ (1, fw_reset) \
+ __ (1, tco_isolate) \
+ __ (1, tco_reset) \
+ __ (1, rcv_tco_en) \
+ __ (1, keep_phy_link_up) \
+ __ (1, rcv_all) \
+ __ (1, inhibit_ulp) \
+ __ (2, _reserved21) \
+ __ (1, en_xsum_filter) \
+ __ (1, en_ipv4_filter) \
+ __ (1, fixed_net_type) \
+ __ (1, net_type) \
+ __ (1, ipv6_adv_only) \
+ __ (1, en_bmc2os) \
+ __ (1, en_bmc2net) \
+ __ (1, mproxye) \
+ __ (1, mproxya)
+
+#define iavf_reg_swsm_t_fields \
+ __ (1, smbi) \
+ __ (1, swesmbi) \
+ __ (30, _reserved2)
+
+#define iavf_reg_fwsm_t_fields \
+ __ (1, eep_fw_semaphore) \
+ __ (3, fw_mode) \
+ __ (2, _reserved4) \
+ __ (1, eep_reload_ind) \
+ __ (8, _reserved7) \
+ __ (1, fw_val_bit) \
+ __ (3, reset_ctr) \
+ __ (6, ext_err_ind) \
+ __ (1, pcie_config_err_ind) \
+ __ (5, _reserved26) \
+ __ (1, factory_mac_addr_restored)
+
+#define iavf_reg_sw_fw_sync_t_fields \
+ __ (1, sw_flash_sm) \
+ __ (1, sw_phy_sm) \
+ __ (1, sw_i2c_sm) \
+ __ (1, sw_mac_csr_sm) \
+ __ (3, _reserved4) \
+ __ (1, sw_svr_sm) \
+ __ (1, sw_mb_sm) \
+ __ (1, _reserved9) \
+ __ (1, sw_mng_sm) \
+ __ (5, _reserved11) \
+ __ (1, fw_flash_sm) \
+ __ (1, fw_phy_sm) \
+ __ (1, fw_i2c_sm) \
+ __ (1, fw_mac_csr_sm) \
+ __ (3, _reserved20) \
+ __ (1, fw_svr_sm) \
+ __ (8, _reserved24)
+
+#define iavf_reg_srrctl_t_fields \
+ __ (7, bsizepacket) \
+ __ (1, _reserved7) \
+ __ (6, bsizeheader) \
+ __ (2, timer1_sel) \
+ __ (1, _reserved16) \
+ __ (2, timer0_sel) \
+ __ (1, use_domain) \
+ __ (5, rdmts) \
+ __ (3, desc_type) \
+ __ (2, _reserved28) \
+ __ (1, timestamp) \
+ __ (1, drop_en)
+
+#define iavf_reg_rxdctl_t_fields \
+ __ (5, pthresh) \
+ __ (3, _reserved5) \
+ __ (5, hthresh) \
+ __ (3, _reserved13) \
+ __ (5, wthresh) \
+ __ (4, _reserved21) \
+ __ (1, enable) \
+ __ (1, swflush) \
+ __ (5, _reserved27)
+
+#define iavf_reg_eec_t_fields \
+ __ (6, _reserved0) \
+ __ (1, flash_in_use) \
+ __ (1, _reserved7) \
+ __ (1, ee_pres) \
+ __ (1, auto_rd) \
+ __ (1, _reserved10) \
+ __ (4, ee_size) \
+ __ (4, pci_ana_done) \
+ __ (1, flash_detected) \
+ __ (2, _reserved20) \
+ __ (1, shadow_modified) \
+ __ (1, flupd) \
+ __ (1, _reserved24) \
+ __ (1, sec1val) \
+ __ (1, fludone) \
+ __ (5, _reserved27)
+
+#define iavf_reg_eemngctl_t_fields \
+ __ (11, addr) \
+ __ (4, reserved11) \
+ __ (1, cmd_valid) \
+ __ (1, write) \
+ __ (1, eebusy) \
+ __ (1, cfg_done) \
+ __ (12, _reserved19) \
+ __ (1, done)
+
+#define IAVF_REG_STRUCT(n) \
+ typedef union \
+ { \
+ struct \
+ { \
+ n##_fields; \
+ }; \
+ u32 as_u32; \
+ } n; \
+ STATIC_ASSERT_SIZEOF (n, 4);
+
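+/* instantiate one bitfield struct per register: each *_fields list above
+   expands through the temporary __(width, name) definition below */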
+#define __(n, f) u32 f : n;
+IAVF_REG_STRUCT (iavf_reg_status_t);
+IAVF_REG_STRUCT (iavf_reg_ctrl_t);
+IAVF_REG_STRUCT (iavf_reg_ctrl_ext_t);
+IAVF_REG_STRUCT (iavf_reg_mdic_t);
+IAVF_REG_STRUCT (iavf_reg_rctl_t);
+IAVF_REG_STRUCT (iavf_reg_tctl_t);
+IAVF_REG_STRUCT (iavf_reg_phpm_t);
+IAVF_REG_STRUCT (iavf_reg_manc_t);
+IAVF_REG_STRUCT (iavf_reg_swsm_t);
+IAVF_REG_STRUCT (iavf_reg_fwsm_t);
+IAVF_REG_STRUCT (iavf_reg_sw_fw_sync_t);
+IAVF_REG_STRUCT (iavf_reg_srrctl_t);
+IAVF_REG_STRUCT (iavf_reg_rxdctl_t);
+IAVF_REG_STRUCT (iavf_reg_eec_t);
+IAVF_REG_STRUCT (iavf_reg_eemngctl_t);
+#undef __
+
+#define foreach_iavf_reg \
+ _ (0x00000, CTRL, iavf_reg_ctrl_t_fields) \
+ _ (0x00008, STATUS, iavf_reg_status_t_fields) \
+ _ (0x00018, CTRL_EXT, iavf_reg_ctrl_ext_t_fields) \
+ _ (0x00020, MDIC, iavf_reg_mdic_t_fields) \
+ _ (0x00100, RCTL, iavf_reg_rctl_t_fields) \
+ _ (0x00400, TCTL, iavf_reg_tctl_t_fields) \
+ _ (0x00404, TCTL_EXT, ) \
+ _ (0x00e14, PHPM, iavf_reg_phpm_t_fields) \
+ _ (0x01500, ICR, ) \
+ _ (0x0150c, IMC, ) \
+ _ (0x05400, RAL0, ) \
+ _ (0x05404, RAH0, ) \
+ _ (0x05820, MANC, iavf_reg_manc_t_fields) \
+ _ (0x05b50, SWSM, iavf_reg_swsm_t_fields) \
+ _ (0x05b54, FWSM, iavf_reg_fwsm_t_fields) \
+ _ (0x05b5c, SW_FW_SYNC, iavf_reg_sw_fw_sync_t_fields) \
+ _ (0x0c000, RDBAL0, ) \
+ _ (0x0c004, RDBAH0, ) \
+ _ (0x0c008, RDLEN0, ) \
+ _ (0x0c00c, SRRCTL0, iavf_reg_srrctl_t_fields) \
+ _ (0x0c010, RDH0, ) \
+ _ (0x0c018, RDT0, ) \
+ _ (0x0c028, RXDCTL0, iavf_reg_rxdctl_t_fields) \
+ _ (0x12010, EEC, iavf_reg_eec_t_fields) \
+ _ (0x12030, EEMNGCTL, iavf_reg_eemngctl_t_fields)
+
+#define IAVF_REG_RDBAL(n) (IAVF_REG_RDBAL0 + (n) * 0x40)
+#define IAVF_REG_RDBAH(n) (IAVF_REG_RDBAH0 + (n) * 0x40)
+#define IAVF_REG_RDLEN(n) (IAVF_REG_RDLEN0 + (n) * 0x40)
+#define IAVF_REG_SRRCTL(n) (IAVF_REG_SRRCTL0 + (n) * 0x40)
+#define IAVF_REG_RDH(n) (IAVF_REG_RDH0 + (n) * 0x40)
+#define IAVF_REG_RDT(n) (IAVF_REG_RDT0 + (n) * 0x40)
+#define IAVF_REG_RXDCTL(n) (IAVF_REG_RXDCTL0 + (n) * 0x40)
+
+typedef enum
+{
+#define _(o, n, f) IAVF_REG_##n = (o),
+ foreach_iavf_reg
+#undef _
+} iavf_reg_t;
+
+typedef union
+{
+ struct
+ {
+ u32 intena : 1;
+ u32 clearpba : 1;
+ u32 swint_trig : 1;
+ u32 itr_indx : 2;
+ u32 interval : 12;
+ u32 _rsvd23 : 7;
+ u32 sw_itr_indx_ena : 1;
+ u32 sw_itr_indx : 2;
+ u32 _rsvd29 : 3;
+ u32 wb_on_itr : 1;
+ u32 intena_msk : 1;
+ };
+ u32 as_u32;
+} iavf_dyn_ctl;
+
+STATIC_ASSERT_SIZEOF (iavf_dyn_ctl, 4);
+
+typedef union
+{
+ struct
+ {
+ u32 _reserved0 : 30;
+ u32 adminq : 1;
+ u32 _reserved31 : 1;
+ };
+ u32 as_u32;
+} iavf_vfint_icr0_ena1;
+
+STATIC_ASSERT_SIZEOF (iavf_vfint_icr0_ena1, 4);
+
+#endif /* _IIAVF_REGS_H_ */
diff --git a/src/plugins/dev_iavf/port.c b/src/plugins/dev_iavf/port.c
new file mode 100644
index 00000000000..982436d9b45
--- /dev/null
+++ b/src/plugins/dev_iavf/port.c
@@ -0,0 +1,543 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/iavf_regs.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "port",
+};
+
+static const u8 default_rss_key[] = {
+ 0x44, 0x39, 0x79, 0x6b, 0xb5, 0x4c, 0x50, 0x23, 0xb6, 0x75, 0xea, 0x5b, 0x12,
+ 0x4f, 0x9f, 0x30, 0xb8, 0xa2, 0xc0, 0x3d, 0xdf, 0xdc, 0x4d, 0x02, 0xa0, 0x8c,
+ 0x9b, 0x33, 0x4a, 0xf6, 0x4a, 0x4c, 0x05, 0xc6, 0xfa, 0x34, 0x39, 0x58, 0xd8,
+ 0x55, 0x7d, 0x99, 0x58, 0x3a, 0xe1, 0x38, 0xc9, 0x2e, 0x81, 0x15, 0x03, 0x66,
+};
+
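+/* precomputed VFINT_DYN_CTLN values: interrupts disabled, interrupts
+   enabled, and write-back-on-ITR (descriptor write-back without raising
+   an interrupt) for rx queues running in polling mode */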
+static const iavf_dyn_ctl dyn_ctln_disabled = {};
+static const iavf_dyn_ctl dyn_ctln_enabled = {
+ .intena = 1,
+ .clearpba = 1,
+ .interval = IAVF_ITR_INT / 2,
+};
+static const iavf_dyn_ctl dyn_ctln_wb_on_itr = {
+ .itr_indx = 1,
+ .interval = 2,
+ .wb_on_itr = 1,
+};
+
+vnet_dev_rv_t
+iavf_port_vlan_strip_disable (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ virtchnl_vlan_caps_t vc;
+ vnet_dev_rv_t rv;
+ u32 outer, inner;
+ const u32 mask = VIRTCHNL_VLAN_ETHERTYPE_8100;
+
+ if ((ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) == 0)
+ return iavf_vc_op_disable_vlan_stripping (vm, dev);
+
+ if ((rv = iavf_vc_op_get_offload_vlan_v2_caps (vm, dev, &vc)))
+ return rv;
+
+ outer = vc.offloads.stripping_support.outer;
+ inner = vc.offloads.stripping_support.inner;
+
+ outer = outer & VIRTCHNL_VLAN_TOGGLE ? outer & mask : 0;
+ inner = inner & VIRTCHNL_VLAN_TOGGLE ? inner & mask : 0;
+
+ virtchnl_vlan_setting_t vs = {
+ .vport_id = ap->vsi_id,
+ .outer_ethertype_setting = outer,
+ .inner_ethertype_setting = inner,
+ };
+
+ return iavf_vc_op_disable_vlan_stripping_v2 (vm, dev, &vs);
+}
+
+vnet_dev_rv_t
+iavf_port_init_rss (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ u16 keylen = clib_min (sizeof (default_rss_key), ap->rss_key_size);
+ u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_rss_key_t, key, keylen)];
+ virtchnl_rss_key_t *key = (virtchnl_rss_key_t *) buffer;
+
+ if (!port->attr.caps.rss)
+ return VNET_DEV_OK;
+
+ /* config RSS key */
+ *key = (virtchnl_rss_key_t){
+ .vsi_id = ap->vsi_id,
+ .key_len = keylen,
+ };
+
+ clib_memcpy (key->key, default_rss_key, sizeof (default_rss_key));
+
+ return iavf_vc_op_config_rss_key (vm, dev, key);
+}
+
+vnet_dev_rv_t
+iavf_port_update_rss_lut (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ u16 lut_size = clib_min (IAVF_MAX_RSS_LUT_SIZE, ap->rss_lut_size);
+ u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_rss_lut_t, lut, lut_size)];
+ virtchnl_rss_lut_t *lut = (virtchnl_rss_lut_t *) buffer;
+ u32 enabled_rxq_bmp = 0;
+
+ if (!port->attr.caps.rss)
+ return VNET_DEV_OK;
+
+ *lut = (virtchnl_rss_lut_t){
+ .vsi_id = ap->vsi_id,
+ .lut_entries = lut_size,
+ };
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->enabled)
+ enabled_rxq_bmp |= 1ULL << q->queue_id;
+
+ /* fill the RSS LUT by distributing enabled rx queue ids round-robin */
+ for (u32 i = 0, j; i < lut->lut_entries;)
+ foreach_set_bit_index (j, enabled_rxq_bmp)
+ {
+ lut->lut[i++] = j;
+ if (i >= lut->lut_entries)
+ break;
+ }
+
+ return iavf_vc_op_config_rss_lut (vm, dev, lut);
+}
+
+vnet_dev_rv_t
+iavf_port_init_vsi_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ virtchnl_queue_pair_info_t *qpi;
+ u16 vsi_id = ap->vsi_id;
+ u16 data_size = vlib_buffer_get_default_data_size (vm);
+ u16 max_frame_size = port->max_rx_frame_size;
+ u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_vsi_queue_config_info_t, qpair,
+ ap->num_qp)];
+ virtchnl_vsi_queue_config_info_t *ci =
+ (virtchnl_vsi_queue_config_info_t *) buffer;
+
+ *ci = (virtchnl_vsi_queue_config_info_t){
+ .num_queue_pairs = ap->num_qp,
+ .vsi_id = vsi_id,
+ };
+
+ for (u16 i = 0; i < ap->num_qp; i++)
+ ci->qpair[i] = (virtchnl_queue_pair_info_t){
+ .rxq = { .vsi_id = vsi_id,
+ .queue_id = i,
+ .max_pkt_size = ETHERNET_MIN_PACKET_BYTES },
+ .txq = { .vsi_id = vsi_id, .queue_id = i },
+ };
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (q);
+ qpi = ci->qpair + q->queue_id;
+ qpi->rxq.ring_len = q->size;
+ qpi->rxq.databuffer_size = data_size;
+ qpi->rxq.dma_ring_addr = vnet_dev_get_dma_addr (vm, dev, arq->descs);
+ qpi->rxq.max_pkt_size = max_frame_size;
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (q);
+ qpi = ci->qpair + q->queue_id;
+ qpi->txq.ring_len = q->size;
+ qpi->txq.dma_ring_addr = vnet_dev_get_dma_addr (vm, dev, atq->descs);
+ }
+
+ return iavf_vc_op_config_vsi_queues (vm, dev, ci);
+}
+
+vnet_dev_rv_t
+iavf_port_rx_irq_config (vlib_main_t *vm, vnet_dev_port_t *port, int enable)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ u16 n_rx_vectors = ap->n_rx_vectors;
+ u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_irq_map_info_t, vecmap, n_rx_vectors)];
+ u8 n_intr_mode_queues_per_vector[n_rx_vectors];
+ u8 n_queues_per_vector[n_rx_vectors];
+ virtchnl_irq_map_info_t *im = (virtchnl_irq_map_info_t *) buffer;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "intr mode per queue bitmap 0x%x",
+ ap->intr_mode_per_rxq_bitmap);
+
+ for (u32 i = 0; i < n_rx_vectors; i++)
+ n_intr_mode_queues_per_vector[i] = n_queues_per_vector[i] = 0;
+
+ *im = (virtchnl_irq_map_info_t){
+ .num_vectors = n_rx_vectors,
+ };
+
+ if (port->attr.caps.interrupt_mode)
+ {
+ for (u16 i = 0; i < im->num_vectors; i++)
+ im->vecmap[i] = (virtchnl_vector_map_t){
+ .vsi_id = ap->vsi_id,
+ .vector_id = i + 1,
+ };
+ if (enable)
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ if (rxq->enabled)
+ {
+ u32 i = rxq->rx_thread_index;
+ im->vecmap[i].rxq_map |= 1 << rxq->queue_id;
+ n_queues_per_vector[i]++;
+ n_intr_mode_queues_per_vector[i] +=
+ u64_is_bit_set (ap->intr_mode_per_rxq_bitmap, rxq->queue_id);
+ }
+ }
+ else
+ {
+ im->vecmap[0] = (virtchnl_vector_map_t){
+ .vsi_id = ap->vsi_id,
+ .vector_id = 1,
+ };
+ if (enable)
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ if (rxq->enabled)
+ im->vecmap[0].rxq_map |= 1 << rxq->queue_id;
+ }
+
+ if ((rv = iavf_vc_op_config_irq_map (vm, dev, im)))
+ return rv;
+
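+ /* program one VFINT_DYN_CTLN register per rx vector: disabled when the
+    vector serves no queues, wb-on-itr when all of its queues poll,
+    fully enabled otherwise */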
+ for (int i = 0; i < n_rx_vectors; i++)
+ {
+ u32 val;
+
+ if (enable == 0 || n_queues_per_vector[i] == 0)
+ val = dyn_ctln_disabled.as_u32;
+ else if (ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR &&
+ n_intr_mode_queues_per_vector[i] == 0)
+ val = dyn_ctln_wb_on_itr.as_u32;
+ else
+ val = dyn_ctln_enabled.as_u32;
+
+ iavf_reg_write (ad, IAVF_VFINT_DYN_CTLN (i), val);
+ log_debug (dev, "VFINT_DYN_CTLN(%u) set to 0x%x", i, val);
+ }
+
+ return rv;
+}
+
+static void
+iavf_msix_n_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 line)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ vnet_dev_port_t *port = vnet_dev_get_port_by_id (dev, 0);
+
+ line--;
+
+ iavf_reg_write (ad, IAVF_VFINT_DYN_CTLN (line), dyn_ctln_enabled.as_u32);
+ vlib_node_set_interrupt_pending (vlib_get_main_by_index (line),
+ port->intf.rx_node_index);
+}
+
+vnet_dev_rv_t
+iavf_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ vnet_dev_rv_t rv;
+
+ log_debug (port->dev, "port %u", port->port_id);
+
+ ap->intr_mode_per_rxq_bitmap = 0;
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->interrupt_mode)
+ u64_bit_set (&ap->intr_mode_per_rxq_bitmap, q->queue_id, 1);
+
+ if ((rv = iavf_port_vlan_strip_disable (vm, port)))
+ return rv;
+
+ if ((rv = iavf_port_init_rss (vm, port)))
+ return rv;
+
+ vnet_dev_pci_msix_add_handler (vm, dev, &iavf_msix_n_handler, 1,
+ ap->n_rx_vectors);
+ vnet_dev_pci_msix_enable (vm, dev, 1, ap->n_rx_vectors);
+ for (u32 i = 1; i < ap->n_rx_vectors; i++)
+ vnet_dev_pci_msix_set_polling_thread (vm, dev, i + 1, i);
+
+ if (port->dev->poll_stats)
+ iavf_port_add_counters (vm, port);
+
+ return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+iavf_enable_disable_queues (vlib_main_t *vm, vnet_dev_port_t *port, int enable)
+{
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+
+ virtchnl_queue_select_t qs = {
+ .vsi_id = ap->vsi_id,
+ };
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if ((enable && q->enabled) || (!enable && q->started))
+ qs.rx_queues |= 1ULL << q->queue_id;
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if ((enable && q->enabled) || (!enable && q->started))
+ qs.tx_queues |= 1ULL << q->queue_id;
+
+ return enable ? iavf_vc_op_enable_queues (vm, port->dev, &qs) :
+ iavf_vc_op_disable_queues (vm, port->dev, &qs);
+}
+
+vnet_dev_rv_t
+iavf_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_rv_t rv;
+
+ log_debug (port->dev, "port %u", port->port_id);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->enabled)
+ if ((rv = iavf_rx_queue_start (vm, q)))
+ goto done;
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if ((rv = iavf_tx_queue_start (vm, q)))
+ goto done;
+
+ if ((rv = iavf_port_update_rss_lut (vm, port)))
+ goto done;
+
+ /* configure qpairs */
+ if ((rv = iavf_port_init_vsi_queues (vm, port)))
+ goto done;
+
+ if ((rv = iavf_port_rx_irq_config (vm, port, /* enable */ 1)))
+ goto done;
+
+ if ((rv = iavf_enable_disable_queues (vm, port, 1)))
+ goto done;
+
+ if (port->dev->poll_stats)
+ vnet_dev_poll_port_add (vm, port, 1, iavf_port_poll_stats);
+
+done:
+ if (rv)
+ {
+ foreach_vnet_dev_port_rx_queue (q, port)
+ iavf_rx_queue_stop (vm, q);
+ foreach_vnet_dev_port_tx_queue (q, port)
+ iavf_tx_queue_stop (vm, q);
+ }
+ return rv;
+}
+
+void
+iavf_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ log_debug (port->dev, "port %u", port->port_id);
+
+ iavf_enable_disable_queues (vm, port, /* enable */ 0);
+ iavf_port_rx_irq_config (vm, port, /* enable */ 0);
+
+ if (port->dev->poll_stats)
+ vnet_dev_poll_port_remove (vm, port, iavf_port_poll_stats);
+
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ iavf_rx_queue_stop (vm, rxq);
+
+ foreach_vnet_dev_port_tx_queue (txq, port)
+ iavf_tx_queue_stop (vm, txq);
+
+ vnet_dev_port_state_change (vm, port,
+ (vnet_dev_port_state_changes_t){
+ .change.link_state = 1,
+ .change.link_speed = 1,
+ .link_speed = 0,
+ .link_state = 0,
+ });
+}
+
+vnet_dev_rv_t
+iavf_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ if (port->started)
+ rv = VNET_DEV_ERR_PORT_STARTED;
+ break;
+
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ break;
+
+ default:
+ rv = VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ return rv;
+}
+
+static vnet_dev_rv_t
+iavf_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_hw_addr_t *addr, int is_add,
+ int is_primary)
+{
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ virtchnl_ether_addr_list_t al = {
+ .vsi_id = ap->vsi_id,
+ .num_elements = 1,
+ .list[0].primary = is_primary ? 1 : 0,
+ .list[0].extra = is_primary ? 0 : 1,
+ };
+
+ clib_memcpy (al.list[0].addr, addr, sizeof (al.list[0].addr));
+
+ return is_add ? iavf_vc_op_add_eth_addr (vm, port->dev, &al) :
+ iavf_vc_op_del_eth_addr (vm, port->dev, &al);
+}
+
+static vnet_dev_rv_t
+iavf_port_cfg_rxq_int_mode_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ u16 qid, u8 state, u8 all)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ vnet_dev_t *dev = port->dev;
+ char *ed = state ? "ena" : "disa";
+ char qstr[16];
+ u64 old, new = 0;
+
+ state = state != 0;
+ old = ap->intr_mode_per_rxq_bitmap;
+
+ if (all)
+ {
+ snprintf (qstr, sizeof (qstr), "all queues");
+ if (state)
+ foreach_vnet_dev_port_rx_queue (q, port)
+ u64_bit_set (&new, q->queue_id, 1);
+ }
+ else
+ {
+ snprintf (qstr, sizeof (qstr), "queue %u", qid);
+ new = old;
+ u64_bit_set (&new, qid, state);
+ }
+
+ if (new == old)
+ {
+ log_warn (dev, "interrupt mode already %sbled on %s", ed, qstr);
+ return rv;
+ }
+
+ ap->intr_mode_per_rxq_bitmap = new;
+
+ if (port->started)
+ {
+ if ((rv = iavf_port_rx_irq_config (vm, port, 1)))
+ {
+ ap->intr_mode_per_rxq_bitmap = old;
+ log_err (dev, "failed to %sble interrupt mode on %s", ed, qstr);
+ return rv;
+ }
+ }
+
+ log_debug (dev, "interrupt mode %sbled on %s, new bitmap is 0x%x", ed, qstr,
+ new);
+ return rv;
+}
+
+vnet_dev_rv_t
+iavf_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ {
+ virtchnl_promisc_info_t pi = {
+ .vsi_id = ap->vsi_id,
+ .unicast_promisc = req->promisc,
+ .multicast_promisc = req->promisc,
+ };
+
+ rv = iavf_vc_op_config_promisc_mode (vm, dev, &pi);
+ }
+ break;
+
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ rv = iavf_port_add_del_eth_addr (vm, port, &port->primary_hw_addr,
+ /* is_add */ 0,
+ /* is_primary */ 1);
+ if (rv == VNET_DEV_OK)
+ rv = iavf_port_add_del_eth_addr (vm, port, &req->addr,
+ /* is_add */ 1,
+ /* is_primary */ 1);
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ rv = iavf_port_add_del_eth_addr (vm, port, &req->addr,
+ /* is_add */ 1,
+ /* is_primary */ 0);
+ break;
+
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ rv = iavf_port_add_del_eth_addr (vm, port, &req->addr,
+ /* is_add */ 0,
+ /* is_primary */ 0);
+ break;
+
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ break;
+
+ case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE:
+ rv = iavf_port_cfg_rxq_int_mode_change (vm, port, req->queue_id, 1,
+ req->all_queues);
+ break;
+
+ case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE:
+ rv = iavf_port_cfg_rxq_int_mode_change (vm, port, req->queue_id, 0,
+ req->all_queues);
+ break;
+
+ default:
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ return rv;
+}
diff --git a/src/plugins/dev_iavf/queue.c b/src/plugins/dev_iavf/queue.c
new file mode 100644
index 00000000000..113c0dbdfc7
--- /dev/null
+++ b/src/plugins/dev_iavf/queue.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <vppinfra/ring.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "queue",
+};
+
+vnet_dev_rv_t
+iavf_rx_queue_alloc (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_rv_t rv;
+
+ arq->buffer_indices = clib_mem_alloc_aligned (
+ rxq->size * sizeof (arq->buffer_indices[0]), CLIB_CACHE_LINE_BYTES);
+
+ if ((rv =
+ vnet_dev_dma_mem_alloc (vm, dev, sizeof (iavf_rx_desc_t) * rxq->size,
+ 0, (void **) &arq->descs)))
+ return rv;
+
+ arq->qrx_tail = ad->bar0 + IAVF_QRX_TAIL (rxq->queue_id);
+
+ log_debug (dev, "queue %u alocated", rxq->queue_id);
+ return rv;
+}
+
+void
+iavf_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_t *dev = rxq->port->dev;
+ iavf_rxq_t *aq = vnet_dev_get_rx_queue_data (rxq);
+
+ log_debug (dev, "queue %u", rxq->queue_id);
+
+ vnet_dev_dma_mem_free (vm, dev, aq->descs);
+
+ foreach_pointer (p, aq->buffer_indices)
+ if (p)
+ clib_mem_free (p);
+}
+
+vnet_dev_rv_t
+iavf_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_t *dev = txq->port->dev;
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_rv_t rv;
+
+ if ((rv =
+ vnet_dev_dma_mem_alloc (vm, dev, sizeof (iavf_tx_desc_t) * txq->size,
+ 0, (void **) &atq->descs)))
+ return rv;
+
+ clib_ring_new_aligned (atq->rs_slots, 32, CLIB_CACHE_LINE_BYTES);
+ atq->buffer_indices = clib_mem_alloc_aligned (
+ txq->size * sizeof (atq->buffer_indices[0]), CLIB_CACHE_LINE_BYTES);
+ atq->tmp_descs = clib_mem_alloc_aligned (
+ sizeof (atq->tmp_descs[0]) * txq->size, CLIB_CACHE_LINE_BYTES);
+ atq->tmp_bufs = clib_mem_alloc_aligned (
+ sizeof (atq->tmp_bufs[0]) * txq->size, CLIB_CACHE_LINE_BYTES);
+
+ atq->qtx_tail = ad->bar0 + IAVF_QTX_TAIL (txq->queue_id);
+
+ log_debug (dev, "queue %u alocated", txq->queue_id);
+ return VNET_DEV_OK;
+}
+
+void
+iavf_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_t *dev = txq->port->dev;
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+
+ log_debug (dev, "queue %u", txq->queue_id);
+ vnet_dev_dma_mem_free (vm, dev, atq->descs);
+ clib_ring_free (atq->rs_slots);
+
+ foreach_pointer (p, atq->tmp_descs, atq->tmp_bufs, atq->buffer_indices)
+ if (p)
+ clib_mem_free (p);
+}
+
+vnet_dev_rv_t
+iavf_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_t *dev = rxq->port->dev;
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+ iavf_rx_desc_t *d = arq->descs;
+ u32 n_enq, *bi = arq->buffer_indices;
+ u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+
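+ /* fill all but the last few slots; fewer than 8 allocated buffers is
+    treated as failure and everything is returned to the pool */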
+ n_enq = vlib_buffer_alloc_from_pool (vm, bi, rxq->size - 8, bpi);
+
+ if (n_enq < 8)
+ {
+ if (n_enq)
+ vlib_buffer_free (vm, bi, n_enq);
+ return VNET_DEV_ERR_BUFFER_ALLOC_FAIL;
+ }
+
+ for (u32 i = 0; i < n_enq; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi[i]);
+ u64 dma_addr = vnet_dev_get_dma_addr (vm, dev, b->data);
+ d[i] = (iavf_rx_desc_t){ .addr = dma_addr };
+ }
+
+ arq->n_enqueued = n_enq;
+ arq->next = 0;
+ __atomic_store_n (arq->qrx_tail, n_enq, __ATOMIC_RELEASE);
+ return VNET_DEV_OK;
+}
+
+void
+iavf_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+
+ __atomic_store_n (arq->qrx_tail, 0, __ATOMIC_RELAXED);
+ if (arq->n_enqueued)
+ {
+ vlib_buffer_free_from_ring_no_next (vm, arq->buffer_indices, arq->next,
+ rxq->size, arq->n_enqueued);
+ log_debug (rxq->port->dev, "%u buffers freed from rx queue %u",
+ arq->n_enqueued, rxq->queue_id);
+ }
+ arq->n_enqueued = arq->next = 0;
+}
+
+vnet_dev_rv_t
+iavf_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+ atq->next = 0;
+ atq->n_enqueued = 0;
+ clib_ring_reset (atq->rs_slots);
+ __atomic_store_n (atq->qtx_tail, 0, __ATOMIC_RELAXED);
+ return VNET_DEV_OK;
+}
+
+void
+iavf_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+
+ log_debug (txq->port->dev, "queue %u", txq->queue_id);
+
+ __atomic_store_n (atq->qtx_tail, 0, __ATOMIC_RELAXED);
+ if (atq->n_enqueued)
+ {
+ vlib_buffer_free_from_ring_no_next (vm, atq->buffer_indices,
+ atq->next - atq->n_enqueued,
+ txq->size, atq->n_enqueued);
+ log_debug (txq->port->dev, "%u buffers freed from tx queue %u",
+ atq->n_enqueued, txq->queue_id);
+ }
+ atq->n_enqueued = atq->next = 0;
+}
diff --git a/src/plugins/dev_iavf/rx_node.c b/src/plugins/dev_iavf/rx_node.c
new file mode 100644
index 00000000000..ee6d7e8def0
--- /dev/null
+++ b/src/plugins/dev_iavf/rx_node.c
@@ -0,0 +1,529 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <dev_iavf/iavf.h>
+
+#define IAVF_RX_REFILL_THRESHOLD 32
+
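+/* qword[1] status-bit masks: DD marks a completed descriptor, EOP the
+   last descriptor of a packet, FLM a flow director match, IPE an IPv4
+   checksum error */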
+static const iavf_rx_desc_qw1_t mask_eop = { .eop = 1 };
+static const iavf_rx_desc_qw1_t mask_flm = { .flm = 1 };
+static const iavf_rx_desc_qw1_t mask_dd = { .dd = 1 };
+static const iavf_rx_desc_qw1_t mask_ipe = { .ipe = 1 };
+static const iavf_rx_desc_qw1_t mask_dd_eop = { .dd = 1, .eop = 1 };
+
+static_always_inline int
+iavf_rxd_is_not_eop (iavf_rx_desc_t *d)
+{
+ return (d->qw1.as_u64 & mask_eop.as_u64) == 0;
+}
+
+static_always_inline int
+iavf_rxd_is_not_dd (iavf_rx_desc_t *d)
+{
+ return (d->qw1.as_u64 & mask_dd.as_u64) == 0;
+}
+
+static_always_inline void
+iavf_rx_desc_write (iavf_rx_desc_t *d, u64 addr)
+{
+#ifdef CLIB_HAVE_VEC256
+ *(u64x4 *) d = (u64x4){ addr, 0, 0, 0 };
+#else
+ d->qword[0] = addr;
+ d->qword[1] = 0;
+#endif
+}
+
+static_always_inline void
+iavf_rxq_refill (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_dev_rx_queue_t *rxq, int use_va_dma)
+{
+ u16 n_refill, mask, n_alloc, slot, size;
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+ vlib_buffer_t *b[8];
+ iavf_rx_desc_t *d, *first_d;
+ void *p[8];
+
+ size = rxq->size;
+ mask = size - 1;
+ n_refill = mask - arq->n_enqueued;
+ if (PREDICT_TRUE (n_refill <= IAVF_RX_REFILL_THRESHOLD))
+ return;
+
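+ /* first free slot follows the enqueued region: (next - n_refill - 1)
+    equals next + n_enqueued modulo ring size, so one slot always stays
+    empty between tail and head */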
+ slot = (arq->next - n_refill - 1) & mask;
+
+ n_refill &= ~7; /* round to 8 */
+ n_alloc = vlib_buffer_alloc_to_ring_from_pool (
+ vm, arq->buffer_indices, slot, size, n_refill,
+ vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+
+ if (PREDICT_FALSE (n_alloc != n_refill))
+ {
+ vlib_error_count (vm, node->node_index, IAVF_RX_NODE_CTR_BUFFER_ALLOC,
+ 1);
+ if (n_alloc)
+ vlib_buffer_free_from_ring (vm, arq->buffer_indices, slot, size,
+ n_alloc);
+ return;
+ }
+
+ arq->n_enqueued += n_alloc;
+ first_d = arq->descs;
+
+ ASSERT (slot % 8 == 0);
+
+ while (n_alloc >= 8)
+ {
+ d = first_d + slot;
+
+ if (use_va_dma)
+ {
+ vlib_get_buffers_with_offset (vm, arq->buffer_indices + slot, p, 8,
+ sizeof (vlib_buffer_t));
+ iavf_rx_desc_write (d + 0, pointer_to_uword (p[0]));
+ iavf_rx_desc_write (d + 1, pointer_to_uword (p[1]));
+ iavf_rx_desc_write (d + 2, pointer_to_uword (p[2]));
+ iavf_rx_desc_write (d + 3, pointer_to_uword (p[3]));
+ iavf_rx_desc_write (d + 4, pointer_to_uword (p[4]));
+ iavf_rx_desc_write (d + 5, pointer_to_uword (p[5]));
+ iavf_rx_desc_write (d + 6, pointer_to_uword (p[6]));
+ iavf_rx_desc_write (d + 7, pointer_to_uword (p[7]));
+ }
+ else
+ {
+ vlib_get_buffers (vm, arq->buffer_indices + slot, b, 8);
+ iavf_rx_desc_write (d + 0, vlib_buffer_get_pa (vm, b[0]));
+ iavf_rx_desc_write (d + 1, vlib_buffer_get_pa (vm, b[1]));
+ iavf_rx_desc_write (d + 2, vlib_buffer_get_pa (vm, b[2]));
+ iavf_rx_desc_write (d + 3, vlib_buffer_get_pa (vm, b[3]));
+ iavf_rx_desc_write (d + 4, vlib_buffer_get_pa (vm, b[4]));
+ iavf_rx_desc_write (d + 5, vlib_buffer_get_pa (vm, b[5]));
+ iavf_rx_desc_write (d + 6, vlib_buffer_get_pa (vm, b[6]));
+ iavf_rx_desc_write (d + 7, vlib_buffer_get_pa (vm, b[7]));
+ }
+
+ /* next */
+ slot = (slot + 8) & mask;
+ n_alloc -= 8;
+ }
+
+ /* RXQ can be smaller than 256 packets, especially if jumbo; clear
+ qword[1] at the new tail so a stale DD bit is never read as a
+ completed descriptor */
+ arq->descs[slot].qword[1] = 0;
+
+ __atomic_store_n (arq->qrx_tail, slot, __ATOMIC_RELEASE);
+}
+
+static_always_inline uword
+iavf_rx_attach_tail (vlib_main_t *vm, vlib_buffer_template_t *bt,
+ vlib_buffer_t *b, u64 qw1, iavf_rx_tail_t *t)
+{
+ vlib_buffer_t *hb = b;
+ u32 tlnifb = 0, i = 0;
+
+ if (qw1 & mask_eop.as_u64)
+ return 0;
+
+ while ((qw1 & mask_eop.as_u64) == 0)
+ {
+ ASSERT (i < IAVF_RX_MAX_DESC_IN_CHAIN - 1);
+ ASSERT (qw1 & mask_dd.as_u64);
+ qw1 = t->qw1s[i];
+ b->next_buffer = t->buffers[i];
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ b->template = *bt;
+ tlnifb += b->current_length = ((iavf_rx_desc_qw1_t) qw1).length;
+ i++;
+ }
+
+ hb->total_length_not_including_first_buffer = tlnifb;
+ hb->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ return tlnifb;
+}
+
+static_always_inline void
+iavf_process_flow_offload (vnet_dev_port_t *port, iavf_rt_data_t *rtd,
+ uword n_rx_packets)
+{
+ uword n;
+ iavf_flow_lookup_entry_t fle;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+
+ for (n = 0; n < n_rx_packets; n++)
+ {
+ if ((rtd->qw1s[n] & mask_flm.as_u64) == 0)
+ continue;
+
+ fle = *pool_elt_at_index (ap->flow_lookup_entries, rtd->flow_ids[n]);
+
+ if (fle.next_index != (u16) ~0)
+ rtd->next[n] = fle.next_index;
+
+ if (fle.flow_id != ~0)
+ rtd->bufs[n]->flow_id = fle.flow_id;
+
+ if (fle.buffer_advance != ~0)
+ vlib_buffer_advance (rtd->bufs[n], fle.buffer_advance);
+ }
+}
+
+static_always_inline uword
+iavf_process_rx_burst (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_dev_rx_queue_t *rxq, iavf_rt_data_t *rtd,
+ vlib_buffer_template_t *bt, u32 n_left,
+ int maybe_multiseg)
+{
+ vlib_buffer_t **b = rtd->bufs;
+ u64 *qw1 = rtd->qw1s;
+ iavf_rx_tail_t *tail = rtd->tails;
+ uword n_rx_bytes = 0;
+
+ while (n_left >= 4)
+ {
+ if (n_left >= 12)
+ {
+ vlib_prefetch_buffer_header (b[8], LOAD);
+ vlib_prefetch_buffer_header (b[9], LOAD);
+ vlib_prefetch_buffer_header (b[10], LOAD);
+ vlib_prefetch_buffer_header (b[11], LOAD);
+ }
+
+ b[0]->template = *bt;
+ b[1]->template = *bt;
+ b[2]->template = *bt;
+ b[3]->template = *bt;
+
+ n_rx_bytes += b[0]->current_length =
+ ((iavf_rx_desc_qw1_t) qw1[0]).length;
+ n_rx_bytes += b[1]->current_length =
+ ((iavf_rx_desc_qw1_t) qw1[1]).length;
+ n_rx_bytes += b[2]->current_length =
+ ((iavf_rx_desc_qw1_t) qw1[2]).length;
+ n_rx_bytes += b[3]->current_length =
+ ((iavf_rx_desc_qw1_t) qw1[3]).length;
+
+ if (maybe_multiseg)
+ {
+ n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0);
+ n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[1], qw1[1], tail + 1);
+ n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[2], qw1[2], tail + 2);
+ n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[3], qw1[3], tail + 3);
+ }
+
+ /* next */
+ qw1 += 4;
+ tail += 4;
+ b += 4;
+ n_left -= 4;
+ }
+
+ while (n_left)
+ {
+ b[0]->template = *bt;
+
+ n_rx_bytes += b[0]->current_length =
+ ((iavf_rx_desc_qw1_t) qw1[0]).length;
+
+ if (maybe_multiseg)
+ n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0);
+
+ /* next */
+ qw1 += 1;
+ tail += 1;
+ b += 1;
+ n_left -= 1;
+ }
+ return n_rx_bytes;
+}
+
+static_always_inline uword
+iavf_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, vnet_dev_port_t *port,
+ vnet_dev_rx_queue_t *rxq, int with_flows)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 thr_idx = vlib_get_thread_index ();
+ iavf_rt_data_t *rtd = vnet_dev_get_rt_temp_space (vm);
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+ vlib_buffer_template_t bt = rxq->buffer_template;
+ u32 n_trace, n_rx_packets = 0, n_rx_bytes = 0;
+ u16 n_tail_desc = 0;
+ u64 or_qw1 = 0;
+ u32 *bi, *to_next, n_left_to_next;
+ u32 next_index = rxq->next_index;
+ u32 sw_if_index = port->intf.sw_if_index;
+ u32 hw_if_index = port->intf.hw_if_index;
+ u16 next = arq->next;
+ u16 size = rxq->size;
+ u16 mask = size - 1;
+ iavf_rx_desc_t *d, *descs = arq->descs;
+#ifdef CLIB_HAVE_VEC256
+ u64x4 q1x4, or_q1x4 = { 0 };
+ u32x4 fdidx4;
+ u64x4 dd_eop_mask4 = u64x4_splat (mask_dd_eop.as_u64);
+#elif defined(CLIB_HAVE_VEC128)
+ u32x4 q1x4_lo, q1x4_hi, or_q1x4 = { 0 };
+ u32x4 fdidx4;
+ u32x4 dd_eop_mask4 = u32x4_splat (mask_dd_eop.as_u64);
+#endif
+ int single_next = 1;
+
+ /* is there anything on the ring */
+ d = descs + next;
+ if ((d->qword[1] & mask_dd.as_u64) == 0)
+ goto done;
+
+ vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* fetch up to IAVF_RX_VECTOR_SZ from the rx ring, unflatten them and
+ copy needed data from descriptor to rx vector */
+ bi = to_next;
+
+ while (n_rx_packets < IAVF_RX_VECTOR_SZ)
+ {
+ if (next + 11 < size)
+ {
+ int stride = 8;
+ clib_prefetch_load ((void *) (descs + (next + stride)));
+ clib_prefetch_load ((void *) (descs + (next + stride + 1)));
+ clib_prefetch_load ((void *) (descs + (next + stride + 2)));
+ clib_prefetch_load ((void *) (descs + (next + stride + 3)));
+ }
+
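+ /* fast path: gather qword[1] from four descriptors at once and accept
+    all four only if each one is complete (DD) and single-descriptor
+    (EOP); otherwise fall back to one-by-one processing */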
+#ifdef CLIB_HAVE_VEC256
+ if (n_rx_packets >= IAVF_RX_VECTOR_SZ - 4 || next >= size - 4)
+ goto one_by_one;
+
+ q1x4 = u64x4_gather ((void *) &d[0].qword[1], (void *) &d[1].qword[1],
+ (void *) &d[2].qword[1], (void *) &d[3].qword[1]);
+
+ /* not all packets are ready or at least one of them is chained */
+ if (!u64x4_is_equal (q1x4 & dd_eop_mask4, dd_eop_mask4))
+ goto one_by_one;
+
+ or_q1x4 |= q1x4;
+
+ u64x4_store_unaligned (q1x4, rtd->qw1s + n_rx_packets);
+#elif defined(CLIB_HAVE_VEC128)
+ if (n_rx_packets >= IAVF_RX_VECTOR_SZ - 4 || next >= size - 4)
+ goto one_by_one;
+
+ q1x4_lo =
+ u32x4_gather ((void *) &d[0].qword[1], (void *) &d[1].qword[1],
+ (void *) &d[2].qword[1], (void *) &d[3].qword[1]);
+
+ /* not all packets are ready or at least one of them is chained */
+ if (!u32x4_is_equal (q1x4_lo & dd_eop_mask4, dd_eop_mask4))
+ goto one_by_one;
+
+ q1x4_hi = u32x4_gather (
+ (void *) &d[0].qword[1] + 4, (void *) &d[1].qword[1] + 4,
+ (void *) &d[2].qword[1] + 4, (void *) &d[3].qword[1] + 4);
+
+ or_q1x4 |= q1x4_lo;
+ rtd->qw1s[n_rx_packets + 0] = (u64) q1x4_hi[0] << 32 | (u64) q1x4_lo[0];
+ rtd->qw1s[n_rx_packets + 1] = (u64) q1x4_hi[1] << 32 | (u64) q1x4_lo[1];
+ rtd->qw1s[n_rx_packets + 2] = (u64) q1x4_hi[2] << 32 | (u64) q1x4_lo[2];
+ rtd->qw1s[n_rx_packets + 3] = (u64) q1x4_hi[3] << 32 | (u64) q1x4_lo[3];
+#endif
+#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
+
+ if (with_flows)
+ {
+ fdidx4 = u32x4_gather (
+ (void *) &d[0].fdid_flex_hi, (void *) &d[1].fdid_flex_hi,
+ (void *) &d[2].fdid_flex_hi, (void *) &d[3].fdid_flex_hi);
+ u32x4_store_unaligned (fdidx4, rtd->flow_ids + n_rx_packets);
+ }
+
+ vlib_buffer_copy_indices (bi, arq->buffer_indices + next, 4);
+
+ /* next */
+ next = (next + 4) & mask;
+ d = descs + next;
+ n_rx_packets += 4;
+ bi += 4;
+ continue;
+ one_by_one:
+#endif
+ clib_prefetch_load ((void *) (descs + ((next + 8) & mask)));
+
+ if (iavf_rxd_is_not_dd (d))
+ break;
+
+ bi[0] = arq->buffer_indices[next];
+
+ /* deal with chained buffers */
+ if (PREDICT_FALSE (iavf_rxd_is_not_eop (d)))
+ {
+ u16 tail_desc = 0;
+ u16 tail_next = next;
+ iavf_rx_tail_t *tail = rtd->tails + n_rx_packets;
+ iavf_rx_desc_t *td;
+ do
+ {
+ tail_next = (tail_next + 1) & mask;
+ td = descs + tail_next;
+
+ /* bail out in case of incomplete transaction */
+ if (iavf_rxd_is_not_dd (td))
+ goto no_more_desc;
+
+ or_qw1 |= tail->qw1s[tail_desc] = td[0].qword[1];
+ tail->buffers[tail_desc] = arq->buffer_indices[tail_next];
+ tail_desc++;
+ }
+ while (iavf_rxd_is_not_eop (td));
+ next = tail_next;
+ n_tail_desc += tail_desc;
+ }
+
+ or_qw1 |= rtd->qw1s[n_rx_packets] = d[0].qword[1];
+ if (PREDICT_FALSE (with_flows))
+ {
+ rtd->flow_ids[n_rx_packets] = d[0].fdid_flex_hi;
+ }
+
+ /* next */
+ next = (next + 1) & mask;
+ d = descs + next;
+ n_rx_packets++;
+ bi++;
+ }
+no_more_desc:
+
+ if (n_rx_packets == 0)
+ goto done;
+
+ arq->next = next;
+ arq->n_enqueued -= n_rx_packets + n_tail_desc;
+
+#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
+ or_qw1 |= or_q1x4[0] | or_q1x4[1] | or_q1x4[2] | or_q1x4[3];
+#endif
+
+ vlib_get_buffers (vm, to_next, rtd->bufs, n_rx_packets);
+
+ n_rx_bytes =
+ n_tail_desc ?
+ iavf_process_rx_burst (vm, node, rxq, rtd, &bt, n_rx_packets, 1) :
+ iavf_process_rx_burst (vm, node, rxq, rtd, &bt, n_rx_packets, 0);
+
+ /* flow-marked (FLM) packets may have per-flow next nodes, so a single
+ next frame cannot be assumed */
+ if (PREDICT_FALSE (with_flows && (or_qw1 & mask_flm.as_u64)))
+ {
+ u32 n;
+ single_next = 0;
+ for (n = 0; n < n_rx_packets; n++)
+ rtd->next[n] = next_index;
+
+ iavf_process_flow_offload (port, rtd, n_rx_packets);
+ }
+
+ /* packet trace if enabled */
+ if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
+ {
+ u32 n_left = n_rx_packets;
+ u32 i, j;
+ u16 *next_indices = rtd->next;
+
+ i = 0;
+ while (n_trace && n_left)
+ {
+ vlib_buffer_t *b = rtd->bufs[i];
+ if (PREDICT_FALSE (single_next == 0))
+ next_index = next_indices[0];
+
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b,
+ /* follow_chain */ 0)))
+ {
+ iavf_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+ tr->next_index = next_index;
+ tr->qid = rxq->queue_id;
+ tr->hw_if_index = hw_if_index;
+ tr->qw1s[0] = rtd->qw1s[i];
+ tr->flow_id =
+ (tr->qw1s[0] & mask_flm.as_u64) ? rtd->flow_ids[i] : 0;
+ for (j = 1; j < IAVF_RX_MAX_DESC_IN_CHAIN; j++)
+ tr->qw1s[j] = rtd->tails[i].qw1s[j - 1];
+
+ n_trace--;
+ }
+
+ /* next */
+ n_left--;
+ i++;
+ next_indices++;
+ }
+ vlib_set_trace_count (vm, node, n_trace);
+ }
+
+ /* enqueue the packets to the next nodes */
+ if (PREDICT_FALSE (with_flows && (or_qw1 & mask_flm.as_u64)))
+ {
+ /* release the next node's frame vector; in this case
+ vlib_buffer_enqueue_to_next places the packets */
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+ /* enqueue buffers to the next node */
+ vlib_buffer_enqueue_to_next (vm, node, to_next, rtd->next, n_rx_packets);
+ }
+ else
+ {
+ if (PREDICT_TRUE (next_index == VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT))
+ {
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+ nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = sw_if_index;
+ ef->hw_if_index = hw_if_index;
+
+ if ((or_qw1 & mask_ipe.as_u64) == 0)
+ f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
+ vlib_frame_no_append (f);
+ }
+
+ n_left_to_next -= n_rx_packets;
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_increment_combined_counter (
+ vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thr_idx, hw_if_index, n_rx_packets, n_rx_bytes);
+
+done:
+ return n_rx_packets;
+}
+
+VNET_DEV_NODE_FN (iavf_rx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ u32 n_rx = 0;
+ foreach_vnet_dev_rx_queue_runtime (rxq, node)
+ {
+ vnet_dev_port_t *port = rxq->port;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ if (PREDICT_FALSE (ap->flow_offload))
+ n_rx += iavf_device_input_inline (vm, node, frame, port, rxq, 1);
+ else
+ n_rx += iavf_device_input_inline (vm, node, frame, port, rxq, 0);
+
+ /* refill rx ring */
+ if (rxq->port->dev->va_dma)
+ iavf_rxq_refill (vm, node, rxq, 1 /* use_va_dma */);
+ else
+ iavf_rxq_refill (vm, node, rxq, 0 /* use_va_dma */);
+ }
+
+ return n_rx;
+}
diff --git a/src/plugins/dev_iavf/tx_node.c b/src/plugins/dev_iavf/tx_node.c
new file mode 100644
index 00000000000..451db80d286
--- /dev/null
+++ b/src/plugins/dev_iavf/tx_node.c
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/ring.h>
+#include <vppinfra/vector/ip_csum.h>
+
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+
+#include <dev_iavf/iavf.h>
+
+static_always_inline u8
+iavf_tx_desc_get_dtyp (iavf_tx_desc_t *d)
+{
+ return d->qword[1] & 0x0f;
+}
+
+struct iavf_ip4_psh
+{
+ u32 src;
+ u32 dst;
+ u8 zero;
+ u8 proto;
+ u16 l4len;
+};
+
+struct iavf_ip6_psh
+{
+ ip6_address_t src;
+ ip6_address_t dst;
+ u32 l4len;
+ u32 proto;
+};
+
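+/* the device expects the L4 checksum field to be seeded with the
+   pseudo-header checksum; the structs above model the IPv4/IPv6
+   pseudo-headers for that computation */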
+static_always_inline u64
+iavf_tx_prepare_cksum (vlib_buffer_t *b, u8 is_tso)
+{
+ u64 flags = 0;
+ if (!is_tso && !(b->flags & VNET_BUFFER_F_OFFLOAD))
+ return 0;
+
+ vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+ u32 is_tcp = is_tso || oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
+ u32 is_udp = !is_tso && oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+
+ if (!is_tcp && !is_udp)
+ return 0;
+
+ u32 is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
+ u32 is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;
+
+ ASSERT (!(is_tcp && is_udp));
+ ASSERT (is_ip4 || is_ip6);
+ i16 l2_hdr_offset = b->current_data;
+ i16 l3_hdr_offset = vnet_buffer (b)->l3_hdr_offset;
+ i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset;
+ u16 l2_len = l3_hdr_offset - l2_hdr_offset;
+ u16 l3_len = l4_hdr_offset - l3_hdr_offset;
+ ip4_header_t *ip4 = (void *) (b->data + l3_hdr_offset);
+ ip6_header_t *ip6 = (void *) (b->data + l3_hdr_offset);
+ tcp_header_t *tcp = (void *) (b->data + l4_hdr_offset);
+ udp_header_t *udp = (void *) (b->data + l4_hdr_offset);
+ u16 l4_len = is_tcp ? tcp_header_bytes (tcp) : sizeof (udp_header_t);
+ u16 sum = 0;
+
+ flags |= IAVF_TXD_OFFSET_MACLEN (l2_len) | IAVF_TXD_OFFSET_IPLEN (l3_len) |
+ IAVF_TXD_OFFSET_L4LEN (l4_len);
+ flags |= is_ip4 ? IAVF_TXD_CMD_IIPT_IPV4 : IAVF_TXD_CMD_IIPT_IPV6;
+ flags |= is_tcp ? IAVF_TXD_CMD_L4T_TCP : IAVF_TXD_CMD_L4T_UDP;
+
+ if (is_ip4)
+ ip4->checksum = 0;
+
+ if (is_tso)
+ {
+ if (is_ip4)
+ ip4->length = 0;
+ else
+ ip6->payload_length = 0;
+ }
+
+ if (is_ip4)
+ {
+ struct iavf_ip4_psh psh = { 0 };
+ psh.src = ip4->src_address.as_u32;
+ psh.dst = ip4->dst_address.as_u32;
+ psh.proto = ip4->protocol;
+ psh.l4len = is_tso ?
+ 0 :
+ clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
+ (l4_hdr_offset - l3_hdr_offset));
+ sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
+ }
+ else
+ {
+ struct iavf_ip6_psh psh = { 0 };
+ psh.src = ip6->src_address;
+ psh.dst = ip6->dst_address;
+ psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
+ psh.l4len = is_tso ? 0 : ip6->payload_length;
+ sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
+ }
+
+ if (is_tcp)
+ tcp->checksum = sum;
+ else
+ udp->checksum = sum;
+ return flags;
+}
+
+static_always_inline u32
+iavf_tx_fill_ctx_desc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq,
+ iavf_tx_desc_t *d, vlib_buffer_t *b)
+{
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+ vlib_buffer_t *ctx_ph;
+ u32 *bi = atq->ph_bufs;
+
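+ /* pick a placeholder buffer with room in its reference count; the
+    context descriptor needs a buffer index so ring accounting stays
+    uniform even though it carries no packet data */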
+next:
+ ctx_ph = vlib_get_buffer (vm, bi[0]);
+ if (PREDICT_FALSE (ctx_ph->ref_count == 255))
+ {
+ bi++;
+ goto next;
+ }
+
+ /* Acquire a reference on the placeholder buffer */
+ ctx_ph->ref_count++;
+
+ u16 l234hdr_sz = vnet_buffer (b)->l4_hdr_offset - b->current_data +
+ vnet_buffer2 (b)->gso_l4_hdr_sz;
+ u16 tlen = vlib_buffer_length_in_chain (vm, b) - l234hdr_sz;
+ d[0].qword[0] = 0;
+ d[0].qword[1] = IAVF_TXD_DTYP_CTX | IAVF_TXD_CTX_CMD_TSO |
+ IAVF_TXD_CTX_SEG_MSS (vnet_buffer2 (b)->gso_size) |
+ IAVF_TXD_CTX_SEG_TLEN (tlen);
+ return bi[0];
+}
+
+static_always_inline void
+iavf_tx_copy_desc (iavf_tx_desc_t *d, iavf_tx_desc_t *s, u32 n_descs)
+{
+#if defined CLIB_HAVE_VEC512
+ while (n_descs >= 8)
+ {
+ u64x8u *dv = (u64x8u *) d;
+ u64x8u *sv = (u64x8u *) s;
+
+ dv[0] = sv[0];
+ dv[1] = sv[1];
+
+ /* next */
+ d += 8;
+ s += 8;
+ n_descs -= 8;
+ }
+#elif defined CLIB_HAVE_VEC256
+ while (n_descs >= 4)
+ {
+ u64x4u *dv = (u64x4u *) d;
+ u64x4u *sv = (u64x4u *) s;
+
+ dv[0] = sv[0];
+ dv[1] = sv[1];
+
+ /* next */
+ d += 4;
+ s += 4;
+ n_descs -= 4;
+ }
+#elif defined CLIB_HAVE_VEC128
+ while (n_descs >= 2)
+ {
+ u64x2u *dv = (u64x2u *) d;
+ u64x2u *sv = (u64x2u *) s;
+
+ dv[0] = sv[0];
+ dv[1] = sv[1];
+
+ /* next */
+ d += 2;
+ s += 2;
+ n_descs -= 2;
+ }
+#endif
+ while (n_descs)
+ {
+ d[0].qword[0] = s[0].qword[0];
+ d[0].qword[1] = s[0].qword[1];
+ d++;
+ s++;
+ n_descs--;
+ }
+}
+
+static_always_inline void
+iavf_tx_fill_data_desc (vlib_main_t *vm, iavf_tx_desc_t *d, vlib_buffer_t *b,
+ u64 cmd, int use_va_dma)
+{
+ if (use_va_dma)
+ d->qword[0] = vlib_buffer_get_current_va (b);
+ else
+ d->qword[0] = vlib_buffer_get_current_pa (vm, b);
+ d->qword[1] = (((u64) b->current_length) << 34 | cmd | IAVF_TXD_CMD_RSV);
+}
+
+static_always_inline u16
+iavf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_dev_tx_queue_t *txq, u32 *buffers, u32 n_packets,
+ u16 *n_enq_descs, int use_va_dma)
+{
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+ const u64 cmd_eop = IAVF_TXD_CMD_EOP;
+ u16 n_free_desc, n_desc_left, n_packets_left = n_packets;
+#if defined CLIB_HAVE_VEC512
+ vlib_buffer_t *b[8];
+#else
+ vlib_buffer_t *b[4];
+#endif
+ iavf_tx_desc_t *d = atq->tmp_descs;
+ u32 *tb = atq->tmp_bufs;
+
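+ /* descriptors are staged in tmp_descs/tmp_bufs and copied onto the
+    ring in one pass by the caller, so ring wrap is handled only once */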
+ n_free_desc = n_desc_left = txq->size - atq->n_enqueued - 8;
+
+ if (n_desc_left == 0)
+ return 0;
+
+ while (n_packets_left && n_desc_left)
+ {
+#if defined CLIB_HAVE_VEC512
+ u32 flags;
+ u64x8 or_flags_vec512;
+ u64x8 flags_mask_vec512;
+#else
+ u32 flags, or_flags;
+#endif
+
+#if defined CLIB_HAVE_VEC512
+ if (n_packets_left < 8 || n_desc_left < 8)
+#else
+ if (n_packets_left < 8 || n_desc_left < 4)
+#endif
+ goto one_by_one;
+
+#if defined CLIB_HAVE_VEC512
+ u64x8 base_ptr = u64x8_splat (vm->buffer_main->buffer_mem_start);
+ u32x8 buf_indices = u32x8_load_unaligned (buffers);
+
+ *(u64x8 *) &b = base_ptr + u64x8_from_u32x8 (
+ buf_indices << CLIB_LOG2_CACHE_LINE_BYTES);
+
+ or_flags_vec512 = u64x8_i64gather (u64x8_load_unaligned (b), 0, 1);
+#else
+ vlib_prefetch_buffer_with_index (vm, buffers[4], LOAD);
+ vlib_prefetch_buffer_with_index (vm, buffers[5], LOAD);
+ vlib_prefetch_buffer_with_index (vm, buffers[6], LOAD);
+ vlib_prefetch_buffer_with_index (vm, buffers[7], LOAD);
+
+ b[0] = vlib_get_buffer (vm, buffers[0]);
+ b[1] = vlib_get_buffer (vm, buffers[1]);
+ b[2] = vlib_get_buffer (vm, buffers[2]);
+ b[3] = vlib_get_buffer (vm, buffers[3]);
+
+ or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
+#endif
+
+#if defined CLIB_HAVE_VEC512
+ flags_mask_vec512 = u64x8_splat (
+ VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD | VNET_BUFFER_F_GSO);
+ if (PREDICT_FALSE (
+ !u64x8_is_all_zero (or_flags_vec512 & flags_mask_vec512)))
+#else
+ if (PREDICT_FALSE (or_flags &
+ (VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD |
+ VNET_BUFFER_F_GSO)))
+#endif
+ goto one_by_one;
+
+#if defined CLIB_HAVE_VEC512
+ vlib_buffer_copy_indices (tb, buffers, 8);
+ iavf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 1, b[1], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 2, b[2], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 3, b[3], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 4, b[4], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 5, b[5], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 6, b[6], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 7, b[7], cmd_eop, use_va_dma);
+
+ buffers += 8;
+ n_packets_left -= 8;
+ n_desc_left -= 8;
+ d += 8;
+ tb += 8;
+#else
+ vlib_buffer_copy_indices (tb, buffers, 4);
+
+ iavf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 1, b[1], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 2, b[2], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 3, b[3], cmd_eop, use_va_dma);
+
+ buffers += 4;
+ n_packets_left -= 4;
+ n_desc_left -= 4;
+ d += 4;
+ tb += 4;
+#endif
+
+ continue;
+
+ one_by_one:
+ tb[0] = buffers[0];
+ b[0] = vlib_get_buffer (vm, buffers[0]);
+ flags = b[0]->flags;
+
+ /* No chained buffers or TSO case */
+ if (PREDICT_TRUE (
+ (flags & (VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_GSO)) == 0))
+ {
+ u64 cmd = cmd_eop;
+
+ if (PREDICT_FALSE (flags & VNET_BUFFER_F_OFFLOAD))
+ cmd |= iavf_tx_prepare_cksum (b[0], 0 /* is_tso */);
+
+ iavf_tx_fill_data_desc (vm, d, b[0], cmd, use_va_dma);
+ }
+ else
+ {
+ u16 n_desc_needed = 1;
+ u64 cmd = 0;
+
+ if (flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ vlib_buffer_t *next = vlib_get_buffer (vm, b[0]->next_buffer);
+ n_desc_needed = 2;
+ while (next->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ next = vlib_get_buffer (vm, next->next_buffer);
+ n_desc_needed++;
+ }
+ }
+
+ if (flags & VNET_BUFFER_F_GSO)
+ {
+ n_desc_needed++;
+ }
+ else if (PREDICT_FALSE (n_desc_needed > 8))
+ {
+ vlib_buffer_free_one (vm, buffers[0]);
+ vlib_error_count (vm, node->node_index,
+ IAVF_TX_NODE_CTR_SEG_SZ_EXCEEDED, 1);
+ n_packets_left -= 1;
+ buffers += 1;
+ continue;
+ }
+
+ if (PREDICT_FALSE (n_desc_left < n_desc_needed))
+ break;
+
+ if (flags & VNET_BUFFER_F_GSO)
+ {
+ /* Enqueue a context descriptor */
+ tb[1] = tb[0];
+ tb[0] = iavf_tx_fill_ctx_desc (vm, txq, d, b[0]);
+ n_desc_left -= 1;
+ d += 1;
+ tb += 1;
+ cmd = iavf_tx_prepare_cksum (b[0], 1 /* is_tso */);
+ }
+ else if (flags & VNET_BUFFER_F_OFFLOAD)
+ {
+ cmd = iavf_tx_prepare_cksum (b[0], 0 /* is_tso */);
+ }
+
+ /* Deal with chained buffers, if present */
+ while (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ iavf_tx_fill_data_desc (vm, d, b[0], cmd, use_va_dma);
+
+ n_desc_left -= 1;
+ d += 1;
+ tb += 1;
+
+ tb[0] = b[0]->next_buffer;
+ b[0] = vlib_get_buffer (vm, b[0]->next_buffer);
+ }
+
+ iavf_tx_fill_data_desc (vm, d, b[0], cmd_eop | cmd, use_va_dma);
+ }
+
+ buffers += 1;
+ n_packets_left -= 1;
+ n_desc_left -= 1;
+ d += 1;
+ tb += 1;
+ }
+
+ *n_enq_descs = n_free_desc - n_desc_left;
+ return n_packets - n_packets_left;
+}
+
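+/* TX node: reclaim descriptors completed since the last run (tracked via
+ * RS slots), stage new descriptors with iavf_tx_prepare, copy them into
+ * the descriptor ring (handling wrap-around), then advance the tail
+ * doorbell. */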
+VNET_DEV_NODE_FN (iavf_tx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node);
+ vnet_dev_tx_queue_t *txq = rt->tx_queue;
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+ u16 next;
+ u16 mask = txq->size - 1;
+ u32 *buffers = vlib_frame_vector_args (frame);
+ u16 n_enq, n_left, n_desc, *slot;
+ u16 n_retry = 2;
+
+ n_left = frame->n_vectors;
+
+ vnet_dev_tx_queue_lock_if_needed (txq);
+
+retry:
+ next = atq->next;
+ /* release consumed bufs */
+ if (atq->n_enqueued)
+ {
+ i32 complete_slot = -1;
+ while (1)
+ {
+ u16 *slot = clib_ring_get_first (atq->rs_slots);
+
+ if (slot == 0)
+ break;
+
+ if (iavf_tx_desc_get_dtyp (atq->descs + slot[0]) != 0x0F)
+ break;
+
+ complete_slot = slot[0];
+
+ clib_ring_deq (atq->rs_slots);
+ }
+
+ if (complete_slot >= 0)
+ {
+ u16 first, mask, n_free;
+ mask = txq->size - 1;
+ first = (atq->next - atq->n_enqueued) & mask;
+ n_free = (complete_slot + 1 - first) & mask;
+
+ atq->n_enqueued -= n_free;
+ vlib_buffer_free_from_ring_no_next (vm, atq->buffer_indices, first,
+ txq->size, n_free);
+ }
+ }
+
+ n_desc = 0;
+ if (dev->va_dma)
+ n_enq = iavf_tx_prepare (vm, node, txq, buffers, n_left, &n_desc, 1);
+ else
+ n_enq = iavf_tx_prepare (vm, node, txq, buffers, n_left, &n_desc, 0);
+
+ if (n_desc)
+ {
+ if (PREDICT_TRUE (next + n_desc <= txq->size))
+ {
+ /* no wrap */
+ iavf_tx_copy_desc (atq->descs + next, atq->tmp_descs, n_desc);
+ vlib_buffer_copy_indices (atq->buffer_indices + next, atq->tmp_bufs,
+ n_desc);
+ }
+ else
+ {
+ /* wrap */
+ u32 n_not_wrap = txq->size - next;
+ iavf_tx_copy_desc (atq->descs + next, atq->tmp_descs, n_not_wrap);
+ iavf_tx_copy_desc (atq->descs, atq->tmp_descs + n_not_wrap,
+ n_desc - n_not_wrap);
+ vlib_buffer_copy_indices (atq->buffer_indices + next, atq->tmp_bufs,
+ n_not_wrap);
+ vlib_buffer_copy_indices (atq->buffer_indices,
+ atq->tmp_bufs + n_not_wrap,
+ n_desc - n_not_wrap);
+ }
+
+ next += n_desc;
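+ /* request a completion report (RS bit) on the last descriptor of this
+  * batch; its DTYP field reads back as 0xF once the device is done,
+  * which lets the reclaim loop above free the buffers */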
+ if ((slot = clib_ring_enq (atq->rs_slots)))
+ {
+ u16 rs_slot = slot[0] = (next - 1) & mask;
+ atq->descs[rs_slot].qword[1] |= IAVF_TXD_CMD_RS;
+ }
+
+ atq->next = next & mask;
+ __atomic_store_n (atq->qtx_tail, atq->next, __ATOMIC_RELEASE);
+ atq->n_enqueued += n_desc;
+ n_left -= n_enq;
+ }
+
+ if (n_left)
+ {
+ buffers += n_enq;
+
+ if (n_retry--)
+ goto retry;
+
+ vlib_buffer_free (vm, buffers, n_left);
+ vlib_error_count (vm, node->node_index, IAVF_TX_NODE_CTR_NO_FREE_SLOTS,
+ n_left);
+ }
+
+ vnet_dev_tx_queue_unlock_if_needed (txq);
+
+ return frame->n_vectors - n_left;
+}
diff --git a/src/plugins/dev_iavf/virtchnl.c b/src/plugins/dev_iavf/virtchnl.c
new file mode 100644
index 00000000000..eca48106ce3
--- /dev/null
+++ b/src/plugins/dev_iavf/virtchnl.c
@@ -0,0 +1,372 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "virtchnl",
+};
+
+u8 *
+format_virtchnl_op_name (u8 *s, va_list *args)
+{
+ virtchnl_op_t op = va_arg (*args, virtchnl_op_t);
+ char *op_names[] = {
+#define _(a, b) [a] = #b,
+ foreach_virtchnl_op
+#undef _
+ };
+
+ if (op >= ARRAY_LEN (op_names) || op_names[op] == 0)
+ return format (s, "UNKNOWN(%u)", op);
+
+ return format (s, "%s", op_names[op]);
+}
+
+u8 *
+format_virtchnl_status (u8 *s, va_list *args)
+{
+ virtchnl_status_t c = va_arg (*args, virtchnl_status_t);
+
+ if (0)
+ ;
+#define _(a, b) else if (c == a) return format (s, #b);
+ foreach_virtchnl_status
+#undef _
+ return format (s, "UNKNOWN(%d)", c);
+}
+
+static u8 *
+format_virtchnl_vlan_support_caps (u8 *s, va_list *args)
+{
+ virtchnl_vlan_support_caps_t v = va_arg (*args, u32);
+ int not_first = 0;
+
+ char *strs[32] = {
+#define _(a, b, c) [a] = c,
+ foreach_virtchnl_vlan_support_bit
+#undef _
+ };
+
+ if (v == VIRTCHNL_VLAN_UNSUPPORTED)
+ return format (s, "unsupported");
+
+ for (int i = 0; i < 32; i++)
+ {
+ if ((v & (1 << i)) == 0)
+ continue;
+ if (not_first)
+ s = format (s, " ");
+ if (strs[i])
+ s = format (s, "%s", strs[i]);
+ else
+ s = format (s, "unknown(%u)", i);
+ not_first = 1;
+ }
+ return s;
+}
+
+static u8 *
+format_virtchnl_op_req (u8 *s, va_list *args)
+{
+ virtchnl_op_t op = va_arg (*args, virtchnl_op_t);
+ void *p = va_arg (*args, void *);
+ u32 indent = format_get_indent (s);
+
+ if (p == 0)
+ return format (s, "no data");
+
+ switch (op)
+ {
+ case VIRTCHNL_OP_VERSION:
+ {
+ virtchnl_version_info_t *r = p;
+ s = format (s, "version: %u.%u", r->major, r->minor);
+ }
+ break;
+ case VIRTCHNL_OP_GET_VF_RESOURCES:
+ {
+ u32 *r = p;
+ s = format (s, "%U", format_iavf_vf_cap_flags, *r);
+ }
+ break;
+ case VIRTCHNL_OP_ENABLE_QUEUES:
+ case VIRTCHNL_OP_DISABLE_QUEUES:
+ case VIRTCHNL_OP_GET_STATS:
+ {
+ virtchnl_queue_select_t *r = p;
+ s = format (s, "vsi %u rx 0x%x tx 0x%x", r->vsi_id, r->rx_queues,
+ r->tx_queues);
+ }
+ break;
+ case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+ {
+ virtchnl_vsi_queue_config_info_t *r = p;
+ s = format (s, "vsi %u num_qp %u", r->vsi_id, r->num_queue_pairs);
+ for (int i = 0; i < r->num_queue_pairs; i++)
+ {
+ virtchnl_rxq_info_t *ri = &r->qpair[i].rxq;
+ virtchnl_txq_info_t *ti = &r->qpair[i].txq;
+
+ s = format (s, "\n%U qpair %u", format_white_space, indent + 2, i);
+ s = format (s,
+ "\n%U rx vsi %u queue %u dma_ring_addr 0x%lx "
+ "ring_len %u data_sz %u max_pkt_sz %u",
+ format_white_space, indent + 4, ri->vsi_id,
+ ri->queue_id, ri->dma_ring_addr, ri->ring_len,
+ ri->databuffer_size, ri->max_pkt_size);
+ s = format (
+ s, "\n%U tx vsi %u queue %u dma_ring_addr 0x%lx ring_len %u",
+ format_white_space, indent + 4, ti->vsi_id, ti->queue_id,
+ ti->dma_ring_addr, ti->ring_len);
+ }
+ }
+ break;
+ case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+ {
+ virtchnl_irq_map_info_t *r = p;
+ s = format (s, "num_vectors %u", r->num_vectors);
+ for (int i = 0; i < r->num_vectors; i++)
+ {
+ virtchnl_vector_map_t *vecmap = r->vecmap + i;
+ s = format (s,
+ "\n%Uvsi %u vector_id %u rxq_map 0x%04x txq_map "
+ "0x%04x rxitr_idx %u txitr_idx %u",
+ format_white_space, indent + 2, vecmap->vsi_id,
+ vecmap->vector_id, vecmap->rxq_map, vecmap->txq_map,
+ vecmap->rxitr_idx, vecmap->txitr_idx);
+ }
+ }
+ break;
+ case VIRTCHNL_OP_CONFIG_RSS_LUT:
+ {
+ virtchnl_rss_lut_t *r = p;
+ s = format (s, "vsi %u entries %u lut", r->vsi_id, r->lut_entries);
+ for (int i = 0; i < r->lut_entries; i++)
+ s = format (s, " %u", r->lut[i]);
+ }
+ break;
+ case VIRTCHNL_OP_CONFIG_RSS_KEY:
+ {
+ virtchnl_rss_key_t *r = p;
+ s = format (s, "vsi %u len %u key ", r->vsi_id, r->key_len);
+ for (int i = 0; i < r->key_len; i++)
+ s = format (s, "%02x", r->key[i]);
+ }
+ break;
+ case VIRTCHNL_OP_ADD_ETH_ADDR:
+ case VIRTCHNL_OP_DEL_ETH_ADDR:
+ {
+ virtchnl_ether_addr_list_t *r = p;
+ s = format (s, "vsi %u num_elements %u elts: ", r->vsi_id,
+ r->num_elements);
+ for (int i = 0; i < r->num_elements; i++)
+ s = format (s, "%s%U%s%s", i ? ", " : "", format_ethernet_address,
+ r->list[i].addr, r->list[i].primary ? " primary" : "",
+ r->list[i].extra ? " extra" : "");
+ }
+ break;
+ case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+ {
+ virtchnl_promisc_info_t *r = p;
+ s = format (
+ s, "promisc_info: vsi %u flags 0x%x (unicast %s multicast %s)",
+ r->vsi_id, r->flags,
+ r->flags & FLAG_VF_UNICAST_PROMISC ? "on" : "off",
+ r->flags & FLAG_VF_MULTICAST_PROMISC ? "on" : "off");
+ }
+ break;
+ case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2:
+ {
+ virtchnl_vlan_setting_t *r = p;
+ s = format (s,
+ "vport %u outer_ethertype_setting 0x%x [%U] "
+ "inner_ethertype_setting 0x%x [%U]",
+ r->vport_id, r->outer_ethertype_setting,
+ format_virtchnl_vlan_support_caps,
+ r->outer_ethertype_setting, r->inner_ethertype_setting,
+ format_virtchnl_vlan_support_caps,
+ r->inner_ethertype_setting);
+ }
+ break;
+ default:
+ s = format (s, "unknown op 0x%04x", op);
+ break;
+ };
+ return s;
+}
+static u8 *
+format_virtchnl_op_resp (u8 *s, va_list *args)
+{
+ virtchnl_op_t op = va_arg (*args, virtchnl_op_t);
+ void *p = va_arg (*args, void *);
+ u32 indent = format_get_indent (s);
+
+ if (p == 0)
+ return format (s, "no data");
+
+ switch (op)
+ {
+ case VIRTCHNL_OP_VERSION:
+ {
+ virtchnl_version_info_t *r = p;
+ s = format (s, "version %u.%u", r->major, r->minor);
+ }
+ break;
+ case VIRTCHNL_OP_GET_VF_RESOURCES:
+ {
+ virtchnl_vf_resource_t *r = p;
+ s =
+ format (s,
+ "vf_resource: num_vsis %u num_queue_pairs %u "
+ "max_vectors %u max_mtu %u rss_key_size %u rss_lut_size %u",
+ r->num_vsis, r->num_queue_pairs, r->max_vectors, r->max_mtu,
+ r->rss_key_size, r->rss_lut_size);
+ s = format (s, "\n%Uvf_cap_flags 0x%x (%U)", format_white_space,
+ indent + 2, r->vf_cap_flags, format_iavf_vf_cap_flags,
+ r->vf_cap_flags);
+ for (int i = 0; i < r->num_vsis; i++)
+ s = format (s,
+ "\n%Uvsi_resource[%u]: vsi %u num_qp %u vsi_type %u "
+ "qset_handle %u default_mac_addr %U",
+ format_white_space, indent + 2, i, r->vsi_res[i].vsi_id,
+ r->vsi_res[i].num_queue_pairs, r->vsi_res[i].vsi_type,
+ r->vsi_res[i].qset_handle, format_ethernet_address,
+ r->vsi_res[i].default_mac_addr);
+ }
+ break;
+ case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
+ {
+ virtchnl_vlan_caps_t *r = p;
+ s = format (s, "filtering: ethertype_init 0x%x max_filters %u",
+ r->filtering.ethertype_init, r->filtering.max_filters);
+ s = format (s, "\n%U outer [%U] inner [%U]", format_white_space,
+ indent, format_virtchnl_vlan_support_caps,
+ r->filtering.filtering_support.outer,
+ format_virtchnl_vlan_support_caps,
+ r->filtering.filtering_support.inner);
+ s = format (s, "\n%Uoffloads: ethertype_init 0x%x ethertype_match %u",
+ format_white_space, indent, r->offloads.ethertype_init,
+ r->offloads.ethertype_match);
+ s = format (s, "\n%U stripping outer [%U] stripping inner [%U]",
+ format_white_space, indent,
+ format_virtchnl_vlan_support_caps,
+ r->offloads.stripping_support.outer,
+ format_virtchnl_vlan_support_caps,
+ r->offloads.stripping_support.inner);
+ s = format (s, "\n%U insertion outer [%U] inserion inner [%U]",
+ format_white_space, indent,
+ format_virtchnl_vlan_support_caps,
+ r->offloads.insertion_support.outer,
+ format_virtchnl_vlan_support_caps,
+ r->offloads.insertion_support.inner);
+ }
+ break;
+ case VIRTCHNL_OP_GET_STATS:
+ {
+ virtchnl_eth_stats_t *r = p;
+ s = format (s,
+ "rx: bytes %lu, unicast %lu, multicast %lu, broadcast "
+ "%lu, discards %lu unknown_protocol %lu",
+ r->rx_bytes, r->rx_unicast, r->rx_multicast,
+ r->rx_broadcast, r->rx_discards, r->rx_unknown_protocol);
+ s = format (s, "\n%U", format_white_space, indent);
+ s = format (s,
+ "tx: bytes %lu, unicast %lu, multicast %lu, broadcast "
+ "%lu, discards %lu errors %lu",
+ r->tx_bytes, r->tx_unicast, r->tx_multicast,
+ r->tx_broadcast, r->tx_discards, r->tx_errors);
+ }
+ break;
+ default:
+ s = format (s, "unknown op 0x%04x", op);
+ break;
+ };
+ return s;
+}
+
+vnet_dev_rv_t
+iavf_virtchnl_req (vlib_main_t *vm, vnet_dev_t *dev, iavf_virtchnl_req_t *r)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ vnet_dev_rv_t rv;
+ iavf_aq_desc_t *d;
+ u8 *b;
+
+ log_debug (dev, "%U req:\n %U", format_virtchnl_op_name, r->op,
+ format_virtchnl_op_req, r->op, r->req);
+
+ iavf_aq_desc_t txd = {
+ .opcode = IIAVF_AQ_DESC_OP_SEND_TO_PF,
+ .v_opcode = r->op,
+ .flags = { .si = 1 },
+ };
+
+ rv = iavf_aq_atq_enq (vm, dev, &txd, r->req, r->req_sz, 0.5);
+
+ if (rv != VNET_DEV_OK)
+ return rv;
+
+ if (r->no_reply)
+ return VNET_DEV_OK;
+
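+ /* poll the admin receive queue for the response; unsolicited PF events
+  * may arrive first and are queued on ad->events before retrying */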
+retry:
+ if (!iavf_aq_arq_next_acq (vm, dev, &d, &b, 1.0))
+ {
+ log_err (ad, "timeout waiting for virtchnl response");
+ return VNET_DEV_ERR_TIMEOUT;
+ }
+
+ if (d->v_opcode == VIRTCHNL_OP_EVENT)
+ {
+ if ((d->datalen != sizeof (virtchnl_pf_event_t)) ||
+ ((d->flags.buf) == 0))
+ {
+ log_err (dev, "event message error");
+ return VNET_DEV_ERR_BUG;
+ }
+
+ vec_add1 (ad->events, *(virtchnl_pf_event_t *) b);
+ iavf_aq_arq_next_rel (vm, dev);
+ goto retry;
+ }
+
+ if (d->v_opcode != r->op)
+ {
+ log_err (dev,
+ "unexpected response received [v_opcode = %u, expected %u, "
+ "v_retval %d]",
+ d->v_opcode, r->op, d->v_retval);
+ rv = VNET_DEV_ERR_BUG;
+ goto done;
+ }
+
+ r->status = d->v_retval;
+
+ if (d->v_retval)
+ {
+ log_err (dev, "error [v_opcode = %u, v_retval %d]", d->v_opcode,
+ d->v_retval);
+ rv = VNET_DEV_ERR_BUG;
+ goto done;
+ }
+
+ if (r->resp_sz && d->flags.buf)
+ clib_memcpy_fast (r->resp, b, r->resp_sz);
+
+done:
+ iavf_aq_arq_next_rel (vm, dev);
+ if (rv == VNET_DEV_OK)
+ log_debug (dev, "%U resp:\n %U", format_virtchnl_op_name, r->op,
+ format_virtchnl_op_resp, r->op, r->resp);
+ return rv;
+}
diff --git a/src/plugins/dev_iavf/virtchnl.h b/src/plugins/dev_iavf/virtchnl.h
new file mode 100644
index 00000000000..2099104c8ad
--- /dev/null
+++ b/src/plugins/dev_iavf/virtchnl.h
@@ -0,0 +1,570 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_VIRTCHNL_H_
+#define _IIAVF_VIRTCHNL_H_
+
+#define VIRTCHNL_VERSION_MAJOR 1
+#define VIRTCHNL_VERSION_MINOR 1
+
+#define foreach_iavf_promisc_flags \
+ _ (0, UNICAST_PROMISC, "unicast") \
+ _ (1, MULTICAST_PROMISC, "multicast")
+
+enum
+{
+#define _(a, b, c) FLAG_VF_##b = (1 << a),
+ foreach_iavf_promisc_flags
+#undef _
+};
+
+#define IAVF_VFINT_DYN_CTLN(x) (0x00003800 + (0x4 * x))
+#define IAVF_VFINT_ICR0 0x00004800
+#define IAVF_VFINT_ICR0_ENA1 0x00005000
+#define IAVF_VFINT_DYN_CTL0 0x00005C00
+#define IAVF_ARQBAH 0x00006000
+#define IAVF_ATQH 0x00006400
+#define IAVF_ATQLEN 0x00006800
+#define IAVF_ARQBAL 0x00006C00
+#define IAVF_ARQT 0x00007000
+#define IAVF_ARQH 0x00007400
+#define IAVF_ATQBAH 0x00007800
+#define IAVF_ATQBAL 0x00007C00
+#define IAVF_ARQLEN 0x00008000
+#define IAVF_ATQT 0x00008400
+#define IAVF_VFGEN_RSTAT 0x00008800
+#define IAVF_QTX_TAIL(q) (0x00000000 + (0x4 * q))
+#define IAVF_QRX_TAIL(q) (0x00002000 + (0x4 * q))
+
+#define foreach_virtchnl_op \
+ _ (0, UNKNOWN) \
+ _ (1, VERSION) \
+ _ (2, RESET_VF) \
+ _ (3, GET_VF_RESOURCES) \
+ _ (4, CONFIG_TX_QUEUE) \
+ _ (5, CONFIG_RX_QUEUE) \
+ _ (6, CONFIG_VSI_QUEUES) \
+ _ (7, CONFIG_IRQ_MAP) \
+ _ (8, ENABLE_QUEUES) \
+ _ (9, DISABLE_QUEUES) \
+ _ (10, ADD_ETH_ADDR) \
+ _ (11, DEL_ETH_ADDR) \
+ _ (12, ADD_VLAN) \
+ _ (13, DEL_VLAN) \
+ _ (14, CONFIG_PROMISCUOUS_MODE) \
+ _ (15, GET_STATS) \
+ _ (16, RSVD) \
+ _ (17, EVENT) \
+ _ (18, UNDEF_18) \
+ _ (19, UNDEF_19) \
+ _ (20, IWARP) \
+ _ (21, CONFIG_IWARP_IRQ_MAP) \
+ _ (22, RELEASE_IWARP_IRQ_MAP) \
+ _ (23, CONFIG_RSS_KEY) \
+ _ (24, CONFIG_RSS_LUT) \
+ _ (25, GET_RSS_HENA_CAPS) \
+ _ (26, SET_RSS_HENA) \
+ _ (27, ENABLE_VLAN_STRIPPING) \
+ _ (28, DISABLE_VLAN_STRIPPING) \
+ _ (29, REQUEST_QUEUES) \
+ _ (30, ENABLE_CHANNELS) \
+ _ (31, DISABLE_CHANNELS) \
+ _ (32, ADD_CLOUD_FILTER) \
+ _ (33, DEL_CLOUD_FILTER) \
+ _ (45, ADD_RSS_CFG) \
+ _ (46, DEL_RSS_CFG) \
+ _ (47, ADD_FDIR_FILTER) \
+ _ (48, DEL_FDIR_FILTER) \
+ _ (49, QUERY_FDIR_FILTER) \
+ _ (50, GET_MAX_RSS_QREGION) \
+ _ (51, GET_OFFLOAD_VLAN_V2_CAPS) \
+ _ (52, ADD_VLAN_V2) \
+ _ (53, DEL_VLAN_V2) \
+ _ (54, ENABLE_VLAN_STRIPPING_V2) \
+ _ (55, DISABLE_VLAN_STRIPPING_V2) \
+ _ (56, ENABLE_VLAN_INSERTION_V2) \
+ _ (57, DISABLE_VLAN_INSERTION_V2) \
+ _ (58, ENABLE_VLAN_FILTERING_V2) \
+ _ (59, DISABLE_VLAN_FILTERING_V2) \
+ _ (107, ENABLE_QUEUES_V2) \
+ _ (108, DISABLE_QUEUES_V2) \
+ _ (111, MAP_QUEUE_VECTOR)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL_OP_##n = v,
+ foreach_virtchnl_op
+#undef _
+ VIRTCHNL_N_OPS,
+} virtchnl_op_t;
+
+#define foreach_virtchnl_status \
+ _ (0, SUCCESS) \
+ _ (-5, ERR_PARAM) \
+ _ (-18, ERR_NO_MEMORY) \
+ _ (-38, ERR_OPCODE_MISMATCH) \
+ _ (-39, ERR_CQP_COMPL_ERROR) \
+ _ (-40, ERR_INVALID_VF_ID) \
+ _ (-53, ERR_ADMIN_QUEUE_ERROR) \
+ _ (-64, NOT_SUPPORTED)
+
+typedef enum
+{
+#define _(a, b) VIRTCHNL_STATUS_##b = a,
+ foreach_virtchnl_status
+#undef _
+} virtchnl_status_t;
+
+#define foreach_iavf_vf_cap_flag \
+ _ (0, OFFLOAD_L2, "l2") \
+ _ (1, OFFLOAD_IWARP, "iwarp") \
+ _ (2, OFFLOAD_RSVD, "rsvd") \
+ _ (3, OFFLOAD_RSS_AQ, "rss-aq") \
+ _ (4, OFFLOAD_RSS_REG, "rss-reg") \
+ _ (5, OFFLOAD_WB_ON_ITR, "wb-on-itr") \
+ _ (6, OFFLOAD_REQ_QUEUES, "req-queues") \
+ _ (7, CAP_ADV_LINK_SPEED, "adv-link-speed") \
+ _ (9, LARGE_NUM_QPAIRS, "large-num-qpairs") \
+ _ (15, OFFLOAD_VLAN_V2, "vlan-v2") \
+ _ (16, OFFLOAD_VLAN, "vlan") \
+ _ (17, OFFLOAD_RX_POLLING, "rx-polling") \
+ _ (18, OFFLOAD_RSS_PCTYPE_V2, "rss-pctype-v2") \
+ _ (19, OFFLOAD_RSS_PF, "rss-pf") \
+ _ (20, OFFLOAD_ENCAP, "encap") \
+ _ (21, OFFLOAD_ENCAP_CSUM, "encap-csum") \
+ _ (22, OFFLOAD_RX_ENCAP_CSUM, "rx-encap-csum") \
+ _ (23, OFFLOAD_ADQ, "offload-adq") \
+ _ (24, OFFLOAD_ADQ_v2, "offload-adq-v2") \
+ _ (25, OFFLOAD_USO, "offload-uso") \
+ _ (26, OFFLOAD_RX_FLEX_DESC, "offload-rx-flex-desc") \
+ _ (27, OFFLOAD_ADV_RSS_PF, "offload-adv-rss-pf") \
+ _ (28, OFFLOAD_FDIR_PF, "offload-fdir-pf") \
+ _ (30, CAP_DCF, "dcf")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL_VF_##b = (1 << a),
+ foreach_iavf_vf_cap_flag
+#undef _
+} iavf_vf_cap_flag_t;
+
+typedef enum
+{
+ VIRTCHNL_VSI_TYPE_INVALID = 0,
+ VIRTCHNL_VSI_SRIOV = 6,
+} virtchnl_vsi_type_t;
+
+typedef enum
+{
+ VIRTCHNL_VFR_INPROGRESS = 0,
+ VIRTCHNL_VFR_COMPLETED,
+ VIRTCHNL_VFR_VFACTIVE,
+} virtchnl_vfr_states_t;
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 num_queue_pairs;
+ virtchnl_vsi_type_t vsi_type;
+ u16 qset_handle;
+ u8 default_mac_addr[6];
+} virtchnl_vsi_resource_t;
+
+typedef struct
+{
+ u16 num_vsis;
+ u16 num_queue_pairs;
+ u16 max_vectors;
+ u16 max_mtu;
+ u32 vf_cap_flags;
+ u32 rss_key_size;
+ u32 rss_lut_size;
+ virtchnl_vsi_resource_t vsi_res[1];
+} virtchnl_vf_resource_t;
+
+#define foreach_virtchnl_event_code \
+ _ (0, UNKNOWN) \
+ _ (1, LINK_CHANGE) \
+ _ (2, RESET_IMPENDING) \
+ _ (3, PF_DRIVER_CLOSE)
+
+typedef enum
+{
+#define _(a, b) VIRTCHNL_EVENT_##b = (a),
+ foreach_virtchnl_event_code
+#undef _
+} virtchnl_event_codes_t;
+
+#define foreach_virtchnl_link_speed \
+ _ (0, 2_5GB, "2.5 Gbps") \
+ _ (1, 100MB, "100 Mbps") \
+ _ (2, 1GB, "1 Gbps") \
+ _ (3, 10GB, "10 Gbps") \
+ _ (4, 40GB, "40 Gbps") \
+ _ (5, 20GB, "20 Gbps") \
+ _ (6, 25GB, "25 Gbps") \
+ _ (7, 5GB, "5 Gbps")
+
+typedef enum
+{
+ VIRTCHNL_LINK_SPEED_UNKNOWN = 0,
+#define _(a, b, c) VIRTCHNL_LINK_SPEED_##b = (1 << a),
+ foreach_virtchnl_link_speed
+#undef _
+} virtchnl_link_speed_t;
+
+typedef struct
+{
+ virtchnl_event_codes_t event;
+ union
+ {
+ struct
+ {
+ virtchnl_link_speed_t link_speed;
+ u8 link_status;
+ } link_event;
+ struct
+ {
+ u32 link_speed;
+ u8 link_status;
+ } link_event_adv;
+ } event_data;
+ int severity;
+} virtchnl_pf_event_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_pf_event_t, 16);
+
+typedef struct
+{
+ u32 major;
+ u32 minor;
+} virtchnl_version_info_t;
+
+#define foreach_iavf_aq_desc_flag \
+ _ (1, dd) \
+ _ (1, cmp) \
+ _ (1, err) \
+ _ (1, vfe) \
+ _ (5, reserved) \
+ _ (1, lb) \
+ _ (1, rd) \
+ _ (1, vfc) \
+ _ (1, buf) \
+ _ (1, si) \
+ _ (1, ie) \
+ _ (1, fe)
+
+typedef union
+{
+ struct
+ {
+#define _(n, s) u16 s : n;
+ foreach_iavf_aq_desc_flag
+#undef _
+ };
+ u16 as_u16;
+} iavf_aq_desc_flags_t;
+
+STATIC_ASSERT_SIZEOF (iavf_aq_desc_flags_t, 2);
+
+typedef enum
+{
+ IIAVF_AQ_DESC_OP_QUEUE_SHUTDOWN = 0x0003,
+ IIAVF_AQ_DESC_OP_SEND_TO_PF = 0x0801,
+ IIAVF_AQ_DESC_OP_MESSAGE_FROM_PF = 0x0802,
+} __clib_packed iavf_aq_desc_op_t;
+
+#define foreach_iavf_aq_desc_retval \
+ _ (0, OK) \
+ _ (1, EPERM) \
+ _ (2, ENOENT) \
+ _ (3, ESRCH) \
+ _ (4, EINTR) \
+ _ (5, EIO) \
+ _ (6, ENXIO) \
+ _ (7, E2BIG) \
+ _ (8, EAGAIN) \
+ _ (9, ENOMEM) \
+ _ (10, EACCES) \
+ _ (11, EFAULT) \
+ _ (12, EBUSY) \
+ _ (13, EEXIST) \
+ _ (14, EINVAL) \
+ _ (15, ENOTTY) \
+ _ (16, ENOSPC) \
+ _ (17, ENOSYS) \
+ _ (18, ERANGE) \
+ _ (19, EFLUSHED) \
+ _ (20, BAD_ADDR) \
+ _ (21, EMODE) \
+ _ (22, EFBIG) \
+ _ (23, ESBCOMP) \
+ _ (24, ENOSEC) \
+ _ (25, EBADSIG) \
+ _ (26, ESVN) \
+ _ (27, EBADMAN) \
+ _ (28, EBADBUF) \
+ _ (29, EACCES_BMCU)
+
+typedef enum
+{
+#define _(a, b) IIAVF_AQ_DESC_RETVAL_##b = a,
+ foreach_iavf_aq_desc_retval
+#undef _
+} __clib_packed iavf_aq_desc_retval_t;
+
+typedef struct
+{
+ iavf_aq_desc_flags_t flags;
+ iavf_aq_desc_op_t opcode;
+ u16 datalen;
+ u16 retval;
+ union
+ {
+ u32 cookie_hi;
+ virtchnl_op_t v_opcode;
+ };
+ union
+ {
+ u32 cookie_lo;
+ virtchnl_status_t v_retval;
+ };
+ union
+ {
+ u8 driver_unloading : 1;
+ u32 param0;
+ };
+ u32 param1;
+ union
+ {
+ u32 param2;
+ u32 addr_hi;
+ };
+ union
+ {
+ u32 param3;
+ u32 addr_lo;
+ };
+} iavf_aq_desc_t;
+
+STATIC_ASSERT_SIZEOF (iavf_aq_desc_t, 32);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 queue_id;
+ u16 ring_len;
+ u64 dma_ring_addr;
+ u64 dma_headwb_addr;
+} virtchnl_txq_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_txq_info_t, 24);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 queue_id;
+ u32 ring_len;
+ u16 hdr_size;
+ u16 splithdr_enabled;
+ u32 databuffer_size;
+ u32 max_pkt_size;
+ u8 crc_disable;
+ u8 rxdid;
+ u8 pad[2];
+ u64 dma_ring_addr;
+ i32 rx_split_pos;
+ u32 pad2;
+} virtchnl_rxq_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_rxq_info_t, 40);
+
+typedef struct
+{
+ virtchnl_txq_info_t txq;
+ virtchnl_rxq_info_t rxq;
+} virtchnl_queue_pair_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_queue_pair_info_t, 64);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 num_queue_pairs;
+ u32 pad;
+ virtchnl_queue_pair_info_t qpair[1];
+} virtchnl_vsi_queue_config_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_vsi_queue_config_info_t, 72);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 pad;
+ u32 rx_queues;
+ u32 tx_queues;
+} virtchnl_queue_select_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_queue_select_t, 12);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 vector_id;
+ u16 rxq_map;
+ u16 txq_map;
+ u16 rxitr_idx;
+ u16 txitr_idx;
+} virtchnl_vector_map_t;
+
+typedef struct
+{
+ u16 num_vectors;
+ virtchnl_vector_map_t vecmap[1];
+} virtchnl_irq_map_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_irq_map_info_t, 14);
+
+typedef struct
+{
+ u8 addr[6];
+ union
+ {
+ struct
+ {
+ u8 primary : 1;
+ u8 extra : 1;
+ };
+ u8 type;
+ };
+ u8 pad[1];
+} virtchnl_ether_addr_t;
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 num_elements;
+ virtchnl_ether_addr_t list[1];
+} virtchnl_ether_addr_list_t;
+
+#define foreach_virtchnl_eth_stats \
+ _ (rx_bytes) \
+ _ (rx_unicast) \
+ _ (rx_multicast) \
+ _ (rx_broadcast) \
+ _ (rx_discards) \
+ _ (rx_unknown_protocol) \
+ _ (tx_bytes) \
+ _ (tx_unicast) \
+ _ (tx_multicast) \
+ _ (tx_broadcast) \
+ _ (tx_discards) \
+ _ (tx_errors)
+
+typedef struct
+{
+#define _(s) u64 s;
+ foreach_virtchnl_eth_stats
+#undef _
+} virtchnl_eth_stats_t;
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 key_len;
+ u8 key[1];
+} virtchnl_rss_key_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_rss_key_t, 6);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 lut_entries;
+ u8 lut[1];
+} virtchnl_rss_lut_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_rss_lut_t, 6);
+
+/* VIRTCHNL_OP_REQUEST_QUEUES */
+typedef struct
+{
+ u16 num_queue_pairs;
+} virtchnl_vf_res_request_t;
+
+typedef struct
+{
+ u32 outer;
+ u32 inner;
+} virtchnl_vlan_supported_caps_t;
+
+typedef struct
+{
+ virtchnl_vlan_supported_caps_t filtering_support;
+ u32 ethertype_init;
+ u16 max_filters;
+ u8 pad[2];
+} virtchnl_vlan_filtering_caps_t;
+
+typedef struct virtchnl_vlan_offload_caps
+{
+ virtchnl_vlan_supported_caps_t stripping_support;
+ virtchnl_vlan_supported_caps_t insertion_support;
+ u32 ethertype_init;
+ u8 ethertype_match;
+ u8 pad[3];
+} virtchnl_vlan_offload_caps_t;
+
+typedef struct
+{
+ virtchnl_vlan_filtering_caps_t filtering;
+ virtchnl_vlan_offload_caps_t offloads;
+} virtchnl_vlan_caps_t;
+
+#define foreach_virtchnl_vlan_support_bit \
+ _ (0, ETHERTYPE_8100, "dot1Q") \
+ _ (1, ETHERTYPE_88A8, "dot1AD") \
+ _ (2, ETHERTYPE_9100, "QinQ") \
+ _ (8, TAG_LOCATION_L2TAG1, "l2tag1") \
+ _ (9, TAG_LOCATION_L2TAG2, "l2tag2") \
+ _ (10, TAG_LOCATION_L2TAG2_2, "l2tag2_2") \
+ _ (24, PRIO, "prio") \
+ _ (28, FILTER_MASK, "filter-mask") \
+ _ (29, ETHERTYPE_AND, "etype-and") \
+ _ (30, ETHERTYPE_XOR, "etype-xor") \
+ _ (31, TOGGLE, "toggle")
+
+typedef enum
+{
+ VIRTCHNL_VLAN_UNSUPPORTED = 0,
+#define _(a, b, c) VIRTCHNL_VLAN_##b = (1 << a),
+ foreach_virtchnl_vlan_support_bit
+#undef _
+} virtchnl_vlan_support_caps_t;
+
+typedef struct
+{
+ u32 outer_ethertype_setting;
+ u32 inner_ethertype_setting;
+ u16 vport_id;
+ u8 pad[6];
+} virtchnl_vlan_setting_t;
+
+typedef struct
+{
+ u16 vsi_id;
+ union
+ {
+ struct
+ {
+ u16 unicast_promisc : 1;
+ u16 multicast_promisc : 1;
+ };
+ u16 flags;
+ };
+} virtchnl_promisc_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_promisc_info_t, 4);
+
+#endif /* _IIAVF_VIRTCHNL_H_ */
diff --git a/src/plugins/dev_iavf/virtchnl_funcs.h b/src/plugins/dev_iavf/virtchnl_funcs.h
new file mode 100644
index 00000000000..e7f3901e0ee
--- /dev/null
+++ b/src/plugins/dev_iavf/virtchnl_funcs.h
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_VIRTCHNL_FUNCS_H_
+#define _IIAVF_VIRTCHNL_FUNCS_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/dev/dev.h>
+#include <dev_iavf/iavf.h>
+
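+/* size of a variable-length virtchnl message: the offset of element n + 1
+ * of the trailing array member e within struct s */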
+#define VIRTCHNL_MSG_SZ(s, e, n) STRUCT_OFFSET_OF (s, e[(n) + 1])
+
+typedef struct
+{
+ virtchnl_op_t op;
+ u8 no_reply : 1;
+ u16 req_sz;
+ u16 resp_sz;
+ virtchnl_status_t status;
+ const void *req;
+ void *resp;
+} iavf_virtchnl_req_t;
+
+vnet_dev_rv_t iavf_virtchnl_req (vlib_main_t *, vnet_dev_t *,
+ iavf_virtchnl_req_t *);
+
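+/* Each helper below fills an iavf_virtchnl_req_t for a single virtchnl
+ * op; variable-length requests are sized with VIRTCHNL_MSG_SZ. */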
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_version (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_version_info_t *req,
+ virtchnl_version_info_t *resp)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_VERSION,
+ .req = req,
+ .req_sz = sizeof (*req),
+ .resp = resp,
+ .resp_sz = sizeof (*resp),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_reset_vf (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_RESET_VF,
+ .no_reply = 1,
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_get_vf_resources (vlib_main_t *vm, vnet_dev_t *dev, const u32 *req,
+ virtchnl_vf_resource_t *resp)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_GET_VF_RESOURCES,
+ .req = req,
+ .req_sz = sizeof (*req),
+ .resp = resp,
+ .resp_sz = sizeof (*resp),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_enable_queues (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_queue_select_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_ENABLE_QUEUES,
+ .req = req,
+ .req_sz = sizeof (*req),
+ };
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_disable_queues (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_queue_select_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_DISABLE_QUEUES,
+ .req = req,
+ .req_sz = sizeof (*req),
+ };
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_vsi_queues (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_vsi_queue_config_info_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ .req = req,
+ .req_sz = VIRTCHNL_MSG_SZ (virtchnl_vsi_queue_config_info_t, qpair,
+ req->num_queue_pairs),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_irq_map (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_irq_map_info_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_CONFIG_IRQ_MAP,
+ .req = req,
+ .req_sz =
+ VIRTCHNL_MSG_SZ (virtchnl_irq_map_info_t, vecmap, req->num_vectors),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_rss_lut (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_rss_lut_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_CONFIG_RSS_LUT,
+ .req = req,
+ .req_sz = VIRTCHNL_MSG_SZ (virtchnl_rss_lut_t, lut, req->lut_entries),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_rss_key (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_rss_key_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_CONFIG_RSS_KEY,
+ .req = req,
+ .req_sz = VIRTCHNL_MSG_SZ (virtchnl_rss_key_t, key, req->key_len),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_promisc_info_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+ .req = req,
+ .req_sz = sizeof (*req),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_disable_vlan_stripping (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_add_eth_addr (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_ether_addr_list_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_ADD_ETH_ADDR,
+ .req = req,
+ .req_sz =
+ VIRTCHNL_MSG_SZ (virtchnl_ether_addr_list_t, list, req->num_elements),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_del_eth_addr (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_ether_addr_list_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_DEL_ETH_ADDR,
+ .req = req,
+ .req_sz =
+ VIRTCHNL_MSG_SZ (virtchnl_ether_addr_list_t, list, req->num_elements),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_get_offload_vlan_v2_caps (vlib_main_t *vm, vnet_dev_t *dev,
+ virtchnl_vlan_caps_t *resp)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS,
+ .resp = resp,
+ .resp_sz = sizeof (*resp),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_get_stats (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_queue_select_t *req,
+ virtchnl_eth_stats_t *resp)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_GET_STATS,
+ .req = req,
+ .req_sz = sizeof (*req),
+ .resp = resp,
+ .resp_sz = sizeof (*resp),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_disable_vlan_stripping_v2 (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_vlan_setting_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2,
+ .req = req,
+ .req_sz = sizeof (*req),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+#endif /* _IIAVF_VIRTCHNL_FUNCS_H_ */
diff --git a/src/plugins/dev_octeon/CMakeLists.txt b/src/plugins/dev_octeon/CMakeLists.txt
new file mode 100644
index 00000000000..e8abf1a3389
--- /dev/null
+++ b/src/plugins/dev_octeon/CMakeLists.txt
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2022 Cisco Systems, Inc.
+
+if (NOT VPP_PLATFORM_NAME STREQUAL "octeon10")
+ return()
+endif()
+
+# Find OCTEON roc files
+vpp_find_path(OCTEON_ROC_DIR PATH_SUFFIXES octeon-roc NAMES platform.h)
+vpp_plugin_find_library(dev-octeon OCTEON_ROC_LIB "libocteon-roc.a")
+
+if (NOT OCTEON_ROC_DIR)
+ message("OCTEON ROC files not found - Marvell OCTEON device plugin disabled")
+ return()
+endif()
+
+if (NOT OCTEON_ROC_LIB)
+ message("OCTEON ROC library (libocteon-roc.a) not found - Marvell OCTEON device plugin disabled")
+ return ()
+endif()
+
+include_directories (${OCTEON_ROC_DIR}/)
+
+add_vpp_plugin(dev_octeon
+ SOURCES
+ init.c
+ format.c
+ port.c
+ queue.c
+ roc_helper.c
+ rx_node.c
+ tx_node.c
+ flow.c
+
+ MULTIARCH_SOURCES
+ rx_node.c
+ tx_node.c
+
+ LINK_LIBRARIES
+ ${OCTEON_ROC_LIB}
+)
+
diff --git a/src/plugins/dev_octeon/common.h b/src/plugins/dev_octeon/common.h
new file mode 100644
index 00000000000..a7a051526d2
--- /dev/null
+++ b/src/plugins/dev_octeon/common.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _OCT_COMMON_H_
+#define _OCT_COMMON_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <base/roc_api.h>
+
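+/* Drain all buffers from an NPA aura and return them to the vlib buffer
+ * pool; returns the number of buffers freed. */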
+static_always_inline u32
+oct_aura_free_all_buffers (vlib_main_t *vm, u64 aura_handle, u16 hdr_off)
+{
+ u32 n = 0;
+ u64 iova;
+
+ while ((iova = roc_npa_aura_op_alloc (aura_handle, 0)))
+ {
+ vlib_buffer_t *b = (void *) iova + hdr_off;
+ vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b));
+ n++;
+ }
+ return n;
+}
+
+#endif /* _OCT_COMMON_H_ */
diff --git a/src/plugins/dev_octeon/flow.c b/src/plugins/dev_octeon/flow.c
new file mode 100644
index 00000000000..1c367a036ab
--- /dev/null
+++ b/src/plugins/dev_octeon/flow.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2024 Marvell.
+ * SPDX-License-Identifier: Apache-2.0
+ * https://spdx.org/licenses/Apache-2.0.html
+ */
+
+#include <dev_octeon/octeon.h>
+#include <base/roc_npc_priv.h>
+
+VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
+ .class_name = "octeon",
+ .subclass_name = "flow",
+};
+
+#define FLOW_IS_ETHERNET_CLASS(f) (f->type == VNET_FLOW_TYPE_ETHERNET)
+
+#define FLOW_IS_IPV4_CLASS(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPU) || \
+ (f->type == VNET_FLOW_TYPE_IP4_L2TPV3OIP) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IPSEC_AH))
+
+#define FLOW_IS_IPV6_CLASS(f) \
+ ((f->type == VNET_FLOW_TYPE_IP6) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP6_VXLAN))
+
+#define FLOW_IS_L3_TYPE(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4) || (f->type == VNET_FLOW_TYPE_IP6))
+
+#define FLOW_IS_L4_TYPE(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED))
+
+#define FLOW_IS_L4_TUNNEL_TYPE(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP6_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPU))
+
+#define OCT_FLOW_UNSUPPORTED_ACTIONS(f) \
+ ((f->actions == VNET_FLOW_ACTION_BUFFER_ADVANCE) || \
+ (f->actions == VNET_FLOW_ACTION_REDIRECT_TO_NODE))
+
+/* Keep values in sync with vnet/flow.h */
+#define foreach_oct_flow_rss_types \
+ _ (1, FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_TCP, "ipv4-tcp") \
+ _ (2, FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_UDP, "ipv4-udp") \
+ _ (3, FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_SCTP, "ipv4-sctp") \
+ _ (5, FLOW_KEY_TYPE_IPV4, "ipv4") \
+ _ (9, FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_TCP, "ipv6-tcp") \
+ _ (10, FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_UDP, "ipv6-udp") \
+ _ (11, FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_SCTP, "ipv6-sctp") \
+ _ (13, FLOW_KEY_TYPE_IPV6_EXT, "ipv6-ex") \
+ _ (14, FLOW_KEY_TYPE_IPV6, "ipv6") \
+ _ (16, FLOW_KEY_TYPE_PORT, "port") \
+ _ (17, FLOW_KEY_TYPE_VXLAN, "vxlan") \
+ _ (18, FLOW_KEY_TYPE_GENEVE, "geneve") \
+ _ (19, FLOW_KEY_TYPE_NVGRE, "nvgre") \
+ _ (20, FLOW_KEY_TYPE_GTPU, "gtpu") \
+ _ (60, FLOW_KEY_TYPE_L4_DST, "l4-dst-only") \
+ _ (61, FLOW_KEY_TYPE_L4_SRC, "l4-src-only") \
+ _ (62, FLOW_KEY_TYPE_L3_DST, "l3-dst-only") \
+ _ (63, FLOW_KEY_TYPE_L3_SRC, "l3-src-only")
+
+typedef struct
+{
+ u16 src_port;
+ u16 dst_port;
+ u32 verification_tag;
+ u32 cksum;
+} sctp_header_t;
+
+typedef struct
+{
+ u8 ver_flags;
+ u8 type;
+ u16 length;
+ u32 teid;
+} gtpu_header_t;
+
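+/* Translate vnet RSS type bits (see foreach_oct_flow_rss_types above)
+ * into a ROC NPC flow key configuration. */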
+static void
+oct_flow_convert_rss_types (u64 *key, u64 rss_types)
+{
+#define _(a, b, c) \
+ if (rss_types & (1UL << a)) \
+ *key |= b;
+
+ foreach_oct_flow_rss_types
+#undef _
+
+ return;
+}
+
+vnet_dev_rv_t
+oct_flow_validate_params (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_type_t type, u32 flow_index,
+ uword *priv_data)
+{
+ vnet_flow_t *flow = vnet_get_flow (flow_index);
+ u32 last_queue;
+ u32 qid;
+
+ if (type == VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER ||
+ type == VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER)
+ {
+ log_err (port->dev, "Unsupported request type");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ if (OCT_FLOW_UNSUPPORTED_ACTIONS (flow))
+ {
+ log_err (port->dev, "Unsupported flow action");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
+ {
+ qid = flow->redirect_queue;
+ /* qid is unsigned, so only the upper bound needs checking */
+ if (qid > port->intf.num_rx_queues - 1)
+ {
+ log_err (port->dev,
+ "Given Q(%d) is invalid, supported range is %d-%d", qid, 0,
+ port->intf.num_rx_queues - 1);
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ }
+
+ if (flow->actions & VNET_FLOW_ACTION_RSS)
+ {
+ last_queue = flow->queue_index + flow->queue_num;
+ if (last_queue > port->intf.num_rx_queues - 1)
+ {
+ log_err (port->dev,
+ "Given Q range(%d-%d) is invalid, supported range is %d-%d",
+ flow->queue_index, flow->queue_index + flow->queue_num, 0,
+ port->intf.num_rx_queues - 1);
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ }
+ return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+oct_flow_rule_create (vnet_dev_port_t *port, struct roc_npc_action *actions,
+ struct roc_npc_item_info *item_info, vnet_flow_t *flow,
+ uword *private_data)
+{
+ oct_port_t *oct_port = vnet_dev_get_port_data (port);
+ struct roc_npc_attr attr = { .priority = 1, .ingress = 1 };
+ struct roc_npc_flow *npc_flow;
+ oct_flow_entry_t *flow_entry;
+ struct roc_npc *npc;
+ int rv = 0;
+
+ npc = &oct_port->npc;
+
+ npc_flow =
+ roc_npc_flow_create (npc, &attr, item_info, actions, npc->pf_func, &rv);
+ if (rv)
+ {
+ log_err (port->dev, "roc_npc_flow_create failed with '%s' error",
+ roc_error_msg_get (rv));
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ roc_npc_mcam_clear_counter (npc, npc_flow->ctr_id);
+
+ pool_get_zero (oct_port->flow_entries, flow_entry);
+ flow_entry->index = flow_entry - oct_port->flow_entries;
+ flow_entry->vnet_flow_index = flow->index;
+ flow_entry->npc_flow = npc_flow;
+
+ *private_data = flow_entry->index;
+
+ return VNET_DEV_OK;
+}
+
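+/* Build ROC NPC match items (ethernet/IPv4/IPv6 plus optional L4 and
+ * tunnel headers) and actions (queue, drop, RSS, mark; a count action is
+ * always appended) from a vnet flow, then install the rule. */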
+static vnet_dev_rv_t
+oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
+ uword *private_data)
+{
+ struct roc_npc_item_info item_info[ROC_NPC_ITEM_TYPE_END] = {};
+ struct roc_npc_action actions[ROC_NPC_ITEM_TYPE_END] = {};
+ oct_port_t *oct_port = vnet_dev_get_port_data (port);
+ u16 l4_src_port = 0, l4_dst_port = 0;
+ u16 l4_src_mask = 0, l4_dst_mask = 0;
+ struct roc_npc_action_rss rss_conf = {};
+ struct roc_npc_action_queue conf = {};
+ struct roc_npc_action_mark mark = {};
+ struct roc_npc *npc = &oct_port->npc;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ int layer = 0, index = 0;
+ u16 *queues = NULL;
+ u64 flow_key = 0;
+ u8 proto = 0;
+ u16 action = 0;
+
+ if (FLOW_IS_ETHERNET_CLASS (flow))
+ {
+ ethernet_header_t eth_spec = { .type = clib_host_to_net_u16 (
+ flow->ethernet.eth_hdr.type) },
+ eth_mask = { .type = 0xFFFF };
+
+ item_info[layer].spec = (void *) &eth_spec;
+ item_info[layer].mask = (void *) &eth_mask;
+ item_info[layer].size = sizeof (ethernet_header_t);
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_ETH;
+ layer++;
+ }
+
+ else if (FLOW_IS_IPV4_CLASS (flow))
+ {
+ vnet_flow_ip4_t *ip4_hdr = &flow->ip4;
+ proto = ip4_hdr->protocol.prot;
+ ip4_header_t ip4_spec = { .src_address = ip4_hdr->src_addr.addr,
+ .dst_address = ip4_hdr->dst_addr.addr },
+ ip4_mask = { .src_address = ip4_hdr->src_addr.mask,
+ .dst_address = ip4_hdr->dst_addr.mask };
+
+ item_info[layer].spec = (void *) &ip4_spec;
+ item_info[layer].mask = (void *) &ip4_mask;
+ item_info[layer].size = sizeof (ip4_header_t);
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_IPV4;
+ layer++;
+
+ if (FLOW_IS_L4_TYPE (flow))
+ {
+ vnet_flow_ip4_n_tuple_t *ip4_tuple_hdr = &flow->ip4_n_tuple;
+
+ l4_src_port = clib_host_to_net_u16 (ip4_tuple_hdr->src_port.port);
+ l4_dst_port = clib_host_to_net_u16 (ip4_tuple_hdr->dst_port.port);
+ l4_src_mask = clib_host_to_net_u16 (ip4_tuple_hdr->src_port.mask);
+ l4_dst_mask = clib_host_to_net_u16 (ip4_tuple_hdr->dst_port.mask);
+ }
+ }
+ else if (FLOW_IS_IPV6_CLASS (flow))
+ {
+ vnet_flow_ip6_t *ip6_hdr = &flow->ip6;
+ proto = ip6_hdr->protocol.prot;
+ ip6_header_t ip6_spec = { .src_address = ip6_hdr->src_addr.addr,
+ .dst_address = ip6_hdr->dst_addr.addr },
+ ip6_mask = { .src_address = ip6_hdr->src_addr.mask,
+ .dst_address = ip6_hdr->dst_addr.mask };
+
+ item_info[layer].spec = (void *) &ip6_spec;
+ item_info[layer].mask = (void *) &ip6_mask;
+ item_info[layer].size = sizeof (ip6_header_t);
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_IPV6;
+ layer++;
+
+ if (FLOW_IS_L4_TYPE (flow))
+ {
+ vnet_flow_ip6_n_tuple_t *ip6_tuple_hdr = &flow->ip6_n_tuple;
+
+ l4_src_port = clib_host_to_net_u16 (ip6_tuple_hdr->src_port.port);
+ l4_dst_port = clib_host_to_net_u16 (ip6_tuple_hdr->dst_port.port);
+ l4_src_mask = clib_host_to_net_u16 (ip6_tuple_hdr->src_port.mask);
+ l4_dst_mask = clib_host_to_net_u16 (ip6_tuple_hdr->dst_port.mask);
+ }
+ }
+
+ if (!proto)
+ goto end_item_info;
+
+ switch (proto)
+ {
+ case IP_PROTOCOL_UDP:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_UDP;
+
+ udp_header_t udp_spec = { .src_port = l4_src_port,
+ .dst_port = l4_dst_port },
+ udp_mask = { .src_port = l4_src_mask,
+ .dst_port = l4_dst_mask };
+
+ item_info[layer].spec = (void *) &udp_spec;
+ item_info[layer].mask = (void *) &udp_mask;
+ item_info[layer].size = sizeof (udp_header_t);
+ layer++;
+
+ if (FLOW_IS_L4_TUNNEL_TYPE (flow))
+ {
+ switch (flow->type)
+ {
+ case VNET_FLOW_TYPE_IP4_GTPU:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_GTPU;
+ gtpu_header_t gtpu_spec = { .teid = clib_host_to_net_u32 (
+ flow->ip4_gtpu.teid) },
+ gtpu_mask = { .teid = 0xFFFFFFFF };
+
+ item_info[layer].spec = (void *) &gtpu_spec;
+ item_info[layer].mask = (void *) &gtpu_mask;
+ item_info[layer].size = sizeof (gtpu_header_t);
+ layer++;
+ break;
+
+ default:
+ log_err (port->dev, "Unsupported L4 tunnel type");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ } /* FLOW_IS_L4_TUNNEL_TYPE */
+ break;
+
+ case IP_PROTOCOL_TCP:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_TCP;
+
+ tcp_header_t tcp_spec = { .src_port = l4_src_port,
+ .dst_port = l4_dst_port },
+ tcp_mask = { .src_port = l4_src_mask,
+ .dst_port = l4_dst_mask };
+
+ item_info[layer].spec = (void *) &tcp_spec;
+ item_info[layer].mask = (void *) &tcp_mask;
+ item_info[layer].size = sizeof (tcp_header_t);
+ layer++;
+ break;
+
+ case IP_PROTOCOL_SCTP:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_SCTP;
+
+ sctp_header_t sctp_spec = { .src_port = l4_src_port,
+ .dst_port = l4_dst_port },
+ sctp_mask = { .src_port = l4_src_mask,
+ .dst_port = l4_dst_mask };
+
+ item_info[layer].spec = (void *) &sctp_spec;
+ item_info[layer].mask = (void *) &sctp_mask;
+ item_info[layer].size = sizeof (sctp_header_t);
+ layer++;
+ break;
+
+ case IP_PROTOCOL_IPSEC_ESP:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_ESP;
+ esp_header_t esp_spec = { .spi = clib_host_to_net_u32 (
+ flow->ip4_ipsec_esp.spi) },
+ esp_mask = { .spi = 0xFFFFFFFF };
+
+ item_info[layer].spec = (void *) &esp_spec;
+ item_info[layer].mask = (void *) &esp_mask;
+ item_info[layer].size = sizeof (u32);
+ layer++;
+ break;
+
+ default:
+ log_err (port->dev, "Unsupported IP protocol '%U'", format_ip_protocol,
+ proto);
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+end_item_info:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_END;
+
+ if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
+ {
+ conf.index = flow->redirect_queue;
+ actions[action].type = ROC_NPC_ACTION_TYPE_QUEUE;
+ actions[action].conf = &conf;
+ action++;
+ }
+
+ else if (flow->actions & VNET_FLOW_ACTION_DROP)
+ {
+ actions[action].type = ROC_NPC_ACTION_TYPE_DROP;
+ action++;
+ }
+
+ else if (flow->actions & VNET_FLOW_ACTION_RSS)
+ {
+ if (!flow->queue_num)
+ {
+ log_err (port->dev, "RSS action has no queues");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ queues = clib_mem_alloc (sizeof (u16) * port->intf.num_rx_queues);
+
+ for (index = 0; index < flow->queue_num; index++)
+ queues[index] = flow->queue_index++;
+
+ oct_flow_convert_rss_types (&flow_key, flow->rss_types);
+ if (!flow_key)
+ {
+ log_err (port->dev, "Invalid RSS hash function");
+ clib_mem_free (queues);
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ npc->flowkey_cfg_state = flow_key;
+ rss_conf.queue_num = flow->queue_num;
+ rss_conf.queue = queues;
+
+ actions[action].type = ROC_NPC_ACTION_TYPE_RSS;
+ actions[action].conf = &rss_conf;
+ action++;
+ }
+
+ if (flow->actions & VNET_FLOW_ACTION_MARK)
+ {
+ if (flow->mark_flow_id == 0 ||
+ flow->mark_flow_id > (NPC_FLOW_FLAG_VAL - 2))
+ {
+ log_err (port->dev, "mark flow id must be > 0 and < 0xfffe");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ /* RoC library adds 1 to id, so subtract 1 */
+ mark.id = flow->mark_flow_id - 1;
+ actions[action].type = ROC_NPC_ACTION_TYPE_MARK;
+ actions[action].conf = &mark;
+ action++;
+ }
+
+ /* append a count action by default */
+ actions[action].type = ROC_NPC_ACTION_TYPE_COUNT;
+ actions[action + 1].type = ROC_NPC_ACTION_TYPE_END;
+
+ rv = oct_flow_rule_create (port, actions, item_info, flow, private_data);
+
+ if (queues)
+ clib_mem_free (queues);
+
+ return rv;
+}
+
+static vnet_dev_rv_t
+oct_flow_del (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
+ uword *private_data)
+{
+ oct_port_t *oct_port = vnet_dev_get_port_data (port);
+ struct roc_npc *npc = &oct_port->npc;
+ struct roc_npc_flow *npc_flow;
+ oct_flow_entry_t *flow_entry;
+ int rv = 0, index;
+
+ index = *private_data;
+ flow_entry = pool_elt_at_index (oct_port->flow_entries, index);
+ npc_flow = flow_entry->npc_flow;
+ rv = roc_npc_flow_destroy (npc, npc_flow);
+ if (rv)
+ {
+ log_err (port->dev, "roc_npc_flow_destroy failed with '%s' error",
+ roc_error_msg_get (rv));
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ pool_put (oct_port->flow_entries, flow_entry);
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+oct_flow_query (vlib_main_t *vm, vnet_dev_port_t *port, u32 flow_index,
+ uword private_data, u64 *hits)
+{
+ oct_port_t *oct_port = vnet_dev_get_port_data (port);
+ struct roc_npc *npc = &oct_port->npc;
+ struct roc_npc_flow *npc_flow;
+ oct_flow_entry_t *flow_entry;
+ i32 flow_count;
+ int rv = 0;
+
+ flow_count = pool_elts (oct_port->flow_entries);
+ if (!flow_count)
+ {
+ log_err (port->dev, "Flow entry pool is empty");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ flow_entry = pool_elt_at_index (oct_port->flow_entries, private_data);
+ npc_flow = flow_entry->npc_flow;
+ if (npc_flow->ctr_id == NPC_COUNTER_NONE)
+ {
+ log_err (port->dev, "Counters are not available for given flow id (%u)",
+ flow_index);
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ rv = roc_npc_mcam_read_counter (npc, npc_flow->ctr_id, hits);
+ if (rv != 0)
+ {
+ log_err (port->dev, "Error reading flow counter for given flow id (%u)",
+ flow_index);
+ return VNET_DEV_ERR_INTERNAL;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+oct_flow_ops_fn (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_type_t type, u32 flow_index,
+ uword *priv_data)
+{
+ vnet_flow_t *flow = vnet_get_flow (flow_index);
+
+ if (type == VNET_DEV_PORT_CFG_ADD_RX_FLOW)
+ return oct_flow_add (vm, port, flow, priv_data);
+
+ if (type == VNET_DEV_PORT_CFG_DEL_RX_FLOW)
+ return oct_flow_del (vm, port, flow, priv_data);
+
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+}
diff --git a/src/plugins/dev_octeon/format.c b/src/plugins/dev_octeon/format.c
new file mode 100644
index 00000000000..e624b84f54e
--- /dev/null
+++ b/src/plugins/dev_octeon/format.c
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vlib/pci/pci.h"
+#include "vnet/error.h"
+#include "vppinfra/error.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_octeon/octeon.h>
+
+u8 *
+format_oct_port_status (u8 *s, va_list *args)
+{
+ return s;
+}
+
+u8 *
+format_oct_nix_rx_cqe_desc (u8 *s, va_list *args)
+{
+ oct_nix_rx_cqe_desc_t *d = va_arg (*args, oct_nix_rx_cqe_desc_t *);
+ u32 indent = format_get_indent (s);
+ typeof (d->hdr) *h = &d->hdr;
+ typeof (d->parse.f) *p = &d->parse.f;
+ typeof (d->sg0) *sg0 = &d->sg0;
+ typeof (d->sg0) *sg1 = &d->sg1;
+
+ s = format (s, "hdr: cqe_type %u nude %u q %u tag 0x%x", h->cqe_type,
+ h->node, h->q, h->tag);
+ s = format (s, "\n%Uparse:", format_white_space, indent);
+#define _(n, f) s = format (s, " " #n " " f, p->n)
+ _ (chan, "%u");
+ _ (errcode, "%u");
+ _ (errlev, "%u");
+ _ (desc_sizem1, "%u");
+ _ (pkt_lenm1, "%u");
+ _ (pkind, "%u");
+ s = format (s, "\n%U ", format_white_space, indent);
+ _ (nix_idx, "%u");
+ _ (color, "%u");
+ _ (flow_key_alg, "%u");
+ _ (eoh_ptr, "%u");
+ _ (match_id, "0x%x");
+ s = format (s, "\n%U ", format_white_space, indent);
+ _ (wqe_aura, "0x%x");
+ _ (pb_aura, "0x%x");
+ _ (imm_copy, "%u");
+ _ (express, "%u");
+ _ (wqwd, "%u");
+ _ (l2m, "%u");
+ _ (l2b, "%u");
+ _ (l3m, "%u");
+ _ (l3b, "%u");
+#undef _
+ s = format (s, "\n%U ", format_white_space, indent);
+ s = format (s, "layer: a b c d e f g h");
+ s = format (s, "\n%U ", format_white_space, indent);
+ s = format (s, "type: %3u %3u %3u %3u %3u %3u %3u %3u", p->latype,
+ p->lbtype, p->lctype, p->ldtype, p->letype, p->lftype, p->lgtype,
+ p->lhtype);
+ s = format (s, "\n%U ", format_white_space, indent);
+ s = format (
+ s, "flags: 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x",
+ p->laflags, p->lbflags, p->lcflags, p->ldflags, p->leflags, p->lfflags,
+ p->lgflags, p->lhflags);
+ s = format (s, "\n%U ", format_white_space, indent);
+ s = format (s, "ptr: %3u %3u %3u %3u %3u %3u %3u %3u", p->laptr,
+ p->lbptr, p->lcptr, p->ldptr, p->leptr, p->lfptr, p->lgptr,
+ p->lhptr);
+
+ if (sg0->subdc != 0x4)
+ return format (s, "\n%Usg0: unexpected subdc %x", format_white_space,
+ indent, sg0->subdc);
+
+ s = format (s,
+ "\n%Usg0: segs %u seg1_sz %u seg2_sz %u seg3_sz %u seg1 "
+ "%p seg2 %p seg3 %p",
+ format_white_space, indent, sg0->segs, sg0->seg1_size,
+ sg0->seg2_size, sg0->seg3_size, d->segs0[0], d->segs0[1],
+ d->segs0[2]);
+
+ if (sg1->subdc != 0x4 && sg1->subdc != 0)
+ return format (s, "\n%Usg1: unexpected subdc %x", format_white_space,
+ indent, sg1->subdc);
+
+ if (sg1->subdc == 4)
+ s = format (s,
+ "\n%Usg1: segs %u seg1_sz %u seg2_sz %u seg3_sz %u seg1 "
+ "%p seg2 %p seg3 %p",
+ format_white_space, indent, sg1->segs, sg1->seg1_size,
+ sg1->seg2_size, sg1->seg3_size, d->segs1[0], d->segs1[1],
+ d->segs1[2]);
+
+ return s;
+}
+
+u8 *
+format_oct_rx_trace (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ vlib_node_t *node = va_arg (*args, vlib_node_t *);
+ oct_rx_trace_t *t = va_arg (*args, oct_rx_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "octeon-rx: next-node %U sw_if_index %u",
+ format_vlib_next_node_name, vm, node->index, t->next_index,
+ t->sw_if_index);
+ s = format (s, "\n%U%U", format_white_space, indent + 2,
+ format_oct_nix_rx_cqe_desc, &t->desc);
+ return s;
+}
+
+u8 *
+format_oct_tx_trace (u8 *s, va_list *args)
+{
+ va_arg (*args, vlib_main_t *);
+ va_arg (*args, vlib_node_t *);
+ oct_tx_trace_t *t = va_arg (*args, oct_tx_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "octeon-tx: sw_if_index %u", t->sw_if_index);
+ s = format (s, "\n%Uhdr[0]:", format_white_space, indent + 2);
+#define _(n, f) s = format (s, " " #n " " f, t->desc.hdr_w0.n)
+ _ (total, "%u");
+ _ (df, "%u");
+ _ (aura, "0x%x");
+ _ (sizem1, "%u");
+ _ (pnc, "%u");
+ _ (sq, "%u");
+#undef _
+ s = format (s, "\n%Uhdr[1]:", format_white_space, indent + 2);
+#define _(n, f) s = format (s, " " #n " " f, t->desc.hdr_w1.n)
+ _ (ol3ptr, "%u");
+ _ (ol4ptr, "%u");
+ _ (il3ptr, "%u");
+ _ (il4ptr, "%u");
+ _ (ol3type, "%u");
+ _ (ol4type, "%u");
+ _ (il3type, "%u");
+ _ (il4type, "%u");
+ _ (sqe_id, "%u");
+#undef _
+
+ foreach_int (j, 0, 4)
+ {
+ s = format (s, "\n%Usg[%u]:", format_white_space, indent + 2, j);
+#define _(n, f) s = format (s, " " #n " " f, t->desc.sg[j].n)
+ _ (subdc, "%u");
+ _ (segs, "%u");
+ _ (seg1_size, "%u");
+ _ (seg2_size, "%u");
+ _ (seg3_size, "%u");
+ _ (i1, "%u");
+ _ (i2, "%u");
+ _ (i3, "%u");
+ _ (ld_type, "%u");
+#undef _
+ for (int i = 1; i < 4; i++)
+ s = format (s, "\n%Usg[%u]: %p", format_white_space, indent + 2, i + j,
+ t->desc.sg[i + j]);
+ }
+
+ return s;
+}
+
+u8 *
+format_oct_port_flow (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+ u32 flow_index = va_arg (*args, u32);
+ uword private_data = va_arg (*args, uword);
+ u64 hits;
+
+ if (flow_index == ~0)
+ return s;
+
+ if (oct_flow_query (vm, port, flow_index, private_data, &hits) ==
+ VNET_DEV_OK)
+ s = format (s, "flow (%u) hit count: %lu", flow_index, hits);
+
+ return s;
+}
diff --git a/src/plugins/dev_octeon/hw_defs.h b/src/plugins/dev_octeon/hw_defs.h
new file mode 100644
index 00000000000..ab0fc7bd8da
--- /dev/null
+++ b/src/plugins/dev_octeon/hw_defs.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _OCT_HW_DEFS_H_
+#define _OCT_HW_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <base/roc_api.h>
+
+typedef union
+{
+ struct
+ {
+ u64 tail : 20;
+ u64 head : 20;
+ u64 resv40 : 6;
+ u64 cq_err : 1;
+ u64 resv47 : 16;
+ u64 op_err : 1;
+ };
+ u64 as_u64;
+} oct_nix_lf_cq_op_status_t;
+
+STATIC_ASSERT_SIZEOF (oct_nix_lf_cq_op_status_t, 8);
+
+typedef union
+{
+ struct
+ {
+ u64 aura : 20;
+ u64 _reseved20 : 12;
+ u64 count_eot : 1;
+ u64 _reserved33 : 30;
+ u64 fabs : 1;
+ };
+ u64 as_u64;
+} oct_npa_lf_aura_batch_free0_t;
+
+STATIC_ASSERT_SIZEOF (oct_npa_lf_aura_batch_free0_t, 8);
+
+typedef struct
+{
+ oct_npa_lf_aura_batch_free0_t w0;
+ u64 data[15];
+} oct_npa_lf_aura_batch_free_line_t;
+
+STATIC_ASSERT_SIZEOF (oct_npa_lf_aura_batch_free_line_t, 128);
+
+typedef union
+{
+ struct npa_batch_alloc_compare_s compare_s;
+ u64 as_u64;
+} oct_npa_batch_alloc_compare_t;
+
+typedef union
+{
+ struct
+ {
+ union nix_send_hdr_w0_u hdr_w0;
+ union nix_send_hdr_w1_u hdr_w1;
+ union nix_send_sg_s sg[8];
+ };
+ u128 as_u128[5];
+} oct_tx_desc_t;
+
+STATIC_ASSERT_SIZEOF (oct_tx_desc_t, 80);
+
+typedef union
+{
+ u128 dwords[8];
+ u64 words[16];
+} lmt_line_t;
+
+STATIC_ASSERT_SIZEOF (lmt_line_t, 1 << ROC_LMT_LINE_SIZE_LOG2);
+
+typedef union
+{
+ union nix_rx_parse_u f;
+ u64 w[7];
+} oct_nix_rx_parse_t;
+
+STATIC_ASSERT_SIZEOF (oct_nix_rx_parse_t, 56);
+
+typedef struct
+{
+ CLIB_ALIGN_MARK (desc, 128);
+ struct nix_cqe_hdr_s hdr;
+ oct_nix_rx_parse_t parse;
+ struct nix_rx_sg_s sg0;
+ void *segs0[3];
+ struct nix_rx_sg_s sg1;
+ void *segs1[3];
+} oct_nix_rx_cqe_desc_t;
+
+STATIC_ASSERT_SIZEOF (oct_nix_rx_cqe_desc_t, 128);
+
+#endif /* _OCT_HW_DEFS_H_ */
diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c
new file mode 100644
index 00000000000..8c5ed95b062
--- /dev/null
+++ b/src/plugins/dev_octeon/init.c
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <dev_octeon/octeon.h>
+
+#include <base/roc_api.h>
+#include <common.h>
+
+struct roc_model oct_model;
+
+VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
+ .class_name = "octeon",
+ .subclass_name = "init",
+};
+
+#define _(f, n, s, d) \
+ { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s },
+
+vlib_error_desc_t oct_tx_node_counters[] = { foreach_oct_tx_node_counter };
+#undef _
+
+vnet_dev_node_t oct_rx_node = {
+ .format_trace = format_oct_rx_trace,
+};
+
+vnet_dev_node_t oct_tx_node = {
+ .format_trace = format_oct_tx_trace,
+ .error_counters = oct_tx_node_counters,
+ .n_error_counters = ARRAY_LEN (oct_tx_node_counters),
+};
+
+static struct
+{
+ u16 device_id;
+ oct_device_type_t type;
+ char *description;
+} oct_dev_types[] = {
+
+#define _(id, device_type, desc) \
+ { \
+ .device_id = (id), .type = OCT_DEVICE_TYPE_##device_type, \
+ .description = (desc) \
+ }
+
+ _ (0xa063, RVU_PF, "Marvell Octeon Resource Virtualization Unit PF"),
+ _ (0xa0f8, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"),
+ _ (0xa0f7, SDP_VF, "Marvell Octeon System DPI Packet Interface Unit VF"),
+ _ (0xa0f3, CPT_VF, "Marvell Octeon Cryptographic Accelerator Unit VF"),
+#undef _
+};
+
+static u8 *
+oct_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info)
+{
+ vnet_dev_bus_pci_device_info_t *di = dev_info;
+
+ if (di->vendor_id != 0x177d) /* Cavium */
+ return 0;
+
+ FOREACH_ARRAY_ELT (dt, oct_dev_types)
+ {
+ if (dt->device_id == di->device_id)
+ return format (0, "%s", dt->description);
+ }
+
+ return 0;
+}
+
+vnet_dev_rv_t
+cnx_return_roc_err (vnet_dev_t *dev, int rrv, char *fmt, ...)
+{
+ va_list va;
+ va_start (va, fmt);
+ u8 *s = va_format (0, fmt, &va);
+ va_end (va);
+
+ log_err (dev, "%v: %s [%d]", s, roc_error_msg_get (rrv), rrv);
+ vec_free (s);
+
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+}
+
+static vnet_dev_rv_t
+oct_alloc (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ cd->nix =
+ clib_mem_alloc_aligned (sizeof (struct roc_nix), CLIB_CACHE_LINE_BYTES);
+ return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ u8 mac_addr[6];
+ int rrv;
+ oct_port_t oct_port = {};
+
+ *cd->nix = (struct roc_nix){
+ .reta_sz = ROC_NIX_RSS_RETA_SZ_256,
+ .max_sqb_count = 512,
+ .pci_dev = &cd->plt_pci_dev,
+ };
+
+ if ((rrv = roc_nix_dev_init (cd->nix)))
+ return cnx_return_roc_err (dev, rrv, "roc_nix_dev_init");
+ cd->nix_initialized = 1;
+
+ if ((rrv = roc_nix_npc_mac_addr_get (cd->nix, mac_addr)))
+ return cnx_return_roc_err (dev, rrv, "roc_nix_npc_mac_addr_get");
+
+ vnet_dev_port_add_args_t port_add_args = {
+ .port = {
+ .attr = {
+ .type = VNET_DEV_PORT_TYPE_ETHERNET,
+ .max_rx_queues = 64,
+ .max_tx_queues = 64,
+ .max_supported_rx_frame_size = roc_nix_max_pkt_len (cd->nix),
+ .caps = {
+ .rss = 1,
+ },
+ .rx_offloads = {
+ .ip4_cksum = 1,
+ },
+ },
+ .ops = {
+ .init = oct_port_init,
+ .deinit = oct_port_deinit,
+ .start = oct_port_start,
+ .stop = oct_port_stop,
+ .config_change = oct_port_cfg_change,
+ .config_change_validate = oct_port_cfg_change_validate,
+ .format_status = format_oct_port_status,
+ .format_flow = format_oct_port_flow,
+ },
+ .data_size = sizeof (oct_port_t),
+ .initial_data = &oct_port,
+ },
+ .rx_node = &oct_rx_node,
+ .tx_node = &oct_tx_node,
+ .rx_queue = {
+ .config = {
+ .data_size = sizeof (oct_rxq_t),
+ .default_size = 1024,
+ .multiplier = 32,
+ .min_size = 256,
+ .max_size = 16384,
+ },
+ .ops = {
+ .alloc = oct_rx_queue_alloc,
+ .free = oct_rx_queue_free,
+ .format_info = format_oct_rxq_info,
+ },
+ },
+ .tx_queue = {
+ .config = {
+ .data_size = sizeof (oct_txq_t),
+ .default_size = 1024,
+ .multiplier = 32,
+ .min_size = 256,
+ .max_size = 16384,
+ },
+ .ops = {
+ .alloc = oct_tx_queue_alloc,
+ .free = oct_tx_queue_free,
+ .format_info = format_oct_txq_info,
+ },
+ },
+ };
+
+ vnet_dev_set_hw_addr_eth_mac (&port_add_args.port.attr.hw_addr, mac_addr);
+
+ log_info (dev, "MAC address is %U", format_ethernet_address, mac_addr);
+
+ return vnet_dev_port_add (vm, dev, 0, &port_add_args);
+}
+
+static vnet_dev_rv_t
+oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ int rrv;
+ struct roc_cpt cpt = {
+ .pci_dev = &cd->plt_pci_dev,
+ };
+
+ if ((rrv = roc_cpt_dev_init (&cpt)))
+ return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_init");
+ return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+oct_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ vlib_pci_config_hdr_t pci_hdr;
+ vnet_dev_rv_t rv;
+
+ rv = vnet_dev_pci_read_config_header (vm, dev, &pci_hdr);
+ if (rv != VNET_DEV_OK)
+ return rv;
+
+ if (pci_hdr.vendor_id != 0x177d)
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+
+ FOREACH_ARRAY_ELT (dt, oct_dev_types)
+ {
+ if (dt->device_id == pci_hdr.device_id)
+ cd->type = dt->type;
+ }
+
+ if (cd->type == OCT_DEVICE_TYPE_UNKNOWN)
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+
+ cd->plt_pci_dev = (struct plt_pci_device){
+ .id.vendor_id = pci_hdr.vendor_id,
+ .id.device_id = pci_hdr.device_id,
+ .id.class_id = pci_hdr.class << 16 | pci_hdr.subclass,
+ .pci_handle = vnet_dev_get_pci_handle (dev),
+ };
+
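+ /* the ROC layer expects PCI memory regions 2 and 4 to be mapped into
+ plt_pci_dev.mem_resource[] before device init */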
+ foreach_int (i, 2, 4)
+ {
+ rv = vnet_dev_pci_map_region (vm, dev, i,
+ &cd->plt_pci_dev.mem_resource[i].addr);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+
+ strncpy ((char *) cd->plt_pci_dev.name, dev->device_id,
+ sizeof (cd->plt_pci_dev.name) - 1);
+
+ switch (cd->type)
+ {
+ case OCT_DEVICE_TYPE_RVU_PF:
+ case OCT_DEVICE_TYPE_RVU_VF:
+ case OCT_DEVICE_TYPE_SDP_VF:
+ return oct_init_nix (vm, dev);
+
+ case OCT_DEVICE_TYPE_CPT_VF:
+ return oct_init_cpt (vm, dev);
+
+ default:
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+ }
+
+ return 0;
+}
+
+static void
+oct_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+
+ if (cd->nix_initialized)
+ {
+ roc_nix_dev_fini (cd->nix);
+ cd->nix_initialized = 0;
+ }
+}
+
+static void
+oct_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+
+ if (cd->nix_initialized)
+ roc_nix_dev_fini (cd->nix);
+
+ if (cd->nix)
+ clib_mem_free (cd->nix);
+}
+
+VNET_DEV_REGISTER_DRIVER (octeon) = {
+ .name = "octeon",
+ .bus = "pci",
+ .device_data_sz = sizeof (oct_device_t),
+ .ops = {
+ .alloc = oct_alloc,
+ .init = oct_init,
+ .deinit = oct_deinit,
+ .free = oct_free,
+ .probe = oct_probe,
+ },
+};
+
+static clib_error_t *
+oct_plugin_init (vlib_main_t *vm)
+{
+ int rv;
+ extern oct_plt_init_param_t oct_plt_init_param;
+
+ rv = oct_plt_init (&oct_plt_init_param);
+ if (rv)
+ return clib_error_return (0, "oct_plt_init failed");
+
+ rv = roc_model_init (&oct_model);
+ if (rv)
+ return clib_error_return (0, "roc_model_init failed");
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (oct_plugin_init);
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "dev_octeon",
+};
diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h
new file mode 100644
index 00000000000..92ec953ed23
--- /dev/null
+++ b/src/plugins/dev_octeon/octeon.h
@@ -0,0 +1,186 @@
+
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+#ifndef _OCTEON_H_
+#define _OCTEON_H_
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/flow/flow.h>
+#include <vnet/udp/udp.h>
+#include <vnet/ipsec/esp.h>
+#include <base/roc_api.h>
+#include <dev_octeon/hw_defs.h>
+
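+/* buffer IOVAs handed to the NPA are at least 128-byte aligned, so their low
+ * 7 bits are always zero; batch-alloc result lines are pre-filled with this
+ * mask so those bits can carry the completion status code (see tx_node.c) */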
+#define OCT_BATCH_ALLOC_IOVA0_MASK 0xFFFFFFFFFFFFFF80
+
+typedef enum
+{
+ OCT_DEVICE_TYPE_UNKNOWN = 0,
+ OCT_DEVICE_TYPE_RVU_PF,
+ OCT_DEVICE_TYPE_RVU_VF,
+ OCT_DEVICE_TYPE_SDP_VF,
+ OCT_DEVICE_TYPE_CPT_VF,
+} __clib_packed oct_device_type_t;
+
+typedef struct
+{
+ oct_device_type_t type;
+ u8 nix_initialized : 1;
+ u8 status : 1;
+ u8 full_duplex : 1;
+ u32 speed;
+ struct plt_pci_device plt_pci_dev;
+ struct roc_cpt cpt;
+ struct roc_nix *nix;
+} oct_device_t;
+
+typedef struct
+{
+ /* vnet flow index */
+ u32 vnet_flow_index;
+
+ u32 index;
+ /* Internal flow object */
+ struct roc_npc_flow *npc_flow;
+} oct_flow_entry_t;
+
+typedef struct
+{
+ u8 lf_allocated : 1;
+ u8 tm_initialized : 1;
+ u8 npc_initialized : 1;
+ struct roc_npc npc;
+ oct_flow_entry_t *flow_entries;
+} oct_port_t;
+
+typedef struct
+{
+ u8 npa_pool_initialized : 1;
+ u8 cq_initialized : 1;
+ u8 rq_initialized : 1;
+ u16 hdr_off;
+ u32 n_enq;
+ u64 aura_handle;
+ u64 aura_batch_free_ioaddr;
+ u64 lmt_base_addr;
+ CLIB_CACHE_LINE_ALIGN_MARK (data0);
+ struct roc_nix_cq cq;
+ struct roc_nix_rq rq;
+} oct_rxq_t;
+
+typedef struct
+{
+ CLIB_ALIGN_MARK (cl, 128);
+ u64 iova[16];
+} oct_npa_batch_alloc_cl128_t;
+
+typedef union
+{
+ struct npa_batch_alloc_status_s status;
+ u64 as_u64;
+} oct_npa_batch_alloc_status_t;
+
+STATIC_ASSERT_SIZEOF (oct_npa_batch_alloc_cl128_t, 128);
+
+typedef struct
+{
+ u8 sq_initialized : 1;
+ u8 npa_pool_initialized : 1;
+ u16 hdr_off;
+ u32 n_enq;
+ u64 aura_handle;
+ u64 io_addr;
+ void *lmt_addr;
+
+ oct_npa_batch_alloc_cl128_t *ba_buffer;
+ u8 ba_first_cl;
+ u8 ba_num_cl;
+ CLIB_CACHE_LINE_ALIGN_MARK (data0);
+ struct roc_nix_sq sq;
+} oct_txq_t;
+
+/* format.c */
+format_function_t format_oct_port_status;
+format_function_t format_oct_rx_trace;
+format_function_t format_oct_tx_trace;
+format_function_t format_oct_port_flow;
+
+/* port.c */
+vnet_dev_rv_t oct_port_init (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t oct_port_start (vlib_main_t *, vnet_dev_port_t *);
+void oct_port_stop (vlib_main_t *, vnet_dev_port_t *);
+void oct_port_deinit (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t oct_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t oct_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+
+/* queue.c */
+vnet_dev_rv_t oct_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t oct_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *);
+void oct_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void oct_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t oct_rxq_init (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t oct_txq_init (vlib_main_t *, vnet_dev_tx_queue_t *);
+void oct_rxq_deinit (vlib_main_t *, vnet_dev_rx_queue_t *);
+void oct_txq_deinit (vlib_main_t *, vnet_dev_tx_queue_t *);
+format_function_t format_oct_rxq_info;
+format_function_t format_oct_txq_info;
+
+/* flow.c */
+vnet_dev_rv_t oct_flow_ops_fn (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_type_t, u32, uword *);
+vnet_dev_rv_t oct_flow_validate_params (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_type_t, u32,
+ uword *);
+vnet_dev_rv_t oct_flow_query (vlib_main_t *, vnet_dev_port_t *, u32, uword,
+ u64 *);
+
+#define log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, oct_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_info(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_INFO, oct_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_notice(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_NOTICE, oct_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, oct_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, oct_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+
+#define foreach_oct_tx_node_counter \
+ _ (CHAIN_TOO_LONG, chain_too_long, ERROR, "drop due to buffer chain > 6") \
+ _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots") \
+ _ (AURA_BATCH_ALLOC_ISSUE_FAIL, aura_batch_alloc_issue_fail, ERROR, \
+ "aura batch alloc issue failed") \
+ _ (AURA_BATCH_ALLOC_NOT_READY, aura_batch_alloc_not_ready, ERROR, \
+ "aura batch alloc not ready")
+
+typedef enum
+{
+#define _(f, n, s, d) OCT_TX_NODE_CTR_##f,
+ foreach_oct_tx_node_counter
+#undef _
+} oct_tx_node_counter_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_index;
+ oct_nix_rx_cqe_desc_t desc;
+} oct_rx_trace_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ oct_tx_desc_t desc;
+} oct_tx_trace_t;
+#endif /* _OCTEON_H_ */
diff --git a/src/plugins/dev_octeon/port.c b/src/plugins/dev_octeon/port.c
new file mode 100644
index 00000000000..d5f78301adf
--- /dev/null
+++ b/src/plugins/dev_octeon/port.c
@@ -0,0 +1,493 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_octeon/octeon.h>
+#include <dev_octeon/common.h>
+#include <vnet/ethernet/ethernet.h>
+
+#define OCT_FLOW_PREALLOC_SIZE 1
+#define OCT_FLOW_MAX_PRIORITY 7
+#define OCT_ETH_LINK_SPEED_100G 100000 /**< 100 Gbps */
+
+VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
+ .class_name = "octeon",
+ .subclass_name = "port",
+};
+
+static const u8 default_rss_key[] = {
+ 0xfe, 0xed, 0x0b, 0xad, 0xfe, 0xed, 0x0b, 0xad, 0xad, 0x0b, 0xed, 0xfe,
+ 0xad, 0x0b, 0xed, 0xfe, 0x13, 0x57, 0x9b, 0xef, 0x24, 0x68, 0xac, 0x0e,
+ 0x91, 0x72, 0x53, 0x11, 0x82, 0x64, 0x20, 0x44, 0x12, 0xef, 0x34, 0xcd,
+ 0x56, 0xbc, 0x78, 0x9a, 0x9a, 0x78, 0xbc, 0x56, 0xcd, 0x34, 0xef, 0x12
+};
+
+static const u32 default_rss_flowkey =
+ (FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_TCP |
+ FLOW_KEY_TYPE_UDP | FLOW_KEY_TYPE_SCTP);
+
+static const u64 rxq_cfg =
+ ROC_NIX_LF_RX_CFG_DIS_APAD | ROC_NIX_LF_RX_CFG_IP6_UDP_OPT |
+ ROC_NIX_LF_RX_CFG_L2_LEN_ERR | ROC_NIX_LF_RX_CFG_DROP_RE |
+ ROC_NIX_LF_RX_CFG_CSUM_OL4 | ROC_NIX_LF_RX_CFG_CSUM_IL4 |
+ ROC_NIX_LF_RX_CFG_LEN_OL3 | ROC_NIX_LF_RX_CFG_LEN_OL4 |
+ ROC_NIX_LF_RX_CFG_LEN_IL3 | ROC_NIX_LF_RX_CFG_LEN_IL4;
+
+static vnet_dev_rv_t
+oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...)
+{
+ u8 *s = 0;
+ va_list va;
+
+ va_start (va, fmt);
+ s = va_format (s, fmt, &va);
+ va_end (va);
+
+ log_err (dev, "%v - ROC error %s (%d)", s, roc_error_msg_get (rv), rv);
+
+ vec_free (s);
+ return VNET_DEV_ERR_INTERNAL;
+}
+
+vnet_dev_rv_t
+oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ oct_port_t *cp = vnet_dev_get_port_data (port);
+ struct roc_nix *nix = cd->nix;
+ vnet_dev_rv_t rv;
+ int rrv;
+
+ log_debug (dev, "port init: port %u", port->port_id);
+
+ if ((rrv = roc_nix_lf_alloc (nix, port->intf.num_rx_queues,
+ port->intf.num_tx_queues, rxq_cfg)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (
+ dev, rrv,
+ "roc_nix_lf_alloc(nb_rxq = %u, nb_txq = %d, rxq_cfg=0x%lx) failed",
+ port->intf.num_rx_queues, port->intf.num_tx_queues, rxq_cfg);
+ }
+ cp->lf_allocated = 1;
+
+ if ((rrv = roc_nix_tm_init (nix)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (dev, rrv, "roc_nix_tm_init() failed");
+ }
+ cp->tm_initialized = 1;
+
+ if ((rrv = roc_nix_tm_hierarchy_enable (nix, ROC_NIX_TM_DEFAULT,
+ /* xmit_enable */ 0)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (dev, rrv, "roc_nix_tm_hierarchy_enable() failed");
+ }
+
+ if ((rrv = roc_nix_rss_default_setup (nix, default_rss_flowkey)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (dev, rrv, "roc_nix_rss_default_setup() failed");
+ }
+
+ roc_nix_rss_key_set (nix, default_rss_key);
+
+ cp->npc.roc_nix = nix;
+ cp->npc.flow_prealloc_size = OCT_FLOW_PREALLOC_SIZE;
+ cp->npc.flow_max_priority = OCT_FLOW_MAX_PRIORITY;
+ if ((rrv = roc_npc_init (&cp->npc)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (dev, rrv, "roc_npc_init() failed");
+ }
+ cp->npc_initialized = 1;
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->enabled)
+ if ((rv = oct_rxq_init (vm, q)))
+ {
+ oct_port_deinit (vm, port);
+ return rv;
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if (q->enabled)
+ if ((rv = oct_txq_init (vm, q)))
+ {
+ oct_port_deinit (vm, port);
+ return rv;
+ }
+
+ return VNET_DEV_OK;
+}
+
+void
+oct_port_deinit (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ oct_port_t *cp = vnet_dev_get_port_data (port);
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ oct_rxq_deinit (vm, q);
+ foreach_vnet_dev_port_tx_queue (q, port)
+ oct_txq_deinit (vm, q);
+
+ if (cp->npc_initialized)
+ {
+ if ((rrv = roc_npc_fini (&cp->npc)))
+ oct_roc_err (dev, rrv, "roc_npc_fini() failed");
+ cp->npc_initialized = 0;
+ }
+
+ if (cp->tm_initialized)
+ {
+ roc_nix_tm_fini (nix);
+ cp->tm_initialized = 0;
+ }
+
+ if (cp->lf_allocated)
+ {
+ if ((rrv = roc_nix_lf_free (nix)))
+ oct_roc_err (dev, rrv, "roc_nix_lf_free() failed");
+ cp->lf_allocated = 0;
+ }
+}
+
+void
+oct_port_poll (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ struct roc_nix_link_info link_info = {};
+ vnet_dev_port_state_changes_t changes = {};
+ int rrv;
+
+ if (roc_nix_is_lbk (nix))
+ {
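+ /* LBK (loopback) devices expose no MAC link state to query;
+ report a fixed 100G full-duplex link instead */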
+ link_info.status = 1;
+ link_info.full_duplex = 1;
+ link_info.autoneg = 0;
+ link_info.speed = OCT_ETH_LINK_SPEED_100G;
+ }
+ else
+ {
+ rrv = roc_nix_mac_link_info_get (nix, &link_info);
+ if (rrv)
+ return;
+ }
+
+ if (cd->status != link_info.status)
+ {
+ changes.change.link_state = 1;
+ changes.link_state = link_info.status;
+ cd->status = link_info.status;
+ }
+
+ if (cd->full_duplex != link_info.full_duplex)
+ {
+ changes.change.link_duplex = 1;
+ changes.full_duplex = link_info.full_duplex;
+ cd->full_duplex = link_info.full_duplex;
+ }
+
+ if (cd->speed != link_info.speed)
+ {
+ changes.change.link_speed = 1;
+ changes.link_speed = link_info.speed;
+ cd->speed = link_info.speed;
+ }
+
+ if (changes.change.any == 0)
+ return;
+
+ log_debug (dev,
+ "status %u full_duplex %u speed %u port %u lmac_type_id %u "
+ "fec %u aautoneg %u",
+ link_info.status, link_info.full_duplex, link_info.speed,
+ link_info.port, link_info.lmac_type_id, link_info.fec,
+ link_info.autoneg);
+ vnet_dev_port_state_change (vm, port, changes);
+}
+
+vnet_dev_rv_t
+oct_rxq_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_t *dev = rxq->port->dev;
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ u32 buffer_indices[rxq->size], n_alloc;
+ u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+ int rrv;
+
+ n_alloc = vlib_buffer_alloc_from_pool (vm, buffer_indices, rxq->size, bpi);
+
+ for (int i = 0; i < n_alloc; i++)
+ roc_npa_aura_op_free (
+ crq->aura_handle, 0,
+ pointer_to_uword (vlib_get_buffer (vm, buffer_indices[i])) -
+ crq->hdr_off);
+
+ crq->n_enq = n_alloc;
+
+ if (roc_npa_aura_op_available (crq->aura_handle) != rxq->size)
+ log_warn (rxq->port->dev, "rx queue %u aura not filled completely",
+ rxq->queue_id);
+
+ if ((rrv = roc_nix_rq_ena_dis (&crq->rq, 1)))
+ return oct_roc_err (dev, rrv, "roc_nix_rq_ena_dis() failed");
+
+ return VNET_DEV_OK;
+}
+void
+oct_rxq_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_t *dev = rxq->port->dev;
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ int rrv;
+ u32 n;
+
+ if ((rrv = roc_nix_rq_ena_dis (&crq->rq, 0)))
+ oct_roc_err (dev, rrv, "roc_nix_rq_ena_dis() failed");
+
+ n = oct_aura_free_all_buffers (vm, crq->aura_handle, crq->hdr_off);
+
+ if (crq->n_enq > n)
+ log_err (dev, "%u buffers leaked on rx queue %u stop", crq->n_enq - n,
+ rxq->queue_id);
+ else
+ log_debug (dev, "%u buffers freed from rx queue %u", n, rxq->queue_id);
+
+ crq->n_enq = 0;
+}
+
+void
+oct_txq_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_t *dev = txq->port->dev;
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ oct_npa_batch_alloc_cl128_t *cl;
+ u32 n, off = ctq->hdr_off;
+
+ n = oct_aura_free_all_buffers (vm, ctq->aura_handle, off);
+ ctq->n_enq -= n;
+
+ if (ctq->n_enq > 0 && ctq->ba_num_cl > 0)
+ for (n = ctq->ba_num_cl, cl = ctq->ba_buffer + ctq->ba_first_cl; n;
+ cl++, n--)
+ {
+ oct_npa_batch_alloc_status_t st;
+
+ st.as_u64 = __atomic_load_n (cl->iova, __ATOMIC_ACQUIRE);
+ if (st.status.ccode != ALLOC_CCODE_INVAL)
+ for (u32 i = 0; i < st.status.count; i++)
+ {
+ vlib_buffer_t *b = (vlib_buffer_t *) (cl->iova[i] + off);
+ vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b));
+ ctq->n_enq--;
+ }
+ }
+
+ if (ctq->n_enq > 0)
+ log_err (dev, "%u buffers leaked on tx queue %u stop", ctq->n_enq,
+ txq->queue_id);
+ else
+ log_debug (dev, "%u buffers freed from tx queue %u", n, txq->queue_id);
+
+ ctq->n_enq = 0;
+}
+
+vnet_dev_rv_t
+oct_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ struct roc_nix_eeprom_info eeprom_info = {};
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ int rrv;
+
+ log_debug (port->dev, "port start: port %u", port->port_id);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if ((rv = oct_rxq_start (vm, q)) != VNET_DEV_OK)
+ goto done;
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (q);
+ ctq->n_enq = 0;
+ }
+
+ if ((rrv = roc_nix_mac_mtu_set (nix, 9200)))
+ {
+ rv = oct_roc_err (dev, rrv, "roc_nix_mac_mtu_set() failed");
+ goto done;
+ }
+
+ if ((rrv = roc_nix_npc_rx_ena_dis (nix, true)))
+ {
+ rv = oct_roc_err (dev, rrv, "roc_nix_npc_rx_ena_dis() failed");
+ goto done;
+ }
+
+ vnet_dev_poll_port_add (vm, port, 0.5, oct_port_poll);
+
+ if (roc_nix_eeprom_info_get (nix, &eeprom_info) == 0)
+ {
+ log_debug (dev, "sff_id %u data %U", eeprom_info.sff_id, format_hexdump,
+ eeprom_info.buf, sizeof (eeprom_info.buf));
+ }
+done:
+ if (rv != VNET_DEV_OK)
+ oct_port_stop (vm, port);
+ return rv;
+}
+
+void
+oct_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+
+ log_debug (port->dev, "port stop: port %u", port->port_id);
+
+ vnet_dev_poll_port_remove (vm, port, oct_port_poll);
+
+ rrv = roc_nix_npc_rx_ena_dis (nix, false);
+ if (rrv)
+ {
+ oct_roc_err (dev, rrv, "roc_nix_npc_rx_ena_dis() failed");
+ return;
+ }
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ oct_rxq_stop (vm, q);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ oct_txq_stop (vm, q);
+}
+
+vnet_dev_rv_t
+oct_validate_config_promisc_mode (vnet_dev_port_t *port, int enable)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+
+ if (roc_nix_is_vf_or_sdp (nix))
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+oct_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enable)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ int rv;
+
+ rv = roc_nix_npc_promisc_ena_dis (nix, enable);
+ if (rv)
+ {
+ return oct_roc_err (dev, rv, "roc_nix_npc_promisc_ena_dis failed");
+ }
+
+ rv = roc_nix_mac_promisc_mode_enable (nix, enable);
+ if (rv)
+ {
+ return oct_roc_err (dev, rv,
+ "roc_nix_mac_promisc_mode_enable(%s) failed",
+ enable ? "true" : "false");
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+oct_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ if (port->started)
+ rv = VNET_DEV_ERR_PORT_STARTED;
+ break;
+
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ rv = oct_validate_config_promisc_mode (port, req->promisc);
+ break;
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_RX_FLOW:
+ case VNET_DEV_PORT_CFG_DEL_RX_FLOW:
+ case VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER:
+ case VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER:
+ rv = oct_flow_validate_params (vm, port, req->type, req->flow_index,
+ req->private_data);
+ break;
+
+ default:
+ rv = VNET_DEV_ERR_NOT_SUPPORTED;
+ };
+
+ return rv;
+}
+
+vnet_dev_rv_t
+oct_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ rv = oct_op_config_promisc_mode (vm, port, req->promisc);
+ break;
+
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ break;
+
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ break;
+
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_RX_FLOW:
+ case VNET_DEV_PORT_CFG_DEL_RX_FLOW:
+ case VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER:
+ case VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER:
+ rv = oct_flow_ops_fn (vm, port, req->type, req->flow_index,
+ req->private_data);
+
+ break;
+
+ default:
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ };
+
+ return rv;
+}
diff --git a/src/plugins/dev_octeon/queue.c b/src/plugins/dev_octeon/queue.c
new file mode 100644
index 00000000000..d6ae794fb8d
--- /dev/null
+++ b/src/plugins/dev_octeon/queue.c
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_octeon/octeon.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
+ .class_name = "octeon",
+ .subclass_name = "queue",
+};
+
+static vnet_dev_rv_t
+oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...)
+{
+ u8 *s = 0;
+ va_list va;
+
+ va_start (va, fmt);
+ s = va_format (s, fmt, &va);
+ va_end (va);
+
+ log_err (dev, "%v - ROC error %s (%d)", s, roc_error_msg_get (rv), rv);
+
+ vec_free (s);
+ return VNET_DEV_ERR_INTERNAL;
+}
+
+vnet_dev_rv_t
+oct_rx_queue_alloc (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+
+ log_debug (dev, "rx_queue_alloc: queue %u alocated", rxq->queue_id);
+ return VNET_DEV_OK;
+}
+
+void
+oct_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+
+ log_debug (dev, "rx_queue_free: queue %u", rxq->queue_id);
+}
+
+vnet_dev_rv_t
+oct_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+ u32 sz = sizeof (void *) * ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "tx_queue_alloc: queue %u alocated", txq->queue_id);
+
+ rv = vnet_dev_dma_mem_alloc (vm, dev, sz, 128, (void **) &ctq->ba_buffer);
+
+ if (rv != VNET_DEV_OK)
+ return rv;
+
+ clib_memset_u64 (ctq->ba_buffer, OCT_BATCH_ALLOC_IOVA0_MASK,
+ ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS);
+
+ return rv;
+}
+
+void
+oct_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+
+ log_debug (dev, "tx_queue_free: queue %u", txq->queue_id);
+
+ vnet_dev_dma_mem_free (vm, dev, ctq->ba_buffer);
+}
+
+vnet_dev_rv_t
+oct_rxq_init (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_t *dev = rxq->port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ vlib_buffer_pool_t *bp =
+ vlib_get_buffer_pool (vm, vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+
+ struct npa_aura_s aura = {};
+ struct npa_pool_s npapool = { .nat_align = 1 };
+
+ if ((rrv = roc_npa_pool_create (&crq->aura_handle, bp->alloc_size, rxq->size,
+ &aura, &npapool, 0)))
+ {
+ oct_rxq_deinit (vm, rxq);
+ return oct_roc_err (dev, rrv, "roc_npa_pool_create() failed");
+ }
+
+ crq->npa_pool_initialized = 1;
+ log_notice (dev, "NPA pool created, aura_handle = 0x%lx", crq->aura_handle);
+
+ crq->cq = (struct roc_nix_cq){
+ .nb_desc = rxq->size,
+ .qid = rxq->queue_id,
+ };
+
+ if ((rrv = roc_nix_cq_init (nix, &crq->cq)))
+ {
+ oct_rxq_deinit (vm, rxq);
+ return oct_roc_err (dev, rrv,
+ "roc_nix_cq_init(qid = %u, nb_desc = %u) failed",
+ crq->cq.qid, crq->cq.nb_desc);
+ }
+
+ crq->cq_initialized = 1;
+ log_debug (dev, "CQ %u initialised (qmask 0x%x wdata 0x%lx)", crq->cq.qid,
+ crq->cq.qmask, crq->cq.wdata);
+
+ crq->hdr_off = vm->buffer_main->ext_hdr_size;
+
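+ /* NIX DMAs packet data past the buffer metadata: first_skip/later_skip
+ reserve the external header plus vlib_buffer_t so the payload lands in
+ the vlib data area */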
+ crq->rq = (struct roc_nix_rq){
+ .qid = rxq->queue_id,
+ .cqid = crq->cq.qid,
+ .aura_handle = crq->aura_handle,
+ .first_skip = crq->hdr_off + sizeof (vlib_buffer_t),
+ .later_skip = crq->hdr_off + sizeof (vlib_buffer_t),
+ .lpb_size = bp->data_size + crq->hdr_off + sizeof (vlib_buffer_t),
+ .flow_tag_width = 32,
+ };
+
+ if ((rrv = roc_nix_rq_init (nix, &crq->rq, 1 /* disable */)))
+ {
+ oct_rxq_deinit (vm, rxq);
+ return oct_roc_err (dev, rrv, "roc_nix_rq_init(qid = %u) failed",
+ crq->rq.qid);
+ }
+
+ crq->rq_initialized = 1;
+ crq->lmt_base_addr = roc_idev_lmt_base_addr_get ();
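+ /* fold the LMTST size field into the batch-free I/O address:
+ (0x7 << 4) encodes (16-byte dwords - 1), i.e. a full 128B line */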
+ crq->aura_batch_free_ioaddr =
+ (roc_npa_aura_handle_to_base (crq->aura_handle) +
+ NPA_LF_AURA_BATCH_FREE0) |
+ (0x7 << 4);
+
+ log_debug (dev, "RQ %u initialised", crq->cq.qid);
+
+ return VNET_DEV_OK;
+}
+
+void
+oct_rxq_deinit (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_t *dev = rxq->port->dev;
+ int rrv;
+
+ if (crq->rq_initialized)
+ {
+ rrv = roc_nix_rq_fini (&crq->rq);
+ if (rrv)
+ oct_roc_err (dev, rrv, "roc_nix_rq_fini() failed");
+ crq->rq_initialized = 0;
+ }
+
+ if (crq->cq_initialized)
+ {
+ rrv = roc_nix_cq_fini (&crq->cq);
+ if (rrv)
+ oct_roc_err (dev, rrv, "roc_nix_cq_fini() failed");
+ crq->cq_initialized = 0;
+ }
+
+ if (crq->npa_pool_initialized)
+ {
+ rrv = roc_npa_pool_destroy (crq->aura_handle);
+ if (rrv)
+ oct_roc_err (dev, rrv, "roc_npa_pool_destroy() failed");
+ crq->npa_pool_initialized = 0;
+ }
+}
+
+vnet_dev_rv_t
+oct_txq_init (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_t *dev = txq->port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ struct npa_aura_s aura = {};
+ struct npa_pool_s npapool = { .nat_align = 1 };
+ int rrv;
+ vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, 0);
+
+ if ((rrv = roc_npa_pool_create (
+ &ctq->aura_handle, bp->alloc_size,
+ txq->size * 6 /* worst case - two SG with 3 segs each = 6 */, &aura,
+ &npapool, 0)))
+ {
+ oct_txq_deinit (vm, txq);
+ return oct_roc_err (dev, rrv, "roc_npa_pool_create() failed");
+ }
+
+ ctq->npa_pool_initialized = 1;
+ log_notice (dev, "NPA pool created, aura_handle = 0x%lx", ctq->aura_handle);
+
+ ctq->sq = (struct roc_nix_sq){
+ .nb_desc = txq->size,
+ .qid = txq->queue_id,
+ .max_sqe_sz = NIX_MAXSQESZ_W16,
+ };
+
+ if ((rrv = roc_nix_sq_init (nix, &ctq->sq)))
+ {
+ oct_txq_deinit (vm, txq);
+ return oct_roc_err (
+ dev, rrv,
+ "roc_nix_sq_init(qid = %u, nb_desc = %u, max_sqe_sz = %u) failed",
+ ctq->sq.qid, ctq->sq.nb_desc, ctq->sq.max_sqe_sz);
+ }
+
+ ctq->sq_initialized = 1;
+ log_debug (dev, "SQ initialised, qid %u, nb_desc %u, max_sqe_sz %u",
+ ctq->sq.qid, ctq->sq.nb_desc, ctq->sq.max_sqe_sz);
+
+ ctq->hdr_off = vm->buffer_main->ext_hdr_size;
+
+ if (ctq->sq.lmt_addr == 0)
+ ctq->sq.lmt_addr = (void *) nix->lmt_base;
+ ctq->io_addr = ctq->sq.io_addr & ~0x7fULL;
+ ctq->lmt_addr = ctq->sq.lmt_addr;
+
+ return VNET_DEV_OK;
+}
+
+void
+oct_txq_deinit (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_t *dev = txq->port->dev;
+ int rrv;
+
+ if (ctq->sq_initialized)
+ {
+ rrv = roc_nix_sq_fini (&ctq->sq);
+ if (rrv)
+ oct_roc_err (dev, rrv, "roc_nix_sq_fini() failed");
+ ctq->sq_initialized = 0;
+ }
+
+ if (ctq->npa_pool_initialized)
+ {
+ rrv = roc_npa_pool_destroy (ctq->aura_handle);
+ if (rrv)
+ oct_roc_err (dev, rrv, "roc_npa_pool_destroy() failed");
+ ctq->npa_pool_initialized = 0;
+ }
+}
+
+u8 *
+format_oct_rxq_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_rx_queue_t *rxq = va_arg (*args, vnet_dev_rx_queue_t *);
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ u32 indent = format_get_indent (s);
+
+ if (a->debug)
+ {
+ s = format (s, "n_enq %u cq_nb_desc %u", crq->n_enq, crq->cq.nb_desc);
+ s = format (s, "\n%Uaura: id 0x%x count %u limit %u avail %u",
+ format_white_space, indent,
+ roc_npa_aura_handle_to_aura (crq->aura_handle),
+ roc_npa_aura_op_cnt_get (crq->aura_handle),
+ roc_npa_aura_op_limit_get (crq->aura_handle),
+ roc_npa_aura_op_available (crq->aura_handle));
+ }
+ return s;
+}
+
+u8 *
+format_oct_txq_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_tx_queue_t *txq = va_arg (*args, vnet_dev_tx_queue_t *);
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ u32 indent = format_get_indent (s);
+
+ if (a->debug)
+ {
+ s = format (s, "n_enq %u sq_nb_desc %u io_addr %p lmt_addr %p",
+ ctq->n_enq, ctq->sq.nb_desc, ctq->io_addr, ctq->lmt_addr);
+ s = format (s, "\n%Uaura: id 0x%x count %u limit %u avail %u",
+ format_white_space, indent,
+ roc_npa_aura_handle_to_aura (ctq->aura_handle),
+ roc_npa_aura_op_cnt_get (ctq->aura_handle),
+ roc_npa_aura_op_limit_get (ctq->aura_handle),
+ roc_npa_aura_op_available (ctq->aura_handle));
+ }
+
+ return s;
+}
diff --git a/src/plugins/dev_octeon/roc_helper.c b/src/plugins/dev_octeon/roc_helper.c
new file mode 100644
index 00000000000..f10c2cb578b
--- /dev/null
+++ b/src/plugins/dev_octeon/roc_helper.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2023 Marvell.
+ * SPDX-License-Identifier: Apache-2.0
+ * https://spdx.org/licenses/Apache-2.0.html
+ */
+
+#include <vnet/vnet.h>
+#include <vlib/pci/pci.h>
+#include <vlib/linux/vfio.h>
+#include <base/roc_api.h>
+#include <common.h>
+
+static oct_plt_memzone_list_t memzone_list;
+
+static inline void
+oct_plt_log (oct_plt_log_level_t level, oct_plt_log_class_t cls, char *fmt,
+ ...)
+{
+ va_list va;
+ u8 *s;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ vlib_log ((vlib_log_level_t) level, cls, "%v", s);
+ vec_free (s);
+}
+
+static inline void
+oct_plt_spinlock_init (oct_plt_spinlock_t *p)
+{
+ clib_spinlock_init ((clib_spinlock_t *) p);
+}
+
+static void
+oct_plt_spinlock_lock (oct_plt_spinlock_t *p)
+{
+ clib_spinlock_lock ((clib_spinlock_t *) p);
+}
+
+static void
+oct_plt_spinlock_unlock (oct_plt_spinlock_t *p)
+{
+ clib_spinlock_unlock ((clib_spinlock_t *) p);
+}
+
+static int
+oct_plt_spinlock_trylock (oct_plt_spinlock_t *p)
+{
+ return clib_spinlock_trylock ((clib_spinlock_t *) p);
+}
+
+static u64
+oct_plt_get_thread_index (void)
+{
+ return __os_thread_index;
+}
+
+static void
+oct_drv_physmem_free (vlib_main_t *vm, void *mem)
+{
+ if (!mem)
+ {
+ clib_warning ("Invalid address %p", mem);
+ return;
+ }
+
+ vlib_physmem_free (vm, mem);
+}
+
+static void *
+oct_drv_physmem_alloc (vlib_main_t *vm, u32 size, u32 align)
+{
+ clib_error_t *error = NULL;
+ uword *mem = NULL;
+
+ if (align)
+ {
+ /* Force cache line alloc in case alignment is less than cache line */
+ align = align < CLIB_CACHE_LINE_BYTES ? CLIB_CACHE_LINE_BYTES : align;
+ mem = vlib_physmem_alloc_aligned_on_numa (vm, size, align, 0);
+ }
+ else
+ mem =
+ vlib_physmem_alloc_aligned_on_numa (vm, size, CLIB_CACHE_LINE_BYTES, 0);
+ if (!mem)
+ return NULL;
+
+ error = vfio_map_physmem_page (vm, mem);
+ if (error)
+ goto report_error;
+
+ clib_memset (mem, 0, size);
+ return mem;
+
+report_error:
+ clib_error_report (error);
+ oct_drv_physmem_free (vm, mem);
+
+ return NULL;
+}
+
+static void
+oct_plt_free (void *addr)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ oct_drv_physmem_free ((void *) vm, addr);
+}
+
+static void *
+oct_plt_zmalloc (u32 size, u32 align)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ return oct_drv_physmem_alloc (vm, size, align);
+}
+
+static oct_plt_memzone_t *
+memzone_get (u32 index)
+{
+ if (index == ((u32) ~0))
+ return 0;
+
+ return pool_elt_at_index (memzone_list.mem_pool, index);
+}
+
+static int
+oct_plt_memzone_free (const oct_plt_memzone_t *name)
+{
+ uword *p;
+ p = hash_get_mem (memzone_list.memzone_by_name, name);
+
+ if (!p || p[0] == ((u32) ~0))
+ return -EINVAL;
+
+ hash_unset_mem (memzone_list.memzone_by_name, name);
+
+ pool_put_index (memzone_list.mem_pool, p[0]);
+
+ return 0;
+}
+
+static oct_plt_memzone_t *
+oct_plt_memzone_lookup (const char *name)
+{
+ uword *p;
+ p = hash_get_mem (memzone_list.memzone_by_name, name);
+ if (p)
+ return memzone_get (p[0]);
+
+ return 0;
+}
+
+static oct_plt_memzone_t *
+oct_plt_memzone_reserve_aligned (const char *name, u64 len, u8 socket,
+ u32 flags, u32 align)
+{
+ oct_plt_memzone_t *mem_pool;
+ void *p = NULL;
+
+ pool_get_zero (memzone_list.mem_pool, mem_pool);
+
+ p = oct_plt_zmalloc (len, align);
+ if (!p)
+ {
+ pool_put (memzone_list.mem_pool, mem_pool);
+ return NULL;
+ }
+
+ mem_pool->addr = p;
+ mem_pool->index = mem_pool - memzone_list.mem_pool;
+ hash_set_mem (memzone_list.memzone_by_name, name, mem_pool->index);
+
+ return mem_pool;
+}
+
+oct_plt_init_param_t oct_plt_init_param = {
+ .oct_plt_log_reg_class = vlib_log_register_class,
+ .oct_plt_log = oct_plt_log,
+ .oct_plt_free = oct_plt_free,
+ .oct_plt_zmalloc = oct_plt_zmalloc,
+ .oct_plt_memzone_free = oct_plt_memzone_free,
+ .oct_plt_memzone_lookup = oct_plt_memzone_lookup,
+ .oct_plt_memzone_reserve_aligned = oct_plt_memzone_reserve_aligned,
+ .oct_plt_spinlock_init = oct_plt_spinlock_init,
+ .oct_plt_spinlock_lock = oct_plt_spinlock_lock,
+ .oct_plt_spinlock_unlock = oct_plt_spinlock_unlock,
+ .oct_plt_spinlock_trylock = oct_plt_spinlock_trylock,
+ .oct_plt_get_thread_index = oct_plt_get_thread_index,
+};
diff --git a/src/plugins/dev_octeon/rx_node.c b/src/plugins/dev_octeon/rx_node.c
new file mode 100644
index 00000000000..997f1356199
--- /dev/null
+++ b/src/plugins/dev_octeon/rx_node.c
@@ -0,0 +1,392 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <dev_octeon/octeon.h>
+#include <dev_octeon/hw_defs.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ u32 hw_if_index;
+ u32 trace_count;
+ u32 n_traced;
+ oct_nix_rx_cqe_desc_t *next_desc;
+ u64 parse_w0_or;
+ u32 n_left_to_next;
+ u32 *to_next;
+ u32 n_rx_pkts;
+ u32 n_rx_bytes;
+ u32 n_segs;
+} oct_rx_node_ctx_t;
+
+static_always_inline vlib_buffer_t *
+oct_seg_to_bp (void *p)
+{
+ return (vlib_buffer_t *) p - 1;
+}
+
+static_always_inline void
+oct_rx_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, vlib_buffer_t *h,
+ oct_nix_rx_cqe_desc_t *d)
+{
+ u32 tail_sz = 0, n_tail_segs = 0;
+ vlib_buffer_t *p, *b;
+ u8 segs0 = d->sg0.segs, segs1 = 0;
+
+ if (segs0 < 2)
+ return;
+
+ b = oct_seg_to_bp (d->segs0[1]);
+ h->next_buffer = vlib_get_buffer_index (vm, b);
+ tail_sz += b->current_length = d->sg0.seg2_size;
+ n_tail_segs++;
+
+ if (segs0 == 2)
+ goto done;
+
+ p = b;
+ p->flags = VLIB_BUFFER_NEXT_PRESENT;
+ b = oct_seg_to_bp (d->segs0[2]);
+ p->next_buffer = vlib_get_buffer_index (vm, b);
+ tail_sz += b->current_length = d->sg0.seg3_size;
+ n_tail_segs++;
+
+ if (d->sg1.subdc != NIX_SUBDC_SG)
+ goto done;
+
+ segs1 = d->sg1.segs;
+ if (segs1 == 0)
+ goto done;
+
+ p = b;
+ p->flags = VLIB_BUFFER_NEXT_PRESENT;
+ b = oct_seg_to_bp (d->segs1[0]);
+ p->next_buffer = vlib_get_buffer_index (vm, b);
+ tail_sz += b->current_length = d->sg1.seg1_size;
+ n_tail_segs++;
+
+ if (segs1 == 1)
+ goto done;
+
+ p = b;
+ p->flags = VLIB_BUFFER_NEXT_PRESENT;
+ b = oct_seg_to_bp (d->segs1[1]);
+ p->next_buffer = vlib_get_buffer_index (vm, b);
+ tail_sz += b->current_length = d->sg1.seg2_size;
+ n_tail_segs++;
+
+ if (segs1 == 2)
+ goto done;
+
+ p = b;
+ p->flags = VLIB_BUFFER_NEXT_PRESENT;
+ b = oct_seg_to_bp (d->segs1[2]);
+ p->next_buffer = vlib_get_buffer_index (vm, b);
+ tail_sz += b->current_length = d->sg1.seg3_size;
+ n_tail_segs++;
+
+done:
+ b->flags = 0;
+ h->total_length_not_including_first_buffer = tail_sz;
+ h->flags |= VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ ctx->n_rx_bytes += tail_sz;
+ ctx->n_segs += n_tail_segs;
+}
+
+static_always_inline u32
+oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx,
+ vnet_dev_rx_queue_t *rxq, u32 n)
+{
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ vlib_buffer_template_t bt = rxq->buffer_template;
+ u32 n_left;
+ oct_nix_rx_cqe_desc_t *d = ctx->next_desc;
+ vlib_buffer_t *b[4];
+
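+ /* process 4 descriptors per round; the n_left >= 8 bound keeps the
+ store prefetches of d[4..7] inside the valid descriptor range */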
+ for (n_left = n; n_left >= 8; d += 4, n_left -= 4, ctx->to_next += 4)
+ {
+ u32 segs = 0;
+ clib_prefetch_store (oct_seg_to_bp (d[4].segs0[0]));
+ clib_prefetch_store (oct_seg_to_bp (d[5].segs0[0]));
+ b[0] = oct_seg_to_bp (d[0].segs0[0]);
+ clib_prefetch_store (oct_seg_to_bp (d[6].segs0[0]));
+ b[1] = oct_seg_to_bp (d[1].segs0[0]);
+ clib_prefetch_store (oct_seg_to_bp (d[7].segs0[0]));
+ b[2] = oct_seg_to_bp (d[2].segs0[0]);
+ b[3] = oct_seg_to_bp (d[3].segs0[0]);
+ ctx->to_next[0] = vlib_get_buffer_index (vm, b[0]);
+ ctx->to_next[1] = vlib_get_buffer_index (vm, b[1]);
+ ctx->to_next[2] = vlib_get_buffer_index (vm, b[2]);
+ ctx->to_next[3] = vlib_get_buffer_index (vm, b[3]);
+ b[0]->template = bt;
+ b[1]->template = bt;
+ b[2]->template = bt;
+ b[3]->template = bt;
+ ctx->n_rx_bytes += b[0]->current_length = d[0].sg0.seg1_size;
+ ctx->n_rx_bytes += b[1]->current_length = d[1].sg0.seg1_size;
+ ctx->n_rx_bytes += b[2]->current_length = d[2].sg0.seg1_size;
+ ctx->n_rx_bytes += b[3]->current_length = d[3].sg0.seg1_size;
+ b[0]->flow_id = d[0].parse.w[3] >> 48;
+ b[1]->flow_id = d[1].parse.w[3] >> 48;
+ b[2]->flow_id = d[2].parse.w[3] >> 48;
+ b[3]->flow_id = d[3].parse.w[3] >> 48;
+ /* accumulate parse word 0 so the frame-level ip4 checksum-ok flag
+ reflects every packet in the frame */
+ ctx->parse_w0_or |=
+ d[0].parse.w[0] | d[1].parse.w[0] | d[2].parse.w[0] | d[3].parse.w[0];
+ ctx->n_segs += 4;
+ segs = d[0].sg0.segs + d[1].sg0.segs + d[2].sg0.segs + d[3].sg0.segs;
+
+ if (PREDICT_FALSE (segs > 4))
+ {
+ oct_rx_attach_tail (vm, ctx, b[0], d + 0);
+ oct_rx_attach_tail (vm, ctx, b[1], d + 1);
+ oct_rx_attach_tail (vm, ctx, b[2], d + 2);
+ oct_rx_attach_tail (vm, ctx, b[3], d + 3);
+ }
+ }
+
+ for (; n_left; d += 1, n_left -= 1, ctx->to_next += 1)
+ {
+ b[0] = oct_seg_to_bp (d->segs0[0]);
+ ctx->to_next[0] = vlib_get_buffer_index (vm, b[0]);
+ b[0]->template = bt;
+ ctx->n_rx_bytes += b[0]->current_length = d[0].sg0.seg1_size;
+ b[0]->flow_id = d[0].parse.w[3] >> 48;
+ ctx->parse_w0_or |= d[0].parse.w[0];
+ ctx->n_segs += 1;
+ if (d[0].sg0.segs > 1)
+ oct_rx_attach_tail (vm, ctx, b[0], d + 0);
+ }
+
+ plt_write64 ((crq->cq.wdata | n), crq->cq.door);
+ ctx->n_rx_pkts += n;
+ ctx->n_left_to_next -= n;
+ return n;
+}
+
+static_always_inline void
+oct_rxq_refill_batch (vlib_main_t *vm, u64 lmt_id, u64 addr,
+ oct_npa_lf_aura_batch_free_line_t *lines, u32 *bi,
+ oct_npa_lf_aura_batch_free0_t w0, u64 n_lines)
+{
+ u64 data;
+
+ for (u32 i = 0; i < n_lines; i++, bi += 15)
+ {
+ lines[i].w0 = w0;
+ vlib_get_buffers (vm, bi, (vlib_buffer_t **) lines[i].data, 15);
+ }
+
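+ /* LMTST arg encoding: bits [11:0] LMT line id, [18:12] number of extra
+ lines, then one 3-bit (dwords - 1) size field per line from bit 19;
+ 0b111 = 8 x 16B dwords = a full 128B line */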
+ data = lmt_id | ((n_lines - 1) << 12) | ((1ULL << (n_lines * 3)) - 1) << 19;
+ roc_lmt_submit_steorl (data, addr);
+
+ /* Data Store Memory Barrier - outer shareable domain */
+ asm volatile("dmb oshst" ::: "memory");
+}
+
+static_always_inline u32
+oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill)
+{
+ const u32 batch_max_lines = 16;
+ const u32 bufs_per_line = 15;
+ const u32 batch_max_bufs = bufs_per_line * batch_max_lines;
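+
+ /* each 128B LMT line carries the free0 word plus 15 buffer pointers;
+ a single steorl burst covers at most 16 lines */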
+
+ u32 batch_bufs, n_lines, n_alloc;
+ u32 buffer_indices[batch_max_bufs];
+ u64 lmt_addr, lmt_id, addr, n_enq = 0;
+ u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ oct_npa_lf_aura_batch_free_line_t *lines;
+
+ if (n_refill < bufs_per_line)
+ return 0;
+
+ n_lines = n_refill / bufs_per_line;
+
+ addr = crq->aura_batch_free_ioaddr;
+ lmt_addr = crq->lmt_base_addr;
+ lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2;
+ lmt_addr += lmt_id << ROC_LMT_LINE_SIZE_LOG2;
+ lines = (oct_npa_lf_aura_batch_free_line_t *) lmt_addr;
+
+ oct_npa_lf_aura_batch_free0_t w0 = {
+ .aura = roc_npa_aura_handle_to_aura (crq->aura_handle),
+ .count_eot = 1,
+ };
+
+ while (n_lines >= batch_max_lines)
+ {
+ n_alloc =
+ vlib_buffer_alloc_from_pool (vm, buffer_indices, batch_max_bufs, bpi);
+ if (PREDICT_FALSE (n_alloc < batch_max_bufs))
+ goto alloc_fail;
+ oct_rxq_refill_batch (vm, lmt_id, addr, lines, buffer_indices, w0,
+ batch_max_lines);
+ n_lines -= batch_max_lines;
+ n_enq += batch_max_bufs;
+ }
+
+ if (n_lines == 0)
+ return n_enq;
+
+ batch_bufs = n_lines * bufs_per_line;
+ n_alloc = vlib_buffer_alloc_from_pool (vm, buffer_indices, batch_bufs, bpi);
+
+ if (PREDICT_FALSE (n_alloc < batch_bufs))
+ {
+ alloc_fail:
+ if (n_alloc >= bufs_per_line)
+ {
+ u32 n_unalloc;
+ n_lines = n_alloc / bufs_per_line;
+ batch_bufs = n_lines * bufs_per_line;
+ n_unalloc = n_alloc - batch_bufs;
+
+ if (n_unalloc)
+ vlib_buffer_unalloc_to_pool (vm, buffer_indices + batch_bufs,
+ n_unalloc, bpi);
+ }
+ else
+ {
+ if (n_alloc)
+ vlib_buffer_unalloc_to_pool (vm, buffer_indices, n_alloc, bpi);
+ return n_enq;
+ }
+ }
+
+ oct_rxq_refill_batch (vm, lmt_id, addr, lines, buffer_indices, w0, n_lines);
+ n_enq += batch_bufs;
+
+ return n_enq;
+}
+
+static_always_inline void
+oct_rx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ oct_rx_node_ctx_t *ctx, oct_nix_rx_cqe_desc_t *d, u32 n_desc)
+{
+ u32 i = 0;
+ if (PREDICT_TRUE (ctx->trace_count == 0))
+ return;
+
+ while (ctx->n_traced < ctx->trace_count && i < n_desc)
+ {
+ vlib_buffer_t *b = oct_seg_to_bp (d[i].segs0[0]);
+
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, node, ctx->next_index, b,
+ /* follow_chain */ 0)))
+ {
+ oct_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+ tr->next_index = ctx->next_index;
+ tr->sw_if_index = ctx->sw_if_index;
+ tr->desc = d[i];
+ ctx->n_traced++;
+ }
+ i++;
+ }
+}
+
+static_always_inline uword
+oct_rx_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, vnet_dev_port_t *port,
+ vnet_dev_rx_queue_t *rxq, int with_flows)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 thr_idx = vlib_get_thread_index ();
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ u32 n_desc, head, n, n_enq;
+ u32 cq_size = crq->cq.nb_desc;
+ u32 cq_mask = crq->cq.qmask;
+ oct_nix_rx_cqe_desc_t *descs = crq->cq.desc_base;
+ oct_nix_lf_cq_op_status_t status;
+ oct_rx_node_ctx_t _ctx = {
+ .next_index = rxq->next_index,
+ .sw_if_index = port->intf.sw_if_index,
+ .hw_if_index = port->intf.hw_if_index,
+ }, *ctx = &_ctx;
+
+ /* get head and tail from NIX_LF_CQ_OP_STATUS */
+ status.as_u64 = roc_atomic64_add_sync (crq->cq.wdata, crq->cq.status);
+ if (status.cq_err || status.op_err)
+ return 0;
+
+ head = status.head;
+ n_desc = (status.tail - head) & cq_mask;
+
+ if (n_desc == 0)
+ goto refill;
+
+ vlib_get_new_next_frame (vm, node, ctx->next_index, ctx->to_next,
+ ctx->n_left_to_next);
+
+ ctx->trace_count = vlib_get_trace_count (vm, node);
+
+ while (1)
+ {
+ ctx->next_desc = descs + head;
+ n = clib_min (cq_size - head, clib_min (n_desc, ctx->n_left_to_next));
+ n = oct_rx_batch (vm, ctx, rxq, n);
+ oct_rx_trace (vm, node, ctx, descs + head, n);
+
+ if (ctx->n_left_to_next == 0)
+ break;
+
+ status.as_u64 = roc_atomic64_add_sync (crq->cq.wdata, crq->cq.status);
+ if (status.cq_err || status.op_err)
+ break;
+
+ head = status.head;
+ n_desc = (status.tail - head) & cq_mask;
+ if (n_desc == 0)
+ break;
+ }
+
+ if (ctx->n_traced)
+ vlib_set_trace_count (vm, node, ctx->trace_count - ctx->n_traced);
+
+ if (PREDICT_TRUE (ctx->next_index == VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT))
+ {
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+ oct_nix_rx_parse_t p = { .w[0] = ctx->parse_w0_or };
+ nf = vlib_node_runtime_get_next_frame (vm, node, ctx->next_index);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = ctx->sw_if_index;
+ ef->hw_if_index = ctx->hw_if_index;
+
+ if (p.f.errcode == 0 && p.f.errlev == 0)
+ f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
+
+ vlib_frame_no_append (f);
+ }
+
+ vlib_put_next_frame (vm, node, ctx->next_index, ctx->n_left_to_next);
+
+ vlib_increment_combined_counter (
+ vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thr_idx, ctx->hw_if_index, ctx->n_rx_pkts, ctx->n_rx_bytes);
+
+refill:
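+ /* crq->n_enq tracks buffers owned by the aura; subtract the segments
+ just handed to the graph, then top the aura back up to the ring size */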
+ n_enq = crq->n_enq - ctx->n_segs;
+ n_enq += oct_rxq_refill (vm, rxq, rxq->size - n_enq);
+ crq->n_enq = n_enq;
+
+ return ctx->n_rx_pkts;
+}
+
+VNET_DEV_NODE_FN (oct_rx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ u32 n_rx = 0;
+ foreach_vnet_dev_rx_queue_runtime (rxq, node)
+ {
+ vnet_dev_port_t *port = rxq->port;
+ n_rx += oct_rx_node_inline (vm, node, frame, port, rxq, 0);
+ }
+
+ return n_rx;
+}
diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c
new file mode 100644
index 00000000000..0dbf8759d35
--- /dev/null
+++ b/src/plugins/dev_octeon/tx_node.c
@@ -0,0 +1,435 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/ring.h>
+#include <vppinfra/vector/ip_csum.h>
+
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+
+#include <dev_octeon/octeon.h>
+
+typedef struct
+{
+ union nix_send_hdr_w0_u hdr_w0_template;
+ vlib_node_runtime_t *node;
+ u32 n_tx_bytes;
+ u32 n_drop;
+ vlib_buffer_t *drop[VLIB_FRAME_SIZE];
+ u32 batch_alloc_not_ready;
+ u32 batch_alloc_issue_fail;
+ u16 lmt_id;
+ u64 lmt_ioaddr;
+ lmt_line_t *lmt_lines;
+} oct_tx_ctx_t;
+
+static_always_inline u32
+oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ u8 num_cl;
+ u64 ah;
+ u32 n_freed = 0, n;
+ oct_npa_batch_alloc_cl128_t *cl;
+
+ num_cl = ctq->ba_num_cl;
+ if (num_cl)
+ {
+ u16 off = ctq->hdr_off;
+ u32 *bi = (u32 *) ctq->ba_buffer;
+
+ for (cl = ctq->ba_buffer + ctq->ba_first_cl; num_cl > 0; num_cl--, cl++)
+ {
+ oct_npa_batch_alloc_status_t st;
+
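+ /* first word still holds the pre-filled sentinel (mask + invalid
+ ccode): hardware has not completed this batch alloc yet */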
+ if ((st.as_u64 = __atomic_load_n (cl->iova, __ATOMIC_RELAXED)) ==
+ OCT_BATCH_ALLOC_IOVA0_MASK + ALLOC_CCODE_INVAL)
+ {
+ cl_not_ready:
+ ctx->batch_alloc_not_ready++;
+ n_freed = bi - (u32 *) ctq->ba_buffer;
+ if (n_freed > 0)
+ {
+ vlib_buffer_free_no_next (vm, (u32 *) ctq->ba_buffer,
+ n_freed);
+ ctq->ba_num_cl = num_cl;
+ ctq->ba_first_cl = cl - ctq->ba_buffer;
+ return n_freed;
+ }
+
+ return 0;
+ }
+
+ if (st.status.count > 8 &&
+ __atomic_load_n (cl->iova + 8, __ATOMIC_RELAXED) ==
+ OCT_BATCH_ALLOC_IOVA0_MASK)
+ goto cl_not_ready;
+
+#if (CLIB_DEBUG > 0)
+ cl->iova[0] &= OCT_BATCH_ALLOC_IOVA0_MASK;
+#endif
+ if (PREDICT_TRUE (st.status.count == 16))
+ {
+ /* optimize for likely case where cacheline is full */
+ vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi, 16,
+ off);
+ bi += 16;
+ }
+ else
+ {
+ vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi,
+ st.status.count, off);
+ bi += st.status.count;
+ }
+ }
+
+ n_freed = bi - (u32 *) ctq->ba_buffer;
+ if (n_freed > 0)
+ vlib_buffer_free_no_next (vm, (u32 *) ctq->ba_buffer, n_freed);
+
+ /* clear status bits in each cacheline */
+ n = cl - ctq->ba_buffer;
+ for (u32 i = 0; i < n; i++)
+ ctq->ba_buffer[i].iova[0] = ctq->ba_buffer[i].iova[8] =
+ OCT_BATCH_ALLOC_IOVA0_MASK;
+
+ ctq->ba_num_cl = ctq->ba_first_cl = 0;
+ }
+
+ ah = ctq->aura_handle;
+
+ if ((n = roc_npa_aura_op_available (ah)) >= 32)
+ {
+ u64 addr, res;
+
+ n = clib_min (n, ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS);
+
+ oct_npa_batch_alloc_compare_t cmp = {
+ .compare_s = { .aura = roc_npa_aura_handle_to_aura (ah),
+ .stype = ALLOC_STYPE_STF,
+ .count = n }
+ };
+
+ addr = roc_npa_aura_handle_to_base (ah) + NPA_LF_AURA_BATCH_ALLOC;
+ res = roc_atomic64_casl (cmp.as_u64, (uint64_t) ctq->ba_buffer,
+ (i64 *) addr);
+ if (res == ALLOC_RESULT_ACCEPTED || res == ALLOC_RESULT_NOCORE)
+ {
+ ctq->ba_num_cl = (n + 15) / 16;
+ ctq->ba_first_cl = 0;
+ }
+ else
+ ctx->batch_alloc_issue_fail++;
+ }
+
+ return n_freed;
+}
+
+static_always_inline u8
+oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
+ lmt_line_t *line, u32 flags, int simple, int trace)
+{
+ u8 n_dwords = 2;
+ u32 total_len = 0;
+ oct_tx_desc_t d = {
+ .hdr_w0 = ctx->hdr_w0_template,
+ .sg[0] = {
+ .segs = 1,
+ .subdc = NIX_SUBDC_SG,
+ },
+ .sg[4] = {
+ .subdc = NIX_SUBDC_SG,
+ },
+ };
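+
+ /* sg[0] and sg[4] are NIX SG subdescriptors, each followed by up to
+ three segment addresses (sg[1..3] and sg[5..7]); the descriptor is
+ submitted as n_dwords 16-byte units */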
+
+ if (!simple && flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ u8 n_tail_segs = 0;
+ vlib_buffer_t *tail_segs[5], *t = b;
+
+ while (t->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ if (n_tail_segs >= 5)
+ {
+ /* tail_segs[] holds at most 5 entries; drop the whole chain head */
+ ctx->drop[ctx->n_drop++] = b;
+ return 0;
+ }
+ t = vlib_get_buffer (vm, t->next_buffer);
+ tail_segs[n_tail_segs++] = t;
+ }
+
+ switch (n_tail_segs)
+ {
+ case 5:
+ d.sg[7].u = (u64) vlib_buffer_get_current (tail_segs[4]);
+ total_len += d.sg[4].seg3_size = tail_segs[4]->current_length;
+ d.sg[4].segs++;
+ case 4:
+ d.sg[6].u = (u64) vlib_buffer_get_current (tail_segs[3]);
+ total_len += d.sg[4].seg2_size = tail_segs[3]->current_length;
+ d.sg[4].segs++;
+ n_dwords++;
+ case 3:
+ d.sg[5].u = (u64) vlib_buffer_get_current (tail_segs[2]);
+ total_len += d.sg[4].seg1_size = tail_segs[2]->current_length;
+ d.sg[4].segs++;
+ n_dwords++;
+ case 2:
+ d.sg[3].u = (u64) vlib_buffer_get_current (tail_segs[1]);
+ total_len += d.sg[0].seg3_size = tail_segs[1]->current_length;
+ d.sg[0].segs++;
+ case 1:
+ d.sg[2].u = (u64) vlib_buffer_get_current (tail_segs[0]);
+ total_len += d.sg[0].seg2_size = tail_segs[0]->current_length;
+ d.sg[0].segs++;
+ n_dwords++;
+ default:
+ break;
+ };
+ d.hdr_w0.sizem1 = n_dwords - 1;
+ }
+
+ if (!simple && flags & VNET_BUFFER_F_OFFLOAD)
+ {
+ vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+ if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ {
+ d.hdr_w1.ol3type = NIX_SENDL3TYPE_IP4_CKSUM;
+ d.hdr_w1.ol3ptr = vnet_buffer (b)->l3_hdr_offset;
+ d.hdr_w1.ol4ptr =
+ vnet_buffer (b)->l3_hdr_offset + sizeof (ip4_header_t);
+ }
+ if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
+ {
+ d.hdr_w1.ol4type = NIX_SENDL4TYPE_UDP_CKSUM;
+ d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset;
+ }
+ else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+ {
+ d.hdr_w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+ d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset;
+ }
+ }
+
+ total_len += d.sg[0].seg1_size = b->current_length;
+ d.hdr_w0.total = total_len;
+ d.sg[1].u = (u64) vlib_buffer_get_current (b);
+
+ if (trace && flags & VLIB_BUFFER_IS_TRACED)
+ {
+ oct_tx_trace_t *t = vlib_add_trace (vm, ctx->node, b, sizeof (*t));
+ t->desc = d;
+ t->sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
+ }
+
+ for (u32 i = 0; i < n_dwords; i++)
+ line->dwords[i] = d.as_u128[i];
+
+ return n_dwords;
+}
+
+static_always_inline u32
+oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
+ vlib_buffer_t **b, u32 n_pkts, int trace)
+{
+ u8 dwords_per_line[16], *dpl = dwords_per_line;
+ u64 lmt_arg, ioaddr, n_lines;
+ u32 n_left, or_flags_16 = 0;
+ const u32 not_simple_flags =
+ VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD;
+ lmt_line_t *l = ctx->lmt_lines;
+
+ /* Data Store Memory Barrier - outer shareable domain */
+ asm volatile("dmb oshst" ::: "memory");
+
+ for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8, l += 8)
+ {
+ u32 f0, f1, f2, f3, f4, f5, f6, f7, or_f = 0;
+ vlib_prefetch_buffer_header (b[8], LOAD);
+ or_f |= f0 = b[0]->flags;
+ or_f |= f1 = b[1]->flags;
+ vlib_prefetch_buffer_header (b[9], LOAD);
+ or_f |= f2 = b[2]->flags;
+ or_f |= f3 = b[3]->flags;
+ vlib_prefetch_buffer_header (b[10], LOAD);
+ or_f |= f4 = b[4]->flags;
+ or_f |= f5 = b[5]->flags;
+ vlib_prefetch_buffer_header (b[11], LOAD);
+ or_f |= f6 = b[6]->flags;
+ or_f |= f7 = b[7]->flags;
+ vlib_prefetch_buffer_header (b[12], LOAD);
+ or_flags_16 |= or_f;
+
+ if ((or_f & not_simple_flags) == 0)
+ {
+ int simple = 1;
+ oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace);
+ vlib_prefetch_buffer_header (b[13], LOAD);
+ oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace);
+ vlib_prefetch_buffer_header (b[14], LOAD);
+ oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace);
+ vlib_prefetch_buffer_header (b[15], LOAD);
+ oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace);
+ dpl[0] = dpl[1] = dpl[2] = dpl[3] = 2;
+ dpl[4] = dpl[5] = dpl[6] = dpl[7] = 2;
+ }
+ else
+ {
+ int simple = 0;
+ dpl[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace);
+ dpl[1] = oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace);
+ vlib_prefetch_buffer_header (b[13], LOAD);
+ dpl[2] = oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace);
+ dpl[3] = oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace);
+ vlib_prefetch_buffer_header (b[14], LOAD);
+ dpl[4] = oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace);
+ dpl[5] = oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace);
+ vlib_prefetch_buffer_header (b[15], LOAD);
+ dpl[6] = oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace);
+ dpl[7] = oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace);
+ }
+ dpl += 8;
+ }
+
+ for (; n_left > 0; n_left -= 1, b += 1, l += 1)
+ {
+ u32 f0 = b[0]->flags;
+ dpl++[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace);
+ or_flags_16 |= f0;
+ }
+
+ lmt_arg = ctx->lmt_id;
+ ioaddr = ctx->lmt_ioaddr;
+ n_lines = n_pkts;
+
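+  /*
+   * LMTST submit encoding as used below: the target I/O address carries
+   * the first line's (dwords - 1) at bit 4; for bursts, lmt_arg carries
+   * the number of extra lines at bit 12 and one 3-bit (dwords - 1) field
+   * per extra line starting at bit 19.
+   */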
+ if (PREDICT_FALSE (or_flags_16 & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ dpl = dwords_per_line;
+ ioaddr |= (dpl[0] - 1) << 4;
+
+ if (n_lines > 1)
+ {
+ lmt_arg |= (--n_lines) << 12;
+
+ for (u8 bit_off = 19; n_lines; n_lines--, bit_off += 3, dpl++)
+ lmt_arg |= ((u64) dpl[1] - 1) << bit_off;
+ }
+ }
+ else
+ {
+ const u64 n_dwords = 2;
+ ioaddr |= (n_dwords - 1) << 4;
+
+ if (n_lines > 1)
+ {
+ lmt_arg |= (--n_lines) << 12;
+
+ for (u8 bit_off = 19; n_lines; n_lines--, bit_off += 3)
+ lmt_arg |= (n_dwords - 1) << bit_off;
+ }
+ }
+
+ roc_lmt_submit_steorl (lmt_arg, ioaddr);
+
+ return n_pkts;
+}
+
+VNET_DEV_NODE_FN (oct_tx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node);
+ vnet_dev_tx_queue_t *txq = rt->tx_queue;
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ u32 node_index = node->node_index;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n, n_enq, n_left, n_pkts = frame->n_vectors;
+ vlib_buffer_t *buffers[VLIB_FRAME_SIZE + 8], **b = buffers;
+ u64 lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2;
+
+ oct_tx_ctx_t ctx = {
+ .node = node,
+ .hdr_w0_teplate = {
+ .aura = roc_npa_aura_handle_to_aura (ctq->aura_handle),
+ .sq = ctq->sq.qid,
+ .sizem1 = 1,
+ },
+ .lmt_id = lmt_id,
+ .lmt_ioaddr = ctq->io_addr,
+ .lmt_lines = ctq->lmt_addr + (lmt_id << ROC_LMT_LINE_SIZE_LOG2),
+ };
+
+ vlib_get_buffers (vm, vlib_frame_vector_args (frame), b, n_pkts);
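+  /* replicate the last buffer pointer so the 8-deep prefetches in
+   * oct_tx_enq16 never read past the end of the array */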
+ for (int i = 0; i < 8; i++)
+ b[n_pkts + i] = b[n_pkts - 1];
+
+ vnet_dev_tx_queue_lock_if_needed (txq);
+
+ n_enq = ctq->n_enq;
+ n_enq -= oct_batch_free (vm, &ctx, txq);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ {
+ for (n_left = clib_min (n_pkts, txq->size - n_enq), n = 0; n_left >= 16;
+ n_left -= 16, b += 16)
+ n += oct_tx_enq16 (vm, &ctx, txq, b, 16, /* trace */ 1);
+
+ if (n_left)
+ n += oct_tx_enq16 (vm, &ctx, txq, b, n_left, /* trace */ 1);
+ }
+ else
+ {
+ for (n_left = clib_min (n_pkts, txq->size - n_enq), n = 0; n_left >= 16;
+ n_left -= 16, b += 16)
+ n += oct_tx_enq16 (vm, &ctx, txq, b, 16, /* trace */ 0);
+
+ if (n_left)
+ n += oct_tx_enq16 (vm, &ctx, txq, b, n_left, /* trace */ 0);
+ }
+
+ ctq->n_enq = n_enq + n;
+
+ if (n < n_pkts)
+ {
+ u32 n_free = n_pkts - n;
+ vlib_buffer_free (vm, from + n, n_free);
+ vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_NO_FREE_SLOTS,
+ n_free);
+ n_pkts -= n_free;
+ }
+
+ if (ctx.n_drop)
+ vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_CHAIN_TOO_LONG,
+ ctx.n_drop);
+
+ if (ctx.batch_alloc_not_ready)
+ vlib_error_count (vm, node_index,
+ OCT_TX_NODE_CTR_AURA_BATCH_ALLOC_NOT_READY,
+ ctx.batch_alloc_not_ready);
+
+ if (ctx.batch_alloc_issue_fail)
+ vlib_error_count (vm, node_index,
+ OCT_TX_NODE_CTR_AURA_BATCH_ALLOC_ISSUE_FAIL,
+ ctx.batch_alloc_issue_fail);
+
+ vnet_dev_tx_queue_unlock_if_needed (txq);
+
+ if (ctx.n_drop)
+ {
+ u32 bi[VLIB_FRAME_SIZE];
+ vlib_get_buffer_indices (vm, ctx.drop, bi, ctx.n_drop);
+ vlib_buffer_free (vm, bi, ctx.n_drop);
+ n_pkts -= ctx.n_drop;
+ }
+
+ return n_pkts;
+}
diff --git a/src/plugins/dhcp/FEATURE.yaml b/src/plugins/dhcp/FEATURE.yaml
index 469af29cee5..a517cb849de 100644
--- a/src/plugins/dhcp/FEATURE.yaml
+++ b/src/plugins/dhcp/FEATURE.yaml
@@ -6,6 +6,6 @@ features:
- DHCPv6 prefix delegation
- DHCP Proxy / Option 82
-description: "An implemenation of the Dynamic Host Configuration Protocol (DHCP) client"
+description: "An implementation of the Dynamic Host Configuration Protocol (DHCP) client"
state: production
properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/dhcp/client.c b/src/plugins/dhcp/client.c
index f93643390e9..8fa67c616b2 100644
--- a/src/plugins/dhcp/client.c
+++ b/src/plugins/dhcp/client.c
@@ -149,7 +149,6 @@ dhcp_client_acquire_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
.ip4 = c->learned.router_address,
};
- /* *INDENT-OFF* */
fib_table_entry_path_add (
fib_table_get_index_for_sw_if_index (
FIB_PROTOCOL_IP4,
@@ -161,7 +160,6 @@ dhcp_client_acquire_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
&nh, c->sw_if_index,
~0, 1, NULL, // no label stack
FIB_ROUTE_PATH_FLAG_NONE);
- /* *INDENT-ON* */
}
}
clib_memcpy (&c->installed, &c->learned, sizeof (c->installed));
@@ -870,7 +868,6 @@ dhcp_client_process (vlib_main_t * vm,
case ~0:
if (pool_elts (dcm->clients))
{
- /* *INDENT-OFF* */
next_expire_time = 1e70;
pool_foreach (c, dcm->clients)
{
@@ -886,7 +883,6 @@ dhcp_client_process (vlib_main_t * vm,
clib_warning ("BUG");
timeout = 1.13;
}
- /* *INDENT-ON* */
}
else
timeout = 1000.0;
@@ -900,7 +896,6 @@ dhcp_client_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp_client_process_node,static) = {
.function = dhcp_client_process,
.type = VLIB_NODE_TYPE_PROCESS,
@@ -909,7 +904,6 @@ VLIB_REGISTER_NODE (dhcp_client_process_node,static) = {
.n_errors = ARRAY_LEN(dhcp_client_process_stat_strings),
.error_strings = dhcp_client_process_stat_strings,
};
-/* *INDENT-ON* */
static clib_error_t *
show_dhcp_client_command_fn (vlib_main_t * vm,
@@ -943,25 +937,21 @@ show_dhcp_client_command_fn (vlib_main_t * vm,
return 0;
}
- /* *INDENT-OFF* */
pool_foreach (c, dcm->clients)
{
vlib_cli_output (vm, "%U",
format_dhcp_client, dcm,
c, verbose);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_dhcp_client_command, static) = {
.path = "show dhcp client",
.short_help = "show dhcp client [intfc <intfc>][verbose]",
.function = show_dhcp_client_command_fn,
};
-/* *INDENT-ON* */
int
@@ -1118,13 +1108,11 @@ dhcp_client_walk (dhcp_client_walk_cb_t cb, void *ctx)
dhcp_client_main_t *dcm = &dhcp_client_main;
dhcp_client_t *c;
- /* *INDENT-OFF* */
pool_foreach (c, dcm->clients)
{
if (!cb(c, ctx))
break;
}
- /* *INDENT-ON* */
}
@@ -1229,13 +1217,11 @@ dhcp_client_set_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_client_set_command, static) = {
.path = "set dhcp client",
.short_help = "set dhcp client [del] intfc <interface> [hostname <name>]",
.function = dhcp_client_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp_client_init (vlib_main_t * vm)
diff --git a/src/plugins/dhcp/dhcp.api b/src/plugins/dhcp/dhcp.api
index 4611d5fadd8..7f559128353 100644
--- a/src/plugins/dhcp/dhcp.api
+++ b/src/plugins/dhcp/dhcp.api
@@ -278,6 +278,15 @@ define dhcp_proxy_details
vl_api_dhcp_server_t servers[count];
};
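+/** \brief Enable or disable DHCP client detection on an interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param sw_if_index - interface to enable/disable client detection on
+    @param enable - true to enable, false to disable
+*/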
+autoreply define dhcp_client_detect_enable_disable
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_interface_index_t sw_if_index;
+ bool enable;
+};
+
/** \brief Set DHCPv6 DUID-LL
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/plugins/dhcp/dhcp4_proxy_node.c b/src/plugins/dhcp/dhcp4_proxy_node.c
index 2ddad25bb11..2b49d49bb7f 100644
--- a/src/plugins/dhcp/dhcp4_proxy_node.c
+++ b/src/plugins/dhcp/dhcp4_proxy_node.c
@@ -463,7 +463,6 @@ dhcp_proxy_to_server_input (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp_proxy_to_server_node, static) = {
.function = dhcp_proxy_to_server_input,
.name = "dhcp-proxy-to-server",
@@ -486,7 +485,6 @@ VLIB_REGISTER_NODE (dhcp_proxy_to_server_node, static) = {
.unformat_buffer = unformat_dhcp_proxy_header,
#endif
};
-/* *INDENT-ON* */
typedef enum
{
@@ -783,7 +781,6 @@ dhcp_proxy_to_client_input (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp_proxy_to_client_node, static) = {
.function = dhcp_proxy_to_client_input,
.name = "dhcp-proxy-to-client",
@@ -803,7 +800,6 @@ VLIB_REGISTER_NODE (dhcp_proxy_to_client_node, static) = {
[DHCP4_PROXY_NEXT_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
void
dhcp_maybe_register_udp_ports (dhcp_port_reg_flags_t ports)
@@ -956,13 +952,11 @@ dhcp4_proxy_set_command_fn (vlib_main_t * vm,
format_unformat_error, input);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_proxy_set_command, static) = {
.path = "set dhcp proxy",
.short_help = "set dhcp proxy [del] server <ip-addr> src-address <ip-addr> [server-fib-id <n>] [rx-fib-id <n>]",
.function = dhcp4_proxy_set_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_dhcp4_proxy_server (u8 * s, va_list * args)
@@ -980,16 +974,14 @@ format_dhcp4_proxy_server (u8 * s, va_list * args)
rx_fib = ip4_fib_get (proxy->rx_fib_index);
- s = format (s, "%=14u%=16U",
- rx_fib->table_id,
- format_ip46_address, &proxy->dhcp_src_address, IP46_TYPE_ANY);
+ s = format (s, "%=14u%=16U", rx_fib->hash.table_id, format_ip46_address,
+ &proxy->dhcp_src_address, IP46_TYPE_ANY);
vec_foreach (server, proxy->dhcp_servers)
{
server_fib = ip4_fib_get (server->server_fib_index);
- s = format (s, "%u,%U ",
- server_fib->table_id,
- format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY);
+ s = format (s, "%u,%U ", server_fib->hash.table_id, format_ip46_address,
+ &server->dhcp_server, IP46_TYPE_ANY);
}
return s;
}
@@ -1017,13 +1009,11 @@ dhcp4_proxy_show_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_proxy_show_command, static) = {
.path = "show dhcp proxy",
.short_help = "Display dhcp proxy server info",
.function = dhcp4_proxy_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp_option_82_vss_fn (vlib_main_t * vm,
@@ -1068,13 +1058,11 @@ dhcp_option_82_vss_fn (vlib_main_t * vm,
}
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_proxy_vss_command,static) = {
.path = "set dhcp option-82 vss",
.short_help = "set dhcp option-82 vss [del] table <table id> [oui <n> vpn-id <n> | vpn-ascii-id <text>]",
.function = dhcp_option_82_vss_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp_vss_show_command_fn (vlib_main_t * vm,
@@ -1085,13 +1073,11 @@ dhcp_vss_show_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_proxy_vss_show_command, static) = {
.path = "show dhcp vss",
.short_help = "show dhcp VSS",
.function = dhcp_vss_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp_option_82_address_show_command_fn (vlib_main_t * vm,
@@ -1134,13 +1120,11 @@ dhcp_option_82_address_show_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_proxy_address_show_command,static) = {
.path = "show dhcp option-82-address interface",
.short_help = "show dhcp option-82-address interface <interface>",
.function = dhcp_option_82_address_show_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dhcp/dhcp6_client_common_dp.c b/src/plugins/dhcp/dhcp6_client_common_dp.c
index da6f61aa2d5..40c4ba94c24 100644
--- a/src/plugins/dhcp/dhcp6_client_common_dp.c
+++ b/src/plugins/dhcp/dhcp6_client_common_dp.c
@@ -61,14 +61,12 @@ generate_client_duid (void)
vnet_hw_interface_t *hi;
ethernet_interface_t *eth_if = 0;
- /* *INDENT-OFF* */
pool_foreach (hi, im->hw_interfaces)
{
eth_if = ethernet_get_interface (&ethernet_main, hi->hw_if_index);
if (eth_if)
break;
}
- /* *INDENT-ON* */
if (eth_if)
clib_memcpy (client_duid.lla, &eth_if->address, 6);
@@ -425,7 +423,6 @@ dhcpv6_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcpv6_client_node, static) = {
.function = dhcpv6_client_node_fn,
.name = "dhcpv6-client",
@@ -442,7 +439,6 @@ VLIB_REGISTER_NODE (dhcpv6_client_node, static) = {
.format_trace = format_dhcpv6_client_trace,
};
-/* *INDENT-ON* */
void
dhcp6_clients_enable_disable (u8 enable)
diff --git a/src/plugins/dhcp/dhcp6_ia_na_client_cp.c b/src/plugins/dhcp/dhcp6_ia_na_client_cp.c
index 4a1156f6e8a..ddaf92c6e42 100644
--- a/src/plugins/dhcp/dhcp6_ia_na_client_cp.c
+++ b/src/plugins/dhcp/dhcp6_ia_na_client_cp.c
@@ -271,7 +271,6 @@ dhcp6_reply_event_handler (vl_api_dhcp6_reply_event_t * mp)
continue;
u8 address_already_present = 0;
- /* *INDENT-OFF* */
pool_foreach (address_info, rm->address_pool)
{
if (address_info->sw_if_index != sw_if_index)
@@ -284,7 +283,6 @@ dhcp6_reply_event_handler (vl_api_dhcp6_reply_event_t * mp)
goto address_pool_foreach_out;
}
}
- /* *INDENT-ON* */
address_pool_foreach_out:
if (address_already_present)
@@ -344,7 +342,6 @@ create_address_list (u32 sw_if_index)
dhcp6_client_cp_main_t *rm = &dhcp6_client_cp_main;
   address_info_t *address_info, *address_list = 0;
- /* *INDENT-OFF* */
pool_foreach (address_info, rm->address_pool)
{
if (address_info->sw_if_index == sw_if_index)
@@ -354,7 +351,6 @@ create_address_list (u32 sw_if_index)
clib_memcpy (&address_list[pos], address_info, sizeof (*address_info));
}
}
- /* *INDENT-ON* */
return address_list;
}
@@ -393,7 +389,6 @@ dhcp6_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
do
{
due_time = current_time + 1e9;
- /* *INDENT-OFF* */
pool_foreach (address_info, rm->address_pool)
{
if (address_info->due_time > current_time)
@@ -423,7 +418,6 @@ dhcp6_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
}
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (rm->client_state_by_sw_if_index); i++)
{
client_state_t *cs = &rm->client_state_by_sw_if_index[i];
@@ -473,13 +467,11 @@ dhcp6_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp6_client_cp_process_node) = {
.function = dhcp6_client_cp_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dhcp6-client-cp-process",
};
-/* *INDENT-ON* */
static void
interrupt_process (void)
@@ -524,7 +516,6 @@ dhcp6_addresses_show_command_function (vlib_main_t * vm,
address_info_t *address_info;
f64 current_time = vlib_time_now (vm);
- /* *INDENT-OFF* */
pool_foreach (address_info, dm->address_pool)
{
vlib_cli_output (vm, "address: %U, "
@@ -534,18 +525,15 @@ dhcp6_addresses_show_command_function (vlib_main_t * vm,
address_info->preferred_lt, address_info->valid_lt,
address_info->due_time - current_time);
}
- /* *INDENT-ON* */
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp6_addresses_show_command, static) = {
.path = "show dhcp6 addresses",
.short_help = "show dhcp6 addresses",
.function = dhcp6_addresses_show_command_function,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp6_clients_show_command_function (vlib_main_t * vm,
@@ -601,13 +589,11 @@ dhcp6_clients_show_command_function (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp6_clients_show_command, static) = {
.path = "show dhcp6 clients",
.short_help = "show dhcp6 clients",
.function = dhcp6_clients_show_command_function,
};
-/* *INDENT-ON* */
int
dhcp6_client_enable_disable (u32 sw_if_index, u8 enable)
@@ -659,7 +645,6 @@ dhcp6_client_enable_disable (u32 sw_if_index, u8 enable)
disable_process ();
}
- /* *INDENT-OFF* */
pool_foreach (address_info, rm->address_pool)
{
if (address_info->sw_if_index == sw_if_index)
@@ -680,7 +665,6 @@ dhcp6_client_enable_disable (u32 sw_if_index, u8 enable)
pool_put (rm->address_pool, address_info);
}
}
- /* *INDENT-ON* */
}
if (!enable)
@@ -745,13 +729,11 @@ done:
* @cliexcmd{dhcp6 client GigabitEthernet2/0/0 disable}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp6_client_enable_disable_command, static) = {
.path = "dhcp6 client",
.short_help = "dhcp6 client <interface> [disable]",
.function = dhcp6_client_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp_ia_na_client_cp_init (vlib_main_t * vm)
diff --git a/src/plugins/dhcp/dhcp6_ia_na_client_dp.c b/src/plugins/dhcp/dhcp6_ia_na_client_dp.c
index c240beb3eb3..e957f88884a 100644
--- a/src/plugins/dhcp/dhcp6_ia_na_client_dp.c
+++ b/src/plugins/dhcp/dhcp6_ia_na_client_dp.c
@@ -346,13 +346,11 @@ send_dhcp6_client_message_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (send_dhcp6_client_message_process_node, static) = {
.function = send_dhcp6_client_message_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "send-dhcp6-client-message-process",
};
-/* *INDENT-ON* */
void
dhcp6_send_client_message (vlib_main_t * vm, u32 sw_if_index, u8 stop,
diff --git a/src/plugins/dhcp/dhcp6_packet.h b/src/plugins/dhcp/dhcp6_packet.h
index d5467952a64..78a665f926d 100644
--- a/src/plugins/dhcp/dhcp6_packet.h
+++ b/src/plugins/dhcp/dhcp6_packet.h
@@ -119,7 +119,6 @@ typedef struct dhcpv6_hdr_
u8 data[0];
} dhcpv6_header_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct dhcpv6_relay_ctx_ {
dhcpv6_header_t *pkt;
u32 pkt_len;
@@ -130,10 +129,8 @@ typedef CLIB_PACKED (struct dhcpv6_relay_ctx_ {
char ctx_name[32+1];
u8 dhcp_msg_type;
}) dhcpv6_relay_ctx_t;
-/* *INDENT-ON* */
//Structure for DHCPv6 RELAY-FORWARD and DHCPv6 RELAY-REPLY pkts
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct dhcpv6_relay_hdr_ {
u8 msg_type;
u8 hop_count;
@@ -141,7 +138,6 @@ typedef CLIB_PACKED (struct dhcpv6_relay_hdr_ {
ip6_address_t peer_addr;
u8 data[0];
}) dhcpv6_relay_hdr_t;
-/* *INDENT-ON* */
typedef enum dhcp_stats_action_type_
{
@@ -171,51 +167,39 @@ typedef enum dhcpv6_stats_drop_reason_
#define dhcpv6_optlen(opt) clib_net_to_host_u16((opt)->length)
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u16 option;
u16 length;
u8 data[0];
}) dhcpv6_option_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
dhcpv6_option_t opt;
u16 status_code;
}) dhcpv6_status_code_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
dhcpv6_option_t opt;
u32 int_idx;
}) dhcpv6_int_id_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
dhcpv6_option_t opt;
u8 vss_type;
u8 data[0];
}) dhcpv6_vss_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
dhcpv6_option_t opt;
u32 ent_num;
u32 rmt_id;
}) dhcpv6_rmt_id_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
dhcpv6_option_t opt;
u16 link_type;
u8 data[6]; // data[0]:data[5]: MAC address
}) dhcpv6_client_mac_t;
-/* *INDENT-ON* */
typedef CLIB_PACKED (struct
{
diff --git a/src/plugins/dhcp/dhcp6_pd_client_cp.c b/src/plugins/dhcp/dhcp6_pd_client_cp.c
index f6d30fa0c7d..b30f7c0af79 100644
--- a/src/plugins/dhcp/dhcp6_pd_client_cp.c
+++ b/src/plugins/dhcp/dhcp6_pd_client_cp.c
@@ -371,12 +371,10 @@ dhcp6_pd_reply_event_handler (vl_api_dhcp6_pd_reply_event_t * mp)
* We're going to loop through the pool multiple times,
* so collect active indices.
*/
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
vec_add1 (pm->indices, prefix_info - pm->prefix_pool);
}
- /* *INDENT-ON* */
for (i = 0; i < n_prefixes; i++)
{
@@ -480,7 +478,6 @@ create_prefix_list (u32 sw_if_index)
ip6_prefix_main_t *pm = &ip6_prefix_main;
   prefix_info_t *prefix_info, *prefix_list = 0;
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
if (is_dhcpv6_pd_prefix (prefix_info) &&
@@ -491,7 +488,6 @@ create_prefix_list (u32 sw_if_index)
clib_memcpy (&prefix_list[pos], prefix_info, sizeof (*prefix_info));
}
}
- /* *INDENT-ON* */
return prefix_list;
}
@@ -530,7 +526,6 @@ dhcp6_pd_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
do
{
due_time = current_time + 1e9;
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
if (is_dhcpv6_pd_prefix (prefix_info))
@@ -559,7 +554,6 @@ dhcp6_pd_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
}
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (rm->client_state_by_sw_if_index); i++)
{
client_state_t *cs = &rm->client_state_by_sw_if_index[i];
@@ -608,13 +602,11 @@ dhcp6_pd_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp6_pd_client_cp_process_node) = {
.function = dhcp6_pd_client_cp_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dhcp6-pd-client-cp-process",
};
-/* *INDENT-ON* */
static void
interrupt_process (void)
@@ -787,14 +779,12 @@ cp_ip6_address_find_new_active_prefix (u32 prefix_group_index,
ip6_prefix_main_t *pm = &ip6_prefix_main;
prefix_info_t *prefix_info;
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
if (prefix_info->prefix_group_index == prefix_group_index &&
prefix_info - pm->prefix_pool != ignore_prefix_index)
return prefix_info - pm->prefix_pool;
}
- /* *INDENT-ON* */
return ~0;
}
@@ -992,7 +982,7 @@ dhcp6_cp_ip6_address_add_del (u32 sw_if_index, const u8 * prefix_group,
return VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
cp_ip6_address_add_del_now (address_info, 0 /* del */ );
*address_info = apm->addresses[n - 1];
- _vec_len (apm->addresses) = n - 1;
+ vec_set_len (apm->addresses, n - 1);
return 0;
}
}
@@ -1080,14 +1070,12 @@ done:
* prefix group my-prefix-group ::7/64 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_address_add_del_command, static) = {
.path = "set ip6 address",
.short_help = "set ip6 address <interface> [prefix group <string>] "
"<address> [del]",
.function = cp_ip6_address_add_del_command_function,
};
-/* *INDENT-ON* */
static clib_error_t *
cp_ip6_addresses_show_command_function (vlib_main_t * vm,
@@ -1119,13 +1107,11 @@ cp_ip6_addresses_show_command_function (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_addresses_show_command, static) = {
.path = "show ip6 addresses",
.short_help = "show ip6 addresses",
.function = cp_ip6_addresses_show_command_function,
};
-/* *INDENT-ON* */
static clib_error_t *
cp_ip6_prefixes_show_command_function (vlib_main_t * vm,
@@ -1138,7 +1124,6 @@ cp_ip6_prefixes_show_command_function (vlib_main_t * vm,
const u8 *prefix_group;
f64 current_time = vlib_time_now (vm);
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
prefix_group =
@@ -1152,18 +1137,15 @@ cp_ip6_prefixes_show_command_function (vlib_main_t * vm,
prefix_info->preferred_lt, prefix_info->valid_lt,
prefix_info->due_time - current_time);
}
- /* *INDENT-ON* */
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_prefixes_show_command, static) = {
.path = "show ip6 prefixes",
.short_help = "show ip6 prefixes",
.function = cp_ip6_prefixes_show_command_function,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_pd_clients_show_command_function (vlib_main_t * vm,
@@ -1224,13 +1206,11 @@ ip6_pd_clients_show_command_function (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_pd_clients_show_command, static) = {
.path = "show ip6 pd clients",
.short_help = "show ip6 pd clients",
.function = ip6_pd_clients_show_command_function,
};
-/* *INDENT-ON* */
@@ -1304,7 +1284,6 @@ dhcp6_pd_client_enable_disable (u32 sw_if_index,
vec_validate (prefix_list, 0);
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
if (is_dhcpv6_pd_prefix (prefix_info) &&
@@ -1325,7 +1304,6 @@ dhcp6_pd_client_enable_disable (u32 sw_if_index,
pool_put (pm->prefix_pool, prefix_info);
}
}
- /* *INDENT-ON* */
vec_free (prefix_list);
@@ -1398,13 +1376,11 @@ done:
* @cliexcmd{dhcp6 pd client GigabitEthernet2/0/0 disable}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp6_pd_client_enable_disable_command, static) = {
.path = "dhcp6 pd client",
.short_help = "dhcp6 pd client <interface> (prefix group <string> | disable)",
.function = dhcp6_pd_client_enable_disable_command_fn,
};
-/* *INDENT-ON* */
#include <vlib/unix/plugin.h>
diff --git a/src/plugins/dhcp/dhcp6_pd_client_dp.c b/src/plugins/dhcp/dhcp6_pd_client_dp.c
index b43e5a4754d..340930c913e 100644
--- a/src/plugins/dhcp/dhcp6_pd_client_dp.c
+++ b/src/plugins/dhcp/dhcp6_pd_client_dp.c
@@ -352,13 +352,11 @@ send_dhcp6_pd_client_message_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (send_dhcp6_pd_client_message_process_node, static) = {
.function = send_dhcp6_pd_client_message_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "send-dhcp6-pd-client-message-process",
};
-/* *INDENT-ON* */
void
dhcp6_pd_send_client_message (vlib_main_t * vm, u32 sw_if_index, u8 stop,
diff --git a/src/plugins/dhcp/dhcp6_pd_doc.md b/src/plugins/dhcp/dhcp6_pd_doc.md
deleted file mode 100644
index 0d0e0865f1b..00000000000
--- a/src/plugins/dhcp/dhcp6_pd_doc.md
+++ /dev/null
@@ -1,86 +0,0 @@
-# DHCPv6 prefix delegation {#dhcp6_pd_doc}
-
-DHCPv6 prefix delegation client implementation is split between Control Plane and Data Plane.
-Data Plane can also be used alone by external application (external Control Plane) using Data Plane Binary API.
-
-Number of different IA\_PDs managed by VPP is currently limited to 1 (and corresponding IAID has value 1).
-Client ID is of type DUID-LLT (Link Layer address plus time) and is created on VPP startup from avaliable interfaces (or chosen at random for debugging purposes).
-Server ID is only visible to Data Plane. Control Plane identifies servers by a 32-bit handle (server\_index) mapped to Server ID by Data Plane.
-
-## Control Plane
-
-DHCPv6 PD clients are configured per interface.
-When configuring a PD client we have to choose a name of a prefix group for that client.
-Each prefix obtained through this client will be flagged as belonging to specified prefix group.
-The prefix groups are used as a filter by prefix consumers.
-
-To enable client on particular interface call Binary API function dhcp6\_pd\_client\_enable\_disable with param 'sw\_if\_index' set to that interface,
-'prefix\_group' set to prefix group name and 'enable' set to true.
-Format of corresponding Debug CLI command is: "dhcp6 pd client <interface> [disable]"
-
-To add/delete IPv6 address potentially using available prefix from specified prefix group call Binary API command ip6\_add\_del\_address\_using\_prefix with parameters:
-> sw\_if\_index - software interface index of interface to add/delete address to/from
-> prefix\_group - name of prefix group, prefix\_group[0] == '\0' means no prefix should be used
-> address - address or suffix to be used with a prefix from selected group
-> prefix\_length - subnet prefix for the address
-> is\_add - 1 for add, 0 for remove
-or Debug CLI command with format: "set ip6 addresses <interface> [prefix group <n>] <address> [del]"
-
-When no prefix is avaliable, no address is physically added, but is added once a prefix becomes avaliable.
-Address is removed when all available prefixes are removed.
-When a used prefix is removed and there is other available prefix, the address that used the prefix is reconfigured using the available prefix.
-
-There are three debug CLI commands (with no parameters) used to show the state of clients, prefixes and addresses:
- show ip6 pd clients
- show ip6 prefixes
- show ip6 addresses
-
-### Example configuration
-
-set int state GigabitEthernet0/8/0 up
-dhcp6 pd client GigabitEthernet0/8/0 prefix group my-dhcp6-pd-group
-set ip6 address GigabitEthernet0/8/0 prefix group my-dhcp6-pd-group ::7/64
-
-## Data Plane
-
-First API message to be called is dhcp6\_clients\_enable\_disable with enable parameter set to 1.
-It enables DHCPv6 client subsystem to receive UDP messages containing DHCPv6 client port (sets the router to DHCPv6 client mode).
-This is to ensure client subsystem gets the messages instead of DHCPv6 proxy subsystem.
-
-There is one common Binary API call for sending DHCPv6 client messages (dhcp6\_pd\_send\_client\_message) with these fields:
-> msg\_type - message type (e.g. Solicit)
-> sw\_if\_index - index of TX interface
-> server\_index - used to dentify DHCPv6 server,
- unique for each DHCPv6 server on the link,
- value obrtained from dhcp6\_pd\_reply\_event API message,
- use ~0 to send message to all DHCPv6 servers
-> param irt - initial retransmission time
-> param mrt - maximum retransmission time
-> param mrc - maximum retransmission count
-> param mrd - maximum retransmission duration for sending the message
-> stop - if non-zero then stop resending the message, otherwise start sending the message
-> T1 - value of T1 in IA\_PD option
-> T2 - value of T2 in IA\_PD option
-> prefixes - list of prefixes in IA\_PD option
-
-The message is automatically resent by Data Plane based on parameters 'irt', 'mrt', 'mrc' and 'mrd'.
-To stop the resending call the same function (same msg\_type is sufficient) with 'stop' set to 1.
-
-To subscribe for notifications of DHCPv6 messages from server call Binary API function
-want\_dhcp6\_pd\_reply\_events with enable\_disable set to 1
-Notification (dhcp6\_pd\_reply\_event) fileds are:
-> sw\_if\_index - index of RX interface
-> server\_index - used to dentify DHCPv6 server, unique for each DHCPv6 server on the link
-> msg\_type - message type
-> T1 - value of T1 in IA\_PD option
-> T2 - value of T2 in IA\_PD option
-> inner\_status\_code - value of status code inside IA\_PD option
-> status\_code - value of status code
-> preference - value of preference option in reply message
-> prefixes - list of prefixes in IA\_PD option
-
-Prefix is a struct with with these fields:
-> prefix - prefix bytes
-> prefix\_length - prefix length
-> valid\_time - valid lifetime
-> preferred\_time - preferred lifetime
diff --git a/src/plugins/dhcp/dhcp6_pd_doc.rst b/src/plugins/dhcp/dhcp6_pd_doc.rst
new file mode 100644
index 00000000000..349abe215e1
--- /dev/null
+++ b/src/plugins/dhcp/dhcp6_pd_doc.rst
@@ -0,0 +1,113 @@
+DHCPv6 prefix delegation
+========================
+
+| DHCPv6 prefix delegation client implementation is split between
+ Control Plane and Data Plane.
+| Data Plane can also be used alone by an external application (external
+  Control Plane) using the Data Plane Binary API.
+
+| The number of different IA_PDs managed by VPP is currently limited to 1
+  (and the corresponding IAID has value 1).
+| Client ID is of type DUID-LLT (Link Layer address plus time) and is
+  created on VPP startup from available interfaces (or chosen at random
+  for debugging purposes).
+| Server ID is only visible to Data Plane. Control Plane identifies
+ servers by a 32-bit handle (server_index) mapped to Server ID by Data
+ Plane.
+
+Control Plane
+-------------
+
+| DHCPv6 PD clients are configured per interface.
+| When configuring a PD client we have to choose a name of a prefix
+ group for that client.
+| Each prefix obtained through this client will be flagged as belonging
+  to the specified prefix group.
+| The prefix groups are used as a filter by prefix consumers.
+
+| To enable the client on a particular interface, call Binary API function
+  dhcp6_pd_client_enable_disable with param ‘sw_if_index’ set to that
+  interface, ‘prefix_group’ set to the prefix group name and ‘enable’ set
+  to true.
+| Format of the corresponding Debug CLI command is: “dhcp6 pd client
+  <interface> (prefix group <string> | disable)”
+
+To add/delete IPv6 address potentially using available prefix from
+specified prefix group call Binary API command
+ip6_add_del_address_using_prefix with parameters:
+
+- sw_if_index - software interface index of interface to add/delete
+  address to/from
+- prefix_group - name of prefix group, prefix_group[0] == ‘\\0’ means no
+  prefix should be used
+- address - address or suffix to be used with a prefix from selected
+  group
+- prefix_length - subnet prefix for the address
+- is_add - 1 for add, 0 for remove
+
+or Debug CLI command with format: “set ip6 address <interface>
+[prefix group <string>] <address> [del]”
+
+| When no prefix is available, no address is physically added, but it is
+  added once a prefix becomes available.
+| Address is removed when all available prefixes are removed.
+| When a used prefix is removed and there is other available prefix, the
+ address that used the prefix is reconfigured using the available
+ prefix.
+
+There are three debug CLI commands (with no parameters) used to show
+the state of clients, prefixes and addresses:
+
+::
+
+   show ip6 pd clients
+   show ip6 prefixes
+   show ip6 addresses
+
+Example configuration
+~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+   set int state GigabitEthernet0/8/0 up
+   dhcp6 pd client GigabitEthernet0/8/0 prefix group my-dhcp6-pd-group
+   set ip6 address GigabitEthernet0/8/0 prefix group my-dhcp6-pd-group ::7/64
+
+Data Plane
+----------
+
+| First API message to be called is dhcp6_clients_enable_disable with
+ enable parameter set to 1.
+| It enables the DHCPv6 client subsystem to receive UDP messages destined
+  to the DHCPv6 client port (sets the router to DHCPv6 client mode).
+| This is to ensure client subsystem gets the messages instead of DHCPv6
+ proxy subsystem.
+
+There is one common Binary API call for sending DHCPv6 client messages
+(dhcp6_pd_send_client_message) with these fields:
+
+- msg_type - message type (e.g. Solicit)
+- sw_if_index - index of TX interface
+- server_index - used to identify DHCPv6 server, unique for each DHCPv6
+  server on the link, value obtained from dhcp6_pd_reply_event API
+  message, use ~0 to send message to all DHCPv6 servers
+- irt - initial retransmission time
+- mrt - maximum retransmission time
+- mrc - maximum retransmission count
+- mrd - maximum retransmission duration for sending the message
+- stop - if non-zero then stop resending the message, otherwise start
+  sending the message
+- T1 - value of T1 in IA_PD option
+- T2 - value of T2 in IA_PD option
+- prefixes - list of prefixes in IA_PD option
+
+| The message is automatically resent by Data Plane based on parameters
+ ‘irt’, ‘mrt’, ‘mrc’ and ‘mrd’.
+| To stop the resending call the same function (same msg_type is
+ sufficient) with ‘stop’ set to 1.
+
+To subscribe for notifications of DHCPv6 messages from server call
+Binary API function want_dhcp6_pd_reply_events with enable_disable set
+to 1. Notification (dhcp6_pd_reply_event) fields are:
+
+- sw_if_index - index of RX interface
+- server_index - used to identify DHCPv6 server, unique for each DHCPv6
+  server on the link
+- msg_type - message type
+- T1 - value of T1 in IA_PD option
+- T2 - value of T2 in IA_PD option
+- inner_status_code - value of status code inside IA_PD option
+- status_code - value of status code
+- preference - value of preference option in reply message
+- prefixes - list of prefixes in IA_PD option
+
+Prefix is a struct with these fields:
+
+- prefix - prefix bytes
+- prefix_length - prefix length
+- valid_time - valid lifetime
+- preferred_time - preferred lifetime
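+
+| For illustration, a minimal sketch of driving the Data Plane from C
+  inside VPP follows. This is a sketch under assumptions: the parameter
+  structure name, its field names and the message-type constant are
+  inferred from the field list above and may differ from the actual
+  headers.
+
+::
+
+   /* hedged sketch, not authoritative API usage */
+   dhcp6_clients_enable_disable (1 /* enable */);
+
+   /* solicit prefix delegation on sw_if_index; names below are
+      assumptions for illustration */
+   dhcp6_pd_send_client_message_params_t params = { 0 };
+   params.msg_type = DHCPV6_MSG_SOLICIT;
+   params.irt = 1;           /* initial retransmission time */
+   params.mrt = 120;         /* maximum retransmission time */
+   params.server_index = ~0; /* address all servers on the link */
+   dhcp6_pd_send_client_message (vm, sw_if_index, 0 /* stop */, &params);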
diff --git a/src/plugins/dhcp/dhcp6_proxy_node.c b/src/plugins/dhcp/dhcp6_proxy_node.c
index 33fb8a37992..a1d41e83b0f 100644
--- a/src/plugins/dhcp/dhcp6_proxy_node.c
+++ b/src/plugins/dhcp/dhcp6_proxy_node.c
@@ -105,7 +105,6 @@ ip6_interface_first_global_or_site_address (ip6_main_t * im, u32 sw_if_index)
ip_interface_address_t *ia = 0;
ip6_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -116,7 +115,6 @@ ip6_interface_first_global_or_site_address (ip6_main_t * im, u32 sw_if_index)
break;
}
}));
- /* *INDENT-ON* */
return result;
}
@@ -136,8 +134,8 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
dhcp_proxy_main_t *dpm = &dhcp_proxy_main;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
- u32 pkts_to_server = 0, pkts_to_client = 0, pkts_no_server = 0;
- u32 pkts_no_interface_address = 0, pkts_no_exceeding_max_hop = 0;
+ u32 pkts_to_server = 0, pkts_to_client = 0;
+ u32 pkts_no_interface_address = 0;
u32 pkts_no_src_address = 0;
u32 pkts_wrong_msg_type = 0;
u32 pkts_too_big = 0;
@@ -236,7 +234,6 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
{
error0 = DHCPV6_PROXY_ERROR_NO_SERVER;
next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
- pkts_no_server++;
goto do_trace;
}
@@ -274,7 +271,6 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
{
error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS;
next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
- pkts_no_exceeding_max_hop++;
goto do_trace;
}
@@ -534,7 +530,6 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcpv6_proxy_to_server_node, static) = {
.function = dhcpv6_proxy_to_server_input,
.name = "dhcpv6-proxy-to-server",
@@ -557,7 +552,6 @@ VLIB_REGISTER_NODE (dhcpv6_proxy_to_server_node, static) = {
.unformat_buffer = unformat_dhcpv6_proxy_header,
#endif
};
-/* *INDENT-ON* */
static uword
dhcpv6_proxy_to_client_input (vlib_main_t * vm,
@@ -583,7 +577,7 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm,
udp_header_t *u0, *u1 = 0;
dhcpv6_relay_hdr_t *h0;
ip6_header_t *ip1 = 0, *ip0;
- ip6_address_t _ia0, *ia0 = &_ia0;
+ ip6_address_t *ia0 = 0;
ip6_address_t client_address;
ethernet_interface_t *ei0;
ethernet_header_t *mac0;
@@ -828,7 +822,6 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcpv6_proxy_to_client_node, static) = {
.function = dhcpv6_proxy_to_client_input,
.name = "dhcpv6-proxy-to-client",
@@ -843,7 +836,6 @@ VLIB_REGISTER_NODE (dhcpv6_proxy_to_client_node, static) = {
.unformat_buffer = unformat_dhcpv6_proxy_header,
#endif
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp6_proxy_init (vlib_main_t * vm)
@@ -927,9 +919,9 @@ dhcp6_proxy_set_server (ip46_address_t * addr,
if (dhcp_proxy_server_add (FIB_PROTOCOL_IP6, addr, src_addr,
rx_fib_index, server_table_id))
{
- mfib_table_entry_path_update (rx_fib_index,
- &all_dhcp_servers,
- MFIB_SOURCE_DHCP, &path_for_us);
+ mfib_table_entry_path_update (rx_fib_index, &all_dhcp_servers,
+ MFIB_SOURCE_DHCP, MFIB_ENTRY_FLAG_NONE,
+ &path_for_us);
/*
* Each interface that is enabled in this table, needs to be added
* as an accepting interface, but this is not easily doable in VPP.
@@ -1018,14 +1010,12 @@ dhcpv6_proxy_set_command_fn (vlib_main_t * vm,
format_unformat_error, input);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcpv6_proxy_set_command, static) = {
.path = "set dhcpv6 proxy",
.short_help = "set dhcpv6 proxy [del] server <ipv6-addr> src-address <ipv6-addr> "
"[server-fib-id <fib-id>] [rx-fib-id <fib-id>] ",
.function = dhcpv6_proxy_set_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_dhcp6_proxy_server (u8 * s, va_list * args)
@@ -1082,13 +1072,11 @@ dhcpv6_proxy_show_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcpv6_proxy_show_command, static) = {
.path = "show dhcpv6 proxy",
.short_help = "Display dhcpv6 proxy info",
.function = dhcpv6_proxy_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcpv6_vss_command_fn (vlib_main_t * vm,
@@ -1131,13 +1119,11 @@ dhcpv6_vss_command_fn (vlib_main_t * vm,
}
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcpv6_proxy_vss_command, static) = {
.path = "set dhcpv6 vss",
.short_help = "set dhcpv6 vss table <table-id> [oui <n> vpn-id <n> | vpn-ascii-id <text>]",
.function = dhcpv6_vss_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcpv6_vss_show_command_fn (vlib_main_t * vm,
@@ -1149,13 +1135,11 @@ dhcpv6_vss_show_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcpv6_proxy_vss_show_command, static) = {
.path = "show dhcpv6 vss",
.short_help = "show dhcpv6 VSS",
.function = dhcpv6_vss_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcpv6_link_address_show_command_fn (vlib_main_t * vm,
@@ -1197,13 +1181,11 @@ dhcpv6_link_address_show_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcpv6_proxy_address_show_command, static) = {
.path = "show dhcpv6 link-address interface",
.short_help = "show dhcpv6 link-address interface <interface>",
.function = dhcpv6_link_address_show_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dhcp/dhcp_api.c b/src/plugins/dhcp/dhcp_api.c
index 2b8d41a10c1..1458db3527d 100644
--- a/src/plugins/dhcp/dhcp_api.c
+++ b/src/plugins/dhcp/dhcp_api.c
@@ -76,12 +76,10 @@ vl_api_dhcp_plugin_control_ping_t_handler (vl_api_dhcp_plugin_control_ping_t *
vl_api_dhcp_plugin_control_ping_reply_t *rmp;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DHCP_PLUGIN_CONTROL_PING_REPLY,
({
rmp->vpe_pid = ntohl (getpid ());
}));
- /* *INDENT-ON* */
}
static void
@@ -92,7 +90,7 @@ vl_api_dhcp6_duid_ll_set_t_handler (vl_api_dhcp6_duid_ll_set_t * mp)
int rv = 0;
duid = (dhcpv6_duid_ll_string_t *) mp->duid_ll;
- if (duid->duid_type != htonl (DHCPV6_DUID_LL))
+ if (duid->duid_type != htons (DHCPV6_DUID_LL))
{
rv = VNET_API_ERROR_INVALID_VALUE;
goto reply;
@@ -279,8 +277,8 @@ dhcp_client_lease_encode (vl_api_dhcp_lease_t * lease,
lease->count = vec_len (client->domain_server_address);
for (i = 0; i < lease->count; i++)
- clib_memcpy (&lease->domain_server[i].address,
- (u8 *) & client->domain_server_address[i],
+ clib_memcpy (&lease->domain_server[i].address.un.ip4,
+ (u8 *) &client->domain_server_address[i],
sizeof (ip4_address_t));
clib_memcpy (&lease->host_mac[0], client->client_hardware_address, 6);
@@ -321,7 +319,9 @@ dhcp_compl_event_callback (u32 client_index, const dhcp_client_t * client)
if (!reg)
return;
- mp = vl_msg_api_alloc (sizeof (*mp));
+ mp = vl_msg_api_alloc (sizeof (*mp) +
+ sizeof (vl_api_domain_server_t) *
+ vec_len (client->domain_server_address));
mp->client_index = client_index;
mp->pid = client->pid;
dhcp_client_lease_encode (&mp->lease, client);
@@ -643,6 +643,31 @@ call_dhcp6_reply_event_callbacks (void *data,
return error;
}
+static void
+vl_api_dhcp_client_detect_enable_disable_t_handler (
+ vl_api_dhcp_client_detect_enable_disable_t *mp)
+{
+ vl_api_dhcp_client_detect_enable_disable_reply_t *rmp;
+ int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
+ if (mp->enable)
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-dhcp-client-detect",
+ clib_net_to_host_u32 (mp->sw_if_index),
+ 1 /* enable */, 0, 0);
+ }
+ else
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-dhcp-client-detect",
+ clib_net_to_host_u32 (mp->sw_if_index),
+ 0 /* disable */, 0, 0);
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_DHCP_CLIENT_DETECT_ENABLE_DISABLE_REPLY);
+}
static uword
dhcp6_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
vlib_frame_t * f)
@@ -699,7 +724,6 @@ dhcp6_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
call_dhcp6_reply_event_callbacks (event, dcpm->functions);
vpe_client_registration_t *reg;
- /* *INDENT-OFF* */
pool_foreach (reg, vpe_api_main.dhcp6_reply_events_registrations)
{
vl_api_registration_t *vl_reg;
@@ -716,7 +740,6 @@ dhcp6_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
vl_api_send_msg (vl_reg, (u8 *) msg);
}
}
- /* *INDENT-ON* */
clib_mem_free (event);
}
@@ -727,13 +750,11 @@ dhcp6_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp6_reply_process_node) = {
.function = dhcp6_reply_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dhcp6-reply-publisher-process",
};
-/* *INDENT-ON* */
static clib_error_t *
call_dhcp6_pd_reply_event_callbacks (void *data,
@@ -811,7 +832,6 @@ dhcp6_pd_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
call_dhcp6_pd_reply_event_callbacks (event, dpcpm->functions);
vpe_client_registration_t *reg;
- /* *INDENT-OFF* */
pool_foreach (reg, vpe_api_main.dhcp6_pd_reply_events_registrations)
{
vl_api_registration_t *vl_reg;
@@ -828,7 +848,6 @@ dhcp6_pd_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
vl_api_send_msg (vl_reg, (u8 *) msg);
}
}
- /* *INDENT-ON* */
clib_mem_free (event);
}
@@ -839,13 +858,11 @@ dhcp6_pd_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp6_pd_reply_process_node) = {
.function = dhcp6_pd_reply_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dhcp6-pd-reply-publisher-process",
};
-/* *INDENT-ON* */
/*
* dhcp_api_hookup
@@ -877,12 +894,10 @@ VLIB_API_INIT_FUNCTION (dhcp_api_hookup);
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Dynamic Host Configuration Protocol (DHCP)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dhcp/dhcp_client_detect.c b/src/plugins/dhcp/dhcp_client_detect.c
index 598bd16cf8d..c02693f2ccf 100644
--- a/src/plugins/dhcp/dhcp_client_detect.c
+++ b/src/plugins/dhcp/dhcp_client_detect.c
@@ -286,7 +286,6 @@ format_dhcp_client_detect_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp_client_detect_node) = {
.name = "ip4-dhcp-client-detect",
.vector_size = sizeof (u32),
@@ -313,7 +312,6 @@ VNET_FEATURE_INIT (ip4_dvr_reinject_feat_node, static) =
.runs_before = VNET_FEATURES ("ip4-not-enabled"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dhcp/dhcp_test.c b/src/plugins/dhcp/dhcp_test.c
index c1894ec01ea..7820f51d442 100644
--- a/src/plugins/dhcp/dhcp_test.c
+++ b/src/plugins/dhcp/dhcp_test.c
@@ -39,13 +39,11 @@ dhcp_test_main_t dhcp_test_main;
#define __plugin_msg_base dhcp_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static int
api_dhcp_proxy_config (vat_main_t * vam)
@@ -423,6 +421,11 @@ api_dhcp_plugin_get_version (vat_main_t * vam)
{
return -1;
}
+static int
+api_dhcp_client_detect_enable_disable (vat_main_t *vam)
+{
+ return -1;
+}
static void
vl_api_dhcp_plugin_get_version_reply_t_handler
diff --git a/src/plugins/dispatch-trace/CMakeLists.txt b/src/plugins/dispatch-trace/CMakeLists.txt
index 5ba47f9aba3..fd7c62396df 100644
--- a/src/plugins/dispatch-trace/CMakeLists.txt
+++ b/src/plugins/dispatch-trace/CMakeLists.txt
@@ -14,4 +14,7 @@
add_vpp_plugin(dispatch_trace
SOURCES
main.c
+
+ COMPONENT
+ vpp-plugin-devtools
)
diff --git a/src/plugins/dispatch-trace/main.c b/src/plugins/dispatch-trace/main.c
index ce662dc0eea..40a87dde0f3 100644
--- a/src/plugins/dispatch-trace/main.c
+++ b/src/plugins/dispatch-trace/main.c
@@ -480,7 +480,7 @@ dispatch_trace_command_fn (vlib_main_t *vm, unformat_input_t *input,
* pcap dispatch capture on...
* @cliexend
* Example of how to display the status of a tx packet capture in progress:
- * @cliexstart{pcap tx trace status}
+ * @cliexstart{pcap trace tx status}
* max is 35, dispatch trace to file /tmp/vppTest.pcap
* pcap tx capture is on: 20 of 35 pkts...
* @cliexend
diff --git a/src/plugins/dma_intel/CMakeLists.txt b/src/plugins/dma_intel/CMakeLists.txt
new file mode 100644
index 00000000000..b683036f7e3
--- /dev/null
+++ b/src/plugins/dma_intel/CMakeLists.txt
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2022 Cisco Systems, Inc.
+
+add_vpp_plugin(dma_intel
+ SOURCES
+ dsa.c
+ format.c
+ main.c
+
+ SUPPORTED_OS_LIST Linux
+)
diff --git a/src/plugins/dma_intel/dsa.c b/src/plugins/dma_intel/dsa.c
new file mode 100644
index 00000000000..473f2efa93e
--- /dev/null
+++ b/src/plugins/dma_intel/dsa.c
@@ -0,0 +1,452 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/dma/dma.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/atomics.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <dma_intel/dsa_intel.h>
+
+extern vlib_node_registration_t intel_dsa_node;
+
+VLIB_REGISTER_LOG_CLASS (intel_dsa_log, static) = {
+ .class_name = "intel_dsa",
+ .subclass_name = "dsa",
+};
+
+static void
+intel_dsa_channel_lock (intel_dsa_channel_t *ch)
+{
+ u8 expected = 0;
+ if (ch->n_threads < 2)
+ return;
+
+ /* channel is used by multiple threads so we need to lock it */
+ while (!__atomic_compare_exchange_n (&ch->lock, &expected,
+ /* desired */ 1, /* weak */ 0,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+ {
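+      /* test-and-test-and-set: spin on plain loads until the lock looks
+       * free, then retry the compare-and-swap */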
+ while (__atomic_load_n (&ch->lock, __ATOMIC_RELAXED))
+ CLIB_PAUSE ();
+ expected = 0;
+ }
+}
+
+static void
+intel_dsa_channel_unlock (intel_dsa_channel_t *ch)
+{
+ if (ch->n_threads < 2)
+ return;
+
+ __atomic_store_n (&ch->lock, 0, __ATOMIC_RELEASE);
+}
+
+static vlib_dma_batch_t *
+intel_dsa_batch_new (vlib_main_t *vm, struct vlib_dma_config_data *cd)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_config_t *idc;
+ intel_dsa_batch_t *b;
+
+ idc = vec_elt_at_index (idm->dsa_config_heap,
+ cd->private_data + vm->thread_index);
+
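+  /* reuse a completed batch from this config's per-thread freelist when
+   * possible, otherwise carve a new one out of physmem and initialize it
+   * from the template */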
+ if (vec_len (idc->freelist) > 0)
+ b = vec_pop (idc->freelist);
+ else
+ {
+ clib_spinlock_lock (&idm->lock);
+ b = vlib_physmem_alloc (vm, idc->alloc_size);
+ clib_spinlock_unlock (&idm->lock);
+ /* if no free space in physmem, force quit */
+ ASSERT (b != NULL);
+ *b = idc->batch_template;
+ b->max_transfers = idc->max_transfers;
+
+ u32 def_flags = (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) |
+ INTEL_DSA_FLAG_CACHE_CONTROL;
+ if (b->ch->block_on_fault)
+ def_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT;
+ for (int i = 0; i < idc->max_transfers; i++)
+ {
+ intel_dsa_desc_t *dsa_desc = b->descs + i;
+ dsa_desc->op_flags = def_flags;
+ }
+ }
+
+ return &b->batch;
+}
+
+#if defined(__x86_64__) || defined(i386)
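+/* MOVDIR64B dst, [src]: copies 64 bytes as a single direct store, which
+ * is how descriptors are pushed into the DSA portal; emitted as raw
+ * bytes so older assemblers need not know the mnemonic */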
+static_always_inline void
+__movdir64b (volatile void *dst, const void *src)
+{
+ asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+ :
+ : "a"(dst), "d"(src)
+ : "memory");
+}
+#endif
+
+static_always_inline void
+intel_dsa_batch_fallback (vlib_main_t *vm, intel_dsa_batch_t *b,
+ intel_dsa_channel_t *ch)
+{
+ for (u16 i = 0; i < b->batch.n_enq; i++)
+ {
+ intel_dsa_desc_t *desc = &b->descs[i];
+ clib_memcpy_fast (desc->dst, desc->src, desc->size);
+ }
+ b->status = INTEL_DSA_STATUS_CPU_SUCCESS;
+ ch->submitted++;
+ return;
+}
+
+int
+intel_dsa_batch_submit (vlib_main_t *vm, struct vlib_dma_batch *vb)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_batch_t *b = (intel_dsa_batch_t *) vb;
+ intel_dsa_channel_t *ch = b->ch;
+ if (PREDICT_FALSE (vb->n_enq == 0))
+ {
+ vec_add1 (idm->dsa_config_heap[b->config_heap_index].freelist, b);
+ return 0;
+ }
+
+ intel_dsa_channel_lock (ch);
+ if (ch->n_enq >= ch->size)
+ {
+ if (!b->sw_fallback)
+ {
+ intel_dsa_channel_unlock (ch);
+ return 0;
+ }
+ /* skip channel limitation if first pending finished */
+ intel_dsa_batch_t *lb = NULL;
+ u32 n_pendings =
+ vec_len (idm->dsa_threads[vm->thread_index].pending_batches);
+ if (n_pendings)
+ lb =
+ idm->dsa_threads[vm->thread_index].pending_batches[n_pendings - 1];
+
+ if (!lb || lb->status != INTEL_DSA_STATUS_SUCCESS)
+ {
+ intel_dsa_batch_fallback (vm, b, ch);
+ goto done;
+ }
+ }
+
+ b->status = INTEL_DSA_STATUS_BUSY;
+ if (PREDICT_FALSE (vb->n_enq == 1))
+ {
+ intel_dsa_desc_t *desc = &b->descs[0];
+ desc->completion = (u64) &b->completion_cl;
+ desc->op_flags |= INTEL_DSA_FLAG_COMPLETION_ADDR_VALID |
+ INTEL_DSA_FLAG_REQUEST_COMPLETION;
+#if defined(__x86_64__) || defined(i386)
+ _mm_sfence (); /* fence before writing desc to device */
+ __movdir64b (ch->portal, (void *) desc);
+#endif
+ }
+ else
+ {
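+      /* multi-transfer case: a trailing BATCH descriptor points at the
+       * descriptor array and the device reports a single completion for
+       * the whole batch */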
+ intel_dsa_desc_t *batch_desc = &b->descs[b->max_transfers];
+ batch_desc->op_flags = (INTEL_DSA_OP_BATCH << INTEL_DSA_OP_SHIFT) |
+ INTEL_DSA_FLAG_COMPLETION_ADDR_VALID |
+ INTEL_DSA_FLAG_REQUEST_COMPLETION;
+ batch_desc->desc_addr = (void *) (b->descs);
+ batch_desc->size = vb->n_enq;
+ batch_desc->completion = (u64) &b->completion_cl;
+#if defined(__x86_64__) || defined(i386)
+ _mm_sfence (); /* fence before writing desc to device */
+ __movdir64b (ch->portal, (void *) batch_desc);
+#endif
+ }
+
+ ch->submitted++;
+ ch->n_enq++;
+
+done:
+ intel_dsa_channel_unlock (ch);
+ vec_add1 (idm->dsa_threads[vm->thread_index].pending_batches, b);
+ vlib_node_set_interrupt_pending (vm, intel_dsa_node.index);
+ return 1;
+}
+
+static int
+intel_dsa_check_channel (intel_dsa_channel_t *ch, vlib_dma_config_data_t *cd)
+{
+ if (!ch)
+ {
+ dsa_log_error ("no available dsa channel");
+ return 1;
+ }
+ vlib_dma_config_t supported_cfg = {
+ .barrier_before_last = 1,
+ .sw_fallback = 1,
+ };
+
+ if (cd->cfg.features & ~supported_cfg.features)
+ {
+ dsa_log_error ("unsupported feature requested");
+ return 1;
+ }
+
+ if (cd->cfg.max_transfers > ch->max_transfers)
+ {
+ dsa_log_error ("transfer number (%u) too big", cd->cfg.max_transfers);
+ return 1;
+ }
+
+ if (cd->cfg.max_transfer_size > ch->max_transfer_size)
+ {
+ dsa_log_error ("transfer size (%u) too big", cd->cfg.max_transfer_size);
+ return 1;
+ }
+ return 0;
+}
+
+static_always_inline void
+intel_dsa_alloc_dma_batch (vlib_main_t *vm, intel_dsa_config_t *idc)
+{
+ intel_dsa_batch_t *b;
+ b = vlib_physmem_alloc (vm, idc->alloc_size);
+ /* if no free space in physmem, force quit */
+ ASSERT (b != NULL);
+ *b = idc->batch_template;
+ b->max_transfers = idc->max_transfers;
+
+ u32 def_flags = (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) |
+ INTEL_DSA_FLAG_CACHE_CONTROL;
+ if (b->ch->block_on_fault)
+ def_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT;
+
+ for (int i = 0; i < idc->max_transfers; i++)
+ {
+ intel_dsa_desc_t *dsa_desc = b->descs + i;
+ dsa_desc->op_flags = def_flags;
+ }
+ vec_add1 (idc->freelist, b);
+}
+
+static int
+intel_dsa_config_add_fn (vlib_main_t *vm, vlib_dma_config_data_t *cd)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_config_t *idc;
+ u32 index, n_threads = vlib_get_n_threads ();
+
+ vec_validate (idm->dsa_config_heap_handle_by_config_index, cd->config_index);
+ index = heap_alloc_aligned (
+ idm->dsa_config_heap, n_threads, CLIB_CACHE_LINE_BYTES,
+ idm->dsa_config_heap_handle_by_config_index[cd->config_index]);
+
+ cd->batch_new_fn = intel_dsa_batch_new;
+ cd->private_data = index;
+
+ for (u32 thread = 0; thread < n_threads; thread++)
+ {
+ intel_dsa_batch_t *idb;
+ vlib_dma_batch_t *b;
+ idc = vec_elt_at_index (idm->dsa_config_heap, index + thread);
+
+ /* size of physmem allocation for this config */
+ idc->max_transfers = cd->cfg.max_transfers;
+ idc->alloc_size = sizeof (intel_dsa_batch_t) +
+ sizeof (intel_dsa_desc_t) * (idc->max_transfers + 1);
+ /* fill batch template */
+ idb = &idc->batch_template;
+ idb->ch = idm->dsa_threads[thread].ch;
+ if (intel_dsa_check_channel (idb->ch, cd))
+ return 0;
+
+ dsa_log_debug ("config %d in thread %d using channel %u/%u",
+ cd->config_index, thread, idb->ch->did, idb->ch->qid);
+ idb->config_heap_index = index + thread;
+ idb->config_index = cd->config_index;
+ idb->batch.callback_fn = cd->cfg.callback_fn;
+ idb->features = cd->cfg.features;
+ b = &idb->batch;
+ b->stride = sizeof (intel_dsa_desc_t);
+ b->src_ptr_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].src);
+ b->dst_ptr_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].dst);
+ b->size_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].size);
+ b->submit_fn = intel_dsa_batch_submit;
+ dsa_log_debug (
+ "config %d in thread %d stride %d src/dst/size offset %d-%d-%d",
+ cd->config_index, thread, b->stride, b->src_ptr_off, b->dst_ptr_off,
+ b->size_off);
+
+ /* allocate dma batch in advance */
+ for (u32 index = 0; index < cd->cfg.max_batches; index++)
+ intel_dsa_alloc_dma_batch (vm, idc);
+ }
+
+ dsa_log_info ("config %u added", cd->private_data);
+
+ return 1;
+}
+
+static void
+intel_dsa_config_del_fn (vlib_main_t *vm, vlib_dma_config_data_t *cd)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_thread_t *t =
+ vec_elt_at_index (idm->dsa_threads, vm->thread_index);
+ u32 n_pending, n_threads, config_heap_index, n = 0;
+ n_threads = vlib_get_n_threads ();
+
+ if (!t->pending_batches)
+ goto free_heap;
+
+ n_pending = vec_len (t->pending_batches);
+ intel_dsa_batch_t *b;
+
+ /* clean pending list and free list */
+ for (u32 i = 0; i < n_pending; i++)
+ {
+ b = t->pending_batches[i];
+ if (b->config_index == cd->config_index)
+ {
+ vec_add1 (idm->dsa_config_heap[b->config_heap_index].freelist, b);
+ if (b->status == INTEL_DSA_STATUS_SUCCESS ||
+ b->status == INTEL_DSA_STATUS_BUSY)
+ b->ch->n_enq--;
+ }
+ else
+ t->pending_batches[n++] = b;
+ }
+
+ vec_set_len (t->pending_batches, n);
+
+free_heap:
+ for (u32 thread = 0; thread < n_threads; thread++)
+ {
+ config_heap_index = cd->private_data + thread;
+ while (vec_len (idm->dsa_config_heap[config_heap_index].freelist) > 0)
+ {
+ b = vec_pop (idm->dsa_config_heap[config_heap_index].freelist);
+ vlib_physmem_free (vm, b);
+ }
+ }
+
+ heap_dealloc (idm->dsa_config_heap,
+ idm->dsa_config_heap_handle_by_config_index[cd->config_index]);
+
+ dsa_log_debug ("config %u removed", cd->private_data);
+}
+
+static uword
+intel_dsa_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_thread_t *t =
+ vec_elt_at_index (idm->dsa_threads, vm->thread_index);
+ u32 n_pending = 0, n = 0;
+ u8 glitch = 0, status;
+
+ if (!t->pending_batches)
+ return 0;
+
+ n_pending = vec_len (t->pending_batches);
+
+ for (u32 i = 0; i < n_pending; i++)
+ {
+ intel_dsa_batch_t *b = t->pending_batches[i];
+ intel_dsa_channel_t *ch = b->ch;
+
+ status = b->status;
+ if ((status == INTEL_DSA_STATUS_SUCCESS ||
+ status == INTEL_DSA_STATUS_CPU_SUCCESS) &&
+ !glitch)
+ {
+ /* callback */
+ if (b->batch.callback_fn)
+ b->batch.callback_fn (vm, &b->batch);
+
+ /* restore last descriptor fields */
+ if (b->batch.n_enq == 1)
+ {
+ b->descs[0].completion = 0;
+ b->descs[0].op_flags =
+ (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) |
+ INTEL_DSA_FLAG_CACHE_CONTROL;
+ if (b->ch->block_on_fault)
+ b->descs[0].op_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT;
+ }
+ /* add to freelist */
+ vec_add1 (idm->dsa_config_heap[b->config_heap_index].freelist, b);
+
+ intel_dsa_channel_lock (ch);
+ if (status == INTEL_DSA_STATUS_SUCCESS)
+ {
+ ch->n_enq--;
+ ch->completed++;
+ }
+ else
+ ch->sw_fallback++;
+ intel_dsa_channel_unlock (ch);
+
+ b->batch.n_enq = 0;
+ b->status = INTEL_DSA_STATUS_IDLE;
+ }
+ else if (status == INTEL_DSA_STATUS_BUSY)
+ {
+ glitch = 1 & b->barrier_before_last;
+ t->pending_batches[n++] = b;
+ }
+ else if (!glitch)
+ {
+ /* fallback to software if exception happened */
+ intel_dsa_batch_fallback (vm, b, ch);
+ glitch = 1 & b->barrier_before_last;
+ }
+ else
+ {
+ t->pending_batches[n++] = b;
+ }
+ }
+ vec_set_len (t->pending_batches, n);
+
+ if (n)
+ {
+ vlib_node_set_interrupt_pending (vm, intel_dsa_node.index);
+ }
+
+ return n_pending - n;
+}
+
+u8 *
+format_dsa_info (u8 *s, va_list *args)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ intel_dsa_channel_t *ch;
+ ch = idm->dsa_threads[vm->thread_index].ch;
+ s = format (s, "thread %d dma %u/%u request %-16lld hw %-16lld cpu %-16lld",
+ vm->thread_index, ch->did, ch->qid, ch->submitted, ch->completed,
+ ch->sw_fallback);
+ return s;
+}
+
+VLIB_REGISTER_NODE (intel_dsa_node) = {
+ .function = intel_dsa_node_fn,
+ .name = "intel-dsa",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .vector_size = 4,
+};
+
+vlib_dma_backend_t intel_dsa_backend = {
+ .name = "Intel DSA",
+ .config_add_fn = intel_dsa_config_add_fn,
+ .config_del_fn = intel_dsa_config_del_fn,
+ .info_fn = format_dsa_info,
+};
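For orientation, a hedged sketch of how a consumer drives this backend through the generic vlib DMA layer (vlib/dma/dma.h); the config values, the my_config_index storage and the my_dma_done callback are illustrative assumptions, not part of this patch:

/* Sketch: allocate a DMA config (served by whichever backend is registered,
 * e.g. the Intel DSA backend above), then issue a single copy. */
#include <vlib/vlib.h>
#include <vlib/dma/dma.h>

static int my_config_index; /* hypothetical storage */

static void
my_dma_done (vlib_main_t *vm, vlib_dma_batch_t *b)
{
  /* called from intel_dsa_node_fn once all transfers completed
   * (or were retired via the CPU fallback path) */
}

static void
my_dma_example (vlib_main_t *vm, void *dst, void *src, u32 size)
{
  vlib_dma_config_t cfg = { .max_transfers = 256,
			    .max_batches = 32,
			    .callback_fn = my_dma_done };
  my_config_index = vlib_dma_config_add (vm, &cfg); /* -> config_add_fn */

  vlib_dma_batch_t *b = vlib_dma_batch_new (vm, my_config_index);
  if (b)
    {
      vlib_dma_batch_add (vm, b, dst, src, size);
      vlib_dma_batch_submit (vm, b); /* -> intel_dsa_batch_submit */
    }
}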
diff --git a/src/plugins/dma_intel/dsa_intel.h b/src/plugins/dma_intel/dsa_intel.h
new file mode 100644
index 00000000000..a52d4bff323
--- /dev/null
+++ b/src/plugins/dma_intel/dsa_intel.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ */
+
+#ifndef __dma_intel_dsa_intel_h__
+#define __dma_intel_dsa_intel_h__
+
+#include <vlib/vlib.h>
+#include <vlib/dma/dma.h>
+#include <vlib/pci/pci.h>
+#include <vppinfra/format.h>
+typedef struct
+{
+ u32 pasid;
+ u32 op_flags;
+ u64 completion;
+ union
+ {
+ void *src;
+ void *desc_addr;
+ };
+ void *dst;
+ u32 size;
+ u16 intr_handle;
+ /* remaining 26 bytes are reserved */
+ u16 __reserved[13];
+} intel_dsa_desc_t;
+
+STATIC_ASSERT_SIZEOF (intel_dsa_desc_t, 64);
+
+#define DSA_DEV_PATH "/dev/dsa"
+#define SYS_DSA_PATH "/sys/bus/dsa/devices"
+
+typedef enum
+{
+ INTEL_DSA_DEVICE_TYPE_UNKNOWN,
+ INTEL_DSA_DEVICE_TYPE_KERNEL,
+ INTEL_DSA_DEVICE_TYPE_USER,
+ INTEL_DSA_DEVICE_TYPE_MDEV,
+} intel_dsa_wq_type_t;
+
+enum dsa_ops
+{
+ INTEL_DSA_OP_NOP = 0,
+ INTEL_DSA_OP_BATCH,
+ INTEL_DSA_OP_DRAIN,
+ INTEL_DSA_OP_MEMMOVE,
+ INTEL_DSA_OP_FILL
+};
+#define INTEL_DSA_OP_SHIFT 24
+#define INTEL_DSA_FLAG_FENCE (1 << 0)
+#define INTEL_DSA_FLAG_BLOCK_ON_FAULT (1 << 1)
+#define INTEL_DSA_FLAG_COMPLETION_ADDR_VALID (1 << 2)
+#define INTEL_DSA_FLAG_REQUEST_COMPLETION (1 << 3)
+#define INTEL_DSA_FLAG_CACHE_CONTROL (1 << 8)
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile void *portal; /* portal exposed by dedicated work queue */
+ u64 submitted;
+ u64 completed;
+ u64 sw_fallback;
+ u32 max_transfer_size; /* maximum size of each transfer */
+  u16 max_transfers;	   /* maximum number of transfers in a batch */
+ u16 n_threads; /* number of threads using this channel */
+ u16 n_enq; /* number of batches currently enqueued */
+ union
+ {
+ u16 wq_control;
+ struct
+ {
+ u16 type : 2;
+ u16 state : 1;
+ u16 ats_disable : 1;
+ u16 block_on_fault : 1;
+ u16 mode : 1;
+ };
+ };
+  u8 lock;   /* spinlock, only used if n_threads > 1 */
+ u8 numa; /* numa node */
+ u8 size; /* size of work queue */
+ u8 did; /* dsa device id */
+ u8 qid; /* work queue id */
+} intel_dsa_channel_t;
+
+typedef struct intel_dsa_batch
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (start);
+ vlib_dma_batch_t batch; /* must be first */
+ intel_dsa_channel_t *ch;
+ u32 config_heap_index;
+ u32 max_transfers;
+ u32 config_index;
+ union
+ {
+ struct
+ {
+ u32 barrier_before_last : 1;
+ u32 sw_fallback : 1;
+ };
+ u32 features;
+ };
+ CLIB_CACHE_LINE_ALIGN_MARK (completion_cl);
+#define INTEL_DSA_STATUS_IDLE 0x0
+#define INTEL_DSA_STATUS_SUCCESS 0x1
+#define INTEL_DSA_STATUS_BUSY 0xa
+#define INTEL_DSA_STATUS_CPU_SUCCESS 0xb
+ u8 status;
+  /* to avoid a read-modify-write, the completion is written as a 64-byte
+   * DMA FILL operation */
+ CLIB_CACHE_LINE_ALIGN_MARK (descriptors);
+ intel_dsa_desc_t descs[0];
+} intel_dsa_batch_t;
+
+STATIC_ASSERT_OFFSET_OF (intel_dsa_batch_t, batch, 0);
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ intel_dsa_batch_t batch_template;
+ u32 alloc_size;
+ u32 max_transfers;
+ intel_dsa_batch_t **freelist;
+} intel_dsa_config_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ intel_dsa_channel_t *ch; /* channel used by this thread */
+ intel_dsa_batch_t **pending_batches;
+} intel_dsa_thread_t;
+
+typedef struct
+{
+ intel_dsa_channel_t ***channels;
+ intel_dsa_thread_t *dsa_threads;
+ intel_dsa_config_t *dsa_config_heap;
+ uword *dsa_config_heap_handle_by_config_index;
+  /* spinlock protecting physmem */
+ clib_spinlock_t lock;
+} intel_dsa_main_t;
+
+extern intel_dsa_main_t intel_dsa_main;
+extern vlib_dma_backend_t intel_dsa_backend;
+format_function_t format_intel_dsa_addr;
+
+#define dsa_log_debug(f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, intel_dsa_log.class, "%s: " f, __func__, \
+ ##__VA_ARGS__)
+
+#define dsa_log_info(f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_INFO, intel_dsa_log.class, "%s: " f, __func__, \
+ ##__VA_ARGS__)
+
+#define dsa_log_error(f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, intel_dsa_log.class, "%s: " f, __func__, \
+ ##__VA_ARGS__)
+
+#endif
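As a reading aid, a minimal sketch (using only the definitions above) of how one MEMMOVE descriptor is populated, mirroring the descriptor-restore code in the input node; pointing `completion` at a completion record and writing the descriptor to ch->portal are left to the submit path:

static inline void
intel_dsa_fill_memmove_desc_sketch (intel_dsa_desc_t *desc, void *dst,
				    void *src, u32 size, int block_on_fault)
{
  desc->op_flags = (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) |
		   INTEL_DSA_FLAG_CACHE_CONTROL;
  if (block_on_fault)
    desc->op_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT;
  desc->src = src;
  desc->dst = dst;
  desc->size = size;
  desc->completion = 0; /* set to a completion record address at submit */
}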
diff --git a/src/plugins/dma_intel/format.c b/src/plugins/dma_intel/format.c
new file mode 100644
index 00000000000..b05a06fb3b1
--- /dev/null
+++ b/src/plugins/dma_intel/format.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ */
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/dma/dma.h>
+#include <vnet/plugin/plugin.h>
+#include <dma_intel/dsa_intel.h>
+
+u8 *
+format_intel_dsa_addr (u8 *s, va_list *va)
+{
+ intel_dsa_channel_t *ch = va_arg (*va, intel_dsa_channel_t *);
+ return format (s, "wq%d.%d", ch->did, ch->qid);
+}
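This formatter plugs into the usual %U convention; main.c below uses it to build the sysfs work-queue path, e.g.:

/* yields e.g. ".../wq0.1" for did 0, qid 1 */
u8 *wq_dir_name = format (0, "%s/%U", SYS_DSA_PATH, format_intel_dsa_addr, ch);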
diff --git a/src/plugins/dma_intel/main.c b/src/plugins/dma_intel/main.c
new file mode 100644
index 00000000000..0f9ac185b9d
--- /dev/null
+++ b/src/plugins/dma_intel/main.c
@@ -0,0 +1,272 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/dma/dma.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <vppinfra/linux/sysfs.h>
+#include <dma_intel/dsa_intel.h>
+
+VLIB_REGISTER_LOG_CLASS (intel_dsa_log, static) = {
+ .class_name = "intel_dsa",
+};
+
+intel_dsa_main_t intel_dsa_main;
+
+void
+intel_dsa_assign_channels (vlib_main_t *vm)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_channel_t *ch, **chv = 0;
+ u16 n_threads;
+ int n;
+
+ vec_foreach_index (n, idm->channels)
+ vec_append (chv, idm->channels[n]);
+
+ vec_validate (idm->dsa_threads, vlib_get_n_threads () - 1);
+
+ if (vec_len (chv) == 0)
+ {
+ dsa_log_debug ("No DSA channels found");
+ goto done;
+ }
+
+ if (vec_len (chv) >= vlib_get_n_threads ())
+ n_threads = 1;
+ else
+ n_threads = vlib_get_n_threads () % vec_len (chv) ?
+ vlib_get_n_threads () / vec_len (chv) + 1 :
+ vlib_get_n_threads () / vec_len (chv);
+
+ for (int i = 0; i < vlib_get_n_threads (); i++)
+ {
+ vlib_main_t *tvm = vlib_get_main_by_index (i);
+ ch = *vec_elt_at_index (chv, i / n_threads);
+ idm->dsa_threads[i].ch = ch;
+ ch->n_threads = n_threads;
+ dsa_log_debug ("Assigning channel %u/%u to thread %u (numa %u)", ch->did,
+ ch->qid, i, tvm->numa_node);
+ }
+
+done:
+ /* free */
+ vec_free (chv);
+}
+
+static clib_error_t *
+intel_dsa_map_region (intel_dsa_channel_t *ch)
+{
+  clib_error_t *error = NULL;
+ /* map one page */
+ uword size = 0x1000;
+ uword offset = 0;
+ char path[256] = { 0 };
+
+ snprintf (path, sizeof (path), "%s/wq%d.%d", DSA_DEV_PATH, ch->did, ch->qid);
+ int fd = open (path, O_RDWR);
+ if (fd < 0)
+ return clib_error_return (0, "failed to open dsa device %s", path);
+
+ ch->portal =
+ clib_mem_vm_map_shared (0, size, fd, offset, "%s", (char *) path);
+ if (ch->portal == CLIB_MEM_VM_MAP_FAILED)
+ {
+ error = clib_error_return (0, "mmap portal %s failed", path);
+ close (fd);
+ return error;
+ }
+
+ return NULL;
+}
+
+static clib_error_t *
+intel_dsa_get_info (intel_dsa_channel_t *ch, clib_error_t **error)
+{
+ clib_error_t *err;
+ u8 *tmpstr;
+ u8 *dev_dir_name = 0, *wq_dir_name = 0;
+
+ u8 *f = 0;
+ dev_dir_name = format (0, "%s/dsa%d", SYS_DSA_PATH, ch->did);
+
+ vec_reset_length (f);
+ f = format (f, "%v/numa_node%c", dev_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->numa = atoi ((char *) tmpstr);
+
+ wq_dir_name = format (0, "%s/%U", SYS_DSA_PATH, format_intel_dsa_addr, ch);
+
+ vec_reset_length (f);
+ f = format (f, "%v/max_transfer_size%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->max_transfer_size = atoi ((char *) tmpstr);
+
+ vec_reset_length (f);
+ f = format (f, "%v/max_batch_size%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->max_transfers = atoi ((char *) tmpstr);
+
+ vec_reset_length (f);
+ f = format (f, "%v/size%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->size = atoi ((char *) tmpstr);
+
+ vec_reset_length (f);
+ f = format (f, "%v/type%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ if (tmpstr)
+ {
+      if (!clib_strcmp ((char *) tmpstr, "kernel"))
+	ch->type = INTEL_DSA_DEVICE_TYPE_KERNEL;
+      else if (!clib_strcmp ((char *) tmpstr, "user"))
+	ch->type = INTEL_DSA_DEVICE_TYPE_USER;
+      else if (!clib_strcmp ((char *) tmpstr, "mdev"))
+	ch->type = INTEL_DSA_DEVICE_TYPE_MDEV;
+ else
+ ch->type = INTEL_DSA_DEVICE_TYPE_UNKNOWN;
+ vec_free (tmpstr);
+ }
+
+ vec_reset_length (f);
+ f = format (f, "%v/state%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ if (tmpstr)
+ {
+ if (!clib_strcmp ((char *) tmpstr, "enabled"))
+ ch->state = 1;
+ else
+ ch->state = 0;
+ vec_free (tmpstr);
+ }
+
+ vec_reset_length (f);
+ f = format (f, "%v/ats_disable%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->ats_disable = atoi ((char *) tmpstr);
+
+ vec_reset_length (f);
+ f = format (f, "%v/block_on_fault%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->block_on_fault = atoi ((char *) tmpstr);
+
+ vec_reset_length (f);
+ f = format (f, "%v/mode%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ if (tmpstr)
+ {
+ if (!clib_strcmp ((char *) tmpstr, "dedicated"))
+ ch->mode = 1;
+ else
+ ch->mode = 0;
+ vec_free (tmpstr);
+ }
+
+ vec_free (f);
+ vec_free (dev_dir_name);
+ vec_free (wq_dir_name);
+ return NULL;
+
+error:
+ vec_free (f);
+ vec_free (dev_dir_name);
+ vec_free (wq_dir_name);
+
+ return err;
+}
+
+clib_error_t *
+intel_dsa_add_channel (vlib_main_t *vm, intel_dsa_channel_t *ch)
+{
+ intel_dsa_main_t *dm = &intel_dsa_main;
+ clib_error_t *err = 0;
+
+ if (intel_dsa_map_region (ch))
+ return clib_error_return (0, "dsa open device failed");
+
+ if (intel_dsa_get_info (ch, &err))
+ return clib_error_return (err, "dsa info not scanned");
+
+ vec_validate (dm->channels, ch->numa);
+ vec_add1 (dm->channels[ch->numa], ch);
+
+ return err;
+}
+
+static clib_error_t *
+dsa_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ clib_error_t *error = 0;
+ intel_dsa_channel_t *ch;
+ u32 did, qid;
+
+ if (intel_dsa_main.lock == 0)
+ clib_spinlock_init (&(intel_dsa_main.lock));
+
+ if ((error = vlib_dma_register_backend (vm, &intel_dsa_backend)))
+ goto done;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "dev wq%d.%d", &did, &qid))
+ {
+ ch = clib_mem_alloc_aligned (sizeof (*ch), CLIB_CACHE_LINE_BYTES);
+ clib_memset (ch, 0, sizeof (*ch));
+ ch->did = did;
+ ch->qid = qid;
+ if (intel_dsa_add_channel (vm, ch))
+ clib_mem_free (ch);
+ }
+ else if (unformat_skip_white_space (input))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+done:
+ return error;
+}
+
+VLIB_CONFIG_FUNCTION (dsa_config, "dsa");
+
+clib_error_t *
+intel_dsa_num_workers_change (vlib_main_t *vm)
+{
+ intel_dsa_assign_channels (vm);
+ return 0;
+}
+
+VLIB_NUM_WORKERS_CHANGE_FN (intel_dsa_num_workers_change);
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Intel DSA Backend",
+};
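Given the "dsa" keyword registered via VLIB_CONFIG_FUNCTION and the `dev wq%d.%d` unformat pattern above, a startup.conf stanza handing two work queues to VPP would look like this (device/queue numbers are illustrative):

dsa {
  dev wq0.0
  dev wq1.0
}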
diff --git a/src/plugins/dns/dns.c b/src/plugins/dns/dns.c
index 0801681b8b3..3cecf942d55 100644
--- a/src/plugins/dns/dns.c
+++ b/src/plugins/dns/dns.c
@@ -16,9 +16,8 @@
#include <vnet/vnet.h>
#include <vnet/udp/udp_local.h>
#include <vnet/plugin/plugin.h>
-#include <vnet/fib/fib_table.h>
#include <dns/dns.h>
-
+#include <vnet/ip/ip_sas.h>
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
#include <vpp/app/version.h>
@@ -31,16 +30,27 @@
#define REPLY_MSG_ID_BASE dm->msg_id_base
#include <vlibapi/api_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
dns_main_t dns_main;
+/* the cache hashtable expects a NUL-terminated C-string but everywhere else
+ * expects a non-NUL-terminated vector... The pattern of adding \0 but hiding
+ * it away drives AddressSanitizer crazy; this helper tries to bring some of
+ * its sanity back
+ */
+static_always_inline void
+dns_terminate_c_string (u8 **v)
+{
+ vec_add1 (*v, 0);
+ vec_dec_len (*v, 1);
+ clib_mem_unpoison (vec_end (*v), 1);
+}
+
static int
dns_cache_clear (dns_main_t * dm)
{
@@ -51,13 +61,11 @@ dns_cache_clear (dns_main_t * dm)
dns_cache_lock (dm, 1);
- /* *INDENT-OFF* */
pool_foreach (ep, dm->entries)
{
vec_free (ep->name);
vec_free (ep->pending_requests);
}
- /* *INDENT-ON* */
pool_free (dm->entries);
hash_free (dm->cache_entry_by_name);
@@ -225,66 +233,16 @@ vnet_dns_send_dns4_request (vlib_main_t * vm, dns_main_t * dm,
u32 bi;
vlib_buffer_t *b;
ip4_header_t *ip;
- fib_prefix_t prefix;
- fib_node_index_t fei;
- u32 sw_if_index, fib_index;
udp_header_t *udp;
- ip4_main_t *im4 = &ip4_main;
- ip_lookup_main_t *lm4 = &im4->lookup_main;
- ip_interface_address_t *ia = 0;
- ip4_address_t *src_address;
+ ip4_address_t src_address;
u8 *dns_request;
vlib_frame_t *f;
u32 *to_next;
ASSERT (ep->dns_request);
- /* Find a FIB path to the server */
- clib_memcpy (&prefix.fp_addr.ip4, server, sizeof (*server));
- prefix.fp_proto = FIB_PROTOCOL_IP4;
- prefix.fp_len = 32;
-
- fib_index = fib_table_find (prefix.fp_proto, 0 /* default VRF for now */ );
- if (fib_index == (u32) ~ 0)
- {
- if (0)
- clib_warning ("no fib table");
- return;
- }
-
- fei = fib_table_lookup (fib_index, &prefix);
-
- /* Couldn't find route to destination. Bail out. */
- if (fei == FIB_NODE_INDEX_INVALID)
- {
- if (0)
- clib_warning ("no route to DNS server");
- return;
- }
-
- sw_if_index = fib_entry_get_resolving_interface (fei);
-
- if (sw_if_index == ~0)
- {
- if (0)
- clib_warning
- ("route to %U exists, fei %d, get_resolving_interface returned"
- " ~0", format_ip4_address, &prefix.fp_addr, fei);
- return;
- }
-
- /* *INDENT-OFF* */
- foreach_ip_interface_address(lm4, ia, sw_if_index, 1 /* honor unnumbered */,
- ({
- src_address = ip_interface_address_get_address (lm4, ia);
- goto found_src_address;
- }));
- /* *INDENT-ON* */
-
- clib_warning ("FIB BUG");
- return;
-
-found_src_address:
+ if (!ip4_sas (0 /* default VRF for now */, ~0, server, &src_address))
+ return;
/* Go get a buffer */
if (vlib_buffer_alloc (vm, &bi, 1) != 1)
@@ -311,7 +269,7 @@ found_src_address:
ip->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
ip->ttl = 255;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
+ ip->src_address.as_u32 = src_address.as_u32;
ip->dst_address.as_u32 = server->as_u32;
ip->checksum = ip4_header_checksum (ip);
@@ -343,14 +301,8 @@ vnet_dns_send_dns6_request (vlib_main_t * vm, dns_main_t * dm,
u32 bi;
vlib_buffer_t *b;
ip6_header_t *ip;
- fib_prefix_t prefix;
- fib_node_index_t fei;
- u32 sw_if_index, fib_index;
udp_header_t *udp;
- ip6_main_t *im6 = &ip6_main;
- ip_lookup_main_t *lm6 = &im6->lookup_main;
- ip_interface_address_t *ia = 0;
- ip6_address_t *src_address;
+ ip6_address_t src_address;
u8 *dns_request;
vlib_frame_t *f;
u32 *to_next;
@@ -358,41 +310,8 @@ vnet_dns_send_dns6_request (vlib_main_t * vm, dns_main_t * dm,
ASSERT (ep->dns_request);
- /* Find a FIB path to the server */
- clib_memcpy (&prefix.fp_addr, server, sizeof (*server));
- prefix.fp_proto = FIB_PROTOCOL_IP6;
- prefix.fp_len = 32;
-
- fib_index = fib_table_find (prefix.fp_proto, 0 /* default VRF for now */ );
- if (fib_index == (u32) ~ 0)
- {
- if (0)
- clib_warning ("no fib table");
- return;
- }
-
- fei = fib_table_lookup (fib_index, &prefix);
-
- /* Couldn't find route to destination. Bail out. */
- if (fei == FIB_NODE_INDEX_INVALID)
- {
- clib_warning ("no route to DNS server");
- }
-
- sw_if_index = fib_entry_get_resolving_interface (fei);
-
- /* *INDENT-OFF* */
- foreach_ip_interface_address(lm6, ia, sw_if_index, 1 /* honor unnumbered */,
- ({
- src_address = ip_interface_address_get_address (lm6, ia);
- goto found_src_address;
- }));
- /* *INDENT-ON* */
-
- clib_warning ("FIB BUG");
- return;
-
-found_src_address:
+ if (!ip6_sas (0 /* default VRF for now */, ~0, server, &src_address))
+ return;
/* Go get a buffer */
if (vlib_buffer_alloc (vm, &bi, 1) != 1)
@@ -421,7 +340,7 @@ found_src_address:
- sizeof (ip6_header_t));
ip->hop_limit = 255;
ip->protocol = IP_PROTOCOL_UDP;
- clib_memcpy (&ip->src_address, src_address, sizeof (ip6_address_t));
+ ip6_address_copy (&ip->src_address, &src_address);
clib_memcpy (&ip->dst_address, server, sizeof (ip6_address_t));
/* UDP header */
@@ -918,8 +837,8 @@ re_resolve:
pool_get (dm->entries, ep);
clib_memset (ep, 0, sizeof (*ep));
- ep->name = format (0, "%s%c", name, 0);
- _vec_len (ep->name) = vec_len (ep->name) - 1;
+ ep->name = format (0, "%s", name);
+ dns_terminate_c_string (&ep->name);
hash_set_mem (dm->cache_entry_by_name, ep->name, ep - dm->entries);
@@ -1077,8 +996,7 @@ found_last_request:
now = vlib_time_now (vm);
cname = vnet_dns_labels_to_name (rr->rdata, reply, &pos2);
/* Save the cname */
- vec_add1 (cname, 0);
- _vec_len (cname) -= 1;
+ dns_terminate_c_string (&cname);
ep = pool_elt_at_index (dm->entries, ep_index);
ep->cname = cname;
ep->flags |= (DNS_CACHE_ENTRY_FLAG_CNAME | DNS_CACHE_ENTRY_FLAG_VALID);
@@ -1096,8 +1014,7 @@ found_last_request:
clib_memset (next_ep, 0, sizeof (*next_ep));
next_ep->name = vec_dup (cname);
- vec_add1 (next_ep->name, 0);
- _vec_len (next_ep->name) -= 1;
+ dns_terminate_c_string (&next_ep->name);
hash_set_mem (dm->cache_entry_by_name, next_ep->name,
next_ep - dm->entries);
@@ -1460,7 +1377,7 @@ vl_api_dns_resolve_name_t_handler (vl_api_dns_resolve_name_t * mp)
dns_main_t *dm = &dns_main;
vl_api_dns_resolve_name_reply_t *rmp;
dns_cache_entry_t *ep = 0;
- dns_pending_request_t _t0, *t0 = &_t0;
+ dns_pending_request_t _t0 = { 0 }, *t0 = &_t0;
int rv;
dns_resolve_name_t rn;
@@ -1484,7 +1401,6 @@ vl_api_dns_resolve_name_t_handler (vl_api_dns_resolve_name_t * mp)
if (ep == 0)
return;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DNS_RESOLVE_NAME_REPLY, ({
ip_address_copy_addr (rmp->ip4_address, &rn.address);
if (ip_addr_version (&rn.address) == AF_IP4)
@@ -1492,7 +1408,6 @@ vl_api_dns_resolve_name_t_handler (vl_api_dns_resolve_name_t * mp)
else
rmp->ip6_set = 1;
}));
- /* *INDENT-ON* */
}
static void
@@ -1506,7 +1421,7 @@ vl_api_dns_resolve_ip_t_handler (vl_api_dns_resolve_ip_t * mp)
int i, len;
u8 *lookup_name = 0;
u8 digit, nybble;
- dns_pending_request_t _t0, *t0 = &_t0;
+ dns_pending_request_t _t0 = { 0 }, *t0 = &_t0;
if (mp->is_ip6)
{
@@ -1561,13 +1476,11 @@ vl_api_dns_resolve_ip_t_handler (vl_api_dns_resolve_ip_t * mp)
if (ep == 0)
return;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_DNS_RESOLVE_IP_REPLY,
({
rv = vnet_dns_response_to_name (ep->dns_response, rmp, 0 /* ttl-ptr */);
rmp->retval = clib_host_to_net_u32 (rv);
}));
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -2177,7 +2090,6 @@ format_dns_cache (u8 * s, va_list * args)
if (verbose > 0)
{
- /* *INDENT-OFF* */
pool_foreach (ep, dm->entries)
{
if (ep->flags & DNS_CACHE_ENTRY_FLAG_VALID)
@@ -2216,7 +2128,6 @@ format_dns_cache (u8 * s, va_list * args)
}
vec_add1 (s, '\n');
}
- /* *INDENT-ON* */
}
dns_cache_unlock (dm);
@@ -2251,14 +2162,12 @@ show_dns_cache_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_dns_cache_command) =
{
.path = "show dns cache",
.short_help = "show dns cache [verbose [nn]]",
.function = show_dns_cache_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_dns_servers_command_fn (vlib_main_t * vm,
@@ -2288,14 +2197,12 @@ show_dns_servers_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_dns_server_command) =
{
.path = "show dns servers",
.short_help = "show dns servers",
.function = show_dns_servers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -2390,14 +2297,12 @@ dns_cache_add_del_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dns_cache_add_del_command) =
{
.path = "dns cache",
.short_help = "dns cache [add|del|clear] <name> [ip4][ip6]",
.function = dns_cache_add_del_command_fn,
};
-/* *INDENT-ON* */
#define DNS_FORMAT_TEST 1
@@ -2638,14 +2543,12 @@ test_dns_fmt_command_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_dns_fmt_command) =
{
.path = "test dns format",
.short_help = "test dns format",
.function = test_dns_fmt_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_dns_unfmt_command_fn (vlib_main_t * vm,
@@ -2678,14 +2581,12 @@ test_dns_unfmt_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_dns_unfmt_command) =
{
.path = "test dns unformat",
.short_help = "test dns unformat <name> [ip4][ip6]",
.function = test_dns_unfmt_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_dns_expire_command_fn (vlib_main_t * vm,
@@ -2699,10 +2600,7 @@ test_dns_expire_command_fn (vlib_main_t * vm,
dns_cache_entry_t *ep;
if (unformat (input, "%v", &name))
- {
- vec_add1 (name, 0);
- _vec_len (name) -= 1;
- }
+ dns_terminate_c_string (&name);
else
return clib_error_return (0, "no name provided");
@@ -2724,14 +2622,12 @@ test_dns_expire_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_dns_expire_command) =
{
.path = "test dns expire",
.short_help = "test dns expire <name>",
.function = test_dns_expire_command_fn,
};
-/* *INDENT-ON* */
#endif
void
@@ -2749,13 +2645,7 @@ vnet_send_dns4_reply (vlib_main_t * vm, dns_main_t * dm,
vlib_buffer_t * b0)
{
u32 bi = 0;
- fib_prefix_t prefix;
- fib_node_index_t fei;
- u32 sw_if_index, fib_index;
- ip4_main_t *im4 = &ip4_main;
- ip_lookup_main_t *lm4 = &im4->lookup_main;
- ip_interface_address_t *ia = 0;
- ip4_address_t *src_address;
+ ip4_address_t src_address;
ip4_header_t *ip;
udp_header_t *udp;
dns_header_t *dh;
@@ -2839,50 +2729,9 @@ vnet_send_dns4_reply (vlib_main_t * vm, dns_main_t * dm,
vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; /* "local0" */
vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; /* default VRF for now */
- /* Find a FIB path to the peer we're trying to answer */
- clib_memcpy (&prefix.fp_addr.ip4, pr->dst_address, sizeof (ip4_address_t));
- prefix.fp_proto = FIB_PROTOCOL_IP4;
- prefix.fp_len = 32;
-
- fib_index = fib_table_find (prefix.fp_proto, 0 /* default VRF for now */ );
- if (fib_index == (u32) ~ 0)
- {
- clib_warning ("no fib table");
- return;
- }
-
- fei = fib_table_lookup (fib_index, &prefix);
-
- /* Couldn't find route to destination. Bail out. */
- if (fei == FIB_NODE_INDEX_INVALID)
- {
- clib_warning ("no route to DNS server");
- return;
- }
-
- sw_if_index = fib_entry_get_resolving_interface (fei);
-
- if (sw_if_index == ~0)
- {
- clib_warning (
- "route to %U exists, fei %d, get_resolving_interface returned"
- " ~0",
- format_ip4_address, &prefix.fp_addr, fei);
- return;
- }
-
- /* *INDENT-OFF* */
- foreach_ip_interface_address(lm4, ia, sw_if_index, 1 /* honor unnumbered */,
- ({
- src_address = ip_interface_address_get_address (lm4, ia);
- goto found_src_address;
- }));
- /* *INDENT-ON* */
-
- clib_warning ("FIB BUG");
- return;
-
-found_src_address:
+ if (!ip4_sas (0 /* default VRF for now */, ~0,
+ (const ip4_address_t *) &pr->dst_address, &src_address))
+ return;
ip = vlib_buffer_get_current (b0);
udp = (udp_header_t *) (ip + 1);
@@ -2975,7 +2824,7 @@ found_src_address:
ip->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
ip->ttl = 255;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
+ ip->src_address.as_u32 = src_address.as_u32;
clib_memcpy (ip->dst_address.as_u8, pr->dst_address,
sizeof (ip4_address_t));
ip->checksum = ip4_header_checksum (ip);
@@ -3020,7 +2869,6 @@ dns_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (dns_init) = {
.init_order = VLIB_INITS ("flow_classify_init", "dns_init"),
};
@@ -3030,7 +2878,6 @@ VLIB_PLUGIN_REGISTER () =
.version = VPP_BUILD_VER,
.description = "Simple DNS name resolver",
};
-/* *INDENT-ON* */
/*
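Two patterns recur throughout this dns.c change: the open-coded FIB walks for picking a source address are replaced by the shared ip4_sas()/ip6_sas() helpers (which return nonzero when a source address was found), and NUL termination of name vectors is centralized in dns_terminate_c_string(). A sketch of the invariant that helper establishes (the name is illustrative):

u8 *name = format (0, "example.com");
dns_terminate_c_string (&name);
/* the vector length still excludes the NUL, but the terminator byte is
 * valid and ASan-unpoisoned, so the buffer doubles as a C-string */
ASSERT (vec_len (name) == strlen ((char *) name));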
diff --git a/src/plugins/dns/dns_packet.h b/src/plugins/dns/dns_packet.h
index da5ddfa64fe..13daf7849de 100644
--- a/src/plugins/dns/dns_packet.h
+++ b/src/plugins/dns/dns_packet.h
@@ -20,7 +20,6 @@
* DNS packet header format
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u16 id; /**< transaction ID */
u16 flags; /**< flags */
@@ -29,7 +28,6 @@ typedef CLIB_PACKED (struct {
u16 nscount; /**< number of name servers */
u16 arcount; /**< number of additional records */
}) dns_header_t;
-/* *INDENT-ON* */
#define DNS_RCODE_MASK (0xf)
#define DNS_RCODE_NO_ERROR 0
@@ -99,17 +97,14 @@ typedef CLIB_PACKED (struct {
/**
* DNS "question" fixed header.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u16 type; /**< record type requested */
u16 class; /**< class, 1 = internet */
}) dns_query_t;
-/* *INDENT-ON* */
/**
* DNS RR fixed header.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u16 type; /**< record type */
u16 class; /**< class, 1 = internet */
@@ -118,7 +113,6 @@ typedef CLIB_PACKED (struct {
/**< length of r */
u8 rdata[0];
}) dns_rr_t;
-/* *INDENT-ON* */
/*
* There are quite a number of DNS record types
diff --git a/src/plugins/dns/reply_node.c b/src/plugins/dns/reply_node.c
index b15943a6448..cc9f6065474 100644
--- a/src/plugins/dns/reply_node.c
+++ b/src/plugins/dns/reply_node.c
@@ -200,7 +200,6 @@ dns46_reply_node_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dns46_reply_node) =
{
.function = dns46_reply_node_fn,
@@ -216,7 +215,6 @@ VLIB_REGISTER_NODE (dns46_reply_node) =
[DNS46_REPLY_NEXT_PUNT] = "error-punt",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dns/request_node.c b/src/plugins/dns/request_node.c
index 72a76d12e4f..13ebc4c2c13 100644
--- a/src/plugins/dns/request_node.c
+++ b/src/plugins/dns/request_node.c
@@ -208,13 +208,7 @@ dns46_request_inline (vlib_main_t * vm,
label0 = (u8 *) (d0 + 1);
- /*
- * vnet_dns_labels_to_name produces a non NULL terminated vector
- * vnet_dns_resolve_name expects a C-string.
- */
name0 = vnet_dns_labels_to_name (label0, (u8 *) d0, (u8 **) & q0);
- vec_add1 (name0, 0);
- _vec_len (name0) -= 1;
t0->request_type = DNS_PEER_PENDING_NAME_TO_IP;
@@ -242,6 +236,11 @@ dns46_request_inline (vlib_main_t * vm,
clib_memcpy_fast (t0->dst_address, ip40->src_address.as_u8,
sizeof (ip4_address_t));
+ /*
+	   * vnet_dns_labels_to_name produces a non-NUL-terminated vector;
+	   * vnet_dns_resolve_name expects a C-string.
+ */
+ vec_add1 (name0, 0);
vnet_dns_resolve_name (vm, dm, name0, t0, &ep0);
if (ep0)
@@ -289,7 +288,6 @@ dns4_request_node_fn (vlib_main_t * vm,
return dns46_request_inline (vm, node, frame, 0 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dns4_request_node) =
{
.function = dns4_request_node_fn,
@@ -306,7 +304,6 @@ VLIB_REGISTER_NODE (dns4_request_node) =
[DNS46_REQUEST_NEXT_IP_LOOKUP] = "ip4-lookup",
},
};
-/* *INDENT-ON* */
static uword
dns6_request_node_fn (vlib_main_t * vm,
@@ -316,7 +313,6 @@ dns6_request_node_fn (vlib_main_t * vm,
return dns46_request_inline (vm, node, frame, 1 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dns6_request_node) =
{
.function = dns6_request_node_fn,
@@ -333,7 +329,6 @@ VLIB_REGISTER_NODE (dns6_request_node) =
[DNS46_REQUEST_NEXT_IP_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/CMakeLists.txt b/src/plugins/dpdk/CMakeLists.txt
index 5de75e76289..48c56f35282 100644
--- a/src/plugins/dpdk/CMakeLists.txt
+++ b/src/plugins/dpdk/CMakeLists.txt
@@ -90,8 +90,10 @@ else()
##############################################################################
# libnuma
##############################################################################
- vpp_plugin_find_library(dpdk NUMA_LIB "numa")
- list(APPEND DPDK_LINK_LIBRARIES ${NUMA_LIB})
+ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
+ vpp_plugin_find_library(dpdk NUMA_LIB "numa")
+ list(APPEND DPDK_LINK_LIBRARIES ${NUMA_LIB})
+ endif()
##############################################################################
# Mellanox libraries
@@ -103,12 +105,10 @@ else()
list(APPEND DPDK_LINK_LIBRARIES "${MNL_LIB}")
else()
message(WARNING "EXPERIMENTAL: DPDK plugin without dlopen mode")
- vpp_plugin_find_library(dpdk IBVERBS_LIB "libibverbs.a")
- vpp_plugin_find_library(dpdk MLX5_LIB "libmlx5.a")
- vpp_plugin_find_library(dpdk MLX4_LIB "libmlx4.a")
- vpp_plugin_find_library(dpdk CCAN_LIB "libccan.a")
- vpp_plugin_find_library(dpdk RDMA_UTIL_LIB "rdma_util")
- string_append(DPDK_LINK_FLAGS "-Wl,--whole-archive,${IBVERBS_LIB},${MLX5_LIB},${MLX4_LIB},${CCAN_LIB},${RDMA_UTIL_LIB},--no-whole-archive")
+ vpp_plugin_find_library(dpdk IBVERBS_LIB "libibverbs.a")
+ vpp_plugin_find_library(dpdk MLX5_LIB "libmlx5.a")
+ vpp_plugin_find_library(dpdk MLX4_LIB "libmlx4.a")
+ string_append(DPDK_LINK_FLAGS "-Wl,--whole-archive,${IBVERBS_LIB},${MLX5_LIB},${MLX4_LIB} -Wl,--no-whole-archive,--exclude-libs,ALL")
endif()
endif()
endif()
@@ -131,10 +131,10 @@ add_vpp_plugin(dpdk
SOURCES
buffer.c
main.c
- thread.c
device/cli.c
device/common.c
device/device.c
+ device/driver.c
device/flow.c
device/format.c
device/init.c
@@ -158,7 +158,7 @@ add_vpp_plugin(dpdk
${DPDK_LINK_LIBRARIES}
LINK_LIBRARIES
- ${OPENSSL_LIBRARIES}
+ ${OPENSSL_CRYPTO_LIBRARIES}
COMPONENT
vpp-plugin-dpdk
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
index 97184519428..f3137a996d6 100644
--- a/src/plugins/dpdk/buffer.c
+++ b/src/plugins/dpdk/buffer.c
@@ -19,6 +19,7 @@
#include <rte_config.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>
+#include <rte_cryptodev.h>
#include <rte_vfio.h>
#include <rte_version.h>
@@ -115,6 +116,9 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp)
mp->populated_size++;
nmp->populated_size++;
}
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 3, 0, 0)
+ mp->flags &= ~RTE_MEMPOOL_F_NON_IO;
+#endif
/* call the object initializers */
rte_mempool_obj_iter (mp, rte_pktmbuf_init, 0);
@@ -131,11 +135,11 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp)
{
vlib_buffer_t *b;
b = vlib_buffer_ptr_from_index (buffer_mem_start, bp->buffers[i], 0);
- vlib_buffer_copy_template (b, &bp->buffer_template);
+ b->template = bp->buffer_template;
}
/* map DMA pages if at least one physical device exists */
- if (rte_eth_dev_count_avail ())
+ if (rte_eth_dev_count_avail () || rte_cryptodev_count ())
{
uword i;
size_t page_sz;
@@ -193,7 +197,7 @@ dpdk_ops_vpp_free (struct rte_mempool *mp)
#endif
static_always_inline void
-dpdk_ops_vpp_enqueue_one (vlib_buffer_t * bt, void *obj)
+dpdk_ops_vpp_enqueue_one (vlib_buffer_template_t *bt, void *obj)
{
/* Only non-replicated packets (b->ref_count == 1) expected */
@@ -201,7 +205,7 @@ dpdk_ops_vpp_enqueue_one (vlib_buffer_t * bt, void *obj)
vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
ASSERT (b->ref_count == 1);
ASSERT (b->buffer_pool_index == bt->buffer_pool_index);
- vlib_buffer_copy_template (b, bt);
+ b->template = *bt;
}
int
@@ -210,14 +214,14 @@ CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue) (struct rte_mempool * mp,
{
const int batch_size = 32;
vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_t bt;
+ vlib_buffer_template_t bt;
u8 buffer_pool_index = mp->pool_id;
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
u32 bufs[batch_size];
u32 n_left = n;
void *const *obj = obj_table;
- vlib_buffer_copy_template (&bt, &bp->buffer_template);
+ bt = bp->buffer_template;
while (n_left >= 4)
{
@@ -259,9 +263,9 @@ CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue) (struct rte_mempool * mp,
CLIB_MARCH_FN_REGISTRATION (dpdk_ops_vpp_enqueue);
static_always_inline void
-dpdk_ops_vpp_enqueue_no_cache_one (vlib_main_t * vm, struct rte_mempool *old,
+dpdk_ops_vpp_enqueue_no_cache_one (vlib_main_t *vm, struct rte_mempool *old,
struct rte_mempool *new, void *obj,
- vlib_buffer_t * bt)
+ vlib_buffer_template_t *bt)
{
struct rte_mbuf *mb = obj;
vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
@@ -269,7 +273,7 @@ dpdk_ops_vpp_enqueue_no_cache_one (vlib_main_t * vm, struct rte_mempool *old,
if (clib_atomic_sub_fetch (&b->ref_count, 1) == 0)
{
u32 bi = vlib_get_buffer_index (vm, b);
- vlib_buffer_copy_template (b, bt);
+ b->template = *bt;
vlib_buffer_pool_put (vm, bt->buffer_pool_index, &bi, 1);
return;
}
@@ -281,12 +285,12 @@ CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue_no_cache) (struct rte_mempool * cmp,
unsigned n)
{
vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_t bt;
+ vlib_buffer_template_t bt;
struct rte_mempool *mp;
mp = dpdk_mempool_by_buffer_pool_index[cmp->pool_id];
u8 buffer_pool_index = cmp->pool_id;
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
- vlib_buffer_copy_template (&bt, &bp->buffer_template);
+ bt = bp->buffer_template;
while (n >= 4)
{
@@ -456,11 +460,9 @@ dpdk_buffer_pools_create (vlib_main_t * vm)
ops.dequeue = dpdk_ops_vpp_dequeue_no_cache;
rte_mempool_register_ops (&ops);
- /* *INDENT-OFF* */
vec_foreach (bp, vm->buffer_main->buffer_pools)
if (bp->start && (err = dpdk_buffer_pool_init (vm, bp)))
return err;
- /* *INDENT-ON* */
return 0;
}
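With vlib_buffer_copy_template() superseded by assignment of the new vlib_buffer_template_t member, resetting buffer metadata from the pool template becomes a single struct copy; a minimal sketch of the pattern used above:

static_always_inline void
reset_buffer_metadata_sketch (vlib_main_t *vm, u8 buffer_pool_index,
			      vlib_buffer_t *b)
{
  vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
  b->template = bp->buffer_template; /* one wide struct assignment */
}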
diff --git a/src/plugins/dpdk/cryptodev/cryptodev.c b/src/plugins/dpdk/cryptodev/cryptodev.c
index d52fa407ec5..43c2c879aab 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev.c
@@ -29,7 +29,6 @@
#include <rte_cryptodev.h>
#include <rte_crypto_sym.h>
#include <rte_crypto.h>
-#include <rte_cryptodev_pmd.h>
#include <rte_config.h>
#include "cryptodev.h"
@@ -52,12 +51,19 @@ prepare_aead_xform (struct rte_crypto_sym_xform *xform,
xform->type = RTE_CRYPTO_SYM_XFORM_AEAD;
xform->next = 0;
- if (key->alg != VNET_CRYPTO_ALG_AES_128_GCM &&
- key->alg != VNET_CRYPTO_ALG_AES_192_GCM &&
- key->alg != VNET_CRYPTO_ALG_AES_256_GCM)
+ if (key->alg == VNET_CRYPTO_ALG_AES_128_GCM ||
+ key->alg == VNET_CRYPTO_ALG_AES_192_GCM ||
+ key->alg == VNET_CRYPTO_ALG_AES_256_GCM)
+ {
+ aead_xform->algo = RTE_CRYPTO_AEAD_AES_GCM;
+ }
+ else if (key->alg == VNET_CRYPTO_ALG_CHACHA20_POLY1305)
+ {
+ aead_xform->algo = RTE_CRYPTO_AEAD_CHACHA20_POLY1305;
+ }
+ else
return -1;
- aead_xform->algo = RTE_CRYPTO_AEAD_AES_GCM;
aead_xform->op = (op_type == CRYPTODEV_OP_TYPE_ENCRYPT) ?
RTE_CRYPTO_AEAD_OP_ENCRYPT : RTE_CRYPTO_AEAD_OP_DECRYPT;
aead_xform->aad_length = aad_len;
@@ -135,7 +141,7 @@ prepare_linked_xform (struct rte_crypto_sym_xform *xforms,
}
static_always_inline void
-cryptodev_session_del (struct rte_cryptodev_sym_session *sess)
+cryptodev_session_del (cryptodev_session_t *sess)
{
u32 n_devs, i;
@@ -145,9 +151,14 @@ cryptodev_session_del (struct rte_cryptodev_sym_session *sess)
n_devs = rte_cryptodev_count ();
for (i = 0; i < n_devs; i++)
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (rte_cryptodev_sym_session_free (i, sess) == 0)
+ break;
+#else
rte_cryptodev_sym_session_clear (i, sess);
rte_cryptodev_sym_session_free (sess);
+#endif
}
static int
@@ -310,7 +321,7 @@ cryptodev_sess_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop,
if (cryptodev_check_supported_vnet_alg (key) == 0)
return;
- vec_validate (ckey->keys, idx);
+ vec_validate (ckey->keys, vec_len (cmt->per_numa_data) - 1);
vec_foreach_index (i, ckey->keys)
vec_validate (ckey->keys[i], CRYPTODEV_N_OP_TYPES - 1);
}
@@ -322,6 +333,59 @@ cryptodev_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop,
cryptodev_sess_handler (vm, kop, idx, 8);
}
+clib_error_t *
+allocate_session_pools (u32 numa_node,
+ cryptodev_session_pool_t *sess_pools_elt, u32 len)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ u8 *name;
+ clib_error_t *error = NULL;
+
+ name = format (0, "vcrypto_sess_pool_%u_%04x%c", numa_node, len, 0);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ sess_pools_elt->sess_pool = rte_cryptodev_sym_session_pool_create (
+ (char *) name, CRYPTODEV_NB_SESSION, cmt->sess_sz, 0, 0, numa_node);
+#else
+ sess_pools_elt->sess_pool = rte_cryptodev_sym_session_pool_create (
+ (char *) name, CRYPTODEV_NB_SESSION, 0, 0, 0, numa_node);
+#endif
+
+ if (!sess_pools_elt->sess_pool)
+ {
+ error = clib_error_return (0, "Not enough memory for mp %s", name);
+ goto clear_mempools;
+ }
+ vec_free (name);
+
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ name = format (0, "crypto_sess_pool_%u_%04x%c", numa_node, len, 0);
+ sess_pools_elt->sess_priv_pool = rte_mempool_create (
+ (char *) name, CRYPTODEV_NB_SESSION * (cmt->drivers_cnt), cmt->sess_sz, 0,
+ 0, NULL, NULL, NULL, NULL, numa_node, 0);
+
+ if (!sess_pools_elt->sess_priv_pool)
+ {
+ error = clib_error_return (0, "Not enough memory for mp %s", name);
+ goto clear_mempools;
+ }
+ vec_free (name);
+#endif
+
+clear_mempools:
+ if (error)
+ {
+ vec_free (name);
+ if (sess_pools_elt->sess_pool)
+ rte_mempool_free (sess_pools_elt->sess_pool);
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ if (sess_pools_elt->sess_priv_pool)
+ rte_mempool_free (sess_pools_elt->sess_priv_pool);
+#endif
+ return error;
+ }
+ return 0;
+}
+
int
cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
u32 aad_len)
@@ -330,52 +394,106 @@ cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
cryptodev_numa_data_t *numa_data;
cryptodev_inst_t *dev_inst;
vnet_crypto_key_t *key = vnet_crypto_get_key (idx);
- struct rte_mempool *sess_pool, *sess_priv_pool;
+ struct rte_mempool *sess_pool;
+ cryptodev_session_pool_t *sess_pools_elt;
cryptodev_key_t *ckey = vec_elt_at_index (cmt->keys, idx);
struct rte_crypto_sym_xform xforms_enc[2] = { { 0 } };
struct rte_crypto_sym_xform xforms_dec[2] = { { 0 } };
- struct rte_cryptodev_sym_session *sessions[CRYPTODEV_N_OP_TYPES] = { 0 };
+ cryptodev_session_t *sessions[CRYPTODEV_N_OP_TYPES] = { 0 };
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ struct rte_mempool *sess_priv_pool;
+ struct rte_cryptodev_info dev_info;
+#endif
u32 numa_node = vm->numa_node;
- int ret;
+ clib_error_t *error;
+ int ret = 0;
+ u8 found = 0;
numa_data = vec_elt_at_index (cmt->per_numa_data, numa_node);
- sess_pool = numa_data->sess_pool;
- sess_priv_pool = numa_data->sess_priv_pool;
- sessions[CRYPTODEV_OP_TYPE_ENCRYPT] =
- rte_cryptodev_sym_session_create (sess_pool);
- if (!sessions[CRYPTODEV_OP_TYPE_ENCRYPT])
+ clib_spinlock_lock (&cmt->tlock);
+ vec_foreach (sess_pools_elt, numa_data->sess_pools)
{
- ret = -1;
- goto clear_key;
+ if (sess_pools_elt->sess_pool == NULL)
+ {
+ error = allocate_session_pools (numa_node, sess_pools_elt,
+ vec_len (numa_data->sess_pools) - 1);
+ if (error)
+ {
+ ret = -1;
+ goto clear_key;
+ }
+ }
+ if (rte_mempool_avail_count (sess_pools_elt->sess_pool) >= 2)
+ {
+ found = 1;
+ break;
+ }
}
- sessions[CRYPTODEV_OP_TYPE_DECRYPT] =
- rte_cryptodev_sym_session_create (sess_pool);
- if (!sessions[CRYPTODEV_OP_TYPE_DECRYPT])
+ if (found == 0)
{
- ret = -1;
- goto clear_key;
+ vec_add2 (numa_data->sess_pools, sess_pools_elt, 1);
+ error = allocate_session_pools (numa_node, sess_pools_elt,
+ vec_len (numa_data->sess_pools) - 1);
+ if (error)
+ {
+ ret = -1;
+ goto clear_key;
+ }
}
+ sess_pool = sess_pools_elt->sess_pool;
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ sess_priv_pool = sess_pools_elt->sess_priv_pool;
+
+ sessions[CRYPTODEV_OP_TYPE_ENCRYPT] =
+ rte_cryptodev_sym_session_create (sess_pool);
+
+ sessions[CRYPTODEV_OP_TYPE_DECRYPT] =
+ rte_cryptodev_sym_session_create (sess_pool);
+#endif
+
if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
ret = prepare_linked_xform (xforms_enc, CRYPTODEV_OP_TYPE_ENCRYPT, key);
else
ret =
prepare_aead_xform (xforms_enc, CRYPTODEV_OP_TYPE_ENCRYPT, key, aad_len);
if (ret)
- return 0;
+ {
+ ret = -1;
+ goto clear_key;
+ }
if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
prepare_linked_xform (xforms_dec, CRYPTODEV_OP_TYPE_DECRYPT, key);
else
prepare_aead_xform (xforms_dec, CRYPTODEV_OP_TYPE_DECRYPT, key, aad_len);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ dev_inst = vec_elt_at_index (cmt->cryptodev_inst, 0);
+ u32 dev_id = dev_inst->dev_id;
+ sessions[CRYPTODEV_OP_TYPE_ENCRYPT] =
+ rte_cryptodev_sym_session_create (dev_id, xforms_enc, sess_pool);
+ sessions[CRYPTODEV_OP_TYPE_DECRYPT] =
+ rte_cryptodev_sym_session_create (dev_id, xforms_dec, sess_pool);
+ if (!sessions[CRYPTODEV_OP_TYPE_ENCRYPT] ||
+ !sessions[CRYPTODEV_OP_TYPE_DECRYPT])
+ {
+ ret = -1;
+ goto clear_key;
+ }
+
+ rte_cryptodev_sym_session_opaque_data_set (
+ sessions[CRYPTODEV_OP_TYPE_ENCRYPT], aad_len);
+ rte_cryptodev_sym_session_opaque_data_set (
+ sessions[CRYPTODEV_OP_TYPE_DECRYPT], aad_len);
+#else
vec_foreach (dev_inst, cmt->cryptodev_inst)
{
u32 dev_id = dev_inst->dev_id;
- struct rte_cryptodev *cdev = rte_cryptodev_pmd_get_dev (dev_id);
- u32 driver_id = cdev->driver_id;
+ rte_cryptodev_info_get (dev_id, &dev_info);
+ u32 driver_id = dev_info.driver_id;
/* if the session is already configured for the driver type, avoid
configuring it again to increase the session data's refcnt */
@@ -390,11 +508,12 @@ cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
dev_id, sessions[CRYPTODEV_OP_TYPE_DECRYPT], xforms_dec,
sess_priv_pool);
if (ret < 0)
- return ret;
+ goto clear_key;
}
sessions[CRYPTODEV_OP_TYPE_ENCRYPT]->opaque_data = aad_len;
sessions[CRYPTODEV_OP_TYPE_DECRYPT]->opaque_data = aad_len;
+#endif
CLIB_MEMORY_STORE_BARRIER ();
ckey->keys[numa_node][CRYPTODEV_OP_TYPE_ENCRYPT] =
@@ -408,6 +527,7 @@ clear_key:
cryptodev_session_del (sessions[CRYPTODEV_OP_TYPE_ENCRYPT]);
cryptodev_session_del (sessions[CRYPTODEV_OP_TYPE_DECRYPT]);
}
+ clib_spinlock_unlock (&cmt->tlock);
return ret;
}
@@ -459,14 +579,14 @@ cryptodev_assign_resource (cryptodev_engine_thread_t * cet,
return -EBUSY;
vec_foreach_index (idx, cmt->cryptodev_inst)
- {
- cinst = cmt->cryptodev_inst + idx;
- if (cinst->dev_id == cet->cryptodev_id &&
- cinst->q_id == cet->cryptodev_q)
- break;
- }
+ {
+ cinst = cmt->cryptodev_inst + idx;
+ if (cinst->dev_id == cet->cryptodev_id &&
+ cinst->q_id == cet->cryptodev_q)
+ break;
+ }
/* invalid existing worker resource assignment */
- if (idx == vec_len (cmt->cryptodev_inst))
+ if (idx >= vec_len (cmt->cryptodev_inst))
return -EINVAL;
clib_spinlock_lock (&cmt->tlock);
clib_bitmap_set_no_check (cmt->active_cdev_inst_mask, idx, 0);
@@ -547,6 +667,90 @@ VLIB_CLI_COMMAND (show_cryptodev_assignment, static) = {
};
static clib_error_t *
+cryptodev_show_cache_rings_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ u32 thread_index = 0;
+ u16 i;
+ vec_foreach_index (thread_index, cmt->per_thread_data)
+ {
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 head = ring->head;
+ u16 tail = ring->tail;
+ u16 n_cached = (CRYPTODEV_CACHE_QUEUE_SIZE - tail + head) &
+ CRYPTODEV_CACHE_QUEUE_MASK;
+
+ u16 enq_head = ring->enq_head;
+ u16 deq_tail = ring->deq_tail;
+ u16 n_frames_inflight =
+ (enq_head == deq_tail) ?
+ 0 :
+ ((CRYPTODEV_CACHE_QUEUE_SIZE + enq_head - deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK);
+      /* even if some elements of the dequeued frame are still pending
+       * dequeue, we consider the frame processed */
+ u16 n_frames_processed =
+ ((tail == deq_tail) && (ring->frames[deq_tail].f == 0)) ?
+ 0 :
+ ((CRYPTODEV_CACHE_QUEUE_SIZE - tail + deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK) +
+ 1;
+      /* even if some elements of the enqueued frame are still pending
+       * enqueue, we consider the frame enqueued */
+ u16 n_frames_pending =
+ (head == enq_head) ? 0 :
+ ((CRYPTODEV_CACHE_QUEUE_SIZE - enq_head + head) &
+ CRYPTODEV_CACHE_QUEUE_MASK) -
+ 1;
+
+ u16 elts_to_enq =
+ (ring->frames[enq_head].n_elts - ring->frames[enq_head].enq_elts_head);
+ u16 elts_to_deq =
+ (ring->frames[deq_tail].n_elts - ring->frames[deq_tail].deq_elts_tail);
+
+ u32 elts_total = 0;
+
+ for (i = 0; i < CRYPTODEV_CACHE_QUEUE_SIZE; i++)
+ elts_total += ring->frames[i].n_elts;
+
+ if (vlib_num_workers () > 0 && thread_index == 0)
+ continue;
+
+ vlib_cli_output (vm, "\n\n");
+ vlib_cli_output (vm, "Frames cached in the ring: %u", n_cached);
+ vlib_cli_output (vm, "Frames cached but not processed: %u",
+ n_frames_pending);
+ vlib_cli_output (vm, "Frames inflight: %u", n_frames_inflight);
+ vlib_cli_output (vm, "Frames processed: %u", n_frames_processed);
+ vlib_cli_output (vm, "Elements total: %u", elts_total);
+ vlib_cli_output (vm, "Elements inflight: %u", cet->inflight);
+ vlib_cli_output (vm, "Head index: %u", head);
+ vlib_cli_output (vm, "Tail index: %u", tail);
+ vlib_cli_output (vm, "Current frame index beeing enqueued: %u",
+ enq_head);
+ vlib_cli_output (vm, "Current frame index being dequeued: %u", deq_tail);
+ vlib_cli_output (vm,
+ "Elements in current frame to be enqueued: %u, waiting "
+ "to be enqueued: %u",
+ ring->frames[enq_head].n_elts, elts_to_enq);
+ vlib_cli_output (vm,
+ "Elements in current frame to be dequeued: %u, waiting "
+ "to be dequeued: %u",
+ ring->frames[deq_tail].n_elts, elts_to_deq);
+ vlib_cli_output (vm, "\n\n");
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_cryptodev_sw_rings, static) = {
+ .path = "show cryptodev cache status",
+ .short_help = "show status of all cryptodev cache rings",
+ .function = cryptodev_show_cache_rings_fn,
+};
+
+static clib_error_t *
cryptodev_set_assignment_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
@@ -643,6 +847,15 @@ cryptodev_configure (vlib_main_t *vm, u32 cryptodev_id)
rte_cryptodev_info_get (cryptodev_id, &info);
+ /* Starting from DPDK 22.11, VPP does not allow heterogeneous crypto devices
+ anymore. Only devices that have the same driver type as the first
+ initialized device can be initialized.
+ */
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (cmt->drivers_cnt == 1 && cmt->driver_id != info.driver_id)
+ return -1;
+#endif
+
if (!(info.feature_flags & RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO))
return -1;
@@ -656,7 +869,9 @@ cryptodev_configure (vlib_main_t *vm, u32 cryptodev_id)
struct rte_cryptodev_qp_conf qp_cfg;
qp_cfg.mp_session = 0;
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
qp_cfg.mp_session_private = 0;
+#endif
qp_cfg.nb_descriptors = CRYPTODEV_NB_CRYPTO_OPS;
ret = rte_cryptodev_queue_pair_setup (cryptodev_id, i, &qp_cfg,
@@ -675,16 +890,30 @@ cryptodev_configure (vlib_main_t *vm, u32 cryptodev_id)
/* start the device */
rte_cryptodev_start (cryptodev_id);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (cmt->drivers_cnt == 0)
+ {
+ cmt->drivers_cnt = 1;
+ cmt->driver_id = info.driver_id;
+ cmt->sess_sz = rte_cryptodev_sym_get_private_session_size (cryptodev_id);
+ }
+#endif
+
for (i = 0; i < info.max_nb_queue_pairs; i++)
{
cryptodev_inst_t *cdev_inst;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ const char *dev_name = rte_dev_name (info.device);
+#else
+ const char *dev_name = info.device->name;
+#endif
vec_add2(cmt->cryptodev_inst, cdev_inst, 1);
- cdev_inst->desc = vec_new (char, strlen (info.device->name) + 10);
+ cdev_inst->desc = vec_new (char, strlen (dev_name) + 10);
cdev_inst->dev_id = cryptodev_id;
cdev_inst->q_id = i;
- snprintf (cdev_inst->desc, strlen (info.device->name) + 9,
- "%s_q%u", info.device->name, i);
+ snprintf (cdev_inst->desc, strlen (dev_name) + 9, "%s_q%u",
+ info.device->name, i);
}
return 0;
@@ -1016,46 +1245,26 @@ cryptodev_probe (vlib_main_t *vm, u32 n_workers)
return 0;
}
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
static void
-cryptodev_get_max_sz (u32 *max_sess_sz, u32 *max_dp_sz)
+is_drv_unique (u32 driver_id, u32 **unique_drivers)
{
- cryptodev_main_t *cmt = &cryptodev_main;
- cryptodev_inst_t *cinst;
- u32 max_sess = 0, max_dp = 0;
+ u32 *unique_elt;
+ u8 found = 0;
- vec_foreach (cinst, cmt->cryptodev_inst)
+ vec_foreach (unique_elt, *unique_drivers)
{
- u32 sess_sz = rte_cryptodev_sym_get_private_session_size (cinst->dev_id);
- u32 dp_sz = rte_cryptodev_get_raw_dp_ctx_size (cinst->dev_id);
-
- max_sess = clib_max (sess_sz, max_sess);
- max_dp = clib_max (dp_sz, max_dp);
+ if (*unique_elt == driver_id)
+ {
+ found = 1;
+ break;
+ }
}
- *max_sess_sz = max_sess;
- *max_dp_sz = max_dp;
-}
-
-static void
-dpdk_disable_cryptodev_engine (vlib_main_t *vm)
-{
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- cryptodev_main_t *cmt = &cryptodev_main;
- u32 i;
-
- for (i = (vlib_num_workers () > 0); i < tm->n_vlib_mains; i++)
- {
- u32 numa = vlib_get_main_by_index (i)->numa_node;
- cryptodev_numa_data_t *numa_data;
-
- vec_validate (cmt->per_numa_data, numa);
- numa_data = cmt->per_numa_data + numa;
- if (numa_data->sess_pool)
- rte_mempool_free (numa_data->sess_pool);
- if (numa_data->sess_priv_pool)
- rte_mempool_free (numa_data->sess_priv_pool);
- }
+ if (!found)
+ vec_add1 (*unique_drivers, driver_id);
}
+#endif
clib_error_t *
dpdk_cryptodev_init (vlib_main_t * vm)
@@ -1064,30 +1273,53 @@ dpdk_cryptodev_init (vlib_main_t * vm)
vlib_thread_main_t *tm = vlib_get_thread_main ();
cryptodev_engine_thread_t *cet;
cryptodev_numa_data_t *numa_data;
- struct rte_mempool *mp;
+ u32 node;
+ u8 nodes = 0;
u32 skip_master = vlib_num_workers () > 0;
u32 n_workers = tm->n_vlib_mains - skip_master;
- u32 numa = vm->numa_node;
- u32 sess_sz, dp_sz;
u32 eidx;
u32 i;
- u8 *name = 0;
clib_error_t *error;
cmt->iova_mode = rte_eal_iova_mode ();
- vec_validate (cmt->per_numa_data, vm->numa_node);
+ clib_bitmap_foreach (node, tm->cpu_socket_bitmap)
+ {
+ if (node >= nodes)
+ nodes = node;
+ }
+
+ vec_validate (cmt->per_numa_data, nodes);
+ vec_foreach (numa_data, cmt->per_numa_data)
+ {
+ vec_validate (numa_data->sess_pools, 0);
+ }
/* probe all cryptodev devices and get queue info */
if (cryptodev_probe (vm, n_workers) < 0)
+ return 0;
+
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ struct rte_cryptodev_info dev_info;
+ cryptodev_inst_t *dev_inst;
+ u32 *unique_drivers = 0;
+ vec_foreach (dev_inst, cmt->cryptodev_inst)
{
- error = clib_error_return (0, "Failed to configure cryptodev");
- goto err_handling;
+ u32 dev_id = dev_inst->dev_id;
+ rte_cryptodev_info_get (dev_id, &dev_info);
+ u32 driver_id = dev_info.driver_id;
+ is_drv_unique (driver_id, &unique_drivers);
+
+ u32 sess_sz =
+ rte_cryptodev_sym_get_private_session_size (dev_inst->dev_id);
+ cmt->sess_sz = clib_max (cmt->sess_sz, sess_sz);
}
- cryptodev_get_max_sz (&sess_sz, &dp_sz);
+ cmt->drivers_cnt = vec_len (unique_drivers);
+ vec_free (unique_drivers);
+#endif
- clib_bitmap_vec_validate (cmt->active_cdev_inst_mask, tm->n_vlib_mains);
+ clib_bitmap_vec_validate (cmt->active_cdev_inst_mask, n_workers);
clib_spinlock_init (&cmt->tlock);
vec_validate_aligned(cmt->per_thread_data, tm->n_vlib_mains - 1,
@@ -1095,46 +1327,13 @@ dpdk_cryptodev_init (vlib_main_t * vm)
for (i = skip_master; i < tm->n_vlib_mains; i++)
{
cet = cmt->per_thread_data + i;
- numa = vlib_get_main_by_index (i)->numa_node;
- vec_validate (cmt->per_numa_data, numa);
- numa_data = vec_elt_at_index (cmt->per_numa_data, numa);
-
- if (!numa_data->sess_pool)
+ if (cryptodev_assign_resource (cet, 0, CRYPTODEV_RESOURCE_ASSIGN_AUTO) <
+ 0)
{
- /* create session pool for the numa node */
- name = format (0, "vcryptodev_sess_pool_%u%c", numa, 0);
- mp = rte_cryptodev_sym_session_pool_create (
- (char *) name, CRYPTODEV_NB_SESSION, 0, 0, 0, numa);
- if (!mp)
- {
- error =
- clib_error_return (0, "Not enough memory for mp %s", name);
- goto err_handling;
- }
- vec_free (name);
-
- numa_data->sess_pool = mp;
-
- /* create session private pool for the numa node */
- name = format (0, "cryptodev_sess_pool_%u%c", numa, 0);
- mp =
- rte_mempool_create ((char *) name, CRYPTODEV_NB_SESSION, sess_sz,
- 0, 0, NULL, NULL, NULL, NULL, numa, 0);
- if (!mp)
- {
- error =
- clib_error_return (0, "Not enough memory for mp %s", name);
- vec_free (name);
- goto err_handling;
- }
-
- vec_free (name);
-
- numa_data->sess_priv_pool = mp;
+ error = clib_error_return (0, "Failed to configure cryptodev");
+ goto err_handling;
}
-
- cryptodev_assign_resource (cet, 0, CRYPTODEV_RESOURCE_ASSIGN_AUTO);
}
/* register handler */
@@ -1154,13 +1353,10 @@ dpdk_cryptodev_init (vlib_main_t * vm)
/* this engine is only enabled when cryptodev device(s) are presented in
* startup.conf. Assume it is wanted to be used, turn on async mode here.
*/
- vnet_crypto_request_async_mode (1);
ipsec_set_async_mode (1);
return 0;
err_handling:
- dpdk_disable_cryptodev_engine (vm);
-
return error;
}
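The occupancy figures printed by the cache-ring CLI above all derive from the same free-running-index arithmetic on a power-of-two ring; condensed into one helper for clarity (a sketch, not part of the patch):

/* head/tail are u16 ring indices kept wrapped with
 * CRYPTODEV_CACHE_QUEUE_MASK; occupancy is their masked difference */
static inline u16
cache_ring_occupancy_sketch (u16 head, u16 tail)
{
  return (CRYPTODEV_CACHE_QUEUE_SIZE - tail + head) &
	 CRYPTODEV_CACHE_QUEUE_MASK;
}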
diff --git a/src/plugins/dpdk/cryptodev/cryptodev.h b/src/plugins/dpdk/cryptodev/cryptodev.h
index 3b47b43f538..7cd525dac56 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev.h
+++ b/src/plugins/dpdk/cryptodev/cryptodev.h
@@ -26,11 +26,13 @@
#define CRYPTODEV_CACHE_QUEUE_MASK (VNET_CRYPTO_FRAME_POOL_SIZE - 1)
#define CRYPTODEV_MAX_INFLIGHT (CRYPTODEV_NB_CRYPTO_OPS - 1)
#define CRYPTODEV_AAD_MASK (CRYPTODEV_NB_CRYPTO_OPS - 1)
-#define CRYPTODEV_DEQ_CACHE_SZ 32
-#define CRYPTODEV_NB_SESSION 10240
+#define CRYPTODE_ENQ_MAX 64
+#define CRYPTODE_DEQ_MAX 64
+#define CRYPTODEV_NB_SESSION 4096
#define CRYPTODEV_MAX_IV_SIZE 16
#define CRYPTODEV_MAX_AAD_SIZE 16
#define CRYPTODEV_MAX_N_SGL 8 /**< maximum number of segments */
+#define CRYPTODEV_MAX_PROCESED_IN_CACHE_QUEUE 8
#define CRYPTODEV_IV_OFFSET (offsetof (cryptodev_op_t, iv))
#define CRYPTODEV_AAD_OFFSET (offsetof (cryptodev_op_t, aad))
@@ -43,7 +45,10 @@
_ (AES_192_GCM, AEAD, AES_GCM, 12, 16, 8, 24) \
_ (AES_192_GCM, AEAD, AES_GCM, 12, 16, 12, 24) \
_ (AES_256_GCM, AEAD, AES_GCM, 12, 16, 8, 32) \
- _ (AES_256_GCM, AEAD, AES_GCM, 12, 16, 12, 32)
+ _ (AES_256_GCM, AEAD, AES_GCM, 12, 16, 12, 32) \
+ _ (CHACHA20_POLY1305, AEAD, CHACHA20_POLY1305, 12, 16, 0, 32) \
+ _ (CHACHA20_POLY1305, AEAD, CHACHA20_POLY1305, 12, 16, 8, 32) \
+ _ (CHACHA20_POLY1305, AEAD, CHACHA20_POLY1305, 12, 16, 12, 32)
/**
* crypto (alg, cryptodev_alg, key_size), hash (alg, digest-size)
@@ -66,7 +71,10 @@
_ (AES_256_CBC, AES_CBC, 32, SHA384, 24) \
_ (AES_128_CBC, AES_CBC, 16, SHA512, 32) \
_ (AES_192_CBC, AES_CBC, 24, SHA512, 32) \
- _ (AES_256_CBC, AES_CBC, 32, SHA512, 32)
+ _ (AES_256_CBC, AES_CBC, 32, SHA512, 32) \
+ _ (AES_128_CTR, AES_CTR, 16, SHA1, 12) \
+ _ (AES_192_CTR, AES_CTR, 24, SHA1, 12) \
+ _ (AES_256_CTR, AES_CTR, 32, SHA1, 12)
typedef enum
{
@@ -75,10 +83,16 @@ typedef enum
CRYPTODEV_N_OP_TYPES,
} cryptodev_op_type_t;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+typedef void cryptodev_session_t;
+#else
+typedef struct rte_cryptodev_sym_session cryptodev_session_t;
+#endif
+
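DPDK 22.11 made struct rte_cryptodev_sym_session opaque, which is why the typedef above degrades to void on newer releases and why several call sites below grow RTE_VERSION gates. A rough standalone illustration of how such a version gate compares releases; the byte packing here is an assumption mirroring DPDK's rte_version.h, not a copy of it:

    #include <stdint.h>
    #include <stdio.h>

    /* assumed packing: one byte per version component, comparable as u32 */
    #define VERSION_NUM(a, b, c, d)                                           \
      ((uint32_t) ((a) << 24 | (b) << 16 | (c) << 8 | (d)))

    int
    main (void)
    {
      /* 22.11 sorts after 21.11, so the opaque-session branch is taken */
      printf ("21.11=0x%08x 22.11=0x%08x\n", VERSION_NUM (21, 11, 0, 0),
              VERSION_NUM (22, 11, 0, 0));
      return 0;
    }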
/* Cryptodev session data, one entry per direction per numa node */
typedef struct
{
- struct rte_cryptodev_sym_session ***keys;
+ cryptodev_session_t ***keys;
} cryptodev_key_t;
/* Replicate DPDK rte_cryptodev_sym_capability structure with key size ranges
@@ -119,7 +133,14 @@ typedef struct
typedef struct
{
struct rte_mempool *sess_pool;
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
struct rte_mempool *sess_priv_pool;
+#endif
+} cryptodev_session_pool_t;
+
+typedef struct
+{
+ cryptodev_session_pool_t *sess_pools;
} cryptodev_numa_data_t;
typedef struct
@@ -135,26 +156,71 @@ typedef struct
typedef struct
{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- vlib_buffer_t *b[VNET_CRYPTO_FRAME_SIZE];
+ vnet_crypto_async_frame_t *f;
union
{
struct
{
- cryptodev_op_t **cops;
- struct rte_mempool *cop_pool;
- struct rte_ring *ring;
+ /* index of frame elt where enque to
+ * the crypto engine is happening */
+ u8 enq_elts_head;
+ /* index of the frame elt where dequeue
+ * from the crypto engine is happening */
+ u8 deq_elts_tail;
+ u8 elts_inflight;
+
+ u8 op_type;
+ u8 aad_len;
+ u8 n_elts;
+ u16 reserved;
};
+ u64 raw;
+ };
+
+ u64 frame_elts_errs_mask;
+} cryptodev_cache_ring_elt_t;
+
+typedef struct
+{
+ cryptodev_cache_ring_elt_t frames[VNET_CRYPTO_FRAME_POOL_SIZE];
+
+ union
+ {
+ struct
+ {
+ /* head of the cache ring */
+ u16 head;
+ /* tail of the cache ring */
+ u16 tail;
+ /* index of the frame where enqueue
+ * to the crypto engine is happening */
+ u16 enq_head;
+ /* index of the frame where dequeue
+ * from the crypto engine is happening */
+ u16 deq_tail;
+ };
+ u64 raw;
+ };
+} cryptodev_cache_ring_t;
+
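The four cursors above split the ring into three regions: frames pushed by the crypto node but not yet handed to hardware (enq_head up to head), elements in flight in the device (deq_tail up to enq_head), and frames fully drained but not yet popped back to the node (tail up to deq_tail). A standalone sketch of the resulting ordering invariant, using plain integers and hypothetical cursor values:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MASK 63u /* stand-in for CRYPTODEV_CACHE_QUEUE_MASK */

    int
    main (void)
    {
      uint16_t tail = 5, deq_tail = 7, enq_head = 9, head = 12;

      /* distances from tail, modulo the ring size, grow monotonically */
      assert (((deq_tail - tail) & MASK) <= ((enq_head - tail) & MASK));
      assert (((enq_head - tail) & MASK) <= ((head - tail) & MASK));
      printf ("cursor ordering holds\n");
      return 0;
    }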
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vlib_buffer_t *b[VNET_CRYPTO_FRAME_SIZE];
+ union
+ {
+ struct rte_mempool *cop_pool;
struct
{
struct rte_crypto_raw_dp_ctx *ctx;
- struct rte_ring *cached_frame;
u16 aad_index;
u8 *aad_buf;
u64 aad_phy_addr;
- struct rte_cryptodev_sym_session *reset_sess;
+ cryptodev_session_t *reset_sess;
};
};
+
+ cryptodev_cache_ring_t cache_ring;
u16 cryptodev_id;
u16 cryptodev_q;
u16 inflight;
@@ -170,20 +236,122 @@ typedef struct
clib_bitmap_t *active_cdev_inst_mask;
clib_spinlock_t tlock;
cryptodev_capability_t *supported_caps;
+ u32 sess_sz;
+ u32 drivers_cnt;
u8 is_raw_api;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ u8 driver_id;
+#endif
} cryptodev_main_t;
extern cryptodev_main_t cryptodev_main;
+#define CRYPTODEV_CACHE_RING_GET_FRAME(r, i) \
+ ((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].f)
+
+#define CRYPTODEV_CACHE_RING_GET_ERR_MASK(r, i) \
+ ((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].frame_elts_errs_mask)
+
+#define CRYPTODEV_CACHE_RING_GET_FRAME_ELTS_INFLIGHT(r, i) \
+ (((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].enq_elts_head) - \
+ ((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].deq_elts_tail))
+
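Since VNET_CRYPTO_FRAME_POOL_SIZE is a power of two, CRYPTODEV_CACHE_QUEUE_MASK lets the accessors above wrap any monotonically growing index into the ring with a single AND instead of a modulo. A standalone sketch of the masking, with stand-in constants:

    #include <stdint.h>
    #include <stdio.h>

    #define POOL_SIZE 64u              /* stand-in for VNET_CRYPTO_FRAME_POOL_SIZE */
    #define QUEUE_MASK (POOL_SIZE - 1) /* stand-in for CRYPTODEV_CACHE_QUEUE_MASK */

    int
    main (void)
    {
      /* indices 62..66 land in slots 62, 63, 0, 1, 2 */
      for (uint16_t i = 62; i < 67; i++)
        printf ("index %u -> slot %u\n", i, i & QUEUE_MASK);
      return 0;
    }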
static_always_inline void
-cryptodev_mark_frame_err_status (vnet_crypto_async_frame_t *f,
- vnet_crypto_op_status_t s)
+cryptodev_cache_ring_update_enq_head (cryptodev_cache_ring_t *r,
+ vnet_crypto_async_frame_t *f)
+{
+ if (r->frames[r->enq_head].enq_elts_head == f->n_elts)
+ {
+ r->enq_head++;
+ r->enq_head &= CRYPTODEV_CACHE_QUEUE_MASK;
+ f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
+ }
+}
+
+static_always_inline bool
+cryptodev_cache_ring_update_deq_tail (cryptodev_cache_ring_t *r,
+ u16 *const deq)
+{
+ if (r->frames[*deq].deq_elts_tail == r->frames[*deq].n_elts)
+ {
+ *deq += 1;
+ *deq &= CRYPTODEV_CACHE_QUEUE_MASK;
+ return 1;
+ }
+
+ return 0;
+}
+static_always_inline u64
+cryptodev_mark_frame_fill_err (vnet_crypto_async_frame_t *f, u64 current_err,
+ u16 index, u16 n, vnet_crypto_op_status_t op_s)
+{
+ u64 err = current_err;
+ u16 i;
+
+ ERROR_ASSERT (index + n <= VNET_CRYPTO_FRAME_SIZE);
+ ERROR_ASSERT (op_s != VNET_CRYPTO_OP_STATUS_COMPLETED);
+
+ for (i = index; i < (index + n); i++)
+ f->elts[i].status = op_s;
+
+ err |= (~(~(0ull) << n) << index);
+
+ return err;
+}
+
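cryptodev_mark_frame_fill_err records the failure twice: per element in f->elts[i].status, and as a contiguous run of bits in the frame's 64-bit error mask, where ~(~0ull << n) produces the n low one-bits that are then shifted to position index. A minimal standalone check of that construction:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t
    fill_err_bits (uint64_t current_err, uint16_t index, uint16_t n)
    {
      /* ~(~0ull << n) is n low one-bits, e.g. n = 3 -> 0b111 */
      return current_err | (~(~0ull << n) << index);
    }

    int
    main (void)
    {
      /* mark elements 4..6 as failed: bits 4, 5 and 6 -> 0x70 */
      printf ("0x%llx\n", (unsigned long long) fill_err_bits (0, 4, 3));
      return 0;
    }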
+static_always_inline cryptodev_cache_ring_elt_t *
+cryptodev_cache_ring_push (cryptodev_cache_ring_t *r,
+ vnet_crypto_async_frame_t *f)
+{
+ u16 head = r->head;
+ u16 tail = r->tail;
+
+ cryptodev_cache_ring_elt_t *ring_elt = &r->frames[head];
+ /**
+ * in debug mode we do the ring sanity test when a frame is enqueued to
+ * the ring.
+ **/
+#if CLIB_DEBUG > 0
+ u16 n_cached = (head >= tail) ? (head - tail) :
+ (CRYPTODEV_CACHE_QUEUE_MASK - tail + head);
+ ERROR_ASSERT (n_cached < CRYPTODEV_CACHE_QUEUE_SIZE);
+ ERROR_ASSERT (r->raw == 0 && r->frames[head].raw == 0 &&
+ r->frames[head].f == 0);
+#endif
+  /* the ring capacity is CRYPTODEV_CACHE_QUEUE_SIZE - 1 */
+  if (PREDICT_FALSE ((head + 1) == tail))
+ return 0;
+
+ ring_elt->f = f;
+ ring_elt->n_elts = f->n_elts;
+ /* update head */
+ r->head++;
+ r->head &= CRYPTODEV_CACHE_QUEUE_MASK;
+ return ring_elt;
+}
+
+static_always_inline vnet_crypto_async_frame_t *
+cryptodev_cache_ring_pop (cryptodev_cache_ring_t *r)
{
- u32 n_elts = f->n_elts, i;
+ vnet_crypto_async_frame_t *f;
+ u16 tail = r->tail;
+ cryptodev_cache_ring_elt_t *ring_elt = &r->frames[tail];
+
+ ERROR_ASSERT (r->frames[r->head].raw == 0 ? r->head != tail : 1);
+ ERROR_ASSERT (r->frames[tail].raw != 0);
+ ERROR_ASSERT (ring_elt->deq_elts_tail == ring_elt->enq_elts_head &&
+ ring_elt->deq_elts_tail == ring_elt->n_elts);
+
+ f = CRYPTODEV_CACHE_RING_GET_FRAME (r, tail);
+ f->state = CRYPTODEV_CACHE_RING_GET_ERR_MASK (r, r->tail) == 0 ?
+ VNET_CRYPTO_FRAME_STATE_SUCCESS :
+ VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
+
+ clib_memset (ring_elt, 0, sizeof (*ring_elt));
+ r->tail++;
+ r->tail &= CRYPTODEV_CACHE_QUEUE_MASK;
- for (i = 0; i < n_elts; i++)
- f->elts[i].status = s;
- f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
+ return f;
}
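Push and pop give the ring the classic one-slot-sacrifice semantics: a push is refused while (head + 1) masked equals tail, so head == tail unambiguously means empty and the usable capacity is CRYPTODEV_CACHE_QUEUE_SIZE - 1, as the comment in cryptodev_cache_ring_push notes; pop then derives the final frame state from the accumulated error mask before recycling the slot. A standalone sketch of the full/empty test with a hypothetical 8-slot ring:

    #include <stdint.h>
    #include <stdio.h>

    #define SIZE 8u
    #define MASK (SIZE - 1)

    int
    main (void)
    {
      uint16_t head = 0, tail = 0, pushed = 0;

      /* push until the full test trips; one slot always stays unused */
      while (((head + 1) & MASK) != tail)
        {
          head = (head + 1) & MASK;
          pushed++;
        }
      printf ("capacity: %u of %u slots\n", pushed, SIZE); /* 7 of 8 */
      return 0;
    }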
int cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
diff --git a/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c b/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
index 4545e24fc83..8d55e4fbf0f 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
@@ -27,7 +27,6 @@
#include <rte_cryptodev.h>
#include <rte_crypto_sym.h>
#include <rte_crypto.h>
-#include <rte_cryptodev_pmd.h>
#include <rte_ring_peek_zc.h>
#include <rte_config.h>
@@ -68,6 +67,23 @@ cryptodev_get_iova (clib_pmalloc_main_t *pm, enum rte_iova_mode mode,
}
static_always_inline void
+cryptodev_validate_mbuf (struct rte_mbuf *mb, vlib_buffer_t *b)
+{
+  /* on the vnet side, vlib_buffer current_length is updated by cipher
+   * padding and icv_sh; the mbuf needs to be kept in sync with these changes */
+ u16 data_len = b->current_length +
+ (b->data + b->current_data - rte_pktmbuf_mtod (mb, u8 *));
+
+ /* for input nodes that are not dpdk-input, it is possible the mbuf
+ * was updated before as one of the chained mbufs. Setting nb_segs
+ * to 1 here to prevent the cryptodev PMD to access potentially
+ * invalid m_src->next pointers.
+ */
+ mb->nb_segs = 1;
+ mb->pkt_len = mb->data_len = data_len;
+}
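The helper above re-derives the mbuf length from the vlib view of the buffer: encryption grows b->current_length (cipher padding plus ICV) without touching the mbuf, so data_len is the vlib length plus however far the vlib data pointer sits past the mbuf's data start. A standalone sketch of the arithmetic with made-up offsets; the variables only mimic the vlib_buffer_t and rte_mbuf fields:

    #include <stdint.h>
    #include <stdio.h>

    int
    main (void)
    {
      uint8_t backing[2048];
      uint8_t *vlib_data = backing;  /* b->data */
      int16_t current_data = 14;     /* b->current_data, past the L2 header */
      uint16_t current_length = 120; /* b->current_length, incl. pad + ICV */
      uint8_t *mbuf_data = backing;  /* rte_pktmbuf_mtod (mb, u8 *) */

      uint16_t data_len =
        current_length + (uint16_t) (vlib_data + current_data - mbuf_data);
      printf ("mbuf data_len = %u\n", data_len); /* 134 */
      return 0;
    }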
+
+static_always_inline void
cryptodev_validate_mbuf_chain (vlib_main_t *vm, struct rte_mbuf *mb,
vlib_buffer_t *b)
{
@@ -125,39 +141,66 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
cryptodev_op_type_t op_type)
{
cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = 1;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline void
+cryptodev_frame_linked_algs_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const enq = &ring->enq_head;
vnet_crypto_async_frame_elt_t *fe;
- struct rte_cryptodev_sym_session *sess = 0;
- cryptodev_op_t **cop;
- u32 *bi;
+ cryptodev_session_t *sess = 0;
+ cryptodev_op_t *cops[CRYPTODE_ENQ_MAX] = {};
+ cryptodev_op_t **cop = cops;
+ u32 *bi = 0;
u32 n_enqueue, n_elts;
u32 last_key_index = ~0;
+ u32 max_to_enq;
if (PREDICT_FALSE (frame == 0 || frame->n_elts == 0))
- return -1;
- n_elts = frame->n_elts;
+ return;
- if (PREDICT_FALSE (CRYPTODEV_NB_CRYPTO_OPS - cet->inflight < n_elts))
- {
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
- }
+ max_to_enq = clib_min (CRYPTODE_ENQ_MAX,
+ frame->n_elts - ring->frames[*enq].enq_elts_head);
+
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
+ return;
+
+ n_elts = max_to_enq;
if (PREDICT_FALSE (
- rte_mempool_get_bulk (cet->cop_pool, (void **) cet->cops, n_elts) < 0))
+ rte_mempool_get_bulk (cet->cop_pool, (void **) cops, n_elts) < 0))
{
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
}
- cop = cet->cops;
- fe = frame->elts;
- bi = frame->buffer_indices;
- cop[0]->frame = frame;
- cop[0]->n_elts = n_elts;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
+ bi = frame->buffer_indices + ring->frames[*enq].enq_elts_head;
while (n_elts)
{
@@ -169,8 +212,8 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (n_elts > 2)
{
- CLIB_PREFETCH (cop[1], CLIB_CACHE_LINE_BYTES * 3, STORE);
- CLIB_PREFETCH (cop[2], CLIB_CACHE_LINE_BYTES * 3, STORE);
+ CLIB_PREFETCH (cop[1], sizeof (*cop[1]), STORE);
+ CLIB_PREFETCH (cop[2], sizeof (*cop[2]), STORE);
clib_prefetch_load (&fe[1]);
clib_prefetch_load (&fe[2]);
}
@@ -184,9 +227,11 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (
cryptodev_session_create (vm, last_key_index, 0) < 0))
{
- cryptodev_mark_frame_err_status (
- frame, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ goto error_exit;
}
}
sess = key->keys[vm->numa_node][op_type];
@@ -216,26 +261,29 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (fe->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS))
cryptodev_validate_mbuf_chain (vm, sop->m_src, b);
else
- /* for input nodes that are not dpdk-input, it is possible the mbuf
- * was updated before as one of the chained mbufs. Setting nb_segs
- * to 1 here to prevent the cryptodev PMD to access potentially
- * invalid m_src->next pointers.
- */
- sop->m_src->nb_segs = 1;
+ cryptodev_validate_mbuf (sop->m_src, b);
+
clib_memcpy_fast (cop[0]->iv, fe->iv, 16);
+ ring->frames[*enq].enq_elts_head++;
cop++;
bi++;
fe++;
n_elts--;
}
- n_enqueue = rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
- (struct rte_crypto_op **) cet->cops,
- frame->n_elts);
- ASSERT (n_enqueue == frame->n_elts);
- cet->inflight += n_enqueue;
+ n_enqueue =
+ rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
+ (struct rte_crypto_op **) cops, max_to_enq);
+ ERROR_ASSERT (n_enqueue == max_to_enq);
+ cet->inflight += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
- return 0;
+error_exit:
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ rte_mempool_put_bulk (cet->cop_pool, (void **) cops, max_to_enq);
}
static_always_inline int
@@ -244,39 +292,64 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
cryptodev_op_type_t op_type, u8 aad_len)
{
cryptodev_main_t *cmt = &cryptodev_main;
- clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = aad_len;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline int
+cryptodev_aead_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type, u8 aad_len)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const enq = &ring->enq_head;
+ clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
vnet_crypto_async_frame_elt_t *fe;
- struct rte_cryptodev_sym_session *sess = 0;
- cryptodev_op_t **cop;
- u32 *bi;
+ cryptodev_session_t *sess = 0;
+ cryptodev_op_t *cops[CRYPTODE_ENQ_MAX] = {};
+ cryptodev_op_t **cop = cops;
+ u32 *bi = 0;
u32 n_enqueue = 0, n_elts;
u32 last_key_index = ~0;
+ u16 left_to_enq = frame->n_elts - ring->frames[*enq].enq_elts_head;
+ const u16 max_to_enq = clib_min (CRYPTODE_ENQ_MAX, left_to_enq);
if (PREDICT_FALSE (frame == 0 || frame->n_elts == 0))
return -1;
- n_elts = frame->n_elts;
- if (PREDICT_FALSE (CRYPTODEV_MAX_INFLIGHT - cet->inflight < n_elts))
- {
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
- }
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
+ return -1;
+
+ n_elts = max_to_enq;
if (PREDICT_FALSE (
- rte_mempool_get_bulk (cet->cop_pool, (void **) cet->cops, n_elts) < 0))
+ rte_mempool_get_bulk (cet->cop_pool, (void **) cops, n_elts) < 0))
{
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
return -1;
}
- cop = cet->cops;
- fe = frame->elts;
- bi = frame->buffer_indices;
- cop[0]->frame = frame;
- cop[0]->n_elts = n_elts;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
+ bi = frame->buffer_indices + ring->frames[*enq].enq_elts_head;
while (n_elts)
{
@@ -286,8 +359,8 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
if (n_elts > 2)
{
- CLIB_PREFETCH (cop[1], CLIB_CACHE_LINE_BYTES * 3, STORE);
- CLIB_PREFETCH (cop[2], CLIB_CACHE_LINE_BYTES * 3, STORE);
+ CLIB_PREFETCH (cop[1], sizeof (*cop[1]), STORE);
+ CLIB_PREFETCH (cop[2], sizeof (*cop[2]), STORE);
clib_prefetch_load (&fe[1]);
clib_prefetch_load (&fe[2]);
}
@@ -301,23 +374,32 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (cryptodev_session_create (vm, last_key_index,
aad_len) < 0))
{
- cryptodev_mark_frame_err_status (
- frame, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ goto error_exit;
}
}
else if (PREDICT_FALSE (
- key->keys[vm->numa_node][op_type]->opaque_data !=
- aad_len))
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ rte_cryptodev_sym_session_opaque_data_get (
+ key->keys[vm->numa_node][op_type]) != (u64) aad_len
+#else
+ key->keys[vm->numa_node][op_type]->opaque_data != aad_len
+#endif
+ ))
{
cryptodev_sess_handler (vm, VNET_CRYPTO_KEY_OP_DEL,
fe->key_index, aad_len);
if (PREDICT_FALSE (cryptodev_session_create (vm, last_key_index,
aad_len) < 0))
{
- cryptodev_mark_frame_err_status (
- frame, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ goto error_exit;
}
}
@@ -348,117 +430,179 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (fe->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS))
cryptodev_validate_mbuf_chain (vm, sop->m_src, b);
else
- /* for input nodes that are not dpdk-input, it is possible the mbuf
- * was updated before as one of the chained mbufs. Setting nb_segs
- * to 1 here to prevent the cryptodev PMD to access potentially
- * invalid m_src->next pointers.
- */
- sop->m_src->nb_segs = 1;
+ cryptodev_validate_mbuf (sop->m_src, b);
+
clib_memcpy_fast (cop[0]->iv, fe->iv, 12);
clib_memcpy_fast (cop[0]->aad, fe->aad, aad_len);
+
cop++;
bi++;
fe++;
n_elts--;
}
- n_enqueue = rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
- (struct rte_crypto_op **) cet->cops,
- frame->n_elts);
- ASSERT (n_enqueue == frame->n_elts);
- cet->inflight += n_enqueue;
+ n_enqueue =
+ rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
+ (struct rte_crypto_op **) cops, max_to_enq);
+ ERROR_ASSERT (n_enqueue == max_to_enq);
+ cet->inflight += max_to_enq;
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
return 0;
-}
-
-static_always_inline u16
-cryptodev_ring_deq (struct rte_ring *r, cryptodev_op_t **cops)
-{
- u16 n, n_elts = 0;
- n = rte_ring_dequeue_bulk_start (r, (void **) cops, 1, 0);
- rte_ring_dequeue_finish (r, 0);
- if (!n)
- return 0;
-
- n = cops[0]->n_elts;
- if (rte_ring_count (r) < n)
- return 0;
-
- n_elts = rte_ring_sc_dequeue_bulk (r, (void **) cops, n, 0);
- ASSERT (n_elts == n);
+error_exit:
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ rte_mempool_put_bulk (cet->cop_pool, (void **) cops, max_to_enq);
- return n_elts;
+ return -1;
}
-static_always_inline vnet_crypto_async_frame_t *
-cryptodev_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
- u32 *enqueue_thread_idx)
+static_always_inline u8
+cryptodev_frame_dequeue_internal (vlib_main_t *vm, u32 *enqueue_thread_idx)
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
- cryptodev_op_t **cop = cet->cops;
+ vnet_crypto_async_frame_t *frame = NULL;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const deq = &ring->deq_tail;
+ u16 n_deq, left_to_deq;
+ u16 max_to_deq = 0;
+ u16 inflight = cet->inflight;
+ u8 dequeue_more = 0;
+ cryptodev_op_t *cops[CRYPTODE_DEQ_MAX] = {};
+ cryptodev_op_t **cop = cops;
vnet_crypto_async_frame_elt_t *fe;
- vnet_crypto_async_frame_t *frame;
- u32 n_elts, n_completed_ops = rte_ring_count (cet->ring);
- u32 ss0 = 0, ss1 = 0, ss2 = 0, ss3 = 0; /* sum of status */
-
- if (cet->inflight)
- {
- n_elts = rte_cryptodev_dequeue_burst (
- cet->cryptodev_id, cet->cryptodev_q,
- (struct rte_crypto_op **) cet->cops, VNET_CRYPTO_FRAME_SIZE);
+ u32 n_elts, n;
+ u64 err0 = 0, err1 = 0, err2 = 0, err3 = 0; /* partial errors mask */
- if (n_elts)
- {
- cet->inflight -= n_elts;
- n_completed_ops += n_elts;
+ left_to_deq =
+ ring->frames[*deq].f->n_elts - ring->frames[*deq].deq_elts_tail;
+ max_to_deq = clib_min (left_to_deq, CRYPTODE_DEQ_MAX);
- rte_ring_sp_enqueue_burst (cet->ring, (void **) cet->cops, n_elts,
- NULL);
- }
- }
+  /* the deq field tracks the frame currently being dequeued; based on it,
+   * the number of elements to dequeue for that frame can be specified */
+ n_deq =
+ rte_cryptodev_dequeue_burst (cet->cryptodev_id, cet->cryptodev_q,
+ (struct rte_crypto_op **) cops, max_to_deq);
- if (PREDICT_FALSE (n_completed_ops == 0))
- return 0;
+ if (n_deq == 0)
+ return dequeue_more;
- n_elts = cryptodev_ring_deq (cet->ring, cop);
- if (!n_elts)
- return 0;
+ frame = ring->frames[*deq].f;
+ fe = frame->elts + ring->frames[*deq].deq_elts_tail;
- frame = cop[0]->frame;
- fe = frame->elts;
+ n_elts = n_deq;
+ n = ring->frames[*deq].deq_elts_tail;
while (n_elts > 4)
{
- ss0 |= fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
- ss1 |= fe[1].status = cryptodev_status_conversion[cop[1]->op.status];
- ss2 |= fe[2].status = cryptodev_status_conversion[cop[2]->op.status];
- ss3 |= fe[3].status = cryptodev_status_conversion[cop[3]->op.status];
+ fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
+ fe[1].status = cryptodev_status_conversion[cop[1]->op.status];
+ fe[2].status = cryptodev_status_conversion[cop[2]->op.status];
+ fe[3].status = cryptodev_status_conversion[cop[3]->op.status];
+
+      err0 |= ((u64) (fe[0].status != VNET_CRYPTO_OP_STATUS_COMPLETED)) << n;
+      err1 |= ((u64) (fe[1].status != VNET_CRYPTO_OP_STATUS_COMPLETED))
+	      << (n + 1);
+      err2 |= ((u64) (fe[2].status != VNET_CRYPTO_OP_STATUS_COMPLETED))
+	      << (n + 2);
+      err3 |= ((u64) (fe[3].status != VNET_CRYPTO_OP_STATUS_COMPLETED))
+	      << (n + 3);
cop += 4;
fe += 4;
n_elts -= 4;
+ n += 4;
}
while (n_elts)
{
- ss0 |= fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
+ fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
+      err0 |= ((u64) (fe[0].status != VNET_CRYPTO_OP_STATUS_COMPLETED)) << n;
+ n++;
fe++;
cop++;
n_elts--;
}
- frame->state = (ss0 | ss1 | ss2 | ss3) == VNET_CRYPTO_OP_STATUS_COMPLETED ?
- VNET_CRYPTO_FRAME_STATE_SUCCESS :
- VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
+ ring->frames[*deq].frame_elts_errs_mask |= (err0 | err1 | err2 | err3);
- rte_mempool_put_bulk (cet->cop_pool, (void **) cet->cops, frame->n_elts);
- *nb_elts_processed = frame->n_elts;
- *enqueue_thread_idx = frame->enqueue_thread_index;
- return frame;
+ rte_mempool_put_bulk (cet->cop_pool, (void **) cops, n_deq);
+
+ inflight -= n_deq;
+ ring->frames[*deq].deq_elts_tail += n_deq;
+ if (cryptodev_cache_ring_update_deq_tail (ring, deq))
+ {
+ u32 fr_processed =
+ (CRYPTODEV_CACHE_QUEUE_SIZE - ring->tail + ring->deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK;
+
+ *enqueue_thread_idx = frame->enqueue_thread_index;
+ dequeue_more = (fr_processed < CRYPTODEV_MAX_PROCESED_IN_CACHE_QUEUE);
+ }
+
+ cet->inflight = inflight;
+ return dequeue_more;
}
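The dequeue loop throttles itself on fr_processed, the number of frames fully drained from the PMD but not yet popped by the crypto node, i.e. the distance from ring->tail to ring->deq_tail modulo the ring size; once CRYPTODEV_MAX_PROCESED_IN_CACHE_QUEUE (8) such frames pile up, dequeue_more drops to 0. A standalone sketch of that distance computation with stand-in constants:

    #include <stdint.h>
    #include <stdio.h>

    #define QUEUE_SIZE 64u
    #define QUEUE_MASK (QUEUE_SIZE - 1)

    int
    main (void)
    {
      uint16_t tail = 60, deq_tail = 3; /* deq_tail has wrapped past the end */
      uint32_t fr_processed = (QUEUE_SIZE - tail + deq_tail) & QUEUE_MASK;
      printf ("frames awaiting pop: %u\n", fr_processed); /* 7 */
      return 0;
    }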
+static_always_inline void
+cryptodev_enqueue_frame (vlib_main_t *vm, cryptodev_cache_ring_elt_t *ring_elt)
+{
+ cryptodev_op_type_t op_type = (cryptodev_op_type_t) ring_elt->op_type;
+ u8 linked_or_aad_len = ring_elt->aad_len;
+
+ if (linked_or_aad_len == 1)
+ cryptodev_frame_linked_algs_enqueue_internal (vm, ring_elt->f, op_type);
+ else
+ cryptodev_aead_enqueue_internal (vm, ring_elt->f, op_type,
+ linked_or_aad_len);
+}
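cryptodev_enqueue_frame overloads aad_len as a tag: linked cipher+auth frames store the sentinel 1 (no AEAD here uses a 1-byte AAD), while AEAD frames store their real AAD length of 0, 8 or 12. A standalone sketch of the dispatch:

    #include <stdint.h>
    #include <stdio.h>

    static void
    dispatch (uint8_t linked_or_aad_len)
    {
      if (linked_or_aad_len == 1)
        printf ("linked-alg enqueue\n");
      else
        printf ("AEAD enqueue, aad_len=%u\n", linked_or_aad_len);
    }

    int
    main (void)
    {
      dispatch (1);  /* linked cipher + auth frame */
      dispatch (0);  /* AEAD with no AAD, e.g. CHACHA20_POLY1305 */
      dispatch (12); /* AEAD with a 12-byte AAD */
      return 0;
    }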
+
+static_always_inline vnet_crypto_async_frame_t *
+cryptodev_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
+ u32 *enqueue_thread_idx)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ vnet_crypto_main_t *cm = &crypto_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ cryptodev_cache_ring_elt_t *ring_elt = &ring->frames[ring->tail];
+
+ vnet_crypto_async_frame_t *ret_frame = 0;
+ u8 dequeue_more = 1;
+
+ while (cet->inflight > 0 && dequeue_more)
+ {
+ dequeue_more = cryptodev_frame_dequeue_internal (vm, enqueue_thread_idx);
+ }
+
+ if (PREDICT_TRUE (ring->frames[ring->enq_head].f != 0))
+ cryptodev_enqueue_frame (vm, &ring->frames[ring->enq_head]);
+
+ if (PREDICT_TRUE (ring_elt->f != 0))
+ {
+ if (ring_elt->n_elts == ring_elt->deq_elts_tail)
+ {
+ *nb_elts_processed = ring_elt->n_elts;
+ vlib_node_set_interrupt_pending (
+ vlib_get_main_by_index (vm->thread_index), cm->crypto_node_index);
+ ret_frame = cryptodev_cache_ring_pop (ring);
+ return ret_frame;
+ }
+ }
+
+ return ret_frame;
+}
+static_always_inline int
+cryptodev_enqueue_aead_aad_0_enc (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_frame_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_ENCRYPT,
+ 0);
+}
static_always_inline int
cryptodev_enqueue_aead_aad_8_enc (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame)
@@ -475,6 +619,13 @@ cryptodev_enqueue_aead_aad_12_enc (vlib_main_t *vm,
}
static_always_inline int
+cryptodev_enqueue_aead_aad_0_dec (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_frame_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_DECRYPT,
+ 0);
+}
+static_always_inline int
cryptodev_enqueue_aead_aad_8_dec (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame)
{
@@ -515,6 +666,7 @@ cryptodev_register_cop_hdl (vlib_main_t *vm, u32 eidx)
struct rte_cryptodev_sym_capability_idx cap_aead_idx;
u8 *name;
clib_error_t *error = 0;
+ u32 ref_cnt = 0;
vec_foreach (cet, cmt->per_thread_data)
{
@@ -525,43 +677,28 @@ cryptodev_register_cop_hdl (vlib_main_t *vm, u32 eidx)
(char *) name, CRYPTODEV_NB_CRYPTO_OPS, sizeof (cryptodev_op_t), 0,
sizeof (struct rte_crypto_op_pool_private), NULL, NULL, crypto_op_init,
NULL, vm->numa_node, 0);
- if (!cet->cop_pool)
- {
- error = clib_error_return (
- 0, "Failed to create cryptodev op pool %s", name);
-
- goto error_exit;
- }
vec_free (name);
-
- name = format (0, "frames_ring_%u_%u", numa, thread_index);
- cet->ring =
- rte_ring_create ((char *) name, CRYPTODEV_NB_CRYPTO_OPS, vm->numa_node,
- RING_F_SP_ENQ | RING_F_SC_DEQ);
- if (!cet->ring)
+ if (!cet->cop_pool)
{
error = clib_error_return (
0, "Failed to create cryptodev op pool %s", name);
goto error_exit;
}
- vec_free (name);
-
- vec_validate (cet->cops, VNET_CRYPTO_FRAME_SIZE - 1);
}
- /** INDENT-OFF **/
#define _(a, b, c, d, e, f, g) \
cap_aead_idx.type = RTE_CRYPTO_SYM_XFORM_AEAD; \
cap_aead_idx.algo.aead = RTE_CRYPTO_##b##_##c; \
if (cryptodev_check_cap_support (&cap_aead_idx, g, e, f)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_ENC, \
- cryptodev_enqueue_aead_aad_##f##_enc, cryptodev_frame_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_enqueue_aead_aad_##f##_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_DEC, \
- cryptodev_enqueue_aead_aad_##f##_dec, cryptodev_frame_dequeue); \
+ cryptodev_enqueue_aead_aad_##f##_dec); \
+ ref_cnt++; \
}
foreach_vnet_aead_crypto_conversion
#undef _
@@ -574,25 +711,25 @@ cryptodev_register_cop_hdl (vlib_main_t *vm, u32 eidx)
if (cryptodev_check_cap_support (&cap_cipher_idx, c, -1, -1) && \
cryptodev_check_cap_support (&cap_auth_idx, -1, e, -1)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_ENC, \
- cryptodev_enqueue_linked_alg_enc, cryptodev_frame_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_enqueue_linked_alg_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_DEC, \
- cryptodev_enqueue_linked_alg_dec, cryptodev_frame_dequeue); \
+ cryptodev_enqueue_linked_alg_dec); \
+ ref_cnt++; \
}
foreach_cryptodev_link_async_alg
#undef _
- /** INDENT-ON **/
+
+ if (ref_cnt)
+ vnet_crypto_register_dequeue_handler (vm, eidx, cryptodev_frame_dequeue);
return 0;
error_exit:
vec_foreach (cet, cmt->per_thread_data)
{
- if (cet->ring)
- rte_ring_free (cet->ring);
-
if (cet->cop_pool)
rte_mempool_free (cet->cop_pool);
}
diff --git a/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c b/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
index 41a1e0c2a09..67ab9c89e67 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
@@ -29,7 +29,7 @@
#include <rte_cryptodev.h>
#include <rte_crypto_sym.h>
#include <rte_crypto.h>
-#include <rte_cryptodev_pmd.h>
+#include <rte_malloc.h>
#include <rte_config.h>
#include "cryptodev.h"
@@ -96,7 +96,7 @@ cryptodev_reset_ctx (cryptodev_engine_thread_t *cet)
{
union rte_cryptodev_session_ctx sess_ctx;
- ASSERT (cet->reset_sess != 0);
+ ERROR_ASSERT (cet->reset_sess != 0);
sess_ctx.crypto_sess = cet->reset_sess;
@@ -112,30 +112,51 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = 1;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline void
+cryptodev_frame_linked_algs_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
vnet_crypto_async_frame_elt_t *fe;
vlib_buffer_t **b;
struct rte_crypto_vec vec[CRYPTODEV_MAX_N_SGL];
struct rte_crypto_va_iova_ptr iv_vec, digest_vec;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const enq = &ring->enq_head;
u32 n_elts;
u32 last_key_index = ~0;
i16 min_ofs;
u32 max_end;
+ u32 max_to_enq = clib_min (CRYPTODE_ENQ_MAX,
+ frame->n_elts - ring->frames[*enq].enq_elts_head);
u8 is_update = 0;
int status;
- n_elts = frame->n_elts;
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
+ return;
- if (PREDICT_FALSE (CRYPTODEV_MAX_INFLIGHT - cet->inflight < n_elts))
- {
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
- }
+ n_elts = max_to_enq;
vlib_get_buffers (vm, frame->buffer_indices, cet->b, frame->n_elts);
- b = cet->b;
- fe = frame->elts;
+ b = cet->b + ring->frames[*enq].enq_elts_head;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
while (n_elts)
{
@@ -215,26 +236,31 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (status < 0))
goto error_exit;
+ ring->frames[*enq].enq_elts_head += 1;
b++;
fe++;
n_elts--;
}
- status = rte_cryptodev_raw_enqueue_done (cet->ctx, frame->n_elts);
+ status = rte_cryptodev_raw_enqueue_done (cet->ctx, max_to_enq);
if (PREDICT_FALSE (status < 0))
- {
- cryptodev_reset_ctx (cet);
- return -1;
- }
+ goto error_exit;
- cet->inflight += frame->n_elts;
- return 0;
+ cet->inflight += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
error_exit:
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ cryptodev_mark_frame_fill_err (frame,
+ ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
cryptodev_reset_ctx (cet);
- return -1;
+
+ return;
}
static_always_inline int
@@ -243,6 +269,28 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = aad_len;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline void
+cryptodev_raw_aead_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type, u8 aad_len)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
vnet_crypto_async_frame_elt_t *fe;
vlib_buffer_t **b;
u32 n_elts;
@@ -250,22 +298,23 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
struct rte_crypto_vec vec[CRYPTODEV_MAX_N_SGL];
struct rte_crypto_va_iova_ptr iv_vec, digest_vec, aad_vec;
u32 last_key_index = ~0;
+ u16 *const enq = &ring->enq_head;
+ u16 left_to_enq = frame->n_elts - ring->frames[*enq].enq_elts_head;
+ u16 max_to_enq = clib_min (CRYPTODE_ENQ_MAX, left_to_enq);
u8 is_update = 0;
int status;
- n_elts = frame->n_elts;
-
- if (PREDICT_FALSE (CRYPTODEV_MAX_INFLIGHT - cet->inflight < n_elts))
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
{
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ return;
}
+ n_elts = max_to_enq;
+
vlib_get_buffers (vm, frame->buffer_indices, cet->b, frame->n_elts);
- fe = frame->elts;
- b = cet->b;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
+ b = cet->b + ring->frames[*enq].enq_elts_head;
cofs.raw = 0;
while (n_elts)
@@ -292,8 +341,13 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
}
if (PREDICT_FALSE (
- (u8) key->keys[vm->numa_node][op_type]->opaque_data !=
- aad_len))
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ rte_cryptodev_sym_session_opaque_data_get (
+ key->keys[vm->numa_node][op_type]) != (u64) aad_len
+#else
+ (u8) key->keys[vm->numa_node][op_type]->opaque_data != aad_len
+#endif
+ ))
{
cryptodev_sess_handler (vm, VNET_CRYPTO_KEY_OP_DEL,
fe->key_index, aad_len);
@@ -349,7 +403,7 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
if (aad_len == 8)
*(u64 *) (cet->aad_buf + aad_offset) = *(u64 *) fe->aad;
- else
+ else if (aad_len != 0)
{
/* aad_len == 12 */
*(u64 *) (cet->aad_buf + aad_offset) = *(u64 *) fe->aad;
@@ -373,31 +427,30 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
if (PREDICT_FALSE (status < 0))
goto error_exit;
+ ring->frames[*enq].enq_elts_head += 1;
fe++;
b++;
n_elts--;
}
- status = rte_cryptodev_raw_enqueue_done (cet->ctx, frame->n_elts);
+ status = rte_cryptodev_raw_enqueue_done (cet->ctx, max_to_enq);
if (PREDICT_FALSE (status < 0))
goto error_exit;
- cet->inflight += frame->n_elts;
-
- return 0;
+ cet->inflight += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
error_exit:
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ cryptodev_mark_frame_fill_err (frame,
+ ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
cryptodev_reset_ctx (cet);
- return -1;
-}
-
-static_always_inline u32
-cryptodev_get_frame_n_elts (void *frame)
-{
- vnet_crypto_async_frame_t *f = (vnet_crypto_async_frame_t *) frame;
- return f->n_elts;
+ return;
}
static_always_inline void
@@ -409,180 +462,114 @@ cryptodev_post_dequeue (void *frame, u32 index, u8 is_op_success)
VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
}
-#define GET_RING_OBJ(r, pos, f) \
- do \
- { \
- vnet_crypto_async_frame_t **ring = (void *) &r[1]; \
- f = ring[(r->cons.head + pos) & r->mask]; \
- } \
- while (0)
-
-static_always_inline vnet_crypto_async_frame_t *
-cryptodev_raw_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
- u32 *enqueue_thread_idx)
+static_always_inline u8
+cryptodev_raw_dequeue_internal (vlib_main_t *vm, u32 *enqueue_thread_idx)
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
- vnet_crypto_async_frame_t *frame, *frame_ret = 0;
- u32 n_deq, n_success;
- u32 n_cached_frame = rte_ring_count (cet->cached_frame), n_room_left;
- u8 no_job_to_deq = 0;
+ vnet_crypto_async_frame_t *frame;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const deq = &ring->deq_tail;
+ u32 n_success;
+ u16 n_deq, i, left_to_deq;
+ u16 max_to_deq = 0;
u16 inflight = cet->inflight;
+ u8 dequeue_more = 0;
int dequeue_status;
- n_room_left = CRYPTODEV_DEQ_CACHE_SZ - n_cached_frame - 1;
+ left_to_deq = ring->frames[*deq].n_elts - ring->frames[*deq].deq_elts_tail;
+ max_to_deq = clib_min (left_to_deq, CRYPTODE_DEQ_MAX);
- if (n_cached_frame)
- {
- u32 i;
- for (i = 0; i < n_cached_frame; i++)
- {
- vnet_crypto_async_frame_t *f;
- void *f_ret;
- enum rte_crypto_op_status op_status;
- u8 n_left, err, j;
+  /* the deq field tracks the frame currently being dequeued; based on it,
+   * the number of elements to dequeue for that frame can be specified */
- GET_RING_OBJ (cet->cached_frame, i, f);
-
- if (i < n_cached_frame - 2)
- {
- vnet_crypto_async_frame_t *f1, *f2;
- GET_RING_OBJ (cet->cached_frame, i + 1, f1);
- GET_RING_OBJ (cet->cached_frame, i + 2, f2);
- clib_prefetch_load (f1);
- clib_prefetch_load (f2);
- }
-
- n_left = f->state & 0x7f;
- err = f->state & 0x80;
-
- for (j = f->n_elts - n_left; j < f->n_elts && inflight; j++)
- {
- int ret;
- f_ret = rte_cryptodev_raw_dequeue (cet->ctx, &ret, &op_status);
-
- if (!f_ret)
- break;
-
- switch (op_status)
- {
- case RTE_CRYPTO_OP_STATUS_SUCCESS:
- f->elts[j].status = VNET_CRYPTO_OP_STATUS_COMPLETED;
- break;
- default:
- f->elts[j].status = VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR;
- err |= 1 << 7;
- }
-
- inflight--;
- }
+ n_deq = rte_cryptodev_raw_dequeue_burst (
+ cet->ctx, NULL, max_to_deq, cryptodev_post_dequeue, (void **) &frame, 0,
+ &n_success, &dequeue_status);
- if (j == f->n_elts)
- {
- if (i == 0)
- {
- frame_ret = f;
- f->state = err ? VNET_CRYPTO_FRAME_STATE_ELT_ERROR :
- VNET_CRYPTO_FRAME_STATE_SUCCESS;
- }
- else
- {
- f->state = f->n_elts - j;
- f->state |= err;
- }
- if (inflight)
- continue;
- }
+ if (n_deq == 0)
+ return dequeue_more;
- /* to here f is not completed dequeued and no more job can be
- * dequeued
- */
- f->state = f->n_elts - j;
- f->state |= err;
- no_job_to_deq = 1;
- break;
- }
+ inflight -= n_deq;
+ if (PREDICT_FALSE (n_success < n_deq))
+ {
+ u16 idx = ring->frames[*deq].deq_elts_tail;
- if (frame_ret)
+ for (i = 0; i < n_deq; i++)
{
- rte_ring_sc_dequeue (cet->cached_frame, (void **) &frame_ret);
- n_room_left++;
+ if (frame->elts[idx + i].status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+	      ring->frames[*deq].frame_elts_errs_mask |= 1ULL << (idx + i);
}
}
+ ring->frames[*deq].deq_elts_tail += n_deq;
- /* no point to dequeue further */
- if (!inflight || no_job_to_deq || !n_room_left)
- goto end_deq;
+ if (cryptodev_cache_ring_update_deq_tail (ring, deq))
+ {
+ u32 fr_processed =
+ (CRYPTODEV_CACHE_QUEUE_SIZE - ring->tail + ring->deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK;
-#if RTE_VERSION >= RTE_VERSION_NUM(21, 5, 0, 0)
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, 0, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#else
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#endif
+ *enqueue_thread_idx = frame->enqueue_thread_index;
+ dequeue_more = (fr_processed < CRYPTODEV_MAX_PROCESED_IN_CACHE_QUEUE);
+ }
- if (!n_deq)
- goto end_deq;
+ int res =
+ rte_cryptodev_raw_dequeue_done (cet->ctx, cet->inflight - inflight);
+ ERROR_ASSERT (res == 0);
+ cet->inflight = inflight;
+ return dequeue_more;
+}
- inflight -= n_deq;
- no_job_to_deq = n_deq < frame->n_elts;
- /* we have to cache the frame */
- if (frame_ret || n_cached_frame || no_job_to_deq)
- {
- frame->state = frame->n_elts - n_deq;
- frame->state |= ((n_success < n_deq) << 7);
- rte_ring_sp_enqueue (cet->cached_frame, (void *) frame);
- n_room_left--;
- }
+static_always_inline void
+cryptodev_enqueue_frame_to_qat (vlib_main_t *vm,
+ cryptodev_cache_ring_elt_t *ring_elt)
+{
+ cryptodev_op_type_t op_type = (cryptodev_op_type_t) ring_elt->op_type;
+ u8 linked_or_aad_len = ring_elt->aad_len;
+
+ if (linked_or_aad_len == 1)
+ cryptodev_frame_linked_algs_enqueue_internal (vm, ring_elt->f, op_type);
else
- {
- frame->state = n_success == frame->n_elts ?
- VNET_CRYPTO_FRAME_STATE_SUCCESS :
- VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
- frame_ret = frame;
- }
+ cryptodev_raw_aead_enqueue_internal (vm, ring_elt->f, op_type,
+ linked_or_aad_len);
+}
- /* see if we can dequeue more */
- while (inflight && n_room_left && !no_job_to_deq)
- {
-#if RTE_VERSION >= RTE_VERSION_NUM(21, 5, 0, 0)
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, 0, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#else
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#endif
- if (!n_deq)
- break;
- inflight -= n_deq;
- no_job_to_deq = n_deq < frame->n_elts;
- frame->state = frame->n_elts - n_deq;
- frame->state |= ((n_success < n_deq) << 7);
- rte_ring_sp_enqueue (cet->cached_frame, (void *) frame);
- n_room_left--;
- }
+static_always_inline vnet_crypto_async_frame_t *
+cryptodev_raw_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
+ u32 *enqueue_thread_idx)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ vnet_crypto_main_t *cm = &crypto_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ cryptodev_cache_ring_elt_t *ring_elt = &ring->frames[ring->tail];
+ vnet_crypto_async_frame_t *ret_frame = 0;
+ u8 dequeue_more = 1;
-end_deq:
- if (inflight < cet->inflight)
- {
- int res =
- rte_cryptodev_raw_dequeue_done (cet->ctx, cet->inflight - inflight);
- ASSERT (res == 0);
- cet->inflight = inflight;
- }
+ while (cet->inflight > 0 && dequeue_more)
+ dequeue_more = cryptodev_raw_dequeue_internal (vm, enqueue_thread_idx);
+
+ if (PREDICT_TRUE (ring->frames[ring->enq_head].f != 0))
+ cryptodev_enqueue_frame_to_qat (vm, &ring->frames[ring->enq_head]);
- if (frame_ret)
+ if (PREDICT_TRUE (ring_elt->f != 0) &&
+ (ring_elt->n_elts == ring_elt->deq_elts_tail))
{
- *nb_elts_processed = frame_ret->n_elts;
- *enqueue_thread_idx = frame_ret->enqueue_thread_index;
+ *nb_elts_processed = ring_elt->n_elts;
+ vlib_node_set_interrupt_pending (
+ vlib_get_main_by_index (vm->thread_index), cm->crypto_node_index);
+ ret_frame = cryptodev_cache_ring_pop (ring);
}
- return frame_ret;
+ return ret_frame;
+}
+
+static_always_inline int
+cryptodev_raw_enq_aead_aad_0_enc (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_raw_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_ENCRYPT, 0);
}
static_always_inline int
@@ -599,6 +586,13 @@ cryptodev_raw_enq_aead_aad_12_enc (vlib_main_t *vm,
}
static_always_inline int
+cryptodev_raw_enq_aead_aad_0_dec (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_raw_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_DECRYPT, 0);
+}
+
+static_always_inline int
cryptodev_raw_enq_aead_aad_8_dec (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame)
{
@@ -639,6 +633,7 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
struct rte_cryptodev_sym_capability_idx cap_aead_idx;
u32 support_raw_api = 1, max_ctx_size = 0;
clib_error_t *error = 0;
+ u8 ref_cnt = 0;
vec_foreach (cinst, cmt->cryptodev_inst)
{
@@ -661,11 +656,7 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
{
u32 thread_id = cet - cmt->per_thread_data;
u32 numa = vlib_get_main_by_index (thread_id)->numa_node;
- u8 *name = format (0, "cache_frame_ring_%u_%u", numa, thread_id);
-
- cet->cached_frame =
- rte_ring_create ((char *) name, CRYPTODEV_DEQ_CACHE_SZ, numa,
- RING_F_SC_DEQ | RING_F_SP_ENQ);
+ u8 *name = format (0, "cache_cache_ring_%u_%u", numa, thread_id);
cet->aad_buf = rte_zmalloc_socket (
0, CRYPTODEV_NB_CRYPTO_OPS * CRYPTODEV_MAX_AAD_SIZE,
@@ -684,28 +675,21 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
error = clib_error_return (0, "Failed to alloc raw dp ctx");
goto err_handling;
}
-
- if (cet->cached_frame == 0)
- {
- error = clib_error_return (0, "Failed to alloc frame ring %s", name);
- goto err_handling;
- }
-
vec_free (name);
}
-/** INDENT-OFF **/
#define _(a, b, c, d, e, f, g) \
cap_aead_idx.type = RTE_CRYPTO_SYM_XFORM_AEAD; \
cap_aead_idx.algo.aead = RTE_CRYPTO_##b##_##c; \
if (cryptodev_check_cap_support (&cap_aead_idx, g, e, f)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_ENC, \
- cryptodev_raw_enq_aead_aad_##f##_enc, cryptodev_raw_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_raw_enq_aead_aad_##f##_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_DEC, \
- cryptodev_raw_enq_aead_aad_##f##_dec, cryptodev_raw_dequeue); \
+ cryptodev_raw_enq_aead_aad_##f##_dec); \
+ ref_cnt++; \
}
foreach_vnet_aead_crypto_conversion
#undef _
@@ -718,26 +702,24 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
if (cryptodev_check_cap_support (&cap_cipher_idx, c, -1, -1) && \
cryptodev_check_cap_support (&cap_auth_idx, -1, e, -1)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_ENC, \
- cryptodev_raw_enq_linked_alg_enc, cryptodev_raw_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_raw_enq_linked_alg_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_DEC, \
- cryptodev_raw_enq_linked_alg_dec, cryptodev_raw_dequeue); \
+ cryptodev_raw_enq_linked_alg_dec); \
+ ref_cnt++; \
}
foreach_cryptodev_link_async_alg
#undef _
- cmt->is_raw_api = 1;
+ if (ref_cnt)
+ vnet_crypto_register_dequeue_handler (vm, eidx, cryptodev_raw_dequeue);
+
+ cmt->is_raw_api = 1;
return 0;
err_handling:
- vec_foreach (cet, cmt->per_thread_data)
- {
- if (cet->cached_frame)
- rte_ring_free (cet->cached_frame);
- }
-
return error;
}
diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c
index 0f771c6ba77..c838800deb4 100644
--- a/src/plugins/dpdk/device/cli.c
+++ b/src/plugins/dpdk/device/cli.c
@@ -77,14 +77,12 @@ show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
* name="mbuf_pool_socket0" available = 15104 allocated = 1280 total = 16384
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_show_dpdk_buffer,static) = {
.path = "show dpdk buffer",
.short_help = "show dpdk buffer",
.function = show_dpdk_buffer,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input,
@@ -142,7 +140,7 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input,
err = clib_error_return_unix (0, "read");
goto error;
}
- _vec_len (s) = len + (n < 0 ? 0 : n);
+ vec_set_len (s, len + (n < 0 ? 0 : n));
}
vlib_cli_output (vm, "%v", s);
@@ -162,14 +160,12 @@ error:
* @cliexstart{show dpdk physmem}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_show_dpdk_physmem,static) = {
.path = "show dpdk physmem",
.short_help = "show dpdk physmem",
.function = show_dpdk_physmem,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
@@ -198,7 +194,7 @@ test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
first = vec_len (allocated_buffers) - n_free;
vlib_buffer_free (vm, allocated_buffers + first, n_free);
- _vec_len (allocated_buffers) = first;
+ vec_set_len (allocated_buffers, first);
}
if (n_alloc)
{
@@ -208,7 +204,7 @@ test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first,
n_alloc);
- _vec_len (allocated_buffers) = first + actual_alloc;
+ vec_set_len (allocated_buffers, first + actual_alloc);
if (actual_alloc < n_alloc)
vlib_cli_output (vm, "WARNING: only allocated %d buffers",
@@ -250,14 +246,12 @@ test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = {
.path = "test dpdk buffer",
.short_help = "test dpdk buffer [allocate <nn>] [free <nn>]",
.function = test_dpdk_buffer,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
@@ -265,6 +259,7 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
{
unformat_input_t _line_input, *line_input = &_line_input;
dpdk_main_t *dm = &dpdk_main;
+ vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hw;
dpdk_device_t *xd;
u32 hw_if_index = (u32) ~ 0;
@@ -277,9 +272,8 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
+ if (unformat (line_input, "%U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
;
else if (unformat (line_input, "tx %d", &nb_tx_desc))
;
@@ -299,30 +293,21 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
goto done;
}
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
xd = vec_elt_at_index (dm->devices, hw->dev_instance);
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- {
- error =
- clib_error_return (0,
- "number of descriptors can be set only for "
- "physical devices");
- goto done;
- }
-
- if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) &&
- (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc))
+ if ((nb_rx_desc == (u32) ~0 || nb_rx_desc == xd->conf.n_rx_desc) &&
+ (nb_tx_desc == (u32) ~0 || nb_tx_desc == xd->conf.n_tx_desc))
{
error = clib_error_return (0, "nothing changed");
goto done;
}
if (nb_rx_desc != (u32) ~ 0)
- xd->nb_rx_desc = nb_rx_desc;
+ xd->conf.n_rx_desc = nb_rx_desc;
if (nb_tx_desc != (u32) ~ 0)
- xd->nb_tx_desc = nb_tx_desc;
+ xd->conf.n_tx_desc = nb_tx_desc;
dpdk_device_setup (xd);
@@ -345,13 +330,11 @@ done:
* Example of how to set the DPDK interface descriptors:
* @cliexcmd{set dpdk interface descriptors GigabitEthernet0/8/0 rx 512 tx 512}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = {
.path = "set dpdk interface descriptors",
.short_help = "set dpdk interface descriptors <interface> [rx <nn>] [tx <nn>]",
.function = set_dpdk_if_desc,
};
-/* *INDENT-ON* */
static clib_error_t *
show_dpdk_version_command_fn (vlib_main_t * vm,
@@ -373,16 +356,15 @@ show_dpdk_version_command_fn (vlib_main_t * vm,
* Example of how to display how many DPDK buffer test command has allocated:
* @cliexstart{show dpdk version}
* DPDK Version: DPDK 16.11.0
- * DPDK EAL init args: -c 1 -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -w 0000:00:08.0 -w 0000:00:09.0 --master-lcore 0 --socket-mem 256
+ * DPDK EAL init args: --in-memory --no-telemetry --file-prefix vpp
+ * -w 0000:00:08.0 -w 0000:00:09.0
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vpe_version_command, static) = {
.path = "show dpdk version",
.short_help = "show dpdk version",
.function = show_dpdk_version_command_fn,
};
-/* *INDENT-ON* */
/* Dummy function to get us linked in. */
void
diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c
index 89046d1a8c0..7a49c5aaef2 100644
--- a/src/plugins/dpdk/device/common.c
+++ b/src/plugins/dpdk/device/common.c
@@ -29,11 +29,26 @@
#include <dpdk/device/dpdk_priv.h>
#include <vppinfra/error.h>
+/* DPDK TX offload to vnet hw interface caps mappings */
+static struct
+{
+ u64 offload;
+ vnet_hw_if_caps_t caps;
+} tx_off_caps_map[] = {
+ { RTE_ETH_TX_OFFLOAD_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_TCP_CKSUM, VNET_HW_IF_CAP_TX_TCP_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_OUTER_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_OUTER_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_TCP_TSO, VNET_HW_IF_CAP_TCP_GSO },
+ { RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO, VNET_HW_IF_CAP_VXLAN_TNL_GSO }
+};
+
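A table like tx_off_caps_map lets device setup translate whatever TX offloads survived capability masking into vnet interface caps with one loop rather than a chain of if statements. A standalone sketch of that fold, using stand-in bit values rather than the DPDK/VNET constants:

    #include <stdint.h>
    #include <stdio.h>

    struct off_cap_map { uint64_t offload; uint32_t caps; };

    static const struct off_cap_map map[] = {
      { 1ull << 0, 0x1 }, /* e.g. IPV4_CKSUM -> CAP_TX_IP4_CKSUM */
      { 1ull << 1, 0x2 }, /* e.g. TCP_CKSUM  -> CAP_TX_TCP_CKSUM */
      { 1ull << 2, 0x4 }, /* e.g. UDP_CKSUM  -> CAP_TX_UDP_CKSUM */
    };

    int
    main (void)
    {
      uint64_t enabled = (1ull << 0) | (1ull << 2);
      uint32_t caps = 0;

      for (unsigned i = 0; i < sizeof (map) / sizeof (map[0]); i++)
        if (enabled & map[i].offload)
          caps |= map[i].caps;
      printf ("caps bitmap: 0x%x\n", caps); /* 0x5 */
      return 0;
    }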
void
dpdk_device_error (dpdk_device_t * xd, char *str, int rv)
{
- dpdk_log_err ("Interface %U error %d: %s",
- format_dpdk_device_name, xd->port_id, rv, rte_strerror (rv));
+ dpdk_log_err ("Interface %U error %d: %s", format_dpdk_device_name,
+ xd->device_index, rv, rte_strerror (rv));
xd->errors = clib_error_return (xd->errors, "%s[port:%d, errno:%d]: %s",
str, xd->port_id, rv, rte_strerror (rv));
}
@@ -41,14 +56,16 @@ dpdk_device_error (dpdk_device_t * xd, char *str, int rv)
void
dpdk_device_setup (dpdk_device_t * xd)
{
- dpdk_main_t *dm = &dpdk_main;
vlib_main_t *vm = vlib_get_main ();
vnet_main_t *vnm = vnet_get_main ();
- vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index);
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+ u16 buf_sz = vlib_buffer_get_default_data_size (vm);
+ vnet_hw_if_caps_change_t caps = {};
struct rte_eth_dev_info dev_info;
- u64 bitmap;
+ struct rte_eth_conf conf = {};
+ u64 rxo, txo;
+ u32 max_frame_size;
int rv;
int j;
@@ -59,70 +76,152 @@ dpdk_device_setup (dpdk_device_t * xd)
if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
{
- vnet_hw_interface_set_flags (dm->vnet_main, xd->hw_if_index, 0);
+ vnet_hw_interface_set_flags (vnm, xd->hw_if_index, 0);
dpdk_device_stop (xd);
}
- /* Enable flow director when flows exist */
- if (xd->pmd == VNET_DPDK_PMD_I40E)
+ rte_eth_dev_info_get (xd->port_id, &dev_info);
+
+ dpdk_log_debug ("[%u] configuring device %U", xd->port_id,
+ format_dpdk_rte_device, dev_info.device);
+
+ /* create rx and tx offload wishlist */
+ rxo = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
+ txo = 0;
+
+ if (xd->conf.enable_tcp_udp_checksum)
+ rxo |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
+
+ if (xd->conf.disable_tx_checksum_offload == 0 &&
+ xd->conf.enable_outer_checksum_offload)
+ txo |=
+ RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
+
+ if (xd->conf.disable_tx_checksum_offload == 0)
+ txo |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
+ RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
+
+ if (xd->conf.disable_multi_seg == 0)
{
- if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0)
- xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_PERFECT;
- else
- xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_NONE;
+ txo |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
+ rxo |= RTE_ETH_RX_OFFLOAD_SCATTER;
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ rxo |= DEV_RX_OFFLOAD_JUMBO_FRAME;
+#endif
}
- rte_eth_dev_info_get (xd->port_id, &dev_info);
-
- bitmap = xd->port_conf.txmode.offloads & ~dev_info.tx_offload_capa;
- if (bitmap)
+ if (xd->conf.enable_lro)
+ rxo |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
+
+ /* per-device offload config */
+ if (xd->conf.enable_tso)
+ txo |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_TSO |
+ RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
+
+ if (xd->conf.disable_rx_scatter)
+ rxo &= ~RTE_ETH_RX_OFFLOAD_SCATTER;
+
+ /* mask unsupported offloads */
+ rxo &= dev_info.rx_offload_capa;
+ txo &= dev_info.tx_offload_capa;
+
+ dpdk_log_debug ("[%u] Supported RX offloads: %U", xd->port_id,
+ format_dpdk_rx_offload_caps, dev_info.rx_offload_capa);
+ dpdk_log_debug ("[%u] Configured RX offloads: %U", xd->port_id,
+ format_dpdk_rx_offload_caps, rxo);
+ dpdk_log_debug ("[%u] Supported TX offloads: %U", xd->port_id,
+ format_dpdk_tx_offload_caps, dev_info.tx_offload_capa);
+ dpdk_log_debug ("[%u] Configured TX offloads: %U", xd->port_id,
+ format_dpdk_tx_offload_caps, txo);
+
+ /* finalize configuration */
+ conf.rxmode.offloads = rxo;
+ conf.txmode.offloads = txo;
+ if (rxo & RTE_ETH_RX_OFFLOAD_TCP_LRO)
+ conf.rxmode.max_lro_pkt_size = xd->conf.max_lro_pkt_size;
+
+ if (xd->conf.enable_lsc_int)
+ conf.intr_conf.lsc = 1;
+ if (xd->conf.enable_rxq_int)
+ conf.intr_conf.rxq = 1;
+
+ conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
+ if (xd->conf.n_rx_queues > 1)
{
- dpdk_log_warn ("unsupported tx offloads requested on port %u: %U",
- xd->port_id, format_dpdk_tx_offload_caps, bitmap);
- xd->port_conf.txmode.offloads ^= bitmap;
+ if (xd->conf.disable_rss == 0)
+ {
+ conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
+ conf.rx_adv_conf.rss_conf.rss_hf = xd->conf.rss_hf;
+ }
}
- bitmap = xd->port_conf.rxmode.offloads & ~dev_info.rx_offload_capa;
- if (bitmap)
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ if (rxo & DEV_RX_OFFLOAD_JUMBO_FRAME)
+ {
+ conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
+ xd->max_supported_frame_size = dev_info.max_rx_pktlen;
+ }
+ else
{
- dpdk_log_warn ("unsupported rx offloads requested on port %u: %U",
- xd->port_id, format_dpdk_rx_offload_caps, bitmap);
- xd->port_conf.rxmode.offloads ^= bitmap;
+ xd->max_supported_frame_size =
+ clib_min (1500 + xd->driver_frame_overhead, buf_sz);
}
+#else
+ if (xd->conf.disable_multi_seg)
+ xd->max_supported_frame_size = clib_min (dev_info.max_rx_pktlen, buf_sz);
+ else
+ xd->max_supported_frame_size = dev_info.max_rx_pktlen;
+#endif
+
+ max_frame_size = clib_min (xd->max_supported_frame_size,
+ ethernet_main.default_mtu + hi->frame_overhead);
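
A worked example of the clamp above, assuming the stock 1500-byte default MTU and 18 bytes of L2 overhead (14-byte Ethernet header plus 4-byte FCS); the real overhead is per-driver, which is what driver_frame_overhead captures:

#include <stdio.h>

int
main (void)
{
  unsigned default_mtu = 1500;	 /* assumed ethernet default */
  unsigned frame_overhead = 18;	 /* 14-byte header + 4-byte FCS, illustrative */
  unsigned max_supported = 9728; /* e.g. a jumbo-capable NIC */

  unsigned wanted = default_mtu + frame_overhead;
  unsigned max_frame_size = wanted < max_supported ? wanted : max_supported;

  printf ("max_frame_size = %u\n", max_frame_size); /* 1518 */
  return 0;
}
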
- rv = rte_eth_dev_configure (xd->port_id, xd->rx_q_used,
- xd->tx_q_used, &xd->port_conf);
+#if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0)
+ conf.rxmode.mtu = max_frame_size - xd->driver_frame_overhead;
+#endif
- if (rv < 0)
+retry:
+ rv = rte_eth_dev_configure (xd->port_id, xd->conf.n_rx_queues,
+ xd->conf.n_tx_queues, &conf);
+ if (rv < 0 && conf.intr_conf.rxq)
{
- dpdk_device_error (xd, "rte_eth_dev_configure", rv);
- goto error;
+ conf.intr_conf.rxq = 0;
+ goto retry;
}
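
If rte_eth_dev_configure() fails while rx-queue interrupts are requested, the code clears intr_conf.rxq and retries once, degrading to polling instead of failing the whole device. The same degrade-and-retry shape in a self-contained sketch (conf_t and do_configure() are stand-ins, not DPDK API):

#include <stdio.h>

typedef struct { int rxq_interrupts; } conf_t;

static int
do_configure (conf_t *c)
{
  /* pretend this PMD rejects interrupt mode */
  return c->rxq_interrupts ? -1 : 0;
}

static int
configure_with_fallback (conf_t *c)
{
  int rv = do_configure (c);
  if (rv < 0 && c->rxq_interrupts)
    {
      c->rxq_interrupts = 0; /* fall back to polling */
      rv = do_configure (c);
    }
  return rv;
}

int
main (void)
{
  conf_t c = { .rxq_interrupts = 1 };
  printf ("configure rv = %d, rxq_interrupts = %d\n",
	  configure_with_fallback (&c), c.rxq_interrupts);
  return 0;
}
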
- vec_validate_aligned (xd->tx_queues, xd->tx_q_used - 1,
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ rte_eth_dev_set_mtu (xd->port_id,
+ max_frame_size - xd->driver_frame_overhead);
+#endif
+
+ hi->max_frame_size = 0;
+ vnet_hw_interface_set_max_frame_size (vnm, xd->hw_if_index, max_frame_size);
+ dpdk_log_debug ("[%u] max_frame_size %u max max_frame_size %u "
+ "driver_frame_overhead %u",
+ xd->port_id, hi->max_frame_size,
+ xd->max_supported_frame_size, xd->driver_frame_overhead);
+
+ vec_validate_aligned (xd->tx_queues, xd->conf.n_tx_queues - 1,
CLIB_CACHE_LINE_BYTES);
- for (j = 0; j < xd->tx_q_used; j++)
+ for (j = 0; j < xd->conf.n_tx_queues; j++)
{
- rv =
- rte_eth_tx_queue_setup (xd->port_id, j, xd->nb_tx_desc,
- xd->cpu_socket, &xd->tx_conf);
+ rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc,
+ xd->cpu_socket, 0);
/* retry with any other CPU socket */
if (rv < 0)
- rv =
- rte_eth_tx_queue_setup (xd->port_id, j,
- xd->nb_tx_desc, SOCKET_ID_ANY,
- &xd->tx_conf);
+ rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc,
+ SOCKET_ID_ANY, 0);
if (rv < 0)
dpdk_device_error (xd, "rte_eth_tx_queue_setup", rv);
- if (xd->tx_q_used < tm->n_vlib_mains)
- clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock);
+ clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock);
}
- vec_validate_aligned (xd->rx_queues, xd->rx_q_used - 1,
+ vec_validate_aligned (xd->rx_queues, xd->conf.n_rx_queues - 1,
CLIB_CACHE_LINE_BYTES);
- for (j = 0; j < xd->rx_q_used; j++)
+
+ for (j = 0; j < xd->conf.n_rx_queues; j++)
{
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, j);
u8 bpidx = vlib_buffer_pool_get_default_for_numa (
@@ -130,12 +229,12 @@ dpdk_device_setup (dpdk_device_t * xd)
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, bpidx);
struct rte_mempool *mp = dpdk_mempool_by_buffer_pool_index[bpidx];
- rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc,
+ rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc,
xd->cpu_socket, 0, mp);
/* retry with any other CPU socket */
if (rv < 0)
- rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc,
+ rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc,
SOCKET_ID_ANY, 0, mp);
rxq->buffer_pool_index = bp->index;
@@ -147,7 +246,40 @@ dpdk_device_setup (dpdk_device_t * xd)
if (vec_len (xd->errors))
goto error;
- rte_eth_dev_set_mtu (xd->port_id, hi->max_packet_bytes);
+ xd->buffer_flags =
+ (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID);
+
+ if ((rxo & (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM)) ==
+ (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM))
+ xd->buffer_flags |=
+ (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_RX_IP4_CKSUM,
+ rxo & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM);
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_MAYBE_MULTISEG,
+ rxo & RTE_ETH_RX_OFFLOAD_SCATTER);
+ dpdk_device_flag_set (
+ xd, DPDK_DEVICE_FLAG_TX_OFFLOAD,
+ (txo & (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) ==
+ (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM));
+
+ /* unconditionally set mac filtering cap */
+ caps.val = caps.mask = VNET_HW_IF_CAP_MAC_FILTER;
+
+ ethernet_set_flags (vnm, xd->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+
+ for (int i = 0; i < ARRAY_LEN (tx_off_caps_map); i++)
+ {
+ __typeof__ (tx_off_caps_map[0]) *v = tx_off_caps_map + i;
+ caps.mask |= v->caps;
+ if ((v->offload & txo) == v->offload)
+ caps.val |= v->caps;
+ }
+
+ vnet_hw_if_change_caps (vnm, xd->hw_if_index, &caps);
+ xd->enabled_rx_off = rxo;
+ xd->enabled_tx_off = txo;
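
The caps loop above walks a lookup table (tx_off_caps_map, defined elsewhere in the file and not shown in this hunk) that maps groups of DPDK TX offload bits onto vnet interface capability flags; a capability is granted only when every offload bit in its group ended up enabled. A reduced sketch with hypothetical bit names:

#include <stdint.h>
#include <stdio.h>

/* hypothetical mapping table in the spirit of tx_off_caps_map */
typedef struct { uint64_t offload; uint32_t caps; } off_caps_map_t;

#define OFF_TCP_CKSUM (1 << 0)
#define OFF_UDP_CKSUM (1 << 1)
#define CAP_TX_CKSUM  (1 << 0)

static const off_caps_map_t map[] = {
  /* capability granted only if *all* listed offload bits are enabled */
  { .offload = OFF_TCP_CKSUM | OFF_UDP_CKSUM, .caps = CAP_TX_CKSUM },
};

int
main (void)
{
  uint64_t txo = OFF_TCP_CKSUM | OFF_UDP_CKSUM;
  uint32_t val = 0, mask = 0;
  for (unsigned i = 0; i < sizeof (map) / sizeof (map[0]); i++)
    {
      mask |= map[i].caps;
      if ((map[i].offload & txo) == map[i].offload)
	val |= map[i].caps;
    }
  printf ("caps mask 0x%x val 0x%x\n", (unsigned) mask, (unsigned) val);
  return 0;
}
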
if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
dpdk_device_start (xd);
@@ -187,17 +319,18 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+ int int_mode = 0;
if (!hi)
return;
- if (!xd->port_conf.intr_conf.rxq)
+ if (!xd->conf.enable_rxq_int)
return;
/* Probe for interrupt support */
if (rte_eth_dev_rx_intr_enable (xd->port_id, 0))
{
dpdk_log_info ("probe for interrupt mode for device %U. Failed.\n",
- format_dpdk_device_name, xd->port_id);
+ format_dpdk_device_name, xd->device_index);
}
else
{
@@ -205,13 +338,13 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
if (!(xd->flags & DPDK_DEVICE_FLAG_INT_UNMASKABLE))
rte_eth_dev_rx_intr_disable (xd->port_id, 0);
dpdk_log_info ("Probe for interrupt mode for device %U. Success.\n",
- format_dpdk_device_name, xd->port_id);
+ format_dpdk_device_name, xd->device_index);
}
if (xd->flags & DPDK_DEVICE_FLAG_INT_SUPPORTED)
{
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
- for (int q = 0; q < xd->rx_q_used; q++)
+ int_mode = 1;
+ for (int q = 0; q < xd->conf.n_rx_queues; q++)
{
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
clib_file_t f = { 0 };
@@ -219,15 +352,15 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
if (rxq->efd < 0)
{
xd->flags &= ~DPDK_DEVICE_FLAG_INT_SUPPORTED;
- hi->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ int_mode = 0;
break;
}
f.read_function = dpdk_rx_read_ready;
f.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
f.file_descriptor = rxq->efd;
f.private_data = rxq->queue_index;
- f.description =
- format (0, "%U queue %u", format_dpdk_device_name, xd->port_id, q);
+ f.description = format (0, "%U queue %u", format_dpdk_device_name,
+ xd->device_index, q);
rxq->clib_file_index = clib_file_add (&file_main, &f);
vnet_hw_if_set_rx_queue_file_index (vnm, rxq->queue_index,
rxq->clib_file_index);
@@ -240,6 +373,11 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
}
}
}
+
+ if (int_mode)
+ vnet_hw_if_set_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
+ else
+ vnet_hw_if_unset_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
vnet_hw_if_update_runtime_data (vnm, xd->hw_if_index);
}
@@ -259,6 +397,11 @@ dpdk_device_start (dpdk_device_t * xd)
return;
}
+ dpdk_log_debug ("[%u] RX burst function: %U", xd->port_id,
+ format_dpdk_burst_fn, xd, VLIB_RX);
+ dpdk_log_debug ("[%u] TX burst function: %U", xd->port_id,
+ format_dpdk_burst_fn, xd, VLIB_TX);
+
dpdk_setup_interrupts (xd);
if (xd->default_mac_address)
@@ -275,8 +418,8 @@ dpdk_device_start (dpdk_device_t * xd)
rte_eth_allmulticast_enable (xd->port_id);
- dpdk_log_info ("Interface %U started",
- format_dpdk_device_name, xd->port_id);
+ dpdk_log_info ("Interface %U started", format_dpdk_device_name,
+ xd->device_index);
}
void
@@ -289,8 +432,8 @@ dpdk_device_stop (dpdk_device_t * xd)
rte_eth_dev_stop (xd->port_id);
clib_memset (&xd->link, 0, sizeof (struct rte_eth_link));
- dpdk_log_info ("Interface %U stopped",
- format_dpdk_device_name, xd->port_id);
+ dpdk_log_info ("Interface %U stopped", format_dpdk_device_name,
+ xd->device_index);
}
void vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
@@ -311,10 +454,11 @@ dpdk_port_state_callback_inline (dpdk_portid_t port_id,
rte_eth_link_get_nowait (port_id, &link);
u8 link_up = link.link_status;
if (link_up)
- dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s",
- port_id, (unsigned) link.link_speed,
- (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
- "full-duplex" : "half-duplex");
+ dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s", port_id,
+ (unsigned) link.link_speed,
+ (link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ?
+ "full-duplex" :
+ "half-duplex");
else
dpdk_log_info ("Port %d Link Down\n\n", port_id);
@@ -337,12 +481,17 @@ dpdk_get_pci_device (const struct rte_eth_dev_info *info)
const struct rte_bus *bus;
bus = rte_bus_find_by_device (info->device);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (bus && !strcmp (rte_bus_name (bus), "pci"))
+#else
if (bus && !strcmp (bus->name, "pci"))
+#endif
return RTE_DEV_TO_PCI (info->device);
else
return NULL;
}
+#ifdef __linux__
/* If this device is VMBUS return pointer to info, otherwise NULL */
struct rte_vmbus_device *
dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
@@ -350,11 +499,16 @@ dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
const struct rte_bus *bus;
bus = rte_bus_find_by_device (info->device);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (bus && !strcmp (rte_bus_name (bus), "vmbus"))
+#else
if (bus && !strcmp (bus->name, "vmbus"))
+#endif
return container_of (info->device, struct rte_vmbus_device, device);
else
return NULL;
}
+#endif /* __linux__ */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c
index 7c083e1dcf4..0ba59562838 100644
--- a/src/plugins/dpdk/device/device.c
+++ b/src/plugins/dpdk/device/device.c
@@ -25,7 +25,6 @@
#include <vlib/unix/unix.h>
#define foreach_dpdk_tx_func_error \
- _(BAD_RETVAL, "DPDK tx function returned an error") \
_(PKT_DROP, "Tx packet drops (dpdk tx failure)")
typedef enum
@@ -153,52 +152,30 @@ dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b,
* support multiple queues. It returns the number of packets untransmitted.
* If all packets are transmitted (the normal case), the function returns 0.
*/
-static_always_inline
- u32 tx_burst_vector_internal (vlib_main_t * vm,
- dpdk_device_t * xd,
- struct rte_mbuf **mb, u32 n_left)
+static_always_inline u32
+tx_burst_vector_internal (vlib_main_t *vm, dpdk_device_t *xd,
+ struct rte_mbuf **mb, u32 n_left, int queue_id,
+ u8 is_shared)
{
- dpdk_main_t *dm = &dpdk_main;
dpdk_tx_queue_t *txq;
u32 n_retry;
int n_sent = 0;
- int queue_id;
n_retry = 16;
- queue_id = vm->thread_index % xd->tx_q_used;
txq = vec_elt_at_index (xd->tx_queues, queue_id);
do
{
- clib_spinlock_lock_if_init (&txq->lock);
+ if (is_shared)
+ clib_spinlock_lock (&txq->lock);
- if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD))
- {
- /* no wrap, transmit in one burst */
- n_sent = rte_eth_tx_burst (xd->port_id, queue_id, mb, n_left);
- n_retry--;
- }
- else
- {
- ASSERT (0);
- n_sent = 0;
- }
-
- clib_spinlock_unlock_if_init (&txq->lock);
-
- if (PREDICT_FALSE (n_sent < 0))
- {
- // emit non-fatal message, bump counter
- vnet_main_t *vnm = dm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- u32 node_index;
+ /* no wrap, transmit in one burst */
+ n_sent = rte_eth_tx_burst (xd->port_id, queue_id, mb, n_left);
- node_index = vec_elt_at_index (im->hw_interfaces,
- xd->hw_if_index)->tx_node_index;
+ if (is_shared)
+ clib_spinlock_unlock (&txq->lock);
- vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1);
- return n_left; // untransmitted packets
- }
+ n_retry--;
n_left -= n_sent;
mb += n_sent;
}
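
The rewritten transmit path drops the old PMD-flag branch and the negative-return handling (rte_eth_tx_burst() returns the number of packets sent, never a negative value), leaving a bounded retry loop that locks only when the queue is shared between threads. A self-contained sketch of the loop shape (fake_tx_burst() is a stand-in, and the loop condition is paraphrased from context not shown in this hunk):

#include <stdio.h>

/* stand-in for rte_eth_tx_burst(): pretend the NIC accepts at most
   4 packets per call */
static unsigned
fake_tx_burst (void **pkts, unsigned n)
{
  (void) pkts;
  return n < 4 ? n : 4;
}

static unsigned
send_all (void **pkts, unsigned n_left)
{
  unsigned n_retry = 16, n_sent;
  do
    {
      /* a shared queue would take a spinlock around this call */
      n_sent = fake_tx_burst (pkts, n_left);
      n_retry--;
      n_left -= n_sent;
      pkts += n_sent;
    }
  while (n_left && n_sent && n_retry);
  return n_left; /* packets we could not transmit */
}

int
main (void)
{
  void *pkts[10] = { 0 };
  printf ("untransmitted: %u\n", send_all (pkts, 10));
  return 0;
}
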
@@ -221,7 +198,8 @@ dpdk_buffer_tx_offload (dpdk_device_t * xd, vlib_buffer_t * b,
{
int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
u32 tso = b->flags & VNET_BUFFER_F_GSO, max_pkt_len;
- u32 ip_cksum, tcp_cksum, udp_cksum;
+ u32 ip_cksum, tcp_cksum, udp_cksum, outer_hdr_len = 0;
+ u32 outer_ip_cksum, vxlan_tunnel;
u64 ol_flags;
vnet_buffer_oflags_t oflags = 0;
@@ -233,25 +211,49 @@ dpdk_buffer_tx_offload (dpdk_device_t * xd, vlib_buffer_t * b,
ip_cksum = oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
tcp_cksum = oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
udp_cksum = oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+ outer_ip_cksum = oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM;
+ vxlan_tunnel = oflags & VNET_BUFFER_OFFLOAD_F_TNL_VXLAN;
- mb->l2_len = vnet_buffer (b)->l3_hdr_offset - b->current_data;
- mb->l3_len = vnet_buffer (b)->l4_hdr_offset -
- vnet_buffer (b)->l3_hdr_offset;
- mb->outer_l3_len = 0;
- mb->outer_l2_len = 0;
- ol_flags = is_ip4 ? PKT_TX_IPV4 : PKT_TX_IPV6;
- ol_flags |= ip_cksum ? PKT_TX_IP_CKSUM : 0;
- ol_flags |= tcp_cksum ? PKT_TX_TCP_CKSUM : 0;
- ol_flags |= udp_cksum ? PKT_TX_UDP_CKSUM : 0;
+ ol_flags = is_ip4 ? RTE_MBUF_F_TX_IPV4 : RTE_MBUF_F_TX_IPV6;
+ ol_flags |= ip_cksum ? RTE_MBUF_F_TX_IP_CKSUM : 0;
+ ol_flags |= tcp_cksum ? RTE_MBUF_F_TX_TCP_CKSUM : 0;
+ ol_flags |= udp_cksum ? RTE_MBUF_F_TX_UDP_CKSUM : 0;
+
+ if (vxlan_tunnel)
+ {
+ ol_flags |= outer_ip_cksum ?
+ RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IP_CKSUM :
+ RTE_MBUF_F_TX_OUTER_IPV6;
+ ol_flags |= RTE_MBUF_F_TX_TUNNEL_VXLAN;
+ mb->l2_len =
+ vnet_buffer (b)->l3_hdr_offset - vnet_buffer2 (b)->outer_l4_hdr_offset;
+ mb->l3_len =
+ vnet_buffer (b)->l4_hdr_offset - vnet_buffer (b)->l3_hdr_offset;
+ mb->outer_l2_len =
+ vnet_buffer2 (b)->outer_l3_hdr_offset - b->current_data;
+ mb->outer_l3_len = vnet_buffer2 (b)->outer_l4_hdr_offset -
+ vnet_buffer2 (b)->outer_l3_hdr_offset;
+ outer_hdr_len = mb->outer_l2_len + mb->outer_l3_len;
+ }
+ else
+ {
+ mb->l2_len = vnet_buffer (b)->l3_hdr_offset - b->current_data;
+ mb->l3_len =
+ vnet_buffer (b)->l4_hdr_offset - vnet_buffer (b)->l3_hdr_offset;
+ mb->outer_l2_len = 0;
+ mb->outer_l3_len = 0;
+ }
if (tso)
{
mb->l4_len = vnet_buffer2 (b)->gso_l4_hdr_sz;
mb->tso_segsz = vnet_buffer2 (b)->gso_size;
/* ensure packet is large enough to require tso */
- max_pkt_len = mb->l2_len + mb->l3_len + mb->l4_len + mb->tso_segsz;
+ max_pkt_len =
+ outer_hdr_len + mb->l2_len + mb->l3_len + mb->l4_len + mb->tso_segsz;
if (mb->tso_segsz != 0 && mb->pkt_len > max_pkt_len)
- ol_flags |= (tcp_cksum ? PKT_TX_TCP_SEG : PKT_TX_UDP_SEG);
+ ol_flags |=
+ (tcp_cksum ? RTE_MBUF_F_TX_TCP_SEG : RTE_MBUF_F_TX_UDP_SEG);
}
mb->ol_flags |= ol_flags;
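
For VXLAN checksum/TSO offload the NIC needs both the inner and outer header lengths, which the code derives as differences between buffer offsets. A worked example with illustrative offsets for an IPv4-in-VXLAN TCP packet:

#include <stdio.h>

int
main (void)
{
  /* illustrative offsets from the start of the buffer */
  unsigned outer_l3_off = 14; /* after outer Ethernet header */
  unsigned outer_l4_off = 34; /* after 20-byte outer IPv4 */
  unsigned inner_l3_off = 64; /* after UDP(8) + VXLAN(8) + inner Eth(14) */
  unsigned inner_l4_off = 84; /* after 20-byte inner IPv4 */

  unsigned outer_l2_len = outer_l3_off - 0;
  unsigned outer_l3_len = outer_l4_off - outer_l3_off;
  /* as in the code above, l2_len spans outer UDP + VXLAN + inner Eth */
  unsigned l2_len = inner_l3_off - outer_l4_off;
  unsigned l3_len = inner_l4_off - inner_l3_off;

  printf ("outer %u/%u inner %u/%u\n", outer_l2_len, outer_l3_len, l2_len,
	  l3_len);
  return 0;
}
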
@@ -274,11 +276,13 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
dpdk_main_t *dm = &dpdk_main;
vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance);
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (f);
u32 n_packets = f->n_vectors;
u32 n_left;
u32 thread_index = vm->thread_index;
- int queue_id = thread_index;
- u32 tx_pkts = 0, all_or_flags = 0;
+ int queue_id = tf->queue_id;
+ u8 is_shared = tf->shared_queue;
+ u32 tx_pkts = 0;
dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data,
thread_index);
struct rte_mbuf **mb;
@@ -310,7 +314,6 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
b[3] = vlib_buffer_from_rte_mbuf (mb[3]);
or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
- all_or_flags |= or_flags;
if (or_flags & VLIB_BUFFER_NEXT_PRESENT)
{
@@ -368,7 +371,6 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
b[1] = vlib_buffer_from_rte_mbuf (mb[1]);
or_flags = b[0]->flags | b[1]->flags;
- all_or_flags |= or_flags;
if (or_flags & VLIB_BUFFER_NEXT_PRESENT)
{
@@ -404,7 +406,6 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
while (n_left > 0)
{
b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
- all_or_flags |= b[0]->flags;
dpdk_validate_rte_mbuf (vm, b[0], 1);
dpdk_buffer_tx_offload (xd, b[0], mb[0]);
@@ -419,7 +420,8 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
/* transmit as many packets as possible */
tx_pkts = n_packets = mb - ptd->mbufs;
- n_left = tx_burst_vector_internal (vm, xd, ptd->mbufs, n_packets);
+ n_left = tx_burst_vector_internal (vm, xd, ptd->mbufs, n_packets, queue_id,
+ is_shared);
{
/* If there is no callback then drop any non-transmitted packets */
@@ -475,7 +477,7 @@ dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
if (vec_len (xd->errors))
return clib_error_create ("Interface start failed");
xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP;
- f64 now = vlib_time_now (dm->vlib_main);
+ f64 now = vlib_time_now (vlib_get_main ());
dpdk_update_counters (xd, now);
dpdk_update_link_state (xd, now);
}
@@ -511,7 +513,7 @@ dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
}
xd->per_interface_next_index =
- vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index);
+ vlib_node_add_next (vlib_get_main (), dpdk_input_node.index, node_index);
}
@@ -533,11 +535,8 @@ dpdk_subif_add_del_function (vnet_main_t * vnm,
else if (xd->num_subifs)
xd->num_subifs--;
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- goto done;
-
/* currently we program VLANS only for IXGBE VF */
- if (xd->pmd != VNET_DPDK_PMD_IXGBEVF)
+ if (xd->driver->program_vlans == 0)
goto done;
if (t->sub.eth.flags.no_tags == 1)
@@ -551,7 +550,7 @@ dpdk_subif_add_del_function (vnet_main_t * vnm,
}
vlan_offload = rte_eth_dev_get_vlan_offload (xd->port_id);
- vlan_offload |= ETH_VLAN_FILTER_OFFLOAD;
+ vlan_offload |= RTE_ETH_VLAN_FILTER_OFFLOAD;
if ((r = rte_eth_dev_set_vlan_offload (xd->port_id, vlan_offload)))
{
@@ -625,7 +624,6 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
clib_memset (reta, 0, dev_info.reta_size * sizeof (*reta));
valid_queue_count = 0;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, bitmap) {
if (i >= dev_info.nb_rx_queues)
{
@@ -634,7 +632,6 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
}
reta[valid_queue_count++] = i;
}
- /* *INDENT-ON* */
/* check valid_queue_count not zero, make coverity happy */
if (valid_queue_count == 0)
@@ -651,10 +648,8 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
}
/* update reta table */
- reta_conf =
- (struct rte_eth_rss_reta_entry64 *) clib_mem_alloc (dev_info.reta_size /
- RTE_RETA_GROUP_SIZE *
- sizeof (*reta_conf));
+ reta_conf = (struct rte_eth_rss_reta_entry64 *) clib_mem_alloc (
+ dev_info.reta_size / RTE_ETH_RETA_GROUP_SIZE * sizeof (*reta_conf));
if (reta_conf == NULL)
{
err = clib_error_return (0, "clib_mem_alloc failed");
@@ -662,13 +657,13 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
}
clib_memset (reta_conf, 0,
- dev_info.reta_size / RTE_RETA_GROUP_SIZE *
- sizeof (*reta_conf));
+ dev_info.reta_size / RTE_ETH_RETA_GROUP_SIZE *
+ sizeof (*reta_conf));
for (i = 0; i < dev_info.reta_size; i++)
{
- uint32_t reta_id = i / RTE_RETA_GROUP_SIZE;
- uint32_t reta_pos = i % RTE_RETA_GROUP_SIZE;
+ uint32_t reta_id = i / RTE_ETH_RETA_GROUP_SIZE;
+ uint32_t reta_pos = i % RTE_ETH_RETA_GROUP_SIZE;
reta_conf[reta_id].mask = UINT64_MAX;
reta_conf[reta_id].reta[reta_pos] = reta[i];
@@ -726,7 +721,6 @@ dpdk_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid,
return 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (dpdk_device_class) = {
.name = "dpdk",
.tx_function_n_errors = DPDK_TX_FUNC_N_ERROR,
@@ -745,7 +739,6 @@ VNET_DEVICE_CLASS (dpdk_device_class) = {
.set_rss_queues_function = dpdk_interface_set_rss_queues,
.rx_mode_change_function = dpdk_interface_rx_mode_change,
};
-/* *INDENT-ON* */
#define UP_DOWN_FLAG_EVENT 1
@@ -792,14 +785,12 @@ admin_up_down_process (vlib_main_t * vm,
return 0; /* or not */
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (admin_up_down_process_node) = {
.function = admin_up_down_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "admin-up-down-process",
.process_log2_n_stack_bytes = 17, // 256KB
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index 66794a4e67c..88a4d9ff618 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -22,27 +22,25 @@
#include <rte_config.h>
-#include <rte_common.h>
-#include <rte_dev.h>
-#include <rte_memory.h>
#include <rte_eal.h>
-#include <rte_per_lcore.h>
-#include <rte_cycles.h>
-#include <rte_lcore.h>
-#include <rte_per_lcore.h>
-#include <rte_interrupts.h>
-#include <rte_pci.h>
+#include <rte_bus_pci.h>
+#ifdef __linux__
#include <rte_bus_vmbus.h>
-#include <rte_ether.h>
+#endif /* __linux__ */
#include <rte_ethdev.h>
-#include <rte_ring.h>
-#include <rte_mempool.h>
-#include <rte_mbuf.h>
#include <rte_version.h>
-#include <rte_sched.h>
#include <rte_net.h>
-#include <rte_bus_pci.h>
-#include <rte_flow.h>
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+#include <rte_bus.h>
+#include <rte_pci.h>
+#include <ctype.h>
+
+#include <bus_driver.h>
+#include <bus_pci_driver.h>
+#ifdef __linux__
+#include <bus_vmbus_driver.h>
+#endif /* __linux__ */
+#endif
#include <vnet/devices/devices.h>
@@ -60,79 +58,11 @@ extern vnet_device_class_t dpdk_device_class;
extern vlib_node_registration_t dpdk_input_node;
extern vlib_node_registration_t admin_up_down_process_node;
-#if RTE_VERSION < RTE_VERSION_NUM(20, 8, 0, 0)
-#define DPDK_MLX5_PMD_NAME "net_mlx5"
-#else
-#define DPDK_MLX5_PMD_NAME "mlx5_pci"
-#endif
-
-#define foreach_dpdk_pmd \
- _ ("net_thunderx", THUNDERX) \
- _ ("net_e1000_em", E1000EM) \
- _ ("net_e1000_igb", IGB) \
- _ ("net_e1000_igb_vf", IGBVF) \
- _ ("net_ixgbe", IXGBE) \
- _ ("net_ixgbe_vf", IXGBEVF) \
- _ ("net_i40e", I40E) \
- _ ("net_i40e_vf", I40EVF) \
- _ ("net_ice", ICE) \
- _ ("net_iavf", IAVF) \
- _ ("net_igc", IGC) \
- _ ("net_virtio", VIRTIO) \
- _ ("net_enic", ENIC) \
- _ ("net_vmxnet3", VMXNET3) \
- _ ("AF_PACKET PMD", AF_PACKET) \
- _ ("net_fm10k", FM10K) \
- _ ("net_cxgbe", CXGBE) \
- _ ("net_mlx4", MLX4) \
- _ (DPDK_MLX5_PMD_NAME, MLX5) \
- _ ("net_dpaa2", DPAA2) \
- _ ("net_virtio_user", VIRTIO_USER) \
- _ ("net_vhost", VHOST_ETHER) \
- _ ("net_ena", ENA) \
- _ ("net_failsafe", FAILSAFE) \
- _ ("net_liovf", LIOVF_ETHER) \
- _ ("net_qede", QEDE) \
- _ ("net_netvsc", NETVSC) \
- _ ("net_bnxt", BNXT)
-
-typedef enum
-{
- VNET_DPDK_PMD_NONE,
-#define _(s,f) VNET_DPDK_PMD_##f,
- foreach_dpdk_pmd
-#undef _
- VNET_DPDK_PMD_UNKNOWN, /* must be last */
-} dpdk_pmd_t;
-
-typedef enum
-{
- VNET_DPDK_PORT_TYPE_ETH_1G,
- VNET_DPDK_PORT_TYPE_ETH_2_5G,
- VNET_DPDK_PORT_TYPE_ETH_5G,
- VNET_DPDK_PORT_TYPE_ETH_10G,
- VNET_DPDK_PORT_TYPE_ETH_20G,
- VNET_DPDK_PORT_TYPE_ETH_25G,
- VNET_DPDK_PORT_TYPE_ETH_40G,
- VNET_DPDK_PORT_TYPE_ETH_50G,
- VNET_DPDK_PORT_TYPE_ETH_56G,
- VNET_DPDK_PORT_TYPE_ETH_100G,
- VNET_DPDK_PORT_TYPE_ETH_SWITCH,
- VNET_DPDK_PORT_TYPE_AF_PACKET,
- VNET_DPDK_PORT_TYPE_ETH_VF,
- VNET_DPDK_PORT_TYPE_VIRTIO_USER,
- VNET_DPDK_PORT_TYPE_VHOST_ETHER,
- VNET_DPDK_PORT_TYPE_FAILSAFE,
- VNET_DPDK_PORT_TYPE_NETVSC,
- VNET_DPDK_PORT_TYPE_UNKNOWN,
-} dpdk_port_type_t;
-
typedef uint16_t dpdk_portid_t;
#define foreach_dpdk_device_flags \
_ (0, ADMIN_UP, "admin-up") \
_ (1, PROMISC, "promisc") \
- _ (2, PMD, "pmd") \
_ (3, PMD_INIT_FAIL, "pmd-init-fail") \
_ (4, MAYBE_MULTISEG, "maybe-multiseg") \
_ (5, HAVE_SUBIF, "subif") \
@@ -143,12 +73,12 @@ typedef uint16_t dpdk_portid_t;
_ (13, INT_SUPPORTED, "int-supported") \
_ (14, INT_UNMASKABLE, "int-unmaskable")
-enum
+typedef enum
{
#define _(a, b, c) DPDK_DEVICE_FLAG_##b = (1 << a),
foreach_dpdk_device_flags
#undef _
-};
+} dpdk_device_flag_t;
typedef struct
{
@@ -177,10 +107,62 @@ typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
clib_spinlock_t lock;
+ u32 queue_index;
} dpdk_tx_queue_t;
typedef struct
{
+ const char *name;
+ const char *desc;
+} dpdk_driver_name_t;
+
+typedef struct
+{
+ dpdk_driver_name_t *drivers;
+ const char *interface_name_prefix;
+ u16 n_rx_desc;
+ u16 n_tx_desc;
+ u32 supported_flow_actions;
+ u32 enable_lsc_int : 1;
+ u32 enable_rxq_int : 1;
+ u32 disable_rx_scatter : 1;
+ u32 program_vlans : 1;
+ u32 mq_mode_none : 1;
+ u32 interface_number_from_port_id : 1;
+ u32 use_intel_phdr_cksum : 1;
+ u32 int_unmaskable : 1;
+} dpdk_driver_t;
+
+dpdk_driver_t *dpdk_driver_find (const char *name, const char **desc);
+
+typedef union
+{
+ struct
+ {
+ u16 disable_multi_seg : 1;
+ u16 enable_lro : 1;
+ u16 enable_tso : 1;
+ u16 enable_tcp_udp_checksum : 1;
+ u16 enable_outer_checksum_offload : 1;
+ u16 enable_lsc_int : 1;
+ u16 enable_rxq_int : 1;
+ u16 disable_tx_checksum_offload : 1;
+ u16 disable_rss : 1;
+ u16 disable_rx_scatter : 1;
+ u16 n_rx_queues;
+ u16 n_tx_queues;
+ u16 n_rx_desc;
+ u16 n_tx_desc;
+ u32 max_lro_pkt_size;
+ u64 rss_hf;
+ };
+ u64 as_u64[3];
+} dpdk_port_conf_t;
+
+STATIC_ASSERT_SIZEOF (dpdk_port_conf_t, 24);
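
dpdk_port_conf_t overlays its bitfields and counters with as_u64[3], so the 24-byte port configuration can be copied or compared as three machine words; the STATIC_ASSERT_SIZEOF keeps the two views from drifting apart. A cut-down analogue of the idiom:

#include <stdint.h>
#include <string.h>

/* a reduced analogue of the union-with-as_u64 overlay */
typedef union
{
  struct
  {
    uint16_t enable_a : 1;
    uint16_t enable_b : 1;
    uint16_t n_queues;
    uint32_t max_pkt;
    uint64_t hash_mask;
  };
  uint64_t as_u64[2];
} port_conf_t;

/* compile-time guard, in the spirit of STATIC_ASSERT_SIZEOF */
_Static_assert (sizeof (port_conf_t) == 16, "port_conf_t size changed");

int
main (void)
{
  port_conf_t a = { .enable_a = 1, .n_queues = 4, .hash_mask = ~0ULL };
  port_conf_t b;
  /* copy and compare word-wise through the overlay */
  memcpy (b.as_u64, a.as_u64, sizeof (b.as_u64));
  return memcmp (a.as_u64, b.as_u64, sizeof (a.as_u64)) != 0;
}
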
+
+typedef struct
+{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
dpdk_rx_queue_t *rx_queues;
@@ -191,33 +173,28 @@ typedef struct
u32 hw_if_index;
u32 sw_if_index;
+ u32 buffer_flags;
/* next node index if we decide to steal the rx graph arc */
u32 per_interface_next_index;
- u16 rx_q_used;
- u16 tx_q_used;
u16 flags;
/* DPDK device port number */
dpdk_portid_t port_id;
- dpdk_pmd_t pmd:8;
i8 cpu_socket;
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
- u16 nb_tx_desc;
- u16 nb_rx_desc;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ u64 enabled_tx_off;
+ u64 enabled_rx_off;
+ dpdk_driver_t *driver;
u8 *name;
- u8 *interface_name_suffix;
+ const char *if_desc;
/* number of sub-interfaces */
u16 num_subifs;
- /* PMD related */
- struct rte_eth_conf port_conf;
- struct rte_eth_txconf tx_conf;
-
/* flow related */
u32 supported_flow_actions;
dpdk_flow_entry_t *flow_entries; /* pool */
@@ -226,9 +203,6 @@ typedef struct
u32 parked_loop_count;
struct rte_flow_error last_flow_error;
- /* af_packet instance number */
- u16 af_packet_instance_num;
-
struct rte_eth_link link;
f64 time_last_link_update;
@@ -236,26 +210,36 @@ typedef struct
struct rte_eth_stats last_stats;
struct rte_eth_xstat *xstats;
f64 time_last_stats_update;
- dpdk_port_type_t port_type;
/* mac address */
u8 *default_mac_address;
+ /* maximum supported max frame size */
+ u32 max_supported_frame_size;
+
+ /* due to the lack of an API to get the ethernet max_frame_size we store
+ * information deduced from device info */
+ u8 driver_frame_overhead;
+
/* error string */
clib_error_t *errors;
+ dpdk_port_conf_t conf;
} dpdk_device_t;
+#define DPDK_MIN_POLL_INTERVAL (0.001) /* 1msec */
+
#define DPDK_STATS_POLL_INTERVAL (10.0)
-#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */
+#define DPDK_MIN_STATS_POLL_INTERVAL DPDK_MIN_POLL_INTERVAL
#define DPDK_LINK_POLL_INTERVAL (3.0)
-#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */
-
-#define foreach_dpdk_device_config_item \
- _ (num_rx_queues) \
- _ (num_tx_queues) \
- _ (num_rx_desc) \
- _ (num_tx_desc) \
+#define DPDK_MIN_LINK_POLL_INTERVAL DPDK_MIN_POLL_INTERVAL
+
+#define foreach_dpdk_device_config_item \
+ _ (num_rx_queues) \
+ _ (num_tx_queues) \
+ _ (num_rx_desc) \
+ _ (num_tx_desc) \
+ _ (max_lro_pkt_size) \
_ (rss_fn)
typedef enum
@@ -274,11 +258,8 @@ typedef struct
};
dpdk_device_addr_type_t dev_addr_type;
u8 *name;
+ u8 *tag;
u8 is_blacklisted;
- u8 vlan_strip_offload;
-#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0
-#define DPDK_DEVICE_VLAN_STRIP_OFF 1
-#define DPDK_DEVICE_VLAN_STRIP_ON 2
#define _(x) uword x;
foreach_dpdk_device_config_item
@@ -300,9 +281,7 @@ typedef struct
u8 **eal_init_args;
u8 *eal_init_args_str;
u8 *uio_driver_name;
- u8 no_multi_seg;
- u8 enable_tcp_udp_checksum;
- u8 no_tx_checksum_offload;
+ u8 uio_bind_force;
u8 enable_telemetry;
u16 max_simd_bitwidth;
@@ -310,13 +289,6 @@ typedef struct
#define DPDK_MAX_SIMD_BITWIDTH_256 256
#define DPDK_MAX_SIMD_BITWIDTH_512 512
- /* Required config parameters */
- u8 coremask_set_manually;
- u8 nchannels_set_manually;
- u32 coremask;
- u32 nchannels;
- u32 num_crypto_mbufs;
-
/*
* format interface names ala xxxEthernet%d/%d/%d instead of
* xxxEthernet%x/%x/%x.
@@ -347,20 +319,16 @@ typedef struct
u32 buffers[DPDK_RX_BURST_SZ];
u16 next[DPDK_RX_BURST_SZ];
u16 etype[DPDK_RX_BURST_SZ];
- u16 flags[DPDK_RX_BURST_SZ];
+ u32 flags[DPDK_RX_BURST_SZ];
vlib_buffer_t buffer_template;
} dpdk_per_thread_data_t;
typedef struct
{
-
/* Devices */
dpdk_device_t *devices;
dpdk_per_thread_data_t *per_thread_data;
- /* buffer flags template, configurable to enable/disable tcp / udp cksum */
- u32 buffer_flags_template;
-
/*
* flag indicating that a posted admin up/down
* (via post_sw_interface_set_flags) is in progress
@@ -371,10 +339,8 @@ typedef struct
f64 link_state_poll_interval;
f64 stat_poll_interval;
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
dpdk_config_main_t *conf;
+ dpdk_port_conf_t default_port_conf;
/* API message ID base */
u16 msg_id_base;
@@ -382,7 +348,6 @@ typedef struct
/* logging */
vlib_log_class_t log_default;
vlib_log_class_t log_cryptodev;
- vlib_log_class_t log_ipsec;
} dpdk_main_t;
extern dpdk_main_t dpdk_main;
@@ -440,35 +405,39 @@ typedef enum
vlib_log(VLIB_LOG_LEVEL_NOTICE, dpdk_main.log_default, __VA_ARGS__)
#define dpdk_log_info(...) \
vlib_log(VLIB_LOG_LEVEL_INFO, dpdk_main.log_default, __VA_ARGS__)
+#define dpdk_log_debug(...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, dpdk_main.log_default, __VA_ARGS__)
void dpdk_update_link_state (dpdk_device_t * xd, f64 now);
-#define foreach_dpdk_rss_hf \
- _(0, ETH_RSS_FRAG_IPV4, "ipv4-frag") \
- _(1, ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
- _(2, ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
- _(3, ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
- _(4, ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
- _(5, ETH_RSS_IPV4, "ipv4") \
- _(6, ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
- _(7, ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \
- _(8, ETH_RSS_FRAG_IPV6, "ipv6-frag") \
- _(9, ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
- _(10, ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
- _(11, ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
- _(12, ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
- _(13, ETH_RSS_IPV6_EX, "ipv6-ex") \
- _(14, ETH_RSS_IPV6, "ipv6") \
- _(15, ETH_RSS_L2_PAYLOAD, "l2-payload") \
- _(16, ETH_RSS_PORT, "port") \
- _(17, ETH_RSS_VXLAN, "vxlan") \
- _(18, ETH_RSS_GENEVE, "geneve") \
- _(19, ETH_RSS_NVGRE, "nvgre") \
- _(20, ETH_RSS_GTPU, "gtpu") \
- _(60, ETH_RSS_L4_DST_ONLY, "l4-dst-only") \
- _(61, ETH_RSS_L4_SRC_ONLY, "l4-src-only") \
- _(62, ETH_RSS_L3_DST_ONLY, "l3-dst-only") \
- _(63, ETH_RSS_L3_SRC_ONLY, "l3-src-only")
+#define foreach_dpdk_rss_hf \
+ _ (0, RTE_ETH_RSS_FRAG_IPV4, "ipv4-frag") \
+ _ (1, RTE_ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
+ _ (2, RTE_ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
+ _ (3, RTE_ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
+ _ (4, RTE_ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
+ _ (5, RTE_ETH_RSS_IPV4, "ipv4") \
+ _ (6, RTE_ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
+ _ (7, RTE_ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \
+ _ (8, RTE_ETH_RSS_FRAG_IPV6, "ipv6-frag") \
+ _ (9, RTE_ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
+ _ (10, RTE_ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
+ _ (11, RTE_ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
+ _ (12, RTE_ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
+ _ (13, RTE_ETH_RSS_IPV6_EX, "ipv6-ex") \
+ _ (14, RTE_ETH_RSS_IPV6, "ipv6") \
+ _ (15, RTE_ETH_RSS_L2_PAYLOAD, "l2-payload") \
+ _ (16, RTE_ETH_RSS_PORT, "port") \
+ _ (17, RTE_ETH_RSS_VXLAN, "vxlan") \
+ _ (18, RTE_ETH_RSS_GENEVE, "geneve") \
+ _ (19, RTE_ETH_RSS_NVGRE, "nvgre") \
+ _ (20, RTE_ETH_RSS_GTPU, "gtpu") \
+ _ (21, RTE_ETH_RSS_ESP, "esp") \
+ _ (22, RTE_ETH_RSS_L2TPV3, "l2tpv3") \
+ _ (60, RTE_ETH_RSS_L4_DST_ONLY, "l4-dst-only") \
+ _ (61, RTE_ETH_RSS_L4_SRC_ONLY, "l4-src-only") \
+ _ (62, RTE_ETH_RSS_L3_DST_ONLY, "l3-dst-only") \
+ _ (63, RTE_ETH_RSS_L3_SRC_ONLY, "l3-src-only")
format_function_t format_dpdk_device_name;
format_function_t format_dpdk_device;
@@ -481,6 +450,8 @@ format_function_t format_dpdk_flow;
format_function_t format_dpdk_rss_hf_name;
format_function_t format_dpdk_rx_offload_caps;
format_function_t format_dpdk_tx_offload_caps;
+format_function_t format_dpdk_burst_fn;
+format_function_t format_dpdk_rte_device;
vnet_flow_dev_ops_function_t dpdk_flow_ops_fn;
clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn);
diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h
index a5a8a2ad57d..cb7b185c112 100644
--- a/src/plugins/dpdk/device/dpdk_priv.h
+++ b/src/plugins/dpdk/device/dpdk_priv.h
@@ -15,15 +15,7 @@
#define DPDK_NB_RX_DESC_DEFAULT 1024
#define DPDK_NB_TX_DESC_DEFAULT 1024
-#define DPDK_NB_RX_DESC_VIRTIO 256
-#define DPDK_NB_TX_DESC_VIRTIO 256
-
-#define I40E_DEV_ID_SFP_XL710 0x1572
-#define I40E_DEV_ID_QSFP_A 0x1583
-#define I40E_DEV_ID_QSFP_B 0x1584
-#define I40E_DEV_ID_QSFP_C 0x1585
-#define I40E_DEV_ID_10G_BASE_T 0x1586
-#define I40E_DEV_ID_VF 0x154C
+#define DPDK_MAX_LRO_SIZE_DEFAULT 65536
/* These args appear by themselves */
#define foreach_eal_double_hyphen_predicate_arg \
@@ -32,10 +24,6 @@ _(no-hpet) \
_(no-huge) \
_(vmware-tsc-map)
-#define foreach_eal_single_hyphen_mandatory_arg \
-_(coremask, c) \
-_(nchannels, n) \
-
#define foreach_eal_single_hyphen_arg \
_(mem-alloc-request, m) \
_(force-ranks, r)
@@ -48,10 +36,17 @@ _(proc-type) \
_(file-prefix) \
_(vdev) \
_(log-level) \
+_(block) \
_(iova-mode) \
_(base-virtaddr)
/* clang-format on */
+static_always_inline void
+dpdk_device_flag_set (dpdk_device_t *xd, __typeof__ (xd->flags) flag, int val)
+{
+ xd->flags = val ? xd->flags | flag : xd->flags & ~flag;
+}
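
dpdk_device_flag_set() folds the usual set-or-clear read-modify-write into one expression, so call sites (such as the DPDK_DEVICE_FLAG_RX_IP4_CKSUM updates in dpdk_device_setup()) can pass a computed boolean directly. The same helper in standalone form:

#include <stdint.h>
#include <stdio.h>

#define FLAG_RX_IP4_CKSUM (1 << 2)

static inline void
flag_set (uint32_t *flags, uint32_t flag, int val)
{
  /* one expression: set the bit when val is nonzero, clear it otherwise */
  *flags = val ? *flags | flag : *flags & ~flag;
}

int
main (void)
{
  uint32_t flags = 0;
  flag_set (&flags, FLAG_RX_IP4_CKSUM, 1);
  printf ("0x%x\n", (unsigned) flags);
  flag_set (&flags, FLAG_RX_IP4_CKSUM, 0);
  printf ("0x%x\n", (unsigned) flags);
  return 0;
}
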
+
static inline void
dpdk_get_xstats (dpdk_device_t * xd)
{
@@ -69,11 +64,11 @@ dpdk_get_xstats (dpdk_device_t * xd)
ret = rte_eth_xstats_get (xd->port_id, xd->xstats, len);
if (ret < 0 || ret > len)
{
- _vec_len (xd->xstats) = 0;
+ vec_set_len (xd->xstats, 0);
return;
}
- _vec_len (xd->xstats) = len;
+ vec_set_len (xd->xstats, len);
}
#define DPDK_UPDATE_COUNTER(vnm, tidx, xd, stat, cnt) \
@@ -100,10 +95,6 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now)
vnet_main_t *vnm = vnet_get_main ();
u32 thread_index = vlib_get_thread_index ();
- /* only update counters for PMD interfaces */
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- return;
-
xd->time_last_stats_update = now ? now : xd->time_last_stats_update;
clib_memcpy_fast (&xd->last_stats, &xd->stats, sizeof (xd->last_stats));
rte_eth_stats_get (xd->port_id, &xd->stats);
@@ -119,6 +110,119 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now)
dpdk_get_xstats (xd);
}
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+#define RTE_MBUF_F_RX_FDIR PKT_RX_FDIR
+#define RTE_MBUF_F_RX_FDIR_FLX PKT_RX_FDIR_FLX
+#define RTE_MBUF_F_RX_FDIR_ID PKT_RX_FDIR_ID
+#define RTE_MBUF_F_RX_IEEE1588_PTP PKT_RX_IEEE1588_PTP
+#define RTE_MBUF_F_RX_IEEE1588_TMST PKT_RX_IEEE1588_TMST
+#define RTE_MBUF_F_RX_IP_CKSUM_BAD PKT_RX_IP_CKSUM_BAD
+#define RTE_MBUF_F_RX_IP_CKSUM_GOOD PKT_RX_IP_CKSUM_GOOD
+#define RTE_MBUF_F_RX_IP_CKSUM_NONE PKT_RX_IP_CKSUM_GOOD
+#define RTE_MBUF_F_RX_L4_CKSUM_BAD PKT_RX_L4_CKSUM_BAD
+#define RTE_MBUF_F_RX_L4_CKSUM_GOOD PKT_RX_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_L4_CKSUM_NONE PKT_RX_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_LRO PKT_RX_LRO
+#define RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD PKT_RX_OUTER_IP_CKSUM_BAD
+#define RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD PKT_RX_OUTER_L4_CKSUM_BAD
+#define RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD PKT_RX_OUTER_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_OUTER_L4_CKSUM_NONE PKT_RX_OUTER_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_QINQ PKT_RX_QINQ
+#define RTE_MBUF_F_RX_QINQ_STRIPPED PKT_RX_QINQ_STRIPPED
+#define RTE_MBUF_F_RX_RSS_HASH PKT_RX_RSS_HASH
+#define RTE_MBUF_F_RX_SEC_OFFLOAD PKT_RX_SEC_OFFLOAD
+#define RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED PKT_RX_SEC_OFFLOAD_FAILED
+#define RTE_MBUF_F_RX_VLAN PKT_RX_VLAN
+#define RTE_MBUF_F_RX_VLAN_STRIPPED PKT_RX_VLAN_STRIPPED
+#define RTE_MBUF_F_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
+#define RTE_MBUF_F_TX_IPV4 PKT_TX_IPV4
+#define RTE_MBUF_F_TX_IPV6 PKT_TX_IPV6
+#define RTE_MBUF_F_TX_IP_CKSUM PKT_TX_IP_CKSUM
+#define RTE_MBUF_F_TX_MACSEC PKT_TX_MACSEC
+#define RTE_MBUF_F_TX_OUTER_IPV4 PKT_TX_OUTER_IPV4
+#define RTE_MBUF_F_TX_OUTER_IPV6 PKT_TX_OUTER_IPV6
+#define RTE_MBUF_F_TX_OUTER_IP_CKSUM PKT_TX_OUTER_IP_CKSUM
+#define RTE_MBUF_F_TX_OUTER_UDP_CKSUM PKT_TX_OUTER_UDP_CKSUM
+#define RTE_MBUF_F_TX_QINQ PKT_TX_QINQ
+#define RTE_MBUF_F_TX_SCTP_CKSUM PKT_TX_SCTP_CKSUM
+#define RTE_MBUF_F_TX_SEC_OFFLOAD PKT_TX_SEC_OFFLOAD
+#define RTE_MBUF_F_TX_TCP_CKSUM PKT_TX_TCP_CKSUM
+#define RTE_MBUF_F_TX_TCP_SEG PKT_TX_TCP_SEG
+#define RTE_MBUF_F_TX_TUNNEL_GENEVE PKT_TX_TUNNEL_GENEVE
+#define RTE_MBUF_F_TX_TUNNEL_GRE PKT_TX_TUNNEL_GRE
+#define RTE_MBUF_F_TX_TUNNEL_GTP PKT_TX_TUNNEL_GTP
+#define RTE_MBUF_F_TX_TUNNEL_IP PKT_TX_TUNNEL_IP
+#define RTE_MBUF_F_TX_TUNNEL_IPIP PKT_TX_TUNNEL_IPIP
+#define RTE_MBUF_F_TX_TUNNEL_MPLSINUDP PKT_TX_TUNNEL_MPLSINUDP
+#define RTE_MBUF_F_TX_TUNNEL_UDP PKT_TX_TUNNEL_UDP
+#define RTE_MBUF_F_TX_TUNNEL_VXLAN PKT_TX_TUNNEL_VXLAN
+#define RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE PKT_TX_TUNNEL_VXLAN_GPE
+#define RTE_MBUF_F_TX_UDP_CKSUM PKT_TX_UDP_CKSUM
+#define RTE_MBUF_F_TX_UDP_SEG PKT_TX_UDP_SEG
+#define RTE_MBUF_F_TX_VLAN PKT_TX_VLAN
+#define RTE_ETH_RSS_FRAG_IPV4 ETH_RSS_FRAG_IPV4
+#define RTE_ETH_RSS_NONFRAG_IPV4_TCP ETH_RSS_NONFRAG_IPV4_TCP
+#define RTE_ETH_RSS_NONFRAG_IPV4_UDP ETH_RSS_NONFRAG_IPV4_UDP
+#define RTE_ETH_RSS_NONFRAG_IPV4_SCTP ETH_RSS_NONFRAG_IPV4_SCTP
+#define RTE_ETH_RSS_NONFRAG_IPV4_OTHER ETH_RSS_NONFRAG_IPV4_OTHER
+#define RTE_ETH_RSS_IPV4 ETH_RSS_IPV4
+#define RTE_ETH_RSS_IPV6_TCP_EX ETH_RSS_IPV6_TCP_EX
+#define RTE_ETH_RSS_IPV6_UDP_EX ETH_RSS_IPV6_UDP_EX
+#define RTE_ETH_RSS_FRAG_IPV6 ETH_RSS_FRAG_IPV6
+#define RTE_ETH_RSS_NONFRAG_IPV6_TCP ETH_RSS_NONFRAG_IPV6_TCP
+#define RTE_ETH_RSS_NONFRAG_IPV6_UDP ETH_RSS_NONFRAG_IPV6_UDP
+#define RTE_ETH_RSS_NONFRAG_IPV6_SCTP ETH_RSS_NONFRAG_IPV6_SCTP
+#define RTE_ETH_RSS_NONFRAG_IPV6_OTHER ETH_RSS_NONFRAG_IPV6_OTHER
+#define RTE_ETH_RSS_IPV6_EX ETH_RSS_IPV6_EX
+#define RTE_ETH_RSS_IPV6 ETH_RSS_IPV6
+#define RTE_ETH_RSS_L2_PAYLOAD ETH_RSS_L2_PAYLOAD
+#define RTE_ETH_RSS_PORT ETH_RSS_PORT
+#define RTE_ETH_RSS_VXLAN ETH_RSS_VXLAN
+#define RTE_ETH_RSS_GENEVE ETH_RSS_GENEVE
+#define RTE_ETH_RSS_NVGRE ETH_RSS_NVGRE
+#define RTE_ETH_RSS_GTPU ETH_RSS_GTPU
+#define RTE_ETH_RSS_ESP ETH_RSS_ESP
+#define RTE_ETH_RSS_L4_DST_ONLY ETH_RSS_L4_DST_ONLY
+#define RTE_ETH_RSS_L4_SRC_ONLY ETH_RSS_L4_SRC_ONLY
+#define RTE_ETH_RSS_L3_DST_ONLY ETH_RSS_L3_DST_ONLY
+#define RTE_ETH_RSS_L3_SRC_ONLY ETH_RSS_L3_SRC_ONLY
+#define RTE_ETH_RETA_GROUP_SIZE RTE_RETA_GROUP_SIZE
+#define RTE_ETH_TX_OFFLOAD_IPV4_CKSUM DEV_TX_OFFLOAD_IPV4_CKSUM
+#define RTE_ETH_TX_OFFLOAD_TCP_CKSUM DEV_TX_OFFLOAD_TCP_CKSUM
+#define RTE_ETH_TX_OFFLOAD_UDP_CKSUM DEV_TX_OFFLOAD_UDP_CKSUM
+#define RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM
+#define RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM DEV_TX_OFFLOAD_OUTER_UDP_CKSUM
+#define RTE_ETH_TX_OFFLOAD_TCP_TSO DEV_TX_OFFLOAD_TCP_TSO
+#define RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO DEV_TX_OFFLOAD_VXLAN_TNL_TSO
+#define RTE_ETH_TX_OFFLOAD_MULTI_SEGS DEV_TX_OFFLOAD_MULTI_SEGS
+#define RTE_ETH_RX_OFFLOAD_IPV4_CKSUM DEV_RX_OFFLOAD_IPV4_CKSUM
+#define RTE_ETH_RX_OFFLOAD_SCATTER DEV_RX_OFFLOAD_SCATTER
+#define RTE_ETH_RX_OFFLOAD_TCP_LRO DEV_RX_OFFLOAD_TCP_LRO
+#define RTE_ETH_MQ_RX_RSS ETH_MQ_RX_RSS
+#define RTE_ETH_RX_OFFLOAD_TCP_CKSUM DEV_RX_OFFLOAD_TCP_CKSUM
+#define RTE_ETH_RX_OFFLOAD_UDP_CKSUM DEV_RX_OFFLOAD_UDP_CKSUM
+#define RTE_ETH_MQ_RX_NONE ETH_MQ_RX_NONE
+#define RTE_ETH_LINK_FULL_DUPLEX ETH_LINK_FULL_DUPLEX
+#define RTE_ETH_LINK_HALF_DUPLEX ETH_LINK_HALF_DUPLEX
+#define RTE_ETH_VLAN_STRIP_OFFLOAD ETH_VLAN_STRIP_OFFLOAD
+#define RTE_ETH_VLAN_FILTER_OFFLOAD ETH_VLAN_FILTER_OFFLOAD
+#define RTE_ETH_VLAN_EXTEND_OFFLOAD ETH_VLAN_EXTEND_OFFLOAD
+#define RTE_ETH_LINK_SPEED_200G ETH_LINK_SPEED_200G
+#define RTE_ETH_LINK_SPEED_100G ETH_LINK_SPEED_100G
+#define RTE_ETH_LINK_SPEED_56G ETH_LINK_SPEED_56G
+#define RTE_ETH_LINK_SPEED_50G ETH_LINK_SPEED_50G
+#define RTE_ETH_LINK_SPEED_40G ETH_LINK_SPEED_40G
+#define RTE_ETH_LINK_SPEED_25G ETH_LINK_SPEED_25G
+#define RTE_ETH_LINK_SPEED_20G ETH_LINK_SPEED_20G
+#define RTE_ETH_LINK_SPEED_10G ETH_LINK_SPEED_10G
+#define RTE_ETH_LINK_SPEED_5G ETH_LINK_SPEED_5G
+#define RTE_ETH_LINK_SPEED_2_5G ETH_LINK_SPEED_2_5G
+#define RTE_ETH_LINK_SPEED_1G ETH_LINK_SPEED_1G
+#define RTE_ETH_RSS_IP ETH_RSS_IP
+#define RTE_ETH_RSS_UDP ETH_RSS_UDP
+#define RTE_ETH_RSS_TCP ETH_RSS_TCP
+#endif
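
This block backfills the RTE_-prefixed names introduced in DPDK 21.11, so the rest of the plugin can use the new names unconditionally while still compiling against older releases. The idiom in miniature (library and flag names here are illustrative, not real DPDK symbols):

#include <stdio.h>

#define OLD_FLAG_NAME 0x1
#define LIB_VERSION_NUM(a, b) ((a) * 100 + (b))
#define LIB_VERSION LIB_VERSION_NUM (21, 8) /* pretend: building vs 21.08 */

#if LIB_VERSION < LIB_VERSION_NUM (21, 11)
#define NEW_FLAG_NAME OLD_FLAG_NAME /* backfill the newer name */
#endif

int
main (void)
{
  /* code below the shim always uses the new name */
  printf ("flag = 0x%x\n", NEW_FLAG_NAME);
  return 0;
}
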
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/dpdk/device/driver.c b/src/plugins/dpdk/device/driver.c
new file mode 100644
index 00000000000..9c368dd9038
--- /dev/null
+++ b/src/plugins/dpdk/device/driver.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+
+#include <dpdk/device/dpdk.h>
+
+static const u32 supported_flow_actions_intel =
+ (VNET_FLOW_ACTION_MARK | VNET_FLOW_ACTION_REDIRECT_TO_NODE |
+ VNET_FLOW_ACTION_REDIRECT_TO_QUEUE | VNET_FLOW_ACTION_BUFFER_ADVANCE |
+ VNET_FLOW_ACTION_COUNT | VNET_FLOW_ACTION_DROP | VNET_FLOW_ACTION_RSS);
+
+#define DPDK_DRIVERS(...) \
+ (dpdk_driver_name_t[]) \
+ { \
+ __VA_ARGS__, {} \
+ }
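
DPDK_DRIVERS wraps a C99 compound literal so each table entry can list its PMD name/description pairs inline, appending an empty element as a sentinel that dpdk_driver_find() walks until dn->name is NULL. A minimal reproduction of the macro (using { 0 } for the sentinel, the portable spelling of the {} used above):

#include <stdio.h>

typedef struct { const char *name; const char *desc; } name_desc_t;

/* same shape as DPDK_DRIVERS: compound literal + sentinel element */
#define NAMES(...) ((name_desc_t[]){ __VA_ARGS__, { 0 } })

int
main (void)
{
  name_desc_t *dn = NAMES ({ "net_foo", "Foo NIC" }, { "net_bar", "Bar NIC" });
  for (; dn->name; dn++)
    printf ("%s: %s\n", dn->name, dn->desc);
  return 0;
}
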
+
+static dpdk_driver_t dpdk_drivers[] = {
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ice", "Intel E810 Family" },
+ { "net_igc", "Intel I225 2.5G Family" },
+ { "net_e1000_igb", "Intel e1000" },
+ { "net_e1000_em", "Intel 82540EM (e1000)" }),
+ .enable_rxq_int = 1,
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ixgbe", "Intel 82599" }),
+ .enable_rxq_int = 1,
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_i40e", "Intel X710/XL710 Family" }),
+ .enable_rxq_int = 1,
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ .int_unmaskable = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_liovf", "Cavium Lio VF" },
+ { "net_thunderx", "Cavium ThunderX" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_iavf", "Intel iAVF" },
+ { "net_i40e_vf", "Intel X710/XL710 Family VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ .int_unmaskable = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_e1000_igb_vf", "Intel e1000 VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ixgbe_vf", "Intel 82599 VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .use_intel_phdr_cksum = 1,
+ .program_vlans = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_dpaa2", "NXP DPAA2 Mac" }),
+ .interface_name_prefix = "TenGigabitEthernet",
+ },
+ {
+ .drivers =
+ DPDK_DRIVERS ({ "net_fm10k", "Intel FM10000 Family Ethernet Switch" }),
+ .interface_name_prefix = "EthernetSwitch",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_cxgbe", "Chelsio T4/T5" }),
+ .interface_number_from_port_id = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_mlx4", "Mellanox ConnectX-3 Family" },
+ { "net_qede", "Cavium QLogic FastLinQ QL4xxxx" },
+ { "net_bnxt", "Broadcom NetXtreme E/S-Series" }),
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_virtio_user", "Virtio User" }),
+ .interface_name_prefix = "VirtioUser",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_vhost", "VhostEthernet" }),
+ .interface_name_prefix = "VhostEthernet",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "mlx5_pci", "Mellanox ConnectX-4/5/6 Family" },
+ { "net_enic", "Cisco VIC" }),
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_failsafe", "FailsafeEthernet" }),
+ .interface_name_prefix = "FailsafeEthernet",
+ .enable_lsc_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "AF_PACKET PMD", "af_packet" }),
+ .interface_name_prefix = "af_packet",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_netvsc", "Microsoft Hyper-V Netvsc" }),
+ .interface_name_prefix = "NetVSC",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ena", "AWS ENA VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .enable_rxq_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_vmxnet3", "VMware VMXNET3" }),
+ .interface_name_prefix = "GigabitEthernet",
+ .enable_rxq_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_virtio", "Red Hat Virtio" }),
+ .interface_name_prefix = "GigabitEthernet",
+ .n_rx_desc = 256,
+ .n_tx_desc = 256,
+ .mq_mode_none = 1,
+ .enable_rxq_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_gve", "Google vNIC" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ }
+};
+
+dpdk_driver_t *
+dpdk_driver_find (const char *name, const char **desc)
+{
+ for (int i = 0; i < ARRAY_LEN (dpdk_drivers); i++)
+ {
+ dpdk_driver_t *dr = dpdk_drivers + i;
+ dpdk_driver_name_t *dn = dr->drivers;
+
+ while (dn->name)
+ {
+ if (name && !strcmp (name, dn->name))
+ {
+ *desc = dn->desc;
+ return dr;
+ }
+ dn++;
+ }
+ }
+ return 0;
+}
diff --git a/src/plugins/dpdk/device/flow.c b/src/plugins/dpdk/device/flow.c
index a090ec0e930..635f6f37ebf 100644
--- a/src/plugins/dpdk/device/flow.c
+++ b/src/plugins/dpdk/device/flow.c
@@ -21,7 +21,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/arp_packet.h>
-#include <vnet/vxlan/vxlan.h>
+#include <vxlan/vxlan.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/device/dpdk_priv.h>
#include <vppinfra/error.h>
@@ -29,22 +29,30 @@
#define FLOW_IS_ETHERNET_CLASS(f) \
(f->type == VNET_FLOW_TYPE_ETHERNET)
-#define FLOW_IS_IPV4_CLASS(f) \
- ((f->type == VNET_FLOW_TYPE_IP4) || \
- (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
- (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
- (f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
- (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
- (f->type == VNET_FLOW_TYPE_IP4_GTPU) || \
- (f->type == VNET_FLOW_TYPE_IP4_L2TPV3OIP) || \
- (f->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) || \
- (f->type == VNET_FLOW_TYPE_IP4_IPSEC_AH))
-
-#define FLOW_IS_IPV6_CLASS(f) \
- ((f->type == VNET_FLOW_TYPE_IP6) || \
- (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
- (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \
- (f->type == VNET_FLOW_TYPE_IP6_VXLAN))
+#define FLOW_IS_IPV4_CLASS(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPU) || \
+ (f->type == VNET_FLOW_TYPE_IP4_L2TPV3OIP) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IPSEC_AH) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP4) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP6) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE))
+
+#define FLOW_IS_IPV6_CLASS(f) \
+ ((f->type == VNET_FLOW_TYPE_IP6) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP6_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP4) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP6) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE))
/* check if flow is VLAN sensitive */
#define FLOW_HAS_VLAN_TAG(f) \
@@ -70,6 +78,13 @@
(f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
(f->type == VNET_FLOW_TYPE_IP4_GTPU))
+/* check if flow has an inner TCP/UDP header */
+#define FLOW_HAS_INNER_N_TUPLE(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE))
+
/* constant structs */
static const struct rte_flow_attr ingress = {.ingress = 1 };
@@ -103,6 +118,25 @@ dpdk_flow_convert_rss_types (u64 type, u64 * dpdk_rss_type)
return;
}
+/** Maximum number of queue indices in struct rte_flow_action_rss. */
+#define ACTION_RSS_QUEUE_NUM 128
+
+static inline void
+dpdk_flow_convert_rss_queues (u32 queue_index, u32 queue_num,
+ struct rte_flow_action_rss *rss)
+{
+ u16 *queues = clib_mem_alloc (sizeof (*queues) * ACTION_RSS_QUEUE_NUM);
+ int i;
+
+ for (i = 0; i < queue_num; i++)
+ queues[i] = queue_index++;
+
+ rss->queue_num = queue_num;
+ rss->queue = queues;
+
+ return;
+}
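
dpdk_flow_convert_rss_queues() expands a (first queue, count) pair from the vnet flow into the explicit queue-index array that struct rte_flow_action_rss expects, with the allocation sized to the fixed ACTION_RSS_QUEUE_NUM bound. The expansion in isolation:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* same expansion as dpdk_flow_convert_rss_queues(): a (start, count)
   pair becomes an explicit queue index array */
static uint16_t *
expand_queues (uint32_t first, uint32_t count)
{
  uint16_t *q = malloc (count * sizeof (*q));
  for (uint32_t i = 0; i < count; i++)
    q[i] = first + i;
  return q;
}

int
main (void)
{
  uint16_t *q = expand_queues (4, 3);
  printf ("%u %u %u\n", q[0], q[1], q[2]); /* 4 5 6 */
  free (q);
  return 0;
}
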
+
static inline enum rte_eth_hash_function
dpdk_flow_convert_rss_func (vnet_rss_function_t func)
{
@@ -134,14 +168,15 @@ static int
dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
{
struct rte_flow_item_eth eth[2] = { };
- struct rte_flow_item_ipv4 ip4[2] = { };
- struct rte_flow_item_ipv6 ip6[2] = { };
- struct rte_flow_item_udp udp[2] = { };
- struct rte_flow_item_tcp tcp[2] = { };
+ struct rte_flow_item_ipv4 ip4[2] = {}, in_ip4[2] = {};
+ struct rte_flow_item_ipv6 ip6[2] = {}, in_ip6[2] = {};
+ struct rte_flow_item_udp udp[2] = {}, in_UDP[2] = {};
+ struct rte_flow_item_tcp tcp[2] = {}, in_TCP[2] = {};
struct rte_flow_item_gtp gtp[2] = { };
struct rte_flow_item_l2tpv3oip l2tp[2] = { };
struct rte_flow_item_esp esp[2] = { };
struct rte_flow_item_ah ah[2] = { };
+ struct rte_flow_item_raw generic[2] = {};
struct rte_flow_action_mark mark = { 0 };
struct rte_flow_action_queue queue = { 0 };
struct rte_flow_action_rss rss = { 0 };
@@ -165,6 +200,20 @@ dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
u8 protocol = IP_PROTOCOL_RESERVED;
int rv = 0;
+ /* Handle generic flow first */
+ if (f->type == VNET_FLOW_TYPE_GENERIC)
+ {
+ generic[0].pattern = f->generic.pattern.spec;
+ generic[1].pattern = f->generic.pattern.mask;
+
+ vec_add2 (items, item, 1);
+ item->type = RTE_FLOW_ITEM_TYPE_RAW;
+ item->spec = generic;
+ item->mask = generic + 1;
+
+ goto pattern_end;
+ }
+
enum
{
FLOW_UNKNOWN_CLASS,
@@ -285,7 +334,8 @@ dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
if ((ip6_ptr->src_addr.mask.as_u64[0] == 0) &&
(ip6_ptr->src_addr.mask.as_u64[1] == 0) &&
- (!ip6_ptr->protocol.mask))
+ (ip6_ptr->dst_addr.mask.as_u64[0] == 0) &&
+ (ip6_ptr->dst_addr.mask.as_u64[1] == 0) && (!ip6_ptr->protocol.mask))
{
item->spec = NULL;
item->mask = NULL;
@@ -437,13 +487,127 @@ dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
item->mask = raw + 1;
}
break;
+ case IP_PROTOCOL_IPV6:
+ item->type = RTE_FLOW_ITEM_TYPE_IPV6;
+#define fill_inner_ip6_with_outer_ipv(OUTER_IP_VER) \
+ if (f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP6 || \
+ f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP6_N_TUPLE) \
+ { \
+ vnet_flow_ip##OUTER_IP_VER##_ip6_t *ptr = &f->ip##OUTER_IP_VER##_ip6; \
+ if ((ptr->in_src_addr.mask.as_u64[0] == 0) && \
+ (ptr->in_src_addr.mask.as_u64[1] == 0) && \
+ (ptr->in_dst_addr.mask.as_u64[0] == 0) && \
+ (ptr->in_dst_addr.mask.as_u64[1] == 0) && (!ptr->in_protocol.mask)) \
+ { \
+ item->spec = NULL; \
+ item->mask = NULL; \
+ } \
+ else \
+ { \
+ clib_memcpy (in_ip6[0].hdr.src_addr, &ptr->in_src_addr.addr, \
+ ARRAY_LEN (ptr->in_src_addr.addr.as_u8)); \
+ clib_memcpy (in_ip6[1].hdr.src_addr, &ptr->in_src_addr.mask, \
+ ARRAY_LEN (ptr->in_src_addr.mask.as_u8)); \
+ clib_memcpy (in_ip6[0].hdr.dst_addr, &ptr->in_dst_addr.addr, \
+ ARRAY_LEN (ptr->in_dst_addr.addr.as_u8)); \
+ clib_memcpy (in_ip6[1].hdr.dst_addr, &ptr->in_dst_addr.mask, \
+ ARRAY_LEN (ptr->in_dst_addr.mask.as_u8)); \
+ item->spec = in_ip6; \
+ item->mask = in_ip6 + 1; \
+ } \
+ }
+ fill_inner_ip6_with_outer_ipv (6) fill_inner_ip6_with_outer_ipv (4)
+#undef fill_inner_ip6_with_outer_ipv
+ break;
+ case IP_PROTOCOL_IP_IN_IP:
+ item->type = RTE_FLOW_ITEM_TYPE_IPV4;
+
+#define fill_inner_ip4_with_outer_ipv(OUTER_IP_VER) \
+ if (f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP4 || \
+ f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP4_N_TUPLE) \
+ { \
+ vnet_flow_ip##OUTER_IP_VER##_ip4_t *ptr = &f->ip##OUTER_IP_VER##_ip4; \
+ if ((!ptr->in_src_addr.mask.as_u32) && \
+ (!ptr->in_dst_addr.mask.as_u32) && (!ptr->in_protocol.mask)) \
+ { \
+ item->spec = NULL; \
+ item->mask = NULL; \
+ } \
+ else \
+ { \
+ in_ip4[0].hdr.src_addr = ptr->in_src_addr.addr.as_u32; \
+ in_ip4[1].hdr.src_addr = ptr->in_src_addr.mask.as_u32; \
+ in_ip4[0].hdr.dst_addr = ptr->in_dst_addr.addr.as_u32; \
+ in_ip4[1].hdr.dst_addr = ptr->in_dst_addr.mask.as_u32; \
+ item->spec = in_ip4; \
+ item->mask = in_ip4 + 1; \
+ } \
+ }
+ fill_inner_ip4_with_outer_ipv (6) fill_inner_ip4_with_outer_ipv (4)
+#undef fill_inner_ip4_with_outer_ipv
+ break;
default:
rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
goto done;
}
+ if (FLOW_HAS_INNER_N_TUPLE (f))
+ {
+ vec_add2 (items, item, 1);
+
+#define fill_inner_n_tuple_of(proto) \
+ item->type = RTE_FLOW_ITEM_TYPE_##proto; \
+ if ((ptr->in_src_port.mask == 0) && (ptr->in_dst_port.mask == 0)) \
+ { \
+ item->spec = NULL; \
+ item->mask = NULL; \
+ } \
+ else \
+ { \
+ in_##proto[0].hdr.src_port = \
+ clib_host_to_net_u16 (ptr->in_src_port.port); \
+ in_##proto[1].hdr.src_port = \
+ clib_host_to_net_u16 (ptr->in_src_port.mask); \
+ in_##proto[0].hdr.dst_port = \
+ clib_host_to_net_u16 (ptr->in_dst_port.port); \
+ in_##proto[1].hdr.dst_port = \
+ clib_host_to_net_u16 (ptr->in_dst_port.mask); \
+ item->spec = in_##proto; \
+ item->mask = in_##proto + 1; \
+ }
+
+#define fill_inner_n_tuple(OUTER_IP_VER, INNER_IP_VER) \
+ if (f->type == \
+ VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP##INNER_IP_VER##_N_TUPLE) \
+ { \
+ vnet_flow_ip##OUTER_IP_VER##_ip##INNER_IP_VER##_n_tuple_t *ptr = \
+ &f->ip##OUTER_IP_VER##_ip##INNER_IP_VER##_n_tuple; \
+ switch (ptr->in_protocol.prot) \
+ { \
+ case IP_PROTOCOL_UDP: \
+ fill_inner_n_tuple_of (UDP) break; \
+ case IP_PROTOCOL_TCP: \
+ fill_inner_n_tuple_of (TCP) break; \
+ default: \
+ break; \
+ } \
+ }
+ fill_inner_n_tuple (6, 4) fill_inner_n_tuple (4, 4)
+ fill_inner_n_tuple (6, 6) fill_inner_n_tuple (4, 6)
+#undef fill_inner_n_tuple
+#undef fill_inner_n_tuple_of
+ }
+
pattern_end:
+ if ((f->actions & VNET_FLOW_ACTION_RSS) &&
+ (f->rss_types & (1ULL << VNET_FLOW_RSS_TYPES_ESP)))
+ {
+
+ vec_add2 (items, item, 1);
+ item->type = RTE_FLOW_ITEM_TYPE_ESP;
+ }
+
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_END;
@@ -482,6 +646,10 @@ pattern_end:
/* convert types to DPDK rss bitmask */
dpdk_flow_convert_rss_types (f->rss_types, &rss_type);
+ if (f->queue_num)
+ /* convert rss queues to array */
+ dpdk_flow_convert_rss_queues (f->queue_index, f->queue_num, &rss);
+
rss.types = rss_type;
if ((rss.func = dpdk_flow_convert_rss_func (f->rss_fun)) ==
RTE_ETH_HASH_FUNCTION_MAX)
@@ -547,6 +715,7 @@ int
dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
u32 flow_index, uword * private_data)
{
+ vlib_main_t *vm = vlib_get_main ();
dpdk_main_t *dm = &dpdk_main;
vnet_flow_t *flow = vnet_get_flow (flow_index);
dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
@@ -557,7 +726,7 @@ dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
/* recycle old flow lookup entries only after the main loop counter
increases - i.e. previously DMA'ed packets were handled */
if (vec_len (xd->parked_lookup_indexes) > 0 &&
- xd->parked_loop_count != dm->vlib_main->main_loop_count)
+ xd->parked_loop_count != vm->main_loop_count)
{
u32 *fl_index;
@@ -580,7 +749,7 @@ dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
fle = pool_elt_at_index (xd->flow_lookup_entries, fe->mark);
clib_memset (fle, -1, sizeof (*fle));
vec_add1 (xd->parked_lookup_indexes, fe->mark);
- xd->parked_loop_count = dm->vlib_main->main_loop_count;
+ xd->parked_loop_count = vm->main_loop_count;
}
clib_memset (fe, 0, sizeof (*fe));
@@ -644,6 +813,15 @@ dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
case VNET_FLOW_TYPE_IP4_L2TPV3OIP:
case VNET_FLOW_TYPE_IP4_IPSEC_ESP:
case VNET_FLOW_TYPE_IP4_IPSEC_AH:
+ case VNET_FLOW_TYPE_IP4_IP4:
+ case VNET_FLOW_TYPE_IP4_IP4_N_TUPLE:
+ case VNET_FLOW_TYPE_IP4_IP6:
+ case VNET_FLOW_TYPE_IP4_IP6_N_TUPLE:
+ case VNET_FLOW_TYPE_IP6_IP4:
+ case VNET_FLOW_TYPE_IP6_IP4_N_TUPLE:
+ case VNET_FLOW_TYPE_IP6_IP6:
+ case VNET_FLOW_TYPE_IP6_IP6_N_TUPLE:
+ case VNET_FLOW_TYPE_GENERIC:
if ((rv = dpdk_flow_add (xd, flow, fe)))
goto done;
break;
diff --git a/src/plugins/dpdk/device/format.c b/src/plugins/dpdk/device/format.c
index 24994aa9426..c4170c20329 100644
--- a/src/plugins/dpdk/device/format.c
+++ b/src/plugins/dpdk/device/format.c
@@ -17,9 +17,6 @@
#include <vppinfra/format.h>
#include <assert.h>
-#define __USE_GNU
-#include <dlfcn.h>
-
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/sfp.h>
#include <dpdk/device/dpdk.h>
@@ -49,18 +46,28 @@
#endif
#define foreach_dpdk_pkt_rx_offload_flag \
- _ (PKT_RX_VLAN, "RX packet is a 802.1q VLAN packet") \
- _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \
- _ (PKT_RX_FDIR, "RX packet with FDIR infos") \
- _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
- _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
- _ (PKT_RX_OUTER_IP_CKSUM_BAD, "External IP header checksum error") \
- _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \
- _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \
- _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid") \
- _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
- _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \
- _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped")
+ _ (RX_FDIR, "RX packet with FDIR infos") \
+ _ (RX_FDIR_FLX, "RX packet with FDIR_FLX info") \
+ _ (RX_FDIR_ID, "RX packet with FDIR_ID info") \
+ _ (RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
+ _ (RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \
+ _ (RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
+ _ (RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \
+ _ (RX_IP_CKSUM_NONE, "no IP cksum of RX pkt.") \
+ _ (RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
+ _ (RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid") \
+ _ (RX_L4_CKSUM_NONE, "no L4 cksum of RX pkt.") \
+ _ (RX_LRO, "LRO packet") \
+ _ (RX_OUTER_IP_CKSUM_BAD, "External IP header checksum error") \
+ _ (RX_OUTER_L4_CKSUM_BAD, "External L4 header checksum error") \
+ _ (RX_OUTER_L4_CKSUM_GOOD, "External L4 header checksum OK") \
+ _ (RX_QINQ, "RX packet with QinQ tags") \
+ _ (RX_QINQ_STRIPPED, "RX packet QinQ tags stripped") \
+ _ (RX_RSS_HASH, "RX packet with RSS hash result") \
+ _ (RX_SEC_OFFLOAD, "RX packet with security offload") \
+ _ (RX_SEC_OFFLOAD_FAILED, "RX packet with security offload failed") \
+ _ (RX_VLAN, "RX packet is a 802.1q VLAN packet") \
+ _ (RX_VLAN_STRIPPED, "RX packet VLAN tag stripped")
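
[editor's note] These offload-flag tables are X-macros: each _ (NAME, "description") row is expanded by redefining _ at the point of use, which is exactly what the format_dpdk_pkt_offload_flags hunk later in this diff does against RTE_MBUF_F_##NAME. A trimmed, self-contained illustration of the pattern (two rows only; assumes DPDK >= 21.11 for the RTE_MBUF_F_* names):

#include <rte_mbuf_core.h>

#define foreach_demo_flag                                                     \
  _ (RX_VLAN, "RX packet is a 802.1q VLAN packet")                            \
  _ (RX_RSS_HASH, "RX packet with RSS hash result")

static const char *
demo_first_flag_name (uint64_t ol_flags)
{
#define _(F, S)                                                               \
  if (ol_flags & RTE_MBUF_F_##F)                                              \
    return S;
  foreach_demo_flag
#undef _
  return "none";
}
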
#define foreach_dpdk_pkt_type \
_ (L2, ETHER, "Ethernet packet") \
@@ -103,14 +110,32 @@
_ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet")
#define foreach_dpdk_pkt_tx_offload_flag \
- _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \
- _ (PKT_TX_TUNNEL_VXLAN, "TX packet is a VXLAN packet") \
- _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_OUTER_IP_CKSUM, "Outer IP cksum of Tx pkt. computed by NIC") \
- _ (PKT_TX_TCP_SEG, "TSO of TX pkt. done by NIC") \
- _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp")
+ _ (TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp") \
+ _ (TX_IPV4, "TX IPV4") \
+ _ (TX_IPV6, "TX IPV6") \
+ _ (TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \
+ _ (TX_MACSEC, "TX MACSEC") \
+ _ (TX_OUTER_IPV4, "TX outer IPV4") \
+ _ (TX_OUTER_IPV6, "TX outer IPV6") \
+ _ (TX_OUTER_IP_CKSUM, "Outer IP cksum of Tx pkt. computed by NIC") \
+ _ (TX_OUTER_UDP_CKSUM, "TX outer UDP cksum") \
+ _ (TX_QINQ, "TX QINQ") \
+ _ (TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \
+ _ (TX_SEC_OFFLOAD, "TX SEC OFFLOAD") \
+ _ (TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \
+ _ (TX_TCP_SEG, "TSO of TX pkt. done by NIC") \
+ _ (TX_TUNNEL_GENEVE, "TX tunnel GENEVE") \
+ _ (TX_TUNNEL_GRE, "TX tunnel GRE") \
+ _ (TX_TUNNEL_GTP, "TX tunnel GTP") \
+ _ (TX_TUNNEL_IP, "TX tunnel IP") \
+ _ (TX_TUNNEL_IPIP, "TX tunnel IPIP") \
+ _ (TX_TUNNEL_MPLSINUDP, "TX tunnel MPLSinUDP") \
+ _ (TX_TUNNEL_UDP, "TX tunnel UDP") \
+ _ (TX_TUNNEL_VXLAN, "TX packet is a VXLAN packet") \
+ _ (TX_TUNNEL_VXLAN_GPE, "TX tunnel VXLAN GPE") \
+ _ (TX_UDP_CKSUM, "TX UDP cksum") \
+ _ (TX_UDP_SEG, "TX UDP SEG") \
+ _ (TX_VLAN, "TX packet is a 802.1q VLAN packet")
#define foreach_dpdk_pkt_offload_flag \
foreach_dpdk_pkt_rx_offload_flag \
@@ -123,105 +148,10 @@ u8 *
format_dpdk_device_name (u8 * s, va_list * args)
{
dpdk_main_t *dm = &dpdk_main;
- char *devname_format;
- char *device_name;
u32 i = va_arg (*args, u32);
dpdk_device_t *xd = vec_elt_at_index (dm->devices, i);
- struct rte_eth_dev_info dev_info;
- struct rte_pci_device *pci_dev;
- u8 *ret;
-
- if (xd->name)
- return format (s, "%s", xd->name);
-
- if (dm->conf->interface_name_format_decimal)
- devname_format = "%s%d/%d/%d";
- else
- devname_format = "%s%x/%x/%x";
-
- switch (xd->port_type)
- {
- case VNET_DPDK_PORT_TYPE_ETH_1G:
- device_name = "GigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_2_5G:
- device_name = "Two_FiveGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_5G:
- device_name = "FiveGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_10G:
- device_name = "TenGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_20G:
- device_name = "TwentyGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_25G:
- device_name = "TwentyFiveGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_40G:
- device_name = "FortyGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_50G:
- device_name = "FiftyGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_56G:
- device_name = "FiftySixGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_100G:
- device_name = "HundredGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_SWITCH:
- device_name = "EthernetSwitch";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_VF:
- device_name = "VirtualFunctionEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_AF_PACKET:
- return format (s, "af_packet%d", xd->af_packet_instance_num);
-
- case VNET_DPDK_PORT_TYPE_VIRTIO_USER:
- device_name = "VirtioUser";
- break;
-
- case VNET_DPDK_PORT_TYPE_VHOST_ETHER:
- device_name = "VhostEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_FAILSAFE:
- device_name = "FailsafeEthernet";
- break;
-
- default:
- case VNET_DPDK_PORT_TYPE_UNKNOWN:
- device_name = "UnknownEthernet";
- break;
- }
-
- rte_eth_dev_info_get (xd->port_id, &dev_info);
- pci_dev = dpdk_get_pci_device (&dev_info);
-
- if (pci_dev && xd->port_type != VNET_DPDK_PORT_TYPE_FAILSAFE)
- ret = format (s, devname_format, device_name, pci_dev->addr.bus,
- pci_dev->addr.devid, pci_dev->addr.function);
- else
- ret = format (s, "%s%d", device_name, xd->port_id);
- if (xd->interface_name_suffix)
- return format (ret, "/%s", xd->interface_name_suffix);
- return ret;
+ return format (s, "%v", xd->name);
}
u8 *
@@ -243,126 +173,12 @@ static u8 *
format_dpdk_device_type (u8 * s, va_list * args)
{
dpdk_main_t *dm = &dpdk_main;
- char *dev_type;
u32 i = va_arg (*args, u32);
- switch (dm->devices[i].pmd)
- {
- case VNET_DPDK_PMD_E1000EM:
- dev_type = "Intel 82540EM (e1000)";
- break;
-
- case VNET_DPDK_PMD_IGB:
- dev_type = "Intel e1000";
- break;
-
- case VNET_DPDK_PMD_I40E:
- dev_type = "Intel X710/XL710 Family";
- break;
-
- case VNET_DPDK_PMD_I40EVF:
- dev_type = "Intel X710/XL710 Family VF";
- break;
-
- case VNET_DPDK_PMD_ICE:
- dev_type = "Intel E810 Family";
- break;
-
- case VNET_DPDK_PMD_IAVF:
- dev_type = "Intel iAVF";
- break;
-
- case VNET_DPDK_PMD_FM10K:
- dev_type = "Intel FM10000 Family Ethernet Switch";
- break;
-
- case VNET_DPDK_PMD_IGBVF:
- dev_type = "Intel e1000 VF";
- break;
-
- case VNET_DPDK_PMD_VIRTIO:
- dev_type = "Red Hat Virtio";
- break;
-
- case VNET_DPDK_PMD_IXGBEVF:
- dev_type = "Intel 82599 VF";
- break;
-
- case VNET_DPDK_PMD_IXGBE:
- dev_type = "Intel 82599";
- break;
-
- case VNET_DPDK_PMD_ENIC:
- dev_type = "Cisco VIC";
- break;
-
- case VNET_DPDK_PMD_CXGBE:
- dev_type = "Chelsio T4/T5";
- break;
-
- case VNET_DPDK_PMD_MLX4:
- dev_type = "Mellanox ConnectX-3 Family";
- break;
-
- case VNET_DPDK_PMD_MLX5:
- dev_type = "Mellanox ConnectX-4 Family";
- break;
-
- case VNET_DPDK_PMD_VMXNET3:
- dev_type = "VMware VMXNET3";
- break;
-
- case VNET_DPDK_PMD_AF_PACKET:
- dev_type = "af_packet";
- break;
-
- case VNET_DPDK_PMD_DPAA2:
- dev_type = "NXP DPAA2 Mac";
- break;
-
- case VNET_DPDK_PMD_VIRTIO_USER:
- dev_type = "Virtio User";
- break;
-
- case VNET_DPDK_PMD_THUNDERX:
- dev_type = "Cavium ThunderX";
- break;
-
- case VNET_DPDK_PMD_VHOST_ETHER:
- dev_type = "VhostEthernet";
- break;
-
- case VNET_DPDK_PMD_ENA:
- dev_type = "AWS ENA VF";
- break;
-
- case VNET_DPDK_PMD_FAILSAFE:
- dev_type = "FailsafeEthernet";
- break;
-
- case VNET_DPDK_PMD_LIOVF_ETHER:
- dev_type = "Cavium Lio VF";
- break;
-
- case VNET_DPDK_PMD_QEDE:
- dev_type = "Cavium QLogic FastLinQ QL4xxxx";
- break;
-
- case VNET_DPDK_PMD_NETVSC:
- dev_type = "Microsoft Hyper-V Netvsc";
- break;
-
- case VNET_DPDK_PMD_BNXT:
- dev_type = "Broadcom NetXtreme E/S-Series";
- break;
-
- default:
- case VNET_DPDK_PMD_UNKNOWN:
- dev_type = "### UNKNOWN ###";
- break;
- }
-
- return format (s, dev_type);
+ if (dm->devices[i].if_desc)
+ return format (s, dm->devices[i].if_desc);
+ else
+ return format (s, "### UNKNOWN ###");
}
static u8 *
@@ -378,10 +194,11 @@ format_dpdk_link_status (u8 * s, va_list * args)
{
u32 promisc = rte_eth_promiscuous_get (xd->port_id);
- s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ?
- "full" : "half");
- s = format (s, "mtu %d %s\n", hi->max_packet_bytes, promisc ?
- " promisc" : "");
+ s = format (s, "%s duplex ",
+ (l->link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ? "full" :
+ "half");
+ s = format (s, "max-frame-size %d %s\n", hi->max_frame_size,
+ promisc ? " promisc" : "");
}
else
s = format (s, "\n");
@@ -419,8 +236,6 @@ format_offload (u8 * s, va_list * va)
uword i, l;
l = ~0;
- if (clib_mem_is_vec (id))
- l = vec_len (id);
if (id)
for (i = 0; id[i] != 0 && i < l; i++)
@@ -523,15 +338,29 @@ format_dpdk_device_module_info (u8 * s, va_list * args)
return s;
}
-static const char *
-ptr2sname (void *p)
+u8 *
+format_dpdk_burst_fn (u8 *s, va_list *args)
{
- Dl_info info = { 0 };
+ dpdk_device_t *xd = va_arg (*args, dpdk_device_t *);
+ vlib_rx_or_tx_t dir = va_arg (*args, vlib_rx_or_tx_t);
+ void *p;
+ clib_elf_symbol_t sym;
- if (dladdr (p, &info) == 0)
- return 0;
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+#define rte_eth_fp_ops rte_eth_devices
+#endif
+
+ p = (dir == VLIB_TX) ? rte_eth_fp_ops[xd->port_id].tx_pkt_burst :
+ rte_eth_fp_ops[xd->port_id].rx_pkt_burst;
- return info.dli_sname;
+ if (clib_elf_symbol_by_address (pointer_to_uword (p), &sym))
+ {
+ return format (s, "%s", clib_elf_symbol_name (&sym));
+ }
+ else
+ {
+ return format (s, "(not available)");
+ }
}
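
[editor's note] format_dpdk_burst_fn is a regular format_function_t, so it composes under the %U conversion; the format_dpdk_device hunk below uses it exactly this way:

  s = format (s, "%Utx burst function: %U\n", format_white_space, indent + 2,
	      format_dpdk_burst_fn, xd, VLIB_TX);
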
static u8 *
@@ -549,16 +378,51 @@ format_switch_info (u8 * s, va_list * args)
}
u8 *
+format_dpdk_rte_device (u8 *s, va_list *args)
+{
+ struct rte_device *d = va_arg (*args, struct rte_device *);
+
+ if (!d)
+ return format (s, "not available");
+
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ s =
+ format (s, "name: %s, numa: %d", rte_dev_name (d), rte_dev_numa_node (d));
+
+ if (rte_dev_driver (d))
+ s = format (s, ", driver: %s", rte_driver_name (rte_dev_driver (d)));
+
+ if (rte_dev_bus (d))
+ s = format (s, ", bus: %s", rte_bus_name (rte_dev_bus (d)));
+#else
+ s = format (s, "name: %s, numa: %d", d->name, d->numa_node);
+
+ if (d->driver)
+ s = format (s, ", driver: %s", d->driver->name);
+
+ if (d->bus)
+ s = format (s, ", bus: %s", d->bus->name);
+#endif
+
+ return s;
+}
+
+u8 *
format_dpdk_device (u8 * s, va_list * args)
{
u32 dev_instance = va_arg (*args, u32);
int verbose = va_arg (*args, int);
dpdk_main_t *dm = &dpdk_main;
+ vlib_main_t *vm = vlib_get_main ();
dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
u32 indent = format_get_indent (s);
- f64 now = vlib_time_now (dm->vlib_main);
+ f64 now = vlib_time_now (vm);
struct rte_eth_dev_info di;
struct rte_eth_burst_mode mode;
+ struct rte_pci_device *pci;
+ struct rte_eth_rss_conf rss_conf;
+ int vlan_off;
+ int retval;
dpdk_update_counters (xd, now);
dpdk_update_link_state (xd, now);
@@ -569,126 +433,114 @@ format_dpdk_device (u8 * s, va_list * args)
format_white_space, indent + 2, format_dpdk_link_status, xd);
s = format (s, "%Uflags: %U\n",
format_white_space, indent + 2, format_dpdk_device_flags, xd);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (rte_dev_devargs (di.device) && rte_dev_devargs (di.device)->args)
+ s = format (s, "%UDevargs: %s\n", format_white_space, indent + 2,
+ rte_dev_devargs (di.device)->args);
+#else
if (di.device->devargs && di.device->devargs->args)
s = format (s, "%UDevargs: %s\n",
format_white_space, indent + 2, di.device->devargs->args);
- s = format (s, "%Urx: queues %d (max %d), desc %d "
+#endif
+ s = format (s,
+ "%Urx: queues %d (max %d), desc %d "
"(min %d max %d align %d)\n",
- format_white_space, indent + 2, xd->rx_q_used, di.max_rx_queues,
- xd->nb_rx_desc, di.rx_desc_lim.nb_min, di.rx_desc_lim.nb_max,
- di.rx_desc_lim.nb_align);
- s = format (s, "%Utx: queues %d (max %d), desc %d "
+ format_white_space, indent + 2, xd->conf.n_rx_queues,
+ di.max_rx_queues, xd->conf.n_rx_desc, di.rx_desc_lim.nb_min,
+ di.rx_desc_lim.nb_max, di.rx_desc_lim.nb_align);
+ s = format (s,
+ "%Utx: queues %d (max %d), desc %d "
"(min %d max %d align %d)\n",
- format_white_space, indent + 2, xd->tx_q_used, di.max_tx_queues,
- xd->nb_tx_desc, di.tx_desc_lim.nb_min, di.tx_desc_lim.nb_max,
- di.tx_desc_lim.nb_align);
+ format_white_space, indent + 2, xd->conf.n_tx_queues,
+ di.max_tx_queues, xd->conf.n_tx_desc, di.tx_desc_lim.nb_min,
+ di.tx_desc_lim.nb_max, di.tx_desc_lim.nb_align);
- if (xd->flags & DPDK_DEVICE_FLAG_PMD)
- {
- struct rte_pci_device *pci;
- struct rte_eth_rss_conf rss_conf;
- int vlan_off;
- int retval;
+ rss_conf.rss_key = 0;
+ rss_conf.rss_hf = 0;
+ retval = rte_eth_dev_rss_hash_conf_get (xd->port_id, &rss_conf);
+ if (retval < 0)
+ clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval);
- rss_conf.rss_key = 0;
- rss_conf.rss_hf = 0;
- retval = rte_eth_dev_rss_hash_conf_get (xd->port_id, &rss_conf);
- if (retval < 0)
- clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval);
+ pci = dpdk_get_pci_device (&di);
- pci = dpdk_get_pci_device (&di);
+ if (pci)
+ {
+ u8 *s2;
+ if (xd->cpu_socket > -1)
+ s2 = format (0, "%d", xd->cpu_socket);
+ else
+ s2 = format (0, "unknown");
+ s = format (s,
+ "%Upci: device %04x:%04x subsystem %04x:%04x "
+ "address %04x:%02x:%02x.%02x numa %v\n",
+ format_white_space, indent + 2, pci->id.vendor_id,
+ pci->id.device_id, pci->id.subsystem_vendor_id,
+ pci->id.subsystem_device_id, pci->addr.domain, pci->addr.bus,
+ pci->addr.devid, pci->addr.function, s2);
+ vec_free (s2);
+ }
- if (pci)
- {
- u8 *s2;
- if (xd->cpu_socket > -1)
- s2 = format (0, "%d", xd->cpu_socket);
- else
- s2 = format (0, "unknown");
- s = format (s, "%Upci: device %04x:%04x subsystem %04x:%04x "
- "address %04x:%02x:%02x.%02x numa %v\n",
- format_white_space, indent + 2, pci->id.vendor_id,
- pci->id.device_id, pci->id.subsystem_vendor_id,
- pci->id.subsystem_device_id, pci->addr.domain,
- pci->addr.bus, pci->addr.devid, pci->addr.function, s2);
- vec_free (s2);
- }
+ if (di.switch_info.domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
+ {
+ s = format (s, "%Uswitch info: %U\n", format_white_space, indent + 2,
+ format_switch_info, &di.switch_info);
+ }
- if (di.switch_info.domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
- {
- s =
- format (s, "%Uswitch info: %U\n", format_white_space, indent + 2,
- format_switch_info, &di.switch_info);
- }
+ if (1 < verbose)
+ {
+ s = format (s, "%Umodule: %U\n", format_white_space, indent + 2,
+ format_dpdk_device_module_info, xd);
+ }
- if (1 < verbose)
- {
- s = format (s, "%Umodule: %U\n", format_white_space, indent + 2,
- format_dpdk_device_module_info, xd);
- }
+ s = format (s, "%Umax rx packet len: %d\n", format_white_space, indent + 2,
+ di.max_rx_pktlen);
+ s = format (s, "%Upromiscuous: unicast %s all-multicast %s\n",
+ format_white_space, indent + 2,
+ rte_eth_promiscuous_get (xd->port_id) ? "on" : "off",
+ rte_eth_allmulticast_get (xd->port_id) ? "on" : "off");
+ vlan_off = rte_eth_dev_get_vlan_offload (xd->port_id);
+ s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n",
+ format_white_space, indent + 2,
+ vlan_off & RTE_ETH_VLAN_STRIP_OFFLOAD ? "on" : "off",
+ vlan_off & RTE_ETH_VLAN_FILTER_OFFLOAD ? "on" : "off",
+ vlan_off & RTE_ETH_VLAN_EXTEND_OFFLOAD ? "on" : "off");
+ s = format (s, "%Urx offload avail: %U\n", format_white_space, indent + 2,
+ format_dpdk_rx_offload_caps, di.rx_offload_capa);
+ s = format (s, "%Urx offload active: %U\n", format_white_space, indent + 2,
+ format_dpdk_rx_offload_caps, xd->enabled_rx_off);
+ s = format (s, "%Utx offload avail: %U\n", format_white_space, indent + 2,
+ format_dpdk_tx_offload_caps, di.tx_offload_capa);
+ s = format (s, "%Utx offload active: %U\n", format_white_space, indent + 2,
+ format_dpdk_tx_offload_caps, xd->enabled_tx_off);
+ s = format (s,
+ "%Urss avail: %U\n"
+ "%Urss active: %U\n",
+ format_white_space, indent + 2, format_dpdk_rss_hf_name,
+ di.flow_type_rss_offloads, format_white_space, indent + 2,
+ format_dpdk_rss_hf_name, rss_conf.rss_hf);
+
+ if (rte_eth_tx_burst_mode_get (xd->port_id, 0, &mode) == 0)
+ {
+ s = format (s, "%Utx burst mode: %s%s\n", format_white_space, indent + 2,
+ mode.info,
+ mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ? " (per queue)" :
+ "");
+ }
- s = format (s, "%Umax rx packet len: %d\n", format_white_space,
- indent + 2, di.max_rx_pktlen);
- s = format (s, "%Upromiscuous: unicast %s all-multicast %s\n",
- format_white_space, indent + 2,
- rte_eth_promiscuous_get (xd->port_id) ? "on" : "off",
- rte_eth_allmulticast_get (xd->port_id) ? "on" : "off");
- vlan_off = rte_eth_dev_get_vlan_offload (xd->port_id);
- s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n",
- format_white_space, indent + 2,
- vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off",
- vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off",
- vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? "on" : "off");
- s = format (s, "%Urx offload avail: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rx_offload_caps, di.rx_offload_capa);
- s = format (s, "%Urx offload active: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rx_offload_caps, xd->port_conf.rxmode.offloads);
- s = format (s, "%Utx offload avail: %U\n",
- format_white_space, indent + 2,
- format_dpdk_tx_offload_caps, di.tx_offload_capa);
- s = format (s, "%Utx offload active: %U\n",
- format_white_space, indent + 2,
- format_dpdk_tx_offload_caps, xd->port_conf.txmode.offloads);
- s = format (s, "%Urss avail: %U\n"
- "%Urss active: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rss_hf_name, di.flow_type_rss_offloads,
- format_white_space, indent + 2,
- format_dpdk_rss_hf_name, rss_conf.rss_hf);
-
- if (rte_eth_tx_burst_mode_get (xd->port_id, 0, &mode) == 0)
- {
- s = format (s, "%Utx burst mode: %s%s\n",
- format_white_space, indent + 2,
- mode.info,
- mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ?
- " (per queue)" : "");
- }
- else
- {
- s = format (s, "%Utx burst function: %s\n",
- format_white_space, indent + 2,
- ptr2sname (rte_eth_devices[xd->port_id].tx_pkt_burst));
- }
+ s = format (s, "%Utx burst function: %U\n", format_white_space, indent + 2,
+ format_dpdk_burst_fn, xd, VLIB_TX);
- if (rte_eth_rx_burst_mode_get (xd->port_id, 0, &mode) == 0)
- {
- s = format (s, "%Urx burst mode: %s%s\n",
- format_white_space, indent + 2,
- mode.info,
- mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ?
- " (per queue)" : "");
- }
- else
- {
- s = format (s, "%Urx burst function: %s\n",
- format_white_space, indent + 2,
- ptr2sname (rte_eth_devices[xd->port_id].rx_pkt_burst));
- }
+ if (rte_eth_rx_burst_mode_get (xd->port_id, 0, &mode) == 0)
+ {
+ s = format (s, "%Urx burst mode: %s%s\n", format_white_space, indent + 2,
+ mode.info,
+ mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ? " (per queue)" :
+ "");
}
+ s = format (s, "%Urx burst function: %U\n", format_white_space, indent + 2,
+ format_dpdk_burst_fn, xd, VLIB_RX);
+
/* $$$ MIB counters */
{
#define _(N, V) \
@@ -713,7 +565,6 @@ format_dpdk_device (u8 * s, va_list * args)
if (ret >= 0 && ret <= len)
{
- /* *INDENT-OFF* */
vec_foreach_index(i, xd->xstats)
{
xstat = vec_elt_at_index(xd->xstats, i);
@@ -725,7 +576,6 @@ format_dpdk_device (u8 * s, va_list * args)
xstat->value);
}
}
- /* *INDENT-ON* */
vec_free (xstat_names);
}
@@ -756,14 +606,12 @@ format_dpdk_tx_trace (u8 * s, va_list * va)
dpdk_main_t *dm = &dpdk_main;
dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index);
u32 indent = format_get_indent (s);
- vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index);
- s = format (s, "%U tx queue %d",
- format_vnet_sw_interface_name, vnm, sw, t->queue_index);
+ s = format (s, "%U tx queue %d", format_vnet_sw_if_index_name, vnm,
+ xd->sw_if_index, t->queue_index);
- s = format (s, "\n%Ubuffer 0x%x: %U",
- format_white_space, indent,
- t->buffer_index, format_vnet_buffer, &t->buffer);
+ s = format (s, "\n%Ubuffer 0x%x: %U", format_white_space, indent,
+ t->buffer_index, format_vnet_buffer_no_chain, &t->buffer);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -787,14 +635,12 @@ format_dpdk_rx_trace (u8 * s, va_list * va)
dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index);
format_function_t *f;
u32 indent = format_get_indent (s);
- vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index);
- s = format (s, "%U rx queue %d",
- format_vnet_sw_interface_name, vnm, sw, t->queue_index);
+ s = format (s, "%U rx queue %d", format_vnet_sw_if_index_name, vnm,
+ xd->sw_if_index, t->queue_index);
- s = format (s, "\n%Ubuffer 0x%x: %U",
- format_white_space, indent,
- t->buffer_index, format_vnet_buffer, &t->buffer);
+ s = format (s, "\n%Ubuffer 0x%x: %U", format_white_space, indent,
+ t->buffer_index, format_vnet_buffer_no_chain, &t->buffer);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -855,11 +701,11 @@ format_dpdk_pkt_offload_flags (u8 * s, va_list * va)
s = format (s, "Packet Offload Flags");
-#define _(F, S) \
- if (*ol_flags & F) \
- { \
- s = format (s, "\n%U%s (0x%04x) %s", \
- format_white_space, indent, #F, F, S); \
+#define _(F, S) \
+ if ((*ol_flags & RTE_MBUF_F_##F) == RTE_MBUF_F_##F) \
+ { \
+ s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \
+ "PKT_" #F, RTE_MBUF_F_##F, S); \
}
foreach_dpdk_pkt_offload_flag
@@ -887,7 +733,7 @@ u8 *
format_dpdk_rte_mbuf_tso (u8 *s, va_list *va)
{
struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *);
- if (mb->ol_flags & PKT_TX_TCP_SEG)
+ if (mb->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
{
s = format (s, "l4_len %u tso_segsz %u", mb->l4_len, mb->tso_segsz);
}
@@ -940,8 +786,9 @@ format_dpdk_rte_mbuf (u8 * s, va_list * va)
s = format (s, "\n%U%U", format_white_space, indent,
format_dpdk_pkt_offload_flags, &mb->ol_flags);
- if ((mb->ol_flags & PKT_RX_VLAN) &&
- ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0))
+ if ((mb->ol_flags & RTE_MBUF_F_RX_VLAN) &&
+ ((mb->ol_flags &
+ (RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_QINQ_STRIPPED)) == 0))
{
ethernet_vlan_header_tv_t *vlan_hdr =
((ethernet_vlan_header_tv_t *) & (eth_hdr->type));
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index f923da6c09e..2d038b907bf 100644
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -21,8 +21,10 @@
#include <vlib/unix/unix.h>
#include <vlib/log.h>
+#include <vnet/vnet.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
#include <dpdk/buffer.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/cryptodev/cryptodev.h>
@@ -43,71 +45,69 @@
#include <dpdk/device/dpdk_priv.h>
-#define ETHER_MAX_LEN 1518 /**< Maximum frame len, including CRC. */
-
dpdk_main_t dpdk_main;
dpdk_config_main_t dpdk_config_main;
#define LINK_STATE_ELOGS 0
-/* Port configuration, mildly modified Intel app values */
+/* dev_info.speed_capa -> interface name mappings */
+const struct
+{
+ u32 link_speed;
+ const char *pfx;
+} if_name_prefixes[] = {
+ /* sorted, higher speed first */
+ { RTE_ETH_LINK_SPEED_200G, "TwoHundredGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_100G, "HundredGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_56G, "FiftySixGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_50G, "FiftyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_40G, "FortyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_25G, "TwentyFiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_20G, "TwentyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_10G, "TenGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_5G, "FiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_2_5G, "TwoDotFiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_1G, "GigabitEthernet" },
+};
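
[editor's note] Because the table is sorted highest speed first, the init code later in this diff derives an "unknown speeds" mask from the first entry and takes the first matching prefix. A sketch of that lookup; speed_capa stands in for dev_info.speed_capa:

  /* everything above the highest known bit (200G) is unrecognized */
  u64 mask = ~((if_name_prefixes[0].link_speed << 1) - 1);
  const char *pfx = "Ethernet"; /* fallback when no known bit is set */

  for (int i = 0; i < ARRAY_LEN (if_name_prefixes); i++)
    if (if_name_prefixes[i].link_speed & speed_capa)
      {
	pfx = if_name_prefixes[i].pfx;
	break;
      }
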
-static dpdk_port_type_t
-port_type_from_speed_capa (struct rte_eth_dev_info *dev_info)
+static clib_error_t *
+dpdk_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ u32 frame_size)
{
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
+ int rv;
+ u32 mtu;
- if (dev_info->speed_capa & ETH_LINK_SPEED_100G)
- return VNET_DPDK_PORT_TYPE_ETH_100G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_56G)
- return VNET_DPDK_PORT_TYPE_ETH_56G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_50G)
- return VNET_DPDK_PORT_TYPE_ETH_50G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_40G)
- return VNET_DPDK_PORT_TYPE_ETH_40G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_25G)
- return VNET_DPDK_PORT_TYPE_ETH_25G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_20G)
- return VNET_DPDK_PORT_TYPE_ETH_20G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_10G)
- return VNET_DPDK_PORT_TYPE_ETH_10G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_5G)
- return VNET_DPDK_PORT_TYPE_ETH_5G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_2_5G)
- return VNET_DPDK_PORT_TYPE_ETH_2_5G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_1G)
- return VNET_DPDK_PORT_TYPE_ETH_1G;
-
- return VNET_DPDK_PORT_TYPE_UNKNOWN;
-}
+ mtu = frame_size - xd->driver_frame_overhead;
-static dpdk_port_type_t
-port_type_from_link_speed (u32 link_speed)
-{
- switch (link_speed)
+ rv = rte_eth_dev_set_mtu (xd->port_id, mtu);
+
+ if (rv < 0)
{
- case ETH_SPEED_NUM_1G:
- return VNET_DPDK_PORT_TYPE_ETH_1G;
- case ETH_SPEED_NUM_2_5G:
- return VNET_DPDK_PORT_TYPE_ETH_2_5G;
- case ETH_SPEED_NUM_5G:
- return VNET_DPDK_PORT_TYPE_ETH_5G;
- case ETH_SPEED_NUM_10G:
- return VNET_DPDK_PORT_TYPE_ETH_10G;
- case ETH_SPEED_NUM_20G:
- return VNET_DPDK_PORT_TYPE_ETH_20G;
- case ETH_SPEED_NUM_25G:
- return VNET_DPDK_PORT_TYPE_ETH_25G;
- case ETH_SPEED_NUM_40G:
- return VNET_DPDK_PORT_TYPE_ETH_40G;
- case ETH_SPEED_NUM_50G:
- return VNET_DPDK_PORT_TYPE_ETH_50G;
- case ETH_SPEED_NUM_56G:
- return VNET_DPDK_PORT_TYPE_ETH_56G;
- case ETH_SPEED_NUM_100G:
- return VNET_DPDK_PORT_TYPE_ETH_100G;
- default:
- return VNET_DPDK_PORT_TYPE_UNKNOWN;
+ dpdk_log_err ("[%u] rte_eth_dev_set_mtu failed (mtu %u, rv %d)",
+ xd->port_id, mtu, rv);
+ switch (rv)
+ {
+ case -ENOTSUP:
+ return vnet_error (VNET_ERR_UNSUPPORTED,
+ "dpdk driver doesn't support MTU change");
+ case -EBUSY:
+ return vnet_error (VNET_ERR_BUSY, "port is running");
+ case -EINVAL:
+ return vnet_error (VNET_ERR_INVALID_VALUE, "invalid MTU");
+ default:
+ return vnet_error (VNET_ERR_BUG,
+ "unexpected return value %d returned from "
+ "rte_eth_dev_set_mtu(...)",
+ rv);
+ }
}
+ else
+ dpdk_log_debug ("[%u] max_frame_size set to %u by setting MTU to %u",
+ xd->port_id, frame_size, mtu);
+
+ return 0;
}
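
[editor's note] VPP hands this callback an L2 max frame size while rte_eth_dev_set_mtu () takes an L3 MTU, hence the subtraction of the per-driver overhead. With the default overhead computed later in this diff (illustrative numbers):

  /* driver_frame_overhead = RTE_ETHER_HDR_LEN (14)
   *			   + 2 * RTE_VLAN_HLEN (2 * 4)
   *			   + RTE_ETHER_CRC_LEN (4)   = 26 bytes
   *
   * frame_size 1526 -> rte_eth_dev_set_mtu (port_id, 1500)
   * frame_size 9026 -> rte_eth_dev_set_mtu (port_id, 9000)
   */
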
static u32
@@ -121,15 +121,11 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
{
case ETHERNET_INTERFACE_FLAG_DEFAULT_L3:
/* set to L3/non-promisc mode */
- xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC;
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_PROMISC, 0);
break;
case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
- xd->flags |= DPDK_DEVICE_FLAG_PROMISC;
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_PROMISC, 1);
break;
- case ETHERNET_INTERFACE_FLAG_MTU:
- xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes;
- dpdk_device_setup (xd);
- return 0;
default:
return ~0;
}
@@ -145,12 +141,6 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
return old;
}
-static int
-dpdk_port_crc_strip_enabled (dpdk_device_t * xd)
-{
- return !(xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC);
-}
-
/* The function check_l3cache helps check if Level 3 cache exists or not on current CPUs
return value 1: exist.
return value 0: not exist.
@@ -192,701 +182,360 @@ check_l3cache ()
return 0;
}
-static void
-dpdk_enable_l4_csum_offload (dpdk_device_t * xd)
+static dpdk_device_config_t *
+dpdk_find_startup_config (struct rte_eth_dev_info *di)
{
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |= DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
+ dpdk_main_t *dm = &dpdk_main;
+ struct rte_pci_device *pci_dev;
+ vlib_pci_addr_t pci_addr;
+#ifdef __linux__
+ struct rte_vmbus_device *vmbus_dev;
+ vlib_vmbus_addr_t vmbus_addr;
+#endif /* __linux__ */
+ uword *p = 0;
+
+ if ((pci_dev = dpdk_get_pci_device (di)))
+ {
+ pci_addr.domain = pci_dev->addr.domain;
+ pci_addr.bus = pci_dev->addr.bus;
+ pci_addr.slot = pci_dev->addr.devid;
+ pci_addr.function = pci_dev->addr.function;
+ p =
+ hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+ }
+
+#ifdef __linux__
+ if ((vmbus_dev = dpdk_get_vmbus_device (di)))
+ {
+ unformat_input_t input_vmbus;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ const char *dev_name = rte_dev_name (di->device);
+#else
+ const char *dev_name = di->device->name;
+#endif
+ unformat_init_string (&input_vmbus, dev_name, strlen (dev_name));
+ if (unformat (&input_vmbus, "%U", unformat_vlib_vmbus_addr, &vmbus_addr))
+ p = mhash_get (&dm->conf->device_config_index_by_vmbus_addr,
+ &vmbus_addr);
+ unformat_free (&input_vmbus);
+ }
+#endif /* __linux__ */
+
+ if (p)
+ return pool_elt_at_index (dm->conf->dev_confs, p[0]);
+ return &dm->conf->default_devconf;
}
static clib_error_t *
dpdk_lib_init (dpdk_main_t * dm)
{
- u32 nports;
- u32 mtu, max_rx_frame;
- int i;
- clib_error_t *error;
- vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ u16 port_id;
vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_device_main_t *vdm = &vnet_device_main;
vnet_sw_interface_t *sw;
vnet_hw_interface_t *hi;
dpdk_device_t *xd;
- vlib_pci_addr_t last_pci_addr;
- u32 last_pci_addr_port = 0;
- u8 af_packet_instance_num = 0;
- last_pci_addr.as_u32 = ~0;
-
- nports = rte_eth_dev_count_avail ();
-
- if (nports < 1)
- {
- dpdk_log_notice ("DPDK drivers found no Ethernet devices...");
- }
-
- if (CLIB_DEBUG > 0)
- dpdk_log_notice ("DPDK drivers found %d ports...", nports);
-
- if (dm->conf->enable_tcp_udp_checksum)
- dm->buffer_flags_template &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT
- | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
+ char *if_num_fmt;
/* vlib_buffer_t template */
vec_validate_aligned (dm->per_thread_data, tm->n_vlib_mains - 1,
CLIB_CACHE_LINE_BYTES);
- for (i = 0; i < tm->n_vlib_mains; i++)
+ for (int i = 0; i < tm->n_vlib_mains; i++)
{
dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data, i);
clib_memset (&ptd->buffer_template, 0, sizeof (vlib_buffer_t));
- ptd->buffer_template.flags = dm->buffer_flags_template;
vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_TX] = (u32) ~ 0;
}
- /* *INDENT-OFF* */
- RTE_ETH_FOREACH_DEV(i)
+ if_num_fmt =
+ dm->conf->interface_name_format_decimal ? "%d/%d/%d" : "%x/%x/%x";
+
+ /* device config defaults */
+ dm->default_port_conf.n_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
+ dm->default_port_conf.n_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
+ dm->default_port_conf.n_rx_queues = 1;
+ dm->default_port_conf.n_tx_queues = tm->n_vlib_mains;
+ dm->default_port_conf.rss_hf =
+ RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP;
+ dm->default_port_conf.max_lro_pkt_size = DPDK_MAX_LRO_SIZE_DEFAULT;
+
+ if ((clib_mem_get_default_hugepage_size () == 2 << 20) &&
+ check_l3cache () == 0)
+ dm->default_port_conf.n_rx_desc = dm->default_port_conf.n_tx_desc = 512;
+
+ RTE_ETH_FOREACH_DEV (port_id)
{
u8 addr[6];
- int vlan_off;
- struct rte_eth_dev_info dev_info;
- struct rte_pci_device *pci_dev;
- struct rte_vmbus_device *vmbus_dev;
- dpdk_portid_t next_port_id;
+ int rv, q;
+ struct rte_eth_dev_info di;
dpdk_device_config_t *devconf = 0;
- vlib_pci_addr_t pci_addr;
- vlib_vmbus_addr_t vmbus_addr;
- uword *p = 0;
+ vnet_eth_interface_registration_t eir = {};
+ dpdk_driver_t *dr;
+ i8 numa_node;
- if (!rte_eth_dev_is_valid_port(i))
+ if (!rte_eth_dev_is_valid_port (port_id))
continue;
- rte_eth_dev_info_get (i, &dev_info);
-
- if (dev_info.device == 0)
+ if ((rv = rte_eth_dev_info_get (port_id, &di)) != 0)
{
- dpdk_log_notice ("DPDK bug: missing device info. Skipping %s device",
- dev_info.driver_name);
+ dpdk_log_warn ("[%u] failed to get device info. skipping device.",
+ port_id);
continue;
}
- pci_dev = dpdk_get_pci_device (&dev_info);
-
- if (pci_dev)
+ if (di.device == 0)
{
- pci_addr.domain = pci_dev->addr.domain;
- pci_addr.bus = pci_dev->addr.bus;
- pci_addr.slot = pci_dev->addr.devid;
- pci_addr.function = pci_dev->addr.function;
- p = hash_get (dm->conf->device_config_index_by_pci_addr,
- pci_addr.as_u32);
+ dpdk_log_warn ("[%u] missing device info. Skipping '%s' device",
+ port_id, di.driver_name);
+ continue;
}
- vmbus_dev = dpdk_get_vmbus_device (&dev_info);
+ devconf = dpdk_find_startup_config (&di);
- if (vmbus_dev)
+ /* If device is blacklisted, we should skip it */
+ if (devconf->is_blacklisted)
{
- unformat_input_t input_vmbus;
-
- unformat_init_vector (&input_vmbus, (u8 *) dev_info.device->name);
- if (unformat (&input_vmbus, "%U", unformat_vlib_vmbus_addr,
- &vmbus_addr))
- {
- p = mhash_get (&dm->conf->device_config_index_by_vmbus_addr,
- &vmbus_addr);
- }
+ dpdk_log_notice ("[%d] Device %s blacklisted. Skipping...", port_id,
+ di.driver_name);
+ continue;
}
- if (p)
+ vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES);
+ xd->port_id = port_id;
+ xd->device_index = xd - dm->devices;
+ xd->per_interface_next_index = ~0;
+
+ clib_memcpy (&xd->conf, &dm->default_port_conf,
+ sizeof (dpdk_port_conf_t));
+
+ /* find driver data for this PMD */
+ if ((dr = dpdk_driver_find (di.driver_name, &xd->if_desc)))
{
- devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
- /* If device is blacklisted, we should skip it */
- if (devconf->is_blacklisted)
- {
- continue;
- }
+ xd->driver = dr;
+ xd->supported_flow_actions = dr->supported_flow_actions;
+ xd->conf.disable_rss = dr->mq_mode_none;
+ xd->conf.disable_rx_scatter = dr->disable_rx_scatter;
+ xd->conf.enable_rxq_int = dr->enable_rxq_int;
+ if (dr->use_intel_phdr_cksum)
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM, 1);
+ if (dr->int_unmaskable)
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INT_UNMASKABLE, 1);
}
else
- devconf = &dm->conf->default_devconf;
+ dpdk_log_warn ("[%u] unknown driver '%s'", port_id, di.driver_name);
- /* Create vnet interface */
- vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES);
- xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
- xd->cpu_socket = (i8) rte_eth_dev_socket_id (i);
- if (p)
+ if (devconf->name)
{
xd->name = devconf->name;
}
-
- /* Handle representor devices that share the same PCI ID */
- if (dev_info.switch_info.domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
- {
- if (dev_info.switch_info.port_id != (uint16_t)-1)
- xd->interface_name_suffix = format (0, "%d", dev_info.switch_info.port_id);
- }
- /* Handle interface naming for devices with multiple ports sharing same PCI ID */
- else if (pci_dev &&
- ((next_port_id = rte_eth_find_next (i + 1)) != RTE_MAX_ETHPORTS))
+ else
{
- struct rte_eth_dev_info di = { 0 };
- struct rte_pci_device *next_pci_dev;
- rte_eth_dev_info_get (next_port_id, &di);
- next_pci_dev = di.device ? RTE_DEV_TO_PCI (di.device) : 0;
- if (next_pci_dev &&
- pci_addr.as_u32 != last_pci_addr.as_u32 &&
- memcmp (&pci_dev->addr, &next_pci_dev->addr,
- sizeof (struct rte_pci_addr)) == 0)
+ struct rte_pci_device *pci_dev;
+ if (dr && dr->interface_name_prefix)
{
- xd->interface_name_suffix = format (0, "0");
- last_pci_addr.as_u32 = pci_addr.as_u32;
- last_pci_addr_port = i;
- }
- else if (pci_addr.as_u32 == last_pci_addr.as_u32)
- {
- xd->interface_name_suffix =
- format (0, "%u", i - last_pci_addr_port);
+ /* prefix override by driver */
+ xd->name = format (xd->name, "%s", dr->interface_name_prefix);
}
else
{
- last_pci_addr.as_u32 = ~0;
+ /* interface name prefix from speed_capa */
+ u64 mask = ~((if_name_prefixes[0].link_speed << 1) - 1);
+
+ if (di.speed_capa & mask)
+ dpdk_log_warn ("[%u] unknown speed capability 0x%x reported",
+ xd->port_id, di.speed_capa & mask);
+
+ for (int i = 0; i < ARRAY_LEN (if_name_prefixes); i++)
+ if (if_name_prefixes[i].link_speed & di.speed_capa)
+ {
+ xd->name =
+ format (xd->name, "%s", if_name_prefixes[i].pfx);
+ break;
+ }
+ if (xd->name == 0)
+ xd->name = format (xd->name, "Ethernet");
}
- }
- else
- last_pci_addr.as_u32 = ~0;
- clib_memcpy (&xd->tx_conf, &dev_info.default_txconf,
- sizeof (struct rte_eth_txconf));
+ if (dr && dr->interface_number_from_port_id)
+ xd->name = format (xd->name, "%u", port_id);
+ else if ((pci_dev = dpdk_get_pci_device (&di)))
+ xd->name = format (xd->name, if_num_fmt, pci_dev->addr.bus,
+ pci_dev->addr.devid, pci_dev->addr.function);
+ else
+ xd->name = format (xd->name, "%u", port_id);
- if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
- {
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
- xd->flags |= DPDK_DEVICE_FLAG_RX_IP4_CKSUM;
+ /* Handle representor devices that share the same PCI ID */
+ if ((di.switch_info.domain_id !=
+ RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) &&
+ (di.switch_info.port_id != (uint16_t) -1))
+ xd->name = format (xd->name, "/%d", di.switch_info.port_id);
}
- if (dm->conf->enable_tcp_udp_checksum)
+ /* number of RX and TX queues */
+ if (devconf->num_tx_queues > 0)
{
- if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
- if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_CKSUM;
+ if (di.max_tx_queues < devconf->num_tx_queues)
+ dpdk_log_warn ("[%u] Configured number of TX queues (%u) is "
+ "bigger than maximum supported (%u)",
+ port_id, devconf->num_tx_queues, di.max_tx_queues);
+ xd->conf.n_tx_queues = devconf->num_tx_queues;
}
- if (dm->conf->no_multi_seg)
- {
- xd->port_conf.txmode.offloads &= ~DEV_TX_OFFLOAD_MULTI_SEGS;
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_SCATTER;
- }
- else
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
- xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG;
- }
-
- xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains);
-
- if (devconf->num_tx_queues > 0
- && devconf->num_tx_queues < xd->tx_q_used)
- xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues);
+ xd->conf.n_tx_queues = clib_min (di.max_tx_queues, xd->conf.n_tx_queues);
- if (devconf->num_rx_queues > 1
- && dev_info.max_rx_queues >= devconf->num_rx_queues)
+ if (devconf->num_rx_queues > 1 &&
+ di.max_rx_queues >= devconf->num_rx_queues)
{
- xd->rx_q_used = devconf->num_rx_queues;
- xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
- if (devconf->rss_fn == 0)
- xd->port_conf.rx_adv_conf.rss_conf.rss_hf =
- ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
- else
+ xd->conf.n_rx_queues = devconf->num_rx_queues;
+ if (devconf->rss_fn)
{
u64 unsupported_bits;
- xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn;
- unsupported_bits = xd->port_conf.rx_adv_conf.rss_conf.rss_hf;
- unsupported_bits &= ~dev_info.flow_type_rss_offloads;
+ xd->conf.rss_hf = devconf->rss_fn;
+ unsupported_bits = xd->conf.rss_hf;
+ unsupported_bits &= ~di.flow_type_rss_offloads;
if (unsupported_bits)
dpdk_log_warn ("Unsupported RSS hash functions: %U",
format_dpdk_rss_hf_name, unsupported_bits);
}
- xd->port_conf.rx_adv_conf.rss_conf.rss_hf &=
- dev_info.flow_type_rss_offloads;
+ xd->conf.rss_hf &= di.flow_type_rss_offloads;
+ dpdk_log_debug ("[%u] rss_hf: %U", port_id, format_dpdk_rss_hf_name,
+ xd->conf.rss_hf);
}
- else
- xd->rx_q_used = 1;
- vec_validate_aligned (xd->rx_queues, xd->rx_q_used - 1,
+#ifndef RTE_VLAN_HLEN
+#define RTE_VLAN_HLEN 4
+#endif
+ xd->driver_frame_overhead =
+ RTE_ETHER_HDR_LEN + 2 * RTE_VLAN_HLEN + RTE_ETHER_CRC_LEN;
+#if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0)
+ q = di.max_rx_pktlen - di.max_mtu;
+
+ /* attempt to protect from bogus value provided by pmd */
+ if (q < (2 * xd->driver_frame_overhead) && q > 0 &&
+ di.max_mtu != UINT16_MAX)
+ xd->driver_frame_overhead = q;
+ dpdk_log_debug ("[%u] min_mtu: %u, max_mtu: %u, min_rx_bufsize: %u, "
+ "max_rx_pktlen: %u, max_lro_pkt_size: %u",
+ xd->port_id, di.min_mtu, di.max_mtu, di.min_rx_bufsize,
+ di.max_rx_pktlen, di.max_lro_pkt_size);
+#endif
+ dpdk_log_debug ("[%u] driver frame overhead is %u", port_id,
+ xd->driver_frame_overhead);
+
+ /* number of RX and TX descriptors */
+ if (devconf->num_rx_desc)
+ xd->conf.n_rx_desc = devconf->num_rx_desc;
+ else if (dr && dr->n_rx_desc)
+ xd->conf.n_rx_desc = dr->n_rx_desc;
+
+ if (devconf->num_tx_desc)
+ xd->conf.n_tx_desc = devconf->num_tx_desc;
+ else if (dr && dr->n_tx_desc)
+ xd->conf.n_tx_desc = dr->n_tx_desc;
+
+ dpdk_log_debug (
+ "[%u] n_rx_queues: %u n_tx_queues: %u n_rx_desc: %u n_tx_desc: %u",
+ port_id, xd->conf.n_rx_queues, xd->conf.n_tx_queues,
+ xd->conf.n_rx_desc, xd->conf.n_tx_desc);
+
+ vec_validate_aligned (xd->rx_queues, xd->conf.n_rx_queues - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (xd->tx_queues, xd->conf.n_tx_queues - 1,
CLIB_CACHE_LINE_BYTES);
- xd->flags |= DPDK_DEVICE_FLAG_PMD;
-
- /* workaround for drivers not setting driver_name */
- if ((!dev_info.driver_name) && (pci_dev))
- dev_info.driver_name = pci_dev->driver->driver.name;
-
- ASSERT (dev_info.driver_name);
-
- if (!xd->pmd)
- {
-
-
-#define _(s,f) else if (dev_info.driver_name && \
- !strcmp(dev_info.driver_name, s)) \
- xd->pmd = VNET_DPDK_PMD_##f;
- if (0)
- ;
- foreach_dpdk_pmd
-#undef _
- else
- xd->pmd = VNET_DPDK_PMD_UNKNOWN;
-
- xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
- xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
-
- switch (xd->pmd)
- {
- /* Drivers with valid speed_capa set */
- case VNET_DPDK_PMD_I40E:
- xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
- case VNET_DPDK_PMD_E1000EM:
- case VNET_DPDK_PMD_IGB:
- case VNET_DPDK_PMD_IGC:
- case VNET_DPDK_PMD_IXGBE:
- case VNET_DPDK_PMD_ICE:
- xd->port_type = port_type_from_speed_capa (&dev_info);
- xd->supported_flow_actions = VNET_FLOW_ACTION_MARK |
- VNET_FLOW_ACTION_REDIRECT_TO_NODE |
- VNET_FLOW_ACTION_REDIRECT_TO_QUEUE |
- VNET_FLOW_ACTION_BUFFER_ADVANCE |
- VNET_FLOW_ACTION_COUNT | VNET_FLOW_ACTION_DROP |
- VNET_FLOW_ACTION_RSS;
-
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |=
- DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
- }
-
- xd->port_conf.intr_conf.rxq = 1;
- break;
- case VNET_DPDK_PMD_CXGBE:
- case VNET_DPDK_PMD_MLX4:
- case VNET_DPDK_PMD_MLX5:
- case VNET_DPDK_PMD_QEDE:
- case VNET_DPDK_PMD_BNXT:
- xd->port_type = port_type_from_speed_capa (&dev_info);
- break;
-
- /* SR-IOV VFs */
- case VNET_DPDK_PMD_I40EVF:
- xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
- case VNET_DPDK_PMD_IGBVF:
- case VNET_DPDK_PMD_IXGBEVF:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |=
- DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
- }
- /* DPDK bug in multiqueue... */
- /* xd->port_conf.intr_conf.rxq = 1; */
- break;
-
- /* iAVF */
- case VNET_DPDK_PMD_IAVF:
- xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- xd->supported_flow_actions =
- VNET_FLOW_ACTION_MARK | VNET_FLOW_ACTION_REDIRECT_TO_NODE |
- VNET_FLOW_ACTION_REDIRECT_TO_QUEUE |
- VNET_FLOW_ACTION_BUFFER_ADVANCE | VNET_FLOW_ACTION_COUNT |
- VNET_FLOW_ACTION_DROP | VNET_FLOW_ACTION_RSS;
-
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |=
- DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
- }
- /* DPDK bug in multiqueue... */
- /* xd->port_conf.intr_conf.rxq = 1; */
- break;
-
- case VNET_DPDK_PMD_THUNDERX:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
-
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |= DPDK_DEVICE_FLAG_TX_OFFLOAD;
- }
- break;
-
- case VNET_DPDK_PMD_ENA:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_SCATTER;
- xd->port_conf.intr_conf.rxq = 1;
- break;
-
- case VNET_DPDK_PMD_DPAA2:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
- break;
-
- /* Cisco VIC */
- case VNET_DPDK_PMD_ENIC:
- {
- struct rte_eth_link l;
- rte_eth_link_get_nowait (i, &l);
- xd->port_type = port_type_from_link_speed (l.link_speed);
- if (dm->conf->enable_tcp_udp_checksum)
- dpdk_enable_l4_csum_offload (xd);
- }
- break;
-
- /* Intel Red Rock Canyon */
- case VNET_DPDK_PMD_FM10K:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH;
- break;
-
- /* virtio */
- case VNET_DPDK_PMD_VIRTIO:
- xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
- xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO;
- /*
- * Enable use of RX interrupts if supported.
- *
- * There is no device flag or capability for this, so
- * use the same check that the virtio driver does.
- */
- if (pci_dev && rte_intr_cap_multiple (&pci_dev->intr_handle))
- xd->port_conf.intr_conf.rxq = 1;
- break;
-
- /* vmxnet3 */
- case VNET_DPDK_PMD_VMXNET3:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
- break;
-
- case VNET_DPDK_PMD_AF_PACKET:
- xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET;
- xd->af_packet_instance_num = af_packet_instance_num++;
- break;
-
- case VNET_DPDK_PMD_VIRTIO_USER:
- xd->port_type = VNET_DPDK_PORT_TYPE_VIRTIO_USER;
- break;
-
- case VNET_DPDK_PMD_VHOST_ETHER:
- xd->port_type = VNET_DPDK_PORT_TYPE_VHOST_ETHER;
- break;
-
- case VNET_DPDK_PMD_LIOVF_ETHER:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- break;
-
- case VNET_DPDK_PMD_FAILSAFE:
- xd->port_type = VNET_DPDK_PORT_TYPE_FAILSAFE;
- xd->port_conf.intr_conf.lsc = 1;
- break;
-
- case VNET_DPDK_PMD_NETVSC:
- {
- struct rte_eth_link l;
- rte_eth_link_get_nowait (i, &l);
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- }
- break;
-
- default:
- xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
- }
-
- if (devconf->num_rx_desc)
- xd->nb_rx_desc = devconf->num_rx_desc;
- else {
-
- /* If num_rx_desc is not specified by VPP user, the current CPU is working
- with 2M page and has no L3 cache, default num_rx_desc is changed to 512
- from original 1024 to help reduce TLB misses.
- */
- if ((clib_mem_get_default_hugepage_size () == 2 << 20)
- && check_l3cache() == 0)
- xd->nb_rx_desc = 512;
- }
-
- if (devconf->num_tx_desc)
- xd->nb_tx_desc = devconf->num_tx_desc;
- else {
-
- /* If num_tx_desc is not specified by VPP user, the current CPU is working
- with 2M page and has no L3 cache, default num_tx_desc is changed to 512
- from original 1024 to help reduce TLB misses.
- */
- if ((clib_mem_get_default_hugepage_size () == 2 << 20)
- && check_l3cache() == 0)
- xd->nb_tx_desc = 512;
- }
- }
-
- if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
- {
- f64 now = vlib_time_now (vm);
- u32 rnd;
- rnd = (u32) (now * 1e6);
- rnd = random_u32 (&rnd);
- clib_memcpy (addr + 2, &rnd, sizeof (rnd));
- addr[0] = 2;
- addr[1] = 0xfe;
- }
- else
- rte_eth_macaddr_get (i, (void *) addr);
-
- xd->port_id = i;
- xd->device_index = xd - dm->devices;
- xd->per_interface_next_index = ~0;
-
- /* assign interface to input thread */
- int q;
-
- error = ethernet_register_interface
- (dm->vnet_main, dpdk_device_class.index, xd->device_index,
- /* ethernet address */ addr,
- &xd->hw_if_index, dpdk_flag_change);
- if (error)
- return error;
-
- /*
- * Ensure default mtu is not > the mtu read from the hardware.
- * Otherwise rte_eth_dev_configure() will fail and the port will
- * not be available.
- * Calculate max_frame_size and mtu supported by NIC
- */
- if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
- {
- /*
- * This device does not support the platforms's max frame
- * size. Use it's advertised mru instead.
- */
- max_rx_frame = dev_info.max_rx_pktlen;
- mtu = dev_info.max_rx_pktlen - sizeof (ethernet_header_t);
- }
+ rte_eth_macaddr_get (port_id, (void *) addr);
+
+ /* create interface */
+ eir.dev_class_index = dpdk_device_class.index;
+ eir.dev_instance = xd->device_index;
+ eir.address = addr;
+ eir.cb.flag_change = dpdk_flag_change;
+ eir.cb.set_max_frame_size = dpdk_set_max_frame_size;
+ xd->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+ hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+ numa_node = (i8) rte_eth_dev_socket_id (port_id);
+ if (numa_node == SOCKET_ID_ANY)
+ /* numa_node is not set, default to 0 */
+ hi->numa_node = xd->cpu_socket = 0;
else
- {
- /* VPP treats MTU and max_rx_pktlen both equal to
- * ETHERNET_MAX_PACKET_BYTES, if dev_info.max_rx_pktlen >=
- * ETHERNET_MAX_PACKET_BYTES + sizeof(ethernet_header_t)
- */
- if (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES +
- sizeof (ethernet_header_t)))
- {
- mtu = ETHERNET_MAX_PACKET_BYTES;
- max_rx_frame = ETHERNET_MAX_PACKET_BYTES;
-
- /*
- * Some platforms do not account for Ethernet FCS (4 bytes) in
- * MTU calculations. To interop with them increase mru but only
- * if the device's settings can support it.
- */
- if (dpdk_port_crc_strip_enabled (xd) &&
- (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES +
- sizeof (ethernet_header_t) +
- 4)))
- {
- max_rx_frame += 4;
- }
- }
- else
- {
- max_rx_frame = ETHERNET_MAX_PACKET_BYTES;
- mtu = ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t);
-
- if (dpdk_port_crc_strip_enabled (xd) &&
- (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)))
- {
- max_rx_frame += 4;
- }
- }
- }
-
- if (xd->pmd == VNET_DPDK_PMD_FAILSAFE)
- {
- /* failsafe device numerables are reported with active device only,
- * need to query the mtu for current device setup to overwrite
- * reported value.
- */
- uint16_t dev_mtu;
- if (!rte_eth_dev_get_mtu (i, &dev_mtu))
- {
- mtu = dev_mtu;
- max_rx_frame = mtu + sizeof (ethernet_header_t);
-
- if (dpdk_port_crc_strip_enabled (xd))
- {
- max_rx_frame += 4;
- }
- }
- }
+ hi->numa_node = xd->cpu_socket = numa_node;
+ sw = vnet_get_hw_sw_interface (vnm, xd->hw_if_index);
+ xd->sw_if_index = sw->sw_if_index;
+ dpdk_log_debug ("[%u] interface %v created", port_id, hi->name);
- /*Set port rxmode config */
- xd->port_conf.rxmode.max_rx_pkt_len = max_rx_frame;
+ if (devconf->tag)
+ vnet_set_sw_interface_tag (vnm, devconf->tag, sw->sw_if_index);
- sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->hw_if_index);
- xd->sw_if_index = sw->sw_if_index;
- vnet_hw_if_set_input_node (dm->vnet_main, xd->hw_if_index,
- dpdk_input_node.index);
+ ethernet_set_flags (vnm, xd->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+ /* assign worker threads */
+ vnet_hw_if_set_input_node (vnm, xd->hw_if_index, dpdk_input_node.index);
if (devconf->workers)
{
- int i;
+ int j;
q = 0;
- clib_bitmap_foreach (i, devconf->workers) {
+ clib_bitmap_foreach (j, devconf->workers)
+ {
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
rxq->queue_index = vnet_hw_if_register_rx_queue (
- dm->vnet_main, xd->hw_if_index, q++,
- vdm->first_worker_thread_index + i);
- }
+ vnm, xd->hw_if_index, q++, vdm->first_worker_thread_index + j);
+ }
}
else
- for (q = 0; q < xd->rx_q_used; q++)
+ for (q = 0; q < xd->conf.n_rx_queues; q++)
{
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
rxq->queue_index = vnet_hw_if_register_rx_queue (
- dm->vnet_main, xd->hw_if_index, q, VNET_HW_IF_RXQ_THREAD_ANY);
+ vnm, xd->hw_if_index, q, VNET_HW_IF_RXQ_THREAD_ANY);
}
- vnet_hw_if_update_runtime_data (dm->vnet_main, xd->hw_if_index);
-
- /*Get vnet hardware interface */
- hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index);
+ for (q = 0; q < xd->conf.n_tx_queues; q++)
+ {
+ dpdk_tx_queue_t *txq = vec_elt_at_index (xd->tx_queues, q);
+ txq->queue_index =
+ vnet_hw_if_register_tx_queue (vnm, xd->hw_if_index, q);
+ }
- /*Override default max_packet_bytes and max_supported_bytes set in
- * ethernet_register_interface() above*/
- if (hi)
+ for (q = 0; q < tm->n_vlib_mains; q++)
{
- hi->max_packet_bytes = mtu;
- hi->max_supported_packet_bytes = max_rx_frame;
- hi->numa_node = xd->cpu_socket;
-
- /* Indicate ability to support L3 DMAC filtering and
- * initialize interface to L3 non-promisc mode */
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER;
- ethernet_set_flags (dm->vnet_main, xd->hw_if_index,
- ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+ u32 qi = xd->tx_queues[q % xd->conf.n_tx_queues].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, q);
}
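
[editor's note] The modulo in the loop above spreads threads across TX queues round-robin when there are more threads than queues; with illustrative numbers:

  /* n_vlib_mains = 4, xd->conf.n_tx_queues = 2:
   *   thread 0 -> tx queue 0, thread 1 -> tx queue 1,
   *   thread 2 -> tx queue 0, thread 3 -> tx queue 1
   * so each queue ends up shared by two threads */
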
- if (dm->conf->no_tx_checksum_offload == 0)
- if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD && hi != NULL)
- {
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TX_IP4_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM;
- }
- if (devconf->tso == DPDK_DEVICE_TSO_ON && hi != NULL)
+ if (devconf->tso == DPDK_DEVICE_TSO_ON)
{
/*tcp_udp checksum must be enabled*/
- if ((dm->conf->enable_tcp_udp_checksum) &&
- (hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TX_CKSUM))
- {
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_UDP_GSO;
- xd->port_conf.txmode.offloads |=
- DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_UDP_TSO;
- }
+ if (xd->conf.enable_tcp_udp_checksum == 0)
+ dpdk_log_warn ("[%u] TCP/UDP checksum offload must be enabled",
+ xd->port_id);
+ else if ((di.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) == 0)
+ dpdk_log_warn ("[%u] TSO not supported by device", xd->port_id);
else
- clib_warning ("%s: TCP/UDP checksum offload must be enabled",
- hi->name);
+ xd->conf.enable_tso = 1;
}
+ if (devconf->max_lro_pkt_size)
+ xd->conf.max_lro_pkt_size = devconf->max_lro_pkt_size;
+
dpdk_device_setup (xd);
/* rss queues should be configured after dpdk_device_setup() */
- if ((hi != NULL) && (devconf->rss_queues != NULL))
- {
- if (vnet_hw_interface_set_rss_queues
- (vnet_get_main (), hi, devconf->rss_queues))
- {
- clib_warning ("%s: Failed to set rss queues", hi->name);
- }
- }
+ if (devconf->rss_queues)
+ {
+ if (vnet_hw_interface_set_rss_queues (vnet_get_main (), hi,
+ devconf->rss_queues))
+ dpdk_log_warn ("[%u] Failed to set rss queues", port_id);
+ }
if (vec_len (xd->errors))
- dpdk_log_err ("setup failed for device %U. Errors:\n %U",
- format_dpdk_device_name, i,
- format_dpdk_device_errors, xd);
-
- /*
- * A note on Cisco VIC (PMD_ENIC) and VLAN:
- *
- * With Cisco VIC vNIC, every ingress packet is tagged. On a
- * trunk vNIC (C series "standalone" server), packets on no VLAN
- * are tagged with vlan 0. On an access vNIC (standalone or B
- * series "blade" server), packets on the default/native VLAN
- * are tagged with that vNIC's VLAN. VPP expects these packets
- * to be untagged, and previously enabled VLAN strip on VIC by
- * default. But it also broke vlan sub-interfaces.
- *
- * The VIC adapter has "untag default vlan" ingress VLAN rewrite
- * mode, which removes tags from these packets. VPP now includes
- * a local patch for the enic driver to use this untag mode, so
- * enabling vlan stripping is no longer needed. In future, the
- * driver + dpdk will have an API to set the mode after
- * rte_eal_init. Then, this note and local patch will be
- * removed.
- */
-
- /*
- * VLAN stripping: default to VLAN strip disabled, unless specified
- * otherwise in the startup config.
- */
-
- vlan_off = rte_eth_dev_get_vlan_offload (xd->port_id);
- if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
- {
- vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
- if (rte_eth_dev_set_vlan_offload (xd->port_id, vlan_off) >= 0)
- dpdk_log_info ("VLAN strip enabled for interface\n");
- else
- dpdk_log_warn ("VLAN strip cannot be supported by interface\n");
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
- }
- else
- {
- if (vlan_off & ETH_VLAN_STRIP_OFFLOAD)
- {
- vlan_off &= ~ETH_VLAN_STRIP_OFFLOAD;
- if (rte_eth_dev_set_vlan_offload (xd->port_id, vlan_off) >= 0)
- dpdk_log_warn ("set VLAN offload failed\n");
- }
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
- }
-
- if (hi)
- hi->max_packet_bytes = xd->port_conf.rxmode.max_rx_pkt_len
- - sizeof (ethernet_header_t);
- else
- dpdk_log_warn ("hi NULL");
-
- if (dm->conf->no_multi_seg)
- mtu = mtu > ETHER_MAX_LEN ? ETHER_MAX_LEN : mtu;
-
- rte_eth_dev_set_mtu (xd->port_id, mtu);
-}
+ dpdk_log_err ("[%u] setup failed Errors:\n %U", port_id,
+ format_dpdk_device_errors, xd);
+ }
- /* *INDENT-ON* */
+ for (int i = 0; i < vec_len (dm->devices); i++)
+ vnet_hw_if_update_runtime_data (vnm, dm->devices[i].hw_if_index);
return 0;
}
@@ -903,7 +552,6 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
int i;
addrs = vlib_pci_get_all_dev_addrs ();
- /* *INDENT-OFF* */
vec_foreach (addr, addrs)
{
dpdk_device_config_t * devconf = 0;
@@ -922,8 +570,18 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
continue;
}
+#ifdef __FreeBSD__
+ /*
+ * The defines for the PCI_CLASS_* types are platform specific and differ
+ * on FreeBSD.
+ */
+ if (d->device_class != PCI_CLASS_NETWORK &&
+ d->device_class != PCI_CLASS_PROCESSOR_CO)
+ continue;
+#else
if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO)
continue;
+#endif /* __FreeBSD__ */
if (num_whitelisted)
{
@@ -991,9 +649,13 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_NETWORK_ETHERNET)
;
/* all Intel QAT devices VFs */
- else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_PROCESSOR_CO &&
- (d->device_id == 0x0443 || d->device_id == 0x18a1 || d->device_id == 0x19e3 ||
- d->device_id == 0x37c9 || d->device_id == 0x6f55))
+ else if (d->vendor_id == 0x8086 &&
+ d->device_class == PCI_CLASS_PROCESSOR_CO &&
+ (d->device_id == 0x0443 || d->device_id == 0x18a1 ||
+ d->device_id == 0x19e3 || d->device_id == 0x37c9 ||
+ d->device_id == 0x6f55 || d->device_id == 0x18ef ||
+ d->device_id == 0x4941 || d->device_id == 0x4943 ||
+ d->device_id == 0x4945))
;
/* Cisco VIC */
else if (d->vendor_id == 0x1137 &&
@@ -1021,10 +683,28 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
{
continue;
}
- /* Mellanox CX6, CX6VF, CX6DX, CX6DXVF */
- else if (d->vendor_id == 0x15b3 && d->device_id >= 0x101b && d->device_id <= 0x101e)
+ /* Mellanox CX6, CX6VF, CX6DX, CX6DXVF, CX6LX */
+ else if (d->vendor_id == 0x15b3 &&
+ (d->device_id >= 0x101b && d->device_id <= 0x101f))
{
- continue;
+ continue;
+ }
+ /* Mellanox CX7 */
+ else if (d->vendor_id == 0x15b3 && d->device_id == 0x1021)
+ {
+ continue;
+ }
+ /* Mellanox BF, BFVF */
+ else if (d->vendor_id == 0x15b3 &&
+	   (d->device_id >= 0xa2d2 && d->device_id <= 0xa2d3))
+ {
+ continue;
+ }
+ /* Mellanox BF2, BF3 */
+ else if (d->vendor_id == 0x15b3 &&
+ (d->device_id == 0xa2d6 || d->device_id == 0xa2dc))
+ {
+ continue;
}
/* Broadcom NetXtreme S, and E series only */
else if (d->vendor_id == 0x14e4 &&
@@ -1039,6 +719,9 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
d->device_id == 0x1614 || d->device_id == 0x1606 ||
d->device_id == 0x1609 || d->device_id == 0x1614)))
;
+ /* Google vNIC */
+ else if (d->vendor_id == 0x1ae0 && d->device_id == 0x0042)
+ ;
else
{
dpdk_log_warn ("Unsupported PCI device 0x%04x:0x%04x found "
@@ -1047,7 +730,8 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
continue;
}
- error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name);
+ error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name,
+ conf->uio_bind_force);
if (error)
{
@@ -1063,7 +747,6 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
clib_error_report (error);
}
}
- /* *INDENT-ON* */
vec_free (pci_addr);
vlib_pci_free_device_info (d);
}
@@ -1078,7 +761,6 @@ dpdk_bind_vmbus_devices_to_uio (dpdk_config_main_t * conf)
addrs = vlib_vmbus_get_all_dev_addrs ();
- /* *INDENT-OFF* */
vec_foreach (addr, addrs)
{
dpdk_device_config_t *devconf = 0;
@@ -1143,7 +825,6 @@ dpdk_bind_vmbus_devices_to_uio (dpdk_config_main_t * conf)
clib_error_report (error);
}
}
- /* *INDENT-ON* */
}
uword
@@ -1240,7 +921,9 @@ dpdk_device_config (dpdk_config_main_t *conf, void *addr,
;
else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
;
- else if (unformat (input, "name %s", &devconf->name))
+ else if (unformat (input, "name %v", &devconf->name))
+ ;
+ else if (unformat (input, "tag %s", &devconf->tag))
;
else if (unformat (input, "workers %U", unformat_bitmap_list,
&devconf->workers))
@@ -1253,10 +936,6 @@ dpdk_device_config (dpdk_config_main_t *conf, void *addr,
if (error)
break;
}
- else if (unformat (input, "vlan-strip-offload off"))
- devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF;
- else if (unformat (input, "vlan-strip-offload on"))
- devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON;
else if (unformat (input, "tso on"))
{
devconf->tso = DPDK_DEVICE_TSO_ON;
@@ -1270,6 +949,9 @@ dpdk_device_config (dpdk_config_main_t *conf, void *addr,
else if (unformat (input, "rss-queues %U",
unformat_bitmap_list, &devconf->rss_queues))
;
+ else if (unformat (input, "max-lro-pkt-size %u",
+ &devconf->max_lro_pkt_size))
+ ;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -1310,14 +992,26 @@ dpdk_log_read_ready (clib_file_t * uf)
n = read (uf->file_descriptor, s + len, n_try);
if (n < 0 && errno != EAGAIN)
return clib_error_return_unix (0, "read");
- _vec_len (s) = len + (n < 0 ? 0 : n);
+ vec_set_len (s, len + (n < 0 ? 0 : n));
}
unformat_init_vector (&input, s);
while (unformat_user (&input, unformat_line, &line))
{
- dpdk_log_notice ("%v", line);
+ int skip = 0;
+ vec_add1 (line, 0);
+
+      /* unfortunately DPDK pollutes the log with these error messages
+       * even when we pass --in-memory, which means no secondary process */
+ if (strstr ((char *) line, "WARNING! Base virtual address hint"))
+ skip = 1;
+ else if (strstr ((char *) line, "This may cause issues with mapping "
+ "memory into secondary processes"))
+ skip = 1;
+ vec_pop (line);
+ if (!skip)
+ dpdk_log_notice ("%v", line);
vec_free (line);
}
@@ -1326,8 +1020,29 @@ dpdk_log_read_ready (clib_file_t * uf)
}
static clib_error_t *
+dpdk_set_stat_poll_interval (f64 interval)
+{
+ if (interval < DPDK_MIN_STATS_POLL_INTERVAL)
+ return clib_error_return (0, "wrong stats-poll-interval value");
+
+ dpdk_main.stat_poll_interval = interval;
+ return 0;
+}
+
+static clib_error_t *
+dpdk_set_link_state_poll_interval (f64 interval)
+{
+ if (interval < DPDK_MIN_LINK_POLL_INTERVAL)
+ return clib_error_return (0, "wrong link-state-poll-interval value");
+
+ dpdk_main.link_state_poll_interval = interval;
+ return 0;
+}
+
+static clib_error_t *
dpdk_config (vlib_main_t * vm, unformat_input_t * input)
{
+ dpdk_main_t *dm = &dpdk_main;
clib_error_t *error = 0;
dpdk_config_main_t *conf = &dpdk_config_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
@@ -1344,11 +1059,10 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
u8 no_vmbus = 0;
u8 file_prefix = 0;
u8 *socket_mem = 0;
- u8 *huge_dir_path = 0;
u32 vendor, device, domain, bus, func;
-
- huge_dir_path =
- format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0);
+ void *fmt_func;
+ void *fmt_addr;
+ f64 poll_interval;
conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
mhash_init (&conf->device_config_index_by_vmbus_addr, sizeof (uword),
@@ -1366,19 +1080,36 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
conf->enable_telemetry = 1;
else if (unformat (input, "enable-tcp-udp-checksum"))
- conf->enable_tcp_udp_checksum = 1;
-
+ {
+ dm->default_port_conf.enable_tcp_udp_checksum = 1;
+ if (unformat (input, "enable-outer-checksum-offload"))
+ dm->default_port_conf.enable_outer_checksum_offload = 1;
+ }
else if (unformat (input, "no-tx-checksum-offload"))
- conf->no_tx_checksum_offload = 1;
+ dm->default_port_conf.disable_tx_checksum_offload = 1;
else if (unformat (input, "decimal-interface-names"))
conf->interface_name_format_decimal = 1;
else if (unformat (input, "no-multi-seg"))
- conf->no_multi_seg = 1;
+ dm->default_port_conf.disable_multi_seg = 1;
+ else if (unformat (input, "enable-lro"))
+ dm->default_port_conf.enable_lro = 1;
else if (unformat (input, "max-simd-bitwidth %U",
unformat_max_simd_bitwidth, &conf->max_simd_bitwidth))
;
+ else if (unformat (input, "link-state-poll-interval %f", &poll_interval))
+ {
+ error = dpdk_set_link_state_poll_interval (poll_interval);
+ if (error != 0)
+ return error;
+ }
+ else if (unformat (input, "stats-poll-interval %f", &poll_interval))
+ {
+ error = dpdk_set_stat_poll_interval (poll_interval);
+ if (error != 0)
+ return error;
+ }
else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input,
&sub_input))
{
@@ -1433,13 +1164,10 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
num_whitelisted++;
}
- else if (unformat (input, "num-mem-channels %d", &conf->nchannels))
- conf->nchannels_set_manually = 0;
- else if (unformat (input, "num-crypto-mbufs %d",
- &conf->num_crypto_mbufs))
- ;
else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
;
+ else if (unformat (input, "uio-bind-force"))
+ conf->uio_bind_force = 1;
else if (unformat (input, "socket-mem %s", &socket_mem))
;
else if (unformat (input, "no-pci"))
@@ -1514,28 +1242,13 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
}
foreach_eal_single_hyphen_arg
#undef _
-#define _(a,b) \
- else if (unformat(input, #a " %s", &s)) \
- { \
- tmp = format (0, "-%s%c", #b, 0); \
- vec_add1 (conf->eal_init_args, tmp); \
- vec_add1 (s, 0); \
- vec_add1 (conf->eal_init_args, s); \
- conf->a##_set_manually = 1; \
- }
- foreach_eal_single_hyphen_mandatory_arg
-#undef _
else if (unformat (input, "default"))
;
else if (unformat_skip_white_space (input))
;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
+ else return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
- goto done;
- }
}
if (!conf->uio_driver_name)
@@ -1547,7 +1260,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
default_hugepage_sz = clib_mem_get_default_hugepage_size ();
- /* *INDENT-OFF* */
clib_bitmap_foreach (x, tm->cpu_socket_bitmap)
{
clib_error_t *e;
@@ -1560,7 +1272,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
if ((e = clib_sysfs_prealloc_hugepages(x, 0, n_pages)))
clib_error_report (e);
}
- /* *INDENT-ON* */
}
/* on/off dpdk's telemetry thread */
@@ -1577,41 +1288,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
vec_add1 (conf->eal_init_args, tmp);
}
- if (error)
- return error;
-
- /* I'll bet that -c and -n must be the first and second args... */
- if (!conf->coremask_set_manually)
- {
- vlib_thread_registration_t *tr;
- uword *coremask = 0;
- int i;
-
- /* main thread core */
- coremask = clib_bitmap_set (coremask, tm->main_lcore, 1);
-
- for (i = 0; i < vec_len (tm->registrations); i++)
- {
- tr = tm->registrations[i];
- coremask = clib_bitmap_or (coremask, tr->coremask);
- }
-
- vec_insert (conf->eal_init_args, 2, 1);
- conf->eal_init_args[1] = (u8 *) "-c";
- tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0);
- conf->eal_init_args[2] = tmp;
- clib_bitmap_free (coremask);
- }
-
- if (!conf->nchannels_set_manually)
- {
- vec_insert (conf->eal_init_args, 2, 3);
- conf->eal_init_args[3] = (u8 *) "-n";
- tmp = format (0, "%d", conf->nchannels);
- vec_terminate_c_string (tmp);
- conf->eal_init_args[4] = tmp;
- }
-
if (no_pci == 0 && geteuid () == 0)
dpdk_bind_devices_to_uio (conf);
@@ -1622,15 +1298,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
if (devconf->x == 0 && conf->default_devconf.x > 0) \
devconf->x = conf->default_devconf.x ;
- /* *INDENT-OFF* */
pool_foreach (devconf, conf->dev_confs) {
/* default per-device config items */
foreach_dpdk_device_config_item
- /* copy vlan_strip config from default device */
- _ (vlan_strip_offload)
-
/* copy tso config from default device */
_ (tso)
@@ -1640,56 +1312,57 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
/* copy rss_queues config from default device */
_ (rss_queues)
- /* add DPDK EAL whitelist/blacklist entry */
- if (num_whitelisted > 0 && devconf->is_blacklisted == 0 &&
- devconf->dev_addr_type == VNET_DEV_ADDR_PCI)
- {
- tmp = format (0, "-a%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- if (devconf->devargs)
+ /* assume that default is PCI */
+ fmt_func = format_vlib_pci_addr;
+ fmt_addr = &devconf->pci_addr;
+
+ if (devconf->dev_addr_type == VNET_DEV_ADDR_VMBUS)
+ {
+ fmt_func = format_vlib_vmbus_addr;
+ fmt_addr = &devconf->vmbus_addr;
+ }
+
+ /* add DPDK EAL whitelist/blacklist entry */
+ if (num_whitelisted > 0 && devconf->is_blacklisted == 0)
+ {
+ tmp = format (0, "-a%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ if (devconf->devargs)
{
- tmp = format (0, "%U,%s%c", format_vlib_pci_addr,
- &devconf->pci_addr, devconf->devargs, 0);
+ tmp =
+ format (0, "%U,%s%c", fmt_func, fmt_addr, devconf->devargs, 0);
}
else
{
- tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
+ tmp = format (0, "%U%c", fmt_func, fmt_addr, 0);
}
vec_add1 (conf->eal_init_args, tmp);
- }
- else if (num_whitelisted == 0 && devconf->is_blacklisted != 0 &&
- devconf->dev_addr_type == VNET_DEV_ADDR_PCI)
- {
- tmp = format (0, "-b%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
- vec_add1 (conf->eal_init_args, tmp);
- }
+ }
+ else if (num_whitelisted == 0 && devconf->is_blacklisted != 0)
+ {
+ tmp = format (0, "-b%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ tmp = format (0, "%U%c", fmt_func, fmt_addr, 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ }
}
- /* *INDENT-ON* */
#undef _
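
For illustration only (the addresses and devargs are hypothetical): the loop above emits each allow/block entry as a pair of argv elements, so two whitelisted devices, one PCI with devargs and one VMBus, would extend eal_init_args roughly as

    -a 0000:04:00.0,mprq_en=1
    -a 7aa38b6c-585e-4bb7-9d11-ce942f3d0a14

with -b substituted for -a when only blacklist entries are configured.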
- /* set master-lcore */
- tmp = format (0, "--main-lcore%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "%u%c", tm->main_lcore, 0);
- vec_add1 (conf->eal_init_args, tmp);
-
-
if (socket_mem)
clib_warning ("socket-mem argument is deprecated");
/* NULL terminate the "argv" vector, in case of stupidity */
vec_add1 (conf->eal_init_args, 0);
- _vec_len (conf->eal_init_args) -= 1;
+ vec_dec_len (conf->eal_init_args, 1);
/* Set up DPDK eal and packet mbuf pool early. */
int log_fds[2] = { 0 };
if (pipe (log_fds) == 0)
{
- if (fcntl (log_fds[1], F_SETFL, O_NONBLOCK) == 0)
+ if (fcntl (log_fds[0], F_SETFL, O_NONBLOCK) == 0 &&
+ fcntl (log_fds[1], F_SETFL, O_NONBLOCK) == 0)
{
FILE *f = fdopen (log_fds[1], "a");
if (f && rte_openlog_stream (f) == 0)
@@ -1720,6 +1393,8 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
dpdk_log_notice ("EAL init args: %s", conf->eal_init_args_str);
ret = rte_eal_init (vec_len (conf->eal_init_args),
(char **) conf->eal_init_args);
+ if (ret < 0)
+ return clib_error_return (0, "rte_eal_init returned %d", ret);
/* enable the AVX-512 vPMDs in DPDK */
if (clib_cpu_supports_avx512_bitalg () &&
@@ -1731,20 +1406,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
RTE_VECT_SIMD_256 :
RTE_VECT_SIMD_512);
- /* lazy umount hugepages */
- umount2 ((char *) huge_dir_path, MNT_DETACH);
- rmdir ((char *) huge_dir_path);
- vec_free (huge_dir_path);
-
- if (ret < 0)
- return clib_error_return (0, "rte_eal_init returned %d", ret);
-
/* main thread 1st */
if ((error = dpdk_buffer_pools_create (vm)))
return error;
-done:
- return error;
+ return 0;
}
VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk");
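
As a quick reference, here is a startup.conf sketch exercising the knobs added or reworked above (the PCI address and all values are hypothetical; the two poll intervals are subject to the DPDK_MIN_*_POLL_INTERVAL checks in the setters above, and vlan-strip-offload, num-mem-channels and num-crypto-mbufs are no longer accepted):

    dpdk {
      enable-tcp-udp-checksum enable-outer-checksum-offload
      enable-lro
      uio-bind-force
      link-state-poll-interval 0.5
      stats-poll-interval 10.0
      dev 0000:04:00.0 {
        name port0
        tag customer-a
        max-lro-pkt-size 9200
        rss-queues 0-3
      }
    }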
@@ -1757,10 +1423,6 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
u32 hw_flags = 0;
u8 hw_flags_chg = 0;
- /* only update link state for PMD interfaces */
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- return;
-
xd->time_last_link_update = now ? now : xd->time_last_link_update;
clib_memset (&xd->link, 0, sizeof (xd->link));
rte_eth_link_get_nowait (xd->port_id, &xd->link);
@@ -1788,35 +1450,32 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
ed->new_link_state = (u8) xd->link.link_status;
}
- if ((xd->link.link_duplex != prev_link.link_duplex))
+ hw_flags_chg = ((xd->link.link_duplex != prev_link.link_duplex) ||
+ (xd->link.link_status != prev_link.link_status));
+
+ if (xd->link.link_speed != prev_link.link_speed)
+ vnet_hw_interface_set_link_speed (vnm, xd->hw_if_index,
+ (xd->link.link_speed == UINT32_MAX) ?
+ UINT32_MAX :
+ xd->link.link_speed * 1000);
+
+ if (hw_flags_chg)
{
- hw_flags_chg = 1;
+ if (xd->link.link_status)
+ hw_flags |= VNET_HW_INTERFACE_FLAG_LINK_UP;
+
switch (xd->link.link_duplex)
{
- case ETH_LINK_HALF_DUPLEX:
+ case RTE_ETH_LINK_HALF_DUPLEX:
hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX;
break;
- case ETH_LINK_FULL_DUPLEX:
+ case RTE_ETH_LINK_FULL_DUPLEX:
hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX;
break;
default:
break;
}
- }
- if (xd->link.link_speed != prev_link.link_speed)
- vnet_hw_interface_set_link_speed (vnm, xd->hw_if_index,
- xd->link.link_speed * 1000);
-
- if (xd->link.link_status != prev_link.link_status)
- {
- hw_flags_chg = 1;
-
- if (xd->link.link_status)
- hw_flags |= VNET_HW_INTERFACE_FLAG_LINK_UP;
- }
- if (hw_flags_chg)
- {
if (LINK_STATE_ELOGS)
{
ELOG_TYPE_DECLARE (e) =
@@ -1846,6 +1505,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
dpdk_device_t *xd;
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_worker_thread_barrier_sync (vm);
error = dpdk_lib_init (dm);
if (error)
@@ -1862,6 +1522,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
}
}
+ vlib_worker_thread_barrier_release (vm);
tm->worker_thread_release = 1;
f64 now = vlib_time_now (vm);
@@ -1870,16 +1531,17 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
dpdk_update_link_state (xd, now);
}
+ f64 timeout =
+ clib_min (dm->link_state_poll_interval, dm->stat_poll_interval);
+
while (1)
{
- /*
- * check each time through the loop in case intervals are changed
- */
- f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
- dm->link_state_poll_interval : dm->stat_poll_interval;
-
+ f64 min_wait = clib_max (timeout, DPDK_MIN_POLL_INTERVAL);
vlib_process_wait_for_event_or_clock (vm, min_wait);
+ timeout =
+ clib_min (dm->link_state_poll_interval, dm->stat_poll_interval);
+
if (dm->admin_up_down_in_progress)
/* skip the poll if an admin up down is in progress (on any interface) */
continue;
@@ -1893,19 +1555,25 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
dpdk_update_link_state (xd, now);
}
- }
+ now = vlib_time_now (vm);
+ vec_foreach (xd, dm->devices)
+ {
+ timeout = clib_min (timeout, xd->time_last_stats_update +
+ dm->stat_poll_interval - now);
+ timeout = clib_min (timeout, xd->time_last_link_update +
+ dm->link_state_poll_interval - now);
+ }
+ }
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dpdk_process_node,static) = {
.function = dpdk_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dpdk-process",
.process_log2_n_stack_bytes = 17,
};
-/* *INDENT-ON* */
static clib_error_t *
dpdk_init (vlib_main_t * vm)
@@ -1924,35 +1592,28 @@ dpdk_init (vlib_main_t * vm)
dpdk_cli_reference ();
- dm->vlib_main = vm;
- dm->vnet_main = vnet_get_main ();
dm->conf = &dpdk_config_main;
- dm->conf->nchannels = 4;
vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet");
- /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
- dm->buffer_flags_template = (VLIB_BUFFER_TOTAL_LENGTH_VALID |
- VLIB_BUFFER_EXT_HDR_VALID |
- VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
- VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
-
dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL;
dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL;
dm->log_default = vlib_log_register_class ("dpdk", 0);
dm->log_cryptodev = vlib_log_register_class ("dpdk", "cryptodev");
- dm->log_ipsec = vlib_log_register_class ("dpdk", "ipsec");
return error;
}
VLIB_INIT_FUNCTION (dpdk_init);
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+static clib_error_t *
+dpdk_worker_thread_init (vlib_main_t *vm)
+{
+ if (rte_thread_register () < 0)
+ clib_panic ("dpdk: cannot register thread %u - %s", vm->thread_index,
+ rte_strerror (rte_errno));
+ return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (dpdk_worker_thread_init);
diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index 25222856912..ca1690b708f 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -23,10 +23,10 @@
#include <dpdk/device/dpdk.h>
#include <vnet/classify/vnet_classify.h>
#include <vnet/mpls/packet.h>
-#include <vnet/handoff.h>
#include <vnet/devices/devices.h>
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/feature/feature.h>
+#include <vnet/tcp/tcp_packet.h>
#include <dpdk/device/dpdk_priv.h>
@@ -36,9 +36,13 @@ static char *dpdk_error_strings[] = {
#undef _
};
-/* make sure all flags we need are stored in lower 8 bits */
-STATIC_ASSERT ((PKT_RX_IP_CKSUM_BAD | PKT_RX_FDIR) <
- 256, "dpdk flags not un lower byte, fix needed");
+/* make sure all flags we need are stored in lower 32 bits */
+STATIC_ASSERT ((u64) (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+ RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_LRO) < (1ULL << 32),
+ "dpdk flags not in lower word, fix needed");
+
+STATIC_ASSERT (RTE_MBUF_F_RX_L4_CKSUM_BAD == (1ULL << 3),
+ "bit number of RTE_MBUF_F_RX_L4_CKSUM_BAD is no longer 3!");
static_always_inline uword
dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b,
@@ -97,13 +101,13 @@ dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
{
vlib_buffer_t *b;
b = vlib_buffer_from_rte_mbuf (mb[0]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
b = vlib_buffer_from_rte_mbuf (mb[1]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
b = vlib_buffer_from_rte_mbuf (mb[2]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
b = vlib_buffer_from_rte_mbuf (mb[3]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
}
/** \brief Main DPDK input node
@@ -125,18 +129,18 @@ dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
@em Uses:
- <code>struct rte_mbuf mb->ol_flags</code>
- - PKT_RX_IP_CKSUM_BAD
+ - RTE_MBUF_F_RX_IP_CKSUM_BAD
@em Sets:
- <code>b->error</code> if the packet is to be dropped immediately
- <code>b->current_data, b->current_length</code>
- - adjusted as needed to skip the L2 header in direct-dispatch cases
+ - adjusted as needed to skip the L2 header in direct-dispatch cases
- <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
- - rx interface sw_if_index
+ - rx interface sw_if_index
- <code>vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0</code>
- - required by ipX-lookup
+ - required by ipX-lookup
- <code>b->flags</code>
- - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.
+ - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.
<em>Next Nodes:</em>
- Static arcs to: error-drop, ethernet-input,
@@ -145,31 +149,30 @@ dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
<code>xd->per_interface_next_index</code>
*/
-static_always_inline u16
-dpdk_ol_flags_extract (struct rte_mbuf **mb, u16 * flags, int count)
+static_always_inline u32
+dpdk_ol_flags_extract (struct rte_mbuf **mb, u32 *flags, int count)
{
- u16 rv = 0;
+ u32 rv = 0;
int i;
for (i = 0; i < count; i++)
{
	  /* all flags we are interested in are in lower 32 bits but
	     that might change */
- flags[i] = (u16) mb[i]->ol_flags;
+ flags[i] = (u32) mb[i]->ol_flags;
rv |= flags[i];
}
return rv;
}
static_always_inline uword
-dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd,
- uword n_rx_packets, int maybe_multiseg,
- u16 * or_flagsp)
+dpdk_process_rx_burst (vlib_main_t *vm, dpdk_per_thread_data_t *ptd,
+ uword n_rx_packets, int maybe_multiseg, u32 *or_flagsp)
{
u32 n_left = n_rx_packets;
vlib_buffer_t *b[4];
struct rte_mbuf **mb = ptd->mbufs;
uword n_bytes = 0;
- u16 *flags, or_flags = 0;
+ u32 *flags, or_flags = 0;
vlib_buffer_t bt;
mb = ptd->mbufs;
@@ -254,7 +257,7 @@ dpdk_process_flow_offload (dpdk_device_t * xd, dpdk_per_thread_data_t * ptd,
/* TODO prefetch and quad-loop */
for (n = 0; n < n_rx_packets; n++)
{
- if ((ptd->flags[n] & PKT_RX_FDIR_ID) == 0)
+ if ((ptd->flags[n] & RTE_MBUF_F_RX_FDIR_ID) == 0)
continue;
fle = pool_elt_at_index (xd->flow_lookup_entries,
@@ -277,6 +280,65 @@ dpdk_process_flow_offload (dpdk_device_t * xd, dpdk_per_thread_data_t * ptd,
}
}
+static_always_inline u16
+dpdk_lro_find_l4_hdr_sz (vlib_buffer_t *b)
+{
+ u16 l4_hdr_sz = 0;
+ u16 current_offset = 0;
+ ethernet_header_t *e;
+ tcp_header_t *tcp;
+ u8 *data = vlib_buffer_get_current (b);
+ u16 ethertype;
+ e = (void *) data;
+ current_offset += sizeof (e[0]);
+ ethertype = clib_net_to_host_u16 (e->type);
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (e + 1);
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ current_offset += sizeof (*vlan);
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ vlan++;
+ current_offset += sizeof (*vlan);
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ }
+ }
+ data += current_offset;
+ if (ethertype == ETHERNET_TYPE_IP4)
+ {
+ data += sizeof (ip4_header_t);
+ tcp = (void *) data;
+ l4_hdr_sz = tcp_header_bytes (tcp);
+ }
+ else
+ {
+ /* FIXME: extension headers...*/
+ data += sizeof (ip6_header_t);
+ tcp = (void *) data;
+ l4_hdr_sz = tcp_header_bytes (tcp);
+ }
+ return l4_hdr_sz;
+}
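+
+/* The tcp_header_bytes() call above reduces to scaling the TCP data-offset
+ * nibble to bytes; a minimal standalone sketch of that arithmetic (plain
+ * stdint types instead of the vnet structs, field layout assumed):
+ *
+ *   #include <stdint.h>
+ *   #include <assert.h>
+ *
+ *   static unsigned
+ *   tcp_hdr_bytes (uint8_t data_offset_and_reserved)
+ *   {
+ *     // upper 4 bits count the header length in 32-bit words
+ *     return (data_offset_and_reserved >> 4) * 4;
+ *   }
+ *
+ *   int
+ *   main (void)
+ *   {
+ *     assert (tcp_hdr_bytes (0x50) == 20);  // minimal header, no options
+ *     assert (tcp_hdr_bytes (0x80) == 32);  // 12 bytes of TCP options
+ *     return 0;
+ *   }
+ */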
+
+static_always_inline void
+dpdk_process_lro_offload (dpdk_device_t *xd, dpdk_per_thread_data_t *ptd,
+ uword n_rx_packets)
+{
+ uword n;
+ vlib_buffer_t *b0;
+ for (n = 0; n < n_rx_packets; n++)
+ {
+ b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
+ if (ptd->flags[n] & RTE_MBUF_F_RX_LRO)
+ {
+ b0->flags |= VNET_BUFFER_F_GSO;
+ vnet_buffer2 (b0)->gso_size = ptd->mbufs[n]->tso_segsz;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = dpdk_lro_find_l4_hdr_sz (b0);
+ }
+ }
+}
+
static_always_inline u32
dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
vlib_node_runtime_t * node, u32 thread_index, u16 queue_id)
@@ -289,7 +351,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
struct rte_mbuf **mb;
vlib_buffer_t *b0;
u16 *next;
- u16 or_flags;
+ u32 or_flags;
u32 n;
int single_next = 0;
@@ -303,12 +365,13 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
/* get up to DPDK_RX_BURST_SZ buffers from PMD */
while (n_rx_packets < DPDK_RX_BURST_SZ)
{
- n = rte_eth_rx_burst (xd->port_id, queue_id,
- ptd->mbufs + n_rx_packets,
- DPDK_RX_BURST_SZ - n_rx_packets);
+ u32 n_to_rx = clib_min (DPDK_RX_BURST_SZ - n_rx_packets, 32);
+
+ n = rte_eth_rx_burst (xd->port_id, queue_id, ptd->mbufs + n_rx_packets,
+ n_to_rx);
n_rx_packets += n;
- if (n < 32)
+ if (n < n_to_rx)
break;
}
@@ -318,6 +381,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
/* Update buffer template */
vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->sw_if_index;
bt->error = node->errors[DPDK_ERROR_NONE];
+ bt->flags = xd->buffer_flags;
/* as DPDK is allocating empty buffers from mempool provided before interface
start for each queue, it is safe to store this in the template */
bt->buffer_pool_index = rxq->buffer_pool_index;
@@ -332,14 +396,34 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
   /* as all packets belong to the same interface, feature arc lookup
      can be done once and the result stored in the buffer template */
if (PREDICT_FALSE (vnet_device_input_have_features (xd->sw_if_index)))
- vnet_feature_start_device_input_x1 (xd->sw_if_index, &next_index, bt);
+ vnet_feature_start_device_input (xd->sw_if_index, &next_index, bt);
if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 1, &or_flags);
else
n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 0, &or_flags);
- if (PREDICT_FALSE (or_flags & PKT_RX_FDIR))
+ if (PREDICT_FALSE ((or_flags & RTE_MBUF_F_RX_LRO)))
+ dpdk_process_lro_offload (xd, ptd, n_rx_packets);
+
+ if (PREDICT_FALSE ((or_flags & RTE_MBUF_F_RX_L4_CKSUM_BAD) &&
+ (xd->buffer_flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)))
+ {
+ for (n = 0; n < n_rx_packets; n++)
+ {
+	      /* Clear the VNET_BUFFER_F_L4_CHECKSUM_CORRECT flag
+		 when RTE_MBUF_F_RX_L4_CKSUM_BAD is set.
+		 The magic number 3 is the bit position of
+		 RTE_MBUF_F_RX_L4_CKSUM_BAD, as defined in DPDK;
+		 a STATIC_ASSERT in this file ensures this.
+ */
+ b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
+ b0->flags ^= (ptd->flags[n] & RTE_MBUF_F_RX_L4_CKSUM_BAD)
+ << (VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT - 3);
+ }
+ }
+
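
To make the shift-and-XOR above concrete, a minimal standalone sketch (the CORRECT bit position is hypothetical; in VPP it comes from vlib_buffer_t, and the DPDK bit is pinned by the STATIC_ASSERT at the top of this file):

    #include <stdint.h>
    #include <assert.h>

    #define RX_L4_CKSUM_BAD       (1u << 3) /* DPDK bad-L4-checksum bit */
    #define LOG2_L4_CKSUM_CORRECT 7         /* hypothetical VPP flag position */
    #define L4_CKSUM_CORRECT      (1u << LOG2_L4_CKSUM_CORRECT)

    int
    main (void)
    {
      /* the buffer template optimistically pre-sets CORRECT on every buffer */
      uint32_t flags_bad = L4_CKSUM_CORRECT, flags_ok = L4_CKSUM_CORRECT;
      uint32_t ol_bad = RX_L4_CKSUM_BAD, ol_ok = 0; /* per-packet mbuf ol_flags */

      /* shifting the PMD bad-bit left aligns it with the CORRECT bit, so the
         XOR clears CORRECT exactly for the packets the PMD flagged as bad */
      flags_bad ^= (ol_bad & RX_L4_CKSUM_BAD) << (LOG2_L4_CKSUM_CORRECT - 3);
      flags_ok ^= (ol_ok & RX_L4_CKSUM_BAD) << (LOG2_L4_CKSUM_CORRECT - 3);

      assert ((flags_bad & L4_CKSUM_CORRECT) == 0); /* bad checksum: cleared */
      assert ((flags_ok & L4_CKSUM_CORRECT) != 0);  /* good checksum: kept */
      return 0;
    }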
+ if (PREDICT_FALSE (or_flags & RTE_MBUF_F_RX_FDIR))
{
/* some packets will need to go to different next nodes */
for (n = 0; n < n_rx_packets; n++)
@@ -348,7 +432,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
/* flow offload - process if rx flow offload enabled and at least one
packet is marked */
if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) &&
- (or_flags & PKT_RX_FDIR)))
+ (or_flags & RTE_MBUF_F_RX_FDIR)))
dpdk_process_flow_offload (xd, ptd, n_rx_packets);
/* enqueue buffers to the next node */
@@ -385,7 +469,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
marked as ip4 checksum bad we can notify ethernet input so it
      can send packets to ip4-input-no-checksum node */
if (xd->flags & DPDK_DEVICE_FLAG_RX_IP4_CKSUM &&
- (or_flags & PKT_RX_IP_CKSUM_BAD) == 0)
+ (or_flags & RTE_MBUF_F_RX_IP_CKSUM_BAD) == 0)
f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
vlib_frame_no_append (f);
}
@@ -459,7 +543,7 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
dpdk_device_t *xd;
uword n_rx_packets = 0;
vnet_hw_if_rxq_poll_vector_t *pv;
- u32 thread_index = node->thread_index;
+ u32 thread_index = vm->thread_index;
/*
* Poll all devices on this cpu for input/interrupts.
@@ -476,7 +560,6 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return n_rx_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dpdk_input_node) = {
.type = VLIB_NODE_TYPE_INPUT,
.name = "dpdk-input",
@@ -492,7 +575,6 @@ VLIB_REGISTER_NODE (dpdk_input_node) = {
.n_errors = DPDK_N_ERROR,
.error_strings = dpdk_error_strings,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c
index 413a0f0df9d..9781d0ed7f0 100644
--- a/src/plugins/dpdk/main.c
+++ b/src/plugins/dpdk/main.c
@@ -13,13 +13,6 @@
* limitations under the License.
*/
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <linux/vfio.h>
-#include <sys/ioctl.h>
-
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
#include <dpdk/device/dpdk.h>
@@ -79,19 +72,14 @@ rte_delay_us_override_cb (unsigned us)
static clib_error_t * dpdk_main_init (vlib_main_t * vm)
{
- dpdk_main_t * dm = &dpdk_main;
clib_error_t * error = 0;
- dm->vlib_main = vm;
- dm->vnet_main = vnet_get_main ();
-
/* register custom delay function */
rte_delay_us_callback_register (rte_delay_us_override_cb);
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (dpdk_main_init) =
{
.runs_after = VLIB_INITS("dpdk_init"),
@@ -101,4 +89,3 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Data Plane Development Kit (DPDK)",
};
-/* *INDENT-ON* */
diff --git a/src/plugins/dpdk/thread.c b/src/plugins/dpdk/thread.c
deleted file mode 100644
index 3a3fcc6cea6..00000000000
--- a/src/plugins/dpdk/thread.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <rte_config.h>
-
-#include <rte_common.h>
-#include <rte_log.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_tailq.h>
-#include <rte_eal.h>
-#include <rte_per_lcore.h>
-#include <rte_launch.h>
-#include <rte_atomic.h>
-#include <rte_cycles.h>
-#include <rte_prefetch.h>
-#include <rte_lcore.h>
-#include <rte_per_lcore.h>
-#include <rte_branch_prediction.h>
-#include <rte_interrupts.h>
-#include <rte_pci.h>
-#include <rte_random.h>
-#include <rte_debug.h>
-#include <rte_ether.h>
-#include <rte_ethdev.h>
-#include <rte_ring.h>
-#include <rte_mempool.h>
-#include <rte_mbuf.h>
-#include <rte_version.h>
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <dpdk/device/dpdk.h>
-#include <dpdk/device/dpdk_priv.h>
-
-static clib_error_t *
-dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id)
-{
- int r;
- r = rte_eal_remote_launch (fp, (void *) w, lcore_id);
- if (r)
- return clib_error_return (0, "Failed to launch thread %u", lcore_id);
- return 0;
-}
-
-static clib_error_t *
-dpdk_thread_set_lcore (u32 thread, u16 lcore)
-{
- return 0;
-}
-
-static vlib_thread_callbacks_t callbacks = {
- .vlib_launch_thread_cb = &dpdk_launch_thread,
- .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore,
-};
-
-static clib_error_t *
-dpdk_thread_init (vlib_main_t * vm)
-{
- vlib_thread_cb_register (vm, &callbacks);
- return 0;
-}
-
-VLIB_INIT_FUNCTION (dpdk_thread_init);
-
-/** @endcond */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/fateshare/CMakeLists.txt b/src/plugins/fateshare/CMakeLists.txt
new file mode 100644
index 00000000000..4916d1ffbaf
--- /dev/null
+++ b/src/plugins/fateshare/CMakeLists.txt
@@ -0,0 +1,25 @@
+
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(fateshare
+ SOURCES
+ fateshare.c
+ fateshare.h
+)
+
+add_vpp_executable(vpp_fateshare_monitor
+ SOURCES vpp_fateshare_monitor.c
+ LINK_LIBRARIES vppinfra
+)
+
diff --git a/src/plugins/fateshare/fateshare.c b/src/plugins/fateshare/fateshare.c
new file mode 100644
index 00000000000..971d32303db
--- /dev/null
+++ b/src/plugins/fateshare/fateshare.c
@@ -0,0 +1,309 @@
+/*
+ * fateshare.c - skeleton vpp engine plug-in
+ *
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vppinfra/unix.h>
+#include <fateshare/fateshare.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <stdbool.h>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#ifdef __linux__
+#include <sys/prctl.h> // prctl(), PR_SET_PDEATHSIG
+#else
+#include <sys/procctl.h>
+#endif /* __linux__ */
+#include <limits.h>
+
+fateshare_main_t fateshare_main;
+
+/* Action function shared between message handler and debug CLI */
+
+static void
+child_handler (int sig)
+{
+ pid_t pid;
+ int status;
+ fateshare_main_t *kmp = &fateshare_main;
+
+ while ((pid = waitpid (-1, &status, WNOHANG)) > 0)
+ {
+ if (pid == kmp->monitor_pid)
+ {
+ clib_warning ("Monitor child %d exited with status %d!", pid,
+ status);
+ kmp->vlib_main->main_loop_exit_now = 1;
+ }
+ else
+ {
+ clib_warning ("child %d exited with status %d!", pid, status);
+ }
+ }
+}
+
+clib_error_t *
+launch_monitor (fateshare_main_t *kmp)
+{
+ clib_error_t *error = 0;
+ pid_t ppid_before_fork = getpid ();
+ pid_t cpid = fork ();
+ if (cpid == -1)
+ {
+ perror (0);
+ error = clib_error_return (0, "can not fork");
+ goto done;
+ }
+ clib_warning ("fateshare about to launch monitor %v.", kmp->monitor_cmd);
+ int logfd =
+ open ((char *) kmp->monitor_logfile, O_APPEND | O_RDWR | O_CREAT, 0777);
+ if (logfd < 0)
+ {
+ error = clib_error_return (0, "can not open log file");
+ goto done;
+ }
+ if (cpid)
+ {
+ /* parent */
+ kmp->monitor_pid = cpid;
+ close (logfd);
+ return 0;
+ }
+ else
+ {
+ dup2 (logfd, 1);
+ dup2 (logfd, 2);
+#ifdef __linux__
+ int r = prctl (PR_SET_PDEATHSIG, SIGTERM);
+ if (r == -1)
+ {
+ perror (0);
+ exit (1);
+ }
+#else
+ int r, s = SIGTERM;
+
+ r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s);
+ if (r == -1)
+ {
+ perror (0);
+ exit (1);
+ }
+#endif /* __linux__ */
+ pid_t current_ppid = getppid ();
+ if (current_ppid != ppid_before_fork)
+ {
+ fprintf (stderr, "parent pid changed while starting (%d => %d)\n",
+ ppid_before_fork, current_ppid);
+ if (current_ppid == 1)
+ {
+ fprintf (stderr, "exiting.\n");
+ exit (1);
+ }
+ }
+
+ int r1 = setpgid (getpid (), 0);
+ if (r1 != 0)
+ {
+ perror ("setpgid error");
+ exit (1);
+ }
+
+ u8 *scmd = format (0, "%v\0", kmp->monitor_cmd);
+ u8 *logfile_base = format (0, "%v\0", kmp->monitor_logfile);
+ int fd = logfd - 1;
+ while (fd > 2)
+ {
+ close (fd);
+ fd--;
+ }
+
+ fd = open ("/dev/null", O_RDONLY);
+ if (fd < 0)
+ {
+ exit (1);
+ }
+ dup2 (fd, 0);
+
+ char *ppid_str = (char *) format (0, "%lld\0", current_ppid);
+
+ char **argv = 0;
+ vec_validate (argv, vec_len (kmp->commands) + 3 - 1);
+ argv[0] = (void *) scmd;
+ argv[1] = ppid_str;
+ argv[2] = (char *) logfile_base;
+ int i;
+ vec_foreach_index (i, kmp->commands)
+ {
+ argv[3 + i] = (char *) kmp->commands[i];
+ }
+
+ int res = execv (argv[0], argv);
+ clib_warning ("ERROR during execve: %d", res);
+ perror ("execve");
+
+ exit (0);
+ }
+done:
+
+ return error;
+}
+
+static clib_error_t *
+fateshare_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ fateshare_main_t *fmp = &fateshare_main;
+ u8 *command = 0;
+ u8 **new_command = 0;
+ clib_error_t *error = 0;
+
+ /* unix config may make vpp fork, we want to run after that. */
+ if ((error = vlib_call_config_function (vm, unix_config)))
+ return error;
+
+ /* Defaults */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "monitor %s", &fmp->monitor_cmd))
+ {
+ clib_warning ("setting monitor to %v", fmp->monitor_cmd);
+ }
+ else if (unformat (input, "logfile %s", &fmp->monitor_logfile))
+ {
+ clib_warning ("setting logfile to %v", fmp->monitor_logfile);
+ }
+ else if (unformat (input, "command %s", &command))
+ {
+ vec_add2 (fmp->commands, new_command, 1);
+ *new_command = command;
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ vec_add2 (fmp->commands, new_command, 1);
+ *new_command = 0;
+
+ /* Establish handler. */
+ struct sigaction sa;
+ sigemptyset (&sa.sa_mask);
+ sa.sa_flags = 0;
+ sa.sa_handler = child_handler;
+
+ sigaction (SIGCHLD, &sa, NULL);
+
+ if (fmp->monitor_cmd == 0)
+ {
+ char *p;
+ u8 *path;
+
+ /* find executable path */
+ path = os_get_exec_path ();
+
+ if (path == 0)
+ return clib_error_return (
+ 0, "could not get exec path - set monitor manually");
+
+ /* add null termination */
+ vec_add1 (path, 0);
+
+ /* strip filename */
+ if ((p = strrchr ((char *) path, '/')) == 0)
+ {
+ vec_free (path);
+ return clib_error_return (
+ 0, "could not determine vpp directory - set monitor manually");
+ }
+ *p = 0;
+
+ fmp->monitor_cmd = format (0, "%s/vpp_fateshare_monitor\0", path);
+ vec_free (path);
+ }
+ if (fmp->monitor_logfile == 0)
+ {
+ fmp->monitor_logfile =
+ format (0, "/tmp/vpp-fateshare-monitor-log.txt\0");
+ }
+ error = launch_monitor (fmp);
+
+ return error;
+}
+
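+
+/* A minimal startup.conf sketch for the section parsed above (paths are
+ * hypothetical; monitor and logfile fall back to the defaults computed
+ * above when omitted, and since the plugin registers default_disabled it
+ * must also be enabled in the plugins section):
+ *
+ *   fateshare {
+ *     monitor /usr/lib/vpp_fateshare_monitor
+ *     logfile /tmp/vpp-fateshare-monitor-log.txt
+ *     command /usr/local/bin/sidecar-a
+ *     command /usr/local/bin/sidecar-b
+ *   }
+ */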
+clib_error_t *
+fateshare_init (vlib_main_t *vm)
+{
+ fateshare_main_t *kmp = &fateshare_main;
+ clib_error_t *error = 0;
+
+ kmp->vlib_main = vm;
+
+ return error;
+}
+
+static clib_error_t *
+fateshare_send_hup_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ clib_error_t *error = 0;
+ fateshare_main_t *kmp = &fateshare_main;
+
+ if (kmp->monitor_pid)
+ {
+ int rc = kill (kmp->monitor_pid, SIGHUP);
+ if (rc)
+ {
+ error = clib_error_return (
+ 0, "can not send signal to monitor process: %s", strerror (errno));
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "can not find monitor process");
+ }
+
+ return error;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (fateshare_config, "fateshare");
+
+VLIB_INIT_FUNCTION (fateshare_init);
+
+VLIB_CLI_COMMAND (fateshare_restart_process_command, static) = {
+ .path = "fateshare restart-processes",
+ .short_help = "restart dependent processes",
+ .function = fateshare_send_hup_fn,
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Run child processes which will share fate with VPP, restart "
+ "them if they quit",
+ .default_disabled = 1,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/fateshare/fateshare.h b/src/plugins/fateshare/fateshare.h
new file mode 100644
index 00000000000..4ad7ac1df16
--- /dev/null
+++ b/src/plugins/fateshare/fateshare.h
@@ -0,0 +1,48 @@
+
+/*
+ * fateshare.h - skeleton vpp engine plug-in header file
+ *
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_fateshare_h__
+#define __included_fateshare_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+
+typedef struct
+{
+ /* convenience */
+ vlib_main_t *vlib_main;
+
+ u8 *monitor_cmd;
+ u8 *monitor_logfile;
+ pid_t monitor_pid;
+ u8 **commands;
+} fateshare_main_t;
+
+extern fateshare_main_t fateshare_main;
+
+#endif /* __included_fateshare_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/fateshare/vpp_fateshare_monitor.c b/src/plugins/fateshare/vpp_fateshare_monitor.c
new file mode 100644
index 00000000000..7af451ccffe
--- /dev/null
+++ b/src/plugins/fateshare/vpp_fateshare_monitor.c
@@ -0,0 +1,289 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#ifdef __linux__
+#include <sys/prctl.h> // prctl(), PR_SET_PDEATHSIG
+#else
+#include <signal.h>
+#include <sys/procctl.h>
+#endif /* __linux__ */
+
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+
+typedef struct
+{
+ pid_t pid;
+ char *cmd;
+} child_record_t;
+
+int n_children = 0;
+child_record_t *children = NULL;
+
+static void
+child_handler (int sig)
+{
+ pid_t pid;
+ int status;
+
+ while ((pid = waitpid (-1, &status, WNOHANG)) > 0)
+ {
+ int i;
+ printf ("fateshare: pid %d quit with status %d\n", pid, status);
+ for (i = 0; i < n_children; i++)
+ {
+ if (children[i].pid == pid)
+ {
+ children[i].pid = 0;
+ }
+ }
+ }
+}
+
+static void
+term_handler (int sig)
+{
+ int i;
+
+ printf ("fateshare: terminating!\n");
+ for (i = 0; i < n_children; i++)
+ {
+ kill (-children[i].pid, SIGTERM);
+ }
+ exit (0);
+}
+
+static void
+hup_handler (int sig)
+{
+ int i;
+
+ printf ("fateshare: terminating all the child processes!\n");
+ for (i = 0; i < n_children; i++)
+ {
+ kill (-children[i].pid, SIGTERM);
+ }
+}
+
+pid_t
+launch_command (char *scmd, char *logname_base)
+{
+ pid_t ppid_before_fork = getpid ();
+ pid_t cpid = fork ();
+ if (cpid == -1)
+ {
+ perror ("fork");
+ sleep (1);
+ return 0;
+ }
+ if (cpid)
+ {
+ /* parent */
+ return cpid;
+ }
+
+ /* child */
+#ifdef __linux__
+ int r = prctl (PR_SET_PDEATHSIG, SIGTERM);
+ if (r == -1)
+ {
+ perror ("prctl");
+ sleep (5);
+ exit (1);
+ }
+#else
+ int r, s = SIGTERM;
+
+ r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s);
+ if (r == -1)
+ {
+ perror ("procctl");
+ exit (1);
+ }
+#endif /* __linux__ */
+
+ if (getppid () != ppid_before_fork)
+ {
+ sleep (5);
+ exit (1);
+ }
+
+ int r1 = setpgid (getpid (), 0);
+ if (r1 != 0)
+ {
+ perror ("setpgid error");
+ sleep (5);
+ exit (1);
+ }
+
+ int fd = open ("/dev/null", O_RDONLY);
+ if (fd < 0)
+ {
+ sleep (5);
+ exit (1);
+ }
+ while (fd >= 0)
+ {
+ close (fd);
+ fd--;
+ }
+ fd = open ("/dev/null", O_RDONLY);
+ if (fd < 0)
+ {
+ sleep (5);
+ exit (1);
+ }
+ dup2 (fd, 0);
+
+ char logname_stdout[PATH_MAX];
+ char logname_stderr[PATH_MAX];
+
+ snprintf (logname_stdout, PATH_MAX - 1, "%s-stdout.txt", logname_base);
+ snprintf (logname_stderr, PATH_MAX - 1, "%s-stderr.txt", logname_base);
+
+ printf ("LOG STDOUT %s: %s\n", scmd, logname_stdout);
+ printf ("LOG STDERR %s: %s\n", scmd, logname_stderr);
+
+ fd = open ((char *) logname_stdout, O_APPEND | O_RDWR | O_CREAT, 0777);
+ if (fd < 0)
+ {
+ sleep (5);
+ exit (1);
+ }
+ dup2 (fd, 1);
+ fd = open ((char *) logname_stderr, O_APPEND | O_RDWR | O_CREAT, 0777);
+ if (fd < 0)
+ {
+ sleep (5);
+ exit (1);
+ }
+ dup2 (fd, 2);
+
+ char *argv[] = { (char *) scmd, 0 };
+ int res = execv (argv[0], argv);
+ if (res != 0)
+ {
+ perror ("execve");
+ }
+ sleep (10);
+
+ exit (42);
+}
+
+int
+main (int argc, char **argv)
+{
+ pid_t ppid = getppid ();
+ int i = 0;
+ if (argc < 3)
+ {
+ printf ("usage: %s <parent_pid> <logfile-basename>\n", argv[0]);
+ exit (1);
+ }
+ char *errptr = 0;
+ pid_t parent_pid = strtoll (argv[1], &errptr, 10);
+ char *logname_base = argv[2];
+
+ printf ("DEBUG: pid %d starting for parent pid %d\n", getpid (), ppid);
+ printf ("DEBUG: parent pid: %d\n", parent_pid);
+ printf ("DEBUG: base log name: %s\n", logname_base);
+ if (*errptr)
+ {
+ printf ("%s is not a valid parent pid\n", errptr);
+ exit (2);
+ }
+
+#ifdef __linux__
+ int r = prctl (PR_SET_PDEATHSIG, SIGTERM);
+ if (r == -1)
+ {
+ perror (0);
+ exit (1);
+ }
+#else
+ int r, s = SIGTERM;
+
+ r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s);
+ if (r == -1)
+ {
+ perror ("procctl");
+ exit (1);
+ }
+#endif /* __linux__ */
+
+ /* Establish handler. */
+ struct sigaction sa;
+ sigemptyset (&sa.sa_mask);
+ sa.sa_flags = 0;
+ sa.sa_handler = child_handler;
+
+ sigaction (SIGCHLD, &sa, NULL);
+
+ sigemptyset (&sa.sa_mask);
+ sa.sa_flags = 0;
+ sa.sa_handler = term_handler;
+
+ sigaction (SIGTERM, &sa, NULL);
+
+ sigemptyset (&sa.sa_mask);
+ sa.sa_flags = 0;
+ sa.sa_handler = hup_handler;
+
+ sigaction (SIGHUP, &sa, NULL);
+
+ if (getppid () != parent_pid)
+ {
+ printf ("parent process unexpectedly finished\n");
+ exit (3);
+ }
+
+ argc -= 3; /* skip over argv0, ppid, and log base */
+ argv += 3;
+
+ n_children = argc;
+ printf ("DEBUG: total %d children\n", n_children);
+ children = calloc (n_children, sizeof (children[0]));
+ for (i = 0; i < n_children; i++)
+ {
+ /* argv persists, so we can just use that pointer */
+ children[i].cmd = argv[i];
+ children[i].pid = launch_command (children[i].cmd, logname_base);
+ printf ("DEBUG: child %d (%s): initial launch pid %d\n", i,
+ children[i].cmd, children[i].pid);
+ }
+
+ while (1)
+ {
+ sleep (1);
+ pid_t curr_ppid = getppid ();
+ printf ("pid: %d, current ppid %d, original ppid %d\n", getpid (),
+ curr_ppid, ppid);
+ if (curr_ppid != ppid)
+ {
+ printf ("current ppid %d != original ppid %d - force quit\n",
+ curr_ppid, ppid);
+ fflush (stdout);
+ exit (1);
+ }
+ int restarted = 0;
+ for (i = 0; i < n_children; i++)
+ {
+ if (children[i].pid == 0)
+ {
+ printf ("child %s exited, restarting\n", children[i].cmd);
+ restarted = 1;
+ children[i].pid = launch_command (children[i].cmd, logname_base);
+ }
+ }
+ if (restarted)
+ {
+ sleep (1);
+ }
+
+ fflush (stdout);
+ }
+}
diff --git a/src/plugins/flowprobe/FEATURE.yaml b/src/plugins/flowprobe/FEATURE.yaml
index 66382433d03..9c80b12dc9f 100644
--- a/src/plugins/flowprobe/FEATURE.yaml
+++ b/src/plugins/flowprobe/FEATURE.yaml
@@ -2,12 +2,11 @@
name: IPFIX probe
maintainer: Ole Troan <ot@cisco.com>
features:
- - L2 input feature
- - IPv4 / IPv6 input feature
- - Recording of L2, L3 and L4 information
-description: "IPFIX flow probe. Works in the L2, or IP input feature path."
+ - L2 input and output feature path
+ - IPv4 / IPv6 input and output feature path
+ - Recording of L2, L3, and L4 information
+description: "IPFIX flow probe. Works in the L2 or IP feature path both input and output."
missing:
- - Output path
- Export over IPv6
- Export over TCP/SCTP
state: production
diff --git a/src/plugins/flowprobe/flowprobe.api b/src/plugins/flowprobe/flowprobe.api
index 55dd51d3c30..c2090637cc8 100644
--- a/src/plugins/flowprobe/flowprobe.api
+++ b/src/plugins/flowprobe/flowprobe.api
@@ -5,7 +5,7 @@
used to control the flowprobe plugin
*/
-option version = "1.0.0";
+option version = "2.1.0";
import "vnet/interface_types.api";
@@ -16,6 +16,13 @@ enum flowprobe_which_flags : u8
FLOWPROBE_WHICH_FLAG_IP6 = 0x4,
};
+enum flowprobe_which : u8
+{
+ FLOWPROBE_WHICH_IP4 = 0,
+ FLOWPROBE_WHICH_IP6,
+ FLOWPROBE_WHICH_L2,
+};
+
enum flowprobe_record_flags : u8
{
FLOWPROBE_RECORD_FLAG_L2 = 0x1,
@@ -23,6 +30,13 @@ enum flowprobe_record_flags : u8
FLOWPROBE_RECORD_FLAG_L4 = 0x4,
};
+enum flowprobe_direction : u8
+{
+ FLOWPROBE_DIRECTION_RX = 0,
+ FLOWPROBE_DIRECTION_TX,
+ FLOWPROBE_DIRECTION_BOTH,
+};
+
/** \brief Enable / disable per-packet IPFIX recording on an interface
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -32,6 +46,8 @@ enum flowprobe_record_flags : u8
*/
autoreply define flowprobe_tx_interface_add_del
{
+ option replaced_by="flowprobe_interface_add_del";
+
/* Client identifier, set from api_main.my_client_index */
u32 client_index;
@@ -47,8 +63,59 @@ autoreply define flowprobe_tx_interface_add_del
option vat_help = "<intfc> [disable]";
};
+/** \brief Enable or disable IPFIX flow record generation on an interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add interface if non-zero, else delete
+ @param which - datapath on which to record flows
+ @param direction - direction of recorded flows
+ @param sw_if_index - index of the interface
+*/
+autoreply define flowprobe_interface_add_del
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_flowprobe_which_t which;
+ vl_api_flowprobe_direction_t direction;
+ vl_api_interface_index_t sw_if_index;
+ option vat_help = "(<intfc> | sw_if_index <if-idx>) [(ip4|ip6|l2)] [(rx|tx|both)] [disable]";
+};
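
Going by the vat_help string above, a vat invocation enabling IPv4 flow recording in both directions on sw_if_index 1 would look something like this (a sketch following the help text, not verified against the test client):

    flowprobe_interface_add_del sw_if_index 1 ip4 both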
+
+/** \brief Dump interfaces for which IPFIX flow record generation is enabled
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index to use as filter (0xffffffff is "all")
+*/
+define flowprobe_interface_dump
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+ option vat_help = "[<if-idx>]";
+};
+
+/** \brief Details about IPFIX flow record generation enabled on interface
+ @param context - sender context which was passed in the request
+ @param which - datapath on which to record flows
+ @param direction - direction of recorded flows
+ @param sw_if_index - index of the interface
+*/
+define flowprobe_interface_details
+{
+ option in_progress;
+ u32 context;
+ vl_api_flowprobe_which_t which;
+ vl_api_flowprobe_direction_t direction;
+ vl_api_interface_index_t sw_if_index;
+};
+
autoreply define flowprobe_params
{
+ option replaced_by="flowprobe_set_params";
+
u32 client_index;
u32 context;
vl_api_flowprobe_record_flags_t record_flags;
@@ -56,3 +123,53 @@ autoreply define flowprobe_params
u32 passive_timer; /* ~0 is off, 0 is default */
option vat_help = "record <[l2] [l3] [l4]> [active <timer> passive <timer>]";
};
+
+/** \brief Set IPFIX flow record generation parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param record_flags - flags indicating what data to record
+ @param active_timer - time in seconds after which active flow records are
+ to be exported (0 is "off", 0xffffffff is "use default value")
+ @param passive_timer - time in seconds after which passive flow records are
+ to be deleted (0 is "off", 0xffffffff is "use default value")
+*/
+autoreply define flowprobe_set_params
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_flowprobe_record_flags_t record_flags;
+ u32 active_timer [default=0xffffffff];
+ u32 passive_timer [default=0xffffffff];
+ option vat_help = "record [l2] [l3] [l4] [active <timer>] [passive <timer>]";
+};
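
Likewise for the new parameters message, per its vat_help (timer values purely illustrative):

    flowprobe_set_params record l3 l4 active 30 passive 120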
+
+/** \brief Get IPFIX flow record generation parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define flowprobe_get_params
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply to get IPFIX flow record generation parameters
+ @param context - sender context, to match reply w/ request
+ @param retval - error (0 is "no error")
+ @param record_flags - flags indicating what data to record
+ @param active_timer - time in seconds after which active flow records are
+ to be exported (0 is "off")
+ @param passive_timer - time in seconds after which passive flow records are
+ to be deleted (0 is "off")
+*/
+define flowprobe_get_params_reply
+{
+ option in_progress;
+ u32 context;
+ i32 retval;
+ vl_api_flowprobe_record_flags_t record_flags;
+ u32 active_timer;
+ u32 passive_timer;
+};
diff --git a/src/plugins/flowprobe/flowprobe.c b/src/plugins/flowprobe/flowprobe.c
index ffc43bcd440..58a7cfe22f1 100644
--- a/src/plugins/flowprobe/flowprobe.c
+++ b/src/plugins/flowprobe/flowprobe.c
@@ -45,35 +45,54 @@ uword flowprobe_walker_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
#include <vlibapi/api_helper_macros.h>
/* Define the per-interface configurable features */
-/* *INDENT-OFF* */
-VNET_FEATURE_INIT (flow_perpacket_ip4, static) =
-{
+VNET_FEATURE_INIT (flowprobe_input_ip4_unicast, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "flowprobe-input-ip4",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+VNET_FEATURE_INIT (flowprobe_input_ip4_multicast, static) = {
+ .arc_name = "ip4-multicast",
+ .node_name = "flowprobe-input-ip4",
+ .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
+};
+VNET_FEATURE_INIT (flowprobe_input_ip6_unicast, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "flowprobe-input-ip6",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+VNET_FEATURE_INIT (flowprobe_input_ip6_multicast, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "flowprobe-input-ip6",
+ .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
+};
+VNET_FEATURE_INIT (flowprobe_input_l2, static) = {
+ .arc_name = "device-input",
+ .node_name = "flowprobe-input-l2",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+VNET_FEATURE_INIT (flowprobe_output_ip4, static) = {
.arc_name = "ip4-output",
- .node_name = "flowprobe-ip4",
+ .node_name = "flowprobe-output-ip4",
.runs_before = VNET_FEATURES ("interface-output"),
};
-VNET_FEATURE_INIT (flow_perpacket_ip6, static) =
-{
+VNET_FEATURE_INIT (flowprobe_output_ip6, static) = {
.arc_name = "ip6-output",
- .node_name = "flowprobe-ip6",
+ .node_name = "flowprobe-output-ip6",
.runs_before = VNET_FEATURES ("interface-output"),
};
-VNET_FEATURE_INIT (flow_perpacket_l2, static) = {
+VNET_FEATURE_INIT (flowprobe_output_l2, static) = {
.arc_name = "interface-output",
- .node_name = "flowprobe-l2",
+ .node_name = "flowprobe-output-l2",
.runs_before = VNET_FEATURES ("interface-output-arc-end"),
};
-/* *INDENT-ON* */
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static inline ipfix_field_specifier_t *
flowprobe_template_ip4_fields (ipfix_field_specifier_t * f)
@@ -143,7 +162,7 @@ flowprobe_template_l2_fields (ipfix_field_specifier_t * f)
static inline ipfix_field_specifier_t *
flowprobe_template_common_fields (ipfix_field_specifier_t * f)
{
-#define flowprobe_template_common_field_count() 5
+#define flowprobe_template_common_field_count() 6
/* ingressInterface, TLV type 10, u32 */
f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
ingressInterface, 4);
@@ -154,6 +173,10 @@ flowprobe_template_common_fields (ipfix_field_specifier_t * f)
egressInterface, 4);
f++;
+ /* flowDirection, TLV type 61, u8 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */, flowDirection, 1);
+ f++;
+
/* packetDeltaCount, TLV type 2, u64 */
f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
packetDeltaCount, 8);
@@ -202,10 +225,7 @@ flowprobe_template_l4_fields (ipfix_field_specifier_t * f)
* @returns u8 * vector containing the indicated IPFIX template packet
*/
static inline u8 *
-flowprobe_template_rewrite_inline (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_inline (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
flowprobe_variant_t which)
{
@@ -223,8 +243,9 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
flowprobe_main_t *fm = &flowprobe_main;
flowprobe_record_t flags = fr->opaque.as_uword;
bool collect_ip4 = false, collect_ip6 = false;
+ bool collect_l4 = false;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
if (flags & FLOW_RECORD_L3)
{
@@ -235,6 +256,10 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
if (which == FLOW_VARIANT_L2_IP6)
flags |= FLOW_RECORD_L2_IP6;
}
+ if (flags & FLOW_RECORD_L4)
+ {
+ collect_l4 = (which != FLOW_VARIANT_L2);
+ }
field_count += flowprobe_template_common_field_count ();
if (flags & FLOW_RECORD_L2)
@@ -243,7 +268,7 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
field_count += flowprobe_template_ip4_field_count ();
if (collect_ip6)
field_count += flowprobe_template_ip6_field_count ();
- if (flags & FLOW_RECORD_L4)
+ if (collect_l4)
field_count += flowprobe_template_l4_field_count ();
/* allocate rewrite space */
@@ -263,8 +288,8 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
ip->ip_version_and_header_length = 0x45;
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
- ip->dst_address.as_u32 = collector_address->as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
udp->dst_port = clib_host_to_net_u16 (collector_port);
udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
@@ -282,7 +307,7 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
f = flowprobe_template_ip4_fields (f);
if (collect_ip6)
f = flowprobe_template_ip6_fields (f);
- if (flags & FLOW_RECORD_L4)
+ if (collect_l4)
f = flowprobe_template_l4_fields (f);
/* Back to the template packet... */
@@ -309,73 +334,53 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
}
static u8 *
-flowprobe_template_rewrite_ip6 (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_ip6 (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return flowprobe_template_rewrite_inline
- (frm, fr, collector_address, src_address, collector_port,
- FLOW_VARIANT_IP6);
+ return flowprobe_template_rewrite_inline (exp, fr, collector_port,
+ FLOW_VARIANT_IP6);
}
static u8 *
-flowprobe_template_rewrite_ip4 (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_ip4 (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return flowprobe_template_rewrite_inline
- (frm, fr, collector_address, src_address, collector_port,
- FLOW_VARIANT_IP4);
+ return flowprobe_template_rewrite_inline (exp, fr, collector_port,
+ FLOW_VARIANT_IP4);
}
static u8 *
-flowprobe_template_rewrite_l2 (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_l2 (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return flowprobe_template_rewrite_inline
- (frm, fr, collector_address, src_address, collector_port,
- FLOW_VARIANT_L2);
+ return flowprobe_template_rewrite_inline (exp, fr, collector_port,
+ FLOW_VARIANT_L2);
}
static u8 *
-flowprobe_template_rewrite_l2_ip4 (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_l2_ip4 (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return flowprobe_template_rewrite_inline
- (frm, fr, collector_address, src_address, collector_port,
- FLOW_VARIANT_L2_IP4);
+ return flowprobe_template_rewrite_inline (exp, fr, collector_port,
+ FLOW_VARIANT_L2_IP4);
}
static u8 *
-flowprobe_template_rewrite_l2_ip6 (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_l2_ip6 (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return flowprobe_template_rewrite_inline
- (frm, fr, collector_address, src_address, collector_port,
- FLOW_VARIANT_L2_IP6);
+ return flowprobe_template_rewrite_inline (exp, fr, collector_port,
+ FLOW_VARIANT_L2_IP6);
}
/**
@@ -389,27 +394,27 @@ flowprobe_template_rewrite_l2_ip6 (flow_report_main_t * frm,
* will be sent.
*/
vlib_frame_t *
-flowprobe_data_callback_ip4 (flow_report_main_t * frm,
- flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+flowprobe_data_callback_ip4 (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
flowprobe_flush_callback_ip4 ();
return f;
}
vlib_frame_t *
-flowprobe_data_callback_ip6 (flow_report_main_t * frm,
- flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+flowprobe_data_callback_ip6 (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
flowprobe_flush_callback_ip6 ();
return f;
}
vlib_frame_t *
-flowprobe_data_callback_l2 (flow_report_main_t * frm,
- flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+flowprobe_data_callback_l2 (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
flowprobe_flush_callback_l2 ();
return f;
@@ -422,7 +427,7 @@ flowprobe_template_add_del (u32 domain_id, u16 src_port,
vnet_flow_rewrite_callback_t * rewrite_callback,
bool is_add, u16 * template_id)
{
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
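+ /* flowprobe currently reports only via the first (default) IPFIX
+ exporter */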
vnet_flow_report_add_del_args_t a = {
.rewrite_callback = rewrite_callback,
.flow_data_callback = flow_data_callback,
@@ -431,7 +436,7 @@ flowprobe_template_add_del (u32 domain_id, u16 src_port,
.src_port = src_port,
.opaque.as_uword = flags,
};
- return vnet_flow_report_add_del (frm, &a, template_id);
+ return vnet_flow_report_add_del (exp, &a, template_id);
}
static void
@@ -501,11 +506,49 @@ flowprobe_create_state_tables (u32 active_timer)
return error;
}
+static clib_error_t *
+flowprobe_clear_state_if_index (u32 sw_if_index)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ clib_error_t *error = 0;
+ u32 worker_i;
+ u32 entry_i;
+
+ if (fm->active_timer > 0)
+ {
+ vec_foreach_index (worker_i, fm->pool_per_worker)
+ {
+ pool_foreach_index (entry_i, fm->pool_per_worker[worker_i])
+ {
+ flowprobe_entry_t *e =
+ pool_elt_at_index (fm->pool_per_worker[worker_i], entry_i);
+ if (e->key.rx_sw_if_index == sw_if_index ||
+ e->key.tx_sw_if_index == sw_if_index)
+ {
+ e->packetcount = 0;
+ e->octetcount = 0;
+ e->prot.tcp.flags = 0;
+ if (fm->passive_timer > 0)
+ {
+ tw_timer_stop_2t_1w_2048sl (
+ fm->timers_per_worker[worker_i],
+ e->passive_timer_handle);
+ }
+ flowprobe_delete_by_index (worker_i, entry_i);
+ }
+ }
+ }
+ }
+
+ return error;
+}
+
static int
validate_feature_on_interface (flowprobe_main_t * fm, u32 sw_if_index,
u8 which)
{
vec_validate_init_empty (fm->flow_per_interface, sw_if_index, ~0);
+ vec_validate_init_empty (fm->direction_per_interface, sw_if_index, ~0);
if (fm->flow_per_interface[sw_if_index] == (u8) ~ 0)
return -1;
@@ -519,13 +562,15 @@ validate_feature_on_interface (flowprobe_main_t * fm, u32 sw_if_index,
* @brief configure / deconfigure the IPFIX flow-per-packet
* @param fm flowprobe_main_t * fm
* @param sw_if_index u32 the desired interface
+ * @param which u8 the desired datapath
+ * @param direction u8 the desired direction
* @param is_add int 1 to enable the feature, 0 to disable it
* @returns 0 if successful, non-zero otherwise
*/
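+/* Illustrative usage (values are examples, not part of this change):
+ enable IPv4 flow records in both directions on sw_if_index 1:
+ flowprobe_interface_add_del_feature (fm, 1, FLOW_VARIANT_IP4,
+ FLOW_DIRECTION_BOTH, 1); */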
static int
-flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
- u32 sw_if_index, u8 which, int is_add)
+flowprobe_interface_add_del_feature (flowprobe_main_t *fm, u32 sw_if_index,
+ u8 which, u8 direction, int is_add)
{
vlib_main_t *vm = vlib_get_main ();
int rv = 0;
@@ -533,6 +578,7 @@ flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
flowprobe_record_t flags = fm->record;
fm->flow_per_interface[sw_if_index] = (is_add) ? which : (u8) ~ 0;
+ fm->direction_per_interface[sw_if_index] = (is_add) ? direction : (u8) ~0;
fm->template_per_flow[which] += (is_add) ? 1 : -1;
if (is_add && fm->template_per_flow[which] > 1)
template_id = fm->template_reports[flags];
@@ -542,12 +588,17 @@ flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
{
if (which == FLOW_VARIANT_L2)
{
+ if (!is_add)
+ {
+ flowprobe_flush_callback_l2 ();
+ }
if (fm->record & FLOW_RECORD_L2)
{
rv = flowprobe_template_add_del (1, UDP_DST_PORT_ipfix, flags,
flowprobe_data_callback_l2,
flowprobe_template_rewrite_l2,
is_add, &template_id);
+ fm->template_reports[flags] = (is_add) ? template_id : 0;
}
if (fm->record & FLOW_RECORD_L3 || fm->record & FLOW_RECORD_L4)
{
@@ -570,20 +621,30 @@ flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
flags | FLOW_RECORD_L2_IP4;
fm->context[FLOW_VARIANT_L2_IP6].flags =
flags | FLOW_RECORD_L2_IP6;
-
- fm->template_reports[flags] = template_id;
}
}
else if (which == FLOW_VARIANT_IP4)
- rv = flowprobe_template_add_del (1, UDP_DST_PORT_ipfix, flags,
- flowprobe_data_callback_ip4,
- flowprobe_template_rewrite_ip4,
- is_add, &template_id);
+ {
+ if (!is_add)
+ {
+ flowprobe_flush_callback_ip4 ();
+ }
+ rv = flowprobe_template_add_del (
+ 1, UDP_DST_PORT_ipfix, flags, flowprobe_data_callback_ip4,
+ flowprobe_template_rewrite_ip4, is_add, &template_id);
+ fm->template_reports[flags] = (is_add) ? template_id : 0;
+ }
else if (which == FLOW_VARIANT_IP6)
- rv = flowprobe_template_add_del (1, UDP_DST_PORT_ipfix, flags,
- flowprobe_data_callback_ip6,
- flowprobe_template_rewrite_ip6,
- is_add, &template_id);
+ {
+ if (!is_add)
+ {
+ flowprobe_flush_callback_ip6 ();
+ }
+ rv = flowprobe_template_add_del (
+ 1, UDP_DST_PORT_ipfix, flags, flowprobe_data_callback_ip6,
+ flowprobe_template_rewrite_ip6, is_add, &template_id);
+ fm->template_reports[flags] = (is_add) ? template_id : 0;
+ }
}
if (rv && rv != VNET_API_ERROR_VALUE_EXIST)
{
@@ -594,18 +655,41 @@ flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
if (which != (u8) ~ 0)
{
fm->context[which].flags = fm->record;
- fm->template_reports[flags] = (is_add) ? template_id : 0;
}
- if (which == FLOW_VARIANT_IP4)
- vnet_feature_enable_disable ("ip4-output", "flowprobe-ip4",
- sw_if_index, is_add, 0, 0);
- else if (which == FLOW_VARIANT_IP6)
- vnet_feature_enable_disable ("ip6-output", "flowprobe-ip6",
- sw_if_index, is_add, 0, 0);
- else if (which == FLOW_VARIANT_L2)
- vnet_feature_enable_disable ("interface-output", "flowprobe-l2",
- sw_if_index, is_add, 0, 0);
+ if (direction == FLOW_DIRECTION_RX || direction == FLOW_DIRECTION_BOTH)
+ {
+ if (which == FLOW_VARIANT_IP4)
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "flowprobe-input-ip4",
+ sw_if_index, is_add, 0, 0);
+ vnet_feature_enable_disable ("ip4-multicast", "flowprobe-input-ip4",
+ sw_if_index, is_add, 0, 0);
+ }
+ else if (which == FLOW_VARIANT_IP6)
+ {
+ vnet_feature_enable_disable ("ip6-unicast", "flowprobe-input-ip6",
+ sw_if_index, is_add, 0, 0);
+ vnet_feature_enable_disable ("ip6-multicast", "flowprobe-input-ip6",
+ sw_if_index, is_add, 0, 0);
+ }
+ else if (which == FLOW_VARIANT_L2)
+ vnet_feature_enable_disable ("device-input", "flowprobe-input-l2",
+ sw_if_index, is_add, 0, 0);
+ }
+
+ if (direction == FLOW_DIRECTION_TX || direction == FLOW_DIRECTION_BOTH)
+ {
+ if (which == FLOW_VARIANT_IP4)
+ vnet_feature_enable_disable ("ip4-output", "flowprobe-output-ip4",
+ sw_if_index, is_add, 0, 0);
+ else if (which == FLOW_VARIANT_IP6)
+ vnet_feature_enable_disable ("ip6-output", "flowprobe-output-ip6",
+ sw_if_index, is_add, 0, 0);
+ else if (which == FLOW_VARIANT_L2)
+ vnet_feature_enable_disable ("interface-output", "flowprobe-output-l2",
+ sw_if_index, is_add, 0, 0);
+ }
/* Stateful flow collection */
if (is_add && !fm->initialized)
@@ -615,6 +699,11 @@ flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
vlib_process_signal_event (vm, flowprobe_timer_node.index, 1, 0);
}
+ if (!is_add && fm->initialized)
+ {
+ flowprobe_clear_state_if_index (sw_if_index);
+ }
+
return 0;
}
@@ -646,8 +735,8 @@ void vl_api_flowprobe_tx_interface_add_del_t_handler
goto out;
}
- rv = flowprobe_tx_interface_add_del_feature
- (fm, sw_if_index, mp->which, mp->is_add);
+ rv = flowprobe_interface_add_del_feature (fm, sw_if_index, mp->which,
+ FLOW_DIRECTION_TX, mp->is_add);
out:
BAD_SW_IF_INDEX_LABEL;
@@ -655,6 +744,167 @@ out:
REPLY_MACRO (VL_API_FLOWPROBE_TX_INTERFACE_ADD_DEL_REPLY);
}
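+/* The legacy tx_interface_add_del handler above now delegates to
+ flowprobe_interface_add_del_feature with FLOW_DIRECTION_TX, keeping
+ its original TX-only semantics. */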
+void
+vl_api_flowprobe_interface_add_del_t_handler (
+ vl_api_flowprobe_interface_add_del_t *mp)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_flowprobe_interface_add_del_reply_t *rmp;
+ u32 sw_if_index;
+ u8 which;
+ u8 direction;
+ bool is_add;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ sw_if_index = ntohl (mp->sw_if_index);
+ is_add = mp->is_add;
+
+ if (mp->which == FLOWPROBE_WHICH_IP4)
+ which = FLOW_VARIANT_IP4;
+ else if (mp->which == FLOWPROBE_WHICH_IP6)
+ which = FLOW_VARIANT_IP6;
+ else if (mp->which == FLOWPROBE_WHICH_L2)
+ which = FLOW_VARIANT_L2;
+ else
+ {
+ clib_warning ("Invalid value of which");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ if (mp->direction == FLOWPROBE_DIRECTION_RX)
+ direction = FLOW_DIRECTION_RX;
+ else if (mp->direction == FLOWPROBE_DIRECTION_TX)
+ direction = FLOW_DIRECTION_TX;
+ else if (mp->direction == FLOWPROBE_DIRECTION_BOTH)
+ direction = FLOW_DIRECTION_BOTH;
+ else
+ {
+ clib_warning ("Invalid value of direction");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ if (fm->record == 0)
+ {
+ clib_warning ("Please specify flowprobe params record first");
+ rv = VNET_API_ERROR_CANNOT_ENABLE_DISABLE_FEATURE;
+ goto out;
+ }
+
+ rv = validate_feature_on_interface (fm, sw_if_index, which);
+ if (rv == 1)
+ {
+ if (is_add)
+ {
+ clib_warning ("Variant is already enabled for given interface");
+ rv = VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+ goto out;
+ }
+ }
+ else if (rv == 0)
+ {
+ clib_warning ("Interface has different variant enabled");
+ rv = VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+ goto out;
+ }
+ else if (rv == -1)
+ {
+ if (!is_add)
+ {
+ clib_warning ("Interface has no variant enabled");
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto out;
+ }
+ }
+
+ rv = flowprobe_interface_add_del_feature (fm, sw_if_index, which, direction,
+ is_add);
+
+out:
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_FLOWPROBE_INTERFACE_ADD_DEL_REPLY);
+}
+
+static void
+send_flowprobe_interface_details (u32 sw_if_index, u8 which, u8 direction,
+ vl_api_registration_t *reg, u32 context)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_flowprobe_interface_details_t *rmp = 0;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ if (!rmp)
+ return;
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_FLOWPROBE_INTERFACE_DETAILS + REPLY_MSG_ID_BASE);
+ rmp->context = context;
+
+ rmp->sw_if_index = htonl (sw_if_index);
+
+ if (which == FLOW_VARIANT_IP4)
+ rmp->which = FLOWPROBE_WHICH_IP4;
+ else if (which == FLOW_VARIANT_IP6)
+ rmp->which = FLOWPROBE_WHICH_IP6;
+ else if (which == FLOW_VARIANT_L2)
+ rmp->which = FLOWPROBE_WHICH_L2;
+ else
+ ASSERT (0);
+
+ if (direction == FLOW_DIRECTION_RX)
+ rmp->direction = FLOWPROBE_DIRECTION_RX;
+ else if (direction == FLOW_DIRECTION_TX)
+ rmp->direction = FLOWPROBE_DIRECTION_TX;
+ else if (direction == FLOW_DIRECTION_BOTH)
+ rmp->direction = FLOWPROBE_DIRECTION_BOTH;
+ else
+ ASSERT (0);
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_flowprobe_interface_dump_t_handler (
+ vl_api_flowprobe_interface_dump_t *mp)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_registration_t *reg;
+ u32 sw_if_index;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (sw_if_index == ~0)
+ {
+ u8 *which;
+
+ vec_foreach (which, fm->flow_per_interface)
+ {
+ if (*which == (u8) ~0)
+ continue;
+
+ sw_if_index = which - fm->flow_per_interface;
+ send_flowprobe_interface_details (
+ sw_if_index, *which, fm->direction_per_interface[sw_if_index], reg,
+ mp->context);
+ }
+ }
+ else if (vec_len (fm->flow_per_interface) > sw_if_index &&
+ fm->flow_per_interface[sw_if_index] != (u8) ~0)
+ {
+ send_flowprobe_interface_details (
+ sw_if_index, fm->flow_per_interface[sw_if_index],
+ fm->direction_per_interface[sw_if_index], reg, mp->context);
+ }
+}
+
#define vec_neg_search(v,E) \
({ \
word _v(i) = 0; \
@@ -675,7 +925,7 @@ flowprobe_params (flowprobe_main_t * fm, u8 record_l2,
flowprobe_record_t flags = 0;
if (vec_neg_search (fm->flow_per_interface, (u8) ~ 0) != ~0)
- return ~0;
+ return VNET_API_ERROR_UNSUPPORTED;
if (record_l2)
flags |= FLOW_RECORD_L2;
@@ -715,17 +965,89 @@ vl_api_flowprobe_params_t_handler (vl_api_flowprobe_params_t * mp)
REPLY_MACRO (VL_API_FLOWPROBE_PARAMS_REPLY);
}
-/* *INDENT-OFF* */
+void
+vl_api_flowprobe_set_params_t_handler (vl_api_flowprobe_set_params_t *mp)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_flowprobe_set_params_reply_t *rmp;
+ bool record_l2, record_l3, record_l4;
+ u32 active_timer;
+ u32 passive_timer;
+ int rv = 0;
+
+ record_l2 = (mp->record_flags & FLOWPROBE_RECORD_FLAG_L2);
+ record_l3 = (mp->record_flags & FLOWPROBE_RECORD_FLAG_L3);
+ record_l4 = (mp->record_flags & FLOWPROBE_RECORD_FLAG_L4);
+
+ active_timer = clib_net_to_host_u32 (mp->active_timer);
+ passive_timer = clib_net_to_host_u32 (mp->passive_timer);
+
+ if (passive_timer > 0 && active_timer > passive_timer)
+ {
+ clib_warning ("Passive timer must be greater than active timer");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ rv = flowprobe_params (fm, record_l2, record_l3, record_l4, active_timer,
+ passive_timer);
+ if (rv == VNET_API_ERROR_UNSUPPORTED)
+ clib_warning (
+ "Cannot change params when feature is enabled on some interfaces");
+
+out:
+ REPLY_MACRO (VL_API_FLOWPROBE_SET_PARAMS_REPLY);
+}
+
+void
+vl_api_flowprobe_get_params_t_handler (vl_api_flowprobe_get_params_t *mp)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_flowprobe_get_params_reply_t *rmp;
+ u8 record_flags = 0;
+ int rv = 0;
+
+ if (fm->record & FLOW_RECORD_L2)
+ record_flags |= FLOWPROBE_RECORD_FLAG_L2;
+ if (fm->record & FLOW_RECORD_L3)
+ record_flags |= FLOWPROBE_RECORD_FLAG_L3;
+ if (fm->record & FLOW_RECORD_L4)
+ record_flags |= FLOWPROBE_RECORD_FLAG_L4;
+
+ // clang-format off
+ REPLY_MACRO2 (VL_API_FLOWPROBE_GET_PARAMS_REPLY,
+ ({
+ rmp->record_flags = record_flags;
+ rmp->active_timer = htonl (fm->active_timer);
+ rmp->passive_timer = htonl (fm->passive_timer);
+ }));
+ // clang-format on
+}
+
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Flow per Packet",
};
-/* *INDENT-ON* */
+
+u8 *
+format_flowprobe_direction (u8 *s, va_list *args)
+{
+ u8 *direction = va_arg (*args, u8 *);
+ if (*direction == FLOW_DIRECTION_RX)
+ s = format (s, "rx");
+ else if (*direction == FLOW_DIRECTION_TX)
+ s = format (s, "tx");
+ else if (*direction == FLOW_DIRECTION_BOTH)
+ s = format (s, "rx tx");
+
+ return s;
+}
u8 *
format_flowprobe_entry (u8 * s, va_list * args)
{
flowprobe_entry_t *e = va_arg (*args, flowprobe_entry_t *);
+ s = format (s, " %U", format_flowprobe_direction, &e->key.direction);
s = format (s, " %d/%d", e->key.rx_sw_if_index, e->key.tx_sw_if_index);
s = format (s, " %U %U", format_ethernet_address, &e->key.src_mac,
@@ -789,14 +1111,12 @@ flowprobe_show_table_fn (vlib_main_t * vm,
for (i = 0; i < vec_len (fm->pool_per_worker); i++)
{
- /* *INDENT-OFF* */
pool_foreach (e, fm->pool_per_worker[i])
{
vlib_cli_output (vm, "%U",
format_flowprobe_entry,
e);
}
- /* *INDENT-ON* */
}
return 0;
@@ -822,14 +1142,15 @@ flowprobe_show_stats_fn (vlib_main_t * vm,
}
static clib_error_t *
-flowprobe_tx_interface_add_del_feature_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+flowprobe_interface_add_del_feature_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
flowprobe_main_t *fm = &flowprobe_main;
u32 sw_if_index = ~0;
int is_add = 1;
u8 which = FLOW_VARIANT_IP4;
+ flowprobe_direction_t direction = FLOW_DIRECTION_TX;
int rv;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
@@ -844,6 +1165,12 @@ flowprobe_tx_interface_add_del_feature_command_fn (vlib_main_t * vm,
which = FLOW_VARIANT_IP6;
else if (unformat (input, "l2"))
which = FLOW_VARIANT_L2;
+ else if (unformat (input, "rx"))
+ direction = FLOW_DIRECTION_RX;
+ else if (unformat (input, "tx"))
+ direction = FLOW_DIRECTION_TX;
+ else if (unformat (input, "both"))
+ direction = FLOW_DIRECTION_BOTH;
else
break;
}
@@ -865,9 +1192,16 @@ flowprobe_tx_interface_add_del_feature_command_fn (vlib_main_t * vm,
else if (rv == 0)
return clib_error_return (0,
"Interface has enable different datapath ...");
+ else if (rv == -1)
+ {
+ if (!is_add)
+ {
+ return clib_error_return (0, "Interface has no datapath enabled");
+ }
+ }
- rv =
- flowprobe_tx_interface_add_del_feature (fm, sw_if_index, which, is_add);
+ rv = flowprobe_interface_add_del_feature (fm, sw_if_index, which, direction,
+ is_add);
switch (rv)
{
case 0:
@@ -904,9 +1238,10 @@ flowprobe_show_feature_command_fn (vlib_main_t * vm,
continue;
sw_if_index = which - fm->flow_per_interface;
- vlib_cli_output (vm, " %U %U", format_vnet_sw_if_index_name,
+ vlib_cli_output (vm, " %U %U %U", format_vnet_sw_if_index_name,
vnet_get_main (), sw_if_index, format_flowprobe_feature,
- which);
+ which, format_flowprobe_direction,
+ &fm->direction_per_interface[sw_if_index]);
}
return 0;
}
@@ -983,18 +1318,17 @@ flowprobe_show_params_command_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (flowprobe_enable_disable_command, static) = {
- .path = "flowprobe feature add-del",
- .short_help =
- "flowprobe feature add-del <interface-name> <l2|ip4|ip6> disable",
- .function = flowprobe_tx_interface_add_del_feature_command_fn,
+ .path = "flowprobe feature add-del",
+ .short_help = "flowprobe feature add-del <interface-name> [(l2|ip4|ip6)] "
+ "[(rx|tx|both)] [disable]",
+ .function = flowprobe_interface_add_del_feature_command_fn,
};
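+/* Example invocations matching the short_help above (interface name is
+ illustrative):
+ flowprobe feature add-del GigabitEthernet2/3/0 ip4 both
+ flowprobe feature add-del GigabitEthernet2/3/0 ip4 both disable */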
VLIB_CLI_COMMAND (flowprobe_params_command, static) = {
- .path = "flowprobe params",
- .short_help =
- "flowprobe params record <[l2] [l3] [l4]> [active <timer> passive <timer>]",
- .function = flowprobe_params_command_fn,
+ .path = "flowprobe params",
+ .short_help = "flowprobe params record [l2] [l3] [l4] [active <timer>] "
+ "[passive <timer>]",
+ .function = flowprobe_params_command_fn,
};
VLIB_CLI_COMMAND (flowprobe_show_feature_command, static) = {
@@ -1019,7 +1353,6 @@ VLIB_CLI_COMMAND (flowprobe_show_stats_command, static) = {
.short_help = "show flowprobe statistics",
.function = flowprobe_show_stats_fn,
};
-/* *INDENT-ON* */
/*
* Main-core process, sending an interrupt to the per worker input
@@ -1073,13 +1406,11 @@ timer_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0; /* or not */
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (flowprobe_timer_node,static) = {
.function = timer_process,
.name = "flowprobe-timer-process",
.type = VLIB_NODE_TYPE_PROCESS,
};
-/* *INDENT-ON* */
#include <flowprobe/flowprobe.api.c>
diff --git a/src/plugins/flowprobe/flowprobe.h b/src/plugins/flowprobe/flowprobe.h
index 2d28c81de33..1fde5f94491 100644
--- a/src/plugins/flowprobe/flowprobe.h
+++ b/src/plugins/flowprobe/flowprobe.h
@@ -42,17 +42,22 @@ typedef enum
FLOW_N_RECORDS = 1 << 5,
} flowprobe_record_t;
-/* *INDENT-OFF* */
typedef enum __attribute__ ((__packed__))
{
- FLOW_VARIANT_IP4,
+ FLOW_VARIANT_IP4 = 0,
FLOW_VARIANT_IP6,
FLOW_VARIANT_L2,
FLOW_VARIANT_L2_IP4,
FLOW_VARIANT_L2_IP6,
FLOW_N_VARIANTS,
} flowprobe_variant_t;
-/* *INDENT-ON* */
+
+typedef enum __attribute__ ((__packed__))
+{
+ FLOW_DIRECTION_RX = 0,
+ FLOW_DIRECTION_TX,
+ FLOW_DIRECTION_BOTH,
+} flowprobe_direction_t;
STATIC_ASSERT (sizeof (flowprobe_variant_t) == 1,
"flowprobe_variant_t is expected to be 1 byte, "
@@ -72,7 +77,6 @@ typedef struct
u16 *next_record_offset_per_worker;
} flowprobe_protocol_context_t;
-/* *INDENT-OFF* */
typedef struct __attribute__ ((aligned (8))) {
u32 rx_sw_if_index;
u32 tx_sw_if_index;
@@ -85,8 +89,8 @@ typedef struct __attribute__ ((aligned (8))) {
u16 src_port;
u16 dst_port;
flowprobe_variant_t which;
+ flowprobe_direction_t direction;
} flowprobe_key_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -134,9 +138,7 @@ typedef struct
u8 ht_log2len; /* Hash table size is 2^log2len */
u32 **hash_per_worker;
flowprobe_entry_t **pool_per_worker;
- /* *INDENT-OFF* */
TWT (tw_timer_wheel) ** timers_per_worker;
- /* *INDENT-ON* */
u32 **expired_passive_per_worker;
flowprobe_record_t record;
@@ -149,6 +151,7 @@ typedef struct
u16 template_per_flow[FLOW_N_VARIANTS];
u8 *flow_per_interface;
+ u8 *direction_per_interface;
/** convenience vlib_main_t pointer */
vlib_main_t *vlib_main;
@@ -159,6 +162,8 @@ typedef struct
extern flowprobe_main_t flowprobe_main;
extern vlib_node_registration_t flowprobe_walker_node;
+void flowprobe_delete_by_index (u32 my_cpu_number, u32 poolindex);
+
void flowprobe_flush_callback_ip4 (void);
void flowprobe_flush_callback_ip6 (void);
void flowprobe_flush_callback_l2 (void);
diff --git a/src/plugins/flowprobe/flowprobe_plugin_doc.md b/src/plugins/flowprobe/flowprobe_plugin_doc.md
deleted file mode 100644
index 4c9b2342a83..00000000000
--- a/src/plugins/flowprobe/flowprobe_plugin_doc.md
+++ /dev/null
@@ -1,13 +0,0 @@
-IPFIX flow record plugin {#flowprobe_plugin_doc}
-========================
-
-## Introduction
-
-This plugin generates ipfix flow records on interfaces which have the feature enabled
-
-## Sample configuration
-
-set ipfix exporter collector 192.168.6.2 src 192.168.6.1 template-interval 20 port 4739 path-mtu 1500
-
-flowprobe params record l3 active 20 passive 120
-flowprobe feature add-del GigabitEthernet2/3/0 l2
\ No newline at end of file
diff --git a/src/plugins/flowprobe/flowprobe_plugin_doc.rst b/src/plugins/flowprobe/flowprobe_plugin_doc.rst
new file mode 100644
index 00000000000..4add41f5611
--- /dev/null
+++ b/src/plugins/flowprobe/flowprobe_plugin_doc.rst
@@ -0,0 +1,18 @@
+IPFIX flow record plugin
+========================
+
+Introduction
+------------
+
+This plugin generates IPFIX flow records on interfaces which have the
+feature enabled.
+
+Sample configuration
+--------------------
+
+::
+
+ set ipfix exporter collector 192.168.6.2 src 192.168.6.1 template-interval 20 port 4739 path-mtu 1450
+
+ flowprobe params record l3 active 20 passive 120
+ flowprobe feature add-del GigabitEthernet2/3/0 l2
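+
+The capture direction can optionally be given as ``rx``, ``tx`` or
+``both`` (the default is ``tx``), for example:
+
+::
+
+ flowprobe feature add-del GigabitEthernet2/3/0 ip4 both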
diff --git a/src/plugins/flowprobe/flowprobe_test.c b/src/plugins/flowprobe/flowprobe_test.c
index a694e45ae9b..37b91207e29 100644
--- a/src/plugins/flowprobe/flowprobe_test.c
+++ b/src/plugins/flowprobe/flowprobe_test.c
@@ -93,6 +93,136 @@ api_flowprobe_tx_interface_add_del (vat_main_t * vam)
}
static int
+api_flowprobe_interface_add_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ int enable_disable = 1;
+ u8 which = FLOWPROBE_WHICH_IP4;
+ u8 direction = FLOWPROBE_DIRECTION_TX;
+ u32 sw_if_index = ~0;
+ vl_api_flowprobe_interface_add_del_t *mp;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "disable"))
+ enable_disable = 0;
+ else if (unformat (i, "ip4"))
+ which = FLOWPROBE_WHICH_IP4;
+ else if (unformat (i, "ip6"))
+ which = FLOWPROBE_WHICH_IP6;
+ else if (unformat (i, "l2"))
+ which = FLOWPROBE_WHICH_L2;
+ else if (unformat (i, "rx"))
+ direction = FLOWPROBE_DIRECTION_RX;
+ else if (unformat (i, "tx"))
+ direction = FLOWPROBE_DIRECTION_TX;
+ else if (unformat (i, "both"))
+ direction = FLOWPROBE_DIRECTION_BOTH;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("Missing interface name / explicit sw_if_index number\n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (FLOWPROBE_INTERFACE_ADD_DEL, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = enable_disable;
+ mp->which = which;
+ mp->direction = direction;
+
+ /* Send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_flowprobe_interface_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_flowprobe_interface_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (FLOWPROBE_INTERFACE_DUMP, mp);
+ mp->sw_if_index = htonl (sw_if_index);
+
+ /* Send it... */
+ S (mp);
+
+ /* Use control ping for synchronization */
+ PING (&flowprobe_test_main, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_flowprobe_interface_details_t_handler (
+ vl_api_flowprobe_interface_details_t *mp)
+{
+ vat_main_t *vam = flowprobe_test_main.vat_main;
+ u32 sw_if_index;
+ u8 which;
+ u8 direction;
+ u8 *out = 0;
+ const char *variants[] = {
+ [FLOWPROBE_WHICH_IP4] = "ip4",
+ [FLOWPROBE_WHICH_IP6] = "ip6",
+ [FLOWPROBE_WHICH_L2] = "l2",
+ "Erroneous variant",
+ };
+ const char *directions[] = {
+ [FLOWPROBE_DIRECTION_RX] = "rx",
+ [FLOWPROBE_DIRECTION_TX] = "tx",
+ [FLOWPROBE_DIRECTION_BOTH] = "rx tx",
+ "Erroneous direction",
+ };
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ which = mp->which;
+ if (which > ARRAY_LEN (variants) - 2)
+ which = ARRAY_LEN (variants) - 1;
+
+ direction = mp->direction;
+ if (direction > ARRAY_LEN (directions) - 2)
+ direction = ARRAY_LEN (directions) - 1;
+
+ out = format (0, "sw_if_index: %u, variant: %s, direction: %s\n%c",
+ sw_if_index, variants[which], directions[direction], 0);
+
+ fformat (vam->ofp, (char *) out);
+ vec_free (out);
+}
+
+static int
api_flowprobe_params (vat_main_t * vam)
{
unformat_input_t *i = vam->input;
@@ -145,6 +275,94 @@ api_flowprobe_params (vat_main_t * vam)
return ret;
}
+static int
+api_flowprobe_set_params (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_flowprobe_set_params_t *mp;
+ u32 active_timer = ~0;
+ u32 passive_timer = ~0;
+ u8 record_flags = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "active %d", &active_timer))
+ ;
+ else if (unformat (i, "passive %d", &passive_timer))
+ ;
+ else if (unformat (i, "record"))
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "l2"))
+ record_flags |= FLOWPROBE_RECORD_FLAG_L2;
+ else if (unformat (i, "l3"))
+ record_flags |= FLOWPROBE_RECORD_FLAG_L3;
+ else if (unformat (i, "l4"))
+ record_flags |= FLOWPROBE_RECORD_FLAG_L4;
+ else
+ break;
+ }
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (FLOWPROBE_SET_PARAMS, mp);
+ mp->record_flags = record_flags;
+ mp->active_timer = ntohl (active_timer);
+ mp->passive_timer = ntohl (passive_timer);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+static int
+api_flowprobe_get_params (vat_main_t *vam)
+{
+ vl_api_flowprobe_get_params_t *mp;
+ int ret;
+
+ /* Construct the API message */
+ M (FLOWPROBE_GET_PARAMS, mp);
+
+ /* Send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_flowprobe_get_params_reply_t_handler (
+ vl_api_flowprobe_get_params_reply_t *mp)
+{
+ vat_main_t *vam = flowprobe_test_main.vat_main;
+ u8 *out = 0;
+
+ out =
+ format (0, "active: %u, passive: %u, record:", ntohl (mp->active_timer),
+ ntohl (mp->passive_timer));
+
+ if (mp->record_flags & FLOWPROBE_RECORD_FLAG_L2)
+ out = format (out, " l2");
+ if (mp->record_flags & FLOWPROBE_RECORD_FLAG_L3)
+ out = format (out, " l3");
+ if (mp->record_flags & FLOWPROBE_RECORD_FLAG_L4)
+ out = format (out, " l4");
+
+ out = format (out, "\n%c", 0);
+ fformat (vam->ofp, (char *) out);
+ vec_free (out);
+ vam->result_ready = 1;
+}
+
/*
* List of messages that the api test plugin sends,
* and that the data plane plugin processes
diff --git a/src/plugins/flowprobe/node.c b/src/plugins/flowprobe/node.c
index e7a39a7ed7e..03511689dda 100644
--- a/src/plugins/flowprobe/node.c
+++ b/src/plugins/flowprobe/node.c
@@ -17,6 +17,7 @@
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vppinfra/crc32.h>
+#include <vppinfra/xxhash.h>
#include <vppinfra/error.h>
#include <flowprobe/flowprobe.h>
#include <vnet/ip/ip6_packet.h>
@@ -98,9 +99,15 @@ format_flowprobe_trace (u8 * s, va_list * args)
return s;
}
-vlib_node_registration_t flowprobe_ip4_node;
-vlib_node_registration_t flowprobe_ip6_node;
-vlib_node_registration_t flowprobe_l2_node;
+vlib_node_registration_t flowprobe_input_ip4_node;
+vlib_node_registration_t flowprobe_input_ip6_node;
+vlib_node_registration_t flowprobe_input_l2_node;
+vlib_node_registration_t flowprobe_output_ip4_node;
+vlib_node_registration_t flowprobe_output_ip6_node;
+vlib_node_registration_t flowprobe_output_l2_node;
+vlib_node_registration_t flowprobe_flush_ip4_node;
+vlib_node_registration_t flowprobe_flush_ip6_node;
+vlib_node_registration_t flowprobe_flush_l2_node;
/* No counters at the moment */
#define foreach_flowprobe_error \
@@ -166,6 +173,11 @@ flowprobe_common_add (vlib_buffer_t * to_b, flowprobe_entry_t * e, u16 offset)
clib_memcpy_fast (to_b->data + offset, &tx_if, sizeof (tx_if));
offset += sizeof (tx_if);
+ /* Flow direction
+ 0x00: ingress flow
+ 0x01: egress flow */
+ to_b->data[offset++] = (e->key.direction == FLOW_DIRECTION_TX);
+
/* packet delta count */
u64 packetdelta = clib_host_to_net_u64 (e->packetcount);
clib_memcpy_fast (to_b->data + offset, &packetdelta, sizeof (u64));
@@ -357,25 +369,30 @@ flowprobe_create (u32 my_cpu_number, flowprobe_key_t * k, u32 * poolindex)
}
static inline void
-add_to_flow_record_state (vlib_main_t * vm, vlib_node_runtime_t * node,
- flowprobe_main_t * fm, vlib_buffer_t * b,
+add_to_flow_record_state (vlib_main_t *vm, vlib_node_runtime_t *node,
+ flowprobe_main_t *fm, vlib_buffer_t *b,
timestamp_nsec_t timestamp, u16 length,
- flowprobe_variant_t which, flowprobe_trace_t * t)
+ flowprobe_variant_t which,
+ flowprobe_direction_t direction,
+ flowprobe_trace_t *t)
{
if (fm->disabled)
return;
+ ASSERT (direction == FLOW_DIRECTION_RX || direction == FLOW_DIRECTION_TX);
+
u32 my_cpu_number = vm->thread_index;
u16 octets = 0;
flowprobe_record_t flags = fm->context[which].flags;
bool collect_ip4 = false, collect_ip6 = false;
ASSERT (b);
- ethernet_header_t *eth = vlib_buffer_get_current (b);
+ ethernet_header_t *eth = (direction == FLOW_DIRECTION_TX) ?
+ vlib_buffer_get_current (b) :
+ ethernet_buffer_get_header (b);
u16 ethertype = clib_net_to_host_u16 (eth->type);
- /* *INDENT-OFF* */
+ i16 l3_hdr_offset = (u8 *) eth - b->data + sizeof (ethernet_header_t);
flowprobe_key_t k = {};
- /* *INDENT-ON* */
ip4_header_t *ip4 = 0;
ip6_header_t *ip6 = 0;
udp_header_t *udp = 0;
@@ -392,6 +409,7 @@ add_to_flow_record_state (vlib_main_t * vm, vlib_node_runtime_t * node,
k.tx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
k.which = which;
+ k.direction = direction;
if (flags & FLOW_RECORD_L2)
{
@@ -399,9 +417,22 @@ add_to_flow_record_state (vlib_main_t * vm, vlib_node_runtime_t * node,
clib_memcpy_fast (k.dst_mac, eth->dst_address, 6);
k.ethertype = ethertype;
}
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ /* VLAN tag */
+ ethernet_vlan_header_tv_t *ethv =
+ (ethernet_vlan_header_tv_t *) (&(eth->type));
+ /* QinQ possibility */
+ while (clib_net_to_host_u16 (ethv->type) == ETHERNET_TYPE_VLAN)
+ {
+ ethv++;
+ l3_hdr_offset += sizeof (ethernet_vlan_header_tv_t);
+ }
+ k.ethertype = ethertype = clib_net_to_host_u16 ((ethv)->type);
+ }
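+ /* e.g. a single-tagged frame advances past one 4-byte tag header
+ (l3_hdr_offset += 4), a QinQ frame past two (+= 8); k.ethertype ends
+ up as the innermost ethertype either way */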
if (collect_ip6 && ethertype == ETHERNET_TYPE_IP6)
{
- ip6 = (ip6_header_t *) (eth + 1);
+ ip6 = (ip6_header_t *) (b->data + l3_hdr_offset);
if (flags & FLOW_RECORD_L3)
{
k.src_address.as_u64[0] = ip6->src_address.as_u64[0];
@@ -420,7 +451,7 @@ add_to_flow_record_state (vlib_main_t * vm, vlib_node_runtime_t * node,
}
if (collect_ip4 && ethertype == ETHERNET_TYPE_IP4)
{
- ip4 = (ip4_header_t *) (eth + 1);
+ ip4 = (ip4_header_t *) (b->data + l3_hdr_offset);
if (flags & FLOW_RECORD_L3)
{
k.src_address.ip4.as_u32 = ip4->src_address.as_u32;
@@ -520,6 +551,7 @@ flowprobe_export_send (vlib_main_t * vm, vlib_buffer_t * b0,
{
flowprobe_main_t *fm = &flowprobe_main;
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
vlib_frame_t *f;
ip4_ipfix_template_packet_t *tp;
ipfix_set_header_t *s;
@@ -537,19 +569,19 @@ flowprobe_export_send (vlib_main_t * vm, vlib_buffer_t * b0,
flowprobe_get_headersize ())
return;
- u32 i, index = vec_len (frm->streams);
+ u32 i, index = vec_len (exp->streams);
for (i = 0; i < index; i++)
- if (frm->streams[i].domain_id == 1)
+ if (exp->streams[i].domain_id == 1)
{
index = i;
break;
}
- if (i == vec_len (frm->streams))
+ if (i == vec_len (exp->streams))
{
- vec_validate (frm->streams, index);
- frm->streams[index].domain_id = 1;
+ vec_validate (exp->streams, index);
+ exp->streams[index].domain_id = 1;
}
- stream = &frm->streams[index];
+ stream = &exp->streams[index];
tp = vlib_buffer_get_current (b0);
ip = (ip4_header_t *) & tp->ip4;
@@ -561,16 +593,15 @@ flowprobe_export_send (vlib_main_t * vm, vlib_buffer_t * b0,
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
ip->flags_and_fragment_offset = 0;
- ip->src_address.as_u32 = frm->src_address.as_u32;
- ip->dst_address.as_u32 = frm->ipfix_collector.as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
- udp->dst_port = clib_host_to_net_u16 (frm->collector_port);
+ udp->dst_port = clib_host_to_net_u16 (exp->collector_port);
udp->checksum = 0;
/* FIXUP: message header export_time */
- h->export_time = (u32)
- (((f64) frm->unix_time_0) +
- (vlib_time_now (frm->vlib_main) - frm->vlib_time_0));
+ h->export_time =
+ (u32) (((f64) frm->unix_time_0) + (vlib_time_now (vm) - frm->vlib_time_0));
h->export_time = clib_host_to_net_u32 (h->export_time);
h->domain_id = clib_host_to_net_u32 (stream->domain_id);
@@ -590,7 +621,7 @@ flowprobe_export_send (vlib_main_t * vm, vlib_buffer_t * b0,
ip->checksum = ip4_header_checksum (ip);
udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
- if (frm->udp_checksum)
+ if (exp->udp_checksum)
{
/* RFC 7011 section 10.3.2. */
udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
@@ -616,7 +647,7 @@ flowprobe_export_send (vlib_main_t * vm, vlib_buffer_t * b0,
}
vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
- vlib_node_increment_counter (vm, flowprobe_l2_node.index,
+ vlib_node_increment_counter (vm, flowprobe_output_l2_node.index,
FLOWPROBE_ERROR_EXPORTED_PACKETS, 1);
fm->context[which].frames_per_worker[my_cpu_number] = 0;
@@ -629,7 +660,7 @@ static vlib_buffer_t *
flowprobe_get_buffer (vlib_main_t * vm, flowprobe_variant_t which)
{
flowprobe_main_t *fm = &flowprobe_main;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = pool_elt_at_index (flow_report_main.exporters, 0);
vlib_buffer_t *b0;
u32 bi0;
u32 my_cpu_number = vm->thread_index;
@@ -642,7 +673,7 @@ flowprobe_get_buffer (vlib_main_t * vm, flowprobe_variant_t which)
{
if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
{
- vlib_node_increment_counter (vm, flowprobe_l2_node.index,
+ vlib_node_increment_counter (vm, flowprobe_output_l2_node.index,
FLOWPROBE_ERROR_BUFFER, 1);
return 0;
}
@@ -656,7 +687,7 @@ flowprobe_get_buffer (vlib_main_t * vm, flowprobe_variant_t which)
b0->flags |=
(VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_F_FLOW_REPORT);
vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = exp->fib_index;
fm->context[which].next_record_offset_per_worker[my_cpu_number] =
b0->current_length;
}
@@ -669,9 +700,10 @@ flowprobe_export_entry (vlib_main_t * vm, flowprobe_entry_t * e)
{
u32 my_cpu_number = vm->thread_index;
flowprobe_main_t *fm = &flowprobe_main;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = pool_elt_at_index (flow_report_main.exporters, 0);
vlib_buffer_t *b0;
bool collect_ip4 = false, collect_ip6 = false;
+ bool collect_l4 = false;
flowprobe_variant_t which = e->key.which;
flowprobe_record_t flags = fm->context[which].flags;
u16 offset =
@@ -690,6 +722,10 @@ flowprobe_export_entry (vlib_main_t * vm, flowprobe_entry_t * e)
collect_ip4 = which == FLOW_VARIANT_L2_IP4 || which == FLOW_VARIANT_IP4;
collect_ip6 = which == FLOW_VARIANT_L2_IP6 || which == FLOW_VARIANT_IP6;
}
+ if (flags & FLOW_RECORD_L4)
+ {
+ collect_l4 = (which != FLOW_VARIANT_L2);
+ }
offset += flowprobe_common_add (b0, e, offset);
@@ -699,26 +735,27 @@ flowprobe_export_entry (vlib_main_t * vm, flowprobe_entry_t * e)
offset += flowprobe_l3_ip6_add (b0, e, offset);
if (collect_ip4)
offset += flowprobe_l3_ip4_add (b0, e, offset);
- if (flags & FLOW_RECORD_L4)
+ if (collect_l4)
offset += flowprobe_l4_add (b0, e, offset);
/* Reset per flow-export counters */
e->packetcount = 0;
e->octetcount = 0;
e->last_exported = vlib_time_now (vm);
+ e->prot.tcp.flags = 0;
b0->current_length = offset;
fm->context[which].next_record_offset_per_worker[my_cpu_number] = offset;
/* Time to flush the buffer? */
- if (offset + fm->template_size[flags] > frm->path_mtu)
+ if (offset + fm->template_size[flags] > exp->path_mtu)
flowprobe_export_send (vm, b0, which);
}
uword
-flowprobe_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame,
- flowprobe_variant_t which)
+flowprobe_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, flowprobe_variant_t which,
+ flowprobe_direction_t direction)
{
u32 n_left_from, *from, *to_next;
flowprobe_next_t next_index;
@@ -778,20 +815,22 @@ flowprobe_node_fn (vlib_main_t * vm,
u16 ethertype0 = clib_net_to_host_u16 (eh0->type);
if (PREDICT_TRUE ((b0->flags & VNET_BUFFER_F_FLOW_REPORT) == 0))
- add_to_flow_record_state (vm, node, fm, b0, timestamp, len0,
- flowprobe_get_variant
- (which, fm->context[which].flags,
- ethertype0), 0);
+ add_to_flow_record_state (
+ vm, node, fm, b0, timestamp, len0,
+ flowprobe_get_variant (which, fm->context[which].flags,
+ ethertype0),
+ direction, 0);
len1 = vlib_buffer_length_in_chain (vm, b1);
ethernet_header_t *eh1 = vlib_buffer_get_current (b1);
u16 ethertype1 = clib_net_to_host_u16 (eh1->type);
if (PREDICT_TRUE ((b1->flags & VNET_BUFFER_F_FLOW_REPORT) == 0))
- add_to_flow_record_state (vm, node, fm, b1, timestamp, len1,
- flowprobe_get_variant
- (which, fm->context[which].flags,
- ethertype1), 0);
+ add_to_flow_record_state (
+ vm, node, fm, b1, timestamp, len1,
+ flowprobe_get_variant (which, fm->context[which].flags,
+ ethertype1),
+ direction, 0);
/* verify speculative enqueues, maybe switch current next frame */
vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
@@ -829,10 +868,11 @@ flowprobe_node_fn (vlib_main_t * vm,
&& (b0->flags & VLIB_BUFFER_IS_TRACED)))
t = vlib_add_trace (vm, node, b0, sizeof (*t));
- add_to_flow_record_state (vm, node, fm, b0, timestamp, len0,
- flowprobe_get_variant
- (which, fm->context[which].flags,
- ethertype0), t);
+ add_to_flow_record_state (
+ vm, node, fm, b0, timestamp, len0,
+ flowprobe_get_variant (which, fm->context[which].flags,
+ ethertype0),
+ direction, t);
}
/* verify speculative enqueue, maybe switch current next frame */
@@ -847,24 +887,51 @@ flowprobe_node_fn (vlib_main_t * vm,
}
static uword
-flowprobe_ip4_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
+flowprobe_input_ip4_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP4,
+ FLOW_DIRECTION_RX);
+}
+
+static uword
+flowprobe_input_ip6_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP6,
+ FLOW_DIRECTION_RX);
+}
+
+static uword
+flowprobe_input_l2_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_L2,
+ FLOW_DIRECTION_RX);
+}
+
+static uword
+flowprobe_output_ip4_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
{
- return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP4);
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP4,
+ FLOW_DIRECTION_TX);
}
static uword
-flowprobe_ip6_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
+flowprobe_output_ip6_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
{
- return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP6);
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP6,
+ FLOW_DIRECTION_TX);
}
static uword
-flowprobe_l2_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
+flowprobe_output_l2_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
{
- return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_L2);
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_L2,
+ FLOW_DIRECTION_TX);
}
static inline void
@@ -879,25 +946,63 @@ flush_record (flowprobe_variant_t which)
void
flowprobe_flush_callback_ip4 (void)
{
+ vlib_main_t *worker_vm;
+ u32 i;
+
+ /* Flush for each worker thread */
+ for (i = 1; i < vlib_get_n_threads (); i++)
+ {
+ worker_vm = vlib_get_main_by_index (i);
+ if (worker_vm)
+ vlib_node_set_interrupt_pending (worker_vm,
+ flowprobe_flush_ip4_node.index);
+ }
+
+ /* Flush for the main thread */
flush_record (FLOW_VARIANT_IP4);
}
void
flowprobe_flush_callback_ip6 (void)
{
+ vlib_main_t *worker_vm;
+ u32 i;
+
+ /* Flush for each worker thread */
+ for (i = 1; i < vlib_get_n_threads (); i++)
+ {
+ worker_vm = vlib_get_main_by_index (i);
+ if (worker_vm)
+ vlib_node_set_interrupt_pending (worker_vm,
+ flowprobe_flush_ip6_node.index);
+ }
+
+ /* Flush for the main thread */
flush_record (FLOW_VARIANT_IP6);
}
void
flowprobe_flush_callback_l2 (void)
{
+ vlib_main_t *worker_vm;
+ u32 i;
+
+ /* Flush for each worker thread */
+ for (i = 1; i < vlib_get_n_threads (); i++)
+ {
+ worker_vm = vlib_get_main_by_index (i);
+ if (worker_vm)
+ vlib_node_set_interrupt_pending (worker_vm,
+ flowprobe_flush_l2_node.index);
+ }
+
+ /* Flush for the main thread */
flush_record (FLOW_VARIANT_L2);
flush_record (FLOW_VARIANT_L2_IP4);
flush_record (FLOW_VARIANT_L2_IP6);
}
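+/* All three flush callbacks above share the same pattern: raise an
+ interrupt on the per-worker flowprobe-flush-* input nodes (registered
+ further down) so each worker flushes its own per-thread buffers, then
+ flush the main thread's records inline. */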
-
-static void
+void
flowprobe_delete_by_index (u32 my_cpu_number, u32 poolindex)
{
flowprobe_main_t *fm = &flowprobe_main;
@@ -922,14 +1027,15 @@ flowprobe_walker_process (vlib_main_t * vm,
vlib_node_runtime_t * rt, vlib_frame_t * f)
{
flowprobe_main_t *fm = &flowprobe_main;
- flow_report_main_t *frm = &flow_report_main;
flowprobe_entry_t *e;
+ ipfix_exporter_t *exp = pool_elt_at_index (flow_report_main.exporters, 0);
/*
* $$$$ Remove this check from here and track FRM status and disable
* this process if required.
*/
- if (frm->ipfix_collector.as_u32 == 0 || frm->src_address.as_u32 == 0)
+ if (ip_address_is_zero (&exp->ipfix_collector) ||
+ ip_address_is_zero (&exp->src_address))
{
fm->disabled = true;
return 0;
@@ -996,36 +1102,94 @@ flowprobe_walker_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (flowprobe_ip4_node) = {
- .function = flowprobe_ip4_node_fn,
- .name = "flowprobe-ip4",
+static uword
+flowprobe_flush_ip4 (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ flush_record (FLOW_VARIANT_IP4);
+
+ return 0;
+}
+
+static uword
+flowprobe_flush_ip6 (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ flush_record (FLOW_VARIANT_IP6);
+
+ return 0;
+}
+
+static uword
+flowprobe_flush_l2 (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ flush_record (FLOW_VARIANT_L2);
+ flush_record (FLOW_VARIANT_L2_IP4);
+ flush_record (FLOW_VARIANT_L2_IP6);
+
+ return 0;
+}
+
+VLIB_REGISTER_NODE (flowprobe_input_ip4_node) = {
+ .function = flowprobe_input_ip4_node_fn,
+ .name = "flowprobe-input-ip4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_flowprobe_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
+ .error_strings = flowprobe_error_strings,
+ .n_next_nodes = FLOWPROBE_N_NEXT,
+ .next_nodes = FLOWPROBE_NEXT_NODES,
+};
+VLIB_REGISTER_NODE (flowprobe_input_ip6_node) = {
+ .function = flowprobe_input_ip6_node_fn,
+ .name = "flowprobe-input-ip6",
.vector_size = sizeof (u32),
.format_trace = format_flowprobe_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(flowprobe_error_strings),
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
.error_strings = flowprobe_error_strings,
.n_next_nodes = FLOWPROBE_N_NEXT,
.next_nodes = FLOWPROBE_NEXT_NODES,
};
-VLIB_REGISTER_NODE (flowprobe_ip6_node) = {
- .function = flowprobe_ip6_node_fn,
- .name = "flowprobe-ip6",
+VLIB_REGISTER_NODE (flowprobe_input_l2_node) = {
+ .function = flowprobe_input_l2_node_fn,
+ .name = "flowprobe-input-l2",
.vector_size = sizeof (u32),
.format_trace = format_flowprobe_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(flowprobe_error_strings),
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
.error_strings = flowprobe_error_strings,
.n_next_nodes = FLOWPROBE_N_NEXT,
.next_nodes = FLOWPROBE_NEXT_NODES,
};
-VLIB_REGISTER_NODE (flowprobe_l2_node) = {
- .function = flowprobe_l2_node_fn,
- .name = "flowprobe-l2",
+VLIB_REGISTER_NODE (flowprobe_output_ip4_node) = {
+ .function = flowprobe_output_ip4_node_fn,
+ .name = "flowprobe-output-ip4",
.vector_size = sizeof (u32),
.format_trace = format_flowprobe_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(flowprobe_error_strings),
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
+ .error_strings = flowprobe_error_strings,
+ .n_next_nodes = FLOWPROBE_N_NEXT,
+ .next_nodes = FLOWPROBE_NEXT_NODES,
+};
+VLIB_REGISTER_NODE (flowprobe_output_ip6_node) = {
+ .function = flowprobe_output_ip6_node_fn,
+ .name = "flowprobe-output-ip6",
+ .vector_size = sizeof (u32),
+ .format_trace = format_flowprobe_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
+ .error_strings = flowprobe_error_strings,
+ .n_next_nodes = FLOWPROBE_N_NEXT,
+ .next_nodes = FLOWPROBE_NEXT_NODES,
+};
+VLIB_REGISTER_NODE (flowprobe_output_l2_node) = {
+ .function = flowprobe_output_l2_node_fn,
+ .name = "flowprobe-output-l2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_flowprobe_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
.error_strings = flowprobe_error_strings,
.n_next_nodes = FLOWPROBE_N_NEXT,
.next_nodes = FLOWPROBE_NEXT_NODES,
@@ -1036,7 +1200,24 @@ VLIB_REGISTER_NODE (flowprobe_walker_node) = {
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
};
-/* *INDENT-ON* */
+VLIB_REGISTER_NODE (flowprobe_flush_ip4_node) = {
+ .function = flowprobe_flush_ip4,
+ .name = "flowprobe-flush-ip4",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+};
+VLIB_REGISTER_NODE (flowprobe_flush_ip6_node) = {
+ .function = flowprobe_flush_ip6,
+ .name = "flowprobe-flush-ip6",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+};
+VLIB_REGISTER_NODE (flowprobe_flush_l2_node) = {
+ .function = flowprobe_flush_l2,
+ .name = "flowprobe-flush-l2",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/gbp/gbp.api b/src/plugins/gbp/gbp.api
deleted file mode 100644
index 525e70536bd..00000000000
--- a/src/plugins/gbp/gbp.api
+++ /dev/null
@@ -1,470 +0,0 @@
-/* Hey Emacs use -*- mode: C -*- */
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "2.0.0";
-
-import "vnet/ip/ip_types.api";
-import "vnet/ethernet/ethernet_types.api";
-import "vnet/interface_types.api";
-
-enum gbp_bridge_domain_flags
-{
- GBP_BD_API_FLAG_NONE = 0,
- GBP_BD_API_FLAG_DO_NOT_LEARN = 1,
- GBP_BD_API_FLAG_UU_FWD_DROP = 2,
- GBP_BD_API_FLAG_MCAST_DROP = 4,
- GBP_BD_API_FLAG_UCAST_ARP = 8,
-};
-
-typedef gbp_bridge_domain
-{
- u32 bd_id;
- u32 rd_id;
- vl_api_gbp_bridge_domain_flags_t flags;
- vl_api_interface_index_t bvi_sw_if_index;
- vl_api_interface_index_t uu_fwd_sw_if_index;
- vl_api_interface_index_t bm_flood_sw_if_index;
-};
-
- autoreply define gbp_bridge_domain_add
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- vl_api_gbp_bridge_domain_t bd;
-};
- autoreply define gbp_bridge_domain_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- u32 bd_id;
-};
-autoreply define gbp_bridge_domain_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-define gbp_bridge_domain_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_bridge_domain_t bd;
-};
-
-typedef u16 gbp_scope;
-
-typedef gbp_route_domain
-{
- u32 rd_id;
- u32 ip4_table_id;
- u32 ip6_table_id;
- vl_api_interface_index_t ip4_uu_sw_if_index;
- vl_api_interface_index_t ip6_uu_sw_if_index;
- vl_api_gbp_scope_t scope;
-};
-
- autoreply define gbp_route_domain_add
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- vl_api_gbp_route_domain_t rd;
-};
- autoreply define gbp_route_domain_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- u32 rd_id;
-};
-autoreply define gbp_route_domain_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-define gbp_route_domain_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_route_domain_t rd;
-};
-
-/** \brief Endpoint
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-
-enum gbp_endpoint_flags
-{
- GBP_API_ENDPOINT_FLAG_NONE = 0,
- GBP_API_ENDPOINT_FLAG_BOUNCE = 0x1,
- GBP_API_ENDPOINT_FLAG_REMOTE = 0x2,
- GBP_API_ENDPOINT_FLAG_LEARNT = 0x4,
- GBP_API_ENDPOINT_FLAG_EXTERNAL = 0x8,
-};
-
-typedef gbp_endpoint_tun
-{
- vl_api_address_t src;
- vl_api_address_t dst;
-};
-
-typedef gbp_endpoint
-{
- vl_api_interface_index_t sw_if_index;
- u16 sclass;
- vl_api_gbp_endpoint_flags_t flags;
- vl_api_mac_address_t mac;
- vl_api_gbp_endpoint_tun_t tun;
- u8 n_ips;
- vl_api_address_t ips[n_ips];
-};
-
- define gbp_endpoint_add
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- vl_api_gbp_endpoint_t endpoint;
-};
-
-define gbp_endpoint_add_reply
-{
- option status="in_progress";
- u32 context;
- i32 retval;
- u32 handle;
-};
-
- autoreply define gbp_endpoint_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- u32 handle;
-};
-
-define gbp_endpoint_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_endpoint_details
-{
- option status="in_progress";
- u32 context;
- f64 age;
- u32 handle;
- vl_api_gbp_endpoint_t endpoint;
-};
-
-typedef gbp_endpoint_retention
-{
- u32 remote_ep_timeout;
-};
-
-typedef gbp_endpoint_group
-{
- u32 vnid;
- u16 sclass;
- u32 bd_id;
- u32 rd_id;
- vl_api_interface_index_t uplink_sw_if_index;
- vl_api_gbp_endpoint_retention_t retention;
-};
-
- autoreply define gbp_endpoint_group_add
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- vl_api_gbp_endpoint_group_t epg;
-};
- autoreply define gbp_endpoint_group_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- u16 sclass;
-};
-
-define gbp_endpoint_group_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_endpoint_group_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_endpoint_group_t epg;
-};
-
-typedef gbp_recirc
-{
- vl_api_interface_index_t sw_if_index;
- u16 sclass;
- bool is_ext;
-};
-
-autoreply define gbp_recirc_add_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_gbp_recirc_t recirc;
-};
-
-define gbp_recirc_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_recirc_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_recirc_t recirc;
-};
-
-enum gbp_subnet_type
-{
- GBP_API_SUBNET_TRANSPORT,
- GBP_API_SUBNET_STITCHED_INTERNAL,
- GBP_API_SUBNET_STITCHED_EXTERNAL,
- GBP_API_SUBNET_L3_OUT,
- GBP_API_SUBNET_ANON_L3_OUT,
-};
-
-typedef gbp_subnet
-{
- u32 rd_id;
- vl_api_interface_index_t sw_if_index [default=0xffffffff];
- u16 sclass [default=0xffffffff];
- vl_api_gbp_subnet_type_t type;
- vl_api_prefix_t prefix;
-};
-
-autoreply define gbp_subnet_add_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_gbp_subnet_t subnet;
-};
-
-define gbp_subnet_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_subnet_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_subnet_t subnet;
-};
-
-typedef gbp_next_hop
-{
- vl_api_address_t ip;
- vl_api_mac_address_t mac;
- u32 bd_id;
- u32 rd_id;
-};
-
-enum gbp_hash_mode
-{
- GBP_API_HASH_MODE_SRC_IP,
- GBP_API_HASH_MODE_DST_IP,
- GBP_API_HASH_MODE_SYMMETRIC,
-};
-
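-/** A set of up to 8 next-hops used by redirect rules; n_nhs gives the
- * number of valid entries and hash_mode selects how flows are spread
- * across them. */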
-typedef gbp_next_hop_set
-{
- vl_api_gbp_hash_mode_t hash_mode;
- u8 n_nhs;
- vl_api_gbp_next_hop_t nhs[8];
-};
-
-enum gbp_rule_action
-{
- GBP_API_RULE_PERMIT,
- GBP_API_RULE_DENY,
- GBP_API_RULE_REDIRECT,
-};
-
-typedef gbp_rule
-{
- vl_api_gbp_rule_action_t action;
- vl_api_gbp_next_hop_set_t nh_set;
-};
-
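-/** A contract is keyed by (scope, sclass, dclass). allowed_ethertypes is
- * carried in network byte order; only the first n_ether_types of its 16
- * slots are valid. */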
-typedef gbp_contract
-{
- vl_api_gbp_scope_t scope;
- u16 sclass;
- u16 dclass;
- u32 acl_index;
- u8 n_ether_types;
- u16 allowed_ethertypes[16];
- u8 n_rules;
- vl_api_gbp_rule_t rules[n_rules];
-};
-
-define gbp_contract_add_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_gbp_contract_t contract;
-};
-define gbp_contract_add_del_reply
-{
- option status="in_progress";
- u32 context;
- i32 retval;
- u32 stats_index;
-};
-
-define gbp_contract_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_contract_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_contract_t contract;
-};
-
-/**
- * @brief Configure a 'base' tunnel from which learned tunnels
- * are permitted to derive.
- * A base tunnel is keyed only by its VNI; any src/dst IP pair
- * is therefore allowed.
- */
-enum gbp_vxlan_tunnel_mode
-{
- GBP_VXLAN_TUNNEL_MODE_L2,
- GBP_VXLAN_TUNNEL_MODE_L3,
-};
-
-typedef gbp_vxlan_tunnel
-{
- u32 vni;
- vl_api_gbp_vxlan_tunnel_mode_t mode;
- u32 bd_rd_id;
- vl_api_ip4_address_t src;
-};
-
-define gbp_vxlan_tunnel_add
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- vl_api_gbp_vxlan_tunnel_t tunnel;
-};
-
-define gbp_vxlan_tunnel_add_reply
-{
- option status="in_progress";
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-autoreply define gbp_vxlan_tunnel_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- u32 vni;
-};
-
-define gbp_vxlan_tunnel_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_vxlan_tunnel_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_vxlan_tunnel_t tunnel;
-};
-
-enum gbp_ext_itf_flags
-{
- GBP_API_EXT_ITF_F_NONE = 0,
- GBP_API_EXT_ITF_F_ANON = 1,
-};
-
-typedef gbp_ext_itf
-{
- vl_api_interface_index_t sw_if_index;
- u32 bd_id;
- u32 rd_id;
- vl_api_gbp_ext_itf_flags_t flags;
-};
-
-autoreply define gbp_ext_itf_add_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_gbp_ext_itf_t ext_itf;
-};
-
-define gbp_ext_itf_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_ext_itf_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_ext_itf_t ext_itf;
-};
-
-/*
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp.h b/src/plugins/gbp/gbp.h
deleted file mode 100644
index 50039b3bdcf..00000000000
--- a/src/plugins/gbp/gbp.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Group Based Policy (GBP) defines:
- * - endpoints: typically a VM or container that is connected to the
- * virtual switch/router (i.e. to VPP)
- * - endpoint-group: (EPG) a collection of endpoints
- * - policy: rules determining which traffic can pass between EPGs,
- *   a.k.a. a 'contract'
- *
- * Here, policy is implemented via an ACL.
- * EPG classification for transit packets is determined by:
- * - source EPG: from the packet's input interface
- * - destination EPG: from the packet's destination IP address.
- *
- */
-
-#ifndef __GBP_H__
-#define __GBP_H__
-
-#include <plugins/acl/exports.h>
-
-#include <plugins/gbp/gbp_types.h>
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_endpoint_group.h>
-#include <plugins/gbp/gbp_subnet.h>
-#include <plugins/gbp/gbp_recirc.h>
-
-typedef struct
-{
- u32 gbp_acl_user_id;
- acl_plugin_methods_t acl_plugin;
-} gbp_main_t;
-
-extern gbp_main_t gbp_main;
-
-typedef enum gbp_policy_type_t_
-{
- GBP_POLICY_PORT,
- GBP_POLICY_MAC,
- GBP_POLICY_LPM,
- GBP_N_POLICY
-#define GBP_N_POLICY GBP_N_POLICY
-} gbp_policy_type_t;
-
-/**
- * Grouping of global data for the GBP source EPG classification feature
- */
-typedef struct gbp_policy_main_t_
-{
- /**
- * Next nodes for L2 output features
- */
- u32 l2_output_feat_next[GBP_N_POLICY][32];
-} gbp_policy_main_t;
-
-extern gbp_policy_main_t gbp_policy_main;
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_api.c b/src/plugins/gbp/gbp_api.c
deleted file mode 100644
index ab89172b1af..00000000000
--- a/src/plugins/gbp/gbp_api.c
+++ /dev/null
@@ -1,1154 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vnet/plugin/plugin.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/ip/ip_types_api.h>
-#include <vnet/ethernet/ethernet_types_api.h>
-#include <vpp/app/version.h>
-
-#include <gbp/gbp.h>
-#include <gbp/gbp_learn.h>
-#include <gbp/gbp_itf.h>
-#include <gbp/gbp_vxlan.h>
-#include <gbp/gbp_bridge_domain.h>
-#include <gbp/gbp_route_domain.h>
-#include <gbp/gbp_ext_itf.h>
-#include <gbp/gbp_contract.h>
-
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-
-/* define message IDs */
-#include <gbp/gbp.api_enum.h>
-#include <gbp/gbp.api_types.h>
-#include <vnet/format_fns.h>
-#include <vlibapi/api_helper_macros.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-
-gbp_main_t gbp_main;
-
-static u16 msg_id_base;
-
-#define GBP_MSG_BASE msg_id_base
-
-static gbp_endpoint_flags_t
-gbp_endpoint_flags_decode (vl_api_gbp_endpoint_flags_t v)
-{
- gbp_endpoint_flags_t f = GBP_ENDPOINT_FLAG_NONE;
-
- v = ntohl (v);
-
- if (v & GBP_API_ENDPOINT_FLAG_BOUNCE)
- f |= GBP_ENDPOINT_FLAG_BOUNCE;
- if (v & GBP_API_ENDPOINT_FLAG_REMOTE)
- f |= GBP_ENDPOINT_FLAG_REMOTE;
- if (v & GBP_API_ENDPOINT_FLAG_LEARNT)
- f |= GBP_ENDPOINT_FLAG_LEARNT;
- if (v & GBP_API_ENDPOINT_FLAG_EXTERNAL)
- f |= GBP_ENDPOINT_FLAG_EXTERNAL;
-
- return (f);
-}
-
-static vl_api_gbp_endpoint_flags_t
-gbp_endpoint_flags_encode (gbp_endpoint_flags_t f)
-{
- vl_api_gbp_endpoint_flags_t v = 0;
-
- if (f & GBP_ENDPOINT_FLAG_BOUNCE)
- v |= GBP_API_ENDPOINT_FLAG_BOUNCE;
- if (f & GBP_ENDPOINT_FLAG_REMOTE)
- v |= GBP_API_ENDPOINT_FLAG_REMOTE;
- if (f & GBP_ENDPOINT_FLAG_LEARNT)
- v |= GBP_API_ENDPOINT_FLAG_LEARNT;
- if (f & GBP_ENDPOINT_FLAG_EXTERNAL)
- v |= GBP_API_ENDPOINT_FLAG_EXTERNAL;
-
- v = htonl (v);
-
- return (v);
-}
-
-static void
-vl_api_gbp_endpoint_add_t_handler (vl_api_gbp_endpoint_add_t * mp)
-{
- vl_api_gbp_endpoint_add_reply_t *rmp;
- gbp_endpoint_flags_t gef;
- u32 sw_if_index, handle;
- ip46_address_t *ips;
- mac_address_t mac;
- int rv = 0, ii;
-
- handle = INDEX_INVALID;
-
- VALIDATE_SW_IF_INDEX (&(mp->endpoint));
-
- gef = gbp_endpoint_flags_decode (mp->endpoint.flags);
- ips = NULL;
- sw_if_index = ntohl (mp->endpoint.sw_if_index);
-
- if (mp->endpoint.n_ips)
- {
- vec_validate (ips, mp->endpoint.n_ips - 1);
-
- vec_foreach_index (ii, ips)
- {
- ip_address_decode (&mp->endpoint.ips[ii], &ips[ii]);
- }
- }
- mac_address_decode (mp->endpoint.mac, &mac);
-
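- /* Remote endpoints are reached over a tunnel and so carry tunnel
-  * src/dst addresses; local endpoints do not. */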
- if (GBP_ENDPOINT_FLAG_REMOTE & gef)
- {
- ip46_address_t tun_src, tun_dst;
-
- ip_address_decode (&mp->endpoint.tun.src, &tun_src);
- ip_address_decode (&mp->endpoint.tun.dst, &tun_dst);
-
- rv = gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
- sw_if_index, ips, &mac,
- INDEX_INVALID, INDEX_INVALID,
- ntohs (mp->endpoint.sclass),
- gef, &tun_src, &tun_dst, &handle);
- }
- else
- {
- rv = gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
- sw_if_index, ips, &mac,
- INDEX_INVALID, INDEX_INVALID,
- ntohs (mp->endpoint.sclass),
- gef, NULL, NULL, &handle);
- }
- vec_free (ips);
- BAD_SW_IF_INDEX_LABEL;
-
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_GBP_ENDPOINT_ADD_REPLY + GBP_MSG_BASE,
- ({
- rmp->handle = htonl (handle);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_gbp_endpoint_del_t_handler (vl_api_gbp_endpoint_del_t * mp)
-{
- vl_api_gbp_endpoint_del_reply_t *rmp;
- int rv = 0;
-
- gbp_endpoint_unlock (GBP_ENDPOINT_SRC_CP, ntohl (mp->handle));
-
- REPLY_MACRO (VL_API_GBP_ENDPOINT_DEL_REPLY + GBP_MSG_BASE);
-}
-
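-/* Context passed through the *_walk callbacks: the client registration
- * and request context needed to address each details reply. */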
-typedef struct gbp_walk_ctx_t_
-{
- vl_api_registration_t *reg;
- u32 context;
-} gbp_walk_ctx_t;
-
-static walk_rc_t
-gbp_endpoint_send_details (index_t gei, void *args)
-{
- vl_api_gbp_endpoint_details_t *mp;
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_fwd_t *gef;
- gbp_endpoint_t *ge;
- gbp_walk_ctx_t *ctx;
- u8 n_ips, ii;
-
- ctx = args;
- ge = gbp_endpoint_get (gei);
-
- n_ips = vec_len (ge->ge_key.gek_ips);
- mp = vl_msg_api_alloc (sizeof (*mp) + (sizeof (*mp->endpoint.ips) * n_ips));
- if (!mp)
- return (WALK_STOP);
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_ENDPOINT_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- gel = &ge->ge_locs[0];
- gef = &ge->ge_fwd;
-
- if (gbp_endpoint_is_remote (ge))
- {
- mp->endpoint.sw_if_index = ntohl (gel->tun.gel_parent_sw_if_index);
- ip_address_encode (&gel->tun.gel_src, IP46_TYPE_ANY,
- &mp->endpoint.tun.src);
- ip_address_encode (&gel->tun.gel_dst, IP46_TYPE_ANY,
- &mp->endpoint.tun.dst);
- }
- else
- {
- mp->endpoint.sw_if_index =
- ntohl (gbp_itf_get_sw_if_index (gef->gef_itf));
- }
- mp->endpoint.sclass = ntohs (ge->ge_fwd.gef_sclass);
- mp->endpoint.n_ips = n_ips;
- mp->endpoint.flags = gbp_endpoint_flags_encode (gef->gef_flags);
- mp->handle = htonl (gei);
- mp->age =
- clib_host_to_net_f64 (vlib_time_now (vlib_get_main ()) -
- ge->ge_last_time);
- mac_address_encode (&ge->ge_key.gek_mac, mp->endpoint.mac);
-
- vec_foreach_index (ii, ge->ge_key.gek_ips)
- {
- ip_address_encode (&ge->ge_key.gek_ips[ii].fp_addr,
- IP46_TYPE_ANY, &mp->endpoint.ips[ii]);
- }
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (WALK_CONTINUE);
-}
-
-static void
-vl_api_gbp_endpoint_dump_t_handler (vl_api_gbp_endpoint_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_endpoint_walk (gbp_endpoint_send_details, &ctx);
-}
-
-static void
-gbp_retention_decode (const vl_api_gbp_endpoint_retention_t * in,
- gbp_endpoint_retention_t * out)
-{
- out->remote_ep_timeout = ntohl (in->remote_ep_timeout);
-}
-
-static void
- vl_api_gbp_endpoint_group_add_t_handler
- (vl_api_gbp_endpoint_group_add_t * mp)
-{
- vl_api_gbp_endpoint_group_add_reply_t *rmp;
- gbp_endpoint_retention_t retention;
- int rv = 0;
-
- gbp_retention_decode (&mp->epg.retention, &retention);
-
- rv = gbp_endpoint_group_add_and_lock (ntohl (mp->epg.vnid),
- ntohs (mp->epg.sclass),
- ntohl (mp->epg.bd_id),
- ntohl (mp->epg.rd_id),
- ntohl (mp->epg.uplink_sw_if_index),
- &retention);
-
- REPLY_MACRO (VL_API_GBP_ENDPOINT_GROUP_ADD_REPLY + GBP_MSG_BASE);
-}
-
-static void
- vl_api_gbp_endpoint_group_del_t_handler
- (vl_api_gbp_endpoint_group_del_t * mp)
-{
- vl_api_gbp_endpoint_group_del_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_endpoint_group_delete (ntohs (mp->sclass));
-
- REPLY_MACRO (VL_API_GBP_ENDPOINT_GROUP_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static gbp_bridge_domain_flags_t
-gbp_bridge_domain_flags_from_api (vl_api_gbp_bridge_domain_flags_t a)
-{
- gbp_bridge_domain_flags_t g;
-
- g = GBP_BD_FLAG_NONE;
- a = clib_net_to_host_u32 (a);
-
- if (a & GBP_BD_API_FLAG_DO_NOT_LEARN)
- g |= GBP_BD_FLAG_DO_NOT_LEARN;
- if (a & GBP_BD_API_FLAG_UU_FWD_DROP)
- g |= GBP_BD_FLAG_UU_FWD_DROP;
- if (a & GBP_BD_API_FLAG_MCAST_DROP)
- g |= GBP_BD_FLAG_MCAST_DROP;
- if (a & GBP_BD_API_FLAG_UCAST_ARP)
- g |= GBP_BD_FLAG_UCAST_ARP;
-
- return (g);
-}
-
-static void
-vl_api_gbp_bridge_domain_add_t_handler (vl_api_gbp_bridge_domain_add_t * mp)
-{
- vl_api_gbp_bridge_domain_add_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_bridge_domain_add_and_lock (ntohl (mp->bd.bd_id),
- ntohl (mp->bd.rd_id),
- gbp_bridge_domain_flags_from_api
- (mp->bd.flags),
- ntohl (mp->bd.bvi_sw_if_index),
- ntohl (mp->bd.uu_fwd_sw_if_index),
- ntohl (mp->bd.bm_flood_sw_if_index));
-
- REPLY_MACRO (VL_API_GBP_BRIDGE_DOMAIN_ADD_REPLY + GBP_MSG_BASE);
-}
-
-static void
-vl_api_gbp_bridge_domain_del_t_handler (vl_api_gbp_bridge_domain_del_t * mp)
-{
- vl_api_gbp_bridge_domain_del_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_bridge_domain_delete (ntohl (mp->bd_id));
-
- REPLY_MACRO (VL_API_GBP_BRIDGE_DOMAIN_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static void
-vl_api_gbp_route_domain_add_t_handler (vl_api_gbp_route_domain_add_t * mp)
-{
- vl_api_gbp_route_domain_add_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_route_domain_add_and_lock (ntohl (mp->rd.rd_id),
- ntohs (mp->rd.scope),
- ntohl (mp->rd.ip4_table_id),
- ntohl (mp->rd.ip6_table_id),
- ntohl (mp->rd.ip4_uu_sw_if_index),
- ntohl (mp->rd.ip6_uu_sw_if_index));
-
- REPLY_MACRO (VL_API_GBP_ROUTE_DOMAIN_ADD_REPLY + GBP_MSG_BASE);
-}
-
-static void
-vl_api_gbp_route_domain_del_t_handler (vl_api_gbp_route_domain_del_t * mp)
-{
- vl_api_gbp_route_domain_del_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_route_domain_delete (ntohl (mp->rd_id));
-
- REPLY_MACRO (VL_API_GBP_ROUTE_DOMAIN_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static int
-gbp_subnet_type_from_api (vl_api_gbp_subnet_type_t a, gbp_subnet_type_t * t)
-{
- a = clib_net_to_host_u32 (a);
-
- switch (a)
- {
- case GBP_API_SUBNET_TRANSPORT:
- *t = GBP_SUBNET_TRANSPORT;
- return (0);
- case GBP_API_SUBNET_L3_OUT:
- *t = GBP_SUBNET_L3_OUT;
- return (0);
- case GBP_API_SUBNET_ANON_L3_OUT:
- *t = GBP_SUBNET_ANON_L3_OUT;
- return (0);
- case GBP_API_SUBNET_STITCHED_INTERNAL:
- *t = GBP_SUBNET_STITCHED_INTERNAL;
- return (0);
- case GBP_API_SUBNET_STITCHED_EXTERNAL:
- *t = GBP_SUBNET_STITCHED_EXTERNAL;
- return (0);
- }
-
- return (-1);
-}
-
-static void
-vl_api_gbp_subnet_add_del_t_handler (vl_api_gbp_subnet_add_del_t * mp)
-{
- vl_api_gbp_subnet_add_del_reply_t *rmp;
- gbp_subnet_type_t type;
- fib_prefix_t pfx;
- int rv = 0;
-
- ip_prefix_decode (&mp->subnet.prefix, &pfx);
-
- rv = gbp_subnet_type_from_api (mp->subnet.type, &type);
-
- if (0 != rv)
- goto out;
-
- if (mp->is_add)
- rv = gbp_subnet_add (ntohl (mp->subnet.rd_id),
- &pfx, type,
- ntohl (mp->subnet.sw_if_index),
- ntohs (mp->subnet.sclass));
- else
- rv = gbp_subnet_del (ntohl (mp->subnet.rd_id), &pfx);
-
-out:
- REPLY_MACRO (VL_API_GBP_SUBNET_ADD_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static vl_api_gbp_subnet_type_t
-gbp_subnet_type_to_api (gbp_subnet_type_t t)
-{
- vl_api_gbp_subnet_type_t a = 0;
-
- switch (t)
- {
- case GBP_SUBNET_TRANSPORT:
- a = GBP_API_SUBNET_TRANSPORT;
- break;
- case GBP_SUBNET_STITCHED_INTERNAL:
- a = GBP_API_SUBNET_STITCHED_INTERNAL;
- break;
- case GBP_SUBNET_STITCHED_EXTERNAL:
- a = GBP_API_SUBNET_STITCHED_EXTERNAL;
- break;
- case GBP_SUBNET_L3_OUT:
- a = GBP_API_SUBNET_L3_OUT;
- break;
- case GBP_SUBNET_ANON_L3_OUT:
- a = GBP_API_SUBNET_ANON_L3_OUT;
- break;
- }
-
- a = clib_host_to_net_u32 (a);
-
- return (a);
-}
-
-static walk_rc_t
-gbp_subnet_send_details (u32 rd_id,
- const fib_prefix_t * pfx,
- gbp_subnet_type_t type,
- u32 sw_if_index, sclass_t sclass, void *args)
-{
- vl_api_gbp_subnet_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return (WALK_STOP);
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_SUBNET_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->subnet.type = gbp_subnet_type_to_api (type);
- mp->subnet.sw_if_index = ntohl (sw_if_index);
- mp->subnet.sclass = ntohs (sclass);
- mp->subnet.rd_id = ntohl (rd_id);
- ip_prefix_encode (pfx, &mp->subnet.prefix);
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (WALK_CONTINUE);
-}
-
-static void
-vl_api_gbp_subnet_dump_t_handler (vl_api_gbp_subnet_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_subnet_walk (gbp_subnet_send_details, &ctx);
-}
-
-static int
-gbp_endpoint_group_send_details (gbp_endpoint_group_t * gg, void *args)
-{
- vl_api_gbp_endpoint_group_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return 1;
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_ENDPOINT_GROUP_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->epg.uplink_sw_if_index = ntohl (gg->gg_uplink_sw_if_index);
- mp->epg.vnid = ntohl (gg->gg_vnid);
- mp->epg.sclass = ntohs (gg->gg_sclass);
- mp->epg.bd_id = ntohl (gbp_endpoint_group_get_bd_id (gg));
- mp->epg.rd_id = ntohl (gbp_route_domain_get_rd_id (gg->gg_rd));
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (1);
-}
-
-static void
-vl_api_gbp_endpoint_group_dump_t_handler (vl_api_gbp_endpoint_group_dump_t *
- mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_endpoint_group_walk (gbp_endpoint_group_send_details, &ctx);
-}
-
-static int
-gbp_bridge_domain_send_details (gbp_bridge_domain_t * gb, void *args)
-{
- vl_api_gbp_bridge_domain_details_t *mp;
- gbp_route_domain_t *gr;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return 1;
-
- memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_BRIDGE_DOMAIN_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- gr = gbp_route_domain_get (gb->gb_rdi);
-
- mp->bd.bd_id = ntohl (gb->gb_bd_id);
- mp->bd.rd_id = ntohl (gr->grd_id);
- mp->bd.bvi_sw_if_index = ntohl (gb->gb_bvi_sw_if_index);
- mp->bd.uu_fwd_sw_if_index = ntohl (gb->gb_uu_fwd_sw_if_index);
- mp->bd.bm_flood_sw_if_index =
- ntohl (gbp_itf_get_sw_if_index (gb->gb_bm_flood_itf));
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (1);
-}
-
-static void
-vl_api_gbp_bridge_domain_dump_t_handler (vl_api_gbp_bridge_domain_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_bridge_domain_walk (gbp_bridge_domain_send_details, &ctx);
-}
-
-static int
-gbp_route_domain_send_details (gbp_route_domain_t * grd, void *args)
-{
- vl_api_gbp_route_domain_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return 1;
-
- memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_ROUTE_DOMAIN_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->rd.rd_id = ntohl (grd->grd_id);
- mp->rd.ip4_uu_sw_if_index =
- ntohl (grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP4]);
- mp->rd.ip6_uu_sw_if_index =
- ntohl (grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP6]);
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (1);
-}
-
-static void
-vl_api_gbp_route_domain_dump_t_handler (vl_api_gbp_route_domain_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_route_domain_walk (gbp_route_domain_send_details, &ctx);
-}
-
-static void
-vl_api_gbp_recirc_add_del_t_handler (vl_api_gbp_recirc_add_del_t * mp)
-{
- vl_api_gbp_recirc_add_del_reply_t *rmp;
- u32 sw_if_index;
- int rv = 0;
-
- sw_if_index = ntohl (mp->recirc.sw_if_index);
- if (!vnet_sw_if_index_is_api_valid (sw_if_index))
- goto bad_sw_if_index;
-
- if (mp->is_add)
- rv = gbp_recirc_add (sw_if_index,
- ntohs (mp->recirc.sclass), mp->recirc.is_ext);
- else
- rv = gbp_recirc_delete (sw_if_index);
-
- BAD_SW_IF_INDEX_LABEL;
-
- REPLY_MACRO (VL_API_GBP_RECIRC_ADD_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static walk_rc_t
-gbp_recirc_send_details (gbp_recirc_t * gr, void *args)
-{
- vl_api_gbp_recirc_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return (WALK_STOP);
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_RECIRC_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->recirc.sclass = ntohs (gr->gr_sclass);
- mp->recirc.sw_if_index = ntohl (gr->gr_sw_if_index);
- mp->recirc.is_ext = gr->gr_is_ext;
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (WALK_CONTINUE);
-}
-
-static void
-vl_api_gbp_recirc_dump_t_handler (vl_api_gbp_recirc_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_recirc_walk (gbp_recirc_send_details, &ctx);
-}
-
-static void
-vl_api_gbp_ext_itf_add_del_t_handler (vl_api_gbp_ext_itf_add_del_t * mp)
-{
- vl_api_gbp_ext_itf_add_del_reply_t *rmp;
- u32 sw_if_index = ~0;
- vl_api_gbp_ext_itf_t *ext_itf;
- int rv = 0;
-
- ext_itf = &mp->ext_itf;
- if (ext_itf)
- sw_if_index = ntohl (ext_itf->sw_if_index);
-
- if (!vnet_sw_if_index_is_api_valid (sw_if_index))
- goto bad_sw_if_index;
-
- if (mp->is_add)
- rv = gbp_ext_itf_add (sw_if_index,
- ntohl (ext_itf->bd_id), ntohl (ext_itf->rd_id),
- ntohl (ext_itf->flags));
- else
- rv = gbp_ext_itf_delete (sw_if_index);
-
- BAD_SW_IF_INDEX_LABEL;
-
- REPLY_MACRO (VL_API_GBP_EXT_ITF_ADD_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static walk_rc_t
-gbp_ext_itf_send_details (gbp_ext_itf_t * gx, void *args)
-{
- vl_api_gbp_ext_itf_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return (WALK_STOP);
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_EXT_ITF_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->ext_itf.flags = ntohl (gx->gx_flags);
- mp->ext_itf.bd_id = ntohl (gbp_bridge_domain_get_bd_id (gx->gx_bd));
- mp->ext_itf.rd_id = ntohl (gbp_route_domain_get_rd_id (gx->gx_rd));
- mp->ext_itf.sw_if_index = ntohl (gbp_itf_get_sw_if_index (gx->gx_itf));
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (WALK_CONTINUE);
-}
-
-static void
-vl_api_gbp_ext_itf_dump_t_handler (vl_api_gbp_ext_itf_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_ext_itf_walk (gbp_ext_itf_send_details, &ctx);
-}
-
-static int
-gbp_contract_rule_action_decode (vl_api_gbp_rule_action_t in,
- gbp_rule_action_t * out)
-{
- in = clib_net_to_host_u32 (in);
-
- switch (in)
- {
- case GBP_API_RULE_PERMIT:
- *out = GBP_RULE_PERMIT;
- return (0);
- case GBP_API_RULE_DENY:
- *out = GBP_RULE_DENY;
- return (0);
- case GBP_API_RULE_REDIRECT:
- *out = GBP_RULE_REDIRECT;
- return (0);
- }
-
- return (-1);
-}
-
-static int
-gbp_hash_mode_decode (vl_api_gbp_hash_mode_t in, gbp_hash_mode_t * out)
-{
- in = clib_net_to_host_u32 (in);
-
- switch (in)
- {
- case GBP_API_HASH_MODE_SRC_IP:
- *out = GBP_HASH_MODE_SRC_IP;
- return (0);
- case GBP_API_HASH_MODE_DST_IP:
- *out = GBP_HASH_MODE_DST_IP;
- return (0);
- case GBP_API_HASH_MODE_SYMMETRIC:
- *out = GBP_HASH_MODE_SYMMETRIC;
- return (0);
- }
-
- return (-2);
-}
-
-static int
-gbp_next_hop_decode (const vl_api_gbp_next_hop_t * in, index_t * gnhi)
-{
- ip46_address_t ip;
- mac_address_t mac;
- index_t grd, gbd;
-
- gbd = gbp_bridge_domain_find_and_lock (ntohl (in->bd_id));
-
- if (INDEX_INVALID == gbd)
- return (VNET_API_ERROR_BD_NOT_MODIFIABLE);
-
- grd = gbp_route_domain_find_and_lock (ntohl (in->rd_id));
-
- if (INDEX_INVALID == grd)
- return (VNET_API_ERROR_NO_SUCH_FIB);
-
- ip_address_decode (&in->ip, &ip);
- mac_address_decode (in->mac, &mac);
-
- *gnhi = gbp_next_hop_alloc (&ip, grd, &mac, gbd);
-
- return (0);
-}
-
-static int
-gbp_next_hop_set_decode (const vl_api_gbp_next_hop_set_t * in,
- gbp_hash_mode_t * hash_mode, index_t ** out)
-{
- index_t *gnhis = NULL;
- int rv;
- u8 ii;
-
- rv = gbp_hash_mode_decode (in->hash_mode, hash_mode);
-
- if (0 != rv)
- return rv;
-
- vec_validate (gnhis, in->n_nhs - 1);
-
- for (ii = 0; ii < in->n_nhs; ii++)
- {
- rv = gbp_next_hop_decode (&in->nhs[ii], &gnhis[ii]);
-
- if (0 != rv)
- {
- vec_free (gnhis);
- break;
- }
- }
-
- *out = gnhis;
- return (rv);
-}
-
-static int
-gbp_contract_rule_decode (const vl_api_gbp_rule_t * in, index_t * gui)
-{
- gbp_hash_mode_t hash_mode;
- gbp_rule_action_t action;
- index_t *nhs = NULL;
- int rv;
-
- rv = gbp_contract_rule_action_decode (in->action, &action);
-
- if (0 != rv)
- return rv;
-
- if (GBP_RULE_REDIRECT == action)
- {
- rv = gbp_next_hop_set_decode (&in->nh_set, &hash_mode, &nhs);
-
- if (0 != rv)
- return (rv);
- }
- else
- {
- hash_mode = GBP_HASH_MODE_SRC_IP;
- }
-
- *gui = gbp_rule_alloc (action, hash_mode, nhs);
-
- return (rv);
-}
-
-static int
-gbp_contract_rules_decode (u8 n_rules,
- const vl_api_gbp_rule_t * rules, index_t ** out)
-{
- index_t *guis = NULL;
- int rv;
- u8 ii;
-
- if (0 == n_rules)
- {
- *out = NULL;
- return (0);
- }
-
- vec_validate (guis, n_rules - 1);
-
- for (ii = 0; ii < n_rules; ii++)
- {
- rv = gbp_contract_rule_decode (&rules[ii], &guis[ii]);
-
- if (0 != rv)
- {
- index_t *gui;
- vec_foreach (gui, guis) gbp_rule_free (*gui);
- vec_free (guis);
- return (rv);
- }
- }
-
- *out = guis;
- return (rv);
-}
-
-static void
-vl_api_gbp_contract_add_del_t_handler (vl_api_gbp_contract_add_del_t * mp)
-{
- vl_api_gbp_contract_add_del_reply_t *rmp;
- u16 *allowed_ethertypes;
- u32 stats_index = ~0;
- index_t *rules;
- int ii, rv = 0;
- u8 n_et;
-
- if (mp->is_add)
- {
- rv = gbp_contract_rules_decode (mp->contract.n_rules,
- mp->contract.rules, &rules);
- if (0 != rv)
- goto out;
-
- allowed_ethertypes = NULL;
-
- /*
- * allowed ether types
- */
- n_et = mp->contract.n_ether_types;
- vec_validate (allowed_ethertypes, n_et - 1);
-
- for (ii = 0; ii < n_et; ii++)
- {
- /* leave the ether types in network order */
- allowed_ethertypes[ii] = mp->contract.allowed_ethertypes[ii];
- }
-
- rv = gbp_contract_update (ntohs (mp->contract.scope),
- ntohs (mp->contract.sclass),
- ntohs (mp->contract.dclass),
- ntohl (mp->contract.acl_index),
- rules, allowed_ethertypes, &stats_index);
- }
- else
- rv = gbp_contract_delete (ntohs (mp->contract.scope),
- ntohs (mp->contract.sclass),
- ntohs (mp->contract.dclass));
-
-out:
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_GBP_CONTRACT_ADD_DEL_REPLY + GBP_MSG_BASE,
- ({
- rmp->stats_index = htonl (stats_index);
- }));
- /* *INDENT-ON* */
-}
-
-static int
-gbp_contract_send_details (gbp_contract_t * gbpc, void *args)
-{
- vl_api_gbp_contract_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return 1;
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_CONTRACT_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->contract.sclass = ntohs (gbpc->gc_key.gck_src);
- mp->contract.dclass = ntohs (gbpc->gc_key.gck_dst);
- mp->contract.acl_index = ntohl (gbpc->gc_acl_index);
- mp->contract.scope = ntohs (gbpc->gc_key.gck_scope);
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (1);
-}
-
-static void
-vl_api_gbp_contract_dump_t_handler (vl_api_gbp_contract_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_contract_walk (gbp_contract_send_details, &ctx);
-}
-
-static int
-gbp_vxlan_tunnel_mode_2_layer (vl_api_gbp_vxlan_tunnel_mode_t mode,
- gbp_vxlan_tunnel_layer_t * l)
-{
- mode = clib_net_to_host_u32 (mode);
-
- switch (mode)
- {
- case GBP_VXLAN_TUNNEL_MODE_L2:
- *l = GBP_VXLAN_TUN_L2;
- return (0);
- case GBP_VXLAN_TUNNEL_MODE_L3:
- *l = GBP_VXLAN_TUN_L3;
- return (0);
- }
- return (-1);
-}
-
-static void
-vl_api_gbp_vxlan_tunnel_add_t_handler (vl_api_gbp_vxlan_tunnel_add_t * mp)
-{
- vl_api_gbp_vxlan_tunnel_add_reply_t *rmp;
- gbp_vxlan_tunnel_layer_t layer;
- ip4_address_t src;
- u32 sw_if_index;
- int rv = 0;
-
- ip4_address_decode (mp->tunnel.src, &src);
- rv = gbp_vxlan_tunnel_mode_2_layer (mp->tunnel.mode, &layer);
-
- if (0 != rv)
- goto out;
-
- rv = gbp_vxlan_tunnel_add (ntohl (mp->tunnel.vni),
- layer,
- ntohl (mp->tunnel.bd_rd_id), &src, &sw_if_index);
-
-out:
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_GBP_VXLAN_TUNNEL_ADD_REPLY + GBP_MSG_BASE,
- ({
- rmp->sw_if_index = htonl (sw_if_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_gbp_vxlan_tunnel_del_t_handler (vl_api_gbp_vxlan_tunnel_del_t * mp)
-{
- vl_api_gbp_vxlan_tunnel_del_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_vxlan_tunnel_del (ntohl (mp->vni));
-
- REPLY_MACRO (VL_API_GBP_VXLAN_TUNNEL_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static vl_api_gbp_vxlan_tunnel_mode_t
-gbp_vxlan_tunnel_layer_2_mode (gbp_vxlan_tunnel_layer_t layer)
-{
- vl_api_gbp_vxlan_tunnel_mode_t mode = GBP_VXLAN_TUNNEL_MODE_L2;
-
- switch (layer)
- {
- case GBP_VXLAN_TUN_L2:
- mode = GBP_VXLAN_TUNNEL_MODE_L2;
- break;
- case GBP_VXLAN_TUN_L3:
- mode = GBP_VXLAN_TUNNEL_MODE_L3;
- break;
- }
- mode = clib_host_to_net_u32 (mode);
-
- return (mode);
-}
-
-static walk_rc_t
-gbp_vxlan_tunnel_send_details (gbp_vxlan_tunnel_t * gt, void *args)
-{
- vl_api_gbp_vxlan_tunnel_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return (WALK_STOP);
-
- memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = htons (VL_API_GBP_VXLAN_TUNNEL_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->tunnel.vni = htonl (gt->gt_vni);
- mp->tunnel.mode = gbp_vxlan_tunnel_layer_2_mode (gt->gt_layer);
- mp->tunnel.bd_rd_id = htonl (gt->gt_bd_rd_id);
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (WALK_CONTINUE);
-}
-
-static void
-vl_api_gbp_vxlan_tunnel_dump_t_handler (vl_api_gbp_vxlan_tunnel_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_vxlan_walk (gbp_vxlan_tunnel_send_details, &ctx);
-}
-
-#include <gbp/gbp.api.c>
-static clib_error_t *
-gbp_init (vlib_main_t * vm)
-{
- gbp_main_t *gbpm = &gbp_main;
-
- gbpm->gbp_acl_user_id = ~0;
-
- /* Ask for a correctly-sized block of API message decode slots */
- msg_id_base = setup_message_id_table ();
-
- return (NULL);
-}
-
-VLIB_API_INIT_FUNCTION (gbp_init);
-
-/* *INDENT-OFF* */
-VLIB_PLUGIN_REGISTER () = {
- .version = VPP_BUILD_VER,
- .description = "Group Based Policy (GBP)",
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_bridge_domain.c b/src/plugins/gbp/gbp_bridge_domain.c
deleted file mode 100644
index 279169abb1d..00000000000
--- a/src/plugins/gbp/gbp_bridge_domain.c
+++ /dev/null
@@ -1,503 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_learn.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#include <vnet/dpo/dvr_dpo.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/feat_bitmap.h>
-#include <vnet/l2/l2_bvi.h>
-#include <vnet/l2/l2_fib.h>
-
-/**
- * Pool of GBP bridge_domains
- */
-gbp_bridge_domain_t *gbp_bridge_domain_pool;
-
-/**
- * DB of bridge_domains
- */
-gbp_bridge_domain_db_t gbp_bridge_domain_db;
-
-/**
- * Map of BD index to contract scope
- */
-gbp_scope_t *gbp_scope_by_bd_index;
-
-/**
- * logger
- */
-vlib_log_class_t gb_logger;
-
-#define GBP_BD_DBG(...) \
- vlib_log_debug (gb_logger, __VA_ARGS__);
-
-index_t
-gbp_bridge_domain_index (const gbp_bridge_domain_t * gbd)
-{
- return (gbd - gbp_bridge_domain_pool);
-}
-
-static void
-gbp_bridge_domain_lock (index_t i)
-{
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (i);
- gb->gb_locks++;
-}
-
-u32
-gbp_bridge_domain_get_bd_id (index_t gbdi)
-{
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gbdi);
-
- return (gb->gb_bd_id);
-}
-
-static index_t
-gbp_bridge_domain_find (u32 bd_id)
-{
- uword *p;
-
- p = hash_get (gbp_bridge_domain_db.gbd_by_bd_id, bd_id);
-
- if (NULL != p)
- return p[0];
-
- return (INDEX_INVALID);
-}
-
-index_t
-gbp_bridge_domain_find_and_lock (u32 bd_id)
-{
- uword *p;
-
- p = hash_get (gbp_bridge_domain_db.gbd_by_bd_id, bd_id);
-
- if (NULL != p)
- {
- gbp_bridge_domain_lock (p[0]);
- return p[0];
- }
- return (INDEX_INVALID);
-}
-
-static void
-gbp_bridge_domain_db_add (gbp_bridge_domain_t * gb)
-{
- index_t gbi = gb - gbp_bridge_domain_pool;
-
- hash_set (gbp_bridge_domain_db.gbd_by_bd_id, gb->gb_bd_id, gbi);
- vec_validate_init_empty (gbp_bridge_domain_db.gbd_by_bd_index,
- gb->gb_bd_index, INDEX_INVALID);
- gbp_bridge_domain_db.gbd_by_bd_index[gb->gb_bd_index] = gbi;
-}
-
-static void
-gbp_bridge_domain_db_remove (gbp_bridge_domain_t * gb)
-{
- hash_unset (gbp_bridge_domain_db.gbd_by_bd_id, gb->gb_bd_id);
- gbp_bridge_domain_db.gbd_by_bd_index[gb->gb_bd_index] = INDEX_INVALID;
-}
-
-u8 *
-format_gbp_bridge_domain_flags (u8 * s, va_list * args)
-{
- gbp_bridge_domain_flags_t gf = va_arg (*args, gbp_bridge_domain_flags_t);
-
- if (gf)
- {
- if (gf & GBP_BD_FLAG_DO_NOT_LEARN)
- s = format (s, "do-not-learn ");
- if (gf & GBP_BD_FLAG_UU_FWD_DROP)
- s = format (s, "uu-fwd-drop ");
- if (gf & GBP_BD_FLAG_MCAST_DROP)
- s = format (s, "mcast-drop ");
- if (gf & GBP_BD_FLAG_UCAST_ARP)
- s = format (s, "ucast-arp ");
- }
- else
- {
- s = format (s, "none");
- }
- return (s);
-}
-
-static u8 *
-format_gbp_bridge_domain_ptr (u8 * s, va_list * args)
-{
- gbp_bridge_domain_t *gb = va_arg (*args, gbp_bridge_domain_t *);
- vnet_main_t *vnm = vnet_get_main ();
-
- if (NULL != gb)
- s =
- format (s,
- "[%d] bd:[%d,%d], bvi:%U uu-flood:%U bm-flood:%U flags:%U locks:%d",
- gb - gbp_bridge_domain_pool, gb->gb_bd_id, gb->gb_bd_index,
- format_vnet_sw_if_index_name, vnm, gb->gb_bvi_sw_if_index,
- format_vnet_sw_if_index_name, vnm, gb->gb_uu_fwd_sw_if_index,
- format_gbp_itf_hdl, gb->gb_bm_flood_itf,
- format_gbp_bridge_domain_flags, gb->gb_flags, gb->gb_locks);
- else
- s = format (s, "NULL");
-
- return (s);
-}
-
-u8 *
-format_gbp_bridge_domain (u8 * s, va_list * args)
-{
- index_t gbi = va_arg (*args, index_t);
-
- s =
- format (s, "%U", format_gbp_bridge_domain_ptr,
- gbp_bridge_domain_get (gbi));
-
- return (s);
-}
-
-int
-gbp_bridge_domain_add_and_lock (u32 bd_id,
- u32 rd_id,
- gbp_bridge_domain_flags_t flags,
- u32 bvi_sw_if_index,
- u32 uu_fwd_sw_if_index,
- u32 bm_flood_sw_if_index)
-{
- gbp_bridge_domain_t *gb;
- index_t gbi;
-
- gbi = gbp_bridge_domain_find (bd_id);
-
- if (INDEX_INVALID == gbi)
- {
- gbp_route_domain_t *gr;
- u32 bd_index;
-
- bd_index = bd_find_index (&bd_main, bd_id);
-
- if (~0 == bd_index)
- return (VNET_API_ERROR_BD_NOT_MODIFIABLE);
-
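-      /* Collect the L2 features to disable; they are cleared below
-       * via bd_set_flags (..., 0). */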
- bd_flags_t bd_flags = L2_NONE;
- if (flags & GBP_BD_FLAG_UU_FWD_DROP)
- bd_flags |= L2_UU_FLOOD;
- if (flags & GBP_BD_FLAG_MCAST_DROP)
- bd_flags |= L2_FLOOD;
-
- pool_get (gbp_bridge_domain_pool, gb);
- memset (gb, 0, sizeof (*gb));
-
- gbi = gb - gbp_bridge_domain_pool;
- gb->gb_bd_id = bd_id;
- gb->gb_bd_index = bd_index;
- gb->gb_uu_fwd_sw_if_index = uu_fwd_sw_if_index;
- gb->gb_bvi_sw_if_index = bvi_sw_if_index;
- gbp_itf_hdl_reset (&gb->gb_bm_flood_itf);
- gb->gb_locks = 1;
- gb->gb_flags = flags;
- gb->gb_rdi = gbp_route_domain_find_and_lock (rd_id);
-
- /*
- * set the scope from the BD's RD's scope
- */
- gr = gbp_route_domain_get (gb->gb_rdi);
- vec_validate (gbp_scope_by_bd_index, gb->gb_bd_index);
- gbp_scope_by_bd_index[gb->gb_bd_index] = gr->grd_scope;
-
- /*
- * Set the BVI and uu-flood interfaces into the BD
- */
- gbp_bridge_domain_itf_add (gbi, gb->gb_bvi_sw_if_index,
- L2_BD_PORT_TYPE_BVI);
-
- if ((!(flags & GBP_BD_FLAG_UU_FWD_DROP) ||
- (flags & GBP_BD_FLAG_UCAST_ARP)) &&
- ~0 != gb->gb_uu_fwd_sw_if_index)
- gbp_bridge_domain_itf_add (gbi, gb->gb_uu_fwd_sw_if_index,
- L2_BD_PORT_TYPE_UU_FWD);
-
- if (!(flags & GBP_BD_FLAG_MCAST_DROP) && ~0 != bm_flood_sw_if_index)
- {
- gb->gb_bm_flood_itf =
- gbp_itf_l2_add_and_lock (bm_flood_sw_if_index, gbi);
- gbp_itf_l2_set_input_feature (gb->gb_bm_flood_itf,
- L2INPUT_FEAT_GBP_LEARN);
- }
-
- /*
- * unset any flag(s) set above
- */
- bd_set_flags (vlib_get_main (), bd_index, bd_flags, 0);
-
- if (flags & GBP_BD_FLAG_UCAST_ARP)
- {
- bd_flags = L2_ARP_UFWD;
- bd_set_flags (vlib_get_main (), bd_index, bd_flags, 1);
- }
-
- /*
- * Add the BVI's MAC to the L2FIB
- */
- l2fib_add_entry (vnet_sw_interface_get_hw_address
- (vnet_get_main (), gb->gb_bvi_sw_if_index),
- gb->gb_bd_index, gb->gb_bvi_sw_if_index,
- (L2FIB_ENTRY_RESULT_FLAG_STATIC |
- L2FIB_ENTRY_RESULT_FLAG_BVI));
-
- gbp_bridge_domain_db_add (gb);
- }
- else
- {
- gb = gbp_bridge_domain_get (gbi);
- gb->gb_locks++;
- }
-
- GBP_BD_DBG ("add: %U", format_gbp_bridge_domain_ptr, gb);
-
- return (0);
-}
-
-void
-gbp_bridge_domain_itf_add (index_t gbdi,
- u32 sw_if_index, l2_bd_port_type_t type)
-{
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gbdi);
-
- set_int_l2_mode (vlib_get_main (), vnet_get_main (), MODE_L2_BRIDGE,
- sw_if_index, gb->gb_bd_index, type, 0, 0);
- /*
- * Adding an interface to the bridge enables learning on the
- * interface. Disable learning by default for GBP interfaces;
- * GBP does its own endpoint learning.
- */
- l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_LEARN, 0);
-}
-
-void
-gbp_bridge_domain_itf_del (index_t gbdi,
- u32 sw_if_index, l2_bd_port_type_t type)
-{
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gbdi);
-
- set_int_l2_mode (vlib_get_main (), vnet_get_main (), MODE_L3, sw_if_index,
- gb->gb_bd_index, type, 0, 0);
-}
-
-void
-gbp_bridge_domain_unlock (index_t gbdi)
-{
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gbdi);
-
- gb->gb_locks--;
-
- if (0 == gb->gb_locks)
- {
- GBP_BD_DBG ("destroy: %U", format_gbp_bridge_domain_ptr, gb);
-
- l2fib_del_entry (vnet_sw_interface_get_hw_address
- (vnet_get_main (), gb->gb_bvi_sw_if_index),
- gb->gb_bd_index, gb->gb_bvi_sw_if_index);
-
- gbp_bridge_domain_itf_del (gbdi, gb->gb_bvi_sw_if_index,
- L2_BD_PORT_TYPE_BVI);
- if (~0 != gb->gb_uu_fwd_sw_if_index)
- gbp_bridge_domain_itf_del (gbdi, gb->gb_uu_fwd_sw_if_index,
- L2_BD_PORT_TYPE_UU_FWD);
- gbp_itf_unlock (&gb->gb_bm_flood_itf);
-
- gbp_bridge_domain_db_remove (gb);
- gbp_route_domain_unlock (gb->gb_rdi);
-
- pool_put (gbp_bridge_domain_pool, gb);
- }
-}
-
-int
-gbp_bridge_domain_delete (u32 bd_id)
-{
- index_t gbi;
-
- GBP_BD_DBG ("del: %d", bd_id);
- gbi = gbp_bridge_domain_find (bd_id);
-
- if (INDEX_INVALID != gbi)
- {
- GBP_BD_DBG ("del: %U", format_gbp_bridge_domain, gbi);
- gbp_bridge_domain_unlock (gbi);
-
- return (0);
- }
-
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-}
-
-void
-gbp_bridge_domain_walk (gbp_bridge_domain_cb_t cb, void *ctx)
-{
- gbp_bridge_domain_t *gbpe;
-
- /* *INDENT-OFF* */
- pool_foreach (gbpe, gbp_bridge_domain_pool)
- {
- if (!cb(gbpe, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static clib_error_t *
-gbp_bridge_domain_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vnet_main_t *vnm = vnet_get_main ();
- gbp_bridge_domain_flags_t flags;
- u32 bm_flood_sw_if_index = ~0;
- u32 uu_fwd_sw_if_index = ~0;
- u32 bd_id = ~0, rd_id = ~0;
- u32 bvi_sw_if_index = ~0;
- u8 add = 1;
-
- flags = GBP_BD_FLAG_NONE;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "bvi %U", unformat_vnet_sw_interface,
- vnm, &bvi_sw_if_index))
- ;
- else if (unformat (input, "uu-fwd %U", unformat_vnet_sw_interface,
- vnm, &uu_fwd_sw_if_index))
- ;
- else if (unformat (input, "bm-flood %U", unformat_vnet_sw_interface,
- vnm, &bm_flood_sw_if_index))
- ;
- else if (unformat (input, "add"))
- add = 1;
- else if (unformat (input, "del"))
- add = 0;
- else if (unformat (input, "flags %d", &flags))
- ;
- else if (unformat (input, "bd %d", &bd_id))
- ;
- else if (unformat (input, "rd %d", &rd_id))
- ;
- else
- break;
- }
-
- if (~0 == bd_id)
- return clib_error_return (0, "BD-ID must be specified");
- if (~0 == rd_id)
- return clib_error_return (0, "RD-ID must be specified");
-
- if (add)
- {
- if (~0 == bvi_sw_if_index)
- return clib_error_return (0, "interface must be specified");
-
- gbp_bridge_domain_add_and_lock (bd_id, rd_id,
- flags,
- bvi_sw_if_index,
- uu_fwd_sw_if_index,
- bm_flood_sw_if_index);
- }
- else
- gbp_bridge_domain_delete (bd_id);
-
- return (NULL);
-}
-
-/*?
- * Configure a GBP bridge-domain
- *
- * @cliexpar
- * @cliexstart{gbp bridge-domain [del] bd <ID> bvi <interface> [uu-fwd <interface>] [bm-flood <interface>] [flags <flags>]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_bridge_domain_cli_node, static) = {
- .path = "gbp bridge-domain",
- .short_help = "gbp bridge-domain [del] bd <ID> bvi <interface> [uu-fwd <interface>] [bm-flood <interface>] [flags <flags>]",
- .function = gbp_bridge_domain_cli,
-};
-/* *INDENT-ON* */
-
-static int
-gbp_bridge_domain_show_one (gbp_bridge_domain_t *gb, void *ctx)
-{
- vlib_main_t *vm;
-
- vm = ctx;
- vlib_cli_output (vm, " %U", format_gbp_bridge_domain_ptr, gb);
-
- return (1);
-}
-
-static clib_error_t *
-gbp_bridge_domain_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "Bridge-Domains:");
- gbp_bridge_domain_walk (gbp_bridge_domain_show_one, vm);
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy bridge-domains and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp bridge-domain}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_bridge_domain_show_node, static) = {
- .path = "show gbp bridge-domain",
- .short_help = "show gbp bridge-domain\n",
- .function = gbp_bridge_domain_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_bridge_domain_init (vlib_main_t * vm)
-{
- gb_logger = vlib_log_register_class ("gbp", "bd");
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_bridge_domain_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_bridge_domain.h b/src/plugins/gbp/gbp_bridge_domain.h
deleted file mode 100644
index 0449240083c..00000000000
--- a/src/plugins/gbp/gbp_bridge_domain.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_BRIDGE_DOMAIN_H__
-#define __GBP_BRIDGE_DOMAIN_H__
-
-#include <plugins/gbp/gbp_types.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#include <vnet/fib/fib_types.h>
-#include <vnet/l2/l2_bd.h>
-
-/**
- * Bridge Domain Flags
- */
-typedef enum gbp_bridge_domain_flags_t_
-{
- GBP_BD_FLAG_NONE = 0,
- GBP_BD_FLAG_DO_NOT_LEARN = (1 << 0),
- GBP_BD_FLAG_UU_FWD_DROP = (1 << 1),
- GBP_BD_FLAG_MCAST_DROP = (1 << 2),
- GBP_BD_FLAG_UCAST_ARP = (1 << 3),
-} gbp_bridge_domain_flags_t;
-
-/**
- * A bridge-domain representation.
- * This is a standard bridge-domain plus all the attributes it must
- * have to support the GBP model.
- */
-typedef struct gbp_bridge_domain_t_
-{
- /**
- * Bridge-domain ID
- */
- u32 gb_bd_id;
- u32 gb_bd_index;
-
- /**
- * Index of the Route-domain this BD is associated with. This is used as the
- * 'scope' of the packets for contract matching.
- */
- u32 gb_rdi;
-
- /**
- * Flags controlling behaviour
- */
- gbp_bridge_domain_flags_t gb_flags;
-
- /**
- * The BD's BVI interface (obligatory)
- */
- u32 gb_bvi_sw_if_index;
-
- /**
- * The BD's MAC spine-proxy interface (optional)
- */
- u32 gb_uu_fwd_sw_if_index;
-
- /**
- * The BD's interface on which to send broadcast and multicast packets
- */
- gbp_itf_hdl_t gb_bm_flood_itf;
-
- /**
- * The index of the BD's VNI interface on which packets from
- * unknown endpoints arrive
- */
- u32 gb_vni;
-
- /**
- * locks/references to the BD so it does not get deleted (from the API)
- * whilst it is still being used
- */
- u32 gb_locks;
-} gbp_bridge_domain_t;
-
-extern void gbp_bridge_domain_itf_add (index_t gbdi,
- u32 sw_if_index,
- l2_bd_port_type_t type);
-extern void gbp_bridge_domain_itf_del (index_t gbdi,
- u32 sw_if_index,
- l2_bd_port_type_t type);
-
-extern int gbp_bridge_domain_add_and_lock (u32 bd_id,
- u32 rd_id,
- gbp_bridge_domain_flags_t flags,
- u32 bvi_sw_if_index,
- u32 uu_fwd_sw_if_index,
- u32 bm_flood_sw_if_index);
-
-extern void gbp_bridge_domain_unlock (index_t gbi);
-extern index_t gbp_bridge_domain_find_and_lock (u32 bd_id);
-extern int gbp_bridge_domain_delete (u32 bd_id);
-extern index_t gbp_bridge_domain_index (const gbp_bridge_domain_t *);
-extern u32 gbp_bridge_domain_get_bd_id (index_t gbdi);
-
-typedef int (*gbp_bridge_domain_cb_t) (gbp_bridge_domain_t * gb, void *ctx);
-extern void gbp_bridge_domain_walk (gbp_bridge_domain_cb_t cb, void *ctx);
-
-extern u8 *format_gbp_bridge_domain (u8 * s, va_list * args);
-extern u8 *format_gbp_bridge_domain_flags (u8 * s, va_list * args);
-
-/**
- * DB of bridge_domains
- */
-typedef struct gbp_bridge_domain_db_t
-{
- uword *gbd_by_bd_id;
- index_t *gbd_by_bd_index;
-} gbp_bridge_domain_db_t;
-
-extern gbp_bridge_domain_db_t gbp_bridge_domain_db;
-extern gbp_bridge_domain_t *gbp_bridge_domain_pool;
-
-always_inline gbp_bridge_domain_t *
-gbp_bridge_domain_get (index_t i)
-{
- return (pool_elt_at_index (gbp_bridge_domain_pool, i));
-}
-
-always_inline gbp_bridge_domain_t *
-gbp_bridge_domain_get_by_bd_index (u32 bd_index)
-{
- return (gbp_bridge_domain_get
- (gbp_bridge_domain_db.gbd_by_bd_index[bd_index]));
-}
-
-extern gbp_scope_t *gbp_scope_by_bd_index;
-
-always_inline gbp_scope_t
-gbp_bridge_domain_get_scope (u32 bd_index)
-{
- return (gbp_scope_by_bd_index[bd_index]);
-}
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_classify.c b/src/plugins/gbp/gbp_classify.c
deleted file mode 100644
index 255db252871..00000000000
--- a/src/plugins/gbp/gbp_classify.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * gbp_classify.c : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_classify.h>
-#include <vnet/l2/l2_input.h>
-
-gbp_src_classify_main_t gbp_src_classify_main;
-
-static clib_error_t *
-gbp_src_classify_init (vlib_main_t * vm)
-{
- gbp_src_classify_main_t *em = &gbp_src_classify_main;
-
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "gbp-src-classify");
-
- /* Initialize the feature next-node indexes */
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- em->l2_input_feat_next[GBP_SRC_CLASSIFY_NULL]);
-
- node = vlib_get_node_by_name (vm, (u8 *) "gbp-null-classify");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- em->l2_input_feat_next[GBP_SRC_CLASSIFY_PORT]);
-
- node = vlib_get_node_by_name (vm, (u8 *) "l2-gbp-lpm-classify");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- em->l2_input_feat_next[GBP_SRC_CLASSIFY_LPM]);
-
- node = vlib_get_node_by_name (vm, (u8 *) "l2-gbp-lpm-anon-classify");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- em->l2_input_feat_next
- [GBP_SRC_CLASSIFY_LPM_ANON]);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (gbp_src_classify_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_classify.h b/src/plugins/gbp/gbp_classify.h
deleted file mode 100644
index ca7db94a2c0..00000000000
--- a/src/plugins/gbp/gbp_classify.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * gbp_classify.h : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_CLASSIFY_H__
-#define __GBP_CLASSIFY_H__
-
-#include <plugins/gbp/gbp.h>
-#include <vnet/ethernet/arp_packet.h>
-
-typedef enum gbp_src_classify_type_t_
-{
- GBP_SRC_CLASSIFY_NULL,
- GBP_SRC_CLASSIFY_PORT,
- GBP_SRC_CLASSIFY_LPM,
- GBP_SRC_CLASSIFY_LPM_ANON,
- GBP_SRC_N_CLASSIFY
-#define GBP_SRC_N_CLASSIFY GBP_SRC_N_CLASSIFY
-} gbp_src_classify_type_t;
-
-/**
- * Grouping of global data for the GBP source EPG classification feature
- */
-typedef struct gbp_src_classify_main_t_
-{
- /**
- * Next nodes for L2 output features
- */
- u32 l2_input_feat_next[GBP_SRC_N_CLASSIFY][32];
-} gbp_src_classify_main_t;
-
-extern gbp_src_classify_main_t gbp_src_classify_main;
-
-enum gbp_classify_get_ip_way
-{
- GBP_CLASSIFY_GET_IP_SRC = 0,
- GBP_CLASSIFY_GET_IP_DST = 1
-};
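-
-/*
- * 'way' is used below as an array index: 0 selects the source address
- * and 1 the destination, relying on the destination field immediately
- * following the source field in the IP and ARP header layouts.
- */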
-
-static_always_inline dpo_proto_t
-gbp_classify_get_ip_address (const ethernet_header_t * eh0,
- const ip4_address_t ** ip4,
- const ip6_address_t ** ip6,
- const enum gbp_classify_get_ip_way way)
-{
- u16 etype = clib_net_to_host_u16 (eh0->type);
- const void *l3h0 = eh0 + 1;
-
- if (ETHERNET_TYPE_VLAN == etype)
- {
- const ethernet_vlan_header_t *vh0 =
- (ethernet_vlan_header_t *) (eh0 + 1);
- etype = clib_net_to_host_u16 (vh0->type);
- l3h0 = vh0 + 1;
- }
-
- switch (etype)
- {
- case ETHERNET_TYPE_IP4:
- *ip4 = &(&((const ip4_header_t *) l3h0)->src_address)[way];
- return DPO_PROTO_IP4;
- case ETHERNET_TYPE_IP6:
- *ip6 = &(&((const ip6_header_t *) l3h0)->src_address)[way];
- return DPO_PROTO_IP6;
- case ETHERNET_TYPE_ARP:
- *ip4 = &((ethernet_arp_header_t *) l3h0)->ip4_over_ethernet[way].ip4;
- return DPO_PROTO_IP4;
- }
-
- return DPO_PROTO_NONE;
-}
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_classify_node.c b/src/plugins/gbp/gbp_classify_node.c
deleted file mode 100644
index a2058a21284..00000000000
--- a/src/plugins/gbp/gbp_classify_node.c
+++ /dev/null
@@ -1,628 +0,0 @@
-/*
- * gbp_classify_node.c : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_classify.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_ext_itf.h>
-#include <vnet/fib/ip4_fib.h>
-#include <vnet/fib/ip6_fib.h>
-#include <vnet/dpo/load_balance.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/feat_bitmap.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-#include <vnet/ethernet/arp_packet.h>
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_classify_trace_t_
-{
- /* per-pkt trace data */
- sclass_t sclass;
-} gbp_classify_trace_t;
-
-/*
- * determine the SRC EPG from the input port
- */
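-/*
- * The port classifier looks the endpoint up by source MAC and
- * bridge-domain (L2) or by source IP and FIB table (IP4/IP6); the
- * null classifier simply marks the packet's sclass as invalid.
- */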
-always_inline uword
-gbp_classify_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- gbp_src_classify_type_t type, dpo_proto_t dproto)
-{
- gbp_src_classify_main_t *gscm = &gbp_src_classify_main;
- u32 n_left_from, *from, *to_next;
- u32 next_index;
-
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 next0, bi0, sw_if_index0;
- const gbp_endpoint_t *ge0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- vnet_buffer2 (b0)->gbp.flags = VXLAN_GBP_GPFLAGS_NONE;
-
- if (GBP_SRC_CLASSIFY_NULL == type)
- {
- sclass0 = SCLASS_INVALID;
- next0 =
- vnet_l2_feature_next (b0, gscm->l2_input_feat_next[type],
- L2INPUT_FEAT_GBP_NULL_CLASSIFY);
- }
- else
- {
- if (DPO_PROTO_ETHERNET == dproto)
- {
- const ethernet_header_t *h0;
-
- h0 = vlib_buffer_get_current (b0);
- next0 =
- vnet_l2_feature_next (b0, gscm->l2_input_feat_next[type],
- L2INPUT_FEAT_GBP_SRC_CLASSIFY);
- ge0 = gbp_endpoint_find_mac (h0->src_address,
- vnet_buffer (b0)->l2.bd_index);
- }
- else if (DPO_PROTO_IP4 == dproto)
- {
- const ip4_header_t *h0;
-
- h0 = vlib_buffer_get_current (b0);
-
- ge0 = gbp_endpoint_find_ip4
- (&h0->src_address,
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
- sw_if_index0));
-
-
- /*
-	       * Go straight to lookup, do not pass go, do not collect $200
- */
- next0 = 0;
- }
- else if (DPO_PROTO_IP6 == dproto)
- {
- const ip6_header_t *h0;
-
- h0 = vlib_buffer_get_current (b0);
-
- ge0 = gbp_endpoint_find_ip6
- (&h0->src_address,
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
- sw_if_index0));
-
-
- /*
- * Go straight to lookup, do not pass go, do not collect $200
- */
- next0 = 0;
- }
- else
- {
- ge0 = NULL;
- next0 = 0;
- ASSERT (0);
- }
-
- if (PREDICT_TRUE (NULL != ge0))
- sclass0 = ge0->ge_fwd.gef_sclass;
- else
- sclass0 = SCLASS_INVALID;
- }
-
- vnet_buffer2 (b0)->gbp.sclass = sclass0;
-
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- gbp_classify_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sclass = sclass0;
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (gbp_src_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_classify_inline (vm, node, frame,
- GBP_SRC_CLASSIFY_PORT, DPO_PROTO_ETHERNET));
-}
-
-VLIB_NODE_FN (gbp_null_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_classify_inline (vm, node, frame,
- GBP_SRC_CLASSIFY_NULL, DPO_PROTO_ETHERNET));
-}
-
-VLIB_NODE_FN (gbp_ip4_src_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_classify_inline (vm, node, frame,
- GBP_SRC_CLASSIFY_PORT, DPO_PROTO_IP4));
-}
-
-VLIB_NODE_FN (gbp_ip6_src_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_classify_inline (vm, node, frame,
- GBP_SRC_CLASSIFY_PORT, DPO_PROTO_IP6));
-}
-
-
-/* packet trace format function */
-static u8 *
-format_gbp_classify_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_classify_trace_t *t = va_arg (*args, gbp_classify_trace_t *);
-
- s = format (s, "sclass:%d", t->sclass);
-
- return s;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_null_classify_node) = {
- .name = "gbp-null-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 0,
-};
-
-VLIB_REGISTER_NODE (gbp_src_classify_node) = {
- .name = "gbp-src-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 0,
-};
-
-VLIB_REGISTER_NODE (gbp_ip4_src_classify_node) = {
- .name = "ip4-gbp-src-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [0] = "ip4-lookup"
- },
-};
-
-VLIB_REGISTER_NODE (gbp_ip6_src_classify_node) = {
- .name = "ip6-gbp-src-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [0] = "ip6-lookup"
- },
-};
-
-VNET_FEATURE_INIT (gbp_ip4_src_classify_feat_node, static) =
-{
- .arc_name = "ip4-unicast",
- .node_name = "ip4-gbp-src-classify",
- .runs_before = VNET_FEATURES ("nat44-out2in"),
-};
-VNET_FEATURE_INIT (gbp_ip6_src_classify_feat_node, static) =
-{
- .arc_name = "ip6-unicast",
- .node_name = "ip6-gbp-src-classify",
- .runs_before = VNET_FEATURES ("nat66-out2in"),
-};
-
-/* *INDENT-ON* */
-
-typedef enum gbp_lpm_classify_next_t_
-{
- GPB_LPM_CLASSIFY_DROP,
-} gbp_lpm_classify_next_t;
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_lpm_classify_trace_t_
-{
- sclass_t sclass;
- index_t lbi;
- ip46_address_t src;
-} gbp_lpm_classify_trace_t;
-
-/* packet trace format function */
-static u8 *
-format_gbp_lpm_classify_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_lpm_classify_trace_t *t = va_arg (*args, gbp_lpm_classify_trace_t *);
-
- s = format (s, "sclass:%d lb:%d src:%U",
- t->sclass, t->lbi, format_ip46_address, &t->src, IP46_TYPE_ANY);
-
- return s;
-}
-
-enum gbp_lpm_type
-{
- GBP_LPM_RECIRC,
- GBP_LPM_EPG,
- GBP_LPM_ANON
-};
-
-/*
- * Determine the SRC EPG from a LPM
- */
-always_inline uword
-gbp_lpm_classify_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- const dpo_proto_t dproto,
- const enum gbp_lpm_type type)
-{
- gbp_src_classify_main_t *gscm = &gbp_src_classify_main;
- u32 n_left_from, *from, *to_next;
- u32 next_index;
-
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0, sw_if_index0, fib_index0, lbi0;
- const gbp_endpoint_t *ge0, *ge_lpm0;
- gbp_lpm_classify_next_t next0;
- const ethernet_header_t *eh0;
- const gbp_policy_dpo_t *gpd0;
- const ip4_address_t *ip4_0;
- const ip6_address_t *ip6_0;
- const gbp_recirc_t *gr0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
- ip4_0 = NULL;
- ip6_0 = NULL;
- next0 = GPB_LPM_CLASSIFY_DROP;
-
- lbi0 = ~0;
- eh0 = NULL;
- b0 = vlib_get_buffer (vm, bi0);
-
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- vnet_buffer2 (b0)->gbp.flags = VXLAN_GBP_GPFLAGS_NONE;
-
- if (DPO_PROTO_IP4 == dproto)
- ip4_0 =
- &((ip4_header_t *) vlib_buffer_get_current (b0))->src_address;
- else if (DPO_PROTO_IP6 == dproto)
- ip6_0 =
- &((ip6_header_t *) vlib_buffer_get_current (b0))->src_address;
- else if (DPO_PROTO_ETHERNET == dproto)
- {
- eh0 = vlib_buffer_get_current (b0);
- gbp_classify_get_ip_address (eh0, &ip4_0, &ip6_0,
- GBP_CLASSIFY_GET_IP_SRC);
- }
-
- if (GBP_LPM_RECIRC == type)
- {
- gr0 = gbp_recirc_get (sw_if_index0);
- fib_index0 = gr0->gr_fib_index[dproto];
- ge0 = NULL;
-
- vnet_feature_next (&next0, b0);
- }
- else
- {
- if (NULL == eh0)
- {
-		  /* packet should be L2 */
- sclass0 = SCLASS_INVALID;
- goto trace;
- }
-
- if (GBP_LPM_ANON == type)
- {
- /*
-		       * anonymous LPM classification: only honour the LPM as
-		       * no EPs were programmed
- */
- gbp_ext_itf_t *gei = gbp_ext_itf_get (sw_if_index0);
- if (ip4_0)
- fib_index0 = gei->gx_fib_index[DPO_PROTO_IP4];
- else if (ip6_0)
- fib_index0 = gei->gx_fib_index[DPO_PROTO_IP6];
- else
- {
- /* not IP so no LPM classify possible */
- sclass0 = SCLASS_INVALID;
- next0 = GPB_LPM_CLASSIFY_DROP;
- goto trace;
- }
- next0 = vnet_l2_feature_next
- (b0, gscm->l2_input_feat_next[GBP_SRC_CLASSIFY_LPM_ANON],
- L2INPUT_FEAT_GBP_LPM_ANON_CLASSIFY);
- }
- else
- {
- /*
-		       * not an anonymous LPM classification: check that it
-		       * comes from an EP and use the EP's RD info
- */
- ge0 = gbp_endpoint_find_mac (eh0->src_address,
- vnet_buffer (b0)->l2.bd_index);
-
- if (NULL == ge0)
- {
-		      /* packet must have come from an EP's MAC */
- sclass0 = SCLASS_INVALID;
- goto trace;
- }
-
- fib_index0 = ge0->ge_fwd.gef_fib_index;
-
- if (~0 == fib_index0)
- {
- sclass0 = SCLASS_INVALID;
- goto trace;
- }
-
- if (ip4_0)
- {
- ge_lpm0 = gbp_endpoint_find_ip4 (ip4_0, fib_index0);
- }
- else if (ip6_0)
- {
- ge_lpm0 = gbp_endpoint_find_ip6 (ip6_0, fib_index0);
- }
- else
- {
- ge_lpm0 = NULL;
- }
-
- next0 = vnet_l2_feature_next
- (b0, gscm->l2_input_feat_next[GBP_SRC_CLASSIFY_LPM],
- L2INPUT_FEAT_GBP_LPM_CLASSIFY);
-
- /*
-		   * if we found the EP by IP lookup, it must be from the EP
-		   * itself, not a network behind it
- */
- if (NULL != ge_lpm0)
- {
- if (PREDICT_FALSE (ge0 != ge_lpm0))
- {
- /* an EP spoofing another EP */
- sclass0 = SCLASS_INVALID;
- next0 = GPB_LPM_CLASSIFY_DROP;
- }
- else
- {
- sclass0 = ge0->ge_fwd.gef_sclass;
- }
- goto trace;
- }
- }
- }
-
- gpd0 = gbp_classify_get_gpd (ip4_0, ip6_0, fib_index0);
- if (0 == gpd0)
- {
- /* could not classify => drop */
- sclass0 = SCLASS_INVALID;
- next0 = GPB_LPM_CLASSIFY_DROP;
- goto trace;
- }
-
- sclass0 = gpd0->gpd_sclass;
-
-	  /* packets from an external network should not be learned by the
-	   * receiver, so set the Do-not-learn bit here */
- vnet_buffer2 (b0)->gbp.flags = VXLAN_GBP_GPFLAGS_D;
-
- trace:
- vnet_buffer2 (b0)->gbp.sclass = sclass0;
-
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- gbp_lpm_classify_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sclass = sclass0;
- t->lbi = lbi0;
- if (ip4_0)
- t->src.ip4 = *ip4_0;
- if (ip6_0)
- t->src.ip6 = *ip6_0;
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (gbp_ip4_lpm_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_lpm_classify_inline
- (vm, node, frame, DPO_PROTO_IP4, GBP_LPM_RECIRC));
-}
-
-VLIB_NODE_FN (gbp_ip6_lpm_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_lpm_classify_inline
- (vm, node, frame, DPO_PROTO_IP6, GBP_LPM_RECIRC));
-}
-
-VLIB_NODE_FN (gbp_l2_lpm_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_lpm_classify_inline
- (vm, node, frame, DPO_PROTO_ETHERNET, GBP_LPM_EPG));
-}
-
-VLIB_NODE_FN (gbp_l2_lpm_anon_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_lpm_classify_inline
- (vm, node, frame, DPO_PROTO_ETHERNET, GBP_LPM_ANON));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_ip4_lpm_classify_node) = {
- .name = "ip4-gbp-lpm-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_lpm_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [GPB_LPM_CLASSIFY_DROP] = "ip4-drop"
- },
-};
-
-VLIB_REGISTER_NODE (gbp_ip6_lpm_classify_node) = {
- .name = "ip6-gbp-lpm-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_lpm_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [GPB_LPM_CLASSIFY_DROP] = "ip6-drop"
- },
-};
-
-VLIB_REGISTER_NODE (gbp_l2_lpm_classify_node) = {
- .name = "l2-gbp-lpm-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_lpm_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [GPB_LPM_CLASSIFY_DROP] = "error-drop"
- },
-};
-
-VLIB_REGISTER_NODE (gbp_l2_lpm_anon_classify_node) = {
- .name = "l2-gbp-lpm-anon-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_lpm_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [GPB_LPM_CLASSIFY_DROP] = "error-drop"
- },
-};
-
-VNET_FEATURE_INIT (gbp_ip4_lpm_classify_feat_node, static) =
-{
- .arc_name = "ip4-unicast",
- .node_name = "ip4-gbp-lpm-classify",
- .runs_before = VNET_FEATURES ("nat44-out2in"),
-};
-VNET_FEATURE_INIT (gbp_ip6_lpm_classify_feat_node, static) =
-{
- .arc_name = "ip6-unicast",
- .node_name = "ip6-gbp-lpm-classify",
- .runs_before = VNET_FEATURES ("nat66-out2in"),
-};
-
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
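
A note on gbp_lpm_classify_inline above: its non-anonymous branch encodes an anti-spoofing rule that is easy to miss in the vector loop. The endpoint found via the source MAC must be the same endpoint found via the source IP; if the IP resolves to a different EP, one endpoint is impersonating another and the packet is dropped. A hedged restatement of just that decision (toy_ep_t and the -1 SCLASS_INVALID stand-in are illustrative, not the VPP types):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* toy_ep_t stands in for gbp_endpoint_t; -1 stands in for SCLASS_INVALID. */
    typedef struct
    {
      int sclass;
    } toy_ep_t;

    static int
    toy_lpm_classify (const toy_ep_t * by_mac, const toy_ep_t * by_ip,
                      bool * drop)
    {
      *drop = false;

      if (NULL == by_mac)
        return (-1);            /* not from a known EP's MAC */

      if (NULL == by_ip)
        return (-1);            /* unknown IP: the real node falls back to
                                 * the LPM (subnet) classification */

      if (by_ip != by_mac)
        {
          *drop = true;         /* an EP spoofing another EP */
          return (-1);
        }

      return (by_mac->sclass);  /* MAC and IP agree: use the EP's sclass */
    }

    int
    main (void)
    {
      toy_ep_t a = { .sclass = 100 }, b = { .sclass = 200 };
      bool drop;
      int sclass;

      printf ("same EP: %d\n", toy_lpm_classify (&a, &a, &drop));
      sclass = toy_lpm_classify (&a, &b, &drop);
      printf ("spoof:   %d (drop=%d)\n", sclass, (int) drop);
      return (0);
    }
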
diff --git a/src/plugins/gbp/gbp_contract.c b/src/plugins/gbp/gbp_contract.c
deleted file mode 100644
index dd433f28a84..00000000000
--- a/src/plugins/gbp/gbp_contract.c
+++ /dev/null
@@ -1,819 +0,0 @@
-/*
- * gbp_contract.c : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_contract.h>
-
-#include <vnet/dpo/load_balance.h>
-#include <vnet/dpo/drop_dpo.h>
-
-char *gbp_contract_error_strings[] = {
-#define _(sym,string) string,
- foreach_gbp_contract_error
-#undef _
-};
-
-/**
- * Single contract DB instance
- */
-gbp_contract_db_t gbp_contract_db;
-
-gbp_contract_t *gbp_contract_pool;
-
-vlib_log_class_t gc_logger;
-
-fib_node_type_t gbp_next_hop_fib_type;
-
-gbp_rule_t *gbp_rule_pool;
-gbp_next_hop_t *gbp_next_hop_pool;
-
-#define GBP_CONTRACT_DBG(...) \
- vlib_log_notice (gc_logger, __VA_ARGS__);
-
-/* Adjacency packet/byte counters indexed by adjacency index. */
-vlib_combined_counter_main_t gbp_contract_permit_counters = {
- .name = "gbp-contracts-permit",
- .stat_segment_name = "/net/gbp/contract/permit",
-};
-
-vlib_combined_counter_main_t gbp_contract_drop_counters = {
- .name = "gbp-contracts-drop",
- .stat_segment_name = "/net/gbp/contract/drop",
-};
-
-index_t
-gbp_rule_alloc (gbp_rule_action_t action,
- gbp_hash_mode_t hash_mode, index_t * nhs)
-{
- gbp_rule_t *gu;
-
- pool_get_zero (gbp_rule_pool, gu);
-
- gu->gu_hash_mode = hash_mode;
- gu->gu_nhs = nhs;
- gu->gu_action = action;
-
- return (gu - gbp_rule_pool);
-}
-
-void
-gbp_rule_free (index_t gui)
-{
- pool_put_index (gbp_rule_pool, gui);
-}
-
-index_t
-gbp_next_hop_alloc (const ip46_address_t * ip,
- index_t grd, const mac_address_t * mac, index_t gbd)
-{
- fib_protocol_t fproto;
- gbp_next_hop_t *gnh;
-
- pool_get_zero (gbp_next_hop_pool, gnh);
-
- fib_node_init (&gnh->gnh_node, gbp_next_hop_fib_type);
-
- ip46_address_copy (&gnh->gnh_ip, ip);
- mac_address_copy (&gnh->gnh_mac, mac);
-
- gnh->gnh_rd = grd;
- gnh->gnh_bd = gbd;
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto) gnh->gnh_ai[fproto] = INDEX_INVALID;
-
- return (gnh - gbp_next_hop_pool);
-}
-
-static inline gbp_next_hop_t *
-gbp_next_hop_get (index_t gui)
-{
- return (pool_elt_at_index (gbp_next_hop_pool, gui));
-}
-
-static void
-gbp_contract_rules_free (index_t * rules)
-{
- index_t *gui, *gnhi;
-
- vec_foreach (gui, rules)
- {
- gbp_policy_node_t pnode;
- fib_protocol_t fproto;
- gbp_next_hop_t *gnh;
- gbp_rule_t *gu;
-
- gu = gbp_rule_get (*gui);
-
- FOR_EACH_GBP_POLICY_NODE (pnode)
- {
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
-	    dpo_reset (&gu->gu_dpo[pnode][fproto]);
- }
- }
-
- vec_foreach (gnhi, gu->gu_nhs)
- {
- fib_protocol_t fproto;
-
- gnh = gbp_next_hop_get (*gnhi);
- gbp_bridge_domain_unlock (gnh->gnh_bd);
- gbp_route_domain_unlock (gnh->gnh_rd);
- gbp_endpoint_child_remove (gnh->gnh_ge, gnh->gnh_sibling);
- gbp_endpoint_unlock (GBP_ENDPOINT_SRC_RR, gnh->gnh_ge);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- adj_unlock (gnh->gnh_ai[fproto]);
- }
- }
-
- gbp_rule_free (*gui);
- }
- vec_free (rules);
-}
-
-static u8 *
-format_gbp_next_hop (u8 * s, va_list * args)
-{
- index_t gnhi = va_arg (*args, index_t);
- gbp_next_hop_t *gnh;
-
- gnh = gbp_next_hop_get (gnhi);
-
- s = format (s, "%U, %U, %U EP:%d",
- format_mac_address_t, &gnh->gnh_mac,
- format_gbp_bridge_domain, gnh->gnh_bd,
- format_ip46_address, &gnh->gnh_ip, IP46_TYPE_ANY, gnh->gnh_ge);
-
- return (s);
-}
-
-u8 *
-format_gbp_rule_action (u8 * s, va_list * args)
-{
- gbp_rule_action_t action = va_arg (*args, gbp_rule_action_t);
-
- switch (action)
- {
-#define _(v,a) case GBP_RULE_##v: return (format (s, "%s", a));
- foreach_gbp_rule_action
-#undef _
- }
-
- return (format (s, "unknown"));
-}
-
-static u8 *
-format_gbp_hash_mode (u8 * s, va_list * args)
-{
- gbp_hash_mode_t hash_mode = va_arg (*args, gbp_hash_mode_t);
-
- switch (hash_mode)
- {
-#define _(v,a) case GBP_HASH_MODE_##v: return (format (s, "%s", a));
- foreach_gbp_hash_mode
-#undef _
- }
-
- return (format (s, "unknown"));
-}
-
-static u8 *
-format_gbp_policy_node (u8 * s, va_list * args)
-{
- gbp_policy_node_t action = va_arg (*args, gbp_policy_node_t);
-
- switch (action)
- {
-#define _(v,a) case GBP_POLICY_NODE_##v: return (format (s, "%s", a));
- foreach_gbp_policy_node
-#undef _
- }
-
- return (format (s, "unknown"));
-}
-
-static u8 *
-format_gbp_rule (u8 * s, va_list * args)
-{
- index_t gui = va_arg (*args, index_t);
- gbp_policy_node_t pnode;
- fib_protocol_t fproto;
- gbp_rule_t *gu;
- index_t *gnhi;
-
- gu = gbp_rule_get (gui);
- s = format (s, "%U", format_gbp_rule_action, gu->gu_action);
-
- switch (gu->gu_action)
- {
- case GBP_RULE_PERMIT:
- case GBP_RULE_DENY:
- return (s);
- case GBP_RULE_REDIRECT:
- s = format (s, ", %U", format_gbp_hash_mode, gu->gu_hash_mode);
- break;
- }
-
- vec_foreach (gnhi, gu->gu_nhs)
- {
- s = format (s, "\n [%U]", format_gbp_next_hop, *gnhi);
- }
-
- FOR_EACH_GBP_POLICY_NODE (pnode)
- {
- s = format (s, "\n policy-%U", format_gbp_policy_node, pnode);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- if (dpo_id_is_valid (&gu->gu_dpo[pnode][fproto]))
- {
- s =
- format (s, "\n %U", format_dpo_id,
- &gu->gu_dpo[pnode][fproto], 8);
- }
- }
- }
-
- return (s);
-}
-
-static void
-gbp_contract_mk_adj (gbp_next_hop_t * gnh, fib_protocol_t fproto)
-{
- ethernet_header_t *eth;
- gbp_endpoint_t *ge;
- index_t old_ai;
- u8 *rewrite;
-
- old_ai = gnh->gnh_ai[fproto];
- rewrite = NULL;
- vec_validate (rewrite, sizeof (*eth) - 1);
- eth = (ethernet_header_t *) rewrite;
-
- GBP_CONTRACT_DBG ("...mk-adj: %U", format_gbp_next_hop,
- gnh - gbp_next_hop_pool);
-
- ge = gbp_endpoint_get (gnh->gnh_ge);
-
- eth->type = clib_host_to_net_u16 ((fproto == FIB_PROTOCOL_IP4 ?
- ETHERNET_TYPE_IP4 : ETHERNET_TYPE_IP6));
- mac_address_to_bytes (gbp_route_domain_get_local_mac (), eth->src_address);
- mac_address_to_bytes (&gnh->gnh_mac, eth->dst_address);
-
- gnh->gnh_ai[fproto] =
- adj_nbr_add_or_lock_w_rewrite (fproto,
- fib_proto_to_link (fproto),
- &gnh->gnh_ip,
- gbp_itf_get_sw_if_index (ge->
- ge_fwd.gef_itf),
- rewrite);
-
- adj_unlock (old_ai);
-}
-
-static flow_hash_config_t
-gbp_contract_mk_lb_hp (gbp_hash_mode_t gu_hash_mode)
-{
- switch (gu_hash_mode)
- {
- case GBP_HASH_MODE_SRC_IP:
- return IP_FLOW_HASH_SRC_ADDR;
- case GBP_HASH_MODE_DST_IP:
- return IP_FLOW_HASH_DST_ADDR;
- case GBP_HASH_MODE_SYMMETRIC:
- return (IP_FLOW_HASH_SRC_ADDR | IP_FLOW_HASH_DST_ADDR |
- IP_FLOW_HASH_PROTO | IP_FLOW_HASH_SYMMETRIC);
- }
-
- return 0;
-}
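
gbp_contract_mk_lb_hp above maps a rule's hash mode onto the load-balancer's flow-hash configuration; the SYMMETRIC mode includes both addresses plus the symmetric flag so that the two directions of a flow select the same redirect next-hop. A toy illustration of why direction-independence needs a commutative fold of src and dst (this is not the real VPP flow hash):

    #include <stdint.h>
    #include <stdio.h>

    /* xor is commutative in src/dst, so swapping the addresses cannot
     * change the result: A->B and B->A land on the same bucket. */
    static uint32_t
    toy_symmetric_hash (uint32_t src, uint32_t dst, uint8_t proto)
    {
      return ((src ^ dst) + proto);
    }

    int
    main (void)
    {
      printf ("%u\n", toy_symmetric_hash (0x0a000001, 0x0a000002, 6));
      printf ("%u\n", toy_symmetric_hash (0x0a000002, 0x0a000001, 6)); /* equal */
      return (0);
    }
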
-
-static void
-gbp_contract_mk_lb (index_t gui, fib_protocol_t fproto)
-{
- load_balance_path_t *paths = NULL;
- gbp_policy_node_t pnode;
- gbp_next_hop_t *gnh;
- dpo_proto_t dproto;
- gbp_rule_t *gu;
- u32 ii;
-
- u32 policy_nodes[] = {
- [GBP_POLICY_NODE_L2] = gbp_policy_port_node.index,
- [GBP_POLICY_NODE_IP4] = ip4_gbp_policy_dpo_node.index,
- [GBP_POLICY_NODE_IP6] = ip6_gbp_policy_dpo_node.index,
- };
-
- GBP_CONTRACT_DBG ("..mk-lb: %U", format_gbp_rule, gui);
-
- gu = gbp_rule_get (gui);
- dproto = fib_proto_to_dpo (fproto);
-
- if (GBP_RULE_REDIRECT != gu->gu_action)
- return;
-
- vec_foreach_index (ii, gu->gu_nhs)
- {
- gnh = gbp_next_hop_get (gu->gu_nhs[ii]);
-
- gbp_contract_mk_adj (gnh, FIB_PROTOCOL_IP4);
- gbp_contract_mk_adj (gnh, FIB_PROTOCOL_IP6);
- }
-
- FOR_EACH_GBP_POLICY_NODE (pnode)
- {
- vec_validate (paths, vec_len (gu->gu_nhs) - 1);
-
- vec_foreach_index (ii, gu->gu_nhs)
- {
- gnh = gbp_next_hop_get (gu->gu_nhs[ii]);
-
- paths[ii].path_index = FIB_NODE_INDEX_INVALID;
- paths[ii].path_weight = 1;
- dpo_set (&paths[ii].path_dpo, DPO_ADJACENCY,
- dproto, gnh->gnh_ai[fproto]);
- }
-
- if (!dpo_id_is_valid (&gu->gu_dpo[pnode][fproto]))
- {
- dpo_id_t dpo = DPO_INVALID;
-
- dpo_set (&dpo, DPO_LOAD_BALANCE, dproto,
- load_balance_create (vec_len (paths),
- dproto,
- gbp_contract_mk_lb_hp
- (gu->gu_hash_mode)));
- dpo_stack_from_node (policy_nodes[pnode], &gu->gu_dpo[pnode][fproto],
- &dpo);
- dpo_reset (&dpo);
- }
-
- load_balance_multipath_update (&gu->gu_dpo[pnode][fproto],
- paths, LOAD_BALANCE_FLAG_NONE);
- vec_free (paths);
- }
-}
-
-static void
-gbp_contract_mk_one_lb (index_t gui)
-{
- gbp_contract_mk_lb (gui, FIB_PROTOCOL_IP4);
- gbp_contract_mk_lb (gui, FIB_PROTOCOL_IP6);
-}
-
-static int
-gbp_contract_next_hop_resolve (index_t gui, index_t gnhi)
-{
- gbp_bridge_domain_t *gbd;
- gbp_next_hop_t *gnh;
- ip46_address_t *ips;
- int rv;
-
- ips = NULL;
- gnh = gbp_next_hop_get (gnhi);
- gbd = gbp_bridge_domain_get (gnh->gnh_bd);
-
- gnh->gnh_gu = gui;
- vec_add1 (ips, gnh->gnh_ip);
-
-  /*
-   * source the endpoint this contract needs to forward via,
-   * giving forwarding details via the spine proxy. If this EP is known
-   * to us, then since we source here with a low priority, the learned
-   * info will take precedence.
-   */
- rv = gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_RR,
- gbd->gb_uu_fwd_sw_if_index,
- ips,
- &gnh->gnh_mac,
- gnh->gnh_bd, gnh->gnh_rd, SCLASS_INVALID,
- GBP_ENDPOINT_FLAG_NONE, NULL, NULL,
- &gnh->gnh_ge);
-
- if (0 == rv)
- {
- gnh->gnh_sibling = gbp_endpoint_child_add (gnh->gnh_ge,
- gbp_next_hop_fib_type, gnhi);
- }
-
- GBP_CONTRACT_DBG ("..resolve: %d: %d: %U", gui, gnhi, format_gbp_next_hop,
- gnhi);
-
- vec_free (ips);
- return (rv);
-}
-
-static void
-gbp_contract_rule_resolve (index_t gui)
-{
- gbp_rule_t *gu;
- index_t *gnhi;
-
- gu = gbp_rule_get (gui);
-
- GBP_CONTRACT_DBG ("..resolve: %U", format_gbp_rule, gui);
-
- vec_foreach (gnhi, gu->gu_nhs)
- {
- gbp_contract_next_hop_resolve (gui, *gnhi);
- }
-}
-
-static void
-gbp_contract_resolve (index_t * guis)
-{
- index_t *gui;
-
- vec_foreach (gui, guis)
- {
- gbp_contract_rule_resolve (*gui);
- }
-}
-
-static void
-gbp_contract_mk_lbs (index_t * guis)
-{
- index_t *gui;
-
- vec_foreach (gui, guis)
- {
- gbp_contract_mk_one_lb (*gui);
- }
-}
-
-int
-gbp_contract_update (gbp_scope_t scope,
- sclass_t sclass,
- sclass_t dclass,
- u32 acl_index,
- index_t * rules,
- u16 * allowed_ethertypes, u32 * stats_index)
-{
- gbp_main_t *gm = &gbp_main;
- u32 *acl_vec = NULL;
- gbp_contract_t *gc;
- index_t gci;
- uword *p;
-
- gbp_contract_key_t key = {
- .gck_scope = scope,
- .gck_src = sclass,
- .gck_dst = dclass,
- };
-
- if (~0 == gm->gbp_acl_user_id)
- {
- acl_plugin_exports_init (&gm->acl_plugin);
- gm->gbp_acl_user_id =
- gm->acl_plugin.register_user_module ("GBP ACL", "src-epg", "dst-epg");
- }
-
- p = hash_get (gbp_contract_db.gc_hash, key.as_u64);
- if (p != NULL)
- {
- gci = p[0];
- gc = gbp_contract_get (gci);
- gbp_contract_rules_free (gc->gc_rules);
- gbp_main.acl_plugin.put_lookup_context_index (gc->gc_lc_index);
- gc->gc_rules = NULL;
- vec_free (gc->gc_allowed_ethertypes);
- }
- else
- {
- pool_get_zero (gbp_contract_pool, gc);
- gc->gc_key = key;
- gci = gc - gbp_contract_pool;
- hash_set (gbp_contract_db.gc_hash, key.as_u64, gci);
-
- vlib_validate_combined_counter (&gbp_contract_drop_counters, gci);
- vlib_zero_combined_counter (&gbp_contract_drop_counters, gci);
- vlib_validate_combined_counter (&gbp_contract_permit_counters, gci);
- vlib_zero_combined_counter (&gbp_contract_permit_counters, gci);
- }
-
- GBP_CONTRACT_DBG ("update: %U", format_gbp_contract, gci);
-
- gc->gc_rules = rules;
- gc->gc_allowed_ethertypes = allowed_ethertypes;
- gbp_contract_resolve (gc->gc_rules);
- gbp_contract_mk_lbs (gc->gc_rules);
-
- gc->gc_acl_index = acl_index;
- gc->gc_lc_index =
- gm->acl_plugin.get_lookup_context_index (gm->gbp_acl_user_id,
- sclass, dclass);
-
- vec_add1 (acl_vec, gc->gc_acl_index);
- gm->acl_plugin.set_acl_vec_for_context (gc->gc_lc_index, acl_vec);
- vec_free (acl_vec);
-
- *stats_index = gci;
-
- return (0);
-}
-
-int
-gbp_contract_delete (gbp_scope_t scope, sclass_t sclass, sclass_t dclass)
-{
- gbp_contract_key_t key = {
- .gck_scope = scope,
- .gck_src = sclass,
- .gck_dst = dclass,
- };
- gbp_contract_t *gc;
- uword *p;
-
- p = hash_get (gbp_contract_db.gc_hash, key.as_u64);
- if (p != NULL)
- {
- gc = gbp_contract_get (p[0]);
-
- gbp_contract_rules_free (gc->gc_rules);
- gbp_main.acl_plugin.put_lookup_context_index (gc->gc_lc_index);
- vec_free (gc->gc_allowed_ethertypes);
-
- hash_unset (gbp_contract_db.gc_hash, key.as_u64);
- pool_put (gbp_contract_pool, gc);
-
- return (0);
- }
-
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-}
-
-void
-gbp_contract_walk (gbp_contract_cb_t cb, void *ctx)
-{
- gbp_contract_t *gc;
-
- /* *INDENT-OFF* */
- pool_foreach (gc, gbp_contract_pool)
- {
- if (!cb(gc, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static clib_error_t *
-gbp_contract_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- sclass_t sclass = SCLASS_INVALID, dclass = SCLASS_INVALID;
-  u32 acl_index = ~0, stats_index, scope = 0;	/* default scope if unset */
- u8 add = 1;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "add"))
- add = 1;
- else if (unformat (input, "del"))
- add = 0;
- else if (unformat (input, "scope %d", &scope))
- ;
- else if (unformat (input, "sclass %d", &sclass))
- ;
- else if (unformat (input, "dclass %d", &dclass))
- ;
- else if (unformat (input, "acl-index %d", &acl_index))
- ;
- else
- break;
- }
-
- if (SCLASS_INVALID == sclass)
- return clib_error_return (0, "Source EPG-ID must be specified");
- if (SCLASS_INVALID == dclass)
- return clib_error_return (0, "Destination EPG-ID must be specified");
-
- if (add)
- {
- gbp_contract_update (scope, sclass, dclass, acl_index,
- NULL, NULL, &stats_index);
- }
- else
- {
- gbp_contract_delete (scope, sclass, dclass);
- }
-
- return (NULL);
-}
-
-/*?
- * Configure a GBP Contract
- *
- * @cliexpar
- * @cliexstart{gbp contract [del] scope <scope> sclass <ID> dclass <ID> acl-index <ACL>}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_contract_cli_node, static) =
-{
- .path = "gbp contract",
-  .short_help =
-    "gbp contract [del] scope <scope> sclass <ID> dclass <ID> acl-index <ACL>",
- .function = gbp_contract_cli,
-};
-/* *INDENT-ON* */
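
For reference, a hypothetical CLI session exercising the command registered above and the show command later in this file (values are illustrative; per the unformat logic, the parser takes scope/sclass/dclass keywords):

    vpp# gbp contract add scope 0 sclass 100 dclass 200 acl-index 5
    vpp# show gbp contract src 100 dst 200
    vpp# gbp contract del scope 0 sclass 100 dclass 200
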
-
-static u8 *
-format_gbp_contract_key (u8 * s, va_list * args)
-{
- gbp_contract_key_t *gck = va_arg (*args, gbp_contract_key_t *);
-
- s = format (s, "{%d,%d,%d}", gck->gck_scope, gck->gck_src, gck->gck_dst);
-
- return (s);
-}
-
-u8 *
-format_gbp_contract (u8 * s, va_list * args)
-{
- index_t gci = va_arg (*args, index_t);
- vlib_counter_t counts;
- gbp_contract_t *gc;
- index_t *gui;
- u16 *et;
-
- gc = gbp_contract_get (gci);
-
- s = format (s, "[%d] %U: acl-index:%d",
- gci, format_gbp_contract_key, &gc->gc_key, gc->gc_acl_index);
-
- s = format (s, "\n rules:");
- vec_foreach (gui, gc->gc_rules)
- {
- s = format (s, "\n %d: %U", *gui, format_gbp_rule, *gui);
- }
-
- s = format (s, "\n allowed-ethertypes:");
- s = format (s, "\n [");
- vec_foreach (et, gc->gc_allowed_ethertypes)
- {
- int host_et = clib_net_to_host_u16 (*et);
- if (0 != host_et)
- s = format (s, "0x%x, ", host_et);
- }
- s = format (s, "]");
-
- s = format (s, "\n stats:");
- vlib_get_combined_counter (&gbp_contract_drop_counters, gci, &counts);
- s = format (s, "\n drop:[%Ld:%Ld]", counts.packets, counts.bytes);
- vlib_get_combined_counter (&gbp_contract_permit_counters, gci, &counts);
- s = format (s, "\n permit:[%Ld:%Ld]", counts.packets, counts.bytes);
-
-
- return (s);
-}
-
-static clib_error_t *
-gbp_contract_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- gbp_contract_t *gc;
- u32 src, dst;
- index_t gci;
-
- src = dst = SCLASS_INVALID;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "src %d", &src))
- ;
- else if (unformat (input, "dst %d", &dst))
- ;
- else
- break;
- }
-
- vlib_cli_output (vm, "Contracts:");
-
- /* *INDENT-OFF* */
- pool_foreach (gc, gbp_contract_pool)
- {
- gci = gc - gbp_contract_pool;
-
- if (SCLASS_INVALID != src && SCLASS_INVALID != dst)
- {
- if (gc->gc_key.gck_src == src &&
- gc->gc_key.gck_dst == dst)
- vlib_cli_output (vm, " %U", format_gbp_contract, gci);
- }
- else if (SCLASS_INVALID != src)
- {
- if (gc->gc_key.gck_src == src)
- vlib_cli_output (vm, " %U", format_gbp_contract, gci);
- }
- else if (SCLASS_INVALID != dst)
- {
- if (gc->gc_key.gck_dst == dst)
- vlib_cli_output (vm, " %U", format_gbp_contract, gci);
- }
- else
- vlib_cli_output (vm, " %U", format_gbp_contract, gci);
- }
- /* *INDENT-ON* */
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy Contracts
- *
- * @cliexpar
- * @cliexstart{show gbp contract}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_contract_show_node, static) = {
- .path = "show gbp contract",
- .short_help = "show gbp contract [src <SRC>] [dst <DST>]\n",
- .function = gbp_contract_show,
-};
-/* *INDENT-ON* */
-
-static fib_node_t *
-gbp_next_hop_get_node (fib_node_index_t index)
-{
- gbp_next_hop_t *gnh;
-
- gnh = gbp_next_hop_get (index);
-
- return (&gnh->gnh_node);
-}
-
-static void
-gbp_next_hop_last_lock_gone (fib_node_t * node)
-{
- ASSERT (0);
-}
-
-static gbp_next_hop_t *
-gbp_next_hop_from_fib_node (fib_node_t * node)
-{
- ASSERT (gbp_next_hop_fib_type == node->fn_type);
- return ((gbp_next_hop_t *) node);
-}
-
-static fib_node_back_walk_rc_t
-gbp_next_hop_back_walk_notify (fib_node_t * node,
- fib_node_back_walk_ctx_t * ctx)
-{
- gbp_next_hop_t *gnh;
-
- gnh = gbp_next_hop_from_fib_node (node);
-
- gbp_contract_mk_one_lb (gnh->gnh_gu);
-
- return (FIB_NODE_BACK_WALK_CONTINUE);
-}
-
-/*
- * The FIB path's graph node virtual function table
- */
-static const fib_node_vft_t gbp_next_hop_vft = {
- .fnv_get = gbp_next_hop_get_node,
- .fnv_last_lock = gbp_next_hop_last_lock_gone,
- .fnv_back_walk = gbp_next_hop_back_walk_notify,
- // .fnv_mem_show = fib_path_memory_show,
-};
-
-static clib_error_t *
-gbp_contract_init (vlib_main_t * vm)
-{
- gc_logger = vlib_log_register_class ("gbp", "con");
- gbp_next_hop_fib_type = fib_node_register_new_type (&gbp_next_hop_vft);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_contract_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
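
The contract DB used throughout this file is a plain u64-keyed hash: gbp_contract_key_t (declared in gbp_contract.h, the next file in this diff) packs the scope and the two sclasses into a single u64 through a union, so lookup, insert and delete are each one hash operation on key.as_u64. A self-contained sketch of the packing (the u16 field widths are an assumption of this sketch):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Mirrors gbp_contract_key_t: {scope, src, dst} overlaid on one u64. */
    typedef struct
    {
      union
      {
        struct
        {
          uint16_t gck_scope;
          uint16_t gck_src;
          uint16_t gck_dst;
        };
        uint64_t as_u64;
      };
    } toy_contract_key_t;

    int
    main (void)
    {
      toy_contract_key_t key;

      /* zero first so the two u64 padding bytes are deterministic */
      memset (&key, 0, sizeof (key));
      key.gck_scope = 0;
      key.gck_src = 100;
      key.gck_dst = 200;

      printf ("hash key: 0x%016llx\n", (unsigned long long) key.as_u64);
      return (0);
    }
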
diff --git a/src/plugins/gbp/gbp_contract.h b/src/plugins/gbp/gbp_contract.h
deleted file mode 100644
index 1e74db60116..00000000000
--- a/src/plugins/gbp/gbp_contract.h
+++ /dev/null
@@ -1,362 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_CONTRACT_H__
-#define __GBP_CONTRACT_H__
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_types.h>
-
-#define foreach_gbp_contract_error \
- _(ALLOW_NO_SCLASS, "allow-no-sclass") \
- _(ALLOW_INTRA, "allow-intra-sclass") \
- _(ALLOW_A_BIT, "allow-a-bit-set") \
- _(ALLOW_SCLASS_1, "allow-sclass-1") \
- _(ALLOW_CONTRACT, "allow-contract") \
- _(DROP_CONTRACT, "drop-contract") \
- _(DROP_ETHER_TYPE, "drop-ether-type") \
- _(DROP_NO_CONTRACT, "drop-no-contract") \
- _(DROP_NO_DCLASS, "drop-no-dclass") \
- _(DROP_NO_RULE, "drop-no-rule")
-
-typedef enum
-{
-#define _(sym,str) GBP_CONTRACT_ERROR_##sym,
- foreach_gbp_contract_error
-#undef _
- GBP_CONTRACT_N_ERROR,
-#define GBP_CONTRACT_N_ERROR GBP_CONTRACT_N_ERROR
-} gbp_contract_error_t;
-
-extern char *gbp_contract_error_strings[GBP_CONTRACT_N_ERROR];
-
-/**
- * The key for a Contract
- */
-typedef struct gbp_contract_key_t_
-{
- union
- {
- struct
- {
- gbp_scope_t gck_scope;
- /**
- * source and destination EPGs for which the ACL applies
- */
- sclass_t gck_src;
- sclass_t gck_dst;
- };
- u64 as_u64;
- };
-} gbp_contract_key_t;
-
-typedef struct gbp_next_hop_t_
-{
- fib_node_t gnh_node;
- ip46_address_t gnh_ip;
- mac_address_t gnh_mac;
- index_t gnh_gu;
- index_t gnh_bd;
- index_t gnh_rd;
- u32 gnh_ge;
- u32 gnh_sibling;
- index_t gnh_ai[FIB_PROTOCOL_IP_MAX];
-} gbp_next_hop_t;
-
-#define foreach_gbp_hash_mode \
- _(SRC_IP, "src-ip") \
- _(DST_IP, "dst-ip") \
- _(SYMMETRIC, "symmetric")
-
-typedef enum gbp_hash_mode_t_
-{
-#define _(v,s) GBP_HASH_MODE_##v,
- foreach_gbp_hash_mode
-#undef _
-} gbp_hash_mode_t;
-
-#define foreach_gbp_rule_action \
- _(PERMIT, "permit") \
- _(DENY, "deny") \
- _(REDIRECT, "redirect")
-
-typedef enum gbp_rule_action_t_
-{
-#define _(v,s) GBP_RULE_##v,
- foreach_gbp_rule_action
-#undef _
-} gbp_rule_action_t;
-
-#define foreach_gbp_policy_node \
- _(L2, "L2") \
- _(IP4, "ip4") \
- _(IP6, "ip6")
-
-typedef enum gbp_policy_node_t_
-{
-#define _(v,s) GBP_POLICY_NODE_##v,
- foreach_gbp_policy_node
-#undef _
-} gbp_policy_node_t;
-#define GBP_POLICY_N_NODES (GBP_POLICY_NODE_IP6+1)
-
-#define FOR_EACH_GBP_POLICY_NODE(pnode) \
- for (pnode = GBP_POLICY_NODE_L2; pnode < GBP_POLICY_N_NODES; pnode++)
-
-typedef struct gbp_rule_t_
-{
- gbp_rule_action_t gu_action;
- gbp_hash_mode_t gu_hash_mode;
- index_t *gu_nhs;
-
- /**
- * DPO of the load-balance object used to redirect
- */
- dpo_id_t gu_dpo[GBP_POLICY_N_NODES][FIB_PROTOCOL_IP_MAX];
-} gbp_rule_t;
-
-/**
- * A Group Based Policy Contract.
- * Determines the ACL that applies to traffic passing between two endpoint groups
- */
-typedef struct gbp_contract_t_
-{
- /**
- * source and destination EPGs
- */
- gbp_contract_key_t gc_key;
-
- u32 gc_acl_index;
- u32 gc_lc_index;
-
- /**
- * The rules to apply to packets from the source to the destination EPG
- */
- index_t *gc_rules;
-
- /**
- * An ethertype whitelist
- */
- u16 *gc_allowed_ethertypes;
-} gbp_contract_t;
-
-/**
- * EPG src,dst pair to ACL mapping table, aka contract DB
- */
-typedef struct gbp_contract_db_t_
-{
- /**
- * We can form a u64 key from the pair, so use a simple hash table
- */
- uword *gc_hash;
-} gbp_contract_db_t;
-
-extern int gbp_contract_update (gbp_scope_t scope,
- sclass_t sclass,
- sclass_t dclass,
- u32 acl_index,
- index_t * rules,
- u16 * allowed_ethertypes, u32 * stats_index);
-extern int gbp_contract_delete (gbp_scope_t scope, sclass_t sclass,
- sclass_t dclass);
-
-extern index_t gbp_rule_alloc (gbp_rule_action_t action,
- gbp_hash_mode_t hash_mode, index_t * nhs);
-extern void gbp_rule_free (index_t gui);
-extern index_t gbp_next_hop_alloc (const ip46_address_t * ip,
- index_t grd,
- const mac_address_t * mac, index_t gbd);
-
-typedef int (*gbp_contract_cb_t) (gbp_contract_t * gbpe, void *ctx);
-extern void gbp_contract_walk (gbp_contract_cb_t cb, void *ctx);
-
-extern u8 *format_gbp_rule_action (u8 * s, va_list * args);
-extern u8 *format_gbp_contract (u8 * s, va_list * args);
-
-/**
- * DP functions and databases
- */
-extern gbp_contract_db_t gbp_contract_db;
-
-always_inline index_t
-gbp_contract_find (gbp_contract_key_t * key)
-{
- uword *p;
-
- p = hash_get (gbp_contract_db.gc_hash, key->as_u64);
-
- if (NULL != p)
- return (p[0]);
-
- return (INDEX_INVALID);
-}
-
-extern gbp_contract_t *gbp_contract_pool;
-
-always_inline gbp_contract_t *
-gbp_contract_get (index_t gci)
-{
- return (pool_elt_at_index (gbp_contract_pool, gci));
-}
-
-extern gbp_rule_t *gbp_rule_pool;
-
-always_inline gbp_rule_t *
-gbp_rule_get (index_t gui)
-{
- return (pool_elt_at_index (gbp_rule_pool, gui));
-}
-
-extern vlib_combined_counter_main_t gbp_contract_permit_counters;
-extern vlib_combined_counter_main_t gbp_contract_drop_counters;
-
-typedef enum
-{
- GBP_CONTRACT_APPLY_L2,
- GBP_CONTRACT_APPLY_IP4,
- GBP_CONTRACT_APPLY_IP6,
-} gbp_contract_apply_type_t;
-
-static_always_inline gbp_rule_action_t
-gbp_contract_apply (vlib_main_t * vm, gbp_main_t * gm,
- gbp_contract_key_t * key, vlib_buffer_t * b,
- gbp_rule_t ** rule, u32 * intra, u32 * sclass1,
- u32 * acl_match, u32 * rule_match,
- gbp_contract_error_t * err,
- gbp_contract_apply_type_t type)
-{
- fa_5tuple_opaque_t fa_5tuple;
- const gbp_contract_t *contract;
- index_t contract_index;
- u32 acl_pos, trace_bitmap;
- u16 etype;
- u8 ip6, action;
-
- *rule = 0;
- trace_bitmap = 0;
-
- if (key->gck_src == key->gck_dst)
- {
- /* intra-epg allowed */
- (*intra)++;
- *err = GBP_CONTRACT_ERROR_ALLOW_INTRA;
- return GBP_RULE_PERMIT;
- }
-
- if (1 == key->gck_src || 1 == key->gck_dst)
- {
- /* sclass 1 allowed */
- (*sclass1)++;
- *err = GBP_CONTRACT_ERROR_ALLOW_SCLASS_1;
- return GBP_RULE_PERMIT;
- }
-
- /* look for contract */
- contract_index = gbp_contract_find (key);
- if (INDEX_INVALID == contract_index)
- {
- *err = GBP_CONTRACT_ERROR_DROP_NO_CONTRACT;
- return GBP_RULE_DENY;
- }
-
- contract = gbp_contract_get (contract_index);
-
- *err = GBP_CONTRACT_ERROR_DROP_CONTRACT;
-
- switch (type)
- {
- case GBP_CONTRACT_APPLY_IP4:
- ip6 = 0;
- break;
- case GBP_CONTRACT_APPLY_IP6:
- ip6 = 1;
- break;
- case GBP_CONTRACT_APPLY_L2:
- {
- /* check ethertype */
- etype =
- ((u16 *) (vlib_buffer_get_current (b) +
- vnet_buffer (b)->l2.l2_len))[-1];
-
- if (~0 == vec_search (contract->gc_allowed_ethertypes, etype))
- {
- *err = GBP_CONTRACT_ERROR_DROP_ETHER_TYPE;
- goto contract_deny;
- }
-
- switch (clib_net_to_host_u16 (etype))
- {
- case ETHERNET_TYPE_IP4:
- ip6 = 0;
- break;
- case ETHERNET_TYPE_IP6:
- ip6 = 1;
- break;
- default:
- goto contract_deny;
- }
- }
- break;
- }
-
- /* check ACL */
- action = 0;
- acl_plugin_fill_5tuple_inline (gm->acl_plugin.p_acl_main,
- contract->gc_lc_index, b, ip6,
- GBP_CONTRACT_APPLY_L2 != type /* input */ ,
- GBP_CONTRACT_APPLY_L2 == type /* l2_path */ ,
- &fa_5tuple);
- acl_plugin_match_5tuple_inline (gm->acl_plugin.p_acl_main,
- contract->gc_lc_index, &fa_5tuple, ip6,
- &action, &acl_pos, acl_match, rule_match,
- &trace_bitmap);
- if (action <= 0)
- goto contract_deny;
-
- if (PREDICT_FALSE (*rule_match >= vec_len (contract->gc_rules)))
- {
- *err = GBP_CONTRACT_ERROR_DROP_NO_RULE;
- goto contract_deny;
- }
-
- *rule = gbp_rule_get (contract->gc_rules[*rule_match]);
- switch ((*rule)->gu_action)
- {
- case GBP_RULE_PERMIT:
- case GBP_RULE_REDIRECT:
- *err = GBP_CONTRACT_ERROR_ALLOW_CONTRACT;
- vlib_increment_combined_counter (&gbp_contract_permit_counters,
- vm->thread_index, contract_index, 1,
- vlib_buffer_length_in_chain (vm, b));
- return (*rule)->gu_action;
- case GBP_RULE_DENY:
- break;
- }
-
-contract_deny:
- vlib_increment_combined_counter (&gbp_contract_drop_counters,
- vm->thread_index, contract_index, 1,
- vlib_buffer_length_in_chain (vm, b));
- return GBP_RULE_DENY;
-}
-
-#endif /* __GBP_CONTRACT_H__ */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
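
gbp_contract_apply above is the dataplane's single policy entry point, and its checks are strictly ordered: intra-EPG allow, wildcard sclass 1 allow, contract lookup, ethertype check (L2 path only), ACL match, then the matched rule's action. A compact restatement of that ladder as a standalone function (an illustrative driver, not the real dataplane; the counters, trace flags, 5-tuple extraction and the drop-no-rule overflow check are elided):

    #include <stdio.h>

    typedef enum
    { ACT_PERMIT, ACT_DENY, ACT_REDIRECT } toy_action_t;

    static toy_action_t
    toy_contract_decision (int src, int dst, int have_contract,
                           int ethertype_ok, int acl_hit,
                           toy_action_t rule_action)
    {
      if (src == dst)
        return (ACT_PERMIT);    /* allow-intra-sclass */
      if (1 == src || 1 == dst)
        return (ACT_PERMIT);    /* allow-sclass-1 */
      if (!have_contract)
        return (ACT_DENY);      /* drop-no-contract */
      if (!ethertype_ok)
        return (ACT_DENY);      /* drop-ether-type (L2 path only) */
      if (!acl_hit)
        return (ACT_DENY);      /* drop-contract */
      return (rule_action);     /* permit / deny / redirect from the rule */
    }

    int
    main (void)
    {
      printf ("intra: %d\n", toy_contract_decision (7, 7, 0, 0, 0, ACT_DENY));
      printf ("deny:  %d\n", toy_contract_decision (7, 9, 1, 1, 0, ACT_PERMIT));
      return (0);
    }
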
diff --git a/src/plugins/gbp/gbp_endpoint.c b/src/plugins/gbp/gbp_endpoint.c
deleted file mode 100644
index b0cf64ced2d..00000000000
--- a/src/plugins/gbp/gbp_endpoint.c
+++ /dev/null
@@ -1,1597 +0,0 @@
-/*
- * gbp_endpoint.c : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_endpoint_group.h>
-#include <plugins/gbp/gbp_itf.h>
-#include <plugins/gbp/gbp_scanner.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_vxlan.h>
-
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/l2_output.h>
-#include <vnet/l2/feat_bitmap.h>
-#include <vnet/l2/l2_fib.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/ip-neighbor/ip_neighbor.h>
-#include <vnet/ip-neighbor/ip4_neighbor.h>
-#include <vnet/ip-neighbor/ip6_neighbor.h>
-#include <vnet/fib/fib_walk.h>
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-
-static const char *gbp_endpoint_attr_names[] = GBP_ENDPOINT_ATTR_NAMES;
-
-/**
- * EP DBs
- */
-gbp_ep_db_t gbp_ep_db;
-
-static fib_source_t gbp_fib_source_hi;
-static fib_source_t gbp_fib_source_low;
-static fib_node_type_t gbp_endpoint_fib_type;
-static vlib_log_class_t gbp_ep_logger;
-
-#define GBP_ENDPOINT_DBG(...) \
- vlib_log_debug (gbp_ep_logger, __VA_ARGS__);
-
-#define GBP_ENDPOINT_INFO(...) \
- vlib_log_notice (gbp_ep_logger, __VA_ARGS__);
-
-/**
- * Pool of GBP endpoints
- */
-gbp_endpoint_t *gbp_endpoint_pool;
-
-/**
- * A count of the number of dynamic entries
- */
-static u32 gbp_n_learnt_endpoints;
-
-#define FOR_EACH_GBP_ENDPOINT_ATTR(_item) \
- for (_item = GBP_ENDPOINT_ATTR_FIRST; \
- _item < GBP_ENDPOINT_ATTR_LAST; \
- _item++)
-
-u8 *
-format_gbp_endpoint_flags (u8 * s, va_list * args)
-{
- gbp_endpoint_attr_t attr;
- gbp_endpoint_flags_t flags = va_arg (*args, gbp_endpoint_flags_t);
-
- FOR_EACH_GBP_ENDPOINT_ATTR (attr)
- {
- if ((1 << attr) & flags)
- {
- s = format (s, "%s,", gbp_endpoint_attr_names[attr]);
- }
- }
-
- return (s);
-}
-
-int
-gbp_endpoint_is_remote (const gbp_endpoint_t * ge)
-{
- return (! !(ge->ge_fwd.gef_flags & GBP_ENDPOINT_FLAG_REMOTE));
-}
-
-int
-gbp_endpoint_is_local (const gbp_endpoint_t * ge)
-{
- return (!(ge->ge_fwd.gef_flags & GBP_ENDPOINT_FLAG_REMOTE));
-}
-
-int
-gbp_endpoint_is_external (const gbp_endpoint_t * ge)
-{
- return (! !(ge->ge_fwd.gef_flags & GBP_ENDPOINT_FLAG_EXTERNAL));
-}
-
-int
-gbp_endpoint_is_learnt (const gbp_endpoint_t * ge)
-{
- if (0 == vec_len (ge->ge_locs))
- return 0;
-
- /* DP is the highest source so if present it will be first */
- return (ge->ge_locs[0].gel_src == GBP_ENDPOINT_SRC_DP);
-}
-
-static void
-gbp_endpoint_extract_key_mac_itf (const clib_bihash_kv_16_8_t * key,
- mac_address_t * mac, u32 * sw_if_index)
-{
- mac_address_from_u64 (mac, key->key[0]);
- *sw_if_index = key->key[1];
-}
-
-static void
-gbp_endpoint_extract_key_ip_itf (const clib_bihash_kv_24_8_t * key,
- ip46_address_t * ip, u32 * sw_if_index)
-{
- ip->as_u64[0] = key->key[0];
- ip->as_u64[1] = key->key[1];
- *sw_if_index = key->key[2];
-}
-
-gbp_endpoint_t *
-gbp_endpoint_find_ip (const ip46_address_t * ip, u32 fib_index)
-{
- clib_bihash_kv_24_8_t key, value;
- int rv;
-
- gbp_endpoint_mk_key_ip (ip, fib_index, &key);
-
- rv = clib_bihash_search_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, &value);
-
- if (0 != rv)
- return NULL;
-
- return (gbp_endpoint_get (value.value));
-}
-
-static void
-gbp_endpoint_add_itf (u32 sw_if_index, index_t gei)
-{
- vec_validate_init_empty (gbp_ep_db.ged_by_sw_if_index, sw_if_index, ~0);
-
- gbp_ep_db.ged_by_sw_if_index[sw_if_index] = gei;
-}
-
-static bool
-gbp_endpoint_add_mac (const mac_address_t * mac, u32 bd_index, index_t gei)
-{
- clib_bihash_kv_16_8_t key;
- int rv;
-
- gbp_endpoint_mk_key_mac (mac->bytes, bd_index, &key);
- key.value = gei;
-
- rv = clib_bihash_add_del_16_8 (&gbp_ep_db.ged_by_mac_bd, &key, 1);
-
-
- return (0 == rv);
-}
-
-static bool
-gbp_endpoint_add_ip (const ip46_address_t * ip, u32 fib_index, index_t gei)
-{
- clib_bihash_kv_24_8_t key;
- int rv;
-
- gbp_endpoint_mk_key_ip (ip, fib_index, &key);
- key.value = gei;
-
- rv = clib_bihash_add_del_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, 1);
-
- return (0 == rv);
-}
-
-static void
-gbp_endpoint_del_mac (const mac_address_t * mac, u32 bd_index)
-{
- clib_bihash_kv_16_8_t key;
-
- gbp_endpoint_mk_key_mac (mac->bytes, bd_index, &key);
-
- clib_bihash_add_del_16_8 (&gbp_ep_db.ged_by_mac_bd, &key, 0);
-}
-
-static void
-gbp_endpoint_del_ip (const ip46_address_t * ip, u32 fib_index)
-{
- clib_bihash_kv_24_8_t key;
-
- gbp_endpoint_mk_key_ip (ip, fib_index, &key);
-
- clib_bihash_add_del_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, 0);
-}
-
-static index_t
-gbp_endpoint_index (const gbp_endpoint_t * ge)
-{
- return (ge - gbp_endpoint_pool);
-}
-
-static int
-gbp_endpoint_ip_is_equal (const fib_prefix_t * fp, const ip46_address_t * ip)
-{
- return (ip46_address_is_equal (ip, &fp->fp_addr));
-}
-
-static void
-gbp_endpoint_ips_update (gbp_endpoint_t * ge,
- const ip46_address_t * ips,
- const gbp_route_domain_t * grd)
-{
- const ip46_address_t *ip;
- index_t gei, grdi;
-
- gei = gbp_endpoint_index (ge);
- grdi = gbp_route_domain_index (grd);
-
- ASSERT ((ge->ge_key.gek_grd == INDEX_INVALID) ||
- (ge->ge_key.gek_grd == grdi));
-
- vec_foreach (ip, ips)
- {
- if (~0 == vec_search_with_function (ge->ge_key.gek_ips, ip,
- gbp_endpoint_ip_is_equal))
- {
- fib_prefix_t *pfx;
-
- vec_add2 (ge->ge_key.gek_ips, pfx, 1);
- fib_prefix_from_ip46_addr (ip, pfx);
-
- gbp_endpoint_add_ip (&pfx->fp_addr,
- grd->grd_fib_index[pfx->fp_proto], gei);
- }
- ge->ge_key.gek_grd = grdi;
- }
-}
-
-static gbp_endpoint_t *
-gbp_endpoint_alloc (const ip46_address_t * ips,
- const gbp_route_domain_t * grd,
- const mac_address_t * mac,
- const gbp_bridge_domain_t * gbd)
-{
- gbp_endpoint_t *ge;
- index_t gei;
-
- pool_get_zero (gbp_endpoint_pool, ge);
-
- fib_node_init (&ge->ge_node, gbp_endpoint_fib_type);
- gei = gbp_endpoint_index (ge);
- ge->ge_key.gek_gbd =
- ge->ge_key.gek_grd = ge->ge_fwd.gef_fib_index = INDEX_INVALID;
- gbp_itf_hdl_reset (&ge->ge_fwd.gef_itf);
- ge->ge_last_time = vlib_time_now (vlib_get_main ());
- ge->ge_key.gek_gbd = gbp_bridge_domain_index (gbd);
-
- if (NULL != mac)
- {
- mac_address_copy (&ge->ge_key.gek_mac, mac);
- gbp_endpoint_add_mac (mac, gbd->gb_bd_index, gei);
- }
- gbp_endpoint_ips_update (ge, ips, grd);
-
- return (ge);
-}
-
-static int
-gbp_endpoint_loc_is_equal (gbp_endpoint_loc_t * a, gbp_endpoint_loc_t * b)
-{
- return (a->gel_src == b->gel_src);
-}
-
-static int
-gbp_endpoint_loc_cmp_for_sort (gbp_endpoint_loc_t * a, gbp_endpoint_loc_t * b)
-{
- return (a->gel_src - b->gel_src);
-}
-
-static gbp_endpoint_loc_t *
-gbp_endpoint_loc_find (gbp_endpoint_t * ge, gbp_endpoint_src_t src)
-{
- gbp_endpoint_loc_t gel = {
- .gel_src = src,
- };
- u32 pos;
-
- pos = vec_search_with_function (ge->ge_locs, &gel,
- gbp_endpoint_loc_is_equal);
-
- if (~0 != pos)
- return (&ge->ge_locs[pos]);
-
- return NULL;
-}
-
-static int
-gbp_endpoint_loc_unlock (gbp_endpoint_t * ge, gbp_endpoint_loc_t * gel)
-{
- u32 pos;
-
- gel->gel_locks--;
-
- if (0 == gel->gel_locks)
- {
- pos = gel - ge->ge_locs;
-
- vec_del1 (ge->ge_locs, pos);
- if (vec_len (ge->ge_locs) > 1)
- vec_sort_with_function (ge->ge_locs, gbp_endpoint_loc_cmp_for_sort);
-
- /* This could be the last lock, so don't access the EP from
- * this point on */
- fib_node_unlock (&ge->ge_node);
-
- return (1);
- }
- return (0);
-}
-
-static void
-gbp_endpoint_loc_destroy (gbp_endpoint_loc_t * gel)
-{
- gbp_endpoint_group_unlock (gel->gel_epg);
- gbp_itf_unlock (&gel->gel_itf);
-}
-
-static gbp_endpoint_loc_t *
-gbp_endpoint_loc_find_or_add (gbp_endpoint_t * ge, gbp_endpoint_src_t src)
-{
- gbp_endpoint_loc_t gel = {
- .gel_src = src,
- .gel_epg = INDEX_INVALID,
- .gel_itf = GBP_ITF_HDL_INVALID,
- .gel_locks = 0,
- };
- u32 pos;
-
- pos = vec_search_with_function (ge->ge_locs, &gel,
- gbp_endpoint_loc_is_equal);
-
- if (~0 == pos)
- {
- vec_add1 (ge->ge_locs, gel);
-
- if (vec_len (ge->ge_locs) > 1)
- {
- vec_sort_with_function (ge->ge_locs, gbp_endpoint_loc_cmp_for_sort);
-
- pos = vec_search_with_function (ge->ge_locs, &gel,
- gbp_endpoint_loc_is_equal);
- }
- else
- pos = 0;
-
- /*
- * it's the sources and children that lock the endpoints
- */
- fib_node_lock (&ge->ge_node);
- }
-
- return (&ge->ge_locs[pos]);
-}
-
-/**
- * Find an EP in the DBs and check that, if we find it in the L2 DB,
- * it has the same IPs as this update
- */
-static int
-gbp_endpoint_find_for_update (const ip46_address_t * ips,
- const gbp_route_domain_t * grd,
- const mac_address_t * mac,
- const gbp_bridge_domain_t * gbd,
- gbp_endpoint_t ** ge)
-{
- gbp_endpoint_t *l2_ge, *l3_ge, *tmp;
-
- l2_ge = l3_ge = NULL;
-
- if (NULL != mac && !mac_address_is_zero (mac))
- {
- ASSERT (gbd);
- l2_ge = gbp_endpoint_find_mac (mac->bytes, gbd->gb_bd_index);
- }
- if (NULL != ips && !ip46_address_is_zero (ips))
- {
- const ip46_address_t *ip;
- fib_protocol_t fproto;
-
- ASSERT (grd);
- vec_foreach (ip, ips)
- {
- fproto = fib_proto_from_ip46 (ip46_address_get_type (ip));
-
- tmp = gbp_endpoint_find_ip (ip, grd->grd_fib_index[fproto]);
-
- if (NULL == tmp)
- /* not found */
- continue;
- else if (NULL == l3_ge)
- /* first match against an IP address */
- l3_ge = tmp;
- else if (tmp == l3_ge)
-	    /* another match against an IP address of the same endpoint */
- continue;
- else
- {
- /*
-	       * a match against a different endpoint.
-	       * This means the KEY of the EP is changing, which is not allowed
- */
- return (-1);
- }
- }
- }
-
- if (NULL == l2_ge && NULL == l3_ge)
- /* not found */
- *ge = NULL;
- else if (NULL == l2_ge)
- /* found at L3 */
- *ge = l3_ge;
- else if (NULL == l3_ge)
- /* found at L2 */
- *ge = l2_ge;
- else
- {
- /* found both L3 and L2 - they must be the same else the KEY
- * is changing
- */
- if (l2_ge == l3_ge)
- *ge = l2_ge;
- else
- return (-1);
- }
-
- return (0);
-}
-
-static gbp_endpoint_src_t
-gbp_endpoint_get_best_src (const gbp_endpoint_t * ge)
-{
- if (0 == vec_len (ge->ge_locs))
- return (GBP_ENDPOINT_SRC_MAX);
-
- return (ge->ge_locs[0].gel_src);
-}
-
-static void
-gbp_endpoint_n_learned (int n)
-{
- gbp_n_learnt_endpoints += n;
-
- if (n > 0 && 1 == gbp_n_learnt_endpoints)
- {
- vlib_process_signal_event (vlib_get_main (),
- gbp_scanner_node.index,
- GBP_ENDPOINT_SCAN_START, 0);
- }
- if (n < 0 && 0 == gbp_n_learnt_endpoints)
- {
- vlib_process_signal_event (vlib_get_main (),
- gbp_scanner_node.index,
- GBP_ENDPOINT_SCAN_STOP, 0);
- }
-}
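
gbp_endpoint_n_learned above starts and stops the scanner process edge-triggered: only the 0-to-1 and 1-to-0 transitions of the learnt-endpoint count signal the scanner, so bulk learns and ages do not re-signal. A minimal sketch of the pattern (start_scan/stop_scan stand in for the vlib process events):

    #include <stdio.h>

    static void start_scan (void) { puts ("scan start"); }
    static void stop_scan (void)  { puts ("scan stop"); }

    /* poke the scanner only on count edges, not on every add/delete */
    static void
    learnt_count_update (int *count, int delta)
    {
      *count += delta;
      if (delta > 0 && 1 == *count)
        start_scan ();
      if (delta < 0 && 0 == *count)
        stop_scan ();
    }

    int
    main (void)
    {
      int n = 0;
      learnt_count_update (&n, +1);  /* 0 -> 1: "scan start" */
      learnt_count_update (&n, +1);  /* no signal */
      learnt_count_update (&n, -1);  /* no signal */
      learnt_count_update (&n, -1);  /* 1 -> 0: "scan stop" */
      return (0);
    }
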
-
-static void
-gbp_endpoint_loc_update (const gbp_endpoint_t * ge,
- gbp_endpoint_loc_t * gel,
- const gbp_bridge_domain_t * gb,
- u32 sw_if_index,
- index_t ggi,
- gbp_endpoint_flags_t flags,
- const ip46_address_t * tun_src,
- const ip46_address_t * tun_dst)
-{
- int was_learnt, is_learnt;
-
- gel->gel_locks++;
- was_learnt = ! !(gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE);
- gel->gel_flags = flags;
- is_learnt = ! !(gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE);
-
- gbp_endpoint_n_learned (is_learnt - was_learnt);
-
- /*
- * update the EPG
- */
- gbp_endpoint_group_lock (ggi);
- gbp_endpoint_group_unlock (gel->gel_epg);
- gel->gel_epg = ggi;
-
- if (gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE)
- {
- if (NULL != tun_src)
- ip46_address_copy (&gel->tun.gel_src, tun_src);
- if (NULL != tun_dst)
- ip46_address_copy (&gel->tun.gel_dst, tun_dst);
-
- if (ip46_address_is_multicast (&gel->tun.gel_src))
- {
- /*
- * we learnt the EP from the multicast tunnel.
- * Create a unicast TEP from the packet's source
- * and the fixed address of the BD's parent tunnel
- */
- const gbp_vxlan_tunnel_t *gt;
-
- gt = gbp_vxlan_tunnel_get (gb->gb_vni);
-
- if (NULL != gt)
- {
- ip46_address_copy (&gel->tun.gel_src, &gt->gt_src);
- sw_if_index = gt->gt_sw_if_index;
- }
- }
-
- /*
-       * the input interface may be the parent GBP-vxlan interface;
-       * create a child vxlan-gbp tunnel and use that as the endpoint's
-       * interface.
- */
- gbp_itf_hdl_t old = gel->gel_itf;
-
- switch (gbp_vxlan_tunnel_get_type (sw_if_index))
- {
- case GBP_VXLAN_TEMPLATE_TUNNEL:
- gel->tun.gel_parent_sw_if_index = sw_if_index;
- gel->gel_itf = gbp_vxlan_tunnel_clone_and_lock (sw_if_index,
- &gel->tun.gel_src,
- &gel->tun.gel_dst);
- break;
- case VXLAN_GBP_TUNNEL:
- gel->tun.gel_parent_sw_if_index =
- vxlan_gbp_tunnel_get_parent (sw_if_index);
- gel->gel_itf = vxlan_gbp_tunnel_lock_itf (sw_if_index);
- break;
- }
-
- gbp_itf_unlock (&old);
- }
- else
- {
- gel->gel_itf = gbp_itf_l2_add_and_lock (sw_if_index,
- ge->ge_key.gek_gbd);
- }
-}
-
-static void
-gbb_endpoint_fwd_reset (gbp_endpoint_t * ge)
-{
- const gbp_route_domain_t *grd;
- const gbp_bridge_domain_t *gbd;
- gbp_endpoint_fwd_t *gef;
- const fib_prefix_t *pfx;
- index_t *ai;
-
- gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
- gef = &ge->ge_fwd;
-
- vec_foreach (pfx, ge->ge_key.gek_ips)
- {
- u32 fib_index;
-
- grd = gbp_route_domain_get (ge->ge_key.gek_grd);
- fib_index = grd->grd_fib_index[pfx->fp_proto];
-
- bd_add_del_ip_mac (gbd->gb_bd_index, fib_proto_to_ip46 (pfx->fp_proto),
- &pfx->fp_addr, &ge->ge_key.gek_mac, 0);
-
- /*
- * remove a host route
- */
- if (gbp_endpoint_is_remote (ge))
- {
- fib_table_entry_special_remove (fib_index, pfx, gbp_fib_source_hi);
- }
-
- fib_table_entry_delete (fib_index, pfx, gbp_fib_source_low);
- }
- vec_foreach (ai, gef->gef_adjs)
- {
- adj_unlock (*ai);
- }
-
- if (gbp_itf_hdl_is_valid (gef->gef_itf))
- {
- l2fib_del_entry (ge->ge_key.gek_mac.bytes,
- gbd->gb_bd_index,
- gbp_itf_get_sw_if_index (gef->gef_itf));
- }
-
- gbp_itf_unlock (&gef->gef_itf);
- vec_free (gef->gef_adjs);
-}
-
-static void
-gbb_endpoint_fwd_recalc (gbp_endpoint_t * ge)
-{
- const gbp_bridge_domain_t *gbd;
- const gbp_endpoint_group_t *gg;
- const gbp_route_domain_t *grd;
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_fwd_t *gef;
- const fib_prefix_t *pfx;
- index_t gei;
-
- /*
-   * locations are sorted in source priority order
- */
- gei = gbp_endpoint_index (ge);
- gel = &ge->ge_locs[0];
- gef = &ge->ge_fwd;
- gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
-
- gef->gef_flags = gel->gel_flags;
-
- if (INDEX_INVALID != gel->gel_epg)
- {
- gg = gbp_endpoint_group_get (gel->gel_epg);
- gef->gef_sclass = gg->gg_sclass;
- }
- else
- {
- gg = NULL;
- }
-
- gef->gef_itf = gbp_itf_clone_and_lock (gel->gel_itf);
-
- if (!mac_address_is_zero (&ge->ge_key.gek_mac))
- {
- gbp_itf_l2_set_input_feature (gef->gef_itf, L2INPUT_FEAT_GBP_FWD);
-
- if (gbp_endpoint_is_remote (ge) || gbp_endpoint_is_external (ge))
- {
- /*
-	      * bridged packets to external endpoints should be classified
- * based on the EP's/BD's EPG
- */
- gbp_itf_l2_set_output_feature (gef->gef_itf,
- L2OUTPUT_FEAT_GBP_POLICY_MAC);
- }
- else
- {
- gbp_endpoint_add_itf (gbp_itf_get_sw_if_index (gef->gef_itf), gei);
- gbp_itf_l2_set_output_feature (gef->gef_itf,
- L2OUTPUT_FEAT_GBP_POLICY_PORT);
- }
- l2fib_add_entry (ge->ge_key.gek_mac.bytes,
- gbd->gb_bd_index,
- gbp_itf_get_sw_if_index (gef->gef_itf),
- L2FIB_ENTRY_RESULT_FLAG_STATIC);
- }
-
- vec_foreach (pfx, ge->ge_key.gek_ips)
- {
- ethernet_header_t *eth;
- u32 ip_sw_if_index;
- u32 fib_index;
- u8 *rewrite;
- index_t ai;
-
- rewrite = NULL;
- grd = gbp_route_domain_get (ge->ge_key.gek_grd);
- fib_index = grd->grd_fib_index[pfx->fp_proto];
- gef->gef_fib_index = fib_index;
-
- bd_add_del_ip_mac (gbd->gb_bd_index, fib_proto_to_ip46 (pfx->fp_proto),
- &pfx->fp_addr, &ge->ge_key.gek_mac, 1);
-
- /*
-       * add a host route via the EPG's BVI. We need this because the
-       * adj-fib does not install (due to the cover refinement check)
-       * since the BVI's prefix is /32
- */
- vec_validate (rewrite, sizeof (*eth) - 1);
- eth = (ethernet_header_t *) rewrite;
-
- eth->type = clib_host_to_net_u16 ((pfx->fp_proto == FIB_PROTOCOL_IP4 ?
- ETHERNET_TYPE_IP4 :
- ETHERNET_TYPE_IP6));
-
- if (gbp_endpoint_is_remote (ge))
- {
- /*
-	   * for dynamic EPs we must add the IP adjacency via the learned
-	   * tunnel, since the BD will not contain the EP's MAC (it was
-	   * L3 learned). The dst MAC address used is the 'BD's MAC'.
- */
- ip_sw_if_index = gbp_itf_get_sw_if_index (gef->gef_itf);
-
- mac_address_to_bytes (gbp_route_domain_get_local_mac (),
- eth->src_address);
- mac_address_to_bytes (gbp_route_domain_get_remote_mac (),
- eth->dst_address);
- }
- else
- {
- /*
- * for the static EPs we add the IP adjacency via the BVI
- * knowing that the BD has the MAC address to route to and
- * that policy will be applied on egress to the EP's port
- */
- ip_sw_if_index = gbd->gb_bvi_sw_if_index;
-
- clib_memcpy (eth->src_address,
- vnet_sw_interface_get_hw_address (vnet_get_main (),
- ip_sw_if_index),
- sizeof (eth->src_address));
- mac_address_to_bytes (&ge->ge_key.gek_mac, eth->dst_address);
- }
-
- fib_table_entry_path_add (fib_index, pfx,
- gbp_fib_source_low,
- FIB_ENTRY_FLAG_NONE,
- fib_proto_to_dpo (pfx->fp_proto),
- &pfx->fp_addr, ip_sw_if_index,
- ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
-
- ai = adj_nbr_add_or_lock_w_rewrite (pfx->fp_proto,
- fib_proto_to_link (pfx->fp_proto),
- &pfx->fp_addr,
- ip_sw_if_index, rewrite);
- vec_add1 (gef->gef_adjs, ai);
-
- /*
- * if the endpoint is external then routed packets to it must be
- * classified to the BD's EPG. But this will happen anyway with
- * the GBP_MAC classification.
- */
-
- if (NULL != gg)
- {
- if (gbp_endpoint_is_remote (ge))
- {
- dpo_id_t policy_dpo = DPO_INVALID;
-
- /*
- * interpose a policy DPO from the endpoint so that policy
- * is applied
- */
- gbp_policy_dpo_add_or_lock (fib_proto_to_dpo (pfx->fp_proto),
- grd->grd_scope,
- gg->gg_sclass, ~0, &policy_dpo);
-
- fib_table_entry_special_dpo_add (fib_index, pfx,
- gbp_fib_source_hi,
- FIB_ENTRY_FLAG_INTERPOSE,
- &policy_dpo);
- dpo_reset (&policy_dpo);
- }
-
- /*
- * send a gratuitous ARP on the EPG's uplink. this is done so
- * that if this EP has moved from some other place in the
- * 'fabric', upstream devices are informed
- */
- if (gbp_endpoint_is_local (ge) && ~0 != gg->gg_uplink_sw_if_index)
- {
- gbp_endpoint_add_itf (gbp_itf_get_sw_if_index (gef->gef_itf),
- gei);
- if (FIB_PROTOCOL_IP4 == pfx->fp_proto)
- ip4_neighbor_advertise (vlib_get_main (),
- vnet_get_main (),
- gg->gg_uplink_sw_if_index,
- &pfx->fp_addr.ip4);
- else
- ip6_neighbor_advertise (vlib_get_main (),
- vnet_get_main (),
- gg->gg_uplink_sw_if_index,
- &pfx->fp_addr.ip6);
- }
- }
- }
-
- if (gbp_endpoint_is_external (ge))
- {
- gbp_itf_l2_set_input_feature (gef->gef_itf,
- L2INPUT_FEAT_GBP_LPM_CLASSIFY);
- }
- else if (gbp_endpoint_is_local (ge))
- {
- /*
- * non-remote endpoints (i.e. those not arriving on iVXLAN
- * tunnels) need to be classified based on the input interface.
- * We enable the GBP-FWD feature only if the group has an uplink
- * interface (on which the GBP-FWD feature would send UU traffic).
- * External endpoints get classified based on an LPM match
- */
- l2input_feat_masks_t feats = L2INPUT_FEAT_GBP_SRC_CLASSIFY;
-
- if (NULL != gg && ~0 != gg->gg_uplink_sw_if_index)
- feats |= L2INPUT_FEAT_GBP_FWD;
- gbp_itf_l2_set_input_feature (gef->gef_itf, feats);
- }
-
- /*
- * update children with the new forwarding info
- */
- fib_node_back_walk_ctx_t bw_ctx = {
- .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
- .fnbw_flags = FIB_NODE_BW_FLAG_FORCE_SYNC,
- };
-
- fib_walk_sync (gbp_endpoint_fib_type, gei, &bw_ctx);
-}
-
-int
-gbp_endpoint_update_and_lock (gbp_endpoint_src_t src,
- u32 sw_if_index,
- const ip46_address_t * ips,
- const mac_address_t * mac,
- index_t gbdi, index_t grdi,
- sclass_t sclass,
- gbp_endpoint_flags_t flags,
- const ip46_address_t * tun_src,
- const ip46_address_t * tun_dst, u32 * handle)
-{
- gbp_bridge_domain_t *gbd;
- gbp_endpoint_group_t *gg;
- gbp_endpoint_src_t best;
- gbp_route_domain_t *grd;
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_t *ge;
- index_t ggi, gei;
- int rv;
-
- if (~0 == sw_if_index)
- return (VNET_API_ERROR_INVALID_SW_IF_INDEX);
-
- ge = NULL;
- gg = NULL;
-
- /*
- * we need to determine the bridge-domain, either from the EPG or
- * the BD passed
- */
- if (SCLASS_INVALID != sclass)
- {
- ggi = gbp_endpoint_group_find (sclass);
-
- if (INDEX_INVALID == ggi)
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-
- gg = gbp_endpoint_group_get (ggi);
- gbdi = gg->gg_gbd;
- grdi = gg->gg_rd;
- }
- else
- {
- if (INDEX_INVALID == gbdi)
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
- if (INDEX_INVALID == grdi)
- return (VNET_API_ERROR_NO_SUCH_FIB);
- ggi = INDEX_INVALID;
- }
-
- gbd = gbp_bridge_domain_get (gbdi);
- grd = gbp_route_domain_get (grdi);
- rv = gbp_endpoint_find_for_update (ips, grd, mac, gbd, &ge);
-
- if (0 != rv)
- return (rv);
-
- if (NULL == ge)
- {
- ge = gbp_endpoint_alloc (ips, grd, mac, gbd);
- }
- else
- {
- gbp_endpoint_ips_update (ge, ips, grd);
- }
-
- best = gbp_endpoint_get_best_src (ge);
- gei = gbp_endpoint_index (ge);
- gel = gbp_endpoint_loc_find_or_add (ge, src);
-
- gbp_endpoint_loc_update (ge, gel, gbd, sw_if_index, ggi, flags,
- tun_src, tun_dst);
-
- if (src <= best)
- {
- /*
- * either the best source has been updated or we have a new best source
- */
- gbb_endpoint_fwd_reset (ge);
- gbb_endpoint_fwd_recalc (ge);
- }
- else
- {
- /*
- * an update to a lower priority source, so we need do nothing
- */
- }
-
- if (handle)
- *handle = gei;
-
- GBP_ENDPOINT_INFO ("update: %U", format_gbp_endpoint, gei);
-
- return (0);
-}
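-
-/*
- * A minimal usage sketch (illustrative only, with assumed values): a
- * control-plane caller adds a single-IP endpoint on 'sw_if_index' with
- * sclass 100; the sclass must name an existing endpoint-group.
- */
-static __clib_unused int
-gbp_endpoint_cp_add_example (u32 sw_if_index)
-{
- ip46_address_t *ips = NULL, ip = {
- .ip4.as_u32 = clib_host_to_net_u32 (0x0a000001), /* 10.0.0.1 */
- };
- mac_address_t mac = ZERO_MAC_ADDRESS;
- u32 handle;
- int rv;
-
- vec_add1 (ips, ip);
- rv = gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP, sw_if_index,
- ips, &mac, INDEX_INVALID, INDEX_INVALID,
- 100 /* assumed sclass */,
- GBP_ENDPOINT_FLAG_NONE,
- NULL, NULL, &handle);
- vec_free (ips);
- return (rv);
-}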
-
-void
-gbp_endpoint_unlock (gbp_endpoint_src_t src, index_t gei)
-{
- gbp_endpoint_loc_t *gel, gel_copy;
- gbp_endpoint_src_t best;
- gbp_endpoint_t *ge;
- int removed;
-
- if (pool_is_free_index (gbp_endpoint_pool, gei))
- return;
-
- GBP_ENDPOINT_INFO ("delete: %U", format_gbp_endpoint, gei);
-
- ge = gbp_endpoint_get (gei);
-
- gel = gbp_endpoint_loc_find (ge, src);
-
- if (NULL == gel)
- return;
-
- /*
- * lock the EP so we can control when it is deleted
- */
- fib_node_lock (&ge->ge_node);
- best = gbp_endpoint_get_best_src (ge);
-
- /*
- * copy the location info since we'll lose it when it's removed from
- * the vector
- */
- clib_memcpy (&gel_copy, gel, sizeof (gel_copy));
-
- /*
- * remove the source we no longer need
- */
- removed = gbp_endpoint_loc_unlock (ge, gel);
-
- if (src == best)
- {
- /*
- * we have removed the old best source => recalculate fwding
- */
- if (0 == vec_len (ge->ge_locs))
- {
- /*
- * if there are no more sources left, then we need only release
- * the fwding resources held and then this EP is gone.
- */
- gbb_endpoint_fwd_reset (ge);
- }
- else
- {
- /*
- * else there are more sources. release the old and get new
- * fwding objects
- */
- gbb_endpoint_fwd_reset (ge);
- gbb_endpoint_fwd_recalc (ge);
- }
- }
- /*
- * else
- * we removed a lower priority source so we need to do nothing
- */
-
- /*
- * clear up any resources held by the source
- */
- if (removed)
- gbp_endpoint_loc_destroy (&gel_copy);
-
- /*
- * remove the lock taken above
- */
- fib_node_unlock (&ge->ge_node);
- /*
- * We may have removed the last source and so this EP is now TOAST
- * DO NOTHING BELOW HERE
- */
-}
-
-u32
-gbp_endpoint_child_add (index_t gei,
- fib_node_type_t type, fib_node_index_t index)
-{
- return (fib_node_child_add (gbp_endpoint_fib_type, gei, type, index));
-}
-
-void
-gbp_endpoint_child_remove (index_t gei, u32 sibling)
-{
- return (fib_node_child_remove (gbp_endpoint_fib_type, gei, sibling));
-}
-
-typedef struct gbp_endpoint_flush_ctx_t_
-{
- u32 sw_if_index;
- gbp_endpoint_src_t src;
- index_t *geis;
-} gbp_endpoint_flush_ctx_t;
-
-static walk_rc_t
-gbp_endpoint_flush_cb (index_t gei, void *args)
-{
- gbp_endpoint_flush_ctx_t *ctx = args;
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_t *ge;
-
- ge = gbp_endpoint_get (gei);
- gel = gbp_endpoint_loc_find (ge, ctx->src);
-
- if ((NULL != gel) && ctx->sw_if_index == gel->tun.gel_parent_sw_if_index)
- {
- vec_add1 (ctx->geis, gei);
- }
-
- return (WALK_CONTINUE);
-}
-
-/**
- * remove all learnt endpoints using the interface
- */
-void
-gbp_endpoint_flush (gbp_endpoint_src_t src, u32 sw_if_index)
-{
- gbp_endpoint_flush_ctx_t ctx = {
- .sw_if_index = sw_if_index,
- .src = src,
- };
- index_t *gei;
-
- GBP_ENDPOINT_INFO ("flush: %U %U",
- format_gbp_endpoint_src, src,
- format_vnet_sw_if_index_name, vnet_get_main (),
- sw_if_index);
- gbp_endpoint_walk (gbp_endpoint_flush_cb, &ctx);
-
- vec_foreach (gei, ctx.geis)
- {
- gbp_endpoint_unlock (src, *gei);
- }
-
- vec_free (ctx.geis);
-}
-
-void
-gbp_endpoint_walk (gbp_endpoint_cb_t cb, void *ctx)
-{
- u32 index;
-
- /* *INDENT-OFF* */
- pool_foreach_index (index, gbp_endpoint_pool)
- {
- if (!cb(index, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static clib_error_t *
-gbp_endpoint_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- ip46_address_t ip = ip46_address_initializer, *ips = NULL;
- mac_address_t mac = ZERO_MAC_ADDRESS;
- vnet_main_t *vnm = vnet_get_main ();
- u32 sclass = SCLASS_INVALID;
- u32 handle = INDEX_INVALID;
- u32 sw_if_index = ~0;
- u32 flags = GBP_ENDPOINT_FLAG_NONE;
- u8 add = 1;
- int rv;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- ip46_address_reset (&ip);
-
- if (unformat (input, "%U", unformat_vnet_sw_interface,
- vnm, &sw_if_index))
- ;
- else if (unformat (input, "add"))
- add = 1;
- else if (unformat (input, "del"))
- add = 0;
- else if (unformat (input, "sclass %d", &sclass))
- ;
- else if (unformat (input, "handle %d", &handle))
- ;
- else if (unformat (input, "ip %U", unformat_ip4_address, &ip.ip4))
- vec_add1 (ips, ip);
- else if (unformat (input, "ip %U", unformat_ip6_address, &ip.ip6))
- vec_add1 (ips, ip);
- else if (unformat (input, "mac %U", unformat_mac_address, &mac))
- ;
- else if (unformat (input, "flags 0x%x", &flags))
- ;
- else
- break;
- }
-
- if (add)
- {
- if (~0 == sw_if_index)
- return clib_error_return (0, "interface must be specified");
- if (SCLASS_INVALID == sclass)
- return clib_error_return (0, "SCLASS must be specified");
-
- rv =
- gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
- sw_if_index, ips, &mac,
- INDEX_INVALID, INDEX_INVALID,
- sclass, flags, NULL, NULL, &handle);
-
- if (rv)
- return clib_error_return (0, "GBP Endpoint update returned %d", rv);
- else
- vlib_cli_output (vm, "handle %d\n", handle);
- }
- else
- {
- if (INDEX_INVALID == handle)
- return clib_error_return (0, "handle must be specified");
-
- gbp_endpoint_unlock (GBP_ENDPOINT_SRC_CP, handle);
- }
-
- vec_free (ips);
-
- return (NULL);
-}
-
-/*?
- * Configure a GBP Endpoint
- *
- * @cliexpar
- * @cliexstart{gbp endpoint del handle <handle> | [add] <interface> sclass <SCLASS> ip <IP> mac <MAC> [flags <flags>]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_endpoint_cli_node, static) = {
- .path = "gbp endpoint",
- .short_help = "gbp endpoint del <handle> | [add] <interface> sclass <SCLASS> ip <IP> mac <MAC> [flags <flags>]",
- .function = gbp_endpoint_cli,
-};
-/* *INDENT-ON* */
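-
-/*
- * Example debug-CLI usage (interface name and values are illustrative):
- *
- *  gbp endpoint GigabitEthernet0/8/0 sclass 100 ip 10.0.0.1
- *      mac 00:11:22:33:44:55
- *  gbp endpoint del handle 0
- */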
-
-u8 *
-format_gbp_endpoint_src (u8 * s, va_list * args)
-{
- gbp_endpoint_src_t action = va_arg (*args, gbp_endpoint_src_t);
-
- switch (action)
- {
-#define _(v,a) case GBP_ENDPOINT_SRC_##v: return (format (s, "%s", a));
- foreach_gbp_endpoint_src
-#undef _
- }
-
- return (format (s, "unknown"));
-}
-
-static u8 *
-format_gbp_endpoint_fwd (u8 * s, va_list * args)
-{
- gbp_endpoint_fwd_t *gef = va_arg (*args, gbp_endpoint_fwd_t *);
-
- s = format (s, "fwd:");
- s = format (s, "\n itf:[%U]", format_gbp_itf_hdl, gef->gef_itf);
- if (GBP_ENDPOINT_FLAG_NONE != gef->gef_flags)
- {
- s = format (s, " flags:%U", format_gbp_endpoint_flags, gef->gef_flags);
- }
-
- return (s);
-}
-
-static u8 *
-format_gbp_endpoint_key (u8 * s, va_list * args)
-{
- gbp_endpoint_key_t *gek = va_arg (*args, gbp_endpoint_key_t *);
- const fib_prefix_t *pfx;
-
- s = format (s, "ips:[");
-
- vec_foreach (pfx, gek->gek_ips)
- {
- s = format (s, "%U, ", format_fib_prefix, pfx);
- }
- s = format (s, "]");
-
- s = format (s, " mac:%U", format_mac_address_t, &gek->gek_mac);
-
- return (s);
-}
-
-static u8 *
-format_gbp_endpoint_loc (u8 * s, va_list * args)
-{
- gbp_endpoint_loc_t *gel = va_arg (*args, gbp_endpoint_loc_t *);
-
- s = format (s, "%U", format_gbp_endpoint_src, gel->gel_src);
- s = format (s, "\n EPG:%d [%U]", gel->gel_epg,
- format_gbp_itf_hdl, gel->gel_itf);
-
- if (GBP_ENDPOINT_FLAG_NONE != gel->gel_flags)
- {
- s = format (s, " flags:%U", format_gbp_endpoint_flags, gel->gel_flags);
- }
- if (GBP_ENDPOINT_FLAG_REMOTE & gel->gel_flags)
- {
- s = format (s, " tun:[");
- s = format (s, "parent:%U", format_vnet_sw_if_index_name,
- vnet_get_main (), gel->tun.gel_parent_sw_if_index);
- s = format (s, " {%U,%U}]",
- format_ip46_address, &gel->tun.gel_src, IP46_TYPE_ANY,
- format_ip46_address, &gel->tun.gel_dst, IP46_TYPE_ANY);
- }
-
- return (s);
-}
-
-u8 *
-format_gbp_endpoint (u8 * s, va_list * args)
-{
- index_t gei = va_arg (*args, index_t);
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_t *ge;
-
- ge = gbp_endpoint_get (gei);
-
- s = format (s, "[@%d] %U", gei, format_gbp_endpoint_key, &ge->ge_key);
- s = format (s, " last-time:[%f]", ge->ge_last_time);
-
- vec_foreach (gel, ge->ge_locs)
- {
- s = format (s, "\n %U", format_gbp_endpoint_loc, gel);
- }
- s = format (s, "\n %U", format_gbp_endpoint_fwd, &ge->ge_fwd);
-
- return s;
-}
-
-static walk_rc_t
-gbp_endpoint_show_one (index_t gei, void *ctx)
-{
- vlib_main_t *vm;
-
- vm = ctx;
- vlib_cli_output (vm, " %U", format_gbp_endpoint, gei);
-
- return (WALK_CONTINUE);
-}
-
-static int
-gbp_endpoint_walk_ip_itf (clib_bihash_kv_24_8_t * kvp, void *arg)
-{
- ip46_address_t ip;
- vlib_main_t *vm;
- u32 sw_if_index;
-
- vm = arg;
-
- gbp_endpoint_extract_key_ip_itf (kvp, &ip, &sw_if_index);
-
- vlib_cli_output (vm, " {%U, %U} -> %d",
- format_ip46_address, &ip, IP46_TYPE_ANY,
- format_vnet_sw_if_index_name, vnet_get_main (),
- sw_if_index, kvp->value);
- return (BIHASH_WALK_CONTINUE);
-}
-
-static int
-gbp_endpoint_walk_mac_itf (clib_bihash_kv_16_8_t * kvp, void *arg)
-{
- mac_address_t mac;
- vlib_main_t *vm;
- u32 sw_if_index;
-
- vm = arg;
-
- gbp_endpoint_extract_key_mac_itf (kvp, &mac, &sw_if_index);
-
- vlib_cli_output (vm, " {%U, %U} -> %d",
- format_mac_address_t, &mac,
- format_vnet_sw_if_index_name, vnet_get_main (),
- sw_if_index, kvp->value);
- return (BIHASH_WALK_CONTINUE);
-}
-
-static clib_error_t *
-gbp_endpoint_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- u32 show_dbs, handle;
-
- handle = INDEX_INVALID;
- show_dbs = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "%d", &handle))
- ;
- else if (unformat (input, "db"))
- show_dbs = 1;
- else
- break;
- }
-
- if (INDEX_INVALID != handle)
- {
- vlib_cli_output (vm, "%U", format_gbp_endpoint, handle);
- }
- else if (show_dbs)
- {
- vlib_cli_output (vm, "\nDatabases:");
- clib_bihash_foreach_key_value_pair_24_8 (&gbp_ep_db.ged_by_ip_rd,
- gbp_endpoint_walk_ip_itf, vm);
- clib_bihash_foreach_key_value_pair_16_8
- (&gbp_ep_db.ged_by_mac_bd, gbp_endpoint_walk_mac_itf, vm);
- }
- else
- {
- vlib_cli_output (vm, "Endpoints:");
- gbp_endpoint_walk (gbp_endpoint_show_one, vm);
- }
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy Endpoints and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp endpoint}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_endpoint_show_node, static) = {
- .path = "show gbp endpoint",
- .short_help = "show gbp endpoint\n",
- .function = gbp_endpoint_show,
-};
-/* *INDENT-ON* */
-
-static void
-gbp_endpoint_check (index_t gei, f64 start_time)
-{
- gbp_endpoint_group_t *gg;
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_t *ge;
-
- ge = gbp_endpoint_get (gei);
- gel = gbp_endpoint_loc_find (ge, GBP_ENDPOINT_SRC_DP);
-
- if (NULL != gel)
- {
- gg = gbp_endpoint_group_get (gel->gel_epg);
-
- if ((start_time - ge->ge_last_time) >
- gg->gg_retention.remote_ep_timeout)
- {
- gbp_endpoint_unlock (GBP_ENDPOINT_SRC_DP, gei);
- }
- }
-}
-
-static void
-gbp_endpoint_scan_l2 (vlib_main_t * vm)
-{
- clib_bihash_16_8_t *gte_table = &gbp_ep_db.ged_by_mac_bd;
- f64 last_start, start_time, delta_t;
- int i, j, k;
-
- if (!gte_table->instantiated)
- return;
-
- delta_t = 0;
- last_start = start_time = vlib_time_now (vm);
-
- for (i = 0; i < gte_table->nbuckets; i++)
- {
- clib_bihash_bucket_16_8_t *b;
- clib_bihash_value_16_8_t *v;
-
- /* allow no more than 20us without a pause */
- delta_t = vlib_time_now (vm) - last_start;
- if (delta_t > 20e-6)
- {
- /* suspend for 100 us */
- vlib_process_suspend (vm, 100e-6);
- last_start = vlib_time_now (vm);
- }
-
- b = clib_bihash_get_bucket_16_8 (gte_table, i);
- if (clib_bihash_bucket_is_empty_16_8 (b))
- continue;
- v = clib_bihash_get_value_16_8 (gte_table, b->offset);
-
- for (j = 0; j < (1 << b->log2_pages); j++)
- {
- for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
- {
- if (clib_bihash_is_free_16_8 (&v->kvp[k]))
- continue;
-
- gbp_endpoint_check (v->kvp[k].value, start_time);
-
- /*
- * Note: we may have just freed the bucket's backing
- * storage, so check right here...
- */
- if (clib_bihash_bucket_is_empty_16_8 (b))
- goto doublebreak;
- }
- v++;
- }
- doublebreak:
- ;
- }
-}
-
-static void
-gbp_endpoint_scan_l3 (vlib_main_t * vm)
-{
- clib_bihash_24_8_t *gte_table = &gbp_ep_db.ged_by_ip_rd;
- f64 last_start, start_time, delta_t;
- int i, j, k;
-
- if (!gte_table->instantiated)
- return;
-
- delta_t = 0;
- last_start = start_time = vlib_time_now (vm);
-
- for (i = 0; i < gte_table->nbuckets; i++)
- {
- clib_bihash_bucket_24_8_t *b;
- clib_bihash_value_24_8_t *v;
-
- /* allow no more than 20us without a pause */
- delta_t = vlib_time_now (vm) - last_start;
- if (delta_t > 20e-6)
- {
- /* suspend for 100 us */
- vlib_process_suspend (vm, 100e-6);
- last_start = vlib_time_now (vm);
- }
-
- b = clib_bihash_get_bucket_24_8 (gte_table, i);
- if (clib_bihash_bucket_is_empty_24_8 (b))
- continue;
- v = clib_bihash_get_value_24_8 (gte_table, b->offset);
-
- for (j = 0; j < (1 << b->log2_pages); j++)
- {
- for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
- {
- if (clib_bihash_is_free_24_8 (&v->kvp[k]))
- continue;
-
- gbp_endpoint_check (v->kvp[k].value, start_time);
-
- /*
- * Note: we may have just freed the bucket's backing
- * storage, so check right here...
- */
- if (clib_bihash_bucket_is_empty_24_8 (b))
- goto doublebreak;
- }
- v++;
- }
- doublebreak:
- ;
- }
-}
-
-void
-gbp_endpoint_scan (vlib_main_t * vm)
-{
- gbp_endpoint_scan_l2 (vm);
- gbp_endpoint_scan_l3 (vm);
-}
-
-static fib_node_t *
-gbp_endpoint_get_node (fib_node_index_t index)
-{
- gbp_endpoint_t *ge;
-
- ge = gbp_endpoint_get (index);
-
- return (&ge->ge_node);
-}
-
-static gbp_endpoint_t *
-gbp_endpoint_from_fib_node (fib_node_t * node)
-{
- ASSERT (gbp_endpoint_fib_type == node->fn_type);
- return ((gbp_endpoint_t *) node);
-}
-
-static void
-gbp_endpoint_last_lock_gone (fib_node_t * node)
-{
- const gbp_bridge_domain_t *gbd;
- const gbp_route_domain_t *grd;
- const fib_prefix_t *pfx;
- gbp_endpoint_t *ge;
-
- ge = gbp_endpoint_from_fib_node (node);
-
- ASSERT (0 == vec_len (ge->ge_locs));
-
- gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
-
- /*
- * we have removed the last source. this EP is toast
- */
- if (INDEX_INVALID != ge->ge_key.gek_gbd)
- {
- gbp_endpoint_del_mac (&ge->ge_key.gek_mac, gbd->gb_bd_index);
- }
- vec_foreach (pfx, ge->ge_key.gek_ips)
- {
- grd = gbp_route_domain_get (ge->ge_key.gek_grd);
- gbp_endpoint_del_ip (&pfx->fp_addr, grd->grd_fib_index[pfx->fp_proto]);
- }
- pool_put (gbp_endpoint_pool, ge);
-}
-
-static fib_node_back_walk_rc_t
-gbp_endpoint_back_walk_notify (fib_node_t * node,
- fib_node_back_walk_ctx_t * ctx)
-{
- ASSERT (0);
-
- return (FIB_NODE_BACK_WALK_CONTINUE);
-}
-
-/*
- * The FIB path's graph node virtual function table
- */
-static const fib_node_vft_t gbp_endpoint_vft = {
- .fnv_get = gbp_endpoint_get_node,
- .fnv_last_lock = gbp_endpoint_last_lock_gone,
- .fnv_back_walk = gbp_endpoint_back_walk_notify,
- // .fnv_mem_show = fib_path_memory_show,
-};
-
-static clib_error_t *
-gbp_endpoint_init (vlib_main_t * vm)
-{
-#define GBP_EP_HASH_NUM_BUCKETS (2 * 1024)
-#define GBP_EP_HASH_MEMORY_SIZE (1 << 20)
-
- clib_bihash_init_24_8 (&gbp_ep_db.ged_by_ip_rd,
- "GBP Endpoints - IP/RD",
- GBP_EP_HASH_NUM_BUCKETS, GBP_EP_HASH_MEMORY_SIZE);
-
- clib_bihash_init_16_8 (&gbp_ep_db.ged_by_mac_bd,
- "GBP Endpoints - MAC/BD",
- GBP_EP_HASH_NUM_BUCKETS, GBP_EP_HASH_MEMORY_SIZE);
-
- gbp_ep_logger = vlib_log_register_class ("gbp", "ep");
- gbp_endpoint_fib_type = fib_node_register_new_type (&gbp_endpoint_vft);
- gbp_fib_source_hi = fib_source_allocate ("gbp-endpoint-hi",
- FIB_SOURCE_PRIORITY_HI,
- FIB_SOURCE_BH_SIMPLE);
- gbp_fib_source_low = fib_source_allocate ("gbp-endpoint-low",
- FIB_SOURCE_PRIORITY_LOW,
- FIB_SOURCE_BH_SIMPLE);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_endpoint_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_endpoint.h b/src/plugins/gbp/gbp_endpoint.h
deleted file mode 100644
index 3155e7be4e0..00000000000
--- a/src/plugins/gbp/gbp_endpoint.h
+++ /dev/null
@@ -1,376 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_ENDPOINT_H__
-#define __GBP_ENDPOINT_H__
-
-#include <plugins/gbp/gbp_types.h>
-#include <plugins/gbp/gbp_itf.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/mac_address.h>
-
-#include <vppinfra/bihash_16_8.h>
-#include <vppinfra/bihash_template.h>
-#include <vppinfra/bihash_24_8.h>
-#include <vppinfra/bihash_template.h>
-
-/**
- * Flags for each endpoint
- */
-typedef enum gbp_endpoint_attr_t_
-{
- GBP_ENDPOINT_ATTR_FIRST = 0,
- GBP_ENDPOINT_ATTR_BOUNCE = GBP_ENDPOINT_ATTR_FIRST,
- GBP_ENDPOINT_ATTR_REMOTE,
- GBP_ENDPOINT_ATTR_LEARNT,
- GBP_ENDPOINT_ATTR_EXTERNAL,
- GBP_ENDPOINT_ATTR_LAST,
-} gbp_endpoint_attr_t;
-
-typedef enum gbp_endpoint_flags_t_
-{
- GBP_ENDPOINT_FLAG_NONE = 0,
- GBP_ENDPOINT_FLAG_BOUNCE = (1 << GBP_ENDPOINT_ATTR_BOUNCE),
- GBP_ENDPOINT_FLAG_REMOTE = (1 << GBP_ENDPOINT_ATTR_REMOTE),
- GBP_ENDPOINT_FLAG_LEARNT = (1 << GBP_ENDPOINT_ATTR_LEARNT),
- GBP_ENDPOINT_FLAG_EXTERNAL = (1 << GBP_ENDPOINT_ATTR_EXTERNAL),
-} gbp_endpoint_flags_t;
-
-#define GBP_ENDPOINT_ATTR_NAMES { \
- [GBP_ENDPOINT_ATTR_BOUNCE] = "bounce", \
- [GBP_ENDPOINT_ATTR_REMOTE] = "remote", \
- [GBP_ENDPOINT_ATTR_LEARNT] = "learnt", \
- [GBP_ENDPOINT_ATTR_EXTERNAL] = "external", \
-}
-
-extern u8 *format_gbp_endpoint_flags (u8 * s, va_list * args);
-
-/**
- * Sources of Endpoints in priority order. The best (lowest value) source
- * provides the forwarding information.
- * Data-plane takes preference because the CP data is not always
- * complete; it may not have the sclass.
- */
-#define foreach_gbp_endpoint_src \
- _(DP, "data-plane") \
- _(CP, "control-plane") \
- _(RR, "recursive-resolution")
-
-typedef enum gbp_endpoint_src_t_
-{
-#define _(v,s) GBP_ENDPOINT_SRC_##v,
- foreach_gbp_endpoint_src
-#undef _
-} gbp_endpoint_src_t;
-
-#define GBP_ENDPOINT_SRC_MAX (GBP_ENDPOINT_SRC_RR+1)
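-
-/*
- * The enum values double as priorities: GBP_ENDPOINT_SRC_DP (0) is the
- * best source and GBP_ENDPOINT_SRC_RR the worst, hence numeric
- * comparisons such as "if (src <= best)" when deciding whether an
- * update requires the forwarding state to be recalculated.
- */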
-
-extern u8 *format_gbp_endpoint_src (u8 * s, va_list * args);
-
-/**
- * This is the identity of an endpoint; as such, it is information
- * about an endpoint that does not change.
- * The key is used to add the EP into the various databases for retrieval.
- */
-typedef struct gbp_endpoint_key_t_
-{
- /**
- * A vector of ip addresses that belong to the endpoint.
- * Together with the EPG's route-domain (RD) this forms the EP's L3 key
- */
- fib_prefix_t *gek_ips;
-
- /**
- * MAC address of the endpoint.
- * Together with the EPG's bridge-domain (BD) this forms the EP's L2 key
- */
- mac_address_t gek_mac;
-
- /**
- * Index of the Bridge-Domain
- */
- index_t gek_gbd;
-
- /**
- * Index of the Route-Domain
- */
- index_t gek_grd;
-} gbp_endpoint_key_t;
-
-/**
- * Information about the location of the endpoint provided by a source
- * of endpoints
- */
-typedef struct gbp_endpoint_loc_t_
-{
- /**
- * The source providing this location information
- */
- gbp_endpoint_src_t gel_src;
-
- /**
- * The interface on which the EP is connected
- */
- gbp_itf_hdl_t gel_itf;
-
- /**
- * Endpoint flags
- */
- gbp_endpoint_flags_t gel_flags;
-
- /**
- * Endpoint Group.
- */
- index_t gel_epg;
-
- /**
- * number of times this source has locked this
- */
- u32 gel_locks;
-
- /**
- * Tunnel info for remote endpoints
- */
- struct
- {
- u32 gel_parent_sw_if_index;
- ip46_address_t gel_src;
- ip46_address_t gel_dst;
- } tun;
-} gbp_endpoint_loc_t;
-
-/**
- * An endpoint's current forwarding state
- */
-typedef struct gbp_endpoint_fwd_t_
-{
- /**
- * The interface on which the EP is connected
- */
- gbp_itf_hdl_t gef_itf;
-
- /**
- * The L3 adj, if created
- */
- index_t *gef_adjs;
-
- /**
- * Endpoint Group's sclass. cached for fast DP access.
- */
- sclass_t gef_sclass;
-
- /**
- * FIB index the EP is in
- */
- u32 gef_fib_index;
-
- gbp_endpoint_flags_t gef_flags;
-} gbp_endpoint_fwd_t;
-
-/**
- * A Group Based Policy Endpoint.
- * This is typically a VM or container. If the endpoint is local (i.e. on
- * the same compute node as VPP) then there is one interface per-endpoint.
- * If the EP is remote, e.g. reachable over a [vxlan] tunnel, then there
- * will be multiple EPs reachable over the tunnel and they can be distinguished
- * via either their MAC or IP Address[es].
- */
-typedef struct gbp_endpoint_t_
-{
- /**
- * A FIB node that allows the tracking of children.
- */
- fib_node_t ge_node;
-
- /**
- * The key/ID of this EP
- */
- gbp_endpoint_key_t ge_key;
-
- /**
- * Location information provided by the various sources.
- * These are sorted based on source priority.
- */
- gbp_endpoint_loc_t *ge_locs;
-
- gbp_endpoint_fwd_t ge_fwd;
-
- /**
- * The last time a packet was seen from this endpoint
- */
- f64 ge_last_time;
-} gbp_endpoint_t;
-
-extern u8 *format_gbp_endpoint (u8 * s, va_list * args);
-
-/**
- * GBP Endpoint Databases
- */
-typedef struct gbp_ep_by_ip_itf_db_t_
-{
- index_t *ged_by_sw_if_index;
- clib_bihash_24_8_t ged_by_ip_rd;
- clib_bihash_16_8_t ged_by_mac_bd;
-} gbp_ep_db_t;
-
-extern int gbp_endpoint_update_and_lock (gbp_endpoint_src_t src,
- u32 sw_if_index,
- const ip46_address_t * ip,
- const mac_address_t * mac,
- index_t gbd, index_t grd,
- sclass_t sclass,
- gbp_endpoint_flags_t flags,
- const ip46_address_t * tun_src,
- const ip46_address_t * tun_dst,
- u32 * handle);
-extern void gbp_endpoint_unlock (gbp_endpoint_src_t src, index_t gbpei);
-extern u32 gbp_endpoint_child_add (index_t gei,
- fib_node_type_t type,
- fib_node_index_t index);
-extern void gbp_endpoint_child_remove (index_t gei, u32 sibling);
-
-typedef walk_rc_t (*gbp_endpoint_cb_t) (index_t gbpei, void *ctx);
-extern void gbp_endpoint_walk (gbp_endpoint_cb_t cb, void *ctx);
-extern void gbp_endpoint_scan (vlib_main_t * vm);
-extern int gbp_endpoint_is_remote (const gbp_endpoint_t * ge);
-extern int gbp_endpoint_is_local (const gbp_endpoint_t * ge);
-extern int gbp_endpoint_is_external (const gbp_endpoint_t * ge);
-extern int gbp_endpoint_is_learnt (const gbp_endpoint_t * ge);
-
-
-extern void gbp_endpoint_flush (gbp_endpoint_src_t src, u32 sw_if_index);
-
-/**
- * DP functions and databases
- */
-extern gbp_ep_db_t gbp_ep_db;
-extern gbp_endpoint_t *gbp_endpoint_pool;
-
-/**
- * Get the endpoint from its pool index
- */
-always_inline gbp_endpoint_t *
-gbp_endpoint_get (index_t gbpei)
-{
- return (pool_elt_at_index (gbp_endpoint_pool, gbpei));
-}
-
-static_always_inline void
-gbp_endpoint_mk_key_mac (const u8 * mac,
- u32 bd_index, clib_bihash_kv_16_8_t * key)
-{
- key->key[0] = ethernet_mac_address_u64 (mac);
- key->key[1] = bd_index;
-}
-
-static_always_inline gbp_endpoint_t *
-gbp_endpoint_find_mac (const u8 * mac, u32 bd_index)
-{
- clib_bihash_kv_16_8_t key, value;
- int rv;
-
- gbp_endpoint_mk_key_mac (mac, bd_index, &key);
-
- rv = clib_bihash_search_16_8 (&gbp_ep_db.ged_by_mac_bd, &key, &value);
-
- if (0 != rv)
- return NULL;
-
- return (gbp_endpoint_get (value.value));
-}
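-
-/*
- * A minimal usage sketch (illustrative only): resolve a packet's source
- * endpoint, and hence its sclass, from its source MAC and bridge-domain.
- */
-static_always_inline sclass_t
-gbp_endpoint_sclass_example (const u8 * src_mac0, u32 bd_index0)
-{
- gbp_endpoint_t *ge0;
-
- ge0 = gbp_endpoint_find_mac (src_mac0, bd_index0);
-
- /* an unknown MAC has no endpoint and thus no sclass */
- return (ge0 ? ge0->ge_fwd.gef_sclass : SCLASS_INVALID);
-}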
-
-static_always_inline void
-gbp_endpoint_mk_key_ip (const ip46_address_t * ip,
- u32 fib_index, clib_bihash_kv_24_8_t * key)
-{
- key->key[0] = ip->as_u64[0];
- key->key[1] = ip->as_u64[1];
- key->key[2] = fib_index;
-}
-
-static_always_inline void
-gbp_endpoint_mk_key_ip4 (const ip4_address_t * ip,
- u32 fib_index, clib_bihash_kv_24_8_t * key)
-{
- const ip46_address_t a = {
- .ip4 = *ip,
- };
- gbp_endpoint_mk_key_ip (&a, fib_index, key);
-}
-
-static_always_inline gbp_endpoint_t *
-gbp_endpoint_find_ip4 (const ip4_address_t * ip, u32 fib_index)
-{
- clib_bihash_kv_24_8_t key, value;
- int rv;
-
- gbp_endpoint_mk_key_ip4 (ip, fib_index, &key);
-
- rv = clib_bihash_search_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, &value);
-
- if (0 != rv)
- return NULL;
-
- return (gbp_endpoint_get (value.value));
-}
-
-static_always_inline void
-gbp_endpoint_mk_key_ip6 (const ip6_address_t * ip,
- u32 fib_index, clib_bihash_kv_24_8_t * key)
-{
- key->key[0] = ip->as_u64[0];
- key->key[1] = ip->as_u64[1];
- key->key[2] = fib_index;
-}
-
-static_always_inline gbp_endpoint_t *
-gbp_endpoint_find_ip6 (const ip6_address_t * ip, u32 fib_index)
-{
- clib_bihash_kv_24_8_t key, value;
- int rv;
-
- gbp_endpoint_mk_key_ip6 (ip, fib_index, &key);
-
- rv = clib_bihash_search_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, &value);
-
- if (0 != rv)
- return NULL;
-
- return (gbp_endpoint_get (value.value));
-}
-
-static_always_inline gbp_endpoint_t *
-gbp_endpoint_find_itf (u32 sw_if_index)
-{
- index_t gei;
-
- gei = gbp_ep_db.ged_by_sw_if_index[sw_if_index];
-
- if (INDEX_INVALID != gei)
- return (gbp_endpoint_get (gei));
-
- return (NULL);
-}
-
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_endpoint_group.c b/src/plugins/gbp/gbp_endpoint_group.c
deleted file mode 100644
index b9044378e3b..00000000000
--- a/src/plugins/gbp/gbp_endpoint_group.c
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
- * gbp.h : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_endpoint_group.h>
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#include <vnet/dpo/dvr_dpo.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/l2/l2_input.h>
-
-/**
- * Pool of GBP endpoint_groups
- */
-gbp_endpoint_group_t *gbp_endpoint_group_pool;
-
-/**
- * DB of endpoint_groups
- */
-gbp_endpoint_group_db_t gbp_endpoint_group_db;
-
-/**
- * Map sclass to EPG
- */
-uword *gbp_epg_sclass_db;
-
-vlib_log_class_t gg_logger;
-
-#define GBP_EPG_DBG(...) \
- vlib_log_debug (gg_logger, __VA_ARGS__);
-
-gbp_endpoint_group_t *
-gbp_endpoint_group_get (index_t i)
-{
- return (pool_elt_at_index (gbp_endpoint_group_pool, i));
-}
-
-void
-gbp_endpoint_group_lock (index_t ggi)
-{
- gbp_endpoint_group_t *gg;
-
- if (INDEX_INVALID == ggi)
- return;
-
- gg = gbp_endpoint_group_get (ggi);
- gg->gg_locks++;
-}
-
-index_t
-gbp_endpoint_group_find (sclass_t sclass)
-{
- uword *p;
-
- p = hash_get (gbp_endpoint_group_db.gg_hash_sclass, sclass);
-
- if (NULL != p)
- return p[0];
-
- return (INDEX_INVALID);
-}
-
-int
-gbp_endpoint_group_add_and_lock (vnid_t vnid,
- u16 sclass,
- u32 bd_id,
- u32 rd_id,
- u32 uplink_sw_if_index,
- const gbp_endpoint_retention_t * retention)
-{
- gbp_endpoint_group_t *gg;
- index_t ggi;
-
- ggi = gbp_endpoint_group_find (sclass);
-
- if (INDEX_INVALID == ggi)
- {
- fib_protocol_t fproto;
- index_t gbi, grdi;
-
- gbi = gbp_bridge_domain_find_and_lock (bd_id);
-
- if (~0 == gbi)
- return (VNET_API_ERROR_BD_NOT_MODIFIABLE);
-
- grdi = gbp_route_domain_find_and_lock (rd_id);
-
- if (~0 == grdi)
- {
- gbp_bridge_domain_unlock (gbi);
- return (VNET_API_ERROR_NO_SUCH_FIB);
- }
-
- pool_get_zero (gbp_endpoint_group_pool, gg);
-
- gg->gg_vnid = vnid;
- gg->gg_rd = grdi;
- gg->gg_gbd = gbi;
-
- gg->gg_uplink_sw_if_index = uplink_sw_if_index;
- gbp_itf_hdl_reset (&gg->gg_uplink_itf);
- gg->gg_locks = 1;
- gg->gg_sclass = sclass;
- gg->gg_retention = *retention;
-
- if (SCLASS_INVALID != gg->gg_sclass)
- hash_set (gbp_epg_sclass_db, gg->gg_sclass, gg->gg_vnid);
-
- /*
- * an egress DVR dpo for internal subnets to use when sending
- * on the uplink interface
- */
- if (~0 != gg->gg_uplink_sw_if_index)
- {
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- dvr_dpo_add_or_lock (uplink_sw_if_index,
- fib_proto_to_dpo (fproto),
- &gg->gg_dpo[fproto]);
- }
-
- /*
- * Add the uplink to the BD.
- * Packets arriving directly from the uplink have already had policy applied
- */
- gg->gg_uplink_itf =
- gbp_itf_l2_add_and_lock (gg->gg_uplink_sw_if_index, gbi);
-
- gbp_itf_l2_set_input_feature (gg->gg_uplink_itf,
- L2INPUT_FEAT_GBP_NULL_CLASSIFY);
- }
-
- hash_set (gbp_endpoint_group_db.gg_hash_sclass,
- gg->gg_sclass, gg - gbp_endpoint_group_pool);
- }
- else
- {
- gg = gbp_endpoint_group_get (ggi);
- gg->gg_locks++;
- }
-
- GBP_EPG_DBG ("add: %U", format_gbp_endpoint_group, gg);
-
- return (0);
-}
-
-void
-gbp_endpoint_group_unlock (index_t ggi)
-{
- gbp_endpoint_group_t *gg;
-
- if (INDEX_INVALID == ggi)
- return;
-
- gg = gbp_endpoint_group_get (ggi);
-
- gg->gg_locks--;
-
- if (0 == gg->gg_locks)
- {
- fib_protocol_t fproto;
-
- gg = pool_elt_at_index (gbp_endpoint_group_pool, ggi);
-
- gbp_itf_unlock (&gg->gg_uplink_itf);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- dpo_reset (&gg->gg_dpo[fproto]);
- }
- gbp_bridge_domain_unlock (gg->gg_gbd);
- gbp_route_domain_unlock (gg->gg_rd);
-
- if (SCLASS_INVALID != gg->gg_sclass)
- hash_unset (gbp_epg_sclass_db, gg->gg_sclass);
- hash_unset (gbp_endpoint_group_db.gg_hash_sclass, gg->gg_sclass);
-
- pool_put (gbp_endpoint_group_pool, gg);
- }
-}
-
-int
-gbp_endpoint_group_delete (sclass_t sclass)
-{
- index_t ggi;
-
- ggi = gbp_endpoint_group_find (sclass);
-
- if (INDEX_INVALID != ggi)
- {
- GBP_EPG_DBG ("del: %U", format_gbp_endpoint_group,
- gbp_endpoint_group_get (ggi));
- gbp_endpoint_group_unlock (ggi);
-
- return (0);
- }
-
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-}
-
-u32
-gbp_endpoint_group_get_bd_id (const gbp_endpoint_group_t * gg)
-{
- const gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gg->gg_gbd);
-
- return (gb->gb_bd_id);
-}
-
-index_t
-gbp_endpoint_group_get_fib_index (const gbp_endpoint_group_t * gg,
- fib_protocol_t fproto)
-{
- const gbp_route_domain_t *grd;
-
- grd = gbp_route_domain_get (gg->gg_rd);
-
- return (grd->grd_fib_index[fproto]);
-}
-
-void
-gbp_endpoint_group_walk (gbp_endpoint_group_cb_t cb, void *ctx)
-{
- gbp_endpoint_group_t *gbpe;
-
- /* *INDENT-OFF* */
- pool_foreach (gbpe, gbp_endpoint_group_pool)
- {
- if (!cb(gbpe, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static clib_error_t *
-gbp_endpoint_group_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- gbp_endpoint_retention_t retention = { 0 };
- vnid_t vnid = VNID_INVALID, sclass = SCLASS_INVALID;
- vnet_main_t *vnm = vnet_get_main ();
- u32 uplink_sw_if_index = ~0;
- u32 bd_id = ~0;
- u32 rd_id = ~0;
- u8 add = 1;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "%U", unformat_vnet_sw_interface,
- vnm, &uplink_sw_if_index))
- ;
- else if (unformat (input, "add"))
- add = 1;
- else if (unformat (input, "del"))
- add = 0;
- else if (unformat (input, "epg %d", &vnid))
- ;
- else if (unformat (input, "sclass %d", &sclass))
- ;
- else if (unformat (input, "bd %d", &bd_id))
- ;
- else if (unformat (input, "rd %d", &rd_id))
- ;
- else
- break;
- }
-
- if (VNID_INVALID == vnid)
- return clib_error_return (0, "EPG-ID must be specified");
-
- if (add)
- {
- if (~0 == bd_id)
- return clib_error_return (0, "Bridge-domain must be specified");
- if (~0 == rd_id)
- return clib_error_return (0, "route-domain must be specified");
-
- gbp_endpoint_group_add_and_lock (vnid, sclass, bd_id, rd_id,
- uplink_sw_if_index, &retention);
- }
- else
- gbp_endpoint_group_delete (vnid);
-
- return (NULL);
-}
-
-/*?
- * Configure a GBP Endpoint Group
- *
- * @cliexpar
- * @cliexstart{gbp endpoint-group [del] epg <ID> bd <ID> rd <ID> [sclass <ID>] [<interface>]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_endpoint_group_cli_node, static) = {
- .path = "gbp endpoint-group",
- .short_help = "gbp endpoint-group [del] epg <ID> bd <ID> rd <ID> [sclass <ID>] [<interface>]",
- .function = gbp_endpoint_group_cli,
-};
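-
-/*
- * Example debug-CLI usage (IDs and interface are illustrative):
- *
- *  gbp endpoint-group epg 200 sclass 100 bd 1 rd 1 GigabitEthernet0/8/0
- *  gbp endpoint-group del epg 200
- */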
-
-static u8 *
-format_gbp_endpoint_retention (u8 * s, va_list * args)
-{
- gbp_endpoint_retention_t *rt = va_arg (*args, gbp_endpoint_retention_t*);
-
- s = format (s, "[remote-EP-timeout:%d]", rt->remote_ep_timeout);
-
- return (s);
-}
-
-u8 *
-format_gbp_endpoint_group (u8 * s, va_list * args)
-{
- gbp_endpoint_group_t *gg = va_arg (*args, gbp_endpoint_group_t*);
-
- if (NULL != gg)
- s = format (s, "[%d] %d, sclass:%d bd:%d rd:%d uplink:%U retention:%U locks:%d",
- gg - gbp_endpoint_group_pool,
- gg->gg_vnid,
- gg->gg_sclass,
- gg->gg_gbd,
- gg->gg_rd,
- format_gbp_itf_hdl, gg->gg_uplink_itf,
- format_gbp_endpoint_retention, &gg->gg_retention,
- gg->gg_locks);
- else
- s = format (s, "NULL");
-
- return (s);
-}
-
-static int
-gbp_endpoint_group_show_one (gbp_endpoint_group_t *gg, void *ctx)
-{
- vlib_main_t *vm;
-
- vm = ctx;
- vlib_cli_output (vm, " %U",format_gbp_endpoint_group, gg);
-
- return (1);
-}
-
-static clib_error_t *
-gbp_endpoint_group_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "Endpoint-Groups:");
- gbp_endpoint_group_walk (gbp_endpoint_group_show_one, vm);
-
- return (NULL);
-}
-
-
-/*?
- * Show Group Based Policy Endpoint-Groups and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp endpoint-group}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_endpoint_group_show_node, static) = {
- .path = "show gbp endpoint-group",
- .short_help = "show gbp endpoint-group\n",
- .function = gbp_endpoint_group_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_endpoint_group_init (vlib_main_t * vm)
-{
- gg_logger = vlib_log_register_class ("gbp", "epg");
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_endpoint_group_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_endpoint_group.h b/src/plugins/gbp/gbp_endpoint_group.h
deleted file mode 100644
index c5fdff8463d..00000000000
--- a/src/plugins/gbp/gbp_endpoint_group.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_ENDPOINT_GROUP_H__
-#define __GBP_ENDPOINT_GROUP_H__
-
-#include <plugins/gbp/gbp_types.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#include <vnet/fib/fib_types.h>
-
-/**
- * Endpoint Retention Policy
- */
-typedef struct gbp_endpoint_retention_t_
-{
- /** Aging timeout for remote endpoints */
- u32 remote_ep_timeout;
-} gbp_endpoint_retention_t;
-
-/**
- * An Endpoint Group representation
- */
-typedef struct gpb_endpoint_group_t_
-{
- /**
- * ID
- */
- vnid_t gg_vnid;
-
- /**
- * Sclass. Could be unset => ~0
- */
- u16 gg_sclass;
-
- /**
- * Bridge-domain ID the EPG is in
- */
- index_t gg_gbd;
-
- /**
- * route-domain/IP-table ID the EPG is in
- */
- index_t gg_rd;
-
- /**
- * Is the EPG an external/NAT
- */
- u8 gg_is_ext;
-
- /**
- * the uplink interface dedicated to the EPG
- */
- u32 gg_uplink_sw_if_index;
- gbp_itf_hdl_t gg_uplink_itf;
-
- /**
- * The DPO used in the L3 path for forwarding internal subnets
- */
- dpo_id_t gg_dpo[FIB_PROTOCOL_IP_MAX];
-
- /**
- * Locks/references to this EPG
- */
- u32 gg_locks;
-
- /**
- * EP retention policy
- */
- gbp_endpoint_retention_t gg_retention;
-} gbp_endpoint_group_t;
-
-/**
- * EPG DB, keyed on EPG-ID
- */
-typedef struct gbp_endpoint_group_db_t_
-{
- uword *gg_hash_sclass;
-} gbp_endpoint_group_db_t;
-
-extern int gbp_endpoint_group_add_and_lock (vnid_t vnid,
- u16 sclass,
- u32 bd_id,
- u32 rd_id,
- u32 uplink_sw_if_index,
- const gbp_endpoint_retention_t *
- retention);
-extern index_t gbp_endpoint_group_find (sclass_t sclass);
-extern int gbp_endpoint_group_delete (sclass_t sclass);
-extern void gbp_endpoint_group_unlock (index_t index);
-extern void gbp_endpoint_group_lock (index_t index);
-extern u32 gbp_endpoint_group_get_bd_id (const gbp_endpoint_group_t *);
-
-extern gbp_endpoint_group_t *gbp_endpoint_group_get (index_t i);
-extern index_t gbp_endpoint_group_get_fib_index (const gbp_endpoint_group_t *
- gg, fib_protocol_t fproto);
-
-typedef int (*gbp_endpoint_group_cb_t) (gbp_endpoint_group_t * gbpe,
- void *ctx);
-extern void gbp_endpoint_group_walk (gbp_endpoint_group_cb_t bgpe, void *ctx);
-
-
-extern u8 *format_gbp_endpoint_group (u8 * s, va_list * args);
-
-/**
- * DP functions and databases
- */
-extern gbp_endpoint_group_db_t gbp_endpoint_group_db;
-extern gbp_endpoint_group_t *gbp_endpoint_group_pool;
-extern uword *gbp_epg_sclass_db;
-
-always_inline u32
-gbp_epg_itf_lookup_sclass (sclass_t sclass)
-{
- uword *p;
-
- p = hash_get (gbp_endpoint_group_db.gg_hash_sclass, sclass);
-
- if (NULL != p)
- {
- gbp_endpoint_group_t *gg;
-
- gg = pool_elt_at_index (gbp_endpoint_group_pool, p[0]);
- return (gg->gg_uplink_sw_if_index);
- }
- return (~0);
-}
-
-always_inline const dpo_id_t *
-gbp_epg_dpo_lookup (sclass_t sclass, fib_protocol_t fproto)
-{
- uword *p;
-
- p = hash_get (gbp_endpoint_group_db.gg_hash_sclass, sclass);
-
- if (NULL != p)
- {
- gbp_endpoint_group_t *gg;
-
- gg = pool_elt_at_index (gbp_endpoint_group_pool, p[0]);
- return (&gg->gg_dpo[fproto]);
- }
- return (NULL);
-}
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_ext_itf.c b/src/plugins/gbp/gbp_ext_itf.c
deleted file mode 100644
index c5506661c2d..00000000000
--- a/src/plugins/gbp/gbp_ext_itf.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_ext_itf.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_itf.h>
-
-/**
- * Pool of GBP ext_itfs
- */
-gbp_ext_itf_t *gbp_ext_itf_pool;
-
-/**
- * external interface configs keyed by sw_if_index
- */
-index_t *gbp_ext_itf_db;
-
-#define GBP_EXT_ITF_ID 0x00000080
-
-/**
- * logger
- */
-vlib_log_class_t gx_logger;
-
-#define GBP_EXT_ITF_DBG(...) \
- vlib_log_debug (gx_logger, __VA_ARGS__);
-
-u8 *
-format_gbp_ext_itf (u8 * s, va_list * args)
-{
- gbp_ext_itf_t *gx = va_arg (*args, gbp_ext_itf_t *);
-
- return (format (s, "%U%s in %U",
- format_gbp_itf_hdl, gx->gx_itf,
- (gx->gx_flags & GBP_EXT_ITF_F_ANON) ? " [anon]" : "",
- format_gbp_bridge_domain, gx->gx_bd));
-}
-
-int
-gbp_ext_itf_add (u32 sw_if_index, u32 bd_id, u32 rd_id, u32 flags)
-{
- gbp_ext_itf_t *gx;
- index_t gxi;
-
- vec_validate_init_empty (gbp_ext_itf_db, sw_if_index, INDEX_INVALID);
-
- gxi = gbp_ext_itf_db[sw_if_index];
-
- if (INDEX_INVALID == gxi)
- {
- gbp_route_domain_t *gr;
- fib_protocol_t fproto;
- index_t gbi, gri;
-
- gbi = gbp_bridge_domain_find_and_lock (bd_id);
-
- if (INDEX_INVALID == gbi)
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-
- gri = gbp_route_domain_find_and_lock (rd_id);
-
- if (INDEX_INVALID == gri)
- {
- gbp_bridge_domain_unlock (gbi);
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
- }
-
- pool_get_zero (gbp_ext_itf_pool, gx);
- gxi = gx - gbp_ext_itf_pool;
-
- gr = gbp_route_domain_get (gri);
-
- gx->gx_bd = gbi;
- gx->gx_rd = gri;
- gbp_itf_hdl_reset (&gx->gx_itf);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- gx->gx_fib_index[fproto] =
- gr->grd_fib_index[fib_proto_to_dpo (fproto)];
- }
-
- if (flags & GBP_EXT_ITF_F_ANON)
- {
- /* add interface to the BD */
- gx->gx_itf = gbp_itf_l2_add_and_lock (sw_if_index, gbi);
-
- /* setup GBP L2 features on this interface */
- gbp_itf_l2_set_input_feature (gx->gx_itf,
- L2INPUT_FEAT_GBP_LPM_ANON_CLASSIFY |
- L2INPUT_FEAT_LEARN);
- gbp_itf_l2_set_output_feature (gx->gx_itf,
- L2OUTPUT_FEAT_GBP_POLICY_LPM);
- }
-
- gx->gx_flags = flags;
-
- gbp_ext_itf_db[sw_if_index] = gxi;
-
- GBP_EXT_ITF_DBG ("add: %U", format_gbp_ext_itf, gx);
-
- return (0);
- }
-
- return (VNET_API_ERROR_ENTRY_ALREADY_EXISTS);
-}
-
-int
-gbp_ext_itf_delete (u32 sw_if_index)
-{
- gbp_ext_itf_t *gx;
- index_t gxi;
-
- if (vec_len (gbp_ext_itf_db) <= sw_if_index)
- return (VNET_API_ERROR_INVALID_SW_IF_INDEX);
-
- gxi = gbp_ext_itf_db[sw_if_index];
-
- if (INDEX_INVALID != gxi)
- {
- gx = pool_elt_at_index (gbp_ext_itf_pool, gxi);
-
- GBP_EXT_ITF_DBG ("del: %U", format_gbp_ext_itf, gx);
-
- gbp_itf_unlock (&gx->gx_itf);
- gbp_route_domain_unlock (gx->gx_rd);
- gbp_bridge_domain_unlock (gx->gx_bd);
-
- gbp_ext_itf_db[sw_if_index] = INDEX_INVALID;
- pool_put (gbp_ext_itf_pool, gx);
-
- return (0);
- }
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-}
-
-static clib_error_t *
-gbp_ext_itf_add_del_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index = ~0, bd_id = ~0, rd_id = ~0, flags = 0;
- int add = 1;
- int rv;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "del"))
- add = 0;
- else
- if (unformat
- (line_input, "%U", unformat_vnet_sw_interface, vnet_get_main (),
- &sw_if_index))
- ;
- else if (unformat (line_input, "bd %d", &bd_id))
- ;
- else if (unformat (line_input, "rd %d", &rd_id))
- ;
- else if (unformat (line_input, "anon-l3-out"))
- flags |= GBP_EXT_ITF_F_ANON;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- }
- unformat_free (line_input);
-
- if (~0 == sw_if_index)
- return clib_error_return (0, "interface must be specified");
-
- if (add)
- {
- if (~0 == bd_id)
- return clib_error_return (0, "BD-ID must be specified");
- if (~0 == rd_id)
- return clib_error_return (0, "RD-ID must be specified");
- rv = gbp_ext_itf_add (sw_if_index, bd_id, rd_id, flags);
- }
- else
- rv = gbp_ext_itf_delete (sw_if_index);
-
- switch (rv)
- {
- case 0:
- return 0;
- case VNET_API_ERROR_ENTRY_ALREADY_EXISTS:
- return clib_error_return (0, "interface already exists");
- case VNET_API_ERROR_NO_SUCH_ENTRY: /* fallthrough */
- case VNET_API_ERROR_INVALID_SW_IF_INDEX:
- return clib_error_return (0, "unknown interface");
- default:
- return clib_error_return (0, "error %d", rv);
- }
-
- /* never reached */
- return 0;
-}
-
-/*?
- * Add a Group Based Policy external interface (optionally an anonymous
- * L3-out interface)
- *
- * @cliexpar
- * @cliexstart{gbp ext-itf [del] <interface> bd <ID> rd <ID> [anon-l3-out]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_itf_anon_l3out_add_del_node, static) = {
- .path = "gbp ext-itf",
- .short_help = "gbp ext-itf [del] <interface> bd <ID> rd <ID> [anon-l3-out]\n",
- .function = gbp_ext_itf_add_del_cli,
-};
-/* *INDENT-ON* */
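-
-/*
- * Example debug-CLI usage (interface and IDs are illustrative):
- *
- *  gbp ext-itf GigabitEthernet0/9/0 bd 1 rd 1 anon-l3-out
- *  gbp ext-itf del GigabitEthernet0/9/0
- */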
-
-void
-gbp_ext_itf_walk (gbp_ext_itf_cb_t cb, void *ctx)
-{
- gbp_ext_itf_t *ge;
-
- /* *INDENT-OFF* */
- pool_foreach (ge, gbp_ext_itf_pool)
- {
- if (!cb(ge, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static walk_rc_t
-gbp_ext_itf_show_one (gbp_ext_itf_t * gx, void *ctx)
-{
- vlib_cli_output (ctx, " %U", format_gbp_ext_itf, gx);
-
- return (WALK_CONTINUE);
-}
-
-static clib_error_t *
-gbp_ext_itf_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "External-Interfaces:");
- gbp_ext_itf_walk (gbp_ext_itf_show_one, vm);
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy external interface and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp ext-itf}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_ext_itf_show_node, static) = {
- .path = "show gbp ext-itf",
- .short_help = "show gbp ext-itf\n",
- .function = gbp_ext_itf_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_ext_itf_init (vlib_main_t * vm)
-{
- gx_logger = vlib_log_register_class ("gbp", "ext-itf");
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_ext_itf_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_ext_itf.h b/src/plugins/gbp/gbp_ext_itf.h
deleted file mode 100644
index 03b1992ca45..00000000000
--- a/src/plugins/gbp/gbp_ext_itf.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_EXT_ITF_H__
-#define __GBP_EXT_ITF_H__
-
-#include <gbp/gbp.h>
-
-enum
-{
- GBP_EXT_ITF_F_NONE = 0,
- GBP_EXT_ITF_F_ANON = 1 << 0,
-};
-
-/**
- * An external interface maps directly to an oflex L3ExternalInterface.
- * The special characteristic of an external interface is the way the source
- * EPG is determined for input packets, which, like a recirc interface, is
- * via an LPM.
- */
-typedef struct gpb_ext_itf_t_
-{
- /**
- * The interface
- */
- gbp_itf_hdl_t gx_itf;
-
- /**
- * The BD this external interface is a member of
- */
- index_t gx_bd;
-
- /**
- * The RD this external interface is a member of
- */
- index_t gx_rd;
-
- /**
- * cached FIB indices from the RD
- */
- u32 gx_fib_index[DPO_PROTO_NUM];
-
- /**
- * The associated flags
- */
- u32 gx_flags;
-
-} gbp_ext_itf_t;
-
-
-extern int gbp_ext_itf_add (u32 sw_if_index, u32 bd_id, u32 rd_id, u32 flags);
-extern int gbp_ext_itf_delete (u32 sw_if_index);
-
-extern u8 *format_gbp_ext_itf (u8 * s, va_list * args);
-
-typedef walk_rc_t (*gbp_ext_itf_cb_t) (gbp_ext_itf_t * gbpe, void *ctx);
-extern void gbp_ext_itf_walk (gbp_ext_itf_cb_t bgpe, void *ctx);
-
-
-/**
- * Exposed types for the data-plane
- */
-extern gbp_ext_itf_t *gbp_ext_itf_pool;
-extern index_t *gbp_ext_itf_db;
-
-always_inline gbp_ext_itf_t *
-gbp_ext_itf_get (u32 sw_if_index)
-{
- return (pool_elt_at_index (gbp_ext_itf_pool, gbp_ext_itf_db[sw_if_index]));
-}
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_fwd.c b/src/plugins/gbp/gbp_fwd.c
deleted file mode 100644
index 4ecc4779b92..00000000000
--- a/src/plugins/gbp/gbp_fwd.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <plugins/gbp/gbp.h>
-#include <vnet/l2/l2_input.h>
-#include <plugins/gbp/gbp_learn.h>
-
-/**
- * Grouping of global data for the GBP forwarding feature
- */
-typedef struct gbp_fwd_main_t_
-{
- /**
- * Next nodes for L2 input features
- */
- u32 l2_input_feat_next[32];
-} gbp_fwd_main_t;
-
-gbp_fwd_main_t gbp_fwd_main;
-
-static clib_error_t *
-gbp_fwd_init (vlib_main_t * vm)
-{
- gbp_fwd_main_t *gpm = &gbp_fwd_main;
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "gbp-fwd");
-
- /* Initialize the feature next-node indices */
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- gpm->l2_input_feat_next);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (gbp_fwd_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_fwd_dpo.c b/src/plugins/gbp/gbp_fwd_dpo.c
deleted file mode 100644
index b1023f5e78f..00000000000
--- a/src/plugins/gbp/gbp_fwd_dpo.c
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_fwd_dpo.h>
-
-#include <vnet/ethernet/ethernet.h>
-
-
-#ifndef CLIB_MARCH_VARIANT
-/**
- * The 'DB' of GBP FWD DPOs.
- * There is one per-proto
- */
-static index_t gbp_fwd_dpo_db[DPO_PROTO_NUM] = { INDEX_INVALID };
-
-/**
- * DPO type registered for these GBP FWD
- */
-static dpo_type_t gbp_fwd_dpo_type;
-
-/**
- * @brief pool of all interface DPOs
- */
-gbp_fwd_dpo_t *gbp_fwd_dpo_pool;
-
-static gbp_fwd_dpo_t *
-gbp_fwd_dpo_alloc (void)
-{
- gbp_fwd_dpo_t *gfd;
-
- pool_get (gbp_fwd_dpo_pool, gfd);
-
- return (gfd);
-}
-
-static inline gbp_fwd_dpo_t *
-gbp_fwd_dpo_get_from_dpo (const dpo_id_t * dpo)
-{
- ASSERT (gbp_fwd_dpo_type == dpo->dpoi_type);
-
- return (gbp_fwd_dpo_get (dpo->dpoi_index));
-}
-
-static inline index_t
-gbp_fwd_dpo_get_index (gbp_fwd_dpo_t * gfd)
-{
- return (gfd - gbp_fwd_dpo_pool);
-}
-
-static void
-gbp_fwd_dpo_lock (dpo_id_t * dpo)
-{
- gbp_fwd_dpo_t *gfd;
-
- gfd = gbp_fwd_dpo_get_from_dpo (dpo);
- gfd->gfd_locks++;
-}
-
-static void
-gbp_fwd_dpo_unlock (dpo_id_t * dpo)
-{
- gbp_fwd_dpo_t *gfd;
-
- gfd = gbp_fwd_dpo_get_from_dpo (dpo);
- gfd->gfd_locks--;
-
- if (0 == gfd->gfd_locks)
- {
- gbp_fwd_dpo_db[gfd->gfd_proto] = INDEX_INVALID;
- pool_put (gbp_fwd_dpo_pool, gfd);
- }
-}
-
-void
-gbp_fwd_dpo_add_or_lock (dpo_proto_t dproto, dpo_id_t * dpo)
-{
- gbp_fwd_dpo_t *gfd;
-
- if (INDEX_INVALID == gbp_fwd_dpo_db[dproto])
- {
- gfd = gbp_fwd_dpo_alloc ();
-
- gfd->gfd_proto = dproto;
-
- gbp_fwd_dpo_db[dproto] = gbp_fwd_dpo_get_index (gfd);
- }
- else
- {
- gfd = gbp_fwd_dpo_get (gbp_fwd_dpo_db[dproto]);
- }
-
- dpo_set (dpo, gbp_fwd_dpo_type, dproto, gbp_fwd_dpo_get_index (gfd));
-}
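-
-#include <vnet/fib/fib_table.h> /* for the sketch below */
-
-/*
- * A minimal usage sketch (illustrative only; the FIB source and entry
- * flags are assumptions): stack a gbp-fwd DPO as the exclusive
- * forwarding of a FIB entry, so matching packets are forwarded via the
- * per-sclass EPG DPO lookup in the ip4/ip6-gbp-fwd-dpo nodes.
- */
-static __clib_unused void
-gbp_fwd_dpo_stack_example (u32 fib_index, const fib_prefix_t * pfx)
-{
- dpo_id_t dpo = DPO_INVALID;
-
- gbp_fwd_dpo_add_or_lock (fib_proto_to_dpo (pfx->fp_proto), &dpo);
- fib_table_entry_special_dpo_add (fib_index, pfx, FIB_SOURCE_PLUGIN_HI,
- FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
- dpo_reset (&dpo);
-}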
-
-u8 *
-format_gbp_fwd_dpo (u8 * s, va_list * ap)
-{
- index_t index = va_arg (*ap, index_t);
- CLIB_UNUSED (u32 indent) = va_arg (*ap, u32);
- gbp_fwd_dpo_t *gfd = gbp_fwd_dpo_get (index);
-
- return (format (s, "gbp-fwd-dpo: %U", format_dpo_proto, gfd->gfd_proto));
-}
-
-const static dpo_vft_t gbp_fwd_dpo_vft = {
- .dv_lock = gbp_fwd_dpo_lock,
- .dv_unlock = gbp_fwd_dpo_unlock,
- .dv_format = format_gbp_fwd_dpo,
-};
-
-/**
- * @brief The per-protocol VLIB graph nodes that are assigned to a GBP FWD
- * DPO object.
- *
- * This means that these graph nodes are ones for which a GBP FWD DPO is the
- * parent object in the DPO graph.
- */
-const static char *const gbp_fwd_dpo_ip4_nodes[] = {
- "ip4-gbp-fwd-dpo",
- NULL,
-};
-
-const static char *const gbp_fwd_dpo_ip6_nodes[] = {
- "ip6-gbp-fwd-dpo",
- NULL,
-};
-
-const static char *const *const gbp_fwd_dpo_nodes[DPO_PROTO_NUM] = {
- [DPO_PROTO_IP4] = gbp_fwd_dpo_ip4_nodes,
- [DPO_PROTO_IP6] = gbp_fwd_dpo_ip6_nodes,
-};
-
-dpo_type_t
-gbp_fwd_dpo_get_type (void)
-{
- return (gbp_fwd_dpo_type);
-}
-
-static clib_error_t *
-gbp_fwd_dpo_module_init (vlib_main_t * vm)
-{
- dpo_proto_t dproto;
-
- FOR_EACH_DPO_PROTO (dproto)
- {
- gbp_fwd_dpo_db[dproto] = INDEX_INVALID;
- }
-
- gbp_fwd_dpo_type = dpo_register_new_type (&gbp_fwd_dpo_vft,
- gbp_fwd_dpo_nodes);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_fwd_dpo_module_init);
-#endif /* CLIB_MARCH_VARIANT */
-
-typedef struct gbp_fwd_dpo_trace_t_
-{
- u32 sclass;
- u32 dpo_index;
-} gbp_fwd_dpo_trace_t;
-
-typedef enum
-{
- GBP_FWD_DROP,
- GBP_FWD_FWD,
- GBP_FWD_N_NEXT,
-} gbp_fwd_next_t;
-
-always_inline uword
-gbp_fwd_dpo_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, fib_protocol_t fproto)
-{
- u32 n_left_from, next_index, *from, *to_next;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- const dpo_id_t *next_dpo0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
- u32 bi0, next0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- sclass0 = vnet_buffer2 (b0)->gbp.sclass;
- next_dpo0 = gbp_epg_dpo_lookup (sclass0, fproto);
-
- if (PREDICT_TRUE (NULL != next_dpo0))
- {
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = next_dpo0->dpoi_index;
- next0 = GBP_FWD_FWD;
- }
- else
- {
- next0 = GBP_FWD_DROP;
- }
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- gbp_fwd_dpo_trace_t *tr;
-
- tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->sclass = sclass0;
- tr->dpo_index = (NULL != next_dpo0 ?
- next_dpo0->dpoi_index : ~0);
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
- return from_frame->n_vectors;
-}
-
-static u8 *
-format_gbp_fwd_dpo_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_fwd_dpo_trace_t *t = va_arg (*args, gbp_fwd_dpo_trace_t *);
-
- s = format (s, " sclass:%d dpo:%d", t->sclass, t->dpo_index);
-
- return s;
-}
-
-VLIB_NODE_FN (ip4_gbp_fwd_dpo_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return (gbp_fwd_dpo_inline (vm, node, from_frame, FIB_PROTOCOL_IP4));
-}
-
-VLIB_NODE_FN (ip6_gbp_fwd_dpo_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return (gbp_fwd_dpo_inline (vm, node, from_frame, FIB_PROTOCOL_IP6));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip4_gbp_fwd_dpo_node) = {
- .name = "ip4-gbp-fwd-dpo",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_fwd_dpo_trace,
- .n_next_nodes = GBP_FWD_N_NEXT,
- .next_nodes =
- {
- [GBP_FWD_DROP] = "ip4-drop",
- [GBP_FWD_FWD] = "ip4-dvr-dpo",
- }
-};
-VLIB_REGISTER_NODE (ip6_gbp_fwd_dpo_node) = {
- .name = "ip6-gbp-fwd-dpo",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_fwd_dpo_trace,
- .n_next_nodes = GBP_FWD_N_NEXT,
- .next_nodes =
- {
- [GBP_FWD_DROP] = "ip6-drop",
- [GBP_FWD_FWD] = "ip6-dvr-dpo",
- }
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
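
Note the lifecycle implemented by gbp_fwd_dpo_add_or_lock()/unlock() above: there is at most one gbp_fwd_dpo_t per DPO protocol, lazily allocated on first use, reference-counted through the DPO lock machinery, and returned to the pool when the last lock is dropped. A hedged caller-side sketch of that contract:

    dpo_id_t dpo = DPO_INVALID;

    /* first caller allocates the per-proto singleton; later callers
     * just take another lock on it */
    gbp_fwd_dpo_add_or_lock (DPO_PROTO_IP4, &dpo);

    /* ... use dpo.dpoi_index, stack it under a FIB entry, etc. ... */

    /* dropping the last reference frees the pool entry */
    dpo_reset (&dpo);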
diff --git a/src/plugins/gbp/gbp_fwd_dpo.h b/src/plugins/gbp/gbp_fwd_dpo.h
deleted file mode 100644
index 6092d6241b5..00000000000
--- a/src/plugins/gbp/gbp_fwd_dpo.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_FWD_DPO_H__
-#define __GBP_FWD_DPO_H__
-
-#include <vnet/dpo/dpo.h>
-
-/**
- * @brief
- * The GBP FWD DPO. Used in the L3 path to select the correct EPG uplink
- * based on the source EPG.
- */
-typedef struct gbp_fwd_dpo_t_
-{
- /**
- * The protocol of packets using this DPO
- */
- dpo_proto_t gfd_proto;
-
- /**
- * number of locks.
- */
- u16 gfd_locks;
-} gbp_fwd_dpo_t;
-
-extern void gbp_fwd_dpo_add_or_lock (dpo_proto_t dproto, dpo_id_t * dpo);
-
-extern dpo_type_t gbp_fwd_dpo_get_type (void);
-
-/**
- * @brief pool of all GBP FWD DPOs
- */
-extern gbp_fwd_dpo_t *gbp_fwd_dpo_pool;
-
-static inline gbp_fwd_dpo_t *
-gbp_fwd_dpo_get (index_t index)
-{
- return (pool_elt_at_index (gbp_fwd_dpo_pool, index));
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
-
-#endif
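
The header keeps gbp_fwd_dpo_pool visible so that gbp_fwd_dpo_get() can stay inline in the dataplane: the index carried in a dpo_id_t is simply the object's offset in the pool. The index/pointer round-trip, as a small sketch using the declarations above:

    gbp_fwd_dpo_t *gfd;
    index_t gfdi;

    pool_get (gbp_fwd_dpo_pool, gfd);

    gfdi = gfd - gbp_fwd_dpo_pool;           /* pointer -> pool index */
    ASSERT (gbp_fwd_dpo_get (gfdi) == gfd);  /* index -> pointer */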
diff --git a/src/plugins/gbp/gbp_fwd_node.c b/src/plugins/gbp/gbp_fwd_node.c
deleted file mode 100644
index 6ea56fd8074..00000000000
--- a/src/plugins/gbp/gbp_fwd_node.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <vnet/l2/l2_input.h>
-
-#define foreach_gbp_fwd \
- _(DROP, "drop") \
- _(OUTPUT, "output")
-
-typedef enum
-{
-#define _(sym,str) GBP_FWD_ERROR_##sym,
- foreach_gbp_fwd
-#undef _
- GBP_FWD_N_ERROR,
-} gbp_fwd_error_t;
-
-static char *gbp_fwd_error_strings[] = {
-#define _(sym,string) string,
- foreach_gbp_fwd
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) GBP_FWD_NEXT_##sym,
- foreach_gbp_fwd
-#undef _
- GBP_FWD_N_NEXT,
-} gbp_fwd_next_t;
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_fwd_trace_t_
-{
- /* per-pkt trace data */
- sclass_t sclass;
- u32 sw_if_index;
-} gbp_fwd_trace_t;
-
-VLIB_NODE_FN (gbp_fwd_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- u32 n_left_from, *from, *to_next;
- u32 next_index;
-
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0, sw_if_index0;
- gbp_fwd_next_t next0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
-
- next0 = GBP_FWD_NEXT_DROP;
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- /*
- * lookup the uplink based on src EPG
- */
- sclass0 = vnet_buffer2 (b0)->gbp.sclass;
-
- sw_if_index0 = gbp_epg_itf_lookup_sclass (sclass0);
-
- if (~0 != sw_if_index0)
- {
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
-
- next0 = GBP_FWD_NEXT_OUTPUT;
- }
- /*
- * else
- * don't know the uplink interface for this EPG => drop
- */
-
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- gbp_fwd_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sclass = sclass0;
- t->sw_if_index = sw_if_index0;
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-/* packet trace format function */
-static u8 *
-format_gbp_fwd_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_fwd_trace_t *t = va_arg (*args, gbp_fwd_trace_t *);
-
- s = format (s, "sclass:%d", t->sclass);
-
- return s;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_fwd_node) = {
- .name = "gbp-fwd",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_fwd_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(gbp_fwd_error_strings),
- .error_strings = gbp_fwd_error_strings,
-
- .n_next_nodes = GBP_FWD_N_NEXT,
-
- .next_nodes = {
- [GBP_FWD_NEXT_DROP] = "error-drop",
- [GBP_FWD_NEXT_OUTPUT] = "l2-output",
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
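
Like every single-buffer node in this plugin, gbp-fwd uses VLIB's speculative-enqueue idiom: each buffer is optimistically written into the frame of the previously used next node, and vlib_validate_buffer_enqueue_x1() repairs the frame when the per-buffer decision differs. The skeleton of that idiom, with the per-buffer work elided:

    while (n_left_from > 0)
      {
        u32 n_left_to_next;

        vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

        while (n_left_from > 0 && n_left_to_next > 0)
          {
            u32 bi0 = from[0];
            u32 next0 = 0;      /* per-buffer decision goes here */

            to_next[0] = bi0;   /* speculate: same next as last buffer */
            from += 1;
            to_next += 1;
            n_left_from -= 1;
            n_left_to_next -= 1;

            /* fix up the frame if next0 != next_index */
            vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                             to_next, n_left_to_next,
                                             bi0, next0);
          }
        vlib_put_next_frame (vm, node, next_index, n_left_to_next);
      }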
diff --git a/src/plugins/gbp/gbp_itf.c b/src/plugins/gbp/gbp_itf.c
deleted file mode 100644
index 0c8f6a45a87..00000000000
--- a/src/plugins/gbp/gbp_itf.c
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_itf.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-
-#include <vnet/ip/ip.h>
-
-#define foreach_gbp_itf_mode \
- _(L2, "l2") \
- _(L3, "l3")
-
-typedef enum gbp_itf_mode_t_
-{
-#define _(s,v) GBP_ITF_MODE_##s,
- foreach_gbp_itf_mode
-#undef _
-} gbp_itf_mode_t;
-
-/**
- * Attributes and configurations attached to interfaces by GBP
- */
-typedef struct gbp_itf_t_
-{
- /**
- * Number of references to this interface
- */
- u32 gi_locks;
-
- /**
- * The interface this wrapper is managing
- */
- u32 gi_sw_if_index;
-
- /**
- * The mode of the interface
- */
- gbp_itf_mode_t gi_mode;
-
- /**
- * Users of this interface - this is encoded in the user's handle
- */
- u32 *gi_users;
-
- /**
- * L2/L3 Features configured by each user
- */
- u32 *gi_input_fbs;
- u32 gi_input_fb;
- u32 *gi_output_fbs;
- u32 gi_output_fb;
-
- /**
- * function to call when the interface is deleted.
- */
- gbp_itf_free_fn_t gi_free_fn;
-
- union
- {
- /**
- * GBP BD or RD index
- */
- u32 gi_gbi;
- index_t gi_gri;
- };
-} gbp_itf_t;
-
-static gbp_itf_t *gbp_itf_pool;
-static uword *gbp_itf_db;
-
-static const char *gbp_itf_feat_bit_pos_to_arc[] = {
-#define _(s,v,a) [GBP_ITF_L3_FEAT_POS_##s] = a,
- foreach_gdb_l3_feature
-#undef _
-};
-
-static const char *gbp_itf_feat_bit_pos_to_feat[] = {
-#define _(s,v,a) [GBP_ITF_L3_FEAT_POS_##s] = v,
- foreach_gdb_l3_feature
-#undef _
-};
-
-u8 *
-format_gbp_itf_l3_feat (u8 * s, va_list * args)
-{
- gbp_itf_l3_feat_t flags = va_arg (*args, gbp_itf_l3_feat_t);
-
-#define _(a, b, c) \
- if (flags & GBP_ITF_L3_FEAT_##a) \
- s = format (s, "%s ", b);
- foreach_gdb_l3_feature
-#undef _
- return (s);
-}
-
-void
-gbp_itf_hdl_reset (gbp_itf_hdl_t * gh)
-{
- *gh = GBP_ITF_HDL_INVALID;
-}
-
-bool
-gbp_itf_hdl_is_valid (gbp_itf_hdl_t gh)
-{
- return (gh.gh_which != GBP_ITF_HDL_INVALID.gh_which);
-}
-
-static gbp_itf_t *
-gbp_itf_get (index_t gii)
-{
- if (pool_is_free_index (gbp_itf_pool, gii))
- return (NULL);
-
- return (pool_elt_at_index (gbp_itf_pool, gii));
-}
-
-static gbp_itf_t *
-gbp_itf_find (u32 sw_if_index)
-{
- uword *p;
-
- p = hash_get (gbp_itf_db, sw_if_index);
-
- if (NULL != p)
- return (gbp_itf_get (p[0]));
-
- return (NULL);
-}
-
-static gbp_itf_t *
-gbp_itf_find_hdl (gbp_itf_hdl_t gh)
-{
- return (gbp_itf_find (gh.gh_which));
-}
-
-u32
-gbp_itf_get_sw_if_index (gbp_itf_hdl_t hdl)
-{
- return (hdl.gh_which);
-}
-
-static gbp_itf_hdl_t
-gbp_itf_mk_hdl (gbp_itf_t * gi)
-{
- gbp_itf_hdl_t gh;
- u32 *useri;
-
- pool_get (gi->gi_users, useri);
- *useri = 0;
-
- gh.gh_who = useri - gi->gi_users;
- gh.gh_which = gi->gi_sw_if_index;
-
- return (gh);
-}
-
-static gbp_itf_hdl_t
-gbp_itf_l2_add_and_lock_i (u32 sw_if_index, index_t gbi, gbp_itf_free_fn_t ff)
-{
- gbp_itf_t *gi;
-
- gi = gbp_itf_find (sw_if_index);
-
- if (NULL == gi)
- {
- pool_get_zero (gbp_itf_pool, gi);
-
- gi->gi_sw_if_index = sw_if_index;
- gi->gi_gbi = gbi;
- gi->gi_mode = GBP_ITF_MODE_L2;
- gi->gi_free_fn = ff;
-
- gbp_bridge_domain_itf_add (gi->gi_gbi, gi->gi_sw_if_index,
- L2_BD_PORT_TYPE_NORMAL);
-
- hash_set (gbp_itf_db, gi->gi_sw_if_index, gi - gbp_itf_pool);
- }
-
- gi->gi_locks++;
-
- return (gbp_itf_mk_hdl (gi));
-}
-
-gbp_itf_hdl_t
-gbp_itf_l2_add_and_lock (u32 sw_if_index, index_t gbi)
-{
- return (gbp_itf_l2_add_and_lock_i (sw_if_index, gbi, NULL));
-}
-
-gbp_itf_hdl_t
-gbp_itf_l2_add_and_lock_w_free (u32 sw_if_index,
- index_t gbi, gbp_itf_free_fn_t ff)
-{
- return (gbp_itf_l2_add_and_lock_i (sw_if_index, gbi, ff));
-}
-
-gbp_itf_hdl_t
-gbp_itf_l3_add_and_lock_i (u32 sw_if_index, index_t gri, gbp_itf_free_fn_t ff)
-{
- gbp_itf_t *gi;
-
- gi = gbp_itf_find (sw_if_index);
-
- if (NULL == gi)
- {
- const gbp_route_domain_t *grd;
- fib_protocol_t fproto;
-
- pool_get_zero (gbp_itf_pool, gi);
-
- gi->gi_sw_if_index = sw_if_index;
- gi->gi_mode = GBP_ITF_MODE_L3;
- gi->gi_gri = gri;
- gi->gi_free_fn = ff;
-
- grd = gbp_route_domain_get (gi->gi_gri);
-
- ip4_sw_interface_enable_disable (gi->gi_sw_if_index, 1);
- ip6_sw_interface_enable_disable (gi->gi_sw_if_index, 1);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- ip_table_bind (fproto, gi->gi_sw_if_index,
- grd->grd_table_id[fproto], 1);
-
- hash_set (gbp_itf_db, gi->gi_sw_if_index, gi - gbp_itf_pool);
- }
-
- gi->gi_locks++;
-
- return (gbp_itf_mk_hdl (gi));
-}
-
-gbp_itf_hdl_t
-gbp_itf_l3_add_and_lock (u32 sw_if_index, index_t gri)
-{
- return (gbp_itf_l3_add_and_lock_i (sw_if_index, gri, NULL));
-}
-
-gbp_itf_hdl_t
-gbp_itf_l3_add_and_lock_w_free (u32 sw_if_index,
- index_t gri, gbp_itf_free_fn_t ff)
-{
- return (gbp_itf_l3_add_and_lock_i (sw_if_index, gri, ff));
-}
-
-void
-gbp_itf_lock (gbp_itf_hdl_t gh)
-{
- gbp_itf_t *gi;
-
- if (!gbp_itf_hdl_is_valid (gh))
- return;
-
- gi = gbp_itf_find_hdl (gh);
-
- gi->gi_locks++;
-}
-
-gbp_itf_hdl_t
-gbp_itf_clone_and_lock (gbp_itf_hdl_t gh)
-{
- gbp_itf_t *gi;
-
- if (!gbp_itf_hdl_is_valid (gh))
- return (GBP_ITF_HDL_INVALID);
-
- gi = gbp_itf_find_hdl (gh);
-
- gi->gi_locks++;
-
- return (gbp_itf_mk_hdl (gi));
-}
-
-void
-gbp_itf_unlock (gbp_itf_hdl_t * gh)
-{
- gbp_itf_t *gi;
-
- if (!gbp_itf_hdl_is_valid (*gh))
- return;
-
- gi = gbp_itf_find_hdl (*gh);
- ASSERT (gi->gi_locks > 0);
- gi->gi_locks--;
-
- if (0 == gi->gi_locks)
- {
- if (GBP_ITF_MODE_L2 == gi->gi_mode)
- {
- gbp_itf_l2_set_input_feature (*gh, L2INPUT_FEAT_NONE);
- gbp_itf_l2_set_output_feature (*gh, L2OUTPUT_FEAT_NONE);
- gbp_bridge_domain_itf_del (gi->gi_gbi,
- gi->gi_sw_if_index,
- L2_BD_PORT_TYPE_NORMAL);
- }
- else
- {
- fib_protocol_t fproto;
-
- gbp_itf_l3_set_input_feature (*gh, GBP_ITF_L3_FEAT_NONE);
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- ip_table_bind (fproto, gi->gi_sw_if_index, 0, 0);
-
- ip4_sw_interface_enable_disable (gi->gi_sw_if_index, 0);
- ip6_sw_interface_enable_disable (gi->gi_sw_if_index, 0);
- }
-
- hash_unset (gbp_itf_db, gi->gi_sw_if_index);
-
- if (gi->gi_free_fn)
- gi->gi_free_fn (gi->gi_sw_if_index);
-
- pool_free (gi->gi_users);
- vec_free (gi->gi_input_fbs);
- vec_free (gi->gi_output_fbs);
-
- memset (gi, 0, sizeof (*gi));
- }
-
- gbp_itf_hdl_reset (gh);
-}
-
-void
-gbp_itf_l3_set_input_feature (gbp_itf_hdl_t gh, gbp_itf_l3_feat_t feats)
-{
- u32 diff_fb, new_fb, *fb, feat;
- gbp_itf_t *gi;
-
- gi = gbp_itf_find_hdl (gh);
-
- if (NULL == gi || GBP_ITF_MODE_L3 != gi->gi_mode)
- return;
-
- vec_validate (gi->gi_input_fbs, gh.gh_who);
- gi->gi_input_fbs[gh.gh_who] = feats;
-
- new_fb = 0;
- vec_foreach (fb, gi->gi_input_fbs)
- {
- new_fb |= *fb;
- }
-
- /* add new features */
- diff_fb = (gi->gi_input_fb ^ new_fb) & new_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- vnet_feature_enable_disable (gbp_itf_feat_bit_pos_to_arc[feat],
- gbp_itf_feat_bit_pos_to_feat[feat],
- gi->gi_sw_if_index, 1, 0, 0);
- }));
- /* *INDENT-ON* */
-
- /* remove unneeded features */
- diff_fb = (gi->gi_input_fb ^ new_fb) & gi->gi_input_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- vnet_feature_enable_disable (gbp_itf_feat_bit_pos_to_arc[feat],
- gbp_itf_feat_bit_pos_to_feat[feat],
- gi->gi_sw_if_index, 0, 0, 0);
- }));
- /* *INDENT-ON* */
-
- gi->gi_input_fb = new_fb;
-}
-
-void
-gbp_itf_l2_set_input_feature (gbp_itf_hdl_t gh, l2input_feat_masks_t feats)
-{
- u32 diff_fb, new_fb, *fb, feat;
- gbp_itf_t *gi;
-
- gi = gbp_itf_find_hdl (gh);
-
- if (NULL == gi || GBP_ITF_MODE_L2 != gi->gi_mode)
- {
- ASSERT (0);
- return;
- }
-
- vec_validate (gi->gi_input_fbs, gh.gh_who);
- gi->gi_input_fbs[gh.gh_who] = feats;
-
- new_fb = 0;
- vec_foreach (fb, gi->gi_input_fbs)
- {
- new_fb |= *fb;
- }
-
- /* add new features */
- diff_fb = (gi->gi_input_fb ^ new_fb) & new_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- l2input_intf_bitmap_enable (gi->gi_sw_if_index, (1 << feat), 1);
- }));
- /* *INDENT-ON* */
-
- /* remove unneeded features */
- diff_fb = (gi->gi_input_fb ^ new_fb) & gi->gi_input_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- l2input_intf_bitmap_enable (gi->gi_sw_if_index, (1 << feat), 0);
- }));
- /* *INDENT-ON* */
-
- gi->gi_input_fb = new_fb;
-}
-
-void
-gbp_itf_l2_set_output_feature (gbp_itf_hdl_t gh, l2output_feat_masks_t feats)
-{
- u32 diff_fb, new_fb, *fb, feat;
- gbp_itf_t *gi;
-
- gi = gbp_itf_find_hdl (gh);
-
- if (NULL == gi || GBP_ITF_MODE_L2 != gi->gi_mode)
- {
- ASSERT (0);
- return;
- }
-
- vec_validate (gi->gi_output_fbs, gh.gh_who);
- gi->gi_output_fbs[gh.gh_who] = feats;
-
- new_fb = 0;
- vec_foreach (fb, gi->gi_output_fbs)
- {
- new_fb |= *fb;
- }
-
- /* add new features */
- diff_fb = (gi->gi_output_fb ^ new_fb) & new_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- l2output_intf_bitmap_enable (gi->gi_sw_if_index, (1 << feat), 1);
- }));
- /* *INDENT-ON* */
-
- /* remove unneeded features */
- diff_fb = (gi->gi_output_fb ^ new_fb) & gi->gi_output_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- l2output_intf_bitmap_enable (gi->gi_sw_if_index, (1 << feat), 0);
- }));
- /* *INDENT-ON* */
-
- gi->gi_output_fb = new_fb;
-}
-
-static u8 *
-format_gbp_itf_mode (u8 * s, va_list * args)
-{
- gbp_itf_mode_t mode = va_arg (*args, gbp_itf_mode_t);
-
- switch (mode)
- {
-#define _(a,v) \
- case GBP_ITF_MODE_##a: \
- return format(s, "%s", v);
- foreach_gbp_itf_mode
-#undef _
- }
- return (s);
-}
-
-static u8 *
-format_gbp_itf (u8 * s, va_list * args)
-{
- index_t gii = va_arg (*args, index_t);
- gbp_itf_t *gi;
-
- if (INDEX_INVALID == gii)
- return (format (s, "unset"));
-
- gi = gbp_itf_get (gii);
-
- s = format (s, "%U locks:%d mode:%U ",
- format_vnet_sw_if_index_name, vnet_get_main (),
- gi->gi_sw_if_index, gi->gi_locks,
- format_gbp_itf_mode, gi->gi_mode);
-
- if (GBP_ITF_MODE_L2 == gi->gi_mode)
- s = format (s, "gbp-bd:%d input-feats:[%U] output-feats:[%U]",
- gi->gi_gbi,
- format_l2_input_features, gi->gi_input_fb, 0,
- format_l2_output_features, gi->gi_output_fb, 0);
- else
- s = format (s, "gbp-rd:%d input-feats:[%U] output-feats:[%U]",
- gi->gi_gbi,
- format_gbp_itf_l3_feat, gi->gi_input_fb,
- format_gbp_itf_l3_feat, gi->gi_output_fb);
-
- return (s);
-}
-
-u8 *
-format_gbp_itf_hdl (u8 * s, va_list * args)
-{
- gbp_itf_hdl_t gh = va_arg (*args, gbp_itf_hdl_t);
- gbp_itf_t *gi;
-
- gi = gbp_itf_find_hdl (gh);
-
- if (NULL == gi)
- return format (s, "INVALID");
-
- return (format (s, "%U", format_gbp_itf, gi - gbp_itf_pool));
-}
-
-static clib_error_t *
-gbp_itf_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- u32 gii;
-
- vlib_cli_output (vm, "Interfaces:");
-
- /* *INDENT-OFF* */
- pool_foreach_index (gii, gbp_itf_pool)
- {
- vlib_cli_output (vm, " [%d] %U", gii, format_gbp_itf, gii);
- }
- /* *INDENT-ON* */
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy (GBP) interfaces
- *
- * @cliexpar
- * @cliexstart{show gbp interface}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_itf_show_node, static) = {
- .path = "show gbp interface",
- .short_help = "show gbp interface\n",
- .function = gbp_itf_show,
-};
-/* *INDENT-ON* */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
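
The three set-feature functions above all reconcile state the same way: new_fb is the OR of every user's requested mask, and XOR against the currently programmed mask splits the difference into bits to enable and bits to disable, so only the delta is pushed to the L2/L3 feature machinery. The bit arithmetic, worked on concrete values:

    u32 old_fb = 0x5;                         /* features A and C enabled */
    u32 new_fb = 0x6;                         /* users now want B and C   */

    u32 to_add = (old_fb ^ new_fb) & new_fb;  /* 0x2: enable B  */
    u32 to_del = (old_fb ^ new_fb) & old_fb;  /* 0x1: disable A */

    ASSERT (to_add == 0x2 && to_del == 0x1);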
diff --git a/src/plugins/gbp/gbp_itf.h b/src/plugins/gbp/gbp_itf.h
deleted file mode 100644
index 23a09b2a9ff..00000000000
--- a/src/plugins/gbp/gbp_itf.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_INTERFACE_H__
-#define __GBP_INTERFACE_H__
-
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/l2_output.h>
-#include <vnet/dpo/dpo.h>
-
-
-#define foreach_gdb_l3_feature \
- _(LEARN_IP4, "gbp-learn-ip4", "ip4-unicast") \
- _(LEARN_IP6, "gbp-learn-ip6", "ip6-unicast")
-
-typedef enum gbp_itf_l3_feat_pos_t_
-{
-#define _(s,v,a) GBP_ITF_L3_FEAT_POS_##s,
- foreach_gdb_l3_feature
-#undef _
-} gbp_itf_l3_feat_pos_t;
-
-typedef enum gbp_itf_l3_feat_t_
-{
- GBP_ITF_L3_FEAT_NONE,
-#define _(s,v,a) GBP_ITF_L3_FEAT_##s = (1 << GBP_ITF_L3_FEAT_POS_##s),
- foreach_gdb_l3_feature
-#undef _
-} gbp_itf_l3_feat_t;
-
-#define GBP_ITF_L3_FEAT_LEARN (GBP_ITF_L3_FEAT_LEARN_IP4|GBP_ITF_L3_FEAT_LEARN_IP6)
-
-typedef struct gbp_itf_hdl_t_
-{
- union
- {
- struct
- {
- u32 gh_who;
- u32 gh_which;
- };
- };
-} gbp_itf_hdl_t;
-
-#define GBP_ITF_HDL_INIT {.gh_which = ~0}
-const static gbp_itf_hdl_t GBP_ITF_HDL_INVALID = GBP_ITF_HDL_INIT;
-
-extern void gbp_itf_hdl_reset (gbp_itf_hdl_t * gh);
-extern bool gbp_itf_hdl_is_valid (gbp_itf_hdl_t gh);
-
-typedef void (*gbp_itf_free_fn_t) (u32 sw_if_index);
-
-extern gbp_itf_hdl_t gbp_itf_l2_add_and_lock (u32 sw_if_index, u32 bd_index);
-extern gbp_itf_hdl_t gbp_itf_l3_add_and_lock (u32 sw_if_index, index_t gri);
-extern gbp_itf_hdl_t gbp_itf_l2_add_and_lock_w_free (u32 sw_if_index,
- u32 bd_index,
- gbp_itf_free_fn_t ff);
-extern gbp_itf_hdl_t gbp_itf_l3_add_and_lock_w_free (u32 sw_if_index,
- index_t gri,
- gbp_itf_free_fn_t ff);
-
-extern void gbp_itf_unlock (gbp_itf_hdl_t * hdl);
-extern void gbp_itf_lock (gbp_itf_hdl_t hdl);
-extern gbp_itf_hdl_t gbp_itf_clone_and_lock (gbp_itf_hdl_t hdl);
-extern u32 gbp_itf_get_sw_if_index (gbp_itf_hdl_t hdl);
-
-extern void gbp_itf_l2_set_input_feature (gbp_itf_hdl_t hdl,
- l2input_feat_masks_t feats);
-extern void gbp_itf_l2_set_output_feature (gbp_itf_hdl_t hdl,
- l2output_feat_masks_t feats);
-
-extern void gbp_itf_l3_set_input_feature (gbp_itf_hdl_t hdl,
- gbp_itf_l3_feat_t feats);
-
-extern u8 *format_gbp_itf_hdl (u8 * s, va_list * args);
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
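
A gbp_itf_hdl_t encodes both the interface (gh_which is the sw_if_index) and the caller's slot in the wrapper's user pool (gh_who), so each user keeps its own feature mask while sharing one reference-counted wrapper. A typical caller lifecycle, sketched against the API above (sw_if_index, bd_index and the feature flag are illustrative):

    gbp_itf_hdl_t gh;

    /* first lock creates the wrapper and adds the port to the BD */
    gh = gbp_itf_l2_add_and_lock (sw_if_index, bd_index);

    /* record this user's feature request; the masks of all users
     * are OR-ed together before programming the interface */
    gbp_itf_l2_set_input_feature (gh, L2INPUT_FEAT_GBP_FWD);

    /* last unlock strips the features, removes the port from the
     * BD and invalidates the handle */
    gbp_itf_unlock (&gh);
    ASSERT (!gbp_itf_hdl_is_valid (gh));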
diff --git a/src/plugins/gbp/gbp_learn.c b/src/plugins/gbp/gbp_learn.c
deleted file mode 100644
index af3a6fb52ac..00000000000
--- a/src/plugins/gbp/gbp_learn.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_learn.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-
-#include <vnet/l2/l2_input.h>
-
-gbp_learn_main_t gbp_learn_main;
-
-void
-gbp_learn_enable (u32 sw_if_index)
-{
- vnet_feature_enable_disable ("ip4-unicast",
- "gbp-learn-ip4", sw_if_index, 1, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "gbp-learn-ip6", sw_if_index, 1, 0, 0);
-}
-
-void
-gbp_learn_disable (u32 sw_if_index)
-{
- vnet_feature_enable_disable ("ip4-unicast",
- "gbp-learn-ip4", sw_if_index, 0, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "gbp-learn-ip6", sw_if_index, 0, 0, 0);
-}
-
-static clib_error_t *
-gbp_learn_init (vlib_main_t * vm)
-{
- gbp_learn_main_t *glm = &gbp_learn_main;
- vlib_thread_main_t *tm = &vlib_thread_main;
-
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "gbp-learn-l2");
-
- /* Initialize the feature next-node indices */
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- glm->gl_l2_input_feat_next);
-
- throttle_init (&glm->gl_l2_throttle,
- tm->n_vlib_mains, GBP_ENDPOINT_HASH_LEARN_RATE);
-
- throttle_init (&glm->gl_l3_throttle,
- tm->n_vlib_mains, GBP_ENDPOINT_HASH_LEARN_RATE);
-
- glm->gl_logger = vlib_log_register_class ("gbp", "learn");
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (gbp_learn_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
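
gbp_learn_enable()/gbp_learn_disable() are a plain instance of the arc-based feature toggle: vnet_feature_enable_disable() takes the arc name, the node name registered on that arc, the interface, and an enable flag. The same call shape works for any feature/arc pair, e.g. (feature name illustrative):

    /* enable, then later disable, a node on the ip4-unicast arc */
    vnet_feature_enable_disable ("ip4-unicast", "my-feature",
                                 sw_if_index, 1 /* enable */, 0, 0);
    vnet_feature_enable_disable ("ip4-unicast", "my-feature",
                                 sw_if_index, 0 /* disable */, 0, 0);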
diff --git a/src/plugins/gbp/gbp_learn.h b/src/plugins/gbp/gbp_learn.h
deleted file mode 100644
index b4f3ae0a23d..00000000000
--- a/src/plugins/gbp/gbp_learn.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_LEARN_H__
-#define __GBP_LEARN_H__
-
-#include <plugins/gbp/gbp.h>
-
-#include <vnet/util/throttle.h>
-
-/**
- * The maximum learning rate per-hashed EP
- */
-#define GBP_ENDPOINT_HASH_LEARN_RATE (1e-2)
-
-/**
- * Grouping of global data for the GBP endpoint learning feature
- */
-typedef struct gbp_learn_main_t_
-{
- /**
- * Next nodes for L2 input features
- */
- u32 gl_l2_input_feat_next[32];
-
- /**
- * logger - VLIB log class
- */
- vlib_log_class_t gl_logger;
-
- /**
- * throttles for the DP learning
- */
- throttle_t gl_l2_throttle;
- throttle_t gl_l3_throttle;
-} gbp_learn_main_t;
-
-extern gbp_learn_main_t gbp_learn_main;
-
-extern void gbp_learn_enable (u32 sw_if_index);
-extern void gbp_learn_disable (u32 sw_if_index);
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
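
The two throttles bound how often the dataplane may punt learn events to the main thread: throttle_init() (called in gbp_learn_init() above) sizes per-thread state with the 1e-2 second period, after which the nodes take one throttle_seed() per frame and one throttle_check() per candidate endpoint hash. Condensed to its essentials, assuming the node context (glm, vm, thread_index) seen in the node code later in this patch:

    u32 seed;
    u32 hash = 0x12345678;  /* e.g. last 4 bytes of the source MAC */

    seed = throttle_seed (&glm->gl_l2_throttle, thread_index,
                          vlib_time_now (vm));

    /* non-zero return means this hash was seen recently: throttled */
    if (!throttle_check (&glm->gl_l2_throttle, thread_index, hash, seed))
      {
        /* not seen recently: OK to punt this learn event */
      }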
diff --git a/src/plugins/gbp/gbp_learn_node.c b/src/plugins/gbp/gbp_learn_node.c
deleted file mode 100644
index a6c54971956..00000000000
--- a/src/plugins/gbp/gbp_learn_node.c
+++ /dev/null
@@ -1,718 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_learn.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/util/throttle.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-#include <vnet/ethernet/arp_packet.h>
-
-#define GBP_LEARN_DBG(...) \
- vlib_log_debug (gbp_learn_main.gl_logger, __VA_ARGS__);
-
-#define foreach_gbp_learn \
- _(DROP, "drop")
-
-typedef enum
-{
-#define _(sym,str) GBP_LEARN_ERROR_##sym,
- foreach_gbp_learn
-#undef _
- GBP_LEARN_N_ERROR,
-} gbp_learn_error_t;
-
-static char *gbp_learn_error_strings[] = {
-#define _(sym,string) string,
- foreach_gbp_learn
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) GBP_LEARN_NEXT_##sym,
- foreach_gbp_learn
-#undef _
- GBP_LEARN_N_NEXT,
-} gbp_learn_next_t;
-
-typedef struct gbp_learn_l2_t_
-{
- ip46_address_t ip;
- mac_address_t mac;
- u32 sw_if_index;
- u32 bd_index;
- sclass_t sclass;
- ip46_address_t outer_src;
- ip46_address_t outer_dst;
-} gbp_learn_l2_t;
-
-
-static void
-gbp_learn_l2_cp (const gbp_learn_l2_t * gl2)
-{
- ip46_address_t *ips = NULL;
-
- GBP_LEARN_DBG ("L2 EP: %U %U, %d",
- format_mac_address_t, &gl2->mac,
- format_ip46_address, &gl2->ip, IP46_TYPE_ANY, gl2->sclass);
-
- if (!ip46_address_is_zero (&gl2->ip))
- vec_add1 (ips, gl2->ip);
-
- /*
- * flip the outer source and dst addresses: they were captured as
- * received, but this API takes them in the send direction
- */
- gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_DP,
- gl2->sw_if_index, ips,
- &gl2->mac, INDEX_INVALID,
- INDEX_INVALID, gl2->sclass,
- (GBP_ENDPOINT_FLAG_LEARNT |
- GBP_ENDPOINT_FLAG_REMOTE),
- &gl2->outer_dst, &gl2->outer_src, NULL);
- vec_free (ips);
-}
-
-static void
-gbp_learn_l2_ip4_dp (const u8 * mac, const ip4_address_t * ip,
- u32 bd_index, u32 sw_if_index, sclass_t sclass,
- const ip4_address_t * outer_src,
- const ip4_address_t * outer_dst)
-{
- gbp_learn_l2_t gl2 = {
- .sw_if_index = sw_if_index,
- .bd_index = bd_index,
- .sclass = sclass,
- .ip.ip4 = *ip,
- .outer_src.ip4 = *outer_src,
- .outer_dst.ip4 = *outer_dst,
- };
- mac_address_from_bytes (&gl2.mac, mac);
-
- vl_api_rpc_call_main_thread (gbp_learn_l2_cp, (u8 *) & gl2, sizeof (gl2));
-}
-
-static void
-gbp_learn_l2_ip6_dp (const u8 * mac, const ip6_address_t * ip,
- u32 bd_index, u32 sw_if_index, sclass_t sclass,
- const ip4_address_t * outer_src,
- const ip4_address_t * outer_dst)
-{
- gbp_learn_l2_t gl2 = {
- .sw_if_index = sw_if_index,
- .bd_index = bd_index,
- .sclass = sclass,
- .ip.ip6 = *ip,
- .outer_src.ip4 = *outer_src,
- .outer_dst.ip4 = *outer_dst,
- };
- mac_address_from_bytes (&gl2.mac, mac);
-
- vl_api_rpc_call_main_thread (gbp_learn_l2_cp, (u8 *) & gl2, sizeof (gl2));
-}
-
-static void
-gbp_learn_l2_dp (const u8 * mac, u32 bd_index, u32 sw_if_index,
- sclass_t sclass,
- const ip4_address_t * outer_src,
- const ip4_address_t * outer_dst)
-{
- gbp_learn_l2_t gl2 = {
- .sw_if_index = sw_if_index,
- .bd_index = bd_index,
- .sclass = sclass,
- .outer_src.ip4 = *outer_src,
- .outer_dst.ip4 = *outer_dst,
- };
- mac_address_from_bytes (&gl2.mac, mac);
-
- vl_api_rpc_call_main_thread (gbp_learn_l2_cp, (u8 *) & gl2, sizeof (gl2));
-}
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_learn_l2_trace_t_
-{
- /* per-pkt trace data */
- mac_address_t mac;
- u32 sw_if_index;
- u32 new;
- u32 throttled;
- u32 sclass;
- u32 d_bit;
- gbp_bridge_domain_flags_t gb_flags;
-} gbp_learn_l2_trace_t;
-
-always_inline void
-gbp_learn_get_outer (const ethernet_header_t * eh0,
- ip4_address_t * outer_src, ip4_address_t * outer_dst)
-{
- ip4_header_t *ip0;
- u8 *buff;
-
- /* rewind to the outer IP header of the VXLAN-GBP encap */
- buff = (u8 *) eh0;
- buff -= (sizeof (vxlan_gbp_header_t) +
- sizeof (udp_header_t) + sizeof (ip4_header_t));
-
- ip0 = (ip4_header_t *) buff;
-
- *outer_src = ip0->src_address;
- *outer_dst = ip0->dst_address;
-}
-
-always_inline int
-gbp_endpoint_update_required (const gbp_endpoint_t * ge0,
- u32 rx_sw_if_index, sclass_t sclass)
-{
- /* Conditions for [re]learning this EP */
-
- /* 1. it doesn't have a dataplane source */
- if (!gbp_endpoint_is_learnt (ge0))
- return (!0);
-
- /* 2. has the input interface changed */
- if (gbp_itf_get_sw_if_index (ge0->ge_fwd.gef_itf) != rx_sw_if_index)
- return (!0);
-
- /* 3. has the sclass changed */
- if (sclass != ge0->ge_fwd.gef_sclass)
- return (!0);
-
- /* otherwise it's unchanged */
- return (0);
-}
-
-VLIB_NODE_FN (gbp_learn_l2_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- u32 n_left_from, *from, *to_next, next_index, thread_index, seed;
- gbp_learn_main_t *glm;
- f64 time_now;
-
- glm = &gbp_learn_main;
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
- time_now = vlib_time_now (vm);
- thread_index = vm->thread_index;
-
- seed = throttle_seed (&glm->gl_l2_throttle, thread_index, time_now);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- ip4_address_t outer_src, outer_dst;
- const ethernet_header_t *eh0;
- u32 bi0, sw_if_index0, t0;
- gbp_bridge_domain_t *gb0;
- gbp_learn_next_t next0;
- gbp_endpoint_t *ge0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
-
- next0 = GBP_LEARN_NEXT_DROP;
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- eh0 = vlib_buffer_get_current (b0);
- sclass0 = vnet_buffer2 (b0)->gbp.sclass;
-
- next0 = vnet_l2_feature_next (b0, glm->gl_l2_input_feat_next,
- L2INPUT_FEAT_GBP_LEARN);
-
- ge0 = gbp_endpoint_find_mac (eh0->src_address,
- vnet_buffer (b0)->l2.bd_index);
- gb0 =
- gbp_bridge_domain_get_by_bd_index (vnet_buffer (b0)->l2.bd_index);
-
- if ((vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_D) ||
- (gb0->gb_flags & GBP_BD_FLAG_DO_NOT_LEARN))
- {
- t0 = 1;
- goto trace;
- }
-
- /*
- * check for new EP or a moved EP
- */
- if (NULL == ge0 ||
- gbp_endpoint_update_required (ge0, sw_if_index0, sclass0))
- {
- /*
- * use the last 4 bytes of the mac address as the hash for the EP
- */
- t0 = throttle_check (&glm->gl_l2_throttle, thread_index,
- *((u32 *) (eh0->src_address + 2)), seed);
- if (!t0)
- {
- gbp_learn_get_outer (eh0, &outer_src, &outer_dst);
-
- if (outer_src.as_u32 == 0 || outer_dst.as_u32 == 0)
- {
- t0 = 2;
- goto trace;
- }
-
- switch (clib_net_to_host_u16 (eh0->type))
- {
- case ETHERNET_TYPE_IP4:
- {
- const ip4_header_t *ip0;
-
- ip0 = (ip4_header_t *) (eh0 + 1);
-
- gbp_learn_l2_ip4_dp (eh0->src_address,
- &ip0->src_address,
- vnet_buffer (b0)->l2.bd_index,
- sw_if_index0, sclass0,
- &outer_src, &outer_dst);
-
- break;
- }
- case ETHERNET_TYPE_IP6:
- {
- const ip6_header_t *ip0;
-
- ip0 = (ip6_header_t *) (eh0 + 1);
-
- gbp_learn_l2_ip6_dp (eh0->src_address,
- &ip0->src_address,
- vnet_buffer (b0)->l2.bd_index,
- sw_if_index0, sclass0,
- &outer_src, &outer_dst);
-
- break;
- }
- case ETHERNET_TYPE_ARP:
- {
- const ethernet_arp_header_t *arp0;
-
- arp0 = (ethernet_arp_header_t *) (eh0 + 1);
-
- gbp_learn_l2_ip4_dp (eh0->src_address,
- &arp0->ip4_over_ethernet[0].ip4,
- vnet_buffer (b0)->l2.bd_index,
- sw_if_index0, sclass0,
- &outer_src, &outer_dst);
- break;
- }
- default:
- gbp_learn_l2_dp (eh0->src_address,
- vnet_buffer (b0)->l2.bd_index,
- sw_if_index0, sclass0,
- &outer_src, &outer_dst);
- break;
- }
- }
- }
- else
- {
- /*
- * this update could happen simultaneously from multiple workers,
- * but that's OK; we are not interested in being very accurate.
- */
- t0 = 0;
- ge0->ge_last_time = time_now;
- }
- trace:
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- gbp_learn_l2_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- clib_memcpy_fast (t->mac.bytes, eh0->src_address, 6);
- t->new = (NULL == ge0);
- t->throttled = t0;
- t->sw_if_index = sw_if_index0;
- t->sclass = sclass0;
- t->gb_flags = gb0->gb_flags;
- t->d_bit = ! !(vnet_buffer2 (b0)->gbp.flags &
- VXLAN_GBP_GPFLAGS_D);
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-/* packet trace format function */
-static u8 *
-format_gbp_learn_l2_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_learn_l2_trace_t *t = va_arg (*args, gbp_learn_l2_trace_t *);
-
- s = format (s, "new:%d throttled:%d d-bit:%d mac:%U itf:%d sclass:%d"
- " gb-flags:%U",
- t->new, t->throttled, t->d_bit,
- format_mac_address_t, &t->mac, t->sw_if_index, t->sclass,
- format_gbp_bridge_domain_flags, t->gb_flags);
-
- return s;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_learn_l2_node) = {
- .name = "gbp-learn-l2",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_learn_l2_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(gbp_learn_error_strings),
- .error_strings = gbp_learn_error_strings,
-
- .n_next_nodes = GBP_LEARN_N_NEXT,
-
- .next_nodes = {
- [GBP_LEARN_NEXT_DROP] = "error-drop",
- },
-};
-/* *INDENT-ON* */
-
-typedef struct gbp_learn_l3_t_
-{
- ip46_address_t ip;
- u32 fib_index;
- u32 sw_if_index;
- sclass_t sclass;
- ip46_address_t outer_src;
- ip46_address_t outer_dst;
-} gbp_learn_l3_t;
-
-static void
-gbp_learn_l3_cp (const gbp_learn_l3_t * gl3)
-{
- ip46_address_t *ips = NULL;
-
- GBP_LEARN_DBG ("L3 EP: %U, %d", format_ip46_address, &gl3->ip,
- IP46_TYPE_ANY, gl3->sclass);
-
- vec_add1 (ips, gl3->ip);
-
- gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_DP,
- gl3->sw_if_index, ips, NULL,
- INDEX_INVALID, INDEX_INVALID, gl3->sclass,
- (GBP_ENDPOINT_FLAG_REMOTE |
- GBP_ENDPOINT_FLAG_LEARNT),
- &gl3->outer_dst, &gl3->outer_src, NULL);
- vec_free (ips);
-}
-
-static void
-gbp_learn_ip4_dp (const ip4_address_t * ip,
- u32 fib_index, u32 sw_if_index, sclass_t sclass,
- const ip4_address_t * outer_src,
- const ip4_address_t * outer_dst)
-{
- /* *INDENT-OFF* */
- gbp_learn_l3_t gl3 = {
- .ip = {
- .ip4 = *ip,
- },
- .sw_if_index = sw_if_index,
- .fib_index = fib_index,
- .sclass = sclass,
- .outer_src.ip4 = *outer_src,
- .outer_dst.ip4 = *outer_dst,
- };
- /* *INDENT-ON* */
-
- vl_api_rpc_call_main_thread (gbp_learn_l3_cp, (u8 *) & gl3, sizeof (gl3));
-}
-
-static void
-gbp_learn_ip6_dp (const ip6_address_t * ip,
- u32 fib_index, u32 sw_if_index, sclass_t sclass,
- const ip4_address_t * outer_src,
- const ip4_address_t * outer_dst)
-{
- /* *INDENT-OFF* */
- gbp_learn_l3_t gl3 = {
- .ip = {
- .ip6 = *ip,
- },
- .sw_if_index = sw_if_index,
- .fib_index = fib_index,
- .sclass = sclass,
- .outer_src.ip4 = *outer_src,
- .outer_dst.ip4 = *outer_dst,
- };
- /* *INDENT-ON* */
-
- vl_api_rpc_call_main_thread (gbp_learn_l3_cp, (u8 *) & gl3, sizeof (gl3));
-}
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_learn_l3_trace_t_
-{
- /* per-pkt trace data */
- ip46_address_t ip;
- u32 sw_if_index;
- u32 new;
- u32 throttled;
- u32 sclass;
-} gbp_learn_l3_trace_t;
-
-static uword
-gbp_learn_l3 (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame,
- fib_protocol_t fproto)
-{
- u32 n_left_from, *from, *to_next, next_index, thread_index, seed;
- gbp_learn_main_t *glm;
- f64 time_now;
-
- glm = &gbp_learn_main;
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
- time_now = vlib_time_now (vm);
- thread_index = vm->thread_index;
-
- seed = throttle_seed (&glm->gl_l3_throttle, thread_index, time_now);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- CLIB_UNUSED (const ip4_header_t *) ip4_0;
- CLIB_UNUSED (const ip6_header_t *) ip6_0;
- u32 bi0, sw_if_index0, t0, fib_index0;
- ip4_address_t outer_src, outer_dst;
- ethernet_header_t *eth0;
- gbp_learn_next_t next0;
- gbp_endpoint_t *ge0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
-
- next0 = GBP_LEARN_NEXT_DROP;
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- sclass0 = vnet_buffer2 (b0)->gbp.sclass;
- ip6_0 = NULL;
- ip4_0 = NULL;
-
- vnet_feature_next (&next0, b0);
-
- if (vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_D)
- {
- t0 = 1;
- ge0 = NULL;
- goto trace;
- }
-
- fib_index0 = fib_table_get_index_for_sw_if_index (fproto,
- sw_if_index0);
-
- if (FIB_PROTOCOL_IP6 == fproto)
- {
- ip6_0 = vlib_buffer_get_current (b0);
- eth0 = (ethernet_header_t *) (((u8 *) ip6_0) - sizeof (*eth0));
-
- gbp_learn_get_outer (eth0, &outer_src, &outer_dst);
-
- ge0 = gbp_endpoint_find_ip6 (&ip6_0->src_address, fib_index0);
-
- if ((NULL == ge0) ||
- gbp_endpoint_update_required (ge0, sw_if_index0, sclass0))
- {
- t0 = throttle_check (&glm->gl_l3_throttle,
- thread_index,
- ip6_address_hash_to_u32
- (&ip6_0->src_address), seed);
-
- if (!t0)
- {
- gbp_learn_ip6_dp (&ip6_0->src_address,
- fib_index0, sw_if_index0, sclass0,
- &outer_src, &outer_dst);
- }
- }
- else
- {
- /*
- * this update could happen simultaneously from multiple
- * workers, but that's OK; we are not interested in being
- * very accurate.
- */
- t0 = 0;
- ge0->ge_last_time = time_now;
- }
- }
- else
- {
- ip4_0 = vlib_buffer_get_current (b0);
- eth0 = (ethernet_header_t *) (((u8 *) ip4_0) - sizeof (*eth0));
-
- gbp_learn_get_outer (eth0, &outer_src, &outer_dst);
- ge0 = gbp_endpoint_find_ip4 (&ip4_0->src_address, fib_index0);
-
- if ((NULL == ge0) ||
- gbp_endpoint_update_required (ge0, sw_if_index0, sclass0))
- {
- t0 = throttle_check (&glm->gl_l3_throttle, thread_index,
- ip4_0->src_address.as_u32, seed);
-
- if (!t0)
- {
- gbp_learn_ip4_dp (&ip4_0->src_address,
- fib_index0, sw_if_index0, sclass0,
- &outer_src, &outer_dst);
- }
- }
- else
- {
- /*
- * this update could happen simultaneously from multiple
- * workers, but that's OK; we are not interested in being
- * very accurate.
- */
- t0 = 0;
- ge0->ge_last_time = time_now;
- }
- }
- trace:
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- gbp_learn_l3_trace_t *t;
-
- t = vlib_add_trace (vm, node, b0, sizeof (*t));
- if (FIB_PROTOCOL_IP6 == fproto && ip6_0)
- ip46_address_set_ip6 (&t->ip, &ip6_0->src_address);
- if (FIB_PROTOCOL_IP4 == fproto && ip4_0)
- ip46_address_set_ip4 (&t->ip, &ip4_0->src_address);
- t->new = (NULL == ge0);
- t->throttled = t0;
- t->sw_if_index = sw_if_index0;
- t->sclass = sclass0;
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-/* packet trace format function */
-static u8 *
-format_gbp_learn_l3_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_learn_l3_trace_t *t = va_arg (*args, gbp_learn_l3_trace_t *);
-
- s = format (s, "new:%d throttled:%d ip:%U itf:%d sclass:%d",
- t->new, t->throttled,
- format_ip46_address, &t->ip, IP46_TYPE_ANY, t->sw_if_index,
- t->sclass);
-
- return s;
-}
-
-VLIB_NODE_FN (gbp_learn_ip4_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_learn_l3 (vm, node, frame, FIB_PROTOCOL_IP4));
-}
-
-VLIB_NODE_FN (gbp_learn_ip6_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_learn_l3 (vm, node, frame, FIB_PROTOCOL_IP6));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_learn_ip4_node) = {
- .name = "gbp-learn-ip4",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_learn_l3_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-};
-
-VNET_FEATURE_INIT (gbp_learn_ip4, static) =
-{
- .arc_name = "ip4-unicast",
- .node_name = "gbp-learn-ip4",
-};
-
-VLIB_REGISTER_NODE (gbp_learn_ip6_node) = {
- .name = "gbp-learn-ip6",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_learn_l3_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-};
-
-VNET_FEATURE_INIT (gbp_learn_ip6, static) =
-{
- .arc_name = "ip6-unicast",
- .node_name = "gbp-learn-ip6",
-};
-
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
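
The *_dp/*_cp split above is the standard worker-to-main RPC pattern: learning mutates control-plane state, so a worker never touches the endpoint DB directly; it marshals the arguments into a flat struct and posts it with vl_api_rpc_call_main_thread(), which copies the struct and runs the handler on the main thread. Reduced to a hedged minimum (my_args_t and both functions are hypothetical):

    typedef struct my_args_t_
    {
      u32 sw_if_index;
      sclass_t sclass;
    } my_args_t;

    static void
    my_handler (const my_args_t * a)
    {
      /* main thread: safe to update control-plane state here */
    }

    static void
    my_punt_from_worker (u32 sw_if_index, sclass_t sclass)
    {
      my_args_t a = {
        .sw_if_index = sw_if_index,
        .sclass = sclass,
      };

      /* the struct is copied, so stack storage is fine */
      vl_api_rpc_call_main_thread (my_handler, (u8 *) &a, sizeof (a));
    }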
diff --git a/src/plugins/gbp/gbp_policy.c b/src/plugins/gbp/gbp_policy.c
deleted file mode 100644
index 127c6d3f059..00000000000
--- a/src/plugins/gbp/gbp_policy.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_policy.h>
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-
-gbp_policy_main_t gbp_policy_main;
-
-/* packet trace format function */
-u8 *
-format_gbp_policy_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_policy_trace_t *t = va_arg (*args, gbp_policy_trace_t *);
-
- s =
- format (s,
- "scope:%d sclass:%d, dclass:%d, action:%U flags:%U acl: %d rule: %d",
- t->scope, t->sclass, t->dclass, format_gbp_rule_action, t->action,
- format_vxlan_gbp_header_gpflags, t->flags, t->acl_match,
- t->rule_match);
-
- return s;
-}
-
-static clib_error_t *
-gbp_policy_init (vlib_main_t * vm)
-{
- gbp_policy_main_t *gpm = &gbp_policy_main;
- clib_error_t *error = 0;
-
- /* Initialize the feature next-node indexes */
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "gbp-policy-port");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2OUTPUT_N_FEAT,
- l2output_get_feat_names (),
- gpm->l2_output_feat_next[GBP_POLICY_PORT]);
-
- node = vlib_get_node_by_name (vm, (u8 *) "gbp-policy-mac");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2OUTPUT_N_FEAT,
- l2output_get_feat_names (),
- gpm->l2_output_feat_next[GBP_POLICY_MAC]);
-
- node = vlib_get_node_by_name (vm, (u8 *) "gbp-policy-lpm");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2OUTPUT_N_FEAT,
- l2output_get_feat_names (),
- gpm->l2_output_feat_next[GBP_POLICY_LPM]);
-
- return error;
-}
-
-VLIB_INIT_FUNCTION (gbp_policy_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_policy.h b/src/plugins/gbp/gbp_policy.h
deleted file mode 100644
index 6f87f2ec7c4..00000000000
--- a/src/plugins/gbp/gbp_policy.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_POLICY_H__
-#define __GBP_POLICY_H__
-
-#include <plugins/gbp/gbp_contract.h>
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_policy_trace_t_
-{
- /* per-pkt trace data */
- gbp_scope_t scope;
- sclass_t sclass;
- sclass_t dclass;
- gbp_rule_action_t action;
- u32 flags;
- u32 acl_match;
- u32 rule_match;
-} gbp_policy_trace_t;
-
-/* packet trace format function */
-u8 * format_gbp_policy_trace (u8 * s, va_list * args);
-
-static_always_inline void
-gbp_policy_trace(vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *b, const gbp_contract_key_t *key, gbp_rule_action_t action, u32 acl_match, u32 rule_match)
-{
- gbp_policy_trace_t *t;
-
- if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_IS_TRACED)))
- return;
-
- t = vlib_add_trace (vm, node, b, sizeof (*t));
- t->sclass = key->gck_src;
- t->dclass = key->gck_dst;
- t->scope = key->gck_scope;
- t->action = action;
- t->flags = vnet_buffer2 (b)->gbp.flags;
- t->acl_match = acl_match;
- t->rule_match = rule_match;
-}
-
-#endif /* __GBP_POLICY_H__ */
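
gbp_policy_trace() is the shared tracing helper for all the policy nodes: return immediately unless the buffer is traced, then append a fixed-size record with vlib_add_trace() for the node's .format_trace callback (format_gbp_policy_trace above) to render. The same shape, as a generic sketch (my_trace_t and my_trace are illustrative):

    typedef struct my_trace_t_
    {
      u32 next_index;
    } my_trace_t;

    static_always_inline void
    my_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
              vlib_buffer_t * b, u32 next_index)
    {
      my_trace_t *t;

      if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_IS_TRACED)))
        return;

      t = vlib_add_trace (vm, node, b, sizeof (*t));
      t->next_index = next_index;
    }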
diff --git a/src/plugins/gbp/gbp_policy_dpo.c b/src/plugins/gbp/gbp_policy_dpo.c
deleted file mode 100644
index 9f26b9c67ab..00000000000
--- a/src/plugins/gbp/gbp_policy_dpo.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/dpo/dvr_dpo.h>
-#include <vnet/dpo/drop_dpo.h>
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_policy.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_recirc.h>
-#include <plugins/gbp/gbp_contract.h>
-
-#ifndef CLIB_MARCH_VARIANT
-/**
- * DPO pool
- */
-gbp_policy_dpo_t *gbp_policy_dpo_pool;
-
-/**
- * DPO type registered for these GBP FWD
- */
-dpo_type_t gbp_policy_dpo_type;
-
-static gbp_policy_dpo_t *
-gbp_policy_dpo_alloc (void)
-{
- gbp_policy_dpo_t *gpd;
-
- pool_get_aligned_zero (gbp_policy_dpo_pool, gpd, CLIB_CACHE_LINE_BYTES);
-
- return (gpd);
-}
-
-static inline gbp_policy_dpo_t *
-gbp_policy_dpo_get_from_dpo (const dpo_id_t * dpo)
-{
- ASSERT (gbp_policy_dpo_type == dpo->dpoi_type);
-
- return (gbp_policy_dpo_get (dpo->dpoi_index));
-}
-
-static inline index_t
-gbp_policy_dpo_get_index (gbp_policy_dpo_t * gpd)
-{
- return (gpd - gbp_policy_dpo_pool);
-}
-
-static void
-gbp_policy_dpo_lock (dpo_id_t * dpo)
-{
- gbp_policy_dpo_t *gpd;
-
- gpd = gbp_policy_dpo_get_from_dpo (dpo);
- gpd->gpd_locks++;
-}
-
-static void
-gbp_policy_dpo_unlock (dpo_id_t * dpo)
-{
- gbp_policy_dpo_t *gpd;
-
- gpd = gbp_policy_dpo_get_from_dpo (dpo);
- gpd->gpd_locks--;
-
- if (0 == gpd->gpd_locks)
- {
- dpo_reset (&gpd->gpd_dpo);
- pool_put (gbp_policy_dpo_pool, gpd);
- }
-}
-
-static u32
-gbp_policy_dpo_get_urpf (const dpo_id_t * dpo)
-{
- gbp_policy_dpo_t *gpd;
-
- gpd = gbp_policy_dpo_get_from_dpo (dpo);
-
- return (gpd->gpd_sw_if_index);
-}
-
-void
-gbp_policy_dpo_add_or_lock (dpo_proto_t dproto,
- gbp_scope_t scope,
- sclass_t sclass, u32 sw_if_index, dpo_id_t * dpo)
-{
- gbp_policy_dpo_t *gpd;
- dpo_id_t parent = DPO_INVALID;
-
- gpd = gbp_policy_dpo_alloc ();
-
- gpd->gpd_proto = dproto;
- gpd->gpd_sw_if_index = sw_if_index;
- gpd->gpd_sclass = sclass;
- gpd->gpd_scope = scope;
-
- if (~0 != sw_if_index)
- {
- /*
- * stack on the DVR DPO for the output interface
- */
- dvr_dpo_add_or_lock (sw_if_index, dproto, &parent);
- }
- else
- {
- dpo_copy (&parent, drop_dpo_get (dproto));
- }
-
- dpo_stack (gbp_policy_dpo_type, dproto, &gpd->gpd_dpo, &parent);
- dpo_set (dpo, gbp_policy_dpo_type, dproto, gbp_policy_dpo_get_index (gpd));
-}
-
-u8 *
-format_gbp_policy_dpo (u8 * s, va_list * ap)
-{
- index_t index = va_arg (*ap, index_t);
- u32 indent = va_arg (*ap, u32);
- gbp_policy_dpo_t *gpd = gbp_policy_dpo_get (index);
- vnet_main_t *vnm = vnet_get_main ();
-
- s = format (s, "gbp-policy-dpo: %U, scope:%d sclass:%d out:%U",
- format_dpo_proto, gpd->gpd_proto,
- gpd->gpd_scope, (int) gpd->gpd_sclass,
- format_vnet_sw_if_index_name, vnm, gpd->gpd_sw_if_index);
- s = format (s, "\n%U", format_white_space, indent + 2);
- s = format (s, "%U", format_dpo_id, &gpd->gpd_dpo, indent + 4);
-
- return (s);
-}
-
-/**
- * Interpose a policy DPO
- */
-static void
-gbp_policy_dpo_interpose (const dpo_id_t * original,
- const dpo_id_t * parent, dpo_id_t * clone)
-{
- gbp_policy_dpo_t *gpd, *gpd_clone;
-
- gpd_clone = gbp_policy_dpo_alloc ();
- gpd = gbp_policy_dpo_get (original->dpoi_index);
-
- gpd_clone->gpd_proto = gpd->gpd_proto;
- gpd_clone->gpd_scope = gpd->gpd_scope;
- gpd_clone->gpd_sclass = gpd->gpd_sclass;
- gpd_clone->gpd_sw_if_index = gpd->gpd_sw_if_index;
-
- /*
- * if no interface is provided, grab one from the parent
- * on which we stack
- */
- if (~0 == gpd_clone->gpd_sw_if_index)
- gpd_clone->gpd_sw_if_index = dpo_get_urpf (parent);
-
- dpo_stack (gbp_policy_dpo_type,
- gpd_clone->gpd_proto, &gpd_clone->gpd_dpo, parent);
-
- dpo_set (clone,
- gbp_policy_dpo_type,
- gpd_clone->gpd_proto, gbp_policy_dpo_get_index (gpd_clone));
-}
-
-const static dpo_vft_t gbp_policy_dpo_vft = {
- .dv_lock = gbp_policy_dpo_lock,
- .dv_unlock = gbp_policy_dpo_unlock,
- .dv_format = format_gbp_policy_dpo,
- .dv_get_urpf = gbp_policy_dpo_get_urpf,
- .dv_mk_interpose = gbp_policy_dpo_interpose,
-};
-
-/**
- * @brief The per-protocol VLIB graph nodes that are assigned to a GBP
- * policy DPO object.
- *
- * This means that these graph nodes are ones for which a GBP policy DPO is
- * the parent object in the DPO graph.
- */
-const static char *const gbp_policy_dpo_ip4_nodes[] = {
- "ip4-gbp-policy-dpo",
- NULL,
-};
-
-const static char *const gbp_policy_dpo_ip6_nodes[] = {
- "ip6-gbp-policy-dpo",
- NULL,
-};
-
-const static char *const *const gbp_policy_dpo_nodes[DPO_PROTO_NUM] = {
- [DPO_PROTO_IP4] = gbp_policy_dpo_ip4_nodes,
- [DPO_PROTO_IP6] = gbp_policy_dpo_ip6_nodes,
-};
-
-dpo_type_t
-gbp_policy_dpo_get_type (void)
-{
- return (gbp_policy_dpo_type);
-}
-
-static clib_error_t *
-gbp_policy_dpo_module_init (vlib_main_t * vm)
-{
- gbp_policy_dpo_type = dpo_register_new_type (&gbp_policy_dpo_vft,
- gbp_policy_dpo_nodes);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_policy_dpo_module_init);
-#endif /* CLIB_MARCH_VARIANT */
-
-typedef enum
-{
- GBP_POLICY_DROP,
- GBP_POLICY_N_NEXT,
-} gbp_policy_next_t;
-
-always_inline u32
-gbp_rule_l3_redirect (const gbp_rule_t * gu, vlib_buffer_t * b0, int is_ip6)
-{
- gbp_policy_node_t pnode;
- const dpo_id_t *dpo;
- dpo_proto_t dproto;
-
- pnode = (is_ip6 ? GBP_POLICY_NODE_IP6 : GBP_POLICY_NODE_IP4);
- dproto = (is_ip6 ? DPO_PROTO_IP6 : DPO_PROTO_IP4);
- dpo = &gu->gu_dpo[pnode][dproto];
-
- /* The flow hash is still valid as this is an IP packet being switched */
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo->dpoi_index;
-
- return (dpo->dpoi_next_node);
-}
-
-always_inline uword
-gbp_policy_dpo_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip6)
-{
- gbp_main_t *gm = &gbp_main;
- u32 n_left_from, next_index, *from, *to_next;
- u32 n_allow_intra, n_allow_a_bit, n_allow_sclass_1;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
- n_allow_intra = n_allow_a_bit = n_allow_sclass_1 = 0;
-
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- gbp_rule_action_t action0 = GBP_RULE_DENY;
- u32 acl_match = ~0, rule_match = ~0;
- const gbp_policy_dpo_t *gpd0;
- gbp_contract_error_t err0;
- gbp_contract_key_t key0;
- vlib_buffer_t *b0;
- gbp_rule_t *rule0;
- u32 bi0, next0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
- next0 = GBP_POLICY_DROP;
-
- b0 = vlib_get_buffer (vm, bi0);
-
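-	  /* the FIB lookup stored this policy DPO's index in the buffer;
-	   * swap in the stacked parent's index ready for the next node */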
- gpd0 = gbp_policy_dpo_get (vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = gpd0->gpd_dpo.dpoi_index;
-
- /*
-	   * Reflection check; drop packets that arrive on and would leave via an iVXLAN tunnel
- */
- if ((~0 != vxlan_gbp_tunnel_by_sw_if_index (gpd0->gpd_sw_if_index))
- && (vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_R))
- {
- goto trace;
- }
-
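-	  /*
-	   * If the A-bit is set then policy has already been applied
-	   * elsewhere; let the packet through
-	   */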
- if (vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_A)
- {
- next0 = gpd0->gpd_dpo.dpoi_next_node;
- key0.as_u64 = ~0;
- n_allow_a_bit++;
- goto trace;
- }
-
- /* zero out the key to ensure the pad space is clear */
- key0.as_u64 = 0;
- key0.gck_src = vnet_buffer2 (b0)->gbp.sclass;
-
- if (SCLASS_INVALID == key0.gck_src)
- {
- /*
-	       * the src EPG is not set when the packet arrives on an EPG
-	       * uplink interface, so we do not need to apply policy
- */
- next0 = gpd0->gpd_dpo.dpoi_next_node;
- goto trace;
- }
-
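-	  /* the scope and destination sclass are properties of the policy
-	   * DPO itself, fixed when it was created */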
- key0.gck_scope = gpd0->gpd_scope;
- key0.gck_dst = gpd0->gpd_sclass;
-
- action0 =
- gbp_contract_apply (vm, gm, &key0, b0, &rule0, &n_allow_intra,
- &n_allow_sclass_1, &acl_match, &rule_match,
- &err0,
- is_ip6 ? GBP_CONTRACT_APPLY_IP6 :
- GBP_CONTRACT_APPLY_IP4);
- switch (action0)
- {
- case GBP_RULE_PERMIT:
- next0 = gpd0->gpd_dpo.dpoi_next_node;
- vnet_buffer2 (b0)->gbp.flags |= VXLAN_GBP_GPFLAGS_A;
- break;
- case GBP_RULE_REDIRECT:
- next0 = gbp_rule_l3_redirect (rule0, b0, is_ip6);
- vnet_buffer2 (b0)->gbp.flags |= VXLAN_GBP_GPFLAGS_A;
- break;
- case GBP_RULE_DENY:
- next0 = GBP_POLICY_DROP;
- b0->error = node->errors[err0];
- break;
- }
-
- trace:
- gbp_policy_trace (vm, node, b0, &key0, action0, acl_match,
- rule_match);
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_INTRA, n_allow_intra);
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_A_BIT, n_allow_a_bit);
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_SCLASS_1,
- n_allow_sclass_1);
- return from_frame->n_vectors;
-}
-
-VLIB_NODE_FN (ip4_gbp_policy_dpo_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return (gbp_policy_dpo_inline (vm, node, from_frame, 0));
-}
-
-VLIB_NODE_FN (ip6_gbp_policy_dpo_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return (gbp_policy_dpo_inline (vm, node, from_frame, 1));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip4_gbp_policy_dpo_node) = {
- .name = "ip4-gbp-policy-dpo",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_policy_trace,
-
- .n_errors = ARRAY_LEN(gbp_contract_error_strings),
- .error_strings = gbp_contract_error_strings,
-
- .n_next_nodes = GBP_POLICY_N_NEXT,
- .next_nodes =
- {
- [GBP_POLICY_DROP] = "ip4-drop",
- }
-};
-VLIB_REGISTER_NODE (ip6_gbp_policy_dpo_node) = {
- .name = "ip6-gbp-policy-dpo",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_policy_trace,
-
- .n_errors = ARRAY_LEN(gbp_contract_error_strings),
- .error_strings = gbp_contract_error_strings,
-
- .n_next_nodes = GBP_POLICY_N_NEXT,
- .next_nodes =
- {
- [GBP_POLICY_DROP] = "ip6-drop",
- }
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_policy_dpo.h b/src/plugins/gbp/gbp_policy_dpo.h
deleted file mode 100644
index 77ca5d93bd0..00000000000
--- a/src/plugins/gbp/gbp_policy_dpo.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_POLICY_DPO_H__
-#define __GBP_POLICY_DPO_H__
-
-#include <vnet/dpo/dpo.h>
-#include <vnet/dpo/load_balance.h>
-#include <vnet/fib/ip4_fib.h>
-#include <vnet/fib/ip6_fib.h>
-
-/**
- * @brief
- * The GBP policy DPO. Used in the L3 path to apply a GBP contract
- * between the packet's source sclass and this DPO's destination sclass.
- */
-typedef struct gbp_policy_dpo_t_
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /**
- * The protocol of packets using this DPO
- */
- dpo_proto_t gpd_proto;
-
- /**
- * SClass
- */
- sclass_t gpd_sclass;
-
- /**
- * sclass scope
- */
- gbp_scope_t gpd_scope;
-
- /**
- * output sw_if_index
- */
- u32 gpd_sw_if_index;
-
- /**
- * number of locks.
- */
- u16 gpd_locks;
-
- /**
- * Stacked DPO on DVR/ADJ of output interface
- */
- dpo_id_t gpd_dpo;
-} gbp_policy_dpo_t;
-
-extern void gbp_policy_dpo_add_or_lock (dpo_proto_t dproto,
- gbp_scope_t scope,
- sclass_t sclass,
- u32 sw_if_index, dpo_id_t * dpo);
-
-extern dpo_type_t gbp_policy_dpo_get_type (void);
-
-extern vlib_node_registration_t ip4_gbp_policy_dpo_node;
-extern vlib_node_registration_t ip6_gbp_policy_dpo_node;
-extern vlib_node_registration_t gbp_policy_port_node;
-
-/**
- * Types exposed for the Data-plane
- */
-extern dpo_type_t gbp_policy_dpo_type;
-extern gbp_policy_dpo_t *gbp_policy_dpo_pool;
-
-always_inline gbp_policy_dpo_t *
-gbp_policy_dpo_get (index_t index)
-{
- return (pool_elt_at_index (gbp_policy_dpo_pool, index));
-}
-
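-/**
- * Lookup the address in the FIB and return the gbp-policy DPO that the
- * resulting load-balance resolves through, or NULL if the first bucket
- * is not a gbp-policy DPO.
- */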
-static_always_inline const gbp_policy_dpo_t *
-gbp_classify_get_gpd (const ip4_address_t * ip4, const ip6_address_t * ip6,
- const u32 fib_index)
-{
- const gbp_policy_dpo_t *gpd;
- const dpo_id_t *dpo;
- const load_balance_t *lb;
- u32 lbi;
-
- if (ip4)
- lbi = ip4_fib_forwarding_lookup (fib_index, ip4);
- else if (ip6)
- lbi = ip6_fib_table_fwding_lookup (fib_index, ip6);
- else
- return 0;
-
- lb = load_balance_get (lbi);
- dpo = load_balance_get_bucket_i (lb, 0);
-
- if (dpo->dpoi_type != gbp_policy_dpo_type)
- return 0;
-
- gpd = gbp_policy_dpo_get (dpo->dpoi_index);
- return gpd;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
-
-#endif
diff --git a/src/plugins/gbp/gbp_policy_node.c b/src/plugins/gbp/gbp_policy_node.c
deleted file mode 100644
index 8c6ef5c2b94..00000000000
--- a/src/plugins/gbp/gbp_policy_node.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_classify.h>
-#include <plugins/gbp/gbp_policy.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_ext_itf.h>
-#include <plugins/gbp/gbp_contract.h>
-
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-
-typedef enum
-{
- GBP_POLICY_NEXT_DROP,
- GBP_POLICY_N_NEXT,
-} gbp_policy_next_t;
-
-always_inline dpo_proto_t
-ethertype_to_dpo_proto (u16 etype)
-{
- etype = clib_net_to_host_u16 (etype);
-
- switch (etype)
- {
- case ETHERNET_TYPE_IP4:
- return (DPO_PROTO_IP4);
- case ETHERNET_TYPE_IP6:
- return (DPO_PROTO_IP6);
- }
-
- return (DPO_PROTO_NONE);
-}
-
-always_inline u32
-gbp_rule_l2_redirect (const gbp_rule_t * gu, vlib_buffer_t * b0)
-{
- const ethernet_header_t *eth0;
- const dpo_id_t *dpo;
- dpo_proto_t dproto;
-
- eth0 = vlib_buffer_get_current (b0);
- /* pop the ethernet header to prepare for L3 rewrite */
- vlib_buffer_advance (b0, vnet_buffer (b0)->l2.l2_len);
-
- dproto = ethertype_to_dpo_proto (eth0->type);
- dpo = &gu->gu_dpo[GBP_POLICY_NODE_L2][dproto];
-
- /* save the LB index for the next node and reset the IP flow hash
- * so it's recalculated */
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo->dpoi_index;
- vnet_buffer (b0)->ip.flow_hash = 0;
-
- return (dpo->dpoi_next_node);
-}
-
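-/**
- * Skip to the next enabled L2-output feature for this policy type;
- * unknown types are dropped.
- */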
-static_always_inline gbp_policy_next_t
-gbp_policy_l2_feature_next (gbp_policy_main_t * gpm, vlib_buffer_t * b,
- const gbp_policy_type_t type)
-{
- u32 feat_bit;
-
- switch (type)
- {
- case GBP_POLICY_PORT:
- feat_bit = L2OUTPUT_FEAT_GBP_POLICY_PORT;
- break;
- case GBP_POLICY_MAC:
- feat_bit = L2OUTPUT_FEAT_GBP_POLICY_MAC;
- break;
- case GBP_POLICY_LPM:
- feat_bit = L2OUTPUT_FEAT_GBP_POLICY_LPM;
- break;
- default:
- return GBP_POLICY_NEXT_DROP;
- }
-
- return vnet_l2_feature_next (b, gpm->l2_output_feat_next[type], feat_bit);
-}
-
-static uword
-gbp_policy_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, const gbp_policy_type_t type)
-{
- gbp_main_t *gm = &gbp_main;
- gbp_policy_main_t *gpm = &gbp_policy_main;
- u32 n_left_from, *from, *to_next;
- u32 next_index;
- u32 n_allow_intra, n_allow_a_bit, n_allow_sclass_1;
-
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
- n_allow_intra = n_allow_a_bit = n_allow_sclass_1 = 0;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- gbp_rule_action_t action0 = GBP_RULE_DENY;
- const ethernet_header_t *h0;
- const gbp_endpoint_t *ge0;
- gbp_contract_error_t err0;
- u32 acl_match = ~0, rule_match = ~0;
- gbp_policy_next_t next0;
- gbp_contract_key_t key0;
- u32 bi0, sw_if_index0;
- vlib_buffer_t *b0;
- gbp_rule_t *rule0;
-
- next0 = GBP_POLICY_NEXT_DROP;
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- h0 = vlib_buffer_get_current (b0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
-
- /*
-	   * Reflection check; drop packets that arrive on and would leave via an iVXLAN tunnel
- */
- if ((~0 != vxlan_gbp_tunnel_by_sw_if_index (sw_if_index0)) &&
- (vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_R))
- {
- goto trace;
- }
-
- /*
- * If the A-bit is set then policy has already been applied
- * and we skip enforcement here.
- */
- if (vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_A)
- {
- next0 = gbp_policy_l2_feature_next (gpm, b0, type);
- n_allow_a_bit++;
- key0.as_u64 = ~0;
- goto trace;
- }
-
- /*
- * determine the src and dst EPG
- */
-
- /* zero out the key to ensure the pad space is clear */
- key0.as_u64 = 0;
- key0.gck_src = vnet_buffer2 (b0)->gbp.sclass;
- key0.gck_dst = SCLASS_INVALID;
-
- if (GBP_POLICY_LPM == type)
- {
- const ip4_address_t *ip4 = 0;
- const ip6_address_t *ip6 = 0;
- const dpo_proto_t proto =
- gbp_classify_get_ip_address (h0, &ip4, &ip6,
- GBP_CLASSIFY_GET_IP_DST);
- if (PREDICT_TRUE (DPO_PROTO_NONE != proto))
- {
- const gbp_ext_itf_t *ext_itf =
- gbp_ext_itf_get (sw_if_index0);
- const gbp_policy_dpo_t *gpd =
- gbp_classify_get_gpd (ip4, ip6,
- ext_itf->gx_fib_index[proto]);
- if (gpd)
- key0.gck_dst = gpd->gpd_sclass;
- }
- }
- else
- {
- if (GBP_POLICY_PORT == type)
- ge0 = gbp_endpoint_find_itf (sw_if_index0);
- else
- ge0 = gbp_endpoint_find_mac (h0->dst_address,
- vnet_buffer (b0)->l2.bd_index);
- if (NULL != ge0)
- key0.gck_dst = ge0->ge_fwd.gef_sclass;
- }
-
- if (SCLASS_INVALID == key0.gck_dst)
- {
- /* If you cannot determine the destination EP then drop */
- b0->error = node->errors[GBP_CONTRACT_ERROR_DROP_NO_DCLASS];
- goto trace;
- }
-
- if (SCLASS_INVALID == key0.gck_src)
- {
- /*
-	       * the src EPG is not set when the packet arrives on an EPG
-	       * uplink interface, so we do not need to apply policy
- */
- next0 = gbp_policy_l2_feature_next (gpm, b0, type);
- goto trace;
- }
-
- key0.gck_scope =
- gbp_bridge_domain_get_scope (vnet_buffer (b0)->l2.bd_index);
-
- action0 =
- gbp_contract_apply (vm, gm, &key0, b0, &rule0, &n_allow_intra,
- &n_allow_sclass_1, &acl_match, &rule_match,
- &err0, GBP_CONTRACT_APPLY_L2);
- switch (action0)
- {
- case GBP_RULE_PERMIT:
- next0 = gbp_policy_l2_feature_next (gpm, b0, type);
- vnet_buffer2 (b0)->gbp.flags |= VXLAN_GBP_GPFLAGS_A;
- break;
- case GBP_RULE_REDIRECT:
- next0 = gbp_rule_l2_redirect (rule0, b0);
- vnet_buffer2 (b0)->gbp.flags |= VXLAN_GBP_GPFLAGS_A;
- break;
- case GBP_RULE_DENY:
- next0 = GBP_POLICY_NEXT_DROP;
- b0->error = node->errors[err0];
- break;
- }
-
- trace:
- gbp_policy_trace (vm, node, b0, &key0, action0, acl_match,
- rule_match);
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_INTRA, n_allow_intra);
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_A_BIT, n_allow_a_bit);
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_SCLASS_1,
- n_allow_sclass_1);
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (gbp_policy_port_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_policy_inline (vm, node, frame, GBP_POLICY_PORT));
-}
-
-VLIB_NODE_FN (gbp_policy_mac_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_policy_inline (vm, node, frame, GBP_POLICY_MAC));
-}
-
-VLIB_NODE_FN (gbp_policy_lpm_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_policy_inline (vm, node, frame, GBP_POLICY_LPM));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_policy_port_node) = {
- .name = "gbp-policy-port",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_policy_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(gbp_contract_error_strings),
- .error_strings = gbp_contract_error_strings,
-
- .n_next_nodes = GBP_POLICY_N_NEXT,
- .next_nodes = {
- [GBP_POLICY_NEXT_DROP] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (gbp_policy_mac_node) = {
- .name = "gbp-policy-mac",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_policy_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(gbp_contract_error_strings),
- .error_strings = gbp_contract_error_strings,
-
- .n_next_nodes = GBP_POLICY_N_NEXT,
- .next_nodes = {
- [GBP_POLICY_NEXT_DROP] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (gbp_policy_lpm_node) = {
- .name = "gbp-policy-lpm",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_policy_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(gbp_contract_error_strings),
- .error_strings = gbp_contract_error_strings,
-
- .n_next_nodes = GBP_POLICY_N_NEXT,
- .next_nodes = {
- [GBP_POLICY_NEXT_DROP] = "error-drop",
- },
-};
-
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_recirc.c b/src/plugins/gbp/gbp_recirc.c
deleted file mode 100644
index 8d56f11b4e3..00000000000
--- a/src/plugins/gbp/gbp_recirc.c
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_recirc.h>
-#include <plugins/gbp/gbp_endpoint_group.h>
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#include <vnet/dpo/dvr_dpo.h>
-#include <vnet/fib/fib_table.h>
-
-#include <vlib/unix/plugin.h>
-
-/**
- * Pool of GBP recircs
- */
-gbp_recirc_t *gbp_recirc_pool;
-
-/**
- * Recirc configs keyed by sw_if_index
- */
-index_t *gbp_recirc_db;
-
-/**
- * logger
- */
-vlib_log_class_t gr_logger;
-
-/**
- * L2 Emulation enable/disable symbols
- */
-static void (*l2e_enable) (u32 sw_if_index);
-static void (*l2e_disable) (u32 sw_if_index);
-
-#define GBP_RECIRC_DBG(...) \
- vlib_log_debug (gr_logger, __VA_ARGS__);
-
-u8 *
-format_gbp_recirc (u8 * s, va_list * args)
-{
- gbp_recirc_t *gr = va_arg (*args, gbp_recirc_t *);
- vnet_main_t *vnm = vnet_get_main ();
-
- return format (s, " %U, sclass:%d, ext:%d",
- format_vnet_sw_if_index_name, vnm,
- gr->gr_sw_if_index, gr->gr_sclass, gr->gr_is_ext);
-}
-
-int
-gbp_recirc_add (u32 sw_if_index, sclass_t sclass, u8 is_ext)
-{
- gbp_recirc_t *gr;
- index_t gri;
-
- vec_validate_init_empty (gbp_recirc_db, sw_if_index, INDEX_INVALID);
-
- gri = gbp_recirc_db[sw_if_index];
-
- if (INDEX_INVALID == gri)
- {
- gbp_endpoint_group_t *gg;
- fib_protocol_t fproto;
- index_t ggi;
-
- ggi = gbp_endpoint_group_find (sclass);
-
- if (INDEX_INVALID == ggi)
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-
- gbp_endpoint_group_lock (ggi);
- pool_get_zero (gbp_recirc_pool, gr);
- gri = gr - gbp_recirc_pool;
-
- gr->gr_sclass = sclass;
- gr->gr_is_ext = is_ext;
- gr->gr_sw_if_index = sw_if_index;
-
- /*
- * IP enable the recirc interface
- */
- ip4_sw_interface_enable_disable (gr->gr_sw_if_index, 1);
- ip6_sw_interface_enable_disable (gr->gr_sw_if_index, 1);
-
- /*
- * cache the FIB indicies of the EPG
- */
- gr->gr_epgi = ggi;
-
- gg = gbp_endpoint_group_get (gr->gr_epgi);
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- gr->gr_fib_index[fib_proto_to_dpo (fproto)] =
- gbp_endpoint_group_get_fib_index (gg, fproto);
- }
-
- /*
- * bind to the bridge-domain of the EPG
- */
- gr->gr_itf = gbp_itf_l2_add_and_lock (gr->gr_sw_if_index, gg->gg_gbd);
-
- /*
- * set the interface into L2 emulation mode
- */
- l2e_enable (gr->gr_sw_if_index);
-
- /*
-       * Packets on the recirculation interface are subject to src-EPG
-       * classification; recirc interfaces run in L2-emulation mode.
-       * For internal EPGs classification is via an LPM on all external
-       * subnets; for external EPGs it is via a port mapping.
- */
- if (gr->gr_is_ext)
- {
- mac_address_t mac;
- /*
- * recirc is for post-NAT translation packets going into
- * the external EPG, these are classified to the NAT EPG
- * based on its port
- */
- mac_address_from_bytes (&mac,
- vnet_sw_interface_get_hw_address
- (vnet_get_main (), gr->gr_sw_if_index));
- gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
- gr->gr_sw_if_index,
- NULL, &mac, INDEX_INVALID,
- INDEX_INVALID, gr->gr_sclass,
- GBP_ENDPOINT_FLAG_NONE,
- NULL, NULL, &gr->gr_ep);
- vnet_feature_enable_disable ("ip4-unicast",
- "ip4-gbp-src-classify",
- gr->gr_sw_if_index, 1, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "ip6-gbp-src-classify",
- gr->gr_sw_if_index, 1, 0, 0);
- }
- else
- {
- /*
- * recirc is for pre-NAT translation packets coming from
- * the external EPG, these are classified based on a LPM
- * in the EPG's route-domain
- */
- vnet_feature_enable_disable ("ip4-unicast",
- "ip4-gbp-lpm-classify",
- gr->gr_sw_if_index, 1, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "ip6-gbp-lpm-classify",
- gr->gr_sw_if_index, 1, 0, 0);
- }
-
- gbp_recirc_db[sw_if_index] = gri;
- }
- else
- {
- gr = gbp_recirc_get (gri);
- }
-
- GBP_RECIRC_DBG ("add: %U", format_gbp_recirc, gr);
- return (0);
-}
-
-int
-gbp_recirc_delete (u32 sw_if_index)
-{
- gbp_recirc_t *gr;
- index_t gri;
-
- if (vec_len (gbp_recirc_db) <= sw_if_index)
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
- gri = gbp_recirc_db[sw_if_index];
-
- if (INDEX_INVALID != gri)
- {
- gr = pool_elt_at_index (gbp_recirc_pool, gri);
-
- GBP_RECIRC_DBG ("del: %U", format_gbp_recirc, gr);
-
- if (gr->gr_is_ext)
- {
- gbp_endpoint_unlock (GBP_ENDPOINT_SRC_CP, gr->gr_ep);
- vnet_feature_enable_disable ("ip4-unicast",
- "ip4-gbp-src-classify",
- gr->gr_sw_if_index, 0, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "ip6-gbp-src-classify",
- gr->gr_sw_if_index, 0, 0, 0);
- }
- else
- {
- vnet_feature_enable_disable ("ip4-unicast",
- "ip4-gbp-lpm-classify",
- gr->gr_sw_if_index, 0, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "ip6-gbp-lpm-classify",
- gr->gr_sw_if_index, 0, 0, 0);
- }
-
- ip4_sw_interface_enable_disable (gr->gr_sw_if_index, 0);
- ip6_sw_interface_enable_disable (gr->gr_sw_if_index, 0);
- l2e_disable (gr->gr_sw_if_index);
-
- gbp_itf_unlock (&gr->gr_itf);
-
- gbp_endpoint_group_unlock (gr->gr_epgi);
- gbp_recirc_db[sw_if_index] = INDEX_INVALID;
- pool_put (gbp_recirc_pool, gr);
- return (0);
- }
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-}
-
-void
-gbp_recirc_walk (gbp_recirc_cb_t cb, void *ctx)
-{
- gbp_recirc_t *ge;
-
- /* *INDENT-OFF* */
- pool_foreach (ge, gbp_recirc_pool)
- {
- if (!cb(ge, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static walk_rc_t
-gbp_recirc_show_one (gbp_recirc_t * gr, void *ctx)
-{
- vlib_cli_output (ctx, " %U", format_gbp_recirc, gr);
-
- return (WALK_CONTINUE);
-}
-
-static clib_error_t *
-gbp_recirc_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "Recirculation-Interfaces:");
- gbp_recirc_walk (gbp_recirc_show_one, vm);
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy Recircs and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp recirc}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_recirc_show_node, static) = {
- .path = "show gbp recirc",
- .short_help = "show gbp recirc\n",
- .function = gbp_recirc_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_recirc_init (vlib_main_t * vm)
-{
- gr_logger = vlib_log_register_class ("gbp", "recirc");
-
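-  /*
-   * resolve the L2 emulation enable/disable hooks from the l2e plugin
-   */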
- l2e_enable =
- vlib_get_plugin_symbol ("l2e_plugin.so", "l2_emulation_enable");
- l2e_disable =
- vlib_get_plugin_symbol ("l2e_plugin.so", "l2_emulation_disable");
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_recirc_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_recirc.h b/src/plugins/gbp/gbp_recirc.h
deleted file mode 100644
index 2f3354b794e..00000000000
--- a/src/plugins/gbp/gbp_recirc.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_RECIRC_H__
-#define __GBP_RECIRC_H__
-
-#include <plugins/gbp/gbp_types.h>
-#include <plugins/gbp/gbp_itf.h>
-#include <vnet/fib/fib_types.h>
-
-/**
- * A GBP recirculation interface representation.
- * These interfaces join bridge domains that are internal to those that are
- * NAT external, so packets can be NAT translated and then undergo the
- * whole policy process again.
- */
-typedef struct gbp_recirc_t_
-{
- /**
- * EPG ID that packets will classify to when they arrive on this recirc
- */
- sclass_t gr_sclass;
-
- /**
- * The index of the EPG
- */
- index_t gr_epgi;
-
- /**
- * FIB indices the EPG is mapped to
- */
- u32 gr_fib_index[DPO_PROTO_NUM];
-
- /**
- * Is the interface for packets post-NAT translation (i.e. ext)
- * or pre-NAT translation (i.e. internal)
- */
- u8 gr_is_ext;
-
-  /**
-   * The SW interface index of the recirculation interface
-   */
- u32 gr_sw_if_index;
- gbp_itf_hdl_t gr_itf;
-
- /**
- * The endpoint created to represent the recirc interface
- */
- index_t gr_ep;
-} gbp_recirc_t;
-
-extern int gbp_recirc_add (u32 sw_if_index, sclass_t sclass, u8 is_ext);
-extern int gbp_recirc_delete (u32 sw_if_index);
-
-typedef walk_rc_t (*gbp_recirc_cb_t) (gbp_recirc_t * gbpe, void *ctx);
-extern void gbp_recirc_walk (gbp_recirc_cb_t cb, void *ctx);
-
-/**
- * Data plane functions
- */
-extern gbp_recirc_t *gbp_recirc_pool;
-extern index_t *gbp_recirc_db;
-
-always_inline gbp_recirc_t *
-gbp_recirc_get (u32 sw_if_index)
-{
- return (pool_elt_at_index (gbp_recirc_pool, gbp_recirc_db[sw_if_index]));
-}
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_route_domain.c b/src/plugins/gbp/gbp_route_domain.c
deleted file mode 100644
index 6cc595d0fa9..00000000000
--- a/src/plugins/gbp/gbp_route_domain.c
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_endpoint.h>
-
-#include <vnet/dpo/dvr_dpo.h>
-#include <vnet/fib/fib_table.h>
-
-/**
- * Fixed MAC addresses to use as the source and destination MACs for
- * packets L3-switched onto the routed uu-fwd interfaces.
- * Magic values - origin lost to the mists of time...
- */
-/* *INDENT-OFF* */
-const static mac_address_t GBP_ROUTED_SRC_MAC = {
- .bytes = {
- 0x0, 0x22, 0xBD, 0xF8, 0x19, 0xFF,
- }
-};
-
-const static mac_address_t GBP_ROUTED_DST_MAC = {
- .bytes = {
-    0x00, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
- }
-};
-/* *INDENT-ON* */
-
-/**
- * Pool of GBP route_domains
- */
-gbp_route_domain_t *gbp_route_domain_pool;
-
-/**
- * DB of route_domains
- */
-typedef struct gbp_route_domain_db_t
-{
- uword *gbd_by_rd_id;
-} gbp_route_domain_db_t;
-
-static gbp_route_domain_db_t gbp_route_domain_db;
-static fib_source_t gbp_fib_source;
-
-/**
- * logger
- */
-vlib_log_class_t grd_logger;
-
-#define GBP_BD_DBG(...) \
- vlib_log_debug (grd_logger, __VA_ARGS__);
-
-index_t
-gbp_route_domain_index (const gbp_route_domain_t * grd)
-{
- return (grd - gbp_route_domain_pool);
-}
-
-gbp_route_domain_t *
-gbp_route_domain_get (index_t i)
-{
- return (pool_elt_at_index (gbp_route_domain_pool, i));
-}
-
-static void
-gbp_route_domain_lock (index_t i)
-{
- gbp_route_domain_t *grd;
-
- grd = gbp_route_domain_get (i);
- grd->grd_locks++;
-}
-
-index_t
-gbp_route_domain_find (u32 rd_id)
-{
- uword *p;
-
- p = hash_get (gbp_route_domain_db.gbd_by_rd_id, rd_id);
-
- if (NULL != p)
- return p[0];
-
- return (INDEX_INVALID);
-}
-
-index_t
-gbp_route_domain_find_and_lock (u32 rd_id)
-{
- index_t grdi;
-
- grdi = gbp_route_domain_find (rd_id);
-
- if (INDEX_INVALID != grdi)
- {
- gbp_route_domain_lock (grdi);
- }
- return (grdi);
-}
-
-static void
-gbp_route_domain_db_add (gbp_route_domain_t * grd)
-{
- index_t grdi = grd - gbp_route_domain_pool;
-
- hash_set (gbp_route_domain_db.gbd_by_rd_id, grd->grd_id, grdi);
-}
-
-static void
-gbp_route_domain_db_remove (gbp_route_domain_t * grd)
-{
- hash_unset (gbp_route_domain_db.gbd_by_rd_id, grd->grd_id);
-}
-
-int
-gbp_route_domain_add_and_lock (u32 rd_id,
- gbp_scope_t scope,
- u32 ip4_table_id,
- u32 ip6_table_id,
- u32 ip4_uu_sw_if_index, u32 ip6_uu_sw_if_index)
-{
- gbp_route_domain_t *grd;
- index_t grdi;
-
- grdi = gbp_route_domain_find (rd_id);
-
- if (INDEX_INVALID == grdi)
- {
- fib_protocol_t fproto;
-
- pool_get_zero (gbp_route_domain_pool, grd);
-
- grd->grd_id = rd_id;
- grd->grd_scope = scope;
- grd->grd_table_id[FIB_PROTOCOL_IP4] = ip4_table_id;
- grd->grd_table_id[FIB_PROTOCOL_IP6] = ip6_table_id;
- grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP4] = ip4_uu_sw_if_index;
- grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP6] = ip6_uu_sw_if_index;
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- grd->grd_fib_index[fproto] =
- fib_table_find_or_create_and_lock (fproto,
- grd->grd_table_id[fproto],
- gbp_fib_source);
-
- if (~0 != grd->grd_uu_sw_if_index[fproto])
- {
- ethernet_header_t *eth;
- u8 *rewrite;
-
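-	      /*
-	       * build an ethernet rewrite: the per-protocol ethertype plus
-	       * the fixed GBP routed src/dst MACs
-	       */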
- rewrite = NULL;
- vec_validate (rewrite, sizeof (*eth) - 1);
- eth = (ethernet_header_t *) rewrite;
-
- eth->type = clib_host_to_net_u16 ((fproto == FIB_PROTOCOL_IP4 ?
- ETHERNET_TYPE_IP4 :
- ETHERNET_TYPE_IP6));
-
- mac_address_to_bytes (gbp_route_domain_get_local_mac (),
- eth->src_address);
- mac_address_to_bytes (gbp_route_domain_get_remote_mac (),
- eth->dst_address);
-
- /*
- * create an adjacency out of the uu-fwd interfaces that will
- * be used when adding subnet routes.
- */
- grd->grd_adj[fproto] =
- adj_nbr_add_or_lock_w_rewrite (fproto,
- fib_proto_to_link (fproto),
- &ADJ_BCAST_ADDR,
- grd->grd_uu_sw_if_index[fproto],
- rewrite);
- }
- else
- {
- grd->grd_adj[fproto] = INDEX_INVALID;
- }
- }
-
- gbp_route_domain_db_add (grd);
- }
- else
- {
- grd = gbp_route_domain_get (grdi);
- }
-
- grd->grd_locks++;
- GBP_BD_DBG ("add: %U", format_gbp_route_domain, grd);
-
- return (0);
-}
-
-void
-gbp_route_domain_unlock (index_t index)
-{
- gbp_route_domain_t *grd;
-
- grd = gbp_route_domain_get (index);
-
- grd->grd_locks--;
-
- if (0 == grd->grd_locks)
- {
- fib_protocol_t fproto;
-
- GBP_BD_DBG ("destroy: %U", format_gbp_route_domain, grd);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- fib_table_unlock (grd->grd_fib_index[fproto], fproto, gbp_fib_source);
- if (INDEX_INVALID != grd->grd_adj[fproto])
- adj_unlock (grd->grd_adj[fproto]);
- }
-
- gbp_route_domain_db_remove (grd);
-
- pool_put (gbp_route_domain_pool, grd);
- }
-}
-
-u32
-gbp_route_domain_get_rd_id (index_t grdi)
-{
- gbp_route_domain_t *grd;
-
- grd = gbp_route_domain_get (grdi);
-
- return (grd->grd_id);
-}
-
-gbp_scope_t
-gbp_route_domain_get_scope (index_t grdi)
-{
- gbp_route_domain_t *grd;
-
- grd = gbp_route_domain_get (grdi);
-
- return (grd->grd_scope);
-}
-
-int
-gbp_route_domain_delete (u32 rd_id)
-{
- index_t grdi;
-
- GBP_BD_DBG ("del: %d", rd_id);
- grdi = gbp_route_domain_find (rd_id);
-
- if (INDEX_INVALID != grdi)
- {
- GBP_BD_DBG ("del: %U", format_gbp_route_domain,
- gbp_route_domain_get (grdi));
- gbp_route_domain_unlock (grdi);
-
- return (0);
- }
-
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-}
-
-const mac_address_t *
-gbp_route_domain_get_local_mac (void)
-{
- return (&GBP_ROUTED_SRC_MAC);
-}
-
-const mac_address_t *
-gbp_route_domain_get_remote_mac (void)
-{
- return (&GBP_ROUTED_DST_MAC);
-}
-
-void
-gbp_route_domain_walk (gbp_route_domain_cb_t cb, void *ctx)
-{
- gbp_route_domain_t *gbpe;
-
- /* *INDENT-OFF* */
- pool_foreach (gbpe, gbp_route_domain_pool)
- {
- if (!cb(gbpe, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static clib_error_t *
-gbp_route_domain_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vnet_main_t *vnm = vnet_get_main ();
- u32 ip4_uu_sw_if_index = ~0;
- u32 ip6_uu_sw_if_index = ~0;
- u32 ip4_table_id = ~0;
- u32 ip6_table_id = ~0;
- u32 scope = ~0;
- u32 rd_id = ~0;
- u8 add = 1;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "ip4-uu %U", unformat_vnet_sw_interface,
- vnm, &ip4_uu_sw_if_index))
- ;
- else if (unformat (input, "ip6-uu %U", unformat_vnet_sw_interface,
- vnm, &ip6_uu_sw_if_index))
- ;
- else if (unformat (input, "ip4-table-id %d", &ip4_table_id))
- ;
- else if (unformat (input, "ip6-table-id %d", &ip6_table_id))
- ;
- else if (unformat (input, "add"))
- add = 1;
- else if (unformat (input, "del"))
- add = 0;
- else if (unformat (input, "rd %d", &rd_id))
- ;
- else if (unformat (input, "scope %d", &scope))
- ;
- else
- break;
- }
-
- if (~0 == rd_id)
- return clib_error_return (0, "RD-ID must be specified");
-
- if (add)
- {
- if (~0 == ip4_table_id)
- return clib_error_return (0, "IP4 table-ID must be specified");
- if (~0 == ip6_table_id)
- return clib_error_return (0, "IP6 table-ID must be specified");
-
- gbp_route_domain_add_and_lock (rd_id, scope,
- ip4_table_id,
- ip6_table_id,
- ip4_uu_sw_if_index, ip6_uu_sw_if_index);
- }
- else
- gbp_route_domain_delete (rd_id);
-
- return (NULL);
-}
-
-/*?
- * Configure a GBP route-domain
- *
- * @cliexpar
- * @cliexstart{gbp route-domain [del] rd <ID> ip4-table-id <ID> ip6-table-id <ID> [ip4-uu <interface>] [ip6-uu <interface>]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_route_domain_cli_node, static) = {
- .path = "gbp route-domain",
- .short_help = "gbp route-domain [del] rd <ID> ip4-table-id <ID> ip6-table-id <ID> [ip4-uu <interface>] [ip6-uu <interface>]",
- .function = gbp_route_domain_cli,
-};
-
-u8 *
-format_gbp_route_domain (u8 * s, va_list * args)
-{
- gbp_route_domain_t *grd = va_arg (*args, gbp_route_domain_t*);
- vnet_main_t *vnm = vnet_get_main ();
-
- if (NULL != grd)
- s = format (s, "[%d] rd:%d ip4-uu:%U ip6-uu:%U locks:%d",
- grd - gbp_route_domain_pool,
- grd->grd_id,
- format_vnet_sw_if_index_name, vnm, grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP4],
- format_vnet_sw_if_index_name, vnm, grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP6],
- grd->grd_locks);
- else
- s = format (s, "NULL");
-
- return (s);
-}
-
-static int
-gbp_route_domain_show_one (gbp_route_domain_t *gb, void *ctx)
-{
- vlib_main_t *vm;
-
- vm = ctx;
-  vlib_cli_output (vm, " %U", format_gbp_route_domain, gb);
-
- return (1);
-}
-
-static clib_error_t *
-gbp_route_domain_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "Route-Domains:");
- gbp_route_domain_walk (gbp_route_domain_show_one, vm);
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy Route_Domains and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp route_domain}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_route_domain_show_node, static) = {
- .path = "show gbp route-domain",
- .short_help = "show gbp route-domain\n",
- .function = gbp_route_domain_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_route_domain_init (vlib_main_t * vm)
-{
- grd_logger = vlib_log_register_class ("gbp", "rd");
- gbp_fib_source = fib_source_allocate ("gbp-rd",
- FIB_SOURCE_PRIORITY_HI,
- FIB_SOURCE_BH_DROP);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_route_domain_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_route_domain.h b/src/plugins/gbp/gbp_route_domain.h
deleted file mode 100644
index 897c1bdd7ac..00000000000
--- a/src/plugins/gbp/gbp_route_domain.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_ROUTE_DOMAIN_H__
-#define __GBP_ROUTE_DOMAIN_H__
-
-#include <plugins/gbp/gbp_types.h>
-
-#include <vnet/fib/fib_types.h>
-#include <vnet/ethernet/mac_address.h>
-
-/**
- * A route-domain representation.
- * This is a standard route-domain plus all the attributes it must
- * have to support the GBP model.
- */
-typedef struct gbp_route_domain_t_
-{
- /**
- * Route-domain ID
- */
- u32 grd_id;
- gbp_scope_t grd_scope;
- u32 grd_fib_index[FIB_PROTOCOL_IP_MAX];
- u32 grd_table_id[FIB_PROTOCOL_IP_MAX];
-
- /**
- * The interfaces on which to send packets to unknown EPs
- */
- u32 grd_uu_sw_if_index[FIB_PROTOCOL_IP_MAX];
-
- /**
- * adjacencies on the UU interfaces.
- */
- u32 grd_adj[FIB_PROTOCOL_IP_MAX];
-
- u32 grd_locks;
-} gbp_route_domain_t;
-
-extern int gbp_route_domain_add_and_lock (u32 rd_id,
- gbp_scope_t scope,
- u32 ip4_table_id,
- u32 ip6_table_id,
- u32 ip4_uu_sw_if_index,
- u32 ip6_uu_sw_if_index);
-extern void gbp_route_domain_unlock (index_t grdi);
-extern index_t gbp_route_domain_find_and_lock (u32 rd_id);
-extern index_t gbp_route_domain_find (u32 rd_id);
-extern index_t gbp_route_domain_index (const gbp_route_domain_t *);
-
-extern int gbp_route_domain_delete (u32 rd_id);
-extern gbp_route_domain_t *gbp_route_domain_get (index_t i);
-extern u32 gbp_route_domain_get_rd_id (index_t i);
-extern gbp_scope_t gbp_route_domain_get_scope (index_t i);
-
-typedef int (*gbp_route_domain_cb_t) (gbp_route_domain_t * gb, void *ctx);
-extern void gbp_route_domain_walk (gbp_route_domain_cb_t cb, void *ctx);
-
-extern const mac_address_t *gbp_route_domain_get_local_mac (void);
-extern const mac_address_t *gbp_route_domain_get_remote_mac (void);
-
-extern u8 *format_gbp_route_domain (u8 * s, va_list * args);
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_scanner.c b/src/plugins/gbp/gbp_scanner.c
deleted file mode 100644
index 9ae962b7449..00000000000
--- a/src/plugins/gbp/gbp_scanner.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * gbp.h : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_scanner.h>
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_vxlan.h>
-
-/**
- * Scanner logger
- */
-vlib_log_class_t gs_logger;
-
-/**
- * Scanner state
- */
-static bool gs_enabled;
-
-#define GBP_SCANNER_DBG(...) \
- vlib_log_debug (gs_logger, __VA_ARGS__);
-
-static uword
-gbp_scanner (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
-{
- uword event_type, *event_data = 0;
- bool do_scan = 0;
-
- while (1)
- {
- do_scan = 0;
-
- if (gs_enabled)
- {
- /* scan every 'inactive threshold' seconds */
-	  /* scan every 2 seconds (the 'inactive threshold') */
- }
- else
- vlib_process_wait_for_event (vm);
-
- event_type = vlib_process_get_events (vm, &event_data);
- vec_reset_length (event_data);
-
- switch (event_type)
- {
- case ~0:
- /* timer expired */
- do_scan = 1;
- break;
-
- case GBP_ENDPOINT_SCAN_START:
- gs_enabled = 1;
- break;
-
- case GBP_ENDPOINT_SCAN_STOP:
- gs_enabled = 0;
- break;
-
- case GBP_ENDPOINT_SCAN_SET_TIME:
- break;
-
- default:
- ASSERT (0);
- }
-
- if (do_scan)
- {
- GBP_SCANNER_DBG ("start");
- gbp_endpoint_scan (vm);
- GBP_SCANNER_DBG ("stop");
- }
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_scanner_node) = {
- .function = gbp_scanner,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "gbp-scanner",
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_scanner_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "GBP-scanner: enabled:%d interval:2", gs_enabled);
-
- return (NULL);
-}
-
-/*?
- * Show GBP scanner
- *
- * @cliexpar
- * @cliexstart{show gbp scanner}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_scanner_cli_node, static) = {
- .path = "show gbp scanner",
- .short_help = "show gbp scanner",
- .function = gbp_scanner_cli,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_scanner_init (vlib_main_t * vm)
-{
- gs_logger = vlib_log_register_class ("gbp", "scan");
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_scanner_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_subnet.c b/src/plugins/gbp/gbp_subnet.c
deleted file mode 100644
index 8d3b571657c..00000000000
--- a/src/plugins/gbp/gbp_subnet.c
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_fwd_dpo.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_route_domain.h>
-
-#include <vnet/fib/fib_table.h>
-#include <vnet/dpo/load_balance.h>
-
-/**
- * a key for the DB
- */
-typedef struct gbp_subnet_key_t_
-{
- fib_prefix_t gsk_pfx;
- u32 gsk_fib_index;
-} gbp_subnet_key_t;
-
-/**
- * Subnet
- */
-typedef struct gbp_subnet_t_
-{
- gbp_subnet_key_t *gs_key;
- gbp_subnet_type_t gs_type;
- index_t gs_rd;
-
- union
- {
- struct
- {
- sclass_t gs_sclass;
- u32 gs_sw_if_index;
- } gs_stitched_external;
- struct
- {
- sclass_t gs_sclass;
- } gs_l3_out;
- };
-
- fib_node_index_t gs_fei;
-} gbp_subnet_t;
-
-/**
- * A DB of the subnets; key={pfx,fib-index}
- */
-uword *gbp_subnet_db;
-
-/**
- * pool of subnets
- */
-gbp_subnet_t *gbp_subnet_pool;
-
-static fib_source_t gbp_fib_source;
-
-static index_t
-gbp_subnet_db_find (u32 fib_index, const fib_prefix_t * pfx)
-{
- gbp_subnet_key_t key = {
- .gsk_pfx = *pfx,
- .gsk_fib_index = fib_index,
- };
- uword *p;
-
- p = hash_get_mem (gbp_subnet_db, &key);
-
- if (NULL != p)
- return p[0];
-
- return (INDEX_INVALID);
-}
-
-static void
-gbp_subnet_db_add (u32 fib_index, const fib_prefix_t * pfx, gbp_subnet_t * gs)
-{
- gbp_subnet_key_t *key;
-
- key = clib_mem_alloc (sizeof (*key));
-
- clib_memcpy (&(key->gsk_pfx), pfx, sizeof (*pfx));
- key->gsk_fib_index = fib_index;
-
- hash_set_mem (gbp_subnet_db, key, (gs - gbp_subnet_pool));
-
- gs->gs_key = key;
-}
-
-static void
-gbp_subnet_db_del (gbp_subnet_t * gs)
-{
- hash_unset_mem (gbp_subnet_db, gs->gs_key);
-
- clib_mem_free (gs->gs_key);
- gs->gs_key = NULL;
-}
-
-
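-/**
- * A transport subnet is routed via the route-domain's uu-fwd interface
- */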
-static int
-gbp_subnet_transport_add (gbp_subnet_t * gs)
-{
- gbp_route_domain_t *grd;
- fib_protocol_t fproto;
-
- fproto = gs->gs_key->gsk_pfx.fp_proto;
- grd = gbp_route_domain_get (gs->gs_rd);
-
- if (~0 == grd->grd_uu_sw_if_index[fproto])
- return (VNET_API_ERROR_INVALID_SW_IF_INDEX);
-
- gs->gs_fei = fib_table_entry_update_one_path (gs->gs_key->gsk_fib_index,
- &gs->gs_key->gsk_pfx,
- gbp_fib_source,
- FIB_ENTRY_FLAG_NONE,
- fib_proto_to_dpo (fproto),
- &ADJ_BCAST_ADDR,
- grd->grd_uu_sw_if_index
- [fproto], ~0, 1, NULL,
- FIB_ROUTE_PATH_FLAG_NONE);
-
- return (0);
-}
-
-static int
-gbp_subnet_internal_add (gbp_subnet_t * gs)
-{
- dpo_id_t gfd = DPO_INVALID;
-
- gbp_fwd_dpo_add_or_lock (fib_proto_to_dpo (gs->gs_key->gsk_pfx.fp_proto),
- &gfd);
-
- gs->gs_fei = fib_table_entry_special_dpo_update (gs->gs_key->gsk_fib_index,
- &gs->gs_key->gsk_pfx,
- gbp_fib_source,
- FIB_ENTRY_FLAG_EXCLUSIVE,
- &gfd);
-
- dpo_reset (&gfd);
-
- return (0);
-}
-
-static int
-gbp_subnet_external_add (gbp_subnet_t * gs, u32 sw_if_index, sclass_t sclass)
-{
- dpo_id_t gpd = DPO_INVALID;
-
- gs->gs_stitched_external.gs_sclass = sclass;
- gs->gs_stitched_external.gs_sw_if_index = sw_if_index;
-
- gbp_policy_dpo_add_or_lock (fib_proto_to_dpo (gs->gs_key->gsk_pfx.fp_proto),
- gbp_route_domain_get_scope (gs->gs_rd),
- gs->gs_stitched_external.gs_sclass,
- gs->gs_stitched_external.gs_sw_if_index, &gpd);
-
- gs->gs_fei = fib_table_entry_special_dpo_update (gs->gs_key->gsk_fib_index,
- &gs->gs_key->gsk_pfx,
- gbp_fib_source,
- (FIB_ENTRY_FLAG_EXCLUSIVE |
- FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
- &gpd);
-
- dpo_reset (&gpd);
-
- return (0);
-}
-
-static int
-gbp_subnet_l3_out_add (gbp_subnet_t * gs, sclass_t sclass, int is_anon)
-{
- fib_entry_flag_t flags;
- dpo_id_t gpd = DPO_INVALID;
-
- gs->gs_l3_out.gs_sclass = sclass;
-
- gbp_policy_dpo_add_or_lock (fib_proto_to_dpo (gs->gs_key->gsk_pfx.fp_proto),
- gbp_route_domain_get_scope (gs->gs_rd),
- gs->gs_l3_out.gs_sclass, ~0, &gpd);
-
- flags = FIB_ENTRY_FLAG_INTERPOSE;
- if (is_anon)
- flags |= FIB_ENTRY_FLAG_COVERED_INHERIT;
-
- gs->gs_fei = fib_table_entry_special_dpo_add (gs->gs_key->gsk_fib_index,
- &gs->gs_key->gsk_pfx,
- FIB_SOURCE_SPECIAL,
- flags, &gpd);
-
- dpo_reset (&gpd);
-
- return (0);
-}
-
-static void
-gbp_subnet_del_i (index_t gsi)
-{
- gbp_subnet_t *gs;
-
- gs = pool_elt_at_index (gbp_subnet_pool, gsi);
-
- fib_table_entry_delete_index (gs->gs_fei,
- (GBP_SUBNET_L3_OUT == gs->gs_type
- || GBP_SUBNET_ANON_L3_OUT ==
- gs->gs_type) ? FIB_SOURCE_SPECIAL :
- gbp_fib_source);
-
- gbp_subnet_db_del (gs);
- gbp_route_domain_unlock (gs->gs_rd);
-
- pool_put (gbp_subnet_pool, gs);
-}
-
-int
-gbp_subnet_del (u32 rd_id, const fib_prefix_t * pfx)
-{
- gbp_route_domain_t *grd;
- index_t gsi, grdi;
- u32 fib_index;
-
- grdi = gbp_route_domain_find (rd_id);
-
- if (~0 == grdi)
- return (VNET_API_ERROR_NO_SUCH_FIB);
-
- grd = gbp_route_domain_get (grdi);
- fib_index = grd->grd_fib_index[pfx->fp_proto];
-
- gsi = gbp_subnet_db_find (fib_index, pfx);
-
- if (INDEX_INVALID == gsi)
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-
- gbp_subnet_del_i (gsi);
-
- return (0);
-}
-
-int
-gbp_subnet_add (u32 rd_id,
- const fib_prefix_t * pfx,
- gbp_subnet_type_t type, u32 sw_if_index, sclass_t sclass)
-{
- gbp_route_domain_t *grd;
- index_t grdi, gsi;
- gbp_subnet_t *gs;
- u32 fib_index;
- int rv;
-
- switch (type)
- {
- case GBP_SUBNET_TRANSPORT:
- case GBP_SUBNET_STITCHED_INTERNAL:
- case GBP_SUBNET_STITCHED_EXTERNAL:
- case GBP_SUBNET_L3_OUT:
- case GBP_SUBNET_ANON_L3_OUT:
- break;
- default:
- return (VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE);
- }
-
- grdi = gbp_route_domain_find_and_lock (rd_id);
-
- if (~0 == grdi)
- return (VNET_API_ERROR_NO_SUCH_FIB);
-
- grd = gbp_route_domain_get (grdi);
- fib_index = grd->grd_fib_index[pfx->fp_proto];
-
- gsi = gbp_subnet_db_find (fib_index, pfx);
-
- /*
- * this is an update if the subnet already exists, so remove the old
- */
- if (INDEX_INVALID != gsi)
- gbp_subnet_del_i (gsi);
-
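-  /* default return code; every valid type below overwrites it */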
- rv = -2;
-
- pool_get (gbp_subnet_pool, gs);
-
- gs->gs_type = type;
- gs->gs_rd = grdi;
- gbp_subnet_db_add (fib_index, pfx, gs);
-
- switch (type)
- {
- case GBP_SUBNET_STITCHED_INTERNAL:
- rv = gbp_subnet_internal_add (gs);
- break;
- case GBP_SUBNET_STITCHED_EXTERNAL:
- rv = gbp_subnet_external_add (gs, sw_if_index, sclass);
- break;
- case GBP_SUBNET_TRANSPORT:
- rv = gbp_subnet_transport_add (gs);
- break;
- case GBP_SUBNET_L3_OUT:
- rv = gbp_subnet_l3_out_add (gs, sclass, 0 /* is_anon */ );
- break;
- case GBP_SUBNET_ANON_L3_OUT:
- rv = gbp_subnet_l3_out_add (gs, sclass, 1 /* is_anon */ );
- break;
- }
-
- return (rv);
-}
-
-static clib_error_t *
-gbp_subnet_add_del_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vnet_main_t *vnm = vnet_get_main ();
- fib_prefix_t pfx = {.fp_addr = ip46_address_initializer };
-  int length = 0;
- u32 rd_id = ~0;
- u32 sw_if_index = ~0;
- gbp_subnet_type_t type = ~0;
- u32 sclass = ~0;
- int is_add = 1;
- int rv;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "del"))
- is_add = 0;
- else if (unformat (line_input, "rd %d", &rd_id))
- ;
- else
- if (unformat
- (line_input, "prefix %U/%d", unformat_ip4_address,
- &pfx.fp_addr.ip4, &length))
- pfx.fp_proto = FIB_PROTOCOL_IP4;
- else
- if (unformat
- (line_input, "prefix %U/%d", unformat_ip6_address,
- &pfx.fp_addr.ip6, &length))
- pfx.fp_proto = FIB_PROTOCOL_IP6;
- else if (unformat (line_input, "type transport"))
- type = GBP_SUBNET_TRANSPORT;
- else if (unformat (line_input, "type stitched-internal"))
- type = GBP_SUBNET_STITCHED_INTERNAL;
- else if (unformat (line_input, "type stitched-external"))
- type = GBP_SUBNET_STITCHED_EXTERNAL;
- else if (unformat (line_input, "type anon-l3-out"))
- type = GBP_SUBNET_ANON_L3_OUT;
- else if (unformat (line_input, "type l3-out"))
- type = GBP_SUBNET_L3_OUT;
- else
- if (unformat_user
- (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
- ;
- else if (unformat (line_input, "sclass %u", &sclass))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- }
- unformat_free (line_input);
-
- pfx.fp_len = length;
-
- if (is_add)
- rv = gbp_subnet_add (rd_id, &pfx, type, sw_if_index, sclass);
- else
- rv = gbp_subnet_del (rd_id, &pfx);
-
- switch (rv)
- {
- case 0:
- return 0;
- case VNET_API_ERROR_NO_SUCH_FIB:
- return clib_error_return (0, "no such FIB");
- }
-
- return clib_error_return (0, "unknown error %d", rv);
-}
-
-/*?
- * Add Group Based Policy Subnets
- *
- * @cliexpar
- * @cliexstart{gbp subnet [del] rd <ID> prefix <prefix> type <type> [<interface>] [sclass <sclass>]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_subnet_add_del, static) = {
- .path = "gbp subnet",
- .short_help = "gbp subnet [del] rd <ID> prefix <prefix> type <type> [<interface>] [sclass <sclass>]\n",
- .function = gbp_subnet_add_del_cli,
-};
-/* *INDENT-ON* */
-
-void
-gbp_subnet_walk (gbp_subnet_cb_t cb, void *ctx)
-{
- gbp_route_domain_t *grd;
- gbp_subnet_t *gs;
- u32 sw_if_index;
- sclass_t sclass;
-
- sclass = SCLASS_INVALID;
- sw_if_index = ~0;
-
- /* *INDENT-OFF* */
- pool_foreach (gs, gbp_subnet_pool)
- {
- grd = gbp_route_domain_get(gs->gs_rd);
-
- switch (gs->gs_type)
- {
- case GBP_SUBNET_STITCHED_INTERNAL:
- case GBP_SUBNET_TRANSPORT:
- /* use defaults above */
- break;
- case GBP_SUBNET_STITCHED_EXTERNAL:
- sw_if_index = gs->gs_stitched_external.gs_sw_if_index;
- sclass = gs->gs_stitched_external.gs_sclass;
- break;
- case GBP_SUBNET_L3_OUT:
- case GBP_SUBNET_ANON_L3_OUT:
- sclass = gs->gs_l3_out.gs_sclass;
- break;
- }
-
- if (WALK_STOP == cb (grd->grd_id, &gs->gs_key->gsk_pfx,
- gs->gs_type, sw_if_index, sclass, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-typedef enum gsb_subnet_show_flags_t_
-{
- GBP_SUBNET_SHOW_BRIEF,
- GBP_SUBNET_SHOW_DETAILS,
-} gsb_subnet_show_flags_t;
-
-static u8 *
-format_gbp_subnet_type (u8 * s, va_list * args)
-{
- gbp_subnet_type_t type = va_arg (*args, gbp_subnet_type_t);
-
- switch (type)
- {
- case GBP_SUBNET_STITCHED_INTERNAL:
- return (format (s, "stitched-internal"));
- case GBP_SUBNET_STITCHED_EXTERNAL:
- return (format (s, "stitched-external"));
- case GBP_SUBNET_TRANSPORT:
- return (format (s, "transport"));
- case GBP_SUBNET_L3_OUT:
- return (format (s, "l3-out"));
- case GBP_SUBNET_ANON_L3_OUT:
- return (format (s, "anon-l3-out"));
- }
-
- return (format (s, "unknown"));
-}
-
-u8 *
-format_gbp_subnet (u8 * s, va_list * args)
-{
- index_t gsi = va_arg (*args, index_t);
- gsb_subnet_show_flags_t flags = va_arg (*args, gsb_subnet_show_flags_t);
- gbp_subnet_t *gs;
- u32 table_id;
-
- gs = pool_elt_at_index (gbp_subnet_pool, gsi);
-
- table_id = fib_table_get_table_id (gs->gs_key->gsk_fib_index,
- gs->gs_key->gsk_pfx.fp_proto);
-
- s = format (s, "[%d] tbl:%d %U %U", gsi, table_id,
- format_fib_prefix, &gs->gs_key->gsk_pfx,
- format_gbp_subnet_type, gs->gs_type);
-
- switch (gs->gs_type)
- {
- case GBP_SUBNET_STITCHED_INTERNAL:
- case GBP_SUBNET_TRANSPORT:
- break;
- case GBP_SUBNET_STITCHED_EXTERNAL:
- s = format (s, " {sclass:%d %U}", gs->gs_stitched_external.gs_sclass,
- format_vnet_sw_if_index_name,
- vnet_get_main (), gs->gs_stitched_external.gs_sw_if_index);
- break;
- case GBP_SUBNET_L3_OUT:
- case GBP_SUBNET_ANON_L3_OUT:
- s = format (s, " {sclass:%d}", gs->gs_l3_out.gs_sclass);
- break;
- }
-
- switch (flags)
- {
- case GBP_SUBNET_SHOW_DETAILS:
- {
- s = format (s, "\n %U", format_fib_entry, gs->gs_fei,
- FIB_ENTRY_FORMAT_DETAIL);
- }
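-      /* fall through */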
- case GBP_SUBNET_SHOW_BRIEF:
- break;
- }
- return (s);
-}
-
-static clib_error_t *
-gbp_subnet_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- u32 gsi;
-
- gsi = INDEX_INVALID;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "%d", &gsi))
- ;
- else
- break;
- }
-
- if (INDEX_INVALID != gsi)
- {
- vlib_cli_output (vm, "%U", format_gbp_subnet, gsi,
- GBP_SUBNET_SHOW_DETAILS);
- }
- else
- {
- /* *INDENT-OFF* */
- pool_foreach_index (gsi, gbp_subnet_pool)
- {
- vlib_cli_output (vm, "%U", format_gbp_subnet, gsi,
- GBP_SUBNET_SHOW_BRIEF);
- }
- /* *INDENT-ON* */
- }
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy Subnets
- *
- * @cliexpar
- * @cliexstart{show gbp subnet}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_subnet_show_node, static) = {
- .path = "show gbp subnet",
- .short_help = "show gbp subnet\n",
- .function = gbp_subnet_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_subnet_init (vlib_main_t * vm)
-{
- gbp_subnet_db = hash_create_mem (0,
- sizeof (gbp_subnet_key_t), sizeof (u32));
- gbp_fib_source = fib_source_allocate ("gbp-subnet",
- FIB_SOURCE_PRIORITY_HI,
- FIB_SOURCE_BH_SIMPLE);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_subnet_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_subnet.h b/src/plugins/gbp/gbp_subnet.h
deleted file mode 100644
index 6fbef01ceba..00000000000
--- a/src/plugins/gbp/gbp_subnet.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_SUBNET_H__
-#define __GBP_SUBNET_H__
-
-#include <plugins/gbp/gbp_types.h>
-
-typedef enum gbp_subnet_type_t_
-{
- GBP_SUBNET_TRANSPORT,
- GBP_SUBNET_STITCHED_INTERNAL,
- GBP_SUBNET_STITCHED_EXTERNAL,
- GBP_SUBNET_L3_OUT,
- GBP_SUBNET_ANON_L3_OUT,
-} gbp_subnet_type_t;
-
-extern int gbp_subnet_add (u32 rd_id,
- const fib_prefix_t * pfx,
- gbp_subnet_type_t type,
- u32 sw_if_index, sclass_t sclass);
-
-extern int gbp_subnet_del (u32 rd_id, const fib_prefix_t * pfx);
-
-typedef walk_rc_t (*gbp_subnet_cb_t) (u32 rd_id,
- const fib_prefix_t * pfx,
- gbp_subnet_type_t type,
- u32 sw_if_index,
- sclass_t sclass, void *ctx);
-
-extern void gbp_subnet_walk (gbp_subnet_cb_t cb, void *ctx);
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
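Note: the walk API removed above follows the usual walk_rc_t contract: the callback returns WALK_CONTINUE to keep iterating or WALK_STOP to end the walk early. A hedged consumer sketch; the counting context and callback names are hypothetical:

/* Sketch of consuming gbp_subnet_walk(); only the walk_rc_t
 * protocol is taken from the header above. */
typedef struct { u32 count; } subnet_count_ctx_t;

static walk_rc_t
count_subnet_cb (u32 rd_id, const fib_prefix_t *pfx,
                 gbp_subnet_type_t type, u32 sw_if_index,
                 sclass_t sclass, void *ctx)
{
  subnet_count_ctx_t *c = ctx;
  c->count++;
  return WALK_CONTINUE; /* WALK_STOP would short-circuit the walk */
}

static u32
count_all_subnets (void)
{
  subnet_count_ctx_t ctx = { 0 };
  gbp_subnet_walk (count_subnet_cb, &ctx);
  return ctx.count;
}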
diff --git a/src/plugins/gbp/gbp_vxlan.c b/src/plugins/gbp/gbp_vxlan.c
deleted file mode 100644
index 77e4d7ac11b..00000000000
--- a/src/plugins/gbp/gbp_vxlan.c
+++ /dev/null
@@ -1,654 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_vxlan.h>
-#include <plugins/gbp/gbp_learn.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-#include <vlibmemory/api.h>
-#include <vnet/fib/fib_table.h>
-#include <vlib/punt.h>
-
-/**
- * A reference to a VXLAN-GBP tunnel created as a child/dependent tunnel
- * of the template GBP-VXLAN tunnel
- */
-typedef struct vxlan_tunnel_ref_t_
-{
- gbp_itf_hdl_t vxr_itf;
- u32 vxr_sw_if_index;
- index_t vxr_parent;
- gbp_vxlan_tunnel_layer_t vxr_layer;
-} vxlan_tunnel_ref_t;
-
-/**
- * DB of added tunnels
- */
-uword *gv_db;
-
-/**
- * Logger
- */
-static vlib_log_class_t gt_logger;
-
-/**
- * Pool of template tunnels
- */
-static gbp_vxlan_tunnel_t *gbp_vxlan_tunnel_pool;
-
-/**
- * Pool of child tunnels
- */
-static vxlan_tunnel_ref_t *vxlan_tunnel_ref_pool;
-
-/**
- * DB of template interfaces by SW interface index
- */
-static index_t *gbp_vxlan_tunnel_db;
-
-/**
- * DB of child interfaces by SW interface index
- */
-static index_t *vxlan_tunnel_ref_db;
-
-/**
- * Handle registered with the punt infra
- */
-static vlib_punt_hdl_t punt_hdl;
-
-static char *gbp_vxlan_tunnel_layer_strings[] = {
-#define _(n,s) [GBP_VXLAN_TUN_##n] = s,
- foreach_gbp_vxlan_tunnel_layer
-#undef _
-};
-
-#define GBP_VXLAN_TUN_DBG(...) \
- vlib_log_debug (gt_logger, __VA_ARGS__);
-
-
-gbp_vxlan_tunnel_t *
-gbp_vxlan_tunnel_get (index_t gti)
-{
- return (pool_elt_at_index (gbp_vxlan_tunnel_pool, gti));
-}
-
-static vxlan_tunnel_ref_t *
-vxlan_tunnel_ref_get (index_t vxri)
-{
- return (pool_elt_at_index (vxlan_tunnel_ref_pool, vxri));
-}
-
-static u8 *
-format_vxlan_tunnel_ref (u8 * s, va_list * args)
-{
- index_t vxri = va_arg (*args, u32);
- vxlan_tunnel_ref_t *vxr;
-
- vxr = vxlan_tunnel_ref_get (vxri);
-
- s = format (s, "[%U]", format_gbp_itf_hdl, vxr->vxr_itf);
-
- return (s);
-}
-
-static void
-gdb_vxlan_dep_del (u32 sw_if_index)
-{
- vxlan_tunnel_ref_t *vxr;
- gbp_vxlan_tunnel_t *gt;
- index_t vxri;
- u32 pos;
-
- vxr = vxlan_tunnel_ref_get (vxlan_tunnel_ref_db[sw_if_index]);
- vxri = vxr - vxlan_tunnel_ref_pool;
- gt = gbp_vxlan_tunnel_get (vxr->vxr_parent);
-
- GBP_VXLAN_TUN_DBG ("del-dep:%U", format_vxlan_tunnel_ref, vxri);
-
- vxlan_tunnel_ref_db[vxr->vxr_sw_if_index] = INDEX_INVALID;
- pos = vec_search (gt->gt_tuns, vxri);
-
- ASSERT (~0 != pos);
- vec_del1 (gt->gt_tuns, pos);
-
- vnet_vxlan_gbp_tunnel_del (vxr->vxr_sw_if_index);
-
- pool_put (vxlan_tunnel_ref_pool, vxr);
-}
-
-static gbp_itf_hdl_t
-gdb_vxlan_dep_add (gbp_vxlan_tunnel_t * gt,
- const ip46_address_t * src, const ip46_address_t * dst)
-{
- vnet_vxlan_gbp_tunnel_add_del_args_t args = {
- .is_add = 1,
- .is_ip6 = !ip46_address_is_ip4 (src),
- .vni = gt->gt_vni,
- .src = *src,
- .dst = *dst,
- .instance = ~0,
- .mode = (GBP_VXLAN_TUN_L2 == gt->gt_layer ?
- VXLAN_GBP_TUNNEL_MODE_L2 : VXLAN_GBP_TUNNEL_MODE_L3),
- };
- vxlan_tunnel_ref_t *vxr;
- u32 sw_if_index;
- index_t vxri;
- int rv;
-
- sw_if_index = ~0;
- rv = vnet_vxlan_gbp_tunnel_add_del (&args, &sw_if_index);
-
- if (VNET_API_ERROR_TUNNEL_EXIST == rv)
- {
- vxri = vxlan_tunnel_ref_db[sw_if_index];
-
- vxr = vxlan_tunnel_ref_get (vxri);
- gbp_itf_lock (vxr->vxr_itf);
- }
- else if (0 == rv)
- {
- ASSERT (~0 != sw_if_index);
- GBP_VXLAN_TUN_DBG ("add-dep:%U %U %U %d", format_vnet_sw_if_index_name,
- vnet_get_main (), sw_if_index,
- format_ip46_address, src, IP46_TYPE_ANY,
- format_ip46_address, dst, IP46_TYPE_ANY, gt->gt_vni);
-
- pool_get_zero (vxlan_tunnel_ref_pool, vxr);
-
- vxri = (vxr - vxlan_tunnel_ref_pool);
- vxr->vxr_parent = gt - gbp_vxlan_tunnel_pool;
- vxr->vxr_sw_if_index = sw_if_index;
- vxr->vxr_layer = gt->gt_layer;
-
- /*
- * store the child both on the parent's list and the global DB
- */
- vec_add1 (gt->gt_tuns, vxri);
-
- vec_validate_init_empty (vxlan_tunnel_ref_db,
- vxr->vxr_sw_if_index, INDEX_INVALID);
- vxlan_tunnel_ref_db[vxr->vxr_sw_if_index] = vxri;
-
- if (GBP_VXLAN_TUN_L2 == vxr->vxr_layer)
- {
- l2output_feat_masks_t ofeat;
- l2input_feat_masks_t ifeat;
- gbp_bridge_domain_t *gbd;
-
- gbd = gbp_bridge_domain_get (gt->gt_gbd);
- vxr->vxr_itf = gbp_itf_l2_add_and_lock_w_free
- (vxr->vxr_sw_if_index, gt->gt_gbd, gdb_vxlan_dep_del);
-
- ofeat = L2OUTPUT_FEAT_GBP_POLICY_MAC;
- ifeat = L2INPUT_FEAT_NONE;
-
- if (!(gbd->gb_flags & GBP_BD_FLAG_DO_NOT_LEARN))
- ifeat |= L2INPUT_FEAT_GBP_LEARN;
-
- gbp_itf_l2_set_output_feature (vxr->vxr_itf, ofeat);
- gbp_itf_l2_set_input_feature (vxr->vxr_itf, ifeat);
- }
- else
- {
- vxr->vxr_itf = gbp_itf_l3_add_and_lock_w_free
- (vxr->vxr_sw_if_index, gt->gt_grd, gdb_vxlan_dep_del);
-
- gbp_itf_l3_set_input_feature (vxr->vxr_itf, GBP_ITF_L3_FEAT_LEARN);
- }
- }
- else
- {
- return (GBP_ITF_HDL_INVALID);
- }
-
- return (vxr->vxr_itf);
-}
-
-u32
-vxlan_gbp_tunnel_get_parent (u32 sw_if_index)
-{
- ASSERT ((sw_if_index < vec_len (vxlan_tunnel_ref_db)) &&
- (INDEX_INVALID != vxlan_tunnel_ref_db[sw_if_index]));
-
- gbp_vxlan_tunnel_t *gt;
- vxlan_tunnel_ref_t *vxr;
-
- vxr = vxlan_tunnel_ref_get (vxlan_tunnel_ref_db[sw_if_index]);
- gt = gbp_vxlan_tunnel_get (vxr->vxr_parent);
-
- return (gt->gt_sw_if_index);
-}
-
-gbp_itf_hdl_t
-vxlan_gbp_tunnel_lock_itf (u32 sw_if_index)
-{
- ASSERT ((sw_if_index < vec_len (vxlan_tunnel_ref_db)) &&
- (INDEX_INVALID != vxlan_tunnel_ref_db[sw_if_index]));
-
- vxlan_tunnel_ref_t *vxr;
-
- vxr = vxlan_tunnel_ref_get (vxlan_tunnel_ref_db[sw_if_index]);
-
- gbp_itf_lock (vxr->vxr_itf);
-
- return (vxr->vxr_itf);
-}
-
-
-gbp_vxlan_tunnel_type_t
-gbp_vxlan_tunnel_get_type (u32 sw_if_index)
-{
- if (sw_if_index < vec_len (vxlan_tunnel_ref_db) &&
- INDEX_INVALID != vxlan_tunnel_ref_db[sw_if_index])
- {
- return (VXLAN_GBP_TUNNEL);
- }
- else if (sw_if_index < vec_len (gbp_vxlan_tunnel_db) &&
- INDEX_INVALID != gbp_vxlan_tunnel_db[sw_if_index])
- {
- return (GBP_VXLAN_TEMPLATE_TUNNEL);
- }
-
- ASSERT (0);
- return (GBP_VXLAN_TEMPLATE_TUNNEL);
-}
-
-gbp_itf_hdl_t
-gbp_vxlan_tunnel_clone_and_lock (u32 sw_if_index,
- const ip46_address_t * src,
- const ip46_address_t * dst)
-{
- gbp_vxlan_tunnel_t *gt;
- index_t gti;
-
- gti = gbp_vxlan_tunnel_db[sw_if_index];
-
- if (INDEX_INVALID == gti)
- return (GBP_ITF_HDL_INVALID);
-
- gt = pool_elt_at_index (gbp_vxlan_tunnel_pool, gti);
-
- return (gdb_vxlan_dep_add (gt, src, dst));
-}
-
-void
-vxlan_gbp_tunnel_unlock (u32 sw_if_index)
-{
- /* vxlan_tunnel_ref_t *vxr; */
- /* index_t vxri; */
-
- /* vxri = vxlan_tunnel_ref_db[sw_if_index]; */
-
- /* ASSERT (vxri != INDEX_INVALID); */
-
- /* vxr = vxlan_tunnel_ref_get (vxri); */
-
- /* gdb_vxlan_dep_del (vxri); */
-}
-
-void
-gbp_vxlan_walk (gbp_vxlan_cb_t cb, void *ctx)
-{
- gbp_vxlan_tunnel_t *gt;
-
- /* *INDENT-OFF* */
- pool_foreach (gt, gbp_vxlan_tunnel_pool)
- {
- if (WALK_CONTINUE != cb(gt, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static walk_rc_t
-gbp_vxlan_tunnel_show_one (gbp_vxlan_tunnel_t * gt, void *ctx)
-{
- vlib_cli_output (ctx, "%U", format_gbp_vxlan_tunnel,
- gt - gbp_vxlan_tunnel_pool);
-
- return (WALK_CONTINUE);
-}
-
-static u8 *
-format_gbp_vxlan_tunnel_name (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
-
- return format (s, "gbp-vxlan-%d", dev_instance);
-}
-
-u8 *
-format_gbp_vxlan_tunnel_layer (u8 * s, va_list * args)
-{
- gbp_vxlan_tunnel_layer_t gl = va_arg (*args, gbp_vxlan_tunnel_layer_t);
- s = format (s, "%s", gbp_vxlan_tunnel_layer_strings[gl]);
-
- return (s);
-}
-
-u8 *
-format_gbp_vxlan_tunnel (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- CLIB_UNUSED (int verbose) = va_arg (*args, int);
- gbp_vxlan_tunnel_t *gt = gbp_vxlan_tunnel_get (dev_instance);
- index_t *vxri;
-
- s = format (s, " [%d] gbp-vxlan-tunnel: hw:%d sw:%d vni:%d %U",
- dev_instance, gt->gt_hw_if_index,
- gt->gt_sw_if_index, gt->gt_vni,
- format_gbp_vxlan_tunnel_layer, gt->gt_layer);
- if (GBP_VXLAN_TUN_L2 == gt->gt_layer)
- s = format (s, " BD:%d gbd-index:%d", gt->gt_bd_rd_id, gt->gt_gbd);
- else
- s = format (s, " RD:%d grd-index:%d", gt->gt_bd_rd_id, gt->gt_grd);
-
- s = format (s, " dependents:");
- vec_foreach (vxri, gt->gt_tuns)
- {
- s = format (s, "\n %U, ", format_vxlan_tunnel_ref, *vxri);
- }
-
- return s;
-}
-
-typedef struct gbp_vxlan_tx_trace_t_
-{
- u32 vni;
-} gbp_vxlan_tx_trace_t;
-
-u8 *
-format_gbp_vxlan_tx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_vxlan_tx_trace_t *t = va_arg (*args, gbp_vxlan_tx_trace_t *);
-
- s = format (s, "GBP-VXLAN: vni:%d", t->vni);
-
- return (s);
-}
-
-clib_error_t *
-gbp_vxlan_interface_admin_up_down (vnet_main_t * vnm,
- u32 hw_if_index, u32 flags)
-{
- vnet_hw_interface_t *hi;
- u32 ti;
-
- hi = vnet_get_hw_interface (vnm, hw_if_index);
-
- if (NULL == gbp_vxlan_tunnel_db ||
- hi->sw_if_index >= vec_len (gbp_vxlan_tunnel_db))
- return (NULL);
-
- ti = gbp_vxlan_tunnel_db[hi->sw_if_index];
-
- if (~0 == ti)
- /* not one of ours */
- return (NULL);
-
- if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
- vnet_hw_interface_set_flags (vnm, hw_if_index,
- VNET_HW_INTERFACE_FLAG_LINK_UP);
- else
- vnet_hw_interface_set_flags (vnm, hw_if_index, 0);
-
- return (NULL);
-}
-
-static uword
-gbp_vxlan_interface_tx (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
- clib_warning ("you shouldn't be here, leaking buffers...");
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (gbp_vxlan_device_class) = {
- .name = "GBP VXLAN tunnel-template",
- .format_device_name = format_gbp_vxlan_tunnel_name,
- .format_device = format_gbp_vxlan_tunnel,
- .format_tx_trace = format_gbp_vxlan_tx_trace,
- .admin_up_down_function = gbp_vxlan_interface_admin_up_down,
- .tx_function = gbp_vxlan_interface_tx,
-};
-
-VNET_HW_INTERFACE_CLASS (gbp_vxlan_hw_interface_class) = {
- .name = "GBP-VXLAN",
- .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
-};
-/* *INDENT-ON* */
-
-int
-gbp_vxlan_tunnel_add (u32 vni, gbp_vxlan_tunnel_layer_t layer,
- u32 bd_rd_id,
- const ip4_address_t * src, u32 * sw_if_indexp)
-{
- gbp_vxlan_tunnel_t *gt;
- index_t gti;
- uword *p;
- int rv;
-
- rv = 0;
- p = hash_get (gv_db, vni);
-
- GBP_VXLAN_TUN_DBG ("add: %d %d %d", vni, layer, bd_rd_id);
-
- if (NULL == p)
- {
- vnet_sw_interface_t *si;
- vnet_hw_interface_t *hi;
- index_t gbi, grdi;
- vnet_main_t *vnm;
-
- gbi = grdi = INDEX_INVALID;
-
- if (layer == GBP_VXLAN_TUN_L2)
- {
- gbi = gbp_bridge_domain_find_and_lock (bd_rd_id);
-
- if (INDEX_INVALID == gbi)
- {
- return (VNET_API_ERROR_BD_NOT_MODIFIABLE);
- }
- }
- else
- {
- grdi = gbp_route_domain_find_and_lock (bd_rd_id);
-
- if (INDEX_INVALID == grdi)
- {
- return (VNET_API_ERROR_NO_SUCH_FIB);
- }
- }
-
- vnm = vnet_get_main ();
- pool_get (gbp_vxlan_tunnel_pool, gt);
- gti = gt - gbp_vxlan_tunnel_pool;
-
- gt->gt_vni = vni;
- gt->gt_layer = layer;
- gt->gt_bd_rd_id = bd_rd_id;
- gt->gt_src.ip4.as_u32 = src->as_u32;
- gt->gt_hw_if_index = vnet_register_interface (vnm,
- gbp_vxlan_device_class.index,
- gti,
- gbp_vxlan_hw_interface_class.index,
- gti);
-
- hi = vnet_get_hw_interface (vnm, gt->gt_hw_if_index);
-
- gt->gt_sw_if_index = hi->sw_if_index;
-
- /* don't flood packets in a BD to these interfaces */
- si = vnet_get_sw_interface (vnm, gt->gt_sw_if_index);
- si->flood_class = VNET_FLOOD_CLASS_NO_FLOOD;
-
- if (layer == GBP_VXLAN_TUN_L2)
- {
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gbi);
-
- gt->gt_gbd = gbi;
- gb->gb_vni = gti;
- /* set it up as a GBP interface */
- gt->gt_itf = gbp_itf_l2_add_and_lock (gt->gt_sw_if_index,
- gt->gt_gbd);
- gbp_itf_l2_set_input_feature (gt->gt_itf, L2INPUT_FEAT_GBP_LEARN);
- }
- else
- {
- gt->gt_grd = grdi;
- gt->gt_itf = gbp_itf_l3_add_and_lock (gt->gt_sw_if_index,
- gt->gt_grd);
- gbp_itf_l3_set_input_feature (gt->gt_itf, GBP_ITF_L3_FEAT_LEARN);
- }
-
- /*
- * save the tunnel by VNI and by sw_if_index
- */
- hash_set (gv_db, vni, gti);
-
- vec_validate_init_empty (gbp_vxlan_tunnel_db,
- gt->gt_sw_if_index, INDEX_INVALID);
- gbp_vxlan_tunnel_db[gt->gt_sw_if_index] = gti;
-
- if (sw_if_indexp)
- *sw_if_indexp = gt->gt_sw_if_index;
-
- vxlan_gbp_register_udp_ports ();
- }
- else
- {
- gti = p[0];
- rv = VNET_API_ERROR_IF_ALREADY_EXISTS;
- }
-
- GBP_VXLAN_TUN_DBG ("add: %U", format_gbp_vxlan_tunnel, gti);
-
- return (rv);
-}
-
-int
-gbp_vxlan_tunnel_del (u32 vni)
-{
- gbp_vxlan_tunnel_t *gt;
- uword *p;
-
- p = hash_get (gv_db, vni);
-
- if (NULL != p)
- {
- vnet_main_t *vnm;
-
- vnm = vnet_get_main ();
- gt = gbp_vxlan_tunnel_get (p[0]);
-
- vxlan_gbp_unregister_udp_ports ();
-
- GBP_VXLAN_TUN_DBG ("del: %U", format_gbp_vxlan_tunnel,
- gt - gbp_vxlan_tunnel_pool);
-
- gbp_endpoint_flush (GBP_ENDPOINT_SRC_DP, gt->gt_sw_if_index);
- ASSERT (0 == vec_len (gt->gt_tuns));
- vec_free (gt->gt_tuns);
-
- gbp_itf_unlock (&gt->gt_itf);
-
- if (GBP_VXLAN_TUN_L2 == gt->gt_layer)
- {
- gbp_bridge_domain_unlock (gt->gt_gbd);
- }
- else
- {
- gbp_route_domain_unlock (gt->gt_grd);
- }
-
- vnet_sw_interface_set_flags (vnm, gt->gt_sw_if_index, 0);
- vnet_delete_hw_interface (vnm, gt->gt_hw_if_index);
-
- hash_unset (gv_db, vni);
- gbp_vxlan_tunnel_db[gt->gt_sw_if_index] = INDEX_INVALID;
-
- pool_put (gbp_vxlan_tunnel_pool, gt);
- }
- else
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- return (0);
-}
-
-static clib_error_t *
-gbp_vxlan_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
-
- vlib_cli_output (vm, "GBP-VXLAN Interfaces:");
-
- gbp_vxlan_walk (gbp_vxlan_tunnel_show_one, vm);
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy VXLAN tunnels
- *
- * @cliexpar
- * @cliexstart{show gbp vxlan}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_vxlan_show_node, static) = {
- .path = "show gbp vxlan",
- .short_help = "show gbp vxlan\n",
- .function = gbp_vxlan_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_vxlan_init (vlib_main_t * vm)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- gt_logger = vlib_log_register_class ("gbp", "tun");
-
- punt_hdl = vlib_punt_client_register ("gbp-vxlan");
-
- vlib_punt_register (punt_hdl,
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP4],
- "gbp-vxlan4");
-
- return (0);
-}
-
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION (gbp_vxlan_init) =
-{
- .runs_after = VLIB_INITS("punt_init", "vxlan_gbp_init"),
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
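Note: gbp_vxlan_tunnel_add/del above and the decap node both resolve tunnels by VNI through the same two-step idiom: a uword hash (gv_db) maps VNI to a pool index, then pool_elt_at_index fetches the element. A minimal sketch of that idiom under hypothetical names (tun_t, tun_pool, tun_db):

/* Sketch of the VNI -> pool-index -> element lookup idiom. */
#include <vppinfra/hash.h>
#include <vppinfra/pool.h>

typedef struct { u32 vni; } tun_t;
static tun_t *tun_pool;
static uword *tun_db; /* vni -> pool index */

static tun_t *
tun_find_by_vni (u32 vni)
{
  uword *p = hash_get (tun_db, vni);
  if (p == NULL)
    return NULL; /* no such tunnel */
  return pool_elt_at_index (tun_pool, p[0]);
}

static void
tun_add (u32 vni)
{
  tun_t *t;
  if (tun_db == NULL)
    tun_db = hash_create (0, sizeof (uword));
  pool_get_zero (tun_pool, t);
  t->vni = vni;
  /* store the pool index, not the pointer: the pool may be
   * reallocated and indices stay stable */
  hash_set (tun_db, vni, t - tun_pool);
}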
diff --git a/src/plugins/gbp/gbp_vxlan.h b/src/plugins/gbp/gbp_vxlan.h
deleted file mode 100644
index 706fe2a0e85..00000000000
--- a/src/plugins/gbp/gbp_vxlan.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_VXLAN_H__
-#define __GBP_VXLAN_H__
-
-#include <vnet/fib/fib_types.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#define foreach_gbp_vxlan_tunnel_layer \
- _ (L2, "l2") \
- _ (L3, "l3")
-
-typedef enum gbp_vxlan_tunnel_layer_t_
-{
-#define _(s,n) GBP_VXLAN_TUN_##s,
- foreach_gbp_vxlan_tunnel_layer
-#undef _
-} gbp_vxlan_tunnel_layer_t;
-
-/**
- * GBP VXLAN (template) tunnel.
- * A template tunnel has only a VNI; it does not have src/dst addresses.
- * As such it cannot be used to send traffic. It is used in the RX path
- * to RX vxlan-gbp packets that do not match an existing tunnel.
- */
-typedef struct gbp_vxlan_tunnel_t_
-{
- u32 gt_hw_if_index;
- u32 gt_sw_if_index;
- u32 gt_vni;
-
- /**
- * The BD or RD value (depending on the layer) that the tunnel is bound to
- */
- u32 gt_bd_rd_id;
- gbp_vxlan_tunnel_layer_t gt_layer;
-
- union
- {
- struct
- {
- /**
- * Reference to the GBP-BD
- */
- index_t gt_gbd;
- };
- struct
- {
- /**
- * Reference to the GBP-RD
- */
- index_t gt_grd;
- };
- };
-
- /**
- * gbp-itf config for this interface
- */
- gbp_itf_hdl_t gt_itf;
-
- /**
- * list of child vxlan-gbp tunnels built from this template
- */
- index_t *gt_tuns;
-
- /**
- * The source address to use for child tunnels
- */
- ip46_address_t gt_src;
-} gbp_vxlan_tunnel_t;
-
-/**
- * The different types of interfaces that endpoints are learned on
- */
-typedef enum gbp_vxlan_tunnel_type_t_
-{
- /**
- * This is the object type defined above.
- * A template representation of a vxlan-gbp tunnel. From this tunnel
- * type, real vxlan-gbp tunnels are created (by cloning the VNI)
- */
- GBP_VXLAN_TEMPLATE_TUNNEL,
-
- /**
- * A real VXLAN-GBP tunnel (from vnet/vxlan-gbp/...)
- */
- VXLAN_GBP_TUNNEL,
-} gbp_vxlan_tunnel_type_t;
-
-extern int gbp_vxlan_tunnel_add (u32 vni, gbp_vxlan_tunnel_layer_t layer,
- u32 bd_rd_id,
- const ip4_address_t * src,
- u32 * sw_if_indexp);
-extern int gbp_vxlan_tunnel_del (u32 vni);
-
-extern gbp_vxlan_tunnel_type_t gbp_vxlan_tunnel_get_type (u32 sw_if_index);
-
-extern gbp_itf_hdl_t gbp_vxlan_tunnel_clone_and_lock (u32 parent_tunnel,
- const ip46_address_t *
- src,
- const ip46_address_t *
- dst);
-
-extern u32 vxlan_gbp_tunnel_get_parent (u32 sw_if_index);
-extern gbp_itf_hdl_t vxlan_gbp_tunnel_lock_itf (u32 sw_if_index);
-
-typedef walk_rc_t (*gbp_vxlan_cb_t) (gbp_vxlan_tunnel_t * gt, void *ctx);
-extern void gbp_vxlan_walk (gbp_vxlan_cb_t cb, void *ctx);
-
-extern u8 *format_gbp_vxlan_tunnel (u8 * s, va_list * args);
-extern u8 *format_gbp_vxlan_tunnel_layer (u8 * s, va_list * args);
-
-extern gbp_vxlan_tunnel_t *gbp_vxlan_tunnel_get (index_t gti);
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
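Note: foreach_gbp_vxlan_tunnel_layer above is the usual VPP X-macro idiom: one list expands into both the enum (here) and the matching string table (in gbp_vxlan.c). A self-contained sketch of how the two expansions line up, using a demo list rather than the GBP one:

/* Self-contained sketch of the X-macro enum/string idiom. */
#define foreach_demo_layer \
  _ (L2, "l2")             \
  _ (L3, "l3")

typedef enum demo_layer_t_
{
#define _(s, n) DEMO_LAYER_##s,
  foreach_demo_layer
#undef _
} demo_layer_t;

static const char *demo_layer_strings[] = {
#define _(n, s) [DEMO_LAYER_##n] = s,
  foreach_demo_layer
#undef _
};
/* demo_layer_strings[DEMO_LAYER_L2] == "l2" */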
diff --git a/src/plugins/gbp/gbp_vxlan_node.c b/src/plugins/gbp/gbp_vxlan_node.c
deleted file mode 100644
index 413a9f47e1b..00000000000
--- a/src/plugins/gbp/gbp_vxlan_node.c
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <plugins/gbp/gbp_vxlan.h>
-#include <plugins/gbp/gbp_itf.h>
-#include <plugins/gbp/gbp_learn.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-#include <vlibmemory/api.h>
-#include <vnet/fib/fib_table.h>
-
-extern uword *gv_db;
-
-typedef struct gbp_vxlan_trace_t_
-{
- u8 dropped;
- u32 vni;
- u32 sw_if_index;
- u16 sclass;
- u8 flags;
-} gbp_vxlan_trace_t;
-
-#define foreach_gbp_vxlan_input_next \
- _(DROP, "error-drop") \
- _(L2_INPUT, "l2-input") \
- _(IP4_INPUT, "ip4-input") \
- _(IP6_INPUT, "ip6-input")
-
-typedef enum
-{
-#define _(s,n) GBP_VXLAN_INPUT_NEXT_##s,
- foreach_gbp_vxlan_input_next
-#undef _
- GBP_VXLAN_INPUT_N_NEXT,
-} gbp_vxlan_input_next_t;
-
-
-#define foreach_gbp_vxlan_error \
- _(DECAPPED, "decapped") \
- _(LEARNED, "learned")
-
-typedef enum
-{
-#define _(s,n) GBP_VXLAN_ERROR_##s,
- foreach_gbp_vxlan_error
-#undef _
- GBP_VXLAN_N_ERROR,
-} gbp_vxlan_input_error_t;
-
-static char *gbp_vxlan_error_strings[] = {
-#define _(n,s) s,
- foreach_gbp_vxlan_error
-#undef _
-};
-
-static uword
-gbp_vxlan_decap (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip4)
-{
- u32 n_left_to_next, n_left_from, next_index, *to_next, *from;
-
- next_index = 0;
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- while (n_left_from > 0)
- {
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vxlan_gbp_header_t *vxlan_gbp0;
- gbp_vxlan_input_next_t next0;
- gbp_vxlan_tunnel_t *gt0;
- vlib_buffer_t *b0;
- u32 bi0, vni0;
- uword *p;
-
- bi0 = to_next[0] = from[0];
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
- next0 = GBP_VXLAN_INPUT_NEXT_DROP;
-
- b0 = vlib_get_buffer (vm, bi0);
- vxlan_gbp0 =
- vlib_buffer_get_current (b0) - sizeof (vxlan_gbp_header_t);
-
- vni0 = vxlan_gbp_get_vni (vxlan_gbp0);
- p = hash_get (gv_db, vni0);
-
- if (PREDICT_FALSE (NULL == p))
- {
- gt0 = NULL;
- next0 = GBP_VXLAN_INPUT_NEXT_DROP;
- }
- else
- {
- gt0 = gbp_vxlan_tunnel_get (p[0]);
-
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = gt0->gt_sw_if_index;
-
- if (GBP_VXLAN_TUN_L2 == gt0->gt_layer)
- /*
- * An L2 layer tunnel goes into the BD
- */
- next0 = GBP_VXLAN_INPUT_NEXT_L2_INPUT;
- else
- {
- /*
- * An L3 layer tunnel needs to strip the L2 header
- * and inject into the RD
- */
- ethernet_header_t *e0;
- u16 type0;
-
- e0 = vlib_buffer_get_current (b0);
- type0 = clib_net_to_host_u16 (e0->type);
- switch (type0)
- {
- case ETHERNET_TYPE_IP4:
- next0 = GBP_VXLAN_INPUT_NEXT_IP4_INPUT;
- break;
- case ETHERNET_TYPE_IP6:
- next0 = GBP_VXLAN_INPUT_NEXT_IP6_INPUT;
- break;
- default:
- goto trace;
- }
- vlib_buffer_advance (b0, sizeof (*e0));
- }
- }
-
- trace:
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- gbp_vxlan_trace_t *tr;
-
- tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->dropped = (next0 == GBP_VXLAN_INPUT_NEXT_DROP);
- tr->vni = vni0;
- tr->sw_if_index = (gt0 ? gt0->gt_sw_if_index : ~0);
- tr->flags = vxlan_gbp_get_gpflags (vxlan_gbp0);
- tr->sclass = vxlan_gbp_get_sclass (vxlan_gbp0);
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return from_frame->n_vectors;
-}
-
-VLIB_NODE_FN (gbp_vxlan4_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return gbp_vxlan_decap (vm, node, from_frame, 1);
-}
-
-static u8 *
-format_gbp_vxlan_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_vxlan_trace_t *t = va_arg (*args, gbp_vxlan_trace_t *);
-
- s = format (s, "vni:%d dropped:%d rx:%d sclass:%d flags:%U",
- t->vni, t->dropped, t->sw_if_index,
- t->sclass, format_vxlan_gbp_header_gpflags, t->flags);
-
- return (s);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_vxlan4_input_node) =
-{
- .name = "gbp-vxlan4",
- .vector_size = sizeof (u32),
- .n_errors = GBP_VXLAN_N_ERROR,
- .error_strings = gbp_vxlan_error_strings,
- .n_next_nodes = GBP_VXLAN_INPUT_N_NEXT,
- .format_trace = format_gbp_vxlan_rx_trace,
- .next_nodes = {
-#define _(s,n) [GBP_VXLAN_INPUT_NEXT_##s] = n,
- foreach_gbp_vxlan_input_next
-#undef _
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
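Note: the decap loop above recovers the VNI with vxlan_gbp_get_vni() before the gv_db lookup. As in plain VXLAN (RFC 7348), the 24-bit VNI occupies the top three bytes of a 32-bit network-order field with the low byte reserved, so extraction reduces to a byte swap and a shift. A standalone sketch (the real accessor lives in vnet/vxlan-gbp and may differ in detail):

#include <stdint.h>
#include <arpa/inet.h> /* ntohl */

/* Sketch of 24-bit VNI extraction from a VXLAN-style header word:
 * the VNI is the top 24 bits of a 32-bit network-order field. */
static inline uint32_t
demo_get_vni (uint32_t vni_reserved_net_order)
{
  return ntohl (vni_reserved_net_order) >> 8;
}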
diff --git a/src/plugins/geneve/decap.c b/src/plugins/geneve/decap.c
index bd189913f71..c64121e2829 100644
--- a/src/plugins/geneve/decap.c
+++ b/src/plugins/geneve/decap.c
@@ -812,7 +812,6 @@ static char *geneve_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (geneve4_input_node) = {
.name = "geneve4-input",
/* Takes a vector of packets. */
@@ -847,7 +846,6 @@ VLIB_REGISTER_NODE (geneve6_input_node) = {
.format_trace = format_geneve_rx_trace,
// $$$$ .unformat_buffer = unformat_geneve_header,
};
-/* *INDENT-ON* */
typedef enum
{
@@ -1252,7 +1250,6 @@ VLIB_NODE_FN (ip4_geneve_bypass_node) (vlib_main_t * vm,
return ip_geneve_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_geneve_bypass_node) =
{
.name = "ip4-geneve-bypass",
@@ -1265,7 +1262,6 @@ VLIB_REGISTER_NODE (ip4_geneve_bypass_node) =
.format_buffer = format_ip4_header,
.format_trace = format_ip4_forward_next_trace,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_geneve_bypass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1274,7 +1270,6 @@ VLIB_NODE_FN (ip6_geneve_bypass_node) (vlib_main_t * vm,
return ip_geneve_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_geneve_bypass_node) =
{
.name = "ip6-geneve-bypass",
@@ -1288,7 +1283,6 @@ VLIB_REGISTER_NODE (ip6_geneve_bypass_node) =
.format_buffer = format_ip6_header,
.format_trace = format_ip6_forward_next_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/geneve/encap.c b/src/plugins/geneve/encap.c
index 6a84d0c5490..609da2218cf 100644
--- a/src/plugins/geneve/encap.c
+++ b/src/plugins/geneve/encap.c
@@ -544,7 +544,6 @@ VLIB_NODE_FN (geneve6_encap_node) (vlib_main_t * vm,
return geneve_encap_inline (vm, node, from_frame, /* is_ip4 */ 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (geneve4_encap_node) = {
.name = "geneve4-encap",
.vector_size = sizeof (u32),
@@ -570,7 +569,6 @@ VLIB_REGISTER_NODE (geneve6_encap_node) = {
[GENEVE_ENCAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/geneve/geneve.c b/src/plugins/geneve/geneve.c
index 62502ef3fde..37b83d01761 100644
--- a/src/plugins/geneve/geneve.c
+++ b/src/plugins/geneve/geneve.c
@@ -114,7 +114,6 @@ geneve_mac_change (vnet_hw_interface_t * hi,
return (NULL);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (geneve_device_class, static) = {
.name = "GENEVE",
.format_device_name = format_geneve_name,
@@ -122,7 +121,6 @@ VNET_DEVICE_CLASS (geneve_device_class, static) = {
.admin_up_down_function = geneve_interface_admin_up_down,
.mac_addr_change_function = geneve_mac_change,
};
-/* *INDENT-ON* */
static u8 *
format_geneve_header_with_length (u8 * s, va_list * args)
@@ -132,13 +130,11 @@ format_geneve_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (geneve_hw_class) = {
.name = "GENEVE",
.format_header = format_geneve_header_with_length,
.build_rewrite = default_build_rewrite,
};
-/* *INDENT-ON* */
static void
geneve_tunnel_restack_dpo (geneve_tunnel_t * t)
@@ -425,18 +421,15 @@ int vnet_geneve_add_del_tunnel
vnet_hw_interface_t *hi;
if (a->l3_mode)
{
+ vnet_eth_interface_registration_t eir = {};
u32 t_idx = t - vxm->tunnels;
u8 address[6] =
{ 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx };
- clib_error_t *error =
- ethernet_register_interface (vnm, geneve_device_class.index,
- t_idx,
- address, &hw_if_index, 0);
- if (error)
- {
- clib_error_report (error);
- return VNET_API_ERROR_INVALID_REGISTRATION;
- }
+
+ eir.dev_class_index = geneve_device_class.index;
+ eir.dev_instance = t_idx;
+ eir.address = address;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
}
else
{
@@ -473,7 +466,8 @@ int vnet_geneve_add_del_tunnel
fib_prefix_t tun_remote_pfx;
vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
- fib_prefix_from_ip46_addr (&t->remote, &tun_remote_pfx);
+ fib_protocol_t fp = fib_ip_proto (is_ip6);
+ fib_prefix_from_ip46_addr (fp, &t->remote, &tun_remote_pfx);
if (!ip46_address_is_multicast (&t->remote))
{
/* Unicast tunnel -
@@ -497,8 +491,6 @@ int vnet_geneve_add_del_tunnel
* with different VNIs, create the output fib adjecency only if
* it does not already exist
*/
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
if (vtep_addr_ref (&vxm->vtep_table,
t->encap_fib_index, &t->remote) == 1)
{
@@ -524,15 +516,16 @@ int vnet_geneve_add_del_tunnel
* - the forwarding interface is for-us
* - the accepting interface is that from the API
*/
- mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx, MFIB_SOURCE_GENEVE, &path);
+ mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
+ MFIB_SOURCE_GENEVE,
+ MFIB_ENTRY_FLAG_NONE, &path);
path.frp_sw_if_index = a->mcast_sw_if_index;
path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei = mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx,
- MFIB_SOURCE_GENEVE, &path);
+ mfei = mfib_table_entry_path_update (
+ t->encap_fib_index, &mpfx, MFIB_SOURCE_GENEVE,
+ MFIB_ENTRY_FLAG_NONE, &path);
/*
* Create the mcast adjacency to send traffic to the group
@@ -881,7 +874,6 @@ done:
* Example of how to delete a GENEVE Tunnel:
* @cliexcmd{create geneve tunnel local 10.0.3.1 remote 10.0.3.3 vni 13 del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_geneve_tunnel_command, static) = {
.path = "create geneve tunnel",
.short_help =
@@ -890,7 +882,6 @@ VLIB_CLI_COMMAND (create_geneve_tunnel_command, static) = {
" [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [l3-mode] [del]",
.function = geneve_add_del_tunnel_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_geneve_tunnel_command_fn (vlib_main_t * vm,
@@ -920,13 +911,11 @@ show_geneve_tunnel_command_fn (vlib_main_t * vm,
* [0] local 10.0.3.1 remote 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_geneve_tunnel_command, static) = {
.path = "show geneve tunnel",
.short_help = "show geneve tunnel",
.function = show_geneve_tunnel_command_fn,
};
-/* *INDENT-ON* */
void
@@ -995,7 +984,7 @@ set_ip4_geneve_bypass (vlib_main_t * vm,
/*?
* This command adds the 'ip4-geneve-bypass' graph node for a given interface.
* By adding the IPv4 geneve-bypass graph node to an interface, the node checks
- * for and validate input geneve packet and bypass ip4-lookup, ip4-local,
+ * for and validates input geneve packets and bypasses the ip4-lookup, ip4-local,
* ip4-udp-lookup nodes to speedup geneve packet forwarding. This node will
* cause extra overhead to for non-geneve packets which is kept at a minimum.
*
@@ -1014,13 +1003,13 @@ set_ip4_geneve_bypass (vlib_main_t * vm,
*
* Example of graph node after ip4-geneve-bypass is enabled:
* @cliexstart{show vlib graph ip4-geneve-bypass}
- * Name Next Previous
- * ip4-geneve-bypass error-drop [0] ip4-input
- * geneve4-input [1] ip4-input-no-checksum
- * ip4-lookup [2]
+ * Name Next Previous
+ * ip4-geneve-bypass error-drop [0] ip4-input
+ * geneve4-input [1] ip4-input-no-checksum
+ * ip4-lookup [2]
* @cliexend
*
- * Example of how to display the feature enabed on an interface:
+ * Example of how to display the feature enabled on an interface:
* @cliexstart{show ip interface features GigabitEthernet2/0/0}
* IP feature paths configured on GigabitEthernet2/0/0...
* ...
@@ -1034,13 +1023,11 @@ set_ip4_geneve_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip geneve-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_geneve_bypass_command, static) = {
.path = "set interface ip geneve-bypass",
.function = set_ip4_geneve_bypass,
.short_help = "set interface ip geneve-bypass <interface> [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_ip6_geneve_bypass (vlib_main_t * vm,
@@ -1052,7 +1039,7 @@ set_ip6_geneve_bypass (vlib_main_t * vm,
/*?
* This command adds the 'ip6-geneve-bypass' graph node for a given interface.
* By adding the IPv6 geneve-bypass graph node to an interface, the node checks
- * for and validate input geneve packet and bypass ip6-lookup, ip6-local,
+ * for and validates input geneve packets and bypasses the ip6-lookup, ip6-local,
* ip6-udp-lookup nodes to speedup geneve packet forwarding. This node will
* cause extra overhead to for non-geneve packets which is kept at a minimum.
*
@@ -1071,13 +1058,13 @@ set_ip6_geneve_bypass (vlib_main_t * vm,
*
* Example of graph node after ip6-geneve-bypass is enabled:
* @cliexstart{show vlib graph ip6-geneve-bypass}
- * Name Next Previous
- * ip6-geneve-bypass error-drop [0] ip6-input
- * geneve6-input [1] ip4-input-no-checksum
- * ip6-lookup [2]
+ * Name Next Previous
+ * ip6-geneve-bypass error-drop [0] ip6-input
+ * geneve6-input [1] ip4-input-no-checksum
+ * ip6-lookup [2]
* @cliexend
*
- * Example of how to display the feature enabed on an interface:
+ * Example of how to display the feature enabled on an interface:
* @cliexstart{show ip interface features GigabitEthernet2/0/0}
* IP feature paths configured on GigabitEthernet2/0/0...
* ...
@@ -1091,13 +1078,11 @@ set_ip6_geneve_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip6 geneve-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_geneve_bypass_command, static) = {
.path = "set interface ip6 geneve-bypass",
.function = set_ip6_geneve_bypass,
.short_help = "set interface ip6 geneve-bypass <interface> [del]",
};
-/* *INDENT-ON* */
clib_error_t *
geneve_init (vlib_main_t * vm)
diff --git a/src/plugins/geneve/geneve.h b/src/plugins/geneve/geneve.h
index 0cc14214b9b..deb51abe126 100644
--- a/src/plugins/geneve/geneve.h
+++ b/src/plugins/geneve/geneve.h
@@ -187,9 +187,7 @@ typedef struct
u16 msg_id_base;
/* cache for last 8 geneve tunnel */
-#ifdef CLIB_HAVE_VEC512
vtep4_cache_t vtep4_u512;
-#endif
} geneve_main_t;
diff --git a/src/plugins/geneve/geneve_api.c b/src/plugins/geneve/geneve_api.c
index d35a1bf5dbc..120fab93561 100644
--- a/src/plugins/geneve/geneve_api.c
+++ b/src/plugins/geneve/geneve_api.c
@@ -97,12 +97,10 @@ static void vl_api_geneve_add_del_tunnel_t_handler
rv = vnet_geneve_add_del_tunnel (&a, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_GENEVE_ADD_DEL_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void vl_api_geneve_add_del_tunnel2_t_handler
@@ -149,12 +147,10 @@ static void vl_api_geneve_add_del_tunnel2_t_handler
rv = vnet_geneve_add_del_tunnel (&a, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_GENEVE_ADD_DEL_TUNNEL2_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void send_geneve_tunnel_details
@@ -201,12 +197,10 @@ static void vl_api_geneve_tunnel_dump_t_handler
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (t, vxm->tunnels)
{
send_geneve_tunnel_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -238,8 +232,8 @@ geneve_api_hookup (vlib_main_t * vm)
*/
geneve_base_msg_id = setup_message_id_table ();
- am->api_trace_cfg[VL_API_GENEVE_ADD_DEL_TUNNEL + REPLY_MSG_ID_BASE].size +=
- 16 * sizeof (u32);
+ vl_api_increase_msg_trace_size (
+ am, VL_API_GENEVE_ADD_DEL_TUNNEL + REPLY_MSG_ID_BASE, 16 * sizeof (u32));
return 0;
}
@@ -249,12 +243,10 @@ VLIB_API_INIT_FUNCTION (geneve_api_hookup);
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "GENEVE Tunnels",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
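Note: the geneve_tunnel_dump handler above follows the stock details/dump convention: sw_if_index == ~0 requests all objects, any other value filters to one. A minimal sketch of that dispatch; obj_t, obj_pool and send_details() are hypothetical stand-ins:

/* Sketch of the "~0 means all" dump convention. */
typedef struct { u32 sw_if_index; } obj_t;
static obj_t *obj_pool;
static void send_details (obj_t *o, void *reg, u32 context); /* hypothetical */

static void
dump_objects (u32 sw_if_index, void *reg, u32 context)
{
  obj_t *o;

  if (~0 == sw_if_index)
    {
      /* wildcard: one details message per pool element */
      pool_foreach (o, obj_pool)
        {
          send_details (o, reg, context);
        }
    }
  else
    {
      /* filtered: details for the matching object only */
      pool_foreach (o, obj_pool)
        {
          if (o->sw_if_index == sw_if_index)
            send_details (o, reg, context);
        }
    }
}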
diff --git a/src/plugins/geneve/geneve_test.c b/src/plugins/geneve/geneve_test.c
index ad6d3296ef6..e777e9b998e 100644
--- a/src/plugins/geneve/geneve_test.c
+++ b/src/plugins/geneve/geneve_test.c
@@ -26,7 +26,7 @@
#include <vnet/format_fns.h>
#include <geneve/geneve.api_enum.h>
#include <geneve/geneve.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -41,13 +41,11 @@ geneve_test_main_t geneve_test_main;
#define __plugin_msg_base geneve_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static void vl_api_geneve_add_del_tunnel_reply_t_handler
(vl_api_geneve_add_del_tunnel_reply_t * mp)
diff --git a/src/plugins/l2e/CMakeLists.txt b/src/plugins/gre/CMakeLists.txt
index 2bfb05a43e6..60fe540b968 100644
--- a/src/plugins/l2e/CMakeLists.txt
+++ b/src/plugins/gre/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -11,18 +11,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-add_vpp_plugin(l2e
+add_vpp_plugin(gre
SOURCES
- l2e_node.c
- l2e_api.c
- l2e.c
+ gre.c
+ node.c
+ gre_api.c
+ interface.c
+ pg.c
+ plugin.c
MULTIARCH_SOURCES
- l2e_node.c
+ node.c
+ gre.c
+
+ INSTALL_HEADERS
+ gre.h
+ error.def
API_FILES
- l2e.api
+ gre.api
- INSTALL_HEADERS
- l2e.h
)
+
diff --git a/src/plugins/gre/FEATURE.yaml b/src/plugins/gre/FEATURE.yaml
new file mode 100644
index 00000000000..4b35b870dc3
--- /dev/null
+++ b/src/plugins/gre/FEATURE.yaml
@@ -0,0 +1,13 @@
+---
+name: Generic Routing Encapsulation
+maintainer: Neale Ranns <nranns@cisco.com>
+features:
+ - L3 tunnels, all combinations of IPv4 and IPv6
+ - Encap/Decap flags to control the copying of DSCP, ECN, DF from overlay to
+ underlay and vice-versa.
+ - L2 tunnels
+missing:
+ - GRE keys
+description: "An implementation of Generic Routing Encapsulation (GRE)"
+state: production
+properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/gre/error.def b/src/plugins/gre/error.def
new file mode 100644
index 00000000000..161ecc1d874
--- /dev/null
+++ b/src/plugins/gre/error.def
@@ -0,0 +1,23 @@
+/*
+ * gre_error.def: gre errors
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+gre_error (NONE, "no error")
+gre_error (UNKNOWN_PROTOCOL, "unknown protocol")
+gre_error (UNSUPPORTED_VERSION, "unsupported version")
+gre_error (PKTS_DECAP, "GRE input packets decapsulated")
+gre_error (PKTS_ENCAP, "GRE output packets encapsulated")
+gre_error (NO_SUCH_TUNNEL, "GRE input packets dropped due to missing tunnel")
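Note: a .def file like this is consumed with the include-twice idiom: each consumer defines gre_error() before including it, expanding the same list once into an enum and once into a string table. A hedged sketch of the two expansions (the actual consumer in gre.h may differ in naming):

/* Sketch: expanding error.def twice via the gre_error() macro. */
typedef enum
{
#define gre_error(n, s) GRE_ERROR_##n,
#include <gre/error.def>
#undef gre_error
  GRE_N_ERROR,
} gre_error_t;

static char *gre_error_strings[] = {
#define gre_error(n, s) s,
#include <gre/error.def>
#undef gre_error
};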
diff --git a/src/plugins/gre/gre.api b/src/plugins/gre/gre.api
new file mode 100644
index 00000000000..9c69ba4007d
--- /dev/null
+++ b/src/plugins/gre/gre.api
@@ -0,0 +1,110 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2015-2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "2.1.1";
+
+import "vnet/interface_types.api";
+import "vnet/tunnel/tunnel_types.api";
+import "vnet/ip/ip_types.api";
+
+/** \brief A GRE tunnel type
+*/
+enum gre_tunnel_type : u8
+{
+ GRE_API_TUNNEL_TYPE_L3 = 0,
+ /* L2 Transparent Ethernet Bridge */
+ GRE_API_TUNNEL_TYPE_TEB,
+ /* Encapsulated Remote Switched Port ANalyzer */
+ GRE_API_TUNNEL_TYPE_ERSPAN,
+};
+
+/** \brief A composite type uniquely defining a GRE tunnel.
+ @param type - tunnel type (see enum definition), 0: L3, 1: TEB, 2: ERSPAN
+ @param mode - P2P or P2MP
+ @param flags - to control encap/decap behaviour
+ @param session_id - session for ERSPAN tunnel, range 0-1023
+ @param instance - optional unique custom device instance, else ~0.
+ @param outer_table_id - Encap FIB table ID
+ @param sw_if_index - ignored on create/delete, present in details.
+ @param src - Source IP address
+ @param dst - Destination IP address, can be multicast
+*/
+typedef gre_tunnel
+{
+ vl_api_gre_tunnel_type_t type;
+ vl_api_tunnel_mode_t mode;
+ vl_api_tunnel_encap_decap_flags_t flags;
+ u16 session_id;
+ u32 instance;
+ u32 outer_table_id;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_address_t src;
+ vl_api_address_t dst;
+};
+
+/** \brief Add or delete a single GRE tunnel.
+ @param client_index - opaque cookie to identify the sender.
+ @param context - sender context, to match reply w/ request.
+ @param is_add - add if true, delete if false.
+ @param tunnel - tunnel definition to add or delete.
+*/
+define gre_tunnel_add_del
+{
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_gre_tunnel_t tunnel;
+};
+
+/** \brief Reply for a single GRE tunnel add/delete request.
+ @param context - sender context, to match reply w/ request.
+ @param retval - return code for the request.
+ @param sw_if_index - the interface corresponding to the affected tunnel.
+*/
+define gre_tunnel_add_del_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Dump details of all or just a single GRE tunnel.
+ @param client_index - opaque cookie to identify the sender.
+ @param context - sender context, to match reply w/ request.
+ @param sw_if_index - filter for tunnel of this interface index, ~0 for all.
+*/
+define gre_tunnel_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Details response for one of the requested GRE tunnels.
+ @param context - sender context, to match reply w/ request.
+ @param tunnel - definition of the dumped tunnel.
+*/
+define gre_tunnel_details
+{
+ u32 context;
+ vl_api_gre_tunnel_t tunnel;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
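Note: from this .api file the VPP API compiler emits C types using the vl_api_<message>_t convention, as seen for GENEVE earlier in this change. A hedged client-side sketch of populating the add request; the generated names are assumed to follow that convention, TUNNEL_API_MODE_P2P is assumed from vnet/tunnel/tunnel_types.api, and multi-byte fields are big-endian on the wire:

/* Sketch: filling a generated vl_api_gre_tunnel_add_del_t request.
 * Assumes the generated gre.api_types.h; allocation and send are
 * handled by the API client library and elided here. */
static void
fill_gre_add_request (vl_api_gre_tunnel_add_del_t *mp)
{
  mp->is_add = 1;
  mp->tunnel.type = GRE_API_TUNNEL_TYPE_L3;
  mp->tunnel.mode = TUNNEL_API_MODE_P2P;		/* assumed name */
  mp->tunnel.instance = clib_host_to_net_u32 (~0);	/* auto-assign */
  mp->tunnel.outer_table_id = clib_host_to_net_u32 (0);	/* default FIB */
  /* tunnel.src / tunnel.dst (vl_api_address_t) would be set via the
   * ip_address_encode helpers; elided in this sketch. */
}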
diff --git a/src/plugins/gre/gre.c b/src/plugins/gre/gre.c
new file mode 100644
index 00000000000..ce11ee9ecb2
--- /dev/null
+++ b/src/plugins/gre/gre.c
@@ -0,0 +1,842 @@
+/*
+ * gre.c: gre
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <gre/gre.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/tunnel/tunnel_dp.h>
+#include <vpp/app/version.h>
+#include <vnet/plugin/plugin.h>
+
+extern gre_main_t gre_main;
+
+#ifndef CLIB_MARCH_VARIANT
+gre_main_t gre_main;
+
+typedef struct
+{
+ union
+ {
+ ip4_and_gre_header_t ip4_and_gre;
+ u64 as_u64[3];
+ };
+} ip4_and_gre_union_t;
+
+typedef struct
+{
+ union
+ {
+ ip6_and_gre_header_t ip6_and_gre;
+ u64 as_u64[3];
+ };
+} ip6_and_gre_union_t;
+#endif /* CLIB_MARCH_VARIANT */
+
+/* Packet trace structure */
+typedef struct
+{
+ /* Tunnel-id / index in tunnel vector */
+ u32 tunnel_id;
+
+ /* pkt length */
+ u32 length;
+
+ /* tunnel ip addresses */
+ ip46_address_t src;
+ ip46_address_t dst;
+} gre_tx_trace_t;
+
+extern u8 *format_gre_tx_trace (u8 *s, va_list *args);
+
+#ifndef CLIB_MARCH_VARIANT
+u8 *
+format_gre_tx_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ gre_tx_trace_t *t = va_arg (*args, gre_tx_trace_t *);
+
+ s = format (s, "GRE: tunnel %d len %d src %U dst %U", t->tunnel_id,
+ t->length, format_ip46_address, &t->src, IP46_TYPE_ANY,
+ format_ip46_address, &t->dst, IP46_TYPE_ANY);
+ return s;
+}
+
+u8 *
+format_gre_protocol (u8 *s, va_list *args)
+{
+ gre_protocol_t p = va_arg (*args, u32);
+ gre_main_t *gm = &gre_main;
+ gre_protocol_info_t *pi = gre_get_protocol_info (gm, p);
+
+ if (pi)
+ s = format (s, "%s", pi->name);
+ else
+ s = format (s, "0x%04x", p);
+
+ return s;
+}
+
+u8 *
+format_gre_header_with_length (u8 *s, va_list *args)
+{
+ gre_main_t *gm = &gre_main;
+ gre_header_t *h = va_arg (*args, gre_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+ gre_protocol_t p = clib_net_to_host_u16 (h->protocol);
+ u32 indent, header_bytes;
+
+ header_bytes = sizeof (h[0]);
+ if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+ return format (s, "gre header truncated");
+
+ indent = format_get_indent (s);
+
+ s = format (s, "GRE %U", format_gre_protocol, p);
+
+ if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+ {
+ gre_protocol_info_t *pi = gre_get_protocol_info (gm, p);
+ vlib_node_t *node = vlib_get_node (gm->vlib_main, pi->node_index);
+ if (node->format_buffer)
+ s =
+ format (s, "\n%U%U", format_white_space, indent, node->format_buffer,
+ (void *) (h + 1), max_header_bytes - header_bytes);
+ }
+
+ return s;
+}
+
+u8 *
+format_gre_header (u8 *s, va_list *args)
+{
+ gre_header_t *h = va_arg (*args, gre_header_t *);
+ return format (s, "%U", format_gre_header_with_length, h, 0);
+}
+
+/* Returns gre protocol as an int in host byte order. */
+uword
+unformat_gre_protocol_host_byte_order (unformat_input_t *input, va_list *args)
+{
+ u16 *result = va_arg (*args, u16 *);
+ gre_main_t *gm = &gre_main;
+ int i;
+
+ /* Named type. */
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ gm->protocol_info_by_name, &i))
+ {
+ gre_protocol_info_t *pi = vec_elt_at_index (gm->protocol_infos, i);
+ *result = pi->protocol;
+ return 1;
+ }
+
+ return 0;
+}
+
+uword
+unformat_gre_protocol_net_byte_order (unformat_input_t *input, va_list *args)
+{
+ u16 *result = va_arg (*args, u16 *);
+ if (!unformat_user (input, unformat_gre_protocol_host_byte_order, result))
+ return 0;
+ *result = clib_host_to_net_u16 ((u16) *result);
+ return 1;
+}
+
+uword
+unformat_gre_header (unformat_input_t *input, va_list *args)
+{
+ u8 **result = va_arg (*args, u8 **);
+ gre_header_t _h, *h = &_h;
+ u16 p;
+
+ if (!unformat (input, "%U", unformat_gre_protocol_host_byte_order, &p))
+ return 0;
+
+ h->protocol = clib_host_to_net_u16 (p);
+
+ /* Add header to result. */
+ {
+ void *p;
+ u32 n_bytes = sizeof (h[0]);
+
+ vec_add2 (*result, p, n_bytes);
+ clib_memcpy (p, h, n_bytes);
+ }
+
+ return 1;
+}
+
+static int
+gre_proto_from_vnet_link (vnet_link_t link)
+{
+ switch (link)
+ {
+ case VNET_LINK_IP4:
+ return (GRE_PROTOCOL_ip4);
+ case VNET_LINK_IP6:
+ return (GRE_PROTOCOL_ip6);
+ case VNET_LINK_MPLS:
+ return (GRE_PROTOCOL_mpls_unicast);
+ case VNET_LINK_ETHERNET:
+ return (GRE_PROTOCOL_teb);
+ case VNET_LINK_ARP:
+ return (GRE_PROTOCOL_arp);
+ case VNET_LINK_NSH:
+ ASSERT (0);
+ break;
+ }
+ ASSERT (0);
+ return (GRE_PROTOCOL_ip4);
+}
+
+static u8 *
+gre_build_rewrite (vnet_main_t *vnm, u32 sw_if_index, vnet_link_t link_type,
+ const void *dst_address)
+{
+ gre_main_t *gm = &gre_main;
+ const ip46_address_t *dst;
+ ip4_and_gre_header_t *h4;
+ ip6_and_gre_header_t *h6;
+ gre_header_t *gre;
+ u8 *rewrite = NULL;
+ gre_tunnel_t *t;
+ u32 ti;
+ u8 is_ipv6;
+
+ dst = dst_address;
+ ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
+
+ if (~0 == ti)
+ /* not one of ours */
+ return (0);
+
+ t = pool_elt_at_index (gm->tunnels, ti);
+
+ is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+
+ if (!is_ipv6)
+ {
+ vec_validate (rewrite, sizeof (*h4) - 1);
+ h4 = (ip4_and_gre_header_t *) rewrite;
+ gre = &h4->gre;
+ h4->ip4.ip_version_and_header_length = 0x45;
+ h4->ip4.ttl = 254;
+ h4->ip4.protocol = IP_PROTOCOL_GRE;
+ /* fixup ip4 header length and checksum after-the-fact */
+ h4->ip4.src_address.as_u32 = t->tunnel_src.ip4.as_u32;
+ h4->ip4.dst_address.as_u32 = dst->ip4.as_u32;
+ h4->ip4.checksum = ip4_header_checksum (&h4->ip4);
+ }
+ else
+ {
+ vec_validate (rewrite, sizeof (*h6) - 1);
+ h6 = (ip6_and_gre_header_t *) rewrite;
+ gre = &h6->gre;
+ h6->ip6.ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (6 << 28);
+ h6->ip6.hop_limit = 255;
+ h6->ip6.protocol = IP_PROTOCOL_GRE;
+ /* fixup ip6 header length and checksum after-the-fact */
+ h6->ip6.src_address.as_u64[0] = t->tunnel_src.ip6.as_u64[0];
+ h6->ip6.src_address.as_u64[1] = t->tunnel_src.ip6.as_u64[1];
+ h6->ip6.dst_address.as_u64[0] = dst->ip6.as_u64[0];
+ h6->ip6.dst_address.as_u64[1] = dst->ip6.as_u64[1];
+ }
+
+ if (PREDICT_FALSE (t->type == GRE_TUNNEL_TYPE_ERSPAN))
+ {
+ gre->protocol = clib_host_to_net_u16 (GRE_PROTOCOL_erspan);
+ gre->flags_and_version = clib_host_to_net_u16 (GRE_FLAGS_SEQUENCE);
+ }
+ else
+ gre->protocol =
+ clib_host_to_net_u16 (gre_proto_from_vnet_link (link_type));
+
+ return (rewrite);
+}
+
+static void
+gre44_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
+{
+ tunnel_encap_decap_flags_t flags;
+ ip4_and_gre_header_t *ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+ flags = pointer_to_uword (data);
+
+ /* Fixup the checksum and len fields in the GRE tunnel encap
+ * that was applied at the midchain node */
+ ip0->ip4.length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ tunnel_encap_fixup_4o4 (flags, (ip4_header_t *) (ip0 + 1), &ip0->ip4);
+ ip0->ip4.checksum = ip4_header_checksum (&ip0->ip4);
+}
+
+static void
+gre64_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
+{
+ tunnel_encap_decap_flags_t flags;
+ ip4_and_gre_header_t *ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+ flags = pointer_to_uword (data);
+
+ /* Fixup the checksum and len fields in the GRE tunnel encap
+ * that was applied at the midchain node */
+ ip0->ip4.length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ tunnel_encap_fixup_6o4 (flags, (ip6_header_t *) (ip0 + 1), &ip0->ip4);
+ ip0->ip4.checksum = ip4_header_checksum (&ip0->ip4);
+}
+
+static void
+grex4_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
+{
+ ip4_header_t *ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Fixup the checksum and len fields in the GRE tunnel encap
+ * that was applied at the midchain node */
+ ip0->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
+ ip0->checksum = ip4_header_checksum (ip0);
+}
+
+static void
+gre46_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
+{
+ tunnel_encap_decap_flags_t flags;
+ ip6_and_gre_header_t *ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+ flags = pointer_to_uword (data);
+
+ /* Fixup the payload length field in the GRE tunnel encap that was applied
+ * at the midchain node */
+ ip0->ip6.payload_length = clib_host_to_net_u16 (
+ vlib_buffer_length_in_chain (vm, b0) - sizeof (ip0->ip6));
+ tunnel_encap_fixup_4o6 (flags, b0, (ip4_header_t *) (ip0 + 1), &ip0->ip6);
+}
+
+static void
+gre66_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
+{
+ tunnel_encap_decap_flags_t flags;
+ ip6_and_gre_header_t *ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+ flags = pointer_to_uword (data);
+
+ /* Fixup the payload length field in the GRE tunnel encap that was applied
+ * at the midchain node */
+ ip0->ip6.payload_length = clib_host_to_net_u16 (
+ vlib_buffer_length_in_chain (vm, b0) - sizeof (ip0->ip6));
+ tunnel_encap_fixup_6o6 (flags, (ip6_header_t *) (ip0 + 1), &ip0->ip6);
+}
+
+static void
+grex6_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
+{
+ ip6_and_gre_header_t *ip0;
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Fixup the payload length field in the GRE tunnel encap that was applied
+ * at the midchain node */
+ ip0->ip6.payload_length = clib_host_to_net_u16 (
+ vlib_buffer_length_in_chain (vm, b0) - sizeof (ip0->ip6));
+}
+
+/**
+ * return the appropriate fixup function given the overlay (link-type) and
+ * underlay (fproto) combination
+ */
+static adj_midchain_fixup_t
+gre_get_fixup (fib_protocol_t fproto, vnet_link_t lt)
+{
+ if (fproto == FIB_PROTOCOL_IP6 && lt == VNET_LINK_IP6)
+ return (gre66_fixup);
+ if (fproto == FIB_PROTOCOL_IP6 && lt == VNET_LINK_IP4)
+ return (gre46_fixup);
+ if (fproto == FIB_PROTOCOL_IP4 && lt == VNET_LINK_IP6)
+ return (gre64_fixup);
+ if (fproto == FIB_PROTOCOL_IP4 && lt == VNET_LINK_IP4)
+ return (gre44_fixup);
+ if (fproto == FIB_PROTOCOL_IP6)
+ return (grex6_fixup);
+ if (fproto == FIB_PROTOCOL_IP4)
+ return (grex4_fixup);
+
+ ASSERT (0);
+ return (gre44_fixup);
+}
+
+void
+gre_update_adj (vnet_main_t *vnm, u32 sw_if_index, adj_index_t ai)
+{
+ gre_main_t *gm = &gre_main;
+ gre_tunnel_t *t;
+ adj_flags_t af;
+ u32 ti;
+
+ ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
+ t = pool_elt_at_index (gm->tunnels, ti);
+ af = ADJ_FLAG_NONE;
+
+ /*
+ * the user has not requested that the load-balancing be based on
+ * a flow hash of the inner packet. so use the stacking to choose
+ * a path.
+ */
+ if (!(t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH))
+ af |= ADJ_FLAG_MIDCHAIN_IP_STACK;
+
+ adj_nbr_midchain_update_rewrite (
+ ai, gre_get_fixup (t->tunnel_dst.fp_proto, adj_get_link_type (ai)),
+ uword_to_pointer (t->flags, void *), af,
+ gre_build_rewrite (vnm, sw_if_index, adj_get_link_type (ai),
+ &t->tunnel_dst.fp_addr));
+
+ gre_tunnel_stack (ai);
+}
+
+adj_walk_rc_t
+mgre_mk_complete_walk (adj_index_t ai, void *data)
+{
+ mgre_walk_ctx_t *ctx = data;
+ adj_flags_t af;
+
+ af = ADJ_FLAG_NONE;
+
+ /*
+ * the user has not requested that the load-balancing be based on
+ * a flow hash of the inner packet. so use the stacking to choose
+ * a path.
+ */
+ if (!(ctx->t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH))
+ af |= ADJ_FLAG_MIDCHAIN_IP_STACK;
+
+ adj_nbr_midchain_update_rewrite (
+ ai, gre_get_fixup (ctx->t->tunnel_dst.fp_proto, adj_get_link_type (ai)),
+ uword_to_pointer (ctx->t->flags, void *), af,
+ gre_build_rewrite (vnet_get_main (), ctx->t->sw_if_index,
+ adj_get_link_type (ai),
+ &teib_entry_get_nh (ctx->ne)->fp_addr));
+
+ teib_entry_adj_stack (ctx->ne, ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+adj_walk_rc_t
+mgre_mk_incomplete_walk (adj_index_t ai, void *data)
+{
+ gre_tunnel_t *t = data;
+
+ adj_nbr_midchain_update_rewrite (
+ ai, gre_get_fixup (t->tunnel_dst.fp_proto, adj_get_link_type (ai)), NULL,
+ ADJ_FLAG_NONE, NULL);
+
+ adj_midchain_delegate_unstack (ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+void
+mgre_update_adj (vnet_main_t *vnm, u32 sw_if_index, adj_index_t ai)
+{
+ gre_main_t *gm = &gre_main;
+ ip_adjacency_t *adj;
+ teib_entry_t *ne;
+ gre_tunnel_t *t;
+ u32 ti;
+
+ adj = adj_get (ai);
+ ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
+ t = pool_elt_at_index (gm->tunnels, ti);
+
+ ne = teib_entry_find_46 (sw_if_index, adj->ia_nh_proto,
+ &adj->sub_type.nbr.next_hop);
+
+ if (NULL == ne)
+ {
+ // no TEIB entry to provide the next-hop
+ adj_nbr_midchain_update_rewrite (
+ ai, gre_get_fixup (t->tunnel_dst.fp_proto, adj_get_link_type (ai)),
+ uword_to_pointer (t->flags, void *), ADJ_FLAG_NONE, NULL);
+ return;
+ }
+
+ mgre_walk_ctx_t ctx = { .t = t, .ne = ne };
+ adj_nbr_walk_nh (sw_if_index, adj->ia_nh_proto, &adj->sub_type.nbr.next_hop,
+ mgre_mk_complete_walk, &ctx);
+}
+#endif /* CLIB_MARCH_VARIANT */
+
+typedef enum
+{
+ GRE_ENCAP_NEXT_L2_MIDCHAIN,
+ GRE_ENCAP_N_NEXT,
+} gre_encap_next_t;
+
+/**
+ * @brief TX function. Only called for L2 payload including TEB or ERSPAN.
+ * L3 traffic uses the adj-midchains.
+ */
+static_always_inline u32
+gre_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, gre_tunnel_type_t type)
+{
+ gre_main_t *gm = &gre_main;
+ u32 *from, n_left_from;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ u32 sw_if_index[2] = { ~0, ~0 };
+ const gre_tunnel_t *gt[2] = { 0 };
+ adj_index_t adj_index[2] = { ADJ_INDEX_INVALID, ADJ_INDEX_INVALID };
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+
+ while (n_left_from >= 2)
+ {
+
+ if (PREDICT_FALSE (sw_if_index[0] !=
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
+ {
+ const vnet_hw_interface_t *hi;
+ sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+ hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[0]);
+ gt[0] = &gm->tunnels[hi->dev_instance];
+ adj_index[0] = gt[0]->l2_adj_index;
+ }
+ if (PREDICT_FALSE (sw_if_index[1] !=
+ vnet_buffer (b[1])->sw_if_index[VLIB_TX]))
+ {
+ const vnet_hw_interface_t *hi;
+ sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
+ hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[1]);
+ gt[1] = &gm->tunnels[hi->dev_instance];
+ adj_index[1] = gt[1]->l2_adj_index;
+ }
+
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = adj_index[0];
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = adj_index[1];
+
+ if (type == GRE_TUNNEL_TYPE_ERSPAN)
+ {
+ /* Encap GRE seq# and ERSPAN type II header */
+ erspan_t2_t *h0;
+ u32 seq_num;
+ u64 hdr;
+ vlib_buffer_advance (b[0], -sizeof (erspan_t2_t));
+ h0 = vlib_buffer_get_current (b[0]);
+ seq_num = clib_atomic_fetch_add (&gt[0]->gre_sn->seq_num, 1);
+ hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
+ h0->seq_num = clib_host_to_net_u32 (seq_num);
+ h0->t2_u64 = hdr;
+ h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[0]->session_id);
+ }
+ if (type == GRE_TUNNEL_TYPE_ERSPAN)
+ {
+ /* Encap GRE seq# and ERSPAN type II header */
+ erspan_t2_t *h0;
+ u32 seq_num;
+ u64 hdr;
+ vlib_buffer_advance (b[1], -sizeof (erspan_t2_t));
+ h0 = vlib_buffer_get_current (b[1]);
+ seq_num = clib_atomic_fetch_add (&gt[1]->gre_sn->seq_num, 1);
+ hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
+ h0->seq_num = clib_host_to_net_u32 (seq_num);
+ h0->t2_u64 = hdr;
+ h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[1]->session_id);
+ }
+
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gre_tx_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr));
+ tr->tunnel_id = gt[0] - gm->tunnels;
+ tr->src = gt[0]->tunnel_src;
+ tr->dst = gt[0]->tunnel_dst.fp_addr;
+ tr->length = vlib_buffer_length_in_chain (vm, b[0]);
+ }
+ if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gre_tx_trace_t *tr = vlib_add_trace (vm, node, b[1], sizeof (*tr));
+ tr->tunnel_id = gt[1] - gm->tunnels;
+ tr->src = gt[1]->tunnel_src;
+ tr->dst = gt[1]->tunnel_dst.fp_addr;
+ tr->length = vlib_buffer_length_in_chain (vm, b[1]);
+ }
+
+ b += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from >= 1)
+ {
+
+ if (PREDICT_FALSE (sw_if_index[0] !=
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
+ {
+ const vnet_hw_interface_t *hi;
+ sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+ hi = vnet_get_sup_hw_interface (gm->vnet_main, sw_if_index[0]);
+ gt[0] = &gm->tunnels[hi->dev_instance];
+ adj_index[0] = gt[0]->l2_adj_index;
+ }
+
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = adj_index[0];
+
+ if (type == GRE_TUNNEL_TYPE_ERSPAN)
+ {
+ /* Encap GRE seq# and ERSPAN type II header */
+ erspan_t2_t *h0;
+ u32 seq_num;
+ u64 hdr;
+ ASSERT (gt[0]->type == GRE_TUNNEL_TYPE_ERSPAN);
+ vlib_buffer_advance (b[0], -sizeof (erspan_t2_t));
+ h0 = vlib_buffer_get_current (b[0]);
+ seq_num = clib_atomic_fetch_add (&gt[0]->gre_sn->seq_num, 1);
+ hdr = clib_host_to_net_u64 (ERSPAN_HDR2);
+ h0->seq_num = clib_host_to_net_u32 (seq_num);
+ h0->t2_u64 = hdr;
+ h0->t2.cos_en_t_session |= clib_host_to_net_u16 (gt[0]->session_id);
+ }
+
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gre_tx_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr));
+ tr->tunnel_id = gt[0] - gm->tunnels;
+ tr->src = gt[0]->tunnel_src;
+ tr->dst = gt[0]->tunnel_dst.fp_addr;
+ tr->length = vlib_buffer_length_in_chain (vm, b[0]);
+ }
+
+ b += 1;
+ n_left_from -= 1;
+ }
+
+ vlib_buffer_enqueue_to_single_next (
+ vm, node, from, GRE_ENCAP_NEXT_L2_MIDCHAIN, frame->n_vectors);
+
+ vlib_node_increment_counter (vm, node->node_index, GRE_ERROR_PKTS_ENCAP,
+ frame->n_vectors);
+
+ return frame->n_vectors;
+}
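+
+/* Note: both loops above keep a per-lane cache of the last-seen
+ * sw_if_index -> tunnel/l2_adj_index mapping, so a burst of packets to
+ * the same tunnel pays for the interface lookup only once. */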
+
+static char *gre_error_strings[] = {
+#define gre_error(n, s) s,
+#include <gre/error.def>
+#undef gre_error
+};
+
+VLIB_NODE_FN (gre_teb_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return (gre_encap_inline (vm, node, frame, GRE_TUNNEL_TYPE_TEB));
+}
+
+VLIB_NODE_FN (gre_erspan_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return (gre_encap_inline (vm, node, frame, GRE_TUNNEL_TYPE_ERSPAN));
+}
+
+VLIB_REGISTER_NODE (gre_teb_encap_node) =
+{
+ .name = "gre-teb-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_gre_tx_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = GRE_N_ERROR,
+ .error_strings = gre_error_strings,
+ .n_next_nodes = GRE_ENCAP_N_NEXT,
+ .next_nodes = {
+ [GRE_ENCAP_NEXT_L2_MIDCHAIN] = "adj-l2-midchain",
+ },
+};
+VLIB_REGISTER_NODE (gre_erspan_encap_node) =
+{
+ .name = "gre-erspan-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_gre_tx_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = GRE_N_ERROR,
+ .error_strings = gre_error_strings,
+ .n_next_nodes = GRE_ENCAP_N_NEXT,
+ .next_nodes = {
+ [GRE_ENCAP_NEXT_L2_MIDCHAIN] = "adj-l2-midchain",
+ },
+};
+
+#ifndef CLIB_MARCH_VARIANT
+static u8 *
+format_gre_tunnel_name (u8 *s, va_list *args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ gre_main_t *gm = &gre_main;
+ gre_tunnel_t *t;
+
+ if (dev_instance >= vec_len (gm->tunnels))
+ return format (s, "<improperly-referenced>");
+
+ t = pool_elt_at_index (gm->tunnels, dev_instance);
+ return format (s, "gre%d", t->user_instance);
+}
+
+static u8 *
+format_gre_device (u8 *s, va_list *args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ CLIB_UNUSED (int verbose) = va_arg (*args, int);
+
+ s = format (s, "GRE tunnel: id %d\n", dev_instance);
+ return s;
+}
+
+static int
+gre_tunnel_desc (u32 sw_if_index, ip46_address_t *src, ip46_address_t *dst,
+ u8 *is_l2)
+{
+ gre_main_t *gm = &gre_main;
+ gre_tunnel_t *t;
+ u32 ti;
+
+ ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
+
+ if (~0 == ti)
+ /* not one of ours */
+ return -1;
+
+ t = pool_elt_at_index (gm->tunnels, ti);
+
+ *src = t->tunnel_src;
+ *dst = t->tunnel_dst.fp_addr;
+ *is_l2 = t->type == GRE_TUNNEL_TYPE_TEB;
+
+ return (0);
+}
+
+VNET_DEVICE_CLASS (gre_device_class) = {
+ .name = "GRE tunnel device",
+ .format_device_name = format_gre_tunnel_name,
+ .format_device = format_gre_device,
+ .format_tx_trace = format_gre_tx_trace,
+ .admin_up_down_function = gre_interface_admin_up_down,
+ .ip_tun_desc = gre_tunnel_desc,
+#ifdef SOON
+  .clear_counters = 0,
+#endif
+};
+
+VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = {
+ .name = "GRE",
+ .format_header = format_gre_header_with_length,
+ .unformat_header = unformat_gre_header,
+ .build_rewrite = gre_build_rewrite,
+ .update_adjacency = gre_update_adj,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+
+VNET_HW_INTERFACE_CLASS (mgre_hw_interface_class) = {
+ .name = "mGRE",
+ .format_header = format_gre_header_with_length,
+ .unformat_header = unformat_gre_header,
+ .build_rewrite = gre_build_rewrite,
+ .update_adjacency = mgre_update_adj,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
+};
+#endif /* CLIB_MARCH_VARIANT */
+
+static void
+add_protocol (gre_main_t *gm, gre_protocol_t protocol, char *protocol_name)
+{
+ gre_protocol_info_t *pi;
+ u32 i;
+
+ vec_add2 (gm->protocol_infos, pi, 1);
+ i = pi - gm->protocol_infos;
+
+ pi->name = protocol_name;
+ pi->protocol = protocol;
+ pi->next_index = pi->node_index = ~0;
+
+ hash_set (gm->protocol_info_by_protocol, protocol, i);
+ hash_set_mem (gm->protocol_info_by_name, pi->name, i);
+}
+
+static clib_error_t *
+gre_init (vlib_main_t *vm)
+{
+ gre_main_t *gm = &gre_main;
+ clib_error_t *error;
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi;
+
+ clib_memset (gm, 0, sizeof (gm[0]));
+ gm->vlib_main = vm;
+ gm->vnet_main = vnet_get_main ();
+
+ if ((error = vlib_call_init_function (vm, ip_main_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+ return error;
+
+ /* Set up the ip packet generator */
+ pi = ip_get_protocol_info (im, IP_PROTOCOL_GRE);
+ pi->format_header = format_gre_header;
+ pi->unformat_pg_edit = unformat_pg_gre_header;
+
+ gm->protocol_info_by_name = hash_create_string (0, sizeof (uword));
+ gm->protocol_info_by_protocol = hash_create (0, sizeof (uword));
+ gm->tunnel_by_key4 =
+ hash_create_mem (0, sizeof (gre_tunnel_key4_t), sizeof (uword));
+ gm->tunnel_by_key6 =
+ hash_create_mem (0, sizeof (gre_tunnel_key6_t), sizeof (uword));
+ gm->seq_num_by_key =
+ hash_create_mem (0, sizeof (gre_sn_key_t), sizeof (uword));
+
+#define _(n, s) add_protocol (gm, GRE_PROTOCOL_##s, #s);
+ foreach_gre_protocol
+#undef _
+ return vlib_call_init_function (vm, gre_input_init);
+}
+
+VLIB_INIT_FUNCTION (gre_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gre/gre.h b/src/plugins/gre/gre.h
new file mode 100644
index 00000000000..ce57454f9b7
--- /dev/null
+++ b/src/plugins/gre/gre.h
@@ -0,0 +1,439 @@
+/*
+ * gre.h: types/functions for gre.
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_gre_h
+#define included_gre_h
+
+#include <vnet/vnet.h>
+#include <vnet/gre/packet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/format.h>
+#include <vnet/adj/adj_types.h>
+#include <vnet/tunnel/tunnel.h>
+#include <vnet/teib/teib.h>
+
+extern vnet_hw_interface_class_t gre_hw_interface_class;
+extern vnet_hw_interface_class_t mgre_hw_interface_class;
+
+typedef enum
+{
+#define gre_error(n,s) GRE_ERROR_##n,
+#include <gre/error.def>
+#undef gre_error
+ GRE_N_ERROR,
+} gre_error_t;
+
+/**
+ * L3: GRE (i.e. this tunnel is in L3 mode)
+ * TEB: Transparent Ethernet Bridging - the tunnel is in L2 mode
+ * ERSPAN: type 2 - the tunnel is for port mirror SPAN output. Each tunnel is
+ * associated with a session ID and expected to be used for encap
+ *         and output of mirrored packets from an L2 network only. There is
+ *         no support for receiving ERSPAN packets from a GRE ERSPAN tunnel.
+ */
+#define foreach_gre_tunnel_type \
+ _(L3, "L3") \
+ _(TEB, "TEB") \
+ _(ERSPAN, "ERSPAN") \
+
+/**
+ * @brief The GRE tunnel type
+ */
+typedef enum gre_tunnel_type_t_
+{
+#define _(n, s) GRE_TUNNEL_TYPE_##n,
+ foreach_gre_tunnel_type
+#undef _
+} __clib_packed gre_tunnel_type_t;
+
+extern u8 *format_gre_tunnel_type (u8 * s, va_list * args);
+
+/**
+ * A GRE payload protocol registration
+ */
+typedef struct
+{
+ /** Name (a c string). */
+ char *name;
+
+ /** GRE protocol type in host byte order. */
+ gre_protocol_t protocol;
+
+ /** GRE tunnel type */
+ gre_tunnel_type_t tunnel_type;
+
+ /** Node which handles this type. */
+ u32 node_index;
+
+ /** Next index for this type. */
+ u32 next_index;
+} gre_protocol_info_t;
+
+/**
+ * Elements of the GRE key that are common to v4 and v6 addresses
+ */
+typedef struct gre_tunnel_key_common_t_
+{
+ union
+ {
+ struct
+ {
+ u32 fib_index;
+ u16 session_id;
+ gre_tunnel_type_t type;
+ tunnel_mode_t mode;
+ };
+ u64 as_u64;
+ };
+} gre_tunnel_key_common_t;
+
+STATIC_ASSERT_SIZEOF (gre_tunnel_key_common_t, sizeof (u64));
+
+/**
+ * @brief Key for an IPv4 GRE Tunnel
+ */
+typedef struct gre_tunnel_key4_t_
+{
+ /**
+ * Source and destination IP addresses
+ */
+ union
+ {
+ struct
+ {
+ ip4_address_t gtk_src;
+ ip4_address_t gtk_dst;
+ };
+ u64 gtk_as_u64;
+ };
+
+ /** address independent attributes */
+ gre_tunnel_key_common_t gtk_common;
+} __attribute__ ((packed)) gre_tunnel_key4_t;
+
+STATIC_ASSERT_SIZEOF (gre_tunnel_key4_t, 2 * sizeof (u64));
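+
+/* The unions above make an IPv4 key comparison two u64 compares rather
+ * than a memcmp - see gre_match_key4() below. */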
+
+/**
+ * @brief Key for an IPv6 GRE Tunnel
+ * We use a different type so that the V4 key hash is as small as possible
+ */
+typedef struct gre_tunnel_key6_t_
+{
+ /**
+ * Source and destination IP addresses
+ */
+ ip6_address_t gtk_src;
+ ip6_address_t gtk_dst;
+
+ /** address independent attributes */
+ gre_tunnel_key_common_t gtk_common;
+} __attribute__ ((packed)) gre_tunnel_key6_t;
+
+STATIC_ASSERT_SIZEOF (gre_tunnel_key6_t, 5 * sizeof (u64));
+
+/**
+ * Union of the two possible key types
+ */
+typedef union gre_tunnel_key_t_
+{
+ gre_tunnel_key4_t gtk_v4;
+ gre_tunnel_key6_t gtk_v6;
+} gre_tunnel_key_t;
+
+/**
+ * The session ID is only a 10-bit value
+ */
+#define GTK_SESSION_ID_MAX (0x3ff)
+
+/**
+ * Used for GRE header seq number generation for ERSPAN encap
+ */
+typedef struct
+{
+ u32 seq_num;
+ u32 ref_count;
+} gre_sn_t;
+
+/**
+ * Hash key for GRE header seq number generation for ERSPAN encap
+ */
+typedef struct
+{
+ ip46_address_t src;
+ ip46_address_t dst;
+ u32 fib_index;
+} gre_sn_key_t;
+
+/**
+ * @brief A representation of a GRE tunnel
+ */
+typedef struct
+{
+ /**
+ * Required for pool_get_aligned
+ */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+ /**
+ * The tunnel's source/local address
+ */
+ ip46_address_t tunnel_src;
+ /**
+ * The tunnel's destination/remote address
+ */
+ fib_prefix_t tunnel_dst;
+ /**
+   * The FIB in which the src/dst addresses are present
+ */
+ u32 outer_fib_index;
+ u32 hw_if_index;
+ u32 sw_if_index;
+ gre_tunnel_type_t type;
+ tunnel_mode_t mode;
+ tunnel_encap_decap_flags_t flags;
+
+ /**
+   * An L2 tunnel always requires an L2 midchain; cache it here for the DP.
+ */
+ adj_index_t l2_adj_index;
+
+ /**
+ * ERSPAN type 2 session ID, least significant 10 bits of u16
+ */
+ u16 session_id;
+
+ /**
+   * GRE header sequence number (SN) used for the ERSPAN type 2 header; it
+   * must be bumped atomically to be thread safe. As multiple GRE tunnels
+   * can be created for the same fib-idx/DIP/SIP with different ERSPAN
+   * session numbers, they all share the same SN, which is kept per
+   * FIB/DIP/SIP as specified by RFC 2890.
+ */
+ gre_sn_t *gre_sn;
+
+ u32 dev_instance; /* Real device instance in tunnel vector */
+ u32 user_instance; /* Instance name being shown to user */
+} gre_tunnel_t;
+
+typedef struct
+{
+ u8 next_index;
+ u8 tunnel_type;
+} next_info_t;
+
+/**
+ * @brief GRE related global data
+ */
+typedef struct
+{
+ /**
+ * pool of tunnel instances
+ */
+ gre_tunnel_t *tunnels;
+
+ /**
+ * GRE payload protocol registrations
+ */
+ gre_protocol_info_t *protocol_infos;
+
+ /**
+ * Hash tables mapping name/protocol to protocol info index.
+ */
+ uword *protocol_info_by_name, *protocol_info_by_protocol;
+
+ /**
+ * Hash mapping to tunnels with ipv4 src/dst addr
+ */
+ uword *tunnel_by_key4;
+
+ /**
+ * Hash mapping to tunnels with ipv6 src/dst addr
+ */
+ uword *tunnel_by_key6;
+
+ /**
+ * Hash mapping tunnel src/dst addr and fib-idx to sequence number
+ */
+ uword *seq_num_by_key;
+
+ /**
+ * Mapping from sw_if_index to tunnel index
+ */
+ u32 *tunnel_index_by_sw_if_index;
+
+ /* Sparse vector mapping gre protocol in network byte order
+ to next index. */
+ next_info_t *next_by_protocol;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+ /* Record used instances */
+ uword *instance_used;
+
+ u16 msg_id_base;
+} gre_main_t;
+
+/**
+ * @brief IPv4 and GRE header.
+ */
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4;
+ gre_header_t gre;
+}) ip4_and_gre_header_t;
+
+/**
+ * @brief IPv6 and GRE header.
+ */
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip6;
+ gre_header_t gre;
+}) ip6_and_gre_header_t;
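+
+/* These packed encap types are what the tunnel rewrite lays down and what
+ * the fixup functions index into: 24 bytes for IPv4 + GRE and 44 bytes for
+ * IPv6 + GRE, assuming the 4-byte base GRE header with no optional fields. */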
+
+always_inline gre_protocol_info_t *
+gre_get_protocol_info (gre_main_t * em, gre_protocol_t protocol)
+{
+ uword *p = hash_get (em->protocol_info_by_protocol, protocol);
+ return p ? vec_elt_at_index (em->protocol_infos, p[0]) : 0;
+}
+
+extern gre_main_t gre_main;
+
+extern clib_error_t *gre_interface_admin_up_down (vnet_main_t * vnm,
+ u32 hw_if_index, u32 flags);
+
+extern void gre_tunnel_stack (adj_index_t ai);
+extern void gre_update_adj (vnet_main_t * vnm,
+ u32 sw_if_index, adj_index_t ai);
+
+typedef struct mgre_walk_ctx_t_
+{
+ const gre_tunnel_t *t;
+ const teib_entry_t *ne;
+} mgre_walk_ctx_t;
+
+adj_walk_rc_t mgre_mk_complete_walk (adj_index_t ai, void *data);
+adj_walk_rc_t mgre_mk_incomplete_walk (adj_index_t ai, void *data);
+
+format_function_t format_gre_protocol;
+format_function_t format_gre_header;
+format_function_t format_gre_header_with_length;
+
+extern vlib_node_registration_t gre4_input_node;
+extern vlib_node_registration_t gre6_input_node;
+extern vlib_node_registration_t gre_erspan_encap_node;
+extern vlib_node_registration_t gre_teb_encap_node;
+extern vnet_device_class_t gre_device_class;
+
+/* Parse GRE protocol as 0xXXXX or protocol name,
+   in either host or network byte order. */
+unformat_function_t unformat_gre_protocol_host_byte_order;
+unformat_function_t unformat_gre_protocol_net_byte_order;
+
+/* Parse gre header. */
+unformat_function_t unformat_gre_header;
+unformat_function_t unformat_pg_gre_header;
+
+void
+gre_register_input_protocol (vlib_main_t * vm, gre_protocol_t protocol,
+ u32 node_index, gre_tunnel_type_t tunnel_type);
+
+/* manually added to the interface output node in gre.c */
+#define GRE_OUTPUT_NEXT_LOOKUP 1
+
+typedef struct
+{
+ u8 is_add;
+ gre_tunnel_type_t type;
+ tunnel_mode_t mode;
+ u8 is_ipv6;
+ u32 instance;
+ ip46_address_t src, dst;
+ u32 outer_table_id;
+ u16 session_id;
+ tunnel_encap_decap_flags_t flags;
+} vnet_gre_tunnel_add_del_args_t;
+
+extern int vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t * a,
+ u32 * sw_if_indexp);
+
+static inline void
+gre_mk_key4 (ip4_address_t src,
+ ip4_address_t dst,
+ u32 fib_index,
+ gre_tunnel_type_t ttype,
+ tunnel_mode_t tmode, u16 session_id, gre_tunnel_key4_t * key)
+{
+ key->gtk_src = src;
+ key->gtk_dst = dst;
+ key->gtk_common.type = ttype;
+ key->gtk_common.mode = tmode;
+ key->gtk_common.fib_index = fib_index;
+ key->gtk_common.session_id = session_id;
+}
+
+static inline int
+gre_match_key4 (const gre_tunnel_key4_t * key1,
+ const gre_tunnel_key4_t * key2)
+{
+ return ((key1->gtk_as_u64 == key2->gtk_as_u64) &&
+ (key1->gtk_common.as_u64 == key2->gtk_common.as_u64));
+}
+
+static inline void
+gre_mk_key6 (const ip6_address_t * src,
+ const ip6_address_t * dst,
+ u32 fib_index,
+ gre_tunnel_type_t ttype,
+ tunnel_mode_t tmode, u16 session_id, gre_tunnel_key6_t * key)
+{
+ key->gtk_src = *src;
+ key->gtk_dst = *dst;
+ key->gtk_common.type = ttype;
+ key->gtk_common.mode = tmode;
+ key->gtk_common.fib_index = fib_index;
+ key->gtk_common.session_id = session_id;
+}
+
+static inline int
+gre_match_key6 (const gre_tunnel_key6_t * key1,
+ const gre_tunnel_key6_t * key2)
+{
+ return (ip6_address_is_equal (&key1->gtk_src, &key2->gtk_src) &&
+ ip6_address_is_equal (&key1->gtk_dst, &key2->gtk_dst) &&
+ (key1->gtk_common.as_u64 == key2->gtk_common.as_u64));
+}
+
+static inline void
+gre_mk_sn_key (const gre_tunnel_t * gt, gre_sn_key_t * key)
+{
+ key->src = gt->tunnel_src;
+ key->dst = gt->tunnel_dst.fp_addr;
+ key->fib_index = gt->outer_fib_index;
+}
+
+#endif /* included_gre_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gre/gre_api.c b/src/plugins/gre/gre_api.c
new file mode 100644
index 00000000000..5149f92fb80
--- /dev/null
+++ b/src/plugins/gre/gre_api.c
@@ -0,0 +1,212 @@
+/*
+ *------------------------------------------------------------------
+ * gre_api.c - gre api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+
+#include <gre/gre.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/tunnel/tunnel_types_api.h>
+#include <vnet/ip/ip_types_api.h>
+
+#include <gre/gre.api_enum.h>
+#include <gre/gre.api_types.h>
+
+#define REPLY_MSG_ID_BASE gre_main.msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static int
+gre_tunnel_type_decode (vl_api_gre_tunnel_type_t in, gre_tunnel_type_t *out)
+{
+ switch (in)
+ {
+#define _(n, v) \
+ case GRE_API_TUNNEL_TYPE_##n: \
+ *out = GRE_TUNNEL_TYPE_##n; \
+ return (0);
+ foreach_gre_tunnel_type
+#undef _
+ }
+
+ return (VNET_API_ERROR_INVALID_VALUE);
+}
+
+static vl_api_gre_tunnel_type_t
+gre_tunnel_type_encode (gre_tunnel_type_t in)
+{
+ vl_api_gre_tunnel_type_t out = GRE_API_TUNNEL_TYPE_L3;
+
+ switch (in)
+ {
+#define _(n, v) \
+ case GRE_TUNNEL_TYPE_##n: \
+ out = GRE_API_TUNNEL_TYPE_##n; \
+ break;
+ foreach_gre_tunnel_type
+#undef _
+ }
+
+ return (out);
+}
+
+static void
+vl_api_gre_tunnel_add_del_t_handler (vl_api_gre_tunnel_add_del_t *mp)
+{
+ vnet_gre_tunnel_add_del_args_t _a = {}, *a = &_a;
+ vl_api_gre_tunnel_add_del_reply_t *rmp;
+ tunnel_encap_decap_flags_t flags;
+ u32 sw_if_index = ~0;
+ ip46_type_t itype[2];
+ int rv = 0;
+
+ itype[0] = ip_address_decode (&mp->tunnel.src, &a->src);
+ itype[1] = ip_address_decode (&mp->tunnel.dst, &a->dst);
+
+ if (itype[0] != itype[1])
+ {
+ rv = VNET_API_ERROR_INVALID_PROTOCOL;
+ goto out;
+ }
+
+ if (ip46_address_is_equal (&a->src, &a->dst))
+ {
+ rv = VNET_API_ERROR_SAME_SRC_DST;
+ goto out;
+ }
+
+ rv = gre_tunnel_type_decode (mp->tunnel.type, &a->type);
+
+ if (rv)
+ goto out;
+
+ rv = tunnel_mode_decode (mp->tunnel.mode, &a->mode);
+
+ if (rv)
+ goto out;
+
+ rv = tunnel_encap_decap_flags_decode (mp->tunnel.flags, &flags);
+
+ if (rv)
+ goto out;
+
+ a->is_add = mp->is_add;
+ a->is_ipv6 = (itype[0] == IP46_TYPE_IP6);
+ a->instance = ntohl (mp->tunnel.instance);
+ a->session_id = ntohs (mp->tunnel.session_id);
+ a->outer_table_id = ntohl (mp->tunnel.outer_table_id);
+ a->flags = flags;
+
+ rv = vnet_gre_tunnel_add_del (a, &sw_if_index);
+
+out:
+ REPLY_MACRO2 (VL_API_GRE_TUNNEL_ADD_DEL_REPLY,
+ ({ rmp->sw_if_index = ntohl (sw_if_index); }));
+}
+
+static void
+send_gre_tunnel_details (gre_tunnel_t *t, vl_api_gre_tunnel_dump_t *mp)
+{
+ vl_api_gre_tunnel_details_t *rmp;
+
+ REPLY_MACRO_DETAILS2 (
+ VL_API_GRE_TUNNEL_DETAILS, ({
+ ip_address_encode (&t->tunnel_src, IP46_TYPE_ANY, &rmp->tunnel.src);
+ ip_address_encode (&t->tunnel_dst.fp_addr, IP46_TYPE_ANY,
+ &rmp->tunnel.dst);
+
+ rmp->tunnel.outer_table_id = htonl (
+ fib_table_get_table_id (t->outer_fib_index, t->tunnel_dst.fp_proto));
+
+ rmp->tunnel.type = gre_tunnel_type_encode (t->type);
+ rmp->tunnel.mode = tunnel_mode_encode (t->mode);
+ rmp->tunnel.flags = tunnel_encap_decap_flags_encode (t->flags);
+ rmp->tunnel.instance = htonl (t->user_instance);
+ rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
+ rmp->tunnel.session_id = htons (t->session_id);
+ }));
+}
+
+static void
+vl_api_gre_tunnel_dump_t_handler (vl_api_gre_tunnel_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ gre_main_t *gm = &gre_main;
+ gre_tunnel_t *t;
+ u32 sw_if_index;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ pool_foreach (t, gm->tunnels)
+ {
+ send_gre_tunnel_details (t, mp);
+ }
+ }
+  else
+ {
+ if ((sw_if_index >= vec_len (gm->tunnel_index_by_sw_if_index)) ||
+ (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &gm->tunnels[gm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_gre_tunnel_details (t, mp);
+ }
+}
+
+/*
+ * gre_api_hookup
+ * Add vpe's API message handlers to the table.
+ * vlib has already mapped shared memory and
+ * added the client registration handlers.
+ * See .../vlib-api/vlibmemory/memclnt_vlib.c:memclnt_process()
+ */
+/* API definitions */
+#include <vnet/format_fns.h>
+#include <gre/gre.api.c>
+
+static clib_error_t *
+gre_api_hookup (vlib_main_t *vm)
+{
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ gre_main.msg_id_base = setup_message_id_table ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (gre_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gre/interface.c b/src/plugins/gre/interface.c
new file mode 100644
index 00000000000..bd9a6078502
--- /dev/null
+++ b/src/plugins/gre/interface.c
@@ -0,0 +1,826 @@
+/*
+ * gre_interface.c: gre interfaces
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <gre/gre.h>
+#include <vnet/ip/format.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/adj/adj_nbr.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/teib/teib.h>
+
+u8 *
+format_gre_tunnel_type (u8 *s, va_list *args)
+{
+ gre_tunnel_type_t type = va_arg (*args, int);
+
+ switch (type)
+ {
+#define _(n, v) \
+ case GRE_TUNNEL_TYPE_##n: \
+ s = format (s, "%s", v); \
+ break;
+ foreach_gre_tunnel_type
+#undef _
+ }
+
+ return (s);
+}
+
+static u8 *
+format_gre_tunnel (u8 *s, va_list *args)
+{
+ gre_tunnel_t *t = va_arg (*args, gre_tunnel_t *);
+
+ s = format (s, "[%d] instance %d src %U dst %U fib-idx %d sw-if-idx %d ",
+ t->dev_instance, t->user_instance, format_ip46_address,
+ &t->tunnel_src, IP46_TYPE_ANY, format_ip46_address,
+ &t->tunnel_dst.fp_addr, IP46_TYPE_ANY, t->outer_fib_index,
+ t->sw_if_index);
+
+ s = format (s, "payload %U ", format_gre_tunnel_type, t->type);
+ s = format (s, "%U ", format_tunnel_mode, t->mode);
+
+ if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
+ s = format (s, "session %d ", t->session_id);
+
+ if (t->type != GRE_TUNNEL_TYPE_L3)
+ s = format (s, "l2-adj-idx %d ", t->l2_adj_index);
+
+ return s;
+}
+
+static gre_tunnel_t *
+gre_tunnel_db_find (const vnet_gre_tunnel_add_del_args_t *a,
+ u32 outer_fib_index, gre_tunnel_key_t *key)
+{
+ gre_main_t *gm = &gre_main;
+ uword *p;
+
+ if (!a->is_ipv6)
+ {
+ gre_mk_key4 (a->src.ip4, a->dst.ip4, outer_fib_index, a->type, a->mode,
+ a->session_id, &key->gtk_v4);
+ p = hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
+ }
+ else
+ {
+ gre_mk_key6 (&a->src.ip6, &a->dst.ip6, outer_fib_index, a->type, a->mode,
+ a->session_id, &key->gtk_v6);
+ p = hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6);
+ }
+
+ if (NULL == p)
+ return (NULL);
+
+ return (pool_elt_at_index (gm->tunnels, p[0]));
+}
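+
+/* The keys built here use the same constructors as the decap path in
+ * node.c, so a tunnel added to these hashes is matched at ingress with no
+ * further translation. */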
+
+static void
+gre_tunnel_db_add (gre_tunnel_t *t, gre_tunnel_key_t *key)
+{
+ gre_main_t *gm = &gre_main;
+
+ if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6)
+ {
+ hash_set_mem_alloc (&gm->tunnel_by_key6, &key->gtk_v6, t->dev_instance);
+ }
+ else
+ {
+ hash_set_mem_alloc (&gm->tunnel_by_key4, &key->gtk_v4, t->dev_instance);
+ }
+}
+
+static void
+gre_tunnel_db_remove (gre_tunnel_t *t, gre_tunnel_key_t *key)
+{
+ gre_main_t *gm = &gre_main;
+
+ if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6)
+ {
+ hash_unset_mem_free (&gm->tunnel_by_key6, &key->gtk_v6);
+ }
+ else
+ {
+ hash_unset_mem_free (&gm->tunnel_by_key4, &key->gtk_v4);
+ }
+}
+
+/**
+ * gre_tunnel_stack
+ *
+ * 'stack' (resolve the recursion for) the tunnel's midchain adjacency
+ */
+void
+gre_tunnel_stack (adj_index_t ai)
+{
+ gre_main_t *gm = &gre_main;
+ ip_adjacency_t *adj;
+ gre_tunnel_t *gt;
+ u32 sw_if_index;
+
+ adj = adj_get (ai);
+ sw_if_index = adj->rewrite_header.sw_if_index;
+
+ if ((vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index) ||
+ (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
+ return;
+
+ gt = pool_elt_at_index (gm->tunnels,
+ gm->tunnel_index_by_sw_if_index[sw_if_index]);
+
+ if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) &
+ VNET_HW_INTERFACE_FLAG_LINK_UP) == 0)
+ {
+ adj_midchain_delegate_unstack (ai);
+ }
+ else
+ {
+ adj_midchain_delegate_stack (ai, gt->outer_fib_index, &gt->tunnel_dst);
+ }
+}
+
+/**
+ * mgre_tunnel_stack
+ *
+ * 'stack' (resolve the recursion for) the tunnel's midchain adjacency
+ */
+static void
+mgre_tunnel_stack (adj_index_t ai)
+{
+ gre_main_t *gm = &gre_main;
+ const ip_adjacency_t *adj;
+ const gre_tunnel_t *gt;
+ u32 sw_if_index;
+
+ adj = adj_get (ai);
+ sw_if_index = adj->rewrite_header.sw_if_index;
+
+ if ((vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index) ||
+ (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
+ return;
+
+ gt = pool_elt_at_index (gm->tunnels,
+ gm->tunnel_index_by_sw_if_index[sw_if_index]);
+
+ if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) &
+ VNET_HW_INTERFACE_FLAG_LINK_UP) == 0)
+ {
+ adj_midchain_delegate_unstack (ai);
+ }
+ else
+ {
+ const teib_entry_t *ne;
+
+ ne = teib_entry_find_46 (sw_if_index, adj->ia_nh_proto,
+ &adj->sub_type.nbr.next_hop);
+ if (NULL != ne)
+ teib_entry_adj_stack (ne, ai);
+ }
+}
+
+/**
+ * @brief Callback for restacking all adjacencies on a GRE interface
+ */
+static adj_walk_rc_t
+gre_adj_walk_cb (adj_index_t ai, void *ctx)
+{
+ gre_tunnel_stack (ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static adj_walk_rc_t
+mgre_adj_walk_cb (adj_index_t ai, void *ctx)
+{
+ mgre_tunnel_stack (ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+static void
+gre_tunnel_restack (gre_tunnel_t *gt)
+{
+ fib_protocol_t proto;
+
+ /*
+   * walk all the adjacencies on the GRE interface and restack them
+ */
+ FOR_EACH_FIB_IP_PROTOCOL (proto)
+ {
+ switch (gt->mode)
+ {
+ case TUNNEL_MODE_P2P:
+ adj_nbr_walk (gt->sw_if_index, proto, gre_adj_walk_cb, NULL);
+ break;
+ case TUNNEL_MODE_MP:
+ adj_nbr_walk (gt->sw_if_index, proto, mgre_adj_walk_cb, NULL);
+ break;
+ }
+ }
+}
+
+static void
+gre_teib_mk_key (const gre_tunnel_t *t, const teib_entry_t *ne,
+ gre_tunnel_key_t *key)
+{
+ const fib_prefix_t *nh;
+
+ nh = teib_entry_get_nh (ne);
+
+ /* construct the key using mode P2P so it can be found in the DP */
+ if (FIB_PROTOCOL_IP4 == nh->fp_proto)
+ gre_mk_key4 (t->tunnel_src.ip4, nh->fp_addr.ip4,
+ teib_entry_get_fib_index (ne), t->type, TUNNEL_MODE_P2P, 0,
+ &key->gtk_v4);
+ else
+ gre_mk_key6 (&t->tunnel_src.ip6, &nh->fp_addr.ip6,
+ teib_entry_get_fib_index (ne), t->type, TUNNEL_MODE_P2P, 0,
+ &key->gtk_v6);
+}
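+
+/* The key is deliberately built in P2P mode: once a peer is resolved via
+ * TEIB, ingress packets from that underlay next-hop match exactly as they
+ * would on a point-to-point tunnel. */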
+
+/**
+ * A TEIB entry has been added
+ */
+static void
+gre_teib_entry_added (const teib_entry_t *ne)
+{
+ gre_main_t *gm = &gre_main;
+ const ip_address_t *nh;
+ gre_tunnel_key_t key;
+ gre_tunnel_t *t;
+ u32 sw_if_index;
+ u32 t_idx;
+
+ sw_if_index = teib_entry_get_sw_if_index (ne);
+  if (vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index)
+ return;
+
+ t_idx = gm->tunnel_index_by_sw_if_index[sw_if_index];
+
+ if (INDEX_INVALID == t_idx)
+ return;
+
+ /* entry has been added on an interface for which there is a GRE tunnel */
+ t = pool_elt_at_index (gm->tunnels, t_idx);
+
+ if (t->mode != TUNNEL_MODE_MP)
+ return;
+
+  /* the next-hop (underlay) of the TEIB entry will form part of the key for
+ * ingress lookup to match packets to this interface */
+ gre_teib_mk_key (t, ne, &key);
+ gre_tunnel_db_add (t, &key);
+
+ /* update the rewrites for each of the adjacencies for this peer (overlay)
+ * using the next-hop (underlay) */
+ mgre_walk_ctx_t ctx = { .t = t, .ne = ne };
+ nh = teib_entry_get_peer (ne);
+ adj_nbr_walk_nh (
+ teib_entry_get_sw_if_index (ne),
+ (AF_IP4 == ip_addr_version (nh) ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6),
+ &ip_addr_46 (nh), mgre_mk_complete_walk, &ctx);
+}
+
+static void
+gre_teib_entry_deleted (const teib_entry_t *ne)
+{
+ gre_main_t *gm = &gre_main;
+ const ip_address_t *nh;
+ gre_tunnel_key_t key;
+ gre_tunnel_t *t;
+ u32 sw_if_index;
+ u32 t_idx;
+
+ sw_if_index = teib_entry_get_sw_if_index (ne);
+  if (vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index)
+ return;
+
+ t_idx = gm->tunnel_index_by_sw_if_index[sw_if_index];
+
+ if (INDEX_INVALID == t_idx)
+ return;
+
+ t = pool_elt_at_index (gm->tunnels, t_idx);
+
+ /* remove the next-hop as an ingress lookup key */
+ gre_teib_mk_key (t, ne, &key);
+ gre_tunnel_db_remove (t, &key);
+
+ nh = teib_entry_get_peer (ne);
+
+ /* make all the adjacencies incomplete */
+ adj_nbr_walk_nh (
+ teib_entry_get_sw_if_index (ne),
+ (AF_IP4 == ip_addr_version (nh) ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6),
+ &ip_addr_46 (nh), mgre_mk_incomplete_walk, t);
+}
+
+static walk_rc_t
+gre_tunnel_delete_teib_walk (index_t nei, void *ctx)
+{
+ gre_tunnel_t *t = ctx;
+ gre_tunnel_key_t key;
+
+ gre_teib_mk_key (t, teib_entry_get (nei), &key);
+ gre_tunnel_db_remove (t, &key);
+
+ return (WALK_CONTINUE);
+}
+
+static walk_rc_t
+gre_tunnel_add_teib_walk (index_t nei, void *ctx)
+{
+ gre_tunnel_t *t = ctx;
+ gre_tunnel_key_t key = {};
+
+ gre_teib_mk_key (t, teib_entry_get (nei), &key);
+ gre_tunnel_db_add (t, &key);
+
+ return (WALK_CONTINUE);
+}
+
+static int
+vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t *a, u32 outer_fib_index,
+ u32 *sw_if_indexp)
+{
+ gre_main_t *gm = &gre_main;
+ vnet_main_t *vnm = gm->vnet_main;
+ gre_tunnel_t *t;
+ vnet_hw_interface_t *hi;
+ u32 hw_if_index, sw_if_index;
+ u8 is_ipv6 = a->is_ipv6;
+ gre_tunnel_key_t key;
+
+ t = gre_tunnel_db_find (a, outer_fib_index, &key);
+ if (NULL != t)
+ return VNET_API_ERROR_IF_ALREADY_EXISTS;
+
+ pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
+ clib_memset (t, 0, sizeof (*t));
+
+ /* Reconcile the real dev_instance and a possible requested instance */
+ u32 t_idx = t - gm->tunnels; /* tunnel index (or instance) */
+ u32 u_idx = a->instance; /* user specified instance */
+ if (u_idx == ~0)
+ u_idx = t_idx;
+ if (hash_get (gm->instance_used, u_idx))
+ {
+ pool_put (gm->tunnels, t);
+ return VNET_API_ERROR_INSTANCE_IN_USE;
+ }
+ hash_set (gm->instance_used, u_idx, 1);
+
+ t->dev_instance = t_idx; /* actual */
+ t->user_instance = u_idx; /* name */
+
+ t->type = a->type;
+ t->mode = a->mode;
+ t->flags = a->flags;
+ if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
+ t->session_id = a->session_id;
+
+ if (t->type == GRE_TUNNEL_TYPE_L3)
+ {
+ if (t->mode == TUNNEL_MODE_P2P)
+ hw_if_index =
+ vnet_register_interface (vnm, gre_device_class.index, t_idx,
+ gre_hw_interface_class.index, t_idx);
+ else
+ hw_if_index =
+ vnet_register_interface (vnm, gre_device_class.index, t_idx,
+ mgre_hw_interface_class.index, t_idx);
+ }
+ else
+ {
+ vnet_eth_interface_registration_t eir = {};
+
+      /* Default MAC address (d00b:eed0:0000 + t_idx) */
+ u8 address[6] = {
+ 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx
+ };
+
+ eir.dev_class_index = gre_device_class.index;
+ eir.dev_instance = t_idx;
+ eir.address = address;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
+ }
+
+ /* Set GRE tunnel interface output node (not used for L3 payload) */
+ if (GRE_TUNNEL_TYPE_ERSPAN == t->type)
+ vnet_set_interface_output_node (vnm, hw_if_index,
+ gre_erspan_encap_node.index);
+ else
+ vnet_set_interface_output_node (vnm, hw_if_index,
+ gre_teb_encap_node.index);
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ sw_if_index = hi->sw_if_index;
+
+ t->hw_if_index = hw_if_index;
+ t->outer_fib_index = outer_fib_index;
+ t->sw_if_index = sw_if_index;
+ t->l2_adj_index = ADJ_INDEX_INVALID;
+
+ vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
+ gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx;
+
+ if (!is_ipv6)
+ {
+ hi->frame_overhead = sizeof (gre_header_t) + sizeof (ip4_header_t);
+ hi->min_frame_size = hi->frame_overhead + 64;
+ }
+ else
+ {
+ hi->frame_overhead = sizeof (gre_header_t) + sizeof (ip6_header_t);
+ hi->min_frame_size = hi->frame_overhead + 64;
+ }
+
+  /* Default GRE MTU. */
+ vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
+
+ /*
+ * source the FIB entry for the tunnel's destination
+ * and become a child thereof. The tunnel will then get poked
+ * when the forwarding for the entry updates, and the tunnel can
+ * re-stack accordingly
+ */
+
+ clib_memcpy (&t->tunnel_src, &a->src, sizeof (t->tunnel_src));
+ t->tunnel_dst.fp_len = !is_ipv6 ? 32 : 128;
+ t->tunnel_dst.fp_proto = !is_ipv6 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
+ t->tunnel_dst.fp_addr = a->dst;
+
+ gre_tunnel_db_add (t, &key);
+
+ if (t->mode == TUNNEL_MODE_MP)
+ teib_walk_itf (t->sw_if_index, gre_tunnel_add_teib_walk, t);
+
+ if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
+ {
+ gre_sn_key_t skey;
+ gre_sn_t *gre_sn;
+
+ gre_mk_sn_key (t, &skey);
+ gre_sn = (gre_sn_t *) hash_get_mem (gm->seq_num_by_key, &skey);
+ if (gre_sn != NULL)
+ {
+ gre_sn->ref_count++;
+ t->gre_sn = gre_sn;
+ }
+ else
+ {
+ gre_sn = clib_mem_alloc (sizeof (gre_sn_t));
+ gre_sn->seq_num = 0;
+ gre_sn->ref_count = 1;
+ t->gre_sn = gre_sn;
+ hash_set_mem_alloc (&gm->seq_num_by_key, &skey, (uword) gre_sn);
+ }
+ }
+
+ if (t->type != GRE_TUNNEL_TYPE_L3)
+ {
+ t->l2_adj_index = adj_nbr_add_or_lock (
+ t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index);
+ vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index,
+ (u8 *) "tunnel-output-no-count");
+ gre_update_adj (vnm, t->sw_if_index, t->l2_adj_index);
+ }
+ else
+ {
+ vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index,
+ (u8 *) "tunnel-output");
+ }
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ /* register gre46-input nodes */
+ ip4_register_protocol (IP_PROTOCOL_GRE, gre4_input_node.index);
+ ip6_register_protocol (IP_PROTOCOL_GRE, gre6_input_node.index);
+
+ return 0;
+}
+
+static int
+vnet_gre_tunnel_delete (vnet_gre_tunnel_add_del_args_t *a, u32 outer_fib_index,
+ u32 *sw_if_indexp)
+{
+ gre_main_t *gm = &gre_main;
+ vnet_main_t *vnm = gm->vnet_main;
+ gre_tunnel_t *t;
+ gre_tunnel_key_t key;
+ u32 sw_if_index;
+
+ t = gre_tunnel_db_find (a, outer_fib_index, &key);
+ if (NULL == t)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ if (t->mode == TUNNEL_MODE_MP)
+ teib_walk_itf (t->sw_if_index, gre_tunnel_delete_teib_walk, t);
+
+ sw_if_index = t->sw_if_index;
+ vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */);
+
+ /* make sure tunnel is removed from l2 bd or xconnect */
+ set_int_l2_mode (gm->vlib_main, vnm, MODE_L3, sw_if_index, 0,
+ L2_BD_PORT_TYPE_NORMAL, 0, 0);
+ gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
+
+ if (t->type == GRE_TUNNEL_TYPE_L3)
+ vnet_delete_hw_interface (vnm, t->hw_if_index);
+ else
+ ethernet_delete_interface (vnm, t->hw_if_index);
+
+ if (t->l2_adj_index != ADJ_INDEX_INVALID)
+ {
+ adj_midchain_delegate_unstack (t->l2_adj_index);
+ adj_unlock (t->l2_adj_index);
+ }
+
+ ASSERT ((t->type != GRE_TUNNEL_TYPE_ERSPAN) || (t->gre_sn != NULL));
+ if ((t->type == GRE_TUNNEL_TYPE_ERSPAN) && (t->gre_sn->ref_count-- == 1))
+ {
+ gre_sn_key_t skey;
+ gre_mk_sn_key (t, &skey);
+ hash_unset_mem_free (&gm->seq_num_by_key, &skey);
+ clib_mem_free (t->gre_sn);
+ }
+
+ vnet_reset_interface_l3_output_node (gm->vlib_main, sw_if_index);
+ hash_unset (gm->instance_used, t->user_instance);
+ gre_tunnel_db_remove (t, &key);
+ pool_put (gm->tunnels, t);
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ return 0;
+}
+
+int
+vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t *a, u32 *sw_if_indexp)
+{
+ u32 outer_fib_index;
+
+ outer_fib_index = fib_table_find (
+ (a->is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4), a->outer_table_id);
+
+ if (~0 == outer_fib_index)
+ return VNET_API_ERROR_NO_SUCH_FIB;
+
+ if (a->session_id > GTK_SESSION_ID_MAX)
+ return VNET_API_ERROR_INVALID_SESSION_ID;
+
+ if (a->mode == TUNNEL_MODE_MP && !ip46_address_is_zero (&a->dst))
+ return (VNET_API_ERROR_INVALID_DST_ADDRESS);
+
+ if (a->is_add)
+ return (vnet_gre_tunnel_add (a, outer_fib_index, sw_if_indexp));
+ else
+ return (vnet_gre_tunnel_delete (a, outer_fib_index, sw_if_indexp));
+}
+
+clib_error_t *
+gre_interface_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+ gre_main_t *gm = &gre_main;
+ vnet_hw_interface_t *hi;
+ gre_tunnel_t *t;
+ u32 ti;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+ if (NULL == gm->tunnel_index_by_sw_if_index ||
+ hi->sw_if_index >= vec_len (gm->tunnel_index_by_sw_if_index))
+ return (NULL);
+
+ ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index];
+
+ if (~0 == ti)
+ /* not one of ours */
+ return (NULL);
+
+ t = pool_elt_at_index (gm->tunnels, ti);
+
+ if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+ vnet_hw_interface_set_flags (vnm, hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ else
+ vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
+
+ gre_tunnel_restack (t);
+
+ return /* no error */ 0;
+}
+
+static clib_error_t *
+create_gre_tunnel_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_gre_tunnel_add_del_args_t _a, *a = &_a;
+ ip46_address_t src = ip46_address_initializer,
+ dst = ip46_address_initializer;
+ u32 instance = ~0;
+ u32 outer_table_id = 0;
+ gre_tunnel_type_t t_type = GRE_TUNNEL_TYPE_L3;
+ tunnel_mode_t t_mode = TUNNEL_MODE_P2P;
+ tunnel_encap_decap_flags_t flags = TUNNEL_ENCAP_DECAP_FLAG_NONE;
+ u32 session_id = 0;
+ int rv;
+ u8 is_add = 1;
+ u32 sw_if_index;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "instance %d", &instance))
+ ;
+ else if (unformat (line_input, "src %U", unformat_ip46_address, &src))
+ ;
+ else if (unformat (line_input, "dst %U", unformat_ip46_address, &dst))
+ ;
+ else if (unformat (line_input, "outer-table-id %d", &outer_table_id))
+ ;
+ else if (unformat (line_input, "multipoint"))
+ t_mode = TUNNEL_MODE_MP;
+ else if (unformat (line_input, "teb"))
+ t_type = GRE_TUNNEL_TYPE_TEB;
+ else if (unformat (line_input, "erspan %d", &session_id))
+ t_type = GRE_TUNNEL_TYPE_ERSPAN;
+ else if (unformat (line_input, "flags %U",
+ unformat_tunnel_encap_decap_flags, &flags))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (ip46_address_is_equal (&src, &dst))
+ {
+ error = clib_error_return (0, "src and dst are identical");
+ goto done;
+ }
+
+ if (t_mode != TUNNEL_MODE_MP && ip46_address_is_zero (&dst))
+ {
+ error = clib_error_return (0, "destination address not specified");
+ goto done;
+ }
+
+ if (ip46_address_is_zero (&src))
+ {
+ error = clib_error_return (0, "source address not specified");
+ goto done;
+ }
+
+ if (ip46_address_is_ip4 (&src) != ip46_address_is_ip4 (&dst))
+ {
+ error = clib_error_return (0, "src and dst address must be the same AF");
+ goto done;
+ }
+
+ clib_memset (a, 0, sizeof (*a));
+ a->is_add = is_add;
+ a->outer_table_id = outer_table_id;
+ a->type = t_type;
+ a->mode = t_mode;
+ a->session_id = session_id;
+ a->is_ipv6 = !ip46_address_is_ip4 (&src);
+ a->instance = instance;
+ a->flags = flags;
+ clib_memcpy (&a->src, &src, sizeof (a->src));
+ clib_memcpy (&a->dst, &dst, sizeof (a->dst));
+
+ rv = vnet_gre_tunnel_add_del (a, &sw_if_index);
+
+ switch (rv)
+ {
+ case 0:
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
+ vnet_get_main (), sw_if_index);
+ break;
+ case VNET_API_ERROR_IF_ALREADY_EXISTS:
+ error = clib_error_return (0, "GRE tunnel already exists...");
+ goto done;
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ error = clib_error_return (0, "outer table ID %d doesn't exist\n",
+ outer_table_id);
+ goto done;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "GRE tunnel doesn't exist");
+ goto done;
+ case VNET_API_ERROR_INVALID_SESSION_ID:
+ error =
+ clib_error_return (0, "session ID %d out of range\n", session_id);
+ goto done;
+ case VNET_API_ERROR_INSTANCE_IN_USE:
+ error = clib_error_return (0, "Instance is in use");
+ goto done;
+ default:
+ error = clib_error_return (0, "vnet_gre_tunnel_add_del returned %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = {
+ .path = "create gre tunnel",
+  .short_help = "create gre tunnel src <addr> dst <addr> [instance <n>] "
+		"[outer-table-id <fib-id>] [teb | erspan <session-id>] "
+		"[multipoint] [flags <flags>] [del]",
+ .function = create_gre_tunnel_command_fn,
+};
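+
+/* Example invocations (illustrative addresses; a referenced outer table
+ * must already exist):
+ *   create gre tunnel src 10.10.10.1 dst 10.10.10.2
+ *   create gre tunnel src 10.10.10.1 dst 10.10.10.2 teb
+ *   create gre tunnel src 10.10.10.1 dst 10.10.10.2 erspan 7
+ *   create gre tunnel src 10.10.10.1 multipoint
+ */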
+
+static clib_error_t *
+show_gre_tunnel_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ gre_main_t *gm = &gre_main;
+ gre_tunnel_t *t;
+ u32 ti = ~0;
+
+ if (pool_elts (gm->tunnels) == 0)
+ vlib_cli_output (vm, "No GRE tunnels configured...");
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &ti))
+ ;
+ else
+ break;
+ }
+
+ if (~0 == ti)
+ {
+ pool_foreach (t, gm->tunnels)
+ {
+ vlib_cli_output (vm, "%U", format_gre_tunnel, t);
+ }
+ }
+ else
+ {
+ t = pool_elt_at_index (gm->tunnels, ti);
+
+ vlib_cli_output (vm, "%U", format_gre_tunnel, t);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = {
+ .path = "show gre tunnel",
+ .function = show_gre_tunnel_command_fn,
+};
+
+static const teib_vft_t gre_teib_vft = {
+ .nv_added = gre_teib_entry_added,
+ .nv_deleted = gre_teib_entry_deleted,
+};
+
+/* register with the TEIB module for entry add/delete notifications */
+clib_error_t *
+gre_interface_init (vlib_main_t *vm)
+{
+ teib_register (&gre_teib_vft);
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (gre_interface_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gre/node.c b/src/plugins/gre/node.c
new file mode 100644
index 00000000000..5235888cc6f
--- /dev/null
+++ b/src/plugins/gre/node.c
@@ -0,0 +1,574 @@
+/*
+ * node.c: gre packet processing
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <gre/gre.h>
+#include <vnet/mpls/mpls.h>
+#include <vppinfra/sparse_vec.h>
+
+#define foreach_gre_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop") \
+ _ (ETHERNET_INPUT, "ethernet-input") \
+ _ (IP4_INPUT, "ip4-input") \
+ _ (IP6_INPUT, "ip6-input") \
+ _ (MPLS_INPUT, "mpls-input")
+
+typedef enum
+{
+#define _(s, n) GRE_INPUT_NEXT_##s,
+ foreach_gre_input_next
+#undef _
+ GRE_INPUT_N_NEXT,
+} gre_input_next_t;
+
+typedef struct
+{
+ u32 tunnel_id;
+ u32 length;
+ ip46_address_t src;
+ ip46_address_t dst;
+} gre_rx_trace_t;
+
+extern u8 *format_gre_rx_trace (u8 *s, va_list *args);
+
+#ifndef CLIB_MARCH_VARIANT
+u8 *
+format_gre_rx_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ gre_rx_trace_t *t = va_arg (*args, gre_rx_trace_t *);
+
+ s = format (s, "GRE: tunnel %d len %d src %U dst %U", t->tunnel_id,
+ clib_net_to_host_u16 (t->length), format_ip46_address, &t->src,
+ IP46_TYPE_ANY, format_ip46_address, &t->dst, IP46_TYPE_ANY);
+ return s;
+}
+#endif /* CLIB_MARCH_VARIANT */
+
+typedef struct
+{
+ /* Sparse vector mapping gre protocol in network byte order
+ to next index. */
+ u16 *next_by_protocol;
+} gre_input_runtime_t;
+
+always_inline void
+gre_trace (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b,
+ u32 tun_sw_if_index, const ip6_header_t *ip6,
+ const ip4_header_t *ip4, int is_ipv6)
+{
+ gre_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+ tr->tunnel_id = tun_sw_if_index;
+ if (is_ipv6)
+ {
+ tr->length = ip6->payload_length;
+ tr->src.ip6.as_u64[0] = ip6->src_address.as_u64[0];
+ tr->src.ip6.as_u64[1] = ip6->src_address.as_u64[1];
+ tr->dst.ip6.as_u64[0] = ip6->dst_address.as_u64[0];
+ tr->dst.ip6.as_u64[1] = ip6->dst_address.as_u64[1];
+ }
+ else
+ {
+ tr->length = ip4->length;
+ tr->src.as_u64[0] = tr->src.as_u64[1] = 0;
+ tr->dst.as_u64[0] = tr->dst.as_u64[1] = 0;
+ tr->src.ip4.as_u32 = ip4->src_address.as_u32;
+ tr->dst.ip4.as_u32 = ip4->dst_address.as_u32;
+ }
+}
+
+always_inline void
+gre_tunnel_get (const gre_main_t *gm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b, u16 *next, const gre_tunnel_key_t *key,
+ gre_tunnel_key_t *cached_key, u32 *tun_sw_if_index,
+ u32 *cached_tun_sw_if_index, int is_ipv6)
+{
+ const uword *p;
+ p = is_ipv6 ? hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6) :
+ hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
+ if (PREDICT_FALSE (!p))
+ {
+ *next = GRE_INPUT_NEXT_DROP;
+ b->error = node->errors[GRE_ERROR_NO_SUCH_TUNNEL];
+ *tun_sw_if_index = ~0;
+ }
+ else
+ {
+ const gre_tunnel_t *tun;
+ tun = pool_elt_at_index (gm->tunnels, *p);
+ *cached_tun_sw_if_index = *tun_sw_if_index = tun->sw_if_index;
+ if (is_ipv6)
+ cached_key->gtk_v6 = key->gtk_v6;
+ else
+ cached_key->gtk_v4 = key->gtk_v4;
+ }
+}
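+
+/* On a cache miss the tunnel is fetched from the per-AF hash; if that also
+ * misses, the packet is dropped with GRE_ERROR_NO_SUCH_TUNNEL, otherwise
+ * the key and tunnel sw_if_index are cached for subsequent packets in the
+ * vector. */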
+
+always_inline uword
+gre_input (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
+ const int is_ipv6)
+{
+ gre_main_t *gm = &gre_main;
+ u32 *from, n_left_from;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+ u16 cached_protocol = ~0;
+ u32 cached_next_index = SPARSE_VEC_INVALID_INDEX;
+ u32 cached_tun_sw_if_index = ~0;
+ gre_tunnel_key_t cached_key;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+
+ if (is_ipv6)
+ clib_memset (&cached_key.gtk_v6, 0xff, sizeof (cached_key.gtk_v6));
+ else
+ clib_memset (&cached_key.gtk_v4, 0xff, sizeof (cached_key.gtk_v4));
+
+ while (n_left_from >= 2)
+ {
+ const ip6_header_t *ip6[2];
+ const ip4_header_t *ip4[2];
+ const gre_header_t *gre[2];
+ u32 nidx[2];
+ next_info_t ni[2];
+ u8 type[2];
+ u16 version[2];
+ u32 len[2];
+ gre_tunnel_key_t key[2];
+ u8 matched[2];
+ u32 tun_sw_if_index[2];
+
+ if (PREDICT_TRUE (n_left_from >= 6))
+ {
+ vlib_prefetch_buffer_data (b[2], LOAD);
+ vlib_prefetch_buffer_data (b[3], LOAD);
+ vlib_prefetch_buffer_header (b[4], STORE);
+ vlib_prefetch_buffer_header (b[5], STORE);
+ }
+
+ if (is_ipv6)
+ {
+ /* ip6_local hands us the ip header, not the gre header */
+ ip6[0] = vlib_buffer_get_current (b[0]);
+ ip6[1] = vlib_buffer_get_current (b[1]);
+ gre[0] = (void *) (ip6[0] + 1);
+ gre[1] = (void *) (ip6[1] + 1);
+ vlib_buffer_advance (b[0], sizeof (*ip6[0]) + sizeof (*gre[0]));
+ vlib_buffer_advance (b[1], sizeof (*ip6[0]) + sizeof (*gre[0]));
+ }
+ else
+ {
+ /* ip4_local hands us the ip header, not the gre header */
+ ip4[0] = vlib_buffer_get_current (b[0]);
+ ip4[1] = vlib_buffer_get_current (b[1]);
+ gre[0] = (void *) (ip4[0] + 1);
+ gre[1] = (void *) (ip4[1] + 1);
+ vlib_buffer_advance (b[0], sizeof (*ip4[0]) + sizeof (*gre[0]));
+ vlib_buffer_advance (b[1], sizeof (*ip4[0]) + sizeof (*gre[0]));
+ }
+
+ if (PREDICT_TRUE (cached_protocol == gre[0]->protocol))
+ {
+ nidx[0] = cached_next_index;
+ }
+ else
+ {
+ cached_next_index = nidx[0] =
+ sparse_vec_index (gm->next_by_protocol, gre[0]->protocol);
+ cached_protocol = gre[0]->protocol;
+ }
+ if (PREDICT_TRUE (cached_protocol == gre[1]->protocol))
+ {
+ nidx[1] = cached_next_index;
+ }
+ else
+ {
+ cached_next_index = nidx[1] =
+ sparse_vec_index (gm->next_by_protocol, gre[1]->protocol);
+ cached_protocol = gre[1]->protocol;
+ }
+
+ ni[0] = vec_elt (gm->next_by_protocol, nidx[0]);
+ ni[1] = vec_elt (gm->next_by_protocol, nidx[1]);
+ next[0] = ni[0].next_index;
+ next[1] = ni[1].next_index;
+ type[0] = ni[0].tunnel_type;
+ type[1] = ni[1].tunnel_type;
+
+ b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX ?
+ node->errors[GRE_ERROR_UNKNOWN_PROTOCOL] :
+ node->errors[GRE_ERROR_NONE];
+ b[1]->error = nidx[1] == SPARSE_VEC_INVALID_INDEX ?
+ node->errors[GRE_ERROR_UNKNOWN_PROTOCOL] :
+ node->errors[GRE_ERROR_NONE];
+
+ version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version);
+ version[1] = clib_net_to_host_u16 (gre[1]->flags_and_version);
+ version[0] &= GRE_VERSION_MASK;
+ version[1] &= GRE_VERSION_MASK;
+
+ b[0]->error =
+ version[0] ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
+ next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0];
+ b[1]->error =
+ version[1] ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[1]->error;
+ next[1] = version[1] ? GRE_INPUT_NEXT_DROP : next[1];
+
+ len[0] = vlib_buffer_length_in_chain (vm, b[0]);
+ len[1] = vlib_buffer_length_in_chain (vm, b[1]);
+
+ /* always search for P2P types in the DP */
+ if (is_ipv6)
+ {
+ gre_mk_key6 (&ip6[0]->dst_address, &ip6[0]->src_address,
+ vnet_buffer (b[0])->ip.fib_index, type[0],
+ TUNNEL_MODE_P2P, 0, &key[0].gtk_v6);
+ gre_mk_key6 (&ip6[1]->dst_address, &ip6[1]->src_address,
+ vnet_buffer (b[1])->ip.fib_index, type[1],
+ TUNNEL_MODE_P2P, 0, &key[1].gtk_v6);
+ matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6);
+ matched[1] = gre_match_key6 (&cached_key.gtk_v6, &key[1].gtk_v6);
+ }
+ else
+ {
+ gre_mk_key4 (ip4[0]->dst_address, ip4[0]->src_address,
+ vnet_buffer (b[0])->ip.fib_index, type[0],
+ TUNNEL_MODE_P2P, 0, &key[0].gtk_v4);
+ gre_mk_key4 (ip4[1]->dst_address, ip4[1]->src_address,
+ vnet_buffer (b[1])->ip.fib_index, type[1],
+ TUNNEL_MODE_P2P, 0, &key[1].gtk_v4);
+ matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4);
+ matched[1] = gre_match_key4 (&cached_key.gtk_v4, &key[1].gtk_v4);
+ }
+
+ tun_sw_if_index[0] = cached_tun_sw_if_index;
+ tun_sw_if_index[1] = cached_tun_sw_if_index;
+ if (PREDICT_FALSE (!matched[0]))
+ gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key,
+ &tun_sw_if_index[0], &cached_tun_sw_if_index, is_ipv6);
+ if (PREDICT_FALSE (!matched[1]))
+ gre_tunnel_get (gm, node, b[1], &next[1], &key[1], &cached_key,
+ &tun_sw_if_index[1], &cached_tun_sw_if_index, is_ipv6);
+
+ if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP))
+ {
+ vlib_increment_combined_counter (
+ &gm->vnet_main->interface_main
+ .combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX],
+ vm->thread_index, tun_sw_if_index[0], 1 /* packets */,
+ len[0] /* bytes */);
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0];
+ }
+ if (PREDICT_TRUE (next[1] > GRE_INPUT_NEXT_DROP))
+ {
+ vlib_increment_combined_counter (
+ &gm->vnet_main->interface_main
+ .combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX],
+ vm->thread_index, tun_sw_if_index[1], 1 /* packets */,
+ len[1] /* bytes */);
+ vnet_buffer (b[1])->sw_if_index[VLIB_RX] = tun_sw_if_index[1];
+ }
+
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~0;
+ vnet_buffer (b[1])->sw_if_index[VLIB_TX] = (u32) ~0;
+
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0],
+ is_ipv6);
+ if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
+ gre_trace (vm, node, b[1], tun_sw_if_index[1], ip6[1], ip4[1],
+ is_ipv6);
+
+ b += 2;
+ next += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from >= 1)
+ {
+ const ip6_header_t *ip6[1];
+ const ip4_header_t *ip4[1];
+ const gre_header_t *gre[1];
+ u32 nidx[1];
+ next_info_t ni[1];
+ u8 type[1];
+ u16 version[1];
+ u32 len[1];
+ gre_tunnel_key_t key[1];
+ u8 matched[1];
+ u32 tun_sw_if_index[1];
+
+ if (PREDICT_TRUE (n_left_from >= 3))
+ {
+ vlib_prefetch_buffer_data (b[1], LOAD);
+ vlib_prefetch_buffer_header (b[2], STORE);
+ }
+
+ if (is_ipv6)
+ {
+ /* ip6_local hands us the ip header, not the gre header */
+ ip6[0] = vlib_buffer_get_current (b[0]);
+ gre[0] = (void *) (ip6[0] + 1);
+ vlib_buffer_advance (b[0], sizeof (*ip6[0]) + sizeof (*gre[0]));
+ }
+ else
+ {
+ /* ip4_local hands us the ip header, not the gre header */
+ ip4[0] = vlib_buffer_get_current (b[0]);
+ gre[0] = (void *) (ip4[0] + 1);
+ vlib_buffer_advance (b[0], sizeof (*ip4[0]) + sizeof (*gre[0]));
+ }
+
+ if (PREDICT_TRUE (cached_protocol == gre[0]->protocol))
+ {
+ nidx[0] = cached_next_index;
+ }
+ else
+ {
+ cached_next_index = nidx[0] =
+ sparse_vec_index (gm->next_by_protocol, gre[0]->protocol);
+ cached_protocol = gre[0]->protocol;
+ }
+
+ ni[0] = vec_elt (gm->next_by_protocol, nidx[0]);
+ next[0] = ni[0].next_index;
+ type[0] = ni[0].tunnel_type;
+
+ b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX ?
+ node->errors[GRE_ERROR_UNKNOWN_PROTOCOL] :
+ node->errors[GRE_ERROR_NONE];
+
+ version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version);
+ version[0] &= GRE_VERSION_MASK;
+
+ b[0]->error =
+ version[0] ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
+ next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0];
+
+ len[0] = vlib_buffer_length_in_chain (vm, b[0]);
+
+ if (is_ipv6)
+ {
+ gre_mk_key6 (&ip6[0]->dst_address, &ip6[0]->src_address,
+ vnet_buffer (b[0])->ip.fib_index, type[0],
+ TUNNEL_MODE_P2P, 0, &key[0].gtk_v6);
+ matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6);
+ }
+ else
+ {
+ gre_mk_key4 (ip4[0]->dst_address, ip4[0]->src_address,
+ vnet_buffer (b[0])->ip.fib_index, type[0],
+ TUNNEL_MODE_P2P, 0, &key[0].gtk_v4);
+ matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4);
+ }
+
+ tun_sw_if_index[0] = cached_tun_sw_if_index;
+ if (PREDICT_FALSE (!matched[0]))
+ gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key,
+ &tun_sw_if_index[0], &cached_tun_sw_if_index, is_ipv6);
+
+ if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP))
+ {
+ vlib_increment_combined_counter (
+ &gm->vnet_main->interface_main
+ .combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX],
+ vm->thread_index, tun_sw_if_index[0], 1 /* packets */,
+ len[0] /* bytes */);
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0];
+ }
+
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~0;
+
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0],
+ is_ipv6);
+
+ b += 1;
+ next += 1;
+ n_left_from -= 1;
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
+ vlib_node_increment_counter (
+ vm, is_ipv6 ? gre6_input_node.index : gre4_input_node.index,
+ GRE_ERROR_PKTS_DECAP, n_left_from);
+
+ return frame->n_vectors;
+}
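
The dual and single loops above lean on a single-entry cache: the protocol-to-next-index lookup and the tunnel-key match are redone only when the current packet differs from the previous one. A minimal sketch of the idiom, with illustrative names rather than the exact VPP API:

    u16 cached_protocol = ~0;
    u32 cached_next_index = SPARSE_VEC_INVALID_INDEX;

    while (n_left_from > 0)
      {
        if (PREDICT_TRUE (pkt_protocol == cached_protocol))
          next = cached_next_index;             /* hit: no lookup */
        else
          {
            cached_next_index = next =
              sparse_vec_index (next_by_protocol, pkt_protocol);
            cached_protocol = pkt_protocol;     /* miss: refresh the cache */
          }
        /* ... per-packet processing ... */
        n_left_from -= 1;
      }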
+
+VLIB_NODE_FN (gre4_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return gre_input (vm, node, from_frame, /* is_ip6 */ 0);
+}
+
+VLIB_NODE_FN (gre6_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return gre_input (vm, node, from_frame, /* is_ip6 */ 1);
+}
+
+static char *gre_error_strings[] = {
+#define gre_error(n, s) s,
+#include "error.def"
+#undef gre_error
+};
+
+VLIB_REGISTER_NODE (gre4_input_node) = {
+ .name = "gre4-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = GRE_N_ERROR,
+ .error_strings = gre_error_strings,
+
+ .n_next_nodes = GRE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [GRE_INPUT_NEXT_##s] = n,
+ foreach_gre_input_next
+#undef _
+ },
+
+ .format_buffer = format_gre_header_with_length,
+ .format_trace = format_gre_rx_trace,
+ .unformat_buffer = unformat_gre_header,
+};
+
+VLIB_REGISTER_NODE (gre6_input_node) = {
+ .name = "gre6-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .runtime_data_bytes = sizeof (gre_input_runtime_t),
+
+ .n_errors = GRE_N_ERROR,
+ .error_strings = gre_error_strings,
+
+ .n_next_nodes = GRE_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [GRE_INPUT_NEXT_##s] = n,
+ foreach_gre_input_next
+#undef _
+ },
+
+ .format_buffer = format_gre_header_with_length,
+ .format_trace = format_gre_rx_trace,
+ .unformat_buffer = unformat_gre_header,
+};
+
+#ifndef CLIB_MARCH_VARIANT
+void
+gre_register_input_protocol (vlib_main_t *vm, gre_protocol_t protocol,
+ u32 node_index, gre_tunnel_type_t tunnel_type)
+{
+ gre_main_t *em = &gre_main;
+ gre_protocol_info_t *pi;
+ next_info_t *n;
+ u32 i;
+
+ {
+ clib_error_t *error = vlib_call_init_function (vm, gre_input_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ pi = gre_get_protocol_info (em, protocol);
+ pi->node_index = node_index;
+ pi->tunnel_type = tunnel_type;
+ pi->next_index = vlib_node_add_next (vm, gre4_input_node.index, node_index);
+ i = vlib_node_add_next (vm, gre6_input_node.index, node_index);
+ ASSERT (i == pi->next_index);
+
+ /* Setup gre protocol -> next index sparse vector mapping. */
+ n = sparse_vec_validate (em->next_by_protocol,
+ clib_host_to_net_u16 (protocol));
+ n->next_index = pi->next_index;
+ n->tunnel_type = tunnel_type;
+}
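
As a hedged illustration of this hook, a dispatcher for some other GRE payload could register itself as below; the protocol value and node name are hypothetical, not part of the real tree:

    /* Hypothetical example: GRE_PROTOCOL_foo and "my-proto-input"
     * are illustrative names only. */
    vlib_node_t *n = vlib_get_node_by_name (vm, (u8 *) "my-proto-input");
    if (n)
      gre_register_input_protocol (vm, GRE_PROTOCOL_foo, n->index,
                                   GRE_TUNNEL_TYPE_L3);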
+
+static void
+gre_setup_node (vlib_main_t *vm, u32 node_index)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ pg_node_t *pn = pg_get_node (node_index);
+
+ n->format_buffer = format_gre_header_with_length;
+ n->unformat_buffer = unformat_gre_header;
+ pn->unformat_edit = unformat_pg_gre_header;
+}
+
+static clib_error_t *
+gre_input_init (vlib_main_t *vm)
+{
+ gre_main_t *gm = &gre_main;
+ vlib_node_t *ethernet_input, *ip4_input, *ip6_input, *mpls_unicast_input;
+
+ {
+ clib_error_t *error;
+ error = vlib_call_init_function (vm, gre_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ gre_setup_node (vm, gre4_input_node.index);
+ gre_setup_node (vm, gre6_input_node.index);
+
+ gm->next_by_protocol =
+ sparse_vec_new (/* elt bytes */ sizeof (gm->next_by_protocol[0]),
+ /* bits in index */ BITS (((gre_header_t *) 0)->protocol));
+
+ /* These could be moved to the supported protocol input node defn's */
+ ethernet_input = vlib_get_node_by_name (vm, (u8 *) "ethernet-input");
+ ASSERT (ethernet_input);
+ ip4_input = vlib_get_node_by_name (vm, (u8 *) "ip4-input");
+ ASSERT (ip4_input);
+ ip6_input = vlib_get_node_by_name (vm, (u8 *) "ip6-input");
+ ASSERT (ip6_input);
+ mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *) "mpls-input");
+ ASSERT (mpls_unicast_input);
+
+ gre_register_input_protocol (vm, GRE_PROTOCOL_teb, ethernet_input->index,
+ GRE_TUNNEL_TYPE_TEB);
+
+ gre_register_input_protocol (vm, GRE_PROTOCOL_ip4, ip4_input->index,
+ GRE_TUNNEL_TYPE_L3);
+
+ gre_register_input_protocol (vm, GRE_PROTOCOL_ip6, ip6_input->index,
+ GRE_TUNNEL_TYPE_L3);
+
+ gre_register_input_protocol (vm, GRE_PROTOCOL_mpls_unicast,
+ mpls_unicast_input->index, GRE_TUNNEL_TYPE_L3);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (gre_input_init);
+
+#endif /* CLIB_MARCH_VARIANT */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gre/pg.c b/src/plugins/gre/pg.c
new file mode 100644
index 00000000000..91c9e487899
--- /dev/null
+++ b/src/plugins/gre/pg.c
@@ -0,0 +1,84 @@
+/*
+ * pg.c: packet generator gre interface
+ *
+ * Copyright (c) 2012 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <gre/gre.h>
+
+typedef struct
+{
+ pg_edit_t flags_and_version;
+ pg_edit_t protocol;
+} pg_gre_header_t;
+
+static inline void
+pg_gre_header_init (pg_gre_header_t *e)
+{
+ pg_edit_init (&e->flags_and_version, gre_header_t, flags_and_version);
+ pg_edit_init (&e->protocol, gre_header_t, protocol);
+}
+
+uword
+unformat_pg_gre_header (unformat_input_t *input, va_list *args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_gre_header_t *h;
+ u32 group_index, error;
+
+ h = pg_create_edit_group (s, sizeof (h[0]), sizeof (gre_header_t),
+ &group_index);
+ pg_gre_header_init (h);
+
+ pg_edit_set_fixed (&h->flags_and_version, 0);
+
+ error = 1;
+ if (!unformat (input, "%U", unformat_pg_edit,
+ unformat_gre_protocol_net_byte_order, &h->protocol))
+ goto done;
+
+ {
+ gre_main_t *pm = &gre_main;
+ gre_protocol_info_t *pi = 0;
+ pg_node_t *pg_node = 0;
+
+ if (h->protocol.type == PG_EDIT_FIXED)
+ {
+ u16 t = *(u16 *) h->protocol.values[PG_EDIT_LO];
+ pi = gre_get_protocol_info (pm, clib_net_to_host_u16 (t));
+ if (pi && pi->node_index != ~0)
+ pg_node = pg_get_node (pi->node_index);
+ }
+
+ if (pg_node && pg_node->unformat_edit &&
+ unformat_user (input, pg_node->unformat_edit, s))
+ ;
+ }
+
+ error = 0;
+done:
+ if (error)
+ pg_free_edit_group (s);
+ return error == 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gbp/gbp_scanner.h b/src/plugins/gre/plugin.c
index 1133167d927..b92ec0b6dcd 100644
--- a/src/plugins/gbp/gbp_scanner.h
+++ b/src/plugins/gre/plugin.c
@@ -1,5 +1,7 @@
/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
+ * plugin.c: gre
+ *
+ * Copyright (c) 2023 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -13,18 +15,12 @@
* limitations under the License.
*/
-#ifndef __GBP_SCANNER_H__
-#define __GBP_SCANNER_H__
-
#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
-typedef enum gbp_scan_event_t_
-{
- GBP_ENDPOINT_SCAN_START,
- GBP_ENDPOINT_SCAN_STOP,
- GBP_ENDPOINT_SCAN_SET_TIME,
-} gbp_scan_event_t;
-
-extern vlib_node_registration_t gbp_scanner_node;
-
-#endif
+// Register the GRE plugin with VPP
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Generic Routing Encapsulation (GRE) plugin",
+};
diff --git a/src/plugins/gtpu/gtpu.api b/src/plugins/gtpu/gtpu.api
index ec4933af197..7c5c137a840 100644
--- a/src/plugins/gtpu/gtpu.api
+++ b/src/plugins/gtpu/gtpu.api
@@ -13,10 +13,34 @@
* limitations under the License.
*/
-option version = "2.0.1";
+option version = "2.1.0";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
+enum gtpu_forwarding_type
+{
+ GTPU_API_FORWARDING_NONE = 0,
+ GTPU_API_FORWARDING_BAD_HEADER = 1,
+ GTPU_API_FORWARDING_UNKNOWN_TEID = 2,
+ GTPU_API_FORWARDING_UNKNOWN_TYPE = 4,
+};
+
+enum gtpu_decap_next_type
+{
+ GTPU_API_DECAP_NEXT_DROP = 0,
+ GTPU_API_DECAP_NEXT_L2 = 1,
+ GTPU_API_DECAP_NEXT_IP4 = 2,
+ GTPU_API_DECAP_NEXT_IP6 = 3,
+};
+
+typedef sw_if_counters
+{
+ u64 packets_rx;
+ u64 packets_tx;
+ u64 bytes_rx;
+ u64 bytes_tx;
+};
+
/** \brief Create or delete a GTPU tunnel
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -56,6 +80,53 @@ define gtpu_add_del_tunnel_reply
vl_api_interface_index_t sw_if_index;
};
+/** \brief Create or delete a GTPU tunnel
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param src_address - GTPU tunnel's source address.
+ @param dst_address - GTPU tunnel's destination address.
+ @param mcast_sw_if_index - interface for mcast reception, required when dst is a multicast address
+ @param encap_vrf_id - fib identifier used for outgoing encapsulated packets
+ @param decap_next_index - index of the next node after decap, on success
+ @param teid - Local (rx) Tunnel Endpoint Identifier
+ @param tteid - Remote (tx) Tunnel Endpoint Identifier
+ @param pdu_extension - add PDU session container extension to each packet
+ @param qfi - the QFI to set in the PDU session container, 6 bits only
+*/
+define gtpu_add_del_tunnel_v2
+{
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_address_t src_address;
+ vl_api_address_t dst_address;
+ vl_api_interface_index_t mcast_sw_if_index;
+ u32 encap_vrf_id;
+ vl_api_gtpu_decap_next_type_t decap_next_index;
+ u32 teid;
+ u32 tteid;
+ bool pdu_extension;
+ u8 qfi;
+ option vat_help = "src <ip-addr> {dst <ip-addr> | group <mcast-ip-addr> {<intfc> | mcast_sw_if_index <nn>}} teid <nn> [tteid <nn>] [encap-vrf-id <nn>] [decap-next <l2|nn>] [qfi <nn>] [del]";
+ option in_progress;
+};
+
+/** \brief reply for set or delete a GTPU tunnel
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index of the interface
+ @param counters - number of packets/bytes sent/received via this tunnel. Inaccurate while packets are in flight; summed over the per-thread counters. Zero for new tunnels.
+*/
+define gtpu_add_del_tunnel_v2_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_sw_if_counters_t counters;
+ option in_progress;
+};
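
For orientation, the in-process equivalent of this message looks roughly like the sketch below, built on the args structure from gtpu.h; the values are illustrative and the src/dst/encap_fib_index setup is elided:

    vnet_gtpu_add_mod_del_tunnel_args_t a = { 0 };
    u32 sw_if_index = ~0;

    a.opn = GTPU_ADD_TUNNEL;
    a.teid = 13;                          /* local (rx) TEID */
    a.tteid = 55;                         /* remote (tx) TEID */
    a.mcast_sw_if_index = ~0;
    a.decap_next_index = GTPU_INPUT_NEXT_L2_INPUT;
    a.pdu_extension = 1;                  /* add the PDU session container */
    a.qfi = 5;                            /* must fit in 6 bits */
    /* a.src, a.dst, a.encap_fib_index set as in the API handler */
    int rv = vnet_gtpu_add_mod_del_tunnel (&a, &sw_if_index);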
+
/** \brief Update GTPU tunnel TX TEID
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -112,6 +183,56 @@ define gtpu_tunnel_details
u32 tteid;
};
+
+/** \brief Dump GTPU tunnel
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the interface
+*/
+define gtpu_tunnel_v2_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ option vat_help = "[<intfc> | sw_if_index <nn>]";
+ option in_progress;
+};
+
+/** \brief dump details of a GTPU tunnel
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the interface
+ @param src_address - GTPU tunnel's source address.
+ @param dst_address - GTPU tunnel's destination address.
+ @param mcast_sw_if_index - interface for mcast reception, required when dst is a multicast address
+ @param encap_vrf_id - fib identifier used for outgoing encapsulated packets
+ @param decap_next_index - index of the next node after decap, on success
+ @param teid - Local (rx) Tunnel Endpoint Identifier
+ @param tteid - Remote (tx) Tunnel Endpoint Identifier
+ @param pdu_extension - add PDU session container extension to each packet
+ @param qfi - the QFI to set in the PDU session container, 6 bits only
+ @param is_forwarding - tunnel used for forwarding packets
+ @param forwarding_type - the type of packets forwarded
+ @param counters - number of packets/bytes sent/received via this tunnel. Inaccurate while packets are in flight; summed over the per-thread counters.
+*/
+define gtpu_tunnel_v2_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_address_t src_address;
+ vl_api_address_t dst_address;
+ vl_api_interface_index_t mcast_sw_if_index;
+ u32 encap_vrf_id;
+ vl_api_gtpu_decap_next_type_t decap_next_index;
+ u32 teid;
+ u32 tteid;
+ bool pdu_extension;
+ u8 qfi;
+ bool is_forwarding;
+ vl_api_gtpu_forwarding_type_t forwarding_type;
+ vl_api_sw_if_counters_t counters;
+ option in_progress;
+};
+
/** \brief Interface set gtpu-bypass request
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -146,6 +267,79 @@ autoreply define gtpu_offload_rx
option vat_help = "hw <intfc> rx <tunnel-name> [del]";
};
+/** \brief Set gtpu-forward request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param forwarding_type - forward filter (unknown teid, unknown message type or unknown header)
+ @param dst_address - forward destination address.
+ @param encap_vrf_id - fib identifier used for outgoing packets
+ @param decap_next_index - index of the next node after decap, on success
+*/
+define gtpu_add_del_forward
+{
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_address_t dst_address;
+ vl_api_gtpu_forwarding_type_t forwarding_type;
+ u32 encap_vrf_id;
+ vl_api_gtpu_decap_next_type_t decap_next_index;
+ option vat_help = "dst <ip-addr> {bad-header|unknown-teid|unknown-type} [decap-next <l2|nn>] [del]";
+ option in_progress;
+};
+
+/** \brief reply for set or delete GTPU forwarding
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index of the interface
+*/
+define gtpu_add_del_forward_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+ option in_progress;
+};
+
+/** \brief Get a list of tunnel metrics, intended for bulk transfer.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index_start - software index of the first interface to return data on.
+ @param capacity - max number of interfaces returned.
+*/
+define gtpu_get_transfer_counts
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index_start;
+ u32 capacity;
+ //option vat_help = "start_index <sw_if_index> count <nn>";
+ option in_progress;
+};
+
+/** \brief reply for the GTPU transfer counts request
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param count - number of tunnel counters returned, sequential starting at sw_if_index_start.
+ @param tunnels - per-tunnel packet/byte counters. Inaccurate while packets are in flight; summed over the per-thread counters.
+*/
+typedef tunnel_metrics
+{
+ vl_api_interface_index_t sw_if_index;
+ u32 reserved;
+ vl_api_sw_if_counters_t counters;
+};
+
+define gtpu_get_transfer_counts_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ vl_api_tunnel_metrics_t tunnels[count];
+ option in_progress;
+};
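
A client pages through the tunnel set by advancing sw_if_index_start until a short page comes back; a sketch, where fetch() is a hypothetical wrapper around the binary-API round trip:

    u32 start = 0, capacity = 64, n;
    vl_api_gtpu_get_transfer_counts_reply_t *r;

    do
      {
        r = fetch (start, capacity);      /* hypothetical helper */
        n = clib_net_to_host_u32 (r->count);
        if (n > 0)
          start = clib_net_to_host_u32 (r->tunnels[n - 1].sw_if_index) + 1;
      }
    while (n == capacity);                /* short page: no more tunnels */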
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/plugins/gtpu/gtpu.c b/src/plugins/gtpu/gtpu.c
index 531e45a1d5a..a2013c91c3f 100644
--- a/src/plugins/gtpu/gtpu.c
+++ b/src/plugins/gtpu/gtpu.c
@@ -35,7 +35,6 @@
gtpu_main_t gtpu_main;
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_gtpu_bypass, static) = {
.arc_name = "ip4-unicast",
.node_name = "ip4-gtpu-bypass",
@@ -47,7 +46,6 @@ VNET_FEATURE_INIT (ip6_gtpu_bypass, static) = {
.node_name = "ip6-gtpu-bypass",
.runs_before = VNET_FEATURES ("ip6-lookup"),
};
-/* *INDENT-on* */
u8 * format_gtpu_encap_trace (u8 * s, va_list * args)
{
@@ -56,8 +54,13 @@ u8 * format_gtpu_encap_trace (u8 * s, va_list * args)
gtpu_encap_trace_t * t
= va_arg (*args, gtpu_encap_trace_t *);
- s = format (s, "GTPU encap to gtpu_tunnel%d tteid %d",
- t->tunnel_index, t->tteid);
+ s = format (s, "GTPU encap to gtpu_tunnel%d tteid %u ", t->tunnel_index,
+ t->tteid);
+
+ if (t->pdu_extension)
+ s = format (s, "pdu-extension qfi %d ", t->qfi);
+ else
+ s = format (s, "no-pdu-extension ");
return s;
}
@@ -95,16 +98,37 @@ format_gtpu_tunnel (u8 * s, va_list * args)
is_ipv6 ? im6->fibs[t->encap_fib_index].ft_table_id :
im4->fibs[t->encap_fib_index].ft_table_id;
- s = format (s, "[%d] src %U dst %U teid %d tteid %d "
+ s = format (s,
+ "[%d] src %U dst %U teid %u tteid %u "
"encap-vrf-id %d sw-if-idx %d ",
- t - ngm->tunnels,
- format_ip46_address, &t->src, IP46_TYPE_ANY,
- format_ip46_address, &t->dst, IP46_TYPE_ANY,
- t->teid, t->tteid, encap_vrf_id, t->sw_if_index);
+ t - ngm->tunnels, format_ip46_address, &t->src, IP46_TYPE_ANY,
+ format_ip46_address, &t->dst, IP46_TYPE_ANY, t->teid, t->tteid,
+ encap_vrf_id, t->sw_if_index);
s = format (s, "encap-dpo-idx %d ", t->next_dpo.dpoi_index);
s = format (s, "decap-next-%U ", format_decap_next, t->decap_next_index);
+ if (t->is_forwarding)
+ {
+ switch (t->forwarding_type)
+ {
+ case GTPU_FORWARD_BAD_HEADER:
+ s = format (s, "forwarding bad-header ");
+ break;
+ case GTPU_FORWARD_UNKNOWN_TEID:
+ s = format (s, "forwarding unknown-teid ");
+ break;
+ case GTPU_FORWARD_UNKNOWN_TYPE:
+ s = format (s, "forwarding unknown-type ");
+ break;
+ }
+ return s;
+ }
+ if (t->pdu_extension != 0)
+ s = format (s, "pdu-enabled qfi %d ", t->qfi);
+ else
+ s = format (s, "pdu-disabled ");
+
if (PREDICT_FALSE (ip46_address_is_multicast (&t->dst)))
s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index);
@@ -128,14 +152,12 @@ gtpu_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
return /* no error */ 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (gtpu_device_class,static) = {
.name = "GTPU",
.format_device_name = format_gtpu_name,
.format_tx_trace = format_gtpu_encap_trace,
.admin_up_down_function = gtpu_interface_admin_up_down,
};
-/* *INDENT-ON* */
static u8 *
format_gtpu_header_with_length (u8 * s, va_list * args)
@@ -145,7 +167,6 @@ format_gtpu_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (gtpu_hw_class) =
{
.name = "GTPU",
@@ -153,7 +174,6 @@ VNET_HW_INTERFACE_CLASS (gtpu_hw_class) =
.build_rewrite = default_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
static void
gtpu_tunnel_restack_dpo (gtpu_tunnel_t * t)
@@ -224,15 +244,18 @@ const static fib_node_vft_t gtpu_vft = {
.fnv_back_walk = gtpu_tunnel_back_walk,
};
-
-#define foreach_copy_field \
-_(teid) \
-_(tteid) \
-_(mcast_sw_if_index) \
-_(encap_fib_index) \
-_(decap_next_index) \
-_(src) \
-_(dst)
+#define foreach_copy_field \
+ _ (teid) \
+ _ (tteid) \
+ _ (mcast_sw_if_index) \
+ _ (encap_fib_index) \
+ _ (decap_next_index) \
+ _ (src) \
+ _ (dst) \
+ _ (pdu_extension) \
+ _ (qfi) \
+ _ (is_forwarding) \
+ _ (forwarding_type)
static void
ip_udp_gtpu_rewrite (gtpu_tunnel_t * t, bool is_ip6)
@@ -251,12 +274,15 @@ ip_udp_gtpu_rewrite (gtpu_tunnel_t * t, bool is_ip6)
udp_header_t *udp;
gtpu_header_t *gtpu;
+ gtpu_ext_with_pdu_session_header_t *gtpu_ext_pdu;
+ i64 length_adjustment = 0;
/* Fixed portion of the (outer) ip header */
if (!is_ip6)
{
ip4_header_t *ip = &r.h4->ip4;
udp = &r.h4->udp;
gtpu = &r.h4->gtpu;
+ gtpu_ext_pdu = &r.h4->gtpu_ext;
ip->ip_version_and_header_length = 0x45;
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
@@ -272,6 +298,7 @@ ip_udp_gtpu_rewrite (gtpu_tunnel_t * t, bool is_ip6)
ip6_header_t *ip = &r.h6->ip6;
udp = &r.h6->udp;
gtpu = &r.h6->gtpu;
+ gtpu_ext_pdu = &r.h6->gtpu_ext;
ip->ip_version_traffic_class_and_flow_label =
clib_host_to_net_u32 (6 << 28);
ip->hop_limit = 255;
@@ -290,9 +317,27 @@ ip_udp_gtpu_rewrite (gtpu_tunnel_t * t, bool is_ip6)
gtpu->type = GTPU_TYPE_GTPU;
gtpu->teid = clib_host_to_net_u32 (t->tteid);
+ if (t->pdu_extension)
+ {
+ gtpu->ver_flags = GTPU_V1_VER | GTPU_PT_GTP | GTPU_E_BIT;
+ gtpu->next_ext_type = GTPU_EXT_HDR_PDU_SESSION_CONTAINER;
+ gtpu_ext_pdu->len = 1;
+ gtpu_ext_pdu->pdu.oct0 = GTPU_PDU_DL_SESSION_TYPE;
+ gtpu_ext_pdu->pdu.oct1 = t->qfi;
+ gtpu_ext_pdu->next_header = 0;
+ }
+ else
+ {
+ // Remove the size of the PDU session header and the optional fields
+ length_adjustment = -sizeof (gtpu_ext_with_pdu_session_header_t) - 4;
+ }
+
t->rewrite = r.rw;
- /* Now only support 8-byte gtpu header. TBD */
- _vec_len (t->rewrite) = sizeof (ip4_gtpu_header_t) - 4;
+  /* Only the 8-byte gtpu header or the 12+4-byte extended header is
+   * supported for now. TBD */
+ if (!is_ip6)
+ vec_set_len (t->rewrite, sizeof (ip4_gtpu_header_t) + length_adjustment);
+ else
+ vec_set_len (t->rewrite, sizeof (ip6_gtpu_header_t) + length_adjustment);
return;
}
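
The length arithmetic above is easier to check with concrete numbers; a sanity sketch, assuming the packed layouts declared in gtpu.h:

    /* ip4 (20) + udp (8) + gtpu incl. optional fields (12) + ext (4) = 44 */
    STATIC_ASSERT_SIZEOF (ip4_gtpu_header_t, 44);
    /* ip6 (40) + udp (8) + gtpu (12) + ext (4) = 64 */
    STATIC_ASSERT_SIZEOF (ip6_gtpu_header_t, 64);
    /* without the PDU extension: length_adjustment = -4 - 4 = -8, so the
     * ip4 rewrite shrinks to 36 = 20 + 8 + 8 (mandatory gtpu header only) */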
@@ -349,6 +394,139 @@ mcast_shared_remove (ip46_address_t * dst)
hash_unset_mem_free (&gtpu_main.mcast_shared, dst);
}
+int
+vnet_gtpu_add_del_forwarding (vnet_gtpu_add_mod_del_tunnel_args_t *a,
+ u32 *sw_if_indexp)
+{
+ gtpu_main_t *gtm = &gtpu_main;
+ bool is_add;
+ u32 current_index_value, current_index_value_ipv6;
+ u32 address_tabel_ipv4;
+ ip6_address_t address_tabel_ipv6;
+ u32 sw_if_index = ~0;
+ bool is_ip6 = !ip46_address_is_ip4 (&a->dst);
+ int rv;
+ /* Check for errors */
+ if (!a->is_forwarding)
+ {
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ switch (a->opn)
+ {
+ case GTPU_ADD_TUNNEL:
+ is_add = 1;
+ break;
+ case GTPU_DEL_TUNNEL:
+ is_add = 0;
+ break;
+ default:
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ /* Check if the operation is valid, and get the current state if it is.
+ * Handling multiple flags at once is not supported yet. */
+ switch (a->forwarding_type)
+ {
+ case GTPU_FORWARD_BAD_HEADER:
+ current_index_value = gtm->bad_header_forward_tunnel_index_ipv4;
+ current_index_value_ipv6 = gtm->bad_header_forward_tunnel_index_ipv6;
+ address_tabel_ipv4 = GTPU_FORWARD_BAD_HEADER_ADDRESS_IPV4;
+ /* ipv6 is TBD */
+ ip6_address_t address_tabel_ipv6_ = GTPU_FORWARD_BAD_HEADER_ADDRESS_IPV6;
+ address_tabel_ipv6 = address_tabel_ipv6_;
+ break;
+ case GTPU_FORWARD_UNKNOWN_TEID:
+ current_index_value = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ current_index_value_ipv6 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ address_tabel_ipv4 = GTPU_FORWARD_UNKNOWN_TEID_ADDRESS_IPV4;
+ ip6_address_t address_tabel_ipv6__ =
+ GTPU_FORWARD_UNKNOWN_TEID_ADDRESS_IPV6;
+ address_tabel_ipv6 = address_tabel_ipv6__;
+ break;
+ case GTPU_FORWARD_UNKNOWN_TYPE:
+ current_index_value = gtm->unknown_type_forward_tunnel_index_ipv4;
+ current_index_value_ipv6 = gtm->unknown_type_forward_tunnel_index_ipv6;
+ address_tabel_ipv4 = GTPU_FORWARD_UNKNOWN_TYPE_ADDRESS_IPV4;
+ ip6_address_t address_tabel_ipv6___ =
+ GTPU_FORWARD_UNKNOWN_TYPE_ADDRESS_IPV6;
+ address_tabel_ipv6 = address_tabel_ipv6___;
+ break;
+ default:
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ if (is_ip6)
+ current_index_value = current_index_value_ipv6;
+
+ /* Check if the existing forwarding rule state conflicts with this operation
+ */
+ if ((is_add) && (current_index_value != ~0))
+ {
+ return VNET_API_ERROR_TUNNEL_EXIST;
+ }
+ if (!is_add)
+ {
+ if (current_index_value == ~0)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ /* Clear the tunnel index before deleting the tunnel itself */
+ switch (a->forwarding_type)
+ {
+ case GTPU_FORWARD_BAD_HEADER:
+ gtm->bad_header_forward_tunnel_index_ipv4 = ~0;
+ break;
+ case GTPU_FORWARD_UNKNOWN_TEID:
+ gtm->unknown_teid_forward_tunnel_index_ipv4 = ~0;
+ break;
+ case GTPU_FORWARD_UNKNOWN_TYPE:
+ gtm->unknown_type_forward_tunnel_index_ipv4 = ~0;
+ break;
+ }
+ }
+
+ /* src is the tunnel lookup key, so it is fixed.
+ * dst is used for the new target */
+ a->src = a->dst;
+ if (is_ip6)
+ a->dst.ip6 = address_tabel_ipv6;
+ else
+ a->dst.ip4.as_u32 = address_tabel_ipv4;
+ rv = vnet_gtpu_add_mod_del_tunnel (a, &sw_if_index);
+
+  // Hand the sw_if_index back if the caller asked for it
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ if (rv != 0)
+ return rv;
+
+ /* Update the forwarding tunnel index */
+ u32 tunnel_index = is_add ? vnet_gtpu_get_tunnel_index (sw_if_index) : ~0;
+ switch (a->forwarding_type)
+ {
+ case GTPU_FORWARD_BAD_HEADER:
+ if (is_ip6)
+ gtm->bad_header_forward_tunnel_index_ipv6 = tunnel_index;
+ else
+ gtm->bad_header_forward_tunnel_index_ipv4 = tunnel_index;
+
+ break;
+ case GTPU_FORWARD_UNKNOWN_TEID:
+ if (is_ip6)
+ gtm->unknown_teid_forward_tunnel_index_ipv6 = tunnel_index;
+ else
+ gtm->unknown_teid_forward_tunnel_index_ipv4 = tunnel_index;
+ break;
+ case GTPU_FORWARD_UNKNOWN_TYPE:
+ if (is_ip6)
+ gtm->unknown_type_forward_tunnel_index_ipv6 = tunnel_index;
+ else
+ gtm->unknown_type_forward_tunnel_index_ipv4 = tunnel_index;
+ break;
+ }
+ return 0;
+}
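
A sketch of installing an IPv4 catch-all for unknown TEIDs through this function; the destination address is illustrative:

    vnet_gtpu_add_mod_del_tunnel_args_t a = { 0 };
    u32 sw_if_index = ~0;

    a.opn = GTPU_ADD_TUNNEL;
    a.is_forwarding = 1;
    a.forwarding_type = GTPU_FORWARD_UNKNOWN_TEID;
    a.decap_next_index = GTPU_INPUT_NEXT_L2_INPUT;
    /* matching packets are re-encapsulated towards this destination;
     * a.src is overwritten internally, see above */
    a.dst.ip4.as_u32 = clib_host_to_net_u32 (0x0a000303); /* 10.0.3.3 */
    int rv = vnet_gtpu_add_del_forwarding (&a, &sw_if_index);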
+
int vnet_gtpu_add_mod_del_tunnel
(vnet_gtpu_add_mod_del_tunnel_args_t * a, u32 * sw_if_indexp)
{
@@ -419,7 +597,7 @@ int vnet_gtpu_add_mod_del_tunnel
vnet_interface_main_t *im = &vnm->interface_main;
hw_if_index = gtm->free_gtpu_tunnel_hw_if_indices
[vec_len (gtm->free_gtpu_tunnel_hw_if_indices) - 1];
- _vec_len (gtm->free_gtpu_tunnel_hw_if_indices) -= 1;
+ vec_dec_len (gtm->free_gtpu_tunnel_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, hw_if_index);
hi->dev_instance = t - gtm->tunnels;
@@ -473,7 +651,8 @@ int vnet_gtpu_add_mod_del_tunnel
fib_prefix_t tun_dst_pfx;
vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
- fib_prefix_from_ip46_addr (&t->dst, &tun_dst_pfx);
+ fib_protocol_t fp = fib_ip_proto (is_ip6);
+ fib_prefix_from_ip46_addr (fp, &t->dst, &tun_dst_pfx);
if (!ip46_address_is_multicast (&t->dst))
{
/* Unicast tunnel -
@@ -497,8 +676,6 @@ int vnet_gtpu_add_mod_del_tunnel
* with different VNIs, create the output adjacency only if
* it does not already exist
*/
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
if (vtep_addr_ref (&gtm->vtep_table,
t->encap_fib_index, &t->dst) == 1)
{
@@ -524,15 +701,16 @@ int vnet_gtpu_add_mod_del_tunnel
* - the forwarding interface is for-us
* - the accepting interface is that from the API
*/
- mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx, MFIB_SOURCE_GTPU, &path);
+ mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
+ MFIB_SOURCE_GTPU,
+ MFIB_ENTRY_FLAG_NONE, &path);
path.frp_sw_if_index = a->mcast_sw_if_index;
path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei = mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx,
- MFIB_SOURCE_GTPU, &path);
+ mfei = mfib_table_entry_path_update (
+ t->encap_fib_index, &mpfx, MFIB_SOURCE_GTPU,
+ MFIB_ENTRY_FLAG_NONE, &path);
/*
* Create the mcast adjacency to send traffic to the group
@@ -577,6 +755,7 @@ int vnet_gtpu_add_mod_del_tunnel
if (a->tteid == 0)
return VNET_API_ERROR_INVALID_VALUE;
t->tteid = a->tteid;
+ vec_free (t->rewrite);
ip_udp_gtpu_rewrite (t, is_ip6);
return 0;
}
@@ -634,6 +813,22 @@ int vnet_gtpu_add_mod_del_tunnel
return 0;
}
+int
+get_combined_counters (u32 sw_if_index, vlib_counter_t *result_rx,
+ vlib_counter_t *result_tx)
+{
+ gtpu_main_t *gtm = &gtpu_main;
+ vnet_main_t *vnm = gtm->vnet_main;
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vlib_get_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_RX,
+ sw_if_index, result_rx);
+ vlib_get_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX,
+ sw_if_index, result_tx);
+ return 0;
+}
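
Callers can pair it with clib_warning for a quick dump; a sketch with illustrative formatting:

    vlib_counter_t rx, tx;

    get_combined_counters (sw_if_index, &rx, &tx);
    clib_warning ("tunnel %u: rx %llu pkts/%llu bytes, tx %llu pkts/%llu bytes",
                  sw_if_index, rx.packets, rx.bytes, tx.packets, tx.bytes);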
+
static uword
get_decap_next_for_node (u32 node_index, u32 ipv4_set)
{
@@ -689,6 +884,11 @@ gtpu_add_del_tunnel_command_fn (vlib_main_t * vm,
u32 decap_next_index = GTPU_INPUT_NEXT_L2_INPUT;
u32 teid = 0, tteid = 0;
u32 tmp;
+ /* PDU is disabled by default */
+ u8 pdu_extension = 0;
+ u32 qfi = ~0;
+ u8 is_forwarding = 0;
+ u8 forwarding_type = 0;
int rv;
vnet_gtpu_add_mod_del_tunnel_args_t _a, *a = &_a;
u32 tunnel_sw_if_index;
@@ -767,6 +967,8 @@ gtpu_add_del_tunnel_command_fn (vlib_main_t * vm,
;
else if (unformat (line_input, "upd-tteid %d", &tteid))
opn = GTPU_UPD_TTEID;
+ else if (unformat (line_input, "qfi %d", &qfi))
+ pdu_extension = 1;
else
{
error = clib_error_return (0, "parse error: '%U'",
@@ -828,7 +1030,11 @@ gtpu_add_del_tunnel_command_fn (vlib_main_t * vm,
error = clib_error_return (0, "next node not found");
goto done;
}
-
+ if (pdu_extension == 1 && qfi > 31)
+ {
+ error = clib_error_return (0, "qfi max value is 31");
+ goto done;
+ }
clib_memset (a, 0, sizeof (*a));
a->opn = opn;
@@ -879,29 +1085,30 @@ done:
* to span multiple servers. This is done by building an L2 overlay on
* top of an L3 network underlay using GTPU tunnels.
*
- * GTPU can also be used to transport IP packetes as its PDU type to
+ * GTPU can also be used to transport IP packets as its PDU type to
* allow IP forwarding over underlay network, e.g. between RAN and UPF
- * for mobility deplyments.
+ * for mobility deployments.
*
* @cliexpar
* Example of how to create a GTPU Tunnel:
- * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 teid 13 tteid 55 encap-vrf-id 7}
+ * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 teid 13 tteid 55
+ * encap-vrf-id 7}
* Example of how to delete a GTPU Tunnel:
- * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 teid 13 encap-vrf-id 7 del}
+ * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 teid 13 encap-vrf-id
+ * 7 del}
* Example of how to update tx TEID of a GTPU Tunnel:
- * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 encap-vrf-id 7 upd-tteid 55}
+ * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 encap-vrf-id 7
+ * upd-tteid 55}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_gtpu_tunnel_command, static) = {
.path = "create gtpu tunnel",
.short_help =
- "create gtpu tunnel src <local-tep-addr>"
- " {dst <remote-tep-addr>|group <mcast-addr> <intf-name>}"
- " teid <nn> [tteid <nn>] [encap-vrf-id <nn>]"
- " [decap-next [l2|ip4|ip6|node <name>]] [del | upd-tteid <nn>]",
+ "create gtpu tunnel src <local-tep-addr>"
+ " {dst <remote-tep-addr>|group <mcast-addr> <intf-name>}"
+ " teid <nn> [tteid <nn>] [encap-vrf-id <nn>]"
+ " [decap-next [l2|ip4|ip6|node <name>]] [qfi <nn>] [del | upd-tteid <nn>]",
.function = gtpu_add_del_tunnel_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_gtpu_tunnel_command_fn (vlib_main_t * vm,
@@ -928,16 +1135,15 @@ show_gtpu_tunnel_command_fn (vlib_main_t * vm,
* @cliexpar
* Example of how to display the GTPU Tunnel entries:
* @cliexstart{show gtpu tunnel}
- * [0] src 10.0.3.1 dst 10.0.3.3 teid 13 tx-teid 55 encap_fib_index 0 sw_if_index 5 decap_next l2
+ * [0] src 10.0.3.1 dst 10.0.3.3 teid 13 tx-teid 55 encap_fib_index 0
+ sw_if_index 5 decap_next l2 pdu-disabled
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_gtpu_tunnel_command, static) = {
.path = "show gtpu tunnel",
.short_help = "show gtpu tunnel",
.function = show_gtpu_tunnel_command_fn,
};
-/* *INDENT-ON* */
void
vnet_int_gtpu_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
@@ -1004,7 +1210,7 @@ set_ip4_gtpu_bypass (vlib_main_t * vm,
/*?
* This command adds the 'ip4-gtpu-bypass' graph node for a given interface.
* By adding the IPv4 gtpu-bypass graph node to an interface, the node checks
- * for and validate input gtpu packet and bypass ip4-lookup, ip4-local,
+ * for and validates input gtpu packets, bypassing the ip4-lookup, ip4-local,
 * and ip4-udp-lookup nodes to speed up gtpu packet forwarding. This node
 * adds only minimal extra overhead for non-gtpu packets.
*
@@ -1043,13 +1249,11 @@ set_ip4_gtpu_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip gtpu-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_gtpu_bypass_command, static) = {
.path = "set interface ip gtpu-bypass",
.function = set_ip4_gtpu_bypass,
.short_help = "set interface ip gtpu-bypass <interface> [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_ip6_gtpu_bypass (vlib_main_t * vm,
@@ -1061,7 +1265,7 @@ set_ip6_gtpu_bypass (vlib_main_t * vm,
/*?
* This command adds the 'ip6-gtpu-bypass' graph node for a given interface.
* By adding the IPv6 gtpu-bypass graph node to an interface, the node checks
- * for and validate input gtpu packet and bypass ip6-lookup, ip6-local,
+ * for and validates input gtpu packets, bypassing the ip6-lookup, ip6-local,
 * and ip6-udp-lookup nodes to speed up gtpu packet forwarding. This node
 * adds only minimal extra overhead for non-gtpu packets.
*
@@ -1100,13 +1304,11 @@ set_ip6_gtpu_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip6 gtpu-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_gtpu_bypass_command, static) = {
.path = "set interface ip6 gtpu-bypass",
.function = set_ip6_gtpu_bypass,
.short_help = "set interface ip6 gtpu-bypass <interface> [del]",
};
-/* *INDENT-ON* */
int
vnet_gtpu_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add)
@@ -1229,14 +1431,145 @@ gtpu_offload_command_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (gtpu_offload_command, static) = {
.path = "set flow-offload gtpu",
.short_help =
"set flow-offload gtpu hw <inerface-name> rx <tunnel-name> [del]",
.function = gtpu_offload_command_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+gtpu_forward_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ u32 tunnel_sw_if_index;
+ clib_error_t *error = NULL;
+
+ u32 decap_next_index = GTPU_INPUT_NEXT_L2_INPUT;
+
+ int is_add = 1;
+ u8 dst_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ ip46_address_t src, dst;
+ u32 encap_fib_index = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 teid = 0, tteid = 0;
+ u32 tmp;
+ /* PDU is disabled by default */
+ u8 pdu_extension = 0;
+ u32 qfi = ~0;
+ u8 is_forwarding = 1;
+ u8 forwarding_type = 0;
+ int rv;
+ vnet_gtpu_add_mod_del_tunnel_args_t _a, *a = &_a;
+
+  /* Can't "universally zero init" (={0}) due to GCC bug 53119 */
+ clib_memset (&src, 0, sizeof src);
+ clib_memset (&dst, 0, sizeof dst);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "dst %U", unformat_ip4_address, &dst.ip4))
+ {
+ dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "dst %U", unformat_ip6_address, &dst.ip6))
+ {
+ dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "decap-next %U", unformat_decap_next,
+ &decap_next_index, ipv4_set))
+ ;
+ else if (unformat (line_input, "encap-vrf-id %d", &tmp))
+ {
+ encap_fib_index = fib_table_find (fib_ip_proto (ipv6_set), tmp);
+ if (encap_fib_index == ~0)
+ {
+ error =
+ clib_error_return (0, "nonexistent encap-vrf-id %d", tmp);
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "bad-header"))
+ forwarding_type |= GTPU_FORWARD_BAD_HEADER;
+ else if (unformat (line_input, "unknown-teid"))
+ forwarding_type |= GTPU_FORWARD_UNKNOWN_TEID;
+ else if (unformat (line_input, "unknown-type"))
+ forwarding_type |= GTPU_FORWARD_UNKNOWN_TYPE;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!dst_set)
+ {
+ error = clib_error_return (0, "dst must be set to a valid IP address");
+ goto done;
+ }
+
+ a->opn = is_add ? GTPU_ADD_TUNNEL : GTPU_DEL_TUNNEL;
+#define _(x) a->x = x;
+ foreach_copy_field;
+#undef _
+
+ rv = vnet_gtpu_add_del_forwarding (a, &tunnel_sw_if_index);
+
+ switch (rv)
+ {
+ case 0:
+ if (is_add)
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
+ vnet_get_main (), tunnel_sw_if_index);
+ break;
+
+ case VNET_API_ERROR_TUNNEL_EXIST:
+ error = clib_error_return (0, "tunnel already exists...");
+ goto done;
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "tunnel does not exist...");
+ goto done;
+
+ case VNET_API_ERROR_INVALID_ARGUMENT:
+ error =
+ clib_error_return (0, "one and only one of unknown-teid, unknown-type "
+ "or bad-header must be specified");
+ goto done;
+
+ default:
+ error =
+ clib_error_return (0, "vnet_gtpu_add_del_tunnel returned %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (gtpu_forward_command, static) = {
+ .path = "create gtpu forward",
+ .short_help =
+ "create gtpu forward dst <local-tep-addr> "
+ "{unknown-teid|unknown-type|bad-header} "
+ "[decap-next [l2|ip4|ip6|node <name>]] [encap-vrf-id <nn>] [del]",
+ .function = gtpu_forward_command_fn,
+};
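
Usage follows the short help, e.g. "create gtpu forward dst 10.0.3.3 unknown-teid decap-next l2" to install a rule (the address is illustrative), and the same command with "del" appended to remove it; exactly one of the three filter keywords must be given.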
clib_error_t *
gtpu_init (vlib_main_t * vm)
@@ -1258,19 +1591,25 @@ gtpu_init (vlib_main_t * vm)
sizeof (ip46_address_t),
sizeof (mcast_shared_t));
- gtm->fib_node_type = fib_node_register_new_type (&gtpu_vft);
+ gtm->fib_node_type = fib_node_register_new_type ("gtpu", &gtpu_vft);
+
+ /* Clear forward tunnels */
+ gtm->bad_header_forward_tunnel_index_ipv4 = ~0;
+ gtm->unknown_teid_forward_tunnel_index_ipv4 = ~0;
+ gtm->unknown_type_forward_tunnel_index_ipv4 = ~0;
+ gtm->bad_header_forward_tunnel_index_ipv6 = ~0;
+ gtm->unknown_teid_forward_tunnel_index_ipv6 = ~0;
+ gtm->unknown_type_forward_tunnel_index_ipv6 = ~0;
return 0;
}
VLIB_INIT_FUNCTION (gtpu_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "GPRS Tunnelling Protocol, User Data (GTPv1-U)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/gtpu/gtpu.h b/src/plugins/gtpu/gtpu.h
index 59e340148fb..881fbca936a 100644
--- a/src/plugins/gtpu/gtpu.h
+++ b/src/plugins/gtpu/gtpu.h
@@ -53,21 +53,56 @@
* 12 Next Extension Header Type3) 4)
**/
-typedef struct
-{
+typedef CLIB_PACKED (struct {
u8 ver_flags;
u8 type;
u16 length; /* length in octets of the data following the fixed part of the header */
u32 teid;
+ /* The following fields exists if and only if one or more of E, S or PN
+ * are 1. */
u16 sequence;
u8 pdu_number;
u8 next_ext_type;
-} gtpu_header_t;
+}) gtpu_header_t;
-#define GTPU_V1_HDR_LEN 8
+typedef CLIB_PACKED (struct {
+ u8 type;
+ u8 len;
+ u16 pad;
+}) gtpu_ext_header_t;
+
+/**
+ * DL PDU SESSION INFORMATION (PDU Type 0):
+ * (3GPP TS 38.415)
+ * Bits
+ * Octets 8 7 6 5 4 3 2 1
+ * 1 type qmp snp spare
+ * 2 ppp rqi qos_fi
+ *
+ * UL PDU SESSION INFORMATION (PDU Type 1):
+ * Bits
+ * Octets 8 7 6 5 4 3 2 1
+ * 1 type qmp DL d. UL d. snp
+ * 2 n3/n9 delay new IE qos_fi
+ **/
+typedef CLIB_PACKED (struct {
+ u8 oct0;
+ u8 oct1;
+ // Extensions are supported
+}) pdu_session_container_t;
+
+STATIC_ASSERT_SIZEOF (pdu_session_container_t, 2);
+typedef CLIB_PACKED (struct {
+ u8 len;
+ pdu_session_container_t pdu;
+ u8 next_header;
+}) gtpu_ext_with_pdu_session_header_t;
+
+#define GTPU_V1_HDR_LEN 8
#define GTPU_VER_MASK (7<<5)
#define GTPU_PT_BIT (1<<4)
+#define GTPU_RES_BIT (1 << 3)
#define GTPU_E_BIT (1<<2)
#define GTPU_S_BIT (1<<1)
#define GTPU_PN_BIT (1<<0)
@@ -78,25 +113,51 @@ typedef struct
#define GTPU_PT_GTP (1<<4)
#define GTPU_TYPE_GTPU 255
-/* *INDENT-OFF* */
+#define GTPU_EXT_HDR_PDU_SESSION_CONTAINER 133
+#define GTPU_NO_MORE_EXT_HDR 0
+#define GTPU_PDU_DL_SESSION_TYPE 0
+#define GTPU_PDU_UL_SESSION_TYPE (1 << 4)
+
+#define GTPU_FORWARD_BAD_HEADER (1 << 0)
+#define GTPU_FORWARD_UNKNOWN_TEID (1 << 1)
+#define GTPU_FORWARD_UNKNOWN_TYPE (1 << 2)
+
+/* the ipv4 addresses used for the forwarding tunnels. 127.0.0.127 - .129. */
+#define GTPU_FORWARD_BAD_HEADER_ADDRESS_IPV4 0x7f00007fu
+#define GTPU_FORWARD_UNKNOWN_TEID_ADDRESS_IPV4 0x8000007fu
+#define GTPU_FORWARD_UNKNOWN_TYPE_ADDRESS_IPV4 0x8100007fu
+
+/* the ipv6 addresses used for the forwarding tunnels.
+ * 2001:db8:ffff:ffff:ffff:ffff:ffff:fffd -
+ * 2001:db8:ffff:ffff:ffff:ffff:ffff:ffff */
+#define GTPU_FORWARD_BAD_HEADER_ADDRESS_IPV6 \
+ { \
+ .as_u64[0] = 0xffffffffb80d0120ull, .as_u64[1] = 0xfdffffffffffffffull \
+ }
+#define GTPU_FORWARD_UNKNOWN_TEID_ADDRESS_IPV6 \
+ { \
+ .as_u64[0] = 0xffffffffb80d0120ull, .as_u64[1] = 0xfeffffffffffffffull \
+ }
+#define GTPU_FORWARD_UNKNOWN_TYPE_ADDRESS_IPV6 \
+ { \
+ .as_u64[0] = 0xffffffffb80d0120ull, .as_u64[1] = 0xffffffffffffffffull \
+ }
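
These u64 pairs are the little-endian memory image of the addresses documented above; a sanity sketch, assuming a little-endian host:

    /* 0xffffffffb80d0120 lays out in memory as 20 01 0d b8 ff ff ff ff,
     * i.e. the 2001:db8:ffff:ffff prefix; the second u64 ends the
     * address with ...:fffd / :fffe / :ffff respectively. */
    ip6_address_t a = GTPU_FORWARD_BAD_HEADER_ADDRESS_IPV6;
    ASSERT (a.as_u8[0] == 0x20 && a.as_u8[1] == 0x01 && a.as_u8[15] == 0xfd);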
typedef CLIB_PACKED(struct
{
ip4_header_t ip4; /* 20 bytes */
udp_header_t udp; /* 8 bytes */
gtpu_header_t gtpu; /* 12 bytes */
+ gtpu_ext_with_pdu_session_header_t gtpu_ext; /* 4 bytes */
}) ip4_gtpu_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct
{
ip6_header_t ip6; /* 40 bytes */
udp_header_t udp; /* 8 bytes */
- gtpu_header_t gtpu; /* 8 bytes */
+ gtpu_header_t gtpu; /* 12 bytes */
+ gtpu_ext_with_pdu_session_header_t gtpu_ext; /* 4 bytes */
}) ip6_gtpu_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
/*
@@ -111,9 +172,7 @@ typedef CLIB_PACKED
u64 as_u64;
};
}) gtpu4_tunnel_key_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
/*
@@ -123,7 +182,6 @@ typedef CLIB_PACKED
ip6_address_t src;
u32 teid;
}) gtpu6_tunnel_key_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -157,6 +215,14 @@ typedef struct
u32 sw_if_index;
u32 hw_if_index;
+ /* PDU session container extension enable/disable */
+ u8 pdu_extension;
+ u8 qfi;
+
+ /* The tunnel is used for forwarding */
+ u8 is_forwarding;
+ u8 forwarding_type;
+
/**
* Linkage into the FIB object graph
*/
@@ -232,14 +298,25 @@ typedef struct
/* API message ID base */
u16 msg_id_base;
+  /* Handle GTP packets of unknown type like echo and error indication,
+   * unknown teid or bad version/header.
+   * All such packets will be forwarded to a new IP address,
+   * so that they can be processed outside vpp.
+   * If not set then packets are dropped.
+   * One or more indexes can be unused (~0). */
+ u32 bad_header_forward_tunnel_index_ipv4;
+ u32 unknown_teid_forward_tunnel_index_ipv4;
+ u32 unknown_type_forward_tunnel_index_ipv4;
+ u32 bad_header_forward_tunnel_index_ipv6;
+ u32 unknown_teid_forward_tunnel_index_ipv6;
+ u32 unknown_type_forward_tunnel_index_ipv6;
+
/* convenience */
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
u32 flow_id_start;
/* cache for last 8 gtpu tunnel */
-#ifdef CLIB_HAVE_VEC512
vtep4_cache_t vtep4_u512;
-#endif
} gtpu_main_t;
@@ -265,8 +342,15 @@ typedef struct
u32 decap_next_index;
u32 teid; /* local or rx teid */
u32 tteid; /* remote or tx teid */
+ u8 pdu_extension;
+ u8 qfi;
+ u8 is_forwarding;
+ u8 forwarding_type;
} vnet_gtpu_add_mod_del_tunnel_args_t;
+int vnet_gtpu_add_del_forwarding (vnet_gtpu_add_mod_del_tunnel_args_t *a,
+ u32 *sw_if_indexp);
+
int vnet_gtpu_add_mod_del_tunnel
(vnet_gtpu_add_mod_del_tunnel_args_t * a, u32 * sw_if_indexp);
@@ -274,11 +358,15 @@ typedef struct
{
u32 tunnel_index;
u32 tteid;
+ u8 pdu_extension;
+ u8 qfi;
} gtpu_encap_trace_t;
void vnet_int_gtpu_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable);
u32 vnet_gtpu_get_tunnel_index (u32 sw_if_index);
int vnet_gtpu_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add);
+int get_combined_counters (u32 sw_if_index, vlib_counter_t *result_rx,
+ vlib_counter_t *result_tx);
#endif /* included_vnet_gtpu_h */
diff --git a/src/plugins/gtpu/gtpu_api.c b/src/plugins/gtpu/gtpu_api.c
index 77432bae4fa..4efd9ac3bba 100644
--- a/src/plugins/gtpu/gtpu_api.c
+++ b/src/plugins/gtpu/gtpu_api.c
@@ -124,6 +124,10 @@ static void vl_api_gtpu_add_del_tunnel_t_handler
.decap_next_index = ntohl (mp->decap_next_index),
.teid = ntohl (mp->teid),
.tteid = ntohl (mp->tteid),
+ .pdu_extension = 0,
+ .qfi = 0,
+ .is_forwarding = 0,
+ .forwarding_type = 0,
};
ip_address_decode (&mp->dst_address, &a.dst);
ip_address_decode (&mp->src_address, &a.src);
@@ -154,12 +158,70 @@ static void vl_api_gtpu_add_del_tunnel_t_handler
rv = vnet_gtpu_add_mod_del_tunnel (&a, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_GTPU_ADD_DEL_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
+}
+
+static void
+vl_api_gtpu_add_del_tunnel_v2_t_handler (vl_api_gtpu_add_del_tunnel_v2_t *mp)
+{
+ vl_api_gtpu_add_del_tunnel_v2_reply_t *rmp;
+ int rv = 0;
+ vlib_counter_t result_rx;
+ vlib_counter_t result_tx;
+ gtpu_main_t *gtm = &gtpu_main;
+
+ vnet_gtpu_add_mod_del_tunnel_args_t a = {
+ .opn = mp->is_add ? GTPU_ADD_TUNNEL : GTPU_DEL_TUNNEL,
+ .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
+ .decap_next_index = ntohl (mp->decap_next_index),
+ .teid = ntohl (mp->teid),
+ .tteid = ntohl (mp->tteid),
+ .pdu_extension = mp->pdu_extension ? 1 : 0,
+ .qfi = mp->qfi,
+ .is_forwarding = 0,
+ .forwarding_type = 0,
+ };
+ ip_address_decode (&mp->dst_address, &a.dst);
+ ip_address_decode (&mp->src_address, &a.src);
+
+ u8 is_ipv6 = !ip46_address_is_ip4 (&a.dst);
+ a.encap_fib_index =
+ fib_table_find (fib_ip_proto (is_ipv6), ntohl (mp->encap_vrf_id));
+ if (a.encap_fib_index == ~0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+
+ /* Check src & dst are different */
+ if (ip46_address_cmp (&a.dst, &a.src) == 0)
+ {
+ rv = VNET_API_ERROR_SAME_SRC_DST;
+ goto out;
+ }
+ if (ip46_address_is_multicast (&a.dst) &&
+ !vnet_sw_if_index_is_api_valid (a.mcast_sw_if_index))
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto out;
+ }
+
+ u32 sw_if_index = ~0;
+ rv = vnet_gtpu_add_mod_del_tunnel (&a, &sw_if_index);
+ get_combined_counters (sw_if_index, &result_rx, &result_tx);
+
+out:
+ REPLY_MACRO2 (
+ VL_API_GTPU_ADD_DEL_TUNNEL_V2_REPLY, ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ rmp->counters.packets_rx = clib_net_to_host_u64 (result_rx.packets);
+ rmp->counters.packets_tx = clib_net_to_host_u64 (result_tx.packets);
+ rmp->counters.bytes_rx = clib_net_to_host_u64 (result_rx.bytes);
+ rmp->counters.bytes_tx = clib_net_to_host_u64 (result_tx.bytes);
+ }));
}
static void vl_api_gtpu_tunnel_update_tteid_t_handler
@@ -238,12 +300,10 @@ vl_api_gtpu_tunnel_dump_t_handler (vl_api_gtpu_tunnel_dump_t * mp)
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (t, gtm->tunnels)
{
send_gtpu_tunnel_details(t, reg, mp->context);
- }
- /* *INDENT-ON* */
+ }
}
else
{
@@ -257,6 +317,184 @@ vl_api_gtpu_tunnel_dump_t_handler (vl_api_gtpu_tunnel_dump_t * mp)
}
}
+static void
+send_gtpu_tunnel_details_v2 (gtpu_tunnel_t *t, vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_gtpu_tunnel_v2_details_t *rmp;
+ vlib_counter_t result_rx;
+ vlib_counter_t result_tx;
+ gtpu_main_t *gtm = &gtpu_main;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ u8 is_ipv6 = !ip46_address_is_ip4 (&t->dst);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_GTPU_TUNNEL_V2_DETAILS + gtm->msg_id_base);
+
+ ip_address_encode (&t->src, is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &rmp->src_address);
+ ip_address_encode (&t->dst, is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &rmp->dst_address);
+
+ rmp->encap_vrf_id = is_ipv6 ?
+ htonl (im6->fibs[t->encap_fib_index].ft_table_id) :
+ htonl (im4->fibs[t->encap_fib_index].ft_table_id);
+ rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
+ rmp->teid = htonl (t->teid);
+ rmp->tteid = htonl (t->tteid);
+ rmp->decap_next_index = htonl (t->decap_next_index);
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->context = context;
+ rmp->pdu_extension = t->pdu_extension;
+ rmp->qfi = t->qfi;
+ rmp->is_forwarding = t->is_forwarding;
+ rmp->forwarding_type = htonl (t->forwarding_type);
+
+ get_combined_counters (t->sw_if_index, &result_rx, &result_tx);
+ rmp->counters.packets_rx = clib_net_to_host_u64 (result_rx.packets);
+ rmp->counters.packets_tx = clib_net_to_host_u64 (result_tx.packets);
+ rmp->counters.bytes_rx = clib_net_to_host_u64 (result_rx.bytes);
+ rmp->counters.bytes_tx = clib_net_to_host_u64 (result_tx.bytes);
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_gtpu_tunnel_v2_dump_t_handler (vl_api_gtpu_tunnel_v2_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ gtpu_main_t *gtm = &gtpu_main;
+ gtpu_tunnel_t *t;
+ u32 sw_if_index;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ pool_foreach (t, gtm->tunnels)
+ {
+ send_gtpu_tunnel_details_v2 (t, reg, mp->context);
+ }
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (gtm->tunnel_index_by_sw_if_index)) ||
+ (~0 == gtm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &gtm->tunnels[gtm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_gtpu_tunnel_details_v2 (t, reg, mp->context);
+ }
+}
+
+static void
+vl_api_gtpu_add_del_forward_t_handler (vl_api_gtpu_add_del_forward_t *mp)
+{
+ vl_api_gtpu_add_del_forward_reply_t *rmp;
+ int rv = 0;
+ gtpu_main_t *gtm = &gtpu_main;
+
+ vnet_gtpu_add_mod_del_tunnel_args_t a = {
+ .opn = mp->is_add ? GTPU_ADD_TUNNEL : GTPU_DEL_TUNNEL,
+ .mcast_sw_if_index = 0,
+ .decap_next_index = ntohl (mp->decap_next_index),
+ .teid = 0,
+ .tteid = 0,
+ .pdu_extension = 0,
+ .qfi = 0,
+ .is_forwarding = 1,
+ .forwarding_type = ntohl (mp->forwarding_type),
+ };
+ ip_address_decode (&mp->dst_address, &a.dst);
+ /* Will be overwritten later */
+ ip_address_decode (&mp->dst_address, &a.src);
+
+ u8 is_ipv6 = !ip46_address_is_ip4 (&a.dst);
+ a.encap_fib_index =
+ fib_table_find (fib_ip_proto (is_ipv6), ntohl (mp->encap_vrf_id));
+
+ if (a.encap_fib_index == ~0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+
+ if (ip46_address_is_multicast (&a.dst) &&
+ !vnet_sw_if_index_is_api_valid (a.mcast_sw_if_index))
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto out;
+ }
+
+ u32 sw_if_index = ~0;
+ rv = vnet_gtpu_add_del_forwarding (&a, &sw_if_index);
+
+out:
+ REPLY_MACRO2 (VL_API_GTPU_ADD_DEL_FORWARD_REPLY,
+ ({ rmp->sw_if_index = ntohl (sw_if_index); }));
+}
+
+static void
+vl_api_gtpu_get_transfer_counts_t_handler (
+ vl_api_gtpu_get_transfer_counts_t *mp)
+{
+ vl_api_gtpu_get_transfer_counts_reply_t *rmp;
+ int rv = 0;
+ vlib_counter_t result_rx;
+ vlib_counter_t result_tx;
+ gtpu_main_t *gtm = &gtpu_main;
+ u32 count = 0;
+ u32 sw_if_index;
+ u32 capacity = ntohl (mp->capacity);
+ u32 sw_if_index_start = ntohl (mp->sw_if_index_start);
+ int extra_size = sizeof (rmp->tunnels[0]) * capacity;
+
+ if (sw_if_index_start >= vec_len (gtm->tunnel_index_by_sw_if_index))
+ {
+ capacity = 0;
+ extra_size = 0;
+ }
+ sw_if_index = sw_if_index_start;
+
+ REPLY_MACRO4 (
+ VL_API_GTPU_GET_TRANSFER_COUNTS_REPLY, extra_size, ({
+ for (; count < capacity; sw_if_index++)
+ {
+ if (sw_if_index >= vec_len (gtm->tunnel_index_by_sw_if_index))
+ {
+ // No more tunnels
+ break;
+ }
+ if (~0 == gtm->tunnel_index_by_sw_if_index[sw_if_index])
+ {
+ // Skip inactive/deleted tunnel
+ continue;
+ }
+ rmp->tunnels[count].sw_if_index = htonl (sw_if_index);
+ rmp->tunnels[count].reserved = 0;
+
+ get_combined_counters (sw_if_index, &result_rx, &result_tx);
+ rmp->tunnels[count].counters.packets_rx =
+ clib_net_to_host_u64 (result_rx.packets);
+ rmp->tunnels[count].counters.packets_tx =
+ clib_net_to_host_u64 (result_tx.packets);
+ rmp->tunnels[count].counters.bytes_rx =
+ clib_net_to_host_u64 (result_rx.bytes);
+ rmp->tunnels[count].counters.bytes_tx =
+ clib_net_to_host_u64 (result_tx.bytes);
+ count++;
+ }
+ rmp->count = htonl (count);
+ }));
+}
+
#include <gtpu/gtpu.api.c>
static clib_error_t *
gtpu_api_hookup (vlib_main_t * vm)
diff --git a/src/plugins/gtpu/gtpu_decap.c b/src/plugins/gtpu/gtpu_decap.c
index 40243dbcc53..093d85ef13c 100644
--- a/src/plugins/gtpu/gtpu_decap.c
+++ b/src/plugins/gtpu/gtpu_decap.c
@@ -26,6 +26,8 @@ typedef struct {
u32 tunnel_index;
u32 error;
u32 teid;
+ gtpu_header_t header;
+ u8 forwarding_type;
} gtpu_rx_trace_t;
static u8 * format_gtpu_rx_trace (u8 * s, va_list * args)
@@ -36,14 +38,29 @@ static u8 * format_gtpu_rx_trace (u8 * s, va_list * args)
if (t->tunnel_index != ~0)
{
- s = format (s, "GTPU decap from gtpu_tunnel%d teid %d next %d error %d",
- t->tunnel_index, t->teid, t->next_index, t->error);
+ s = format (s, "GTPU decap from gtpu_tunnel%d ", t->tunnel_index);
+ switch (t->forwarding_type)
+ {
+ case GTPU_FORWARD_BAD_HEADER:
+ s = format (s, "forwarding bad-header ");
+ break;
+ case GTPU_FORWARD_UNKNOWN_TEID:
+ s = format (s, "forwarding unknown-teid ");
+ break;
+ case GTPU_FORWARD_UNKNOWN_TYPE:
+ s = format (s, "forwarding unknown-type ");
+ break;
+ }
+ s = format (s, "teid %u, ", t->teid);
}
else
{
- s = format (s, "GTPU decap error - tunnel for teid %d does not exist",
+ s = format (s, "GTPU decap error - tunnel for teid %u does not exist, ",
t->teid);
}
+ s = format (s, "next %d error %d, ", t->next_index, t->error);
+ s = format (s, "flags: 0x%x, type: %d, length: %d", t->header.ver_flags,
+ t->header.type, t->header.length);
return s;
}
@@ -53,6 +70,7 @@ validate_gtpu_fib (vlib_buffer_t *b, gtpu_tunnel_t *t, u32 is_ip4)
return t->encap_fib_index == vlib_buffer_get_ip_fib_index (b, is_ip4);
}
+// Shared decap loop, run for every frame of the gtpu4-/gtpu6-input nodes
always_inline uword
gtpu_input (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -75,28 +93,41 @@ gtpu_input (vlib_main_t * vm,
else
clib_memset (&last_key6, 0xff, sizeof (last_key6));
+  // pointer to the frame's vector of buffer indices
from = vlib_frame_vector_args (from_frame);
+ // number of packets left in frame
n_left_from = from_frame->n_vectors;
+  // speculated next node, cached from the previous dispatch
next_index = node->cached_next_index;
+  // sw_if_index currently accumulating rx stats
stats_sw_if_index = node->runtime_data[0];
+  // packet/byte totals accumulated for stats_sw_if_index
stats_n_packets = stats_n_bytes = 0;
+  // run until the input frame is empty
while (n_left_from > 0)
{
u32 n_left_to_next;
+      // get space in the frame headed to next_index
vlib_get_next_frame (vm, node, next_index,
to_next, n_left_to_next);
+      // dual loop: at least 4 packets left in the input frame and at
+      // least 2 free slots in the frame going to the next node
while (n_left_from >= 4 && n_left_to_next >= 2)
{
- u32 bi0, bi1;
+ // buffer index for loading packet data
+ u32 bi0, bi1;
+ // vlib packet buffer
vlib_buffer_t * b0, * b1;
+ // next node index for each packet
u32 next0, next1;
- ip4_header_t * ip4_0, * ip4_1;
- ip6_header_t * ip6_0, * ip6_1;
- gtpu_header_t * gtpu0, * gtpu1;
- u32 gtpu_hdr_len0, gtpu_hdr_len1;
+ // outer IPv4/IPv6 and gtpu header pointers
+ ip4_header_t *ip4_0, *ip4_1;
+ ip6_header_t *ip6_0, *ip6_1;
+ gtpu_header_t *gtpu0, *gtpu1;
+ i32 gtpu_hdr_len0, gtpu_hdr_len1;
uword * p0, * p1;
u32 tunnel_index0, tunnel_index1;
gtpu_tunnel_t * t0, * t1, * mt0 = NULL, * mt1 = NULL;
@@ -106,11 +137,19 @@ gtpu_input (vlib_main_t * vm,
u32 sw_if_index0, sw_if_index1, len0, len1;
u8 has_space0, has_space1;
u8 ver0, ver1;
+ udp_header_t *udp0, *udp1;
+ ip_csum_t sum0, sum1;
+ u32 old0, old1;
+ gtpu_ext_header_t ext = { .type = 0, .len = 0, .pad = 0 };
+ gtpu_ext_header_t *ext0, *ext1;
+ bool is_fast_track0, is_fast_track1;
+ ext0 = ext1 = &ext;
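+ /* The zeroed dummy ext keeps the header-length math below
+ * branch-free: when the E bit is clear, ext->len is 0 and
+ * contributes nothing. */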
/* Prefetch next iteration. */
{
vlib_buffer_t * p2, * p3;
+ // prefetch the buffers of the next iteration (from[2], from[3])
p2 = vlib_get_buffer (vm, from[2]);
p3 = vlib_get_buffer (vm, from[3]);
@@ -121,57 +160,172 @@ gtpu_input (vlib_main_t * vm,
CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
}
+ // get buffer indices from the frame
bi0 = from[0];
bi1 = from[1];
+ // speculatively enqueue the packets to the next node
to_next[0] = bi0;
to_next[1] = bi1;
+ // advance in the input frame
from += 2;
+ // advance in the output frame
to_next += 2;
+ // decrement the free-slot count of the next frame
n_left_to_next -= 2;
+ // decrement the count of packets left in the current frame
n_left_from -= 2;
+ // translate buffer indices to buffer headers
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
/* udp leaves current_data pointing at the gtpu header */
- gtpu0 = vlib_buffer_get_current (b0);
- gtpu1 = vlib_buffer_get_current (b1);
- if (is_ip4)
- {
- ip4_0 = (void *)((u8*)gtpu0 - sizeof(udp_header_t) - sizeof(ip4_header_t));
- ip4_1 = (void *)((u8*)gtpu1 - sizeof(udp_header_t) - sizeof(ip4_header_t));
- }
- else
- {
- ip6_0 = (void *)((u8*)gtpu0 - sizeof(udp_header_t) - sizeof(ip6_header_t));
- ip6_1 = (void *)((u8*)gtpu1 - sizeof(udp_header_t) - sizeof(ip6_header_t));
- }
+ // get pointers to the start of the gtpu headers
+ gtpu0 = vlib_buffer_get_current (b0);
+ gtpu1 = vlib_buffer_get_current (b1);
+ if (is_ip4)
+ {
+ ip4_0 = (void *) ((u8 *) gtpu0 - sizeof (udp_header_t) -
+ sizeof (ip4_header_t));
+ ip4_1 = (void *) ((u8 *) gtpu1 - sizeof (udp_header_t) -
+ sizeof (ip4_header_t));
+ }
+ else
+ {
+ ip6_0 = (void *) ((u8 *) gtpu0 - sizeof (udp_header_t) -
+ sizeof (ip6_header_t));
+ ip6_1 = (void *) ((u8 *) gtpu1 - sizeof (udp_header_t) -
+ sizeof (ip6_header_t));
+ }
+ udp0 = (void *) ((u8 *) gtpu0 - sizeof (udp_header_t));
+ udp1 = (void *) ((u8 *) gtpu1 - sizeof (udp_header_t));
- tunnel_index0 = ~0;
- error0 = 0;
+ tunnel_index0 = ~0;
+ error0 = 0;
- tunnel_index1 = ~0;
- error1 = 0;
+ tunnel_index1 = ~0;
+ error1 = 0;
- /* speculatively load gtp header version field */
- ver0 = gtpu0->ver_flags;
- ver1 = gtpu1->ver_flags;
+ /* speculatively load gtp header version field */
+ ver0 = gtpu0->ver_flags;
+ ver1 = gtpu1->ver_flags;
/*
* Manipulate gtpu header
* TBD: Manipulate Sequence Number and N-PDU Number
* TBD: Manipulate Next Extension Header
*/
- gtpu_hdr_len0 = sizeof(gtpu_header_t) - (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4);
- gtpu_hdr_len1 = sizeof(gtpu_header_t) - (((ver1 & GTPU_E_S_PN_BIT) == 0) * 4);
-
- has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
- has_space1 = vlib_buffer_has_space (b1, gtpu_hdr_len1);
- if (PREDICT_FALSE (((ver0 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space0)))
+ /* Perform all tests assuming the packet has the needed space.
+ * Check for version 1, PT bit set, reserved bit clear.
+ * Check for message type 255 (G-PDU).
+ */
+ is_fast_track0 =
+ ((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track0 = is_fast_track0 & (gtpu0->type == 255);
+
+ is_fast_track1 =
+ ((ver1 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track1 = is_fast_track1 & (gtpu1->type == 255);
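+ /* Example (illustrative): ver_flags 0x30 = version 1 (0x20) plus
+ * PT set (0x10), with E/S/PN and the reserved bit clear; together
+ * with message type 255 (G-PDU) such a packet takes the fast
+ * track. Any other version, a clear PT, a set reserved bit or
+ * another message type diverts to the slow path below. */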
+
+ /* Make the header overlap the end of the gtpu_header_t, so
+ * that it starts with the same Next extension header as the
+ * gtpu_header_t.
+ * This means that the gtpu_ext_header_t (ext) has the type
+ * from the previous header and the length from the current one.
+ * Works both for the first gtpu_header_t and all following
+ * gtpu_ext_header_t extensions.
+ * Copy the ext data if the E bit is set, else use the 0 value.
+ */
+ ext0 = (ver0 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu0->next_ext_type :
+ &ext;
+ ext1 = (ver1 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu1->next_ext_type :
+ &ext;
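+ /* Overlap illustration (assuming the 4-byte layout
+ * { u8 type; u8 len; u16 pad; } implied by the initializer above):
+ * ext0 sits on the last byte of gtpu_header_t, so ext0->type
+ * aliases next_ext_type while ext0->len reads the first byte, the
+ * length, of the extension that follows in the packet. */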
+
+ /* If one or more of the E, S and PN flags are set, all three
+ * optional fields are present: the gtpu_header_t then carries the
+ * sequence number, N-PDU number and next extension header type.
+ * If none is set, subtract those 4 bytes from the header. Then add
+ * the length of the extension: 0 * 4 when E is clear, else
+ * ext->len from the gtp extension. The length is always a
+ * multiple of 4.
+ * Note: this length is only valid if the header itself is valid,
+ * so it must be verified before use.
+ */
+ gtpu_hdr_len0 = sizeof (gtpu_header_t) -
+ (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext0->len * 4;
+ gtpu_hdr_len1 = sizeof (gtpu_header_t) -
+ (((ver1 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext1->len * 4;
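+ /* Worked example: sizeof (gtpu_header_t) is 12, the 8 mandatory
+ * bytes plus the optional seq/N-PDU/next-ext word. E/S/PN all
+ * clear: 12 - 4 + 0 = 8 bytes. E set, with one extension of len 1
+ * (in 4-byte units): 12 - 0 + 1 * 4 = 16 bytes. */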
+
+ /* Get the next extension, unconditionally.
+ * If E was not set in the gtp header, ext->len is zero.
+ * If E was set, ext0 will now point into the packet buffer.
+ * If the gtp packet is illegal this might point outside the buffer.
+ * TBD: check the updated ext0->type != 0 and continue stripping
+ * extensions. Kept only for clarity, will be optimized away.
+ */
+ ext0 += ext0->len * 4 / sizeof (*ext0);
+ ext1 += ext1->len * 4 / sizeof (*ext1);
+
+ /* Check the space, if this is true then ext0 points to a valid
+ * location in the buffer as well.
+ */
+ has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
+ has_space1 = vlib_buffer_has_space (b1, gtpu_hdr_len1);
+
+ /* Diverge the packet paths for 0 and 1 */
+ if (PREDICT_FALSE ((!is_fast_track0) | (!has_space0)))
{
- error0 = has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ /* Not fast path. ext0 and gtpu_hdr_len0 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space0)))
+ {
+ /* The header or size is wrong */
+ error0 =
+ has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is an unsupported/bad packet.
+ * Check if it is to be forwarded.
+ */
+ if (is_ip4)
+ tunnel_index0 = gtm->bad_header_forward_tunnel_index_ipv4;
+ else
+ tunnel_index0 = gtm->bad_header_forward_tunnel_index_ipv6;
+
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
+
+ goto trace0;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type.
+ */
+ error0 = GTPU_ERROR_UNSUPPORTED_TYPE;
next0 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is an error/nonstandard packet
+ * Check if it is to be forwarded. */
+ if (is_ip4)
+ tunnel_index0 = gtm->unknown_type_forward_tunnel_index_ipv4;
+ else
+ tunnel_index0 = gtm->unknown_type_forward_tunnel_index_ipv6;
+
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
+
+ /* The packet is not forwarded */
goto trace0;
}
@@ -180,22 +334,31 @@ gtpu_input (vlib_main_t * vm,
key4_0.src = ip4_0->src_address.as_u32;
key4_0.teid = gtpu0->teid;
- /* Make sure GTPU tunnel exist according to packet SIP and teid
- * SIP identify a GTPU path, and teid identify a tunnel in a given GTPU path */
- if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
- {
- p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
- if (PREDICT_FALSE (p0 == NULL))
- {
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace0;
- }
- last_key4.as_u64 = key4_0.as_u64;
- tunnel_index0 = last_tunnel_index = p0[0];
- }
- else
- tunnel_index0 = last_tunnel_index;
+ /* Make sure a GTPU tunnel exists for this packet's source IP and
+ * teid: the source IP identifies a GTPU path, and the teid a
+ * tunnel within that path */
+ if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
+ {
+ p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ /* This is a standard packet, but no tunnel was found.
+ * Check if it is to be forwarded. */
+ tunnel_index0 =
+ gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
+ goto trace0;
+ }
+ last_key4.as_u64 = key4_0.as_u64;
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else // same key as the previous packet; reuse the cached
+ // tunnel index and skip the hash lookup
+ tunnel_index0 = last_tunnel_index;
+ // fetch the tunnel from the pool
t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
/* Validate GTPU tunnel encap-fib index against packet */
@@ -203,10 +366,13 @@ gtpu_input (vlib_main_t * vm,
{
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
goto trace0;
}
- /* Validate GTPU tunnel SIP against packet DIP */
+ /* Validate GTPU tunnel SourceIP against packet DestinationIP */
if (PREDICT_TRUE (ip4_0->dst_address.as_u32 == t0->src.ip4.as_u32))
goto next0; /* valid packet */
if (PREDICT_FALSE (ip4_address_is_multicast (&ip4_0->dst_address)))
@@ -223,6 +389,9 @@ gtpu_input (vlib_main_t * vm,
}
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
goto trace0;
} else /* !is_ip4 */ {
@@ -239,13 +408,19 @@ gtpu_input (vlib_main_t * vm,
{
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
- goto trace0;
- }
- clib_memcpy_fast (&last_key6, &key6_0, sizeof(key6_0));
- tunnel_index0 = last_tunnel_index = p0[0];
- }
- else
- tunnel_index0 = last_tunnel_index;
+ /* This is a standard packet, but no tunnel was found.
+ * Check if it is to be forwarded. */
+ tunnel_index0 =
+ gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
+ goto trace0;
+ }
+ clib_memcpy_fast (&last_key6, &key6_0, sizeof (key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
/* Validate GTPU tunnel encap-fib index against packet */
@@ -253,6 +428,9 @@ gtpu_input (vlib_main_t * vm,
{
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
goto trace0;
}
@@ -274,28 +452,85 @@ gtpu_input (vlib_main_t * vm,
}
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
goto trace0;
}
+ forward0:
+ /* Get the tunnel */
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_ERROR_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ /* Clear the error, next0 will be overwritten by the tunnel */
+ error0 = 0;
+
+ if (is_ip4)
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len0 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip4_header_t));
+ /* Backup the IP4 checksum and address */
+ sum0 = ip4_0->checksum;
+ old0 = ip4_0->dst_address.as_u32;
+
+ /* Update IP address of the packet using the src from the tunnel
+ */
+ ip4_0->dst_address.as_u32 = t0->src.ip4.as_u32;
+
+ /* Fix the IP4 checksum */
+ sum0 = ip_csum_update (sum0, old0, ip4_0->dst_address.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip4_0->checksum = ip_csum_fold (sum0);
+ }
+ else
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len0 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip6_header_t));
+ /* IPv6 UDP checksum is mandatory */
+ int bogus = 0;
+ udp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6_0, &bogus);
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ }
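+
+ /* Note: gtpu_hdr_len0 is negative at this point, so the
+ * vlib_buffer_advance() after next0 rewinds current_data and the
+ * rewritten IP+UDP headers are inside the buffer again when the
+ * tunnel re-forwards the packet. */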
next0:
- /* Pop gtpu header */
+ /* Pop the gtpu header from the buffer, or, in the forward case,
+ * push the existing IP+UDP headers back into it */
vlib_buffer_advance (b0, gtpu_hdr_len0);
- next0 = t0->decap_next_index;
- sw_if_index0 = t0->sw_if_index;
- len0 = vlib_buffer_length_in_chain (vm, b0);
+ // next node in the graph
+ next0 = t0->decap_next_index;
+ // sw_if_index of the tunnel interface the packet arrived on
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- if (PREDICT_TRUE(next0 == GTPU_INPUT_NEXT_L2_INPUT))
- vnet_update_l2_len (b0);
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ if (PREDICT_TRUE (next0 == GTPU_INPUT_NEXT_L2_INPUT))
+ vnet_update_l2_len (b0);
- /* Set packet input sw_if_index to unicast GTPU tunnel for learning */
- vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ /* Set packet input sw_if_index to unicast GTPU tunnel for learning
+ */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ // for multicast packets, attribute stats to the mcast tunnel interface
sw_if_index0 = (mt0) ? mt0->sw_if_index : sw_if_index0;
- pkts_decapsulated ++;
- stats_n_packets += 1;
- stats_n_bytes += len0;
+ // Update stats
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
/* Batch stats increment on the same gtpu tunnel so counter
is not incremented per packet */
@@ -324,12 +559,61 @@ gtpu_input (vlib_main_t * vm,
tr->error = error0;
tr->tunnel_index = tunnel_index0;
tr->teid = has_space0 ? clib_net_to_host_u32(gtpu0->teid) : ~0;
- }
- if (PREDICT_FALSE (((ver1 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space1)))
+ if (vlib_buffer_has_space (b0, 4))
+ {
+ tr->header.ver_flags = gtpu0->ver_flags;
+ tr->header.type = gtpu0->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu0->length);
+ }
+ }
+
+ /* End of processing for packet 0, start for packet 1 */
+ if (PREDICT_FALSE ((!is_fast_track1) | (!has_space1)))
{
- error1 = has_space1 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ /* Not fast path. ext1 and gtpu_hdr_len1 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver1 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space1)))
+ {
+ /* The header or size is wrong */
+ error1 =
+ has_space1 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is an unsupported/bad packet.
+ * Check if it is to be forwarded.
+ */
+ if (is_ip4)
+ tunnel_index1 = gtm->bad_header_forward_tunnel_index_ipv4;
+ else
+ tunnel_index1 = gtm->bad_header_forward_tunnel_index_ipv6;
+
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
+
+ goto trace1;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type.
+ */
+ error1 = GTPU_ERROR_UNSUPPORTED_TYPE;
next1 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is an error/nonstandard packet
+ * Check if it is to be forwarded. */
+ if (is_ip4)
+ tunnel_index1 = gtm->unknown_type_forward_tunnel_index_ipv4;
+ else
+ tunnel_index1 = gtm->unknown_type_forward_tunnel_index_ipv6;
+
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
+
+ /* The packet is not forwarded */
goto trace1;
}
@@ -347,20 +631,27 @@ gtpu_input (vlib_main_t * vm,
{
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
- goto trace1;
- }
- last_key4.as_u64 = key4_1.as_u64;
- tunnel_index1 = last_tunnel_index = p1[0];
- }
- else
- tunnel_index1 = last_tunnel_index;
- t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
+ tunnel_index1 =
+ gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
+ goto trace1;
+ }
+ last_key4.as_u64 = key4_1.as_u64;
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
/* Validate GTPU tunnel encap-fib index against packet */
if (PREDICT_FALSE (validate_gtpu_fib (b1, t1, is_ip4) == 0))
{
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index1 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
goto trace1;
}
@@ -381,6 +672,9 @@ gtpu_input (vlib_main_t * vm,
}
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index1 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
goto trace1;
} else /* !is_ip4 */ {
@@ -398,21 +692,28 @@ gtpu_input (vlib_main_t * vm,
{
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
- goto trace1;
- }
+ tunnel_index1 =
+ gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
+ goto trace1;
+ }
- clib_memcpy_fast (&last_key6, &key6_1, sizeof(key6_1));
- tunnel_index1 = last_tunnel_index = p1[0];
- }
- else
- tunnel_index1 = last_tunnel_index;
- t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
+ clib_memcpy_fast (&last_key6, &key6_1, sizeof (key6_1));
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
/* Validate GTPU tunnel encap-fib index against packet */
if (PREDICT_FALSE (validate_gtpu_fib (b1, t1, is_ip4) == 0))
{
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index1 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
goto trace1;
}
@@ -434,11 +735,63 @@ gtpu_input (vlib_main_t * vm,
}
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index1 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
goto trace1;
}
+ forward1:
+
+ /* Get the tunnel */
+ t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b1, t1, is_ip4) == 0))
+ {
+ error1 = GTPU_ERROR_NO_ERROR_TUNNEL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ /* Clear the error, next1 will be overwritten by the tunnel */
+ error1 = 0;
+
+ if (is_ip4)
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len1 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip4_header_t));
+
+ /* Backup the IP4 checksum and address */
+ sum1 = ip4_1->checksum;
+ old1 = ip4_1->dst_address.as_u32;
+
+ /* Update IP address of the packet using the src from the tunnel
+ */
+ ip4_1->dst_address.as_u32 = t1->src.ip4.as_u32;
+
+ /* Fix the IP4 checksum */
+ sum1 = ip_csum_update (sum1, old1, ip4_1->dst_address.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip4_1->checksum = ip_csum_fold (sum1);
+ }
+ else
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len1 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip6_header_t));
+
+ /* IPv6 UDP checksum is mandatory */
+ int bogus = 0;
+ udp1->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip6_1, &bogus);
+ if (udp1->checksum == 0)
+ udp1->checksum = 0xffff;
+ }
next1:
- /* Pop gtpu header */
+ /* Pop gtpu header / push IP+UDP header */
vlib_buffer_advance (b1, gtpu_hdr_len1);
next1 = t1->decap_next_index;
@@ -484,13 +837,21 @@ gtpu_input (vlib_main_t * vm,
tr->error = error1;
tr->tunnel_index = tunnel_index1;
tr->teid = has_space1 ? clib_net_to_host_u32(gtpu1->teid) : ~0;
- }
+ if (vlib_buffer_has_space (b1, 4))
+ {
+ tr->header.ver_flags = gtpu1->ver_flags;
+ tr->header.type = gtpu1->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu1->length);
+ }
+ }
vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
to_next, n_left_to_next,
bi0, bi1, next0, next1);
}
+ /* Fewer than 4 packets left in the frame, or fewer than 2 free
+ slots in the next frame: process the remainder one at a time */
while (n_left_from > 0 && n_left_to_next > 0)
{
u32 bi0;
@@ -499,7 +860,7 @@ gtpu_input (vlib_main_t * vm,
ip4_header_t * ip4_0;
ip6_header_t * ip6_0;
gtpu_header_t * gtpu0;
- u32 gtpu_hdr_len0;
+ i32 gtpu_hdr_len0;
uword * p0;
u32 tunnel_index0;
gtpu_tunnel_t * t0, * mt0 = NULL;
@@ -509,6 +870,13 @@ gtpu_input (vlib_main_t * vm,
u32 sw_if_index0, len0;
u8 has_space0;
u8 ver0;
+ udp_header_t *udp0;
+ ip_csum_t sum0;
+ u32 old0;
+ gtpu_ext_header_t ext = { .type = 0, .len = 0, .pad = 0 };
+ gtpu_ext_header_t *ext0;
+ bool is_fast_track0;
+ ext0 = &ext;
bi0 = from[0];
to_next[0] = bi0;
@@ -526,112 +894,197 @@ gtpu_input (vlib_main_t * vm,
} else {
ip6_0 = (void *)((u8*)gtpu0 - sizeof(udp_header_t) - sizeof(ip6_header_t));
}
+ udp0 = (void *) ((u8 *) gtpu0 - sizeof (udp_header_t));
- tunnel_index0 = ~0;
- error0 = 0;
-
- /* speculatively load gtp header version field */
- ver0 = gtpu0->ver_flags;
+ tunnel_index0 = ~0;
+ error0 = 0;
+ /* speculatively load gtp header version field */
+ ver0 = gtpu0->ver_flags;
/*
* Manipulate gtpu header
* TBD: Manipulate Sequence Number and N-PDU Number
* TBD: Manipulate Next Extension Header
*/
- gtpu_hdr_len0 = sizeof(gtpu_header_t) - (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4);
- has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
+ is_fast_track0 =
+ ((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track0 = is_fast_track0 & (gtpu0->type == 255);
- if (PREDICT_FALSE (((ver0 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space0)))
- {
- error0 = has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
+ ext0 = (ver0 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu0->next_ext_type :
+ &ext;
- if (is_ip4) {
- key4_0.src = ip4_0->src_address.as_u32;
- key4_0.teid = gtpu0->teid;
+ gtpu_hdr_len0 = sizeof (gtpu_header_t) -
+ (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext0->len * 4;
- /* Make sure GTPU tunnel exist according to packet SIP and teid
- * SIP identify a GTPU path, and teid identify a tunnel in a given GTPU path */
- if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
- {
- p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
- if (PREDICT_FALSE (p0 == NULL))
- {
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
- last_key4.as_u64 = key4_0.as_u64;
- tunnel_index0 = last_tunnel_index = p0[0];
- }
- else
- tunnel_index0 = last_tunnel_index;
- t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+ ext0 += ext0->len * 4 / sizeof (*ext0);
- /* Validate GTPU tunnel encap-fib index against packet */
- if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
- {
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
+ has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
- /* Validate GTPU tunnel SIP against packet DIP */
- if (PREDICT_TRUE (ip4_0->dst_address.as_u32 == t0->src.ip4.as_u32))
- goto next00; /* valid packet */
- if (PREDICT_FALSE (ip4_address_is_multicast (&ip4_0->dst_address)))
- {
- key4_0.src = ip4_0->dst_address.as_u32;
- key4_0.teid = gtpu0->teid;
- /* Make sure mcast GTPU tunnel exist by packet DIP and teid */
- p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
- if (PREDICT_TRUE (p0 != NULL))
- {
- mt0 = pool_elt_at_index (gtm->tunnels, p0[0]);
- goto next00; /* valid packet */
- }
- }
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
+ if (PREDICT_FALSE ((!is_fast_track0) | (!has_space0)))
+ {
+ /* Not fast path. ext0 and gtpu_hdr_len0 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space0)))
+ {
+ /* The header or size is wrong */
+ error0 =
+ has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is an unsupported/bad packet.
+ * Check if it is to be forwarded.
+ */
+ if (is_ip4)
+ tunnel_index0 = gtm->bad_header_forward_tunnel_index_ipv4;
+ else
+ tunnel_index0 = gtm->bad_header_forward_tunnel_index_ipv6;
- } else /* !is_ip4 */ {
- key6_0.src.as_u64[0] = ip6_0->src_address.as_u64[0];
- key6_0.src.as_u64[1] = ip6_0->src_address.as_u64[1];
- key6_0.teid = gtpu0->teid;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
- /* Make sure GTPU tunnel exist according to packet SIP and teid
- * SIP identify a GTPU path, and teid identify a tunnel in a given GTPU path */
- if (PREDICT_FALSE (memcmp(&key6_0, &last_key6, sizeof(last_key6)) != 0))
- {
- p0 = hash_get_mem (gtm->gtpu6_tunnel_by_key, &key6_0);
- if (PREDICT_FALSE (p0 == NULL))
- {
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
- clib_memcpy_fast (&last_key6, &key6_0, sizeof(key6_0));
- tunnel_index0 = last_tunnel_index = p0[0];
- }
- else
- tunnel_index0 = last_tunnel_index;
- t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+ goto trace00;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type
+ */
+ error0 = GTPU_ERROR_UNSUPPORTED_TYPE;
+ next0 = GTPU_INPUT_NEXT_DROP;
- /* Validate GTPU tunnel encap-fib index against packet */
- if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
- {
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
+ /* This is an error/nonstandard packet
+ * Check if it is to be forwarded. */
+ if (is_ip4)
+ tunnel_index0 = gtm->unknown_type_forward_tunnel_index_ipv4;
+ else
+ tunnel_index0 = gtm->unknown_type_forward_tunnel_index_ipv6;
- /* Validate GTPU tunnel SIP against packet DIP */
- if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address,
- &t0->src.ip6)))
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+
+ /* The packet is not forwarded */
+ goto trace00;
+ }
+
+ if (is_ip4)
+ {
+ key4_0.src = ip4_0->src_address.as_u32;
+ key4_0.teid = gtpu0->teid;
+
+ /* Make sure a GTPU tunnel exists for this packet's SIP and teid:
+ * the SIP identifies a GTPU path, and the teid a tunnel in that
+ * path */
+ if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
+ {
+ // Cache miss, so try normal lookup now.
+ p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is a standard packet, but no tunnel was found.
+ * Check if it is to be forwarded. */
+ tunnel_index0 =
+ gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+ goto trace00;
+ }
+ // Update the key/tunnel cache for normal packets
+ last_key4.as_u64 = key4_0.as_u64;
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+ goto trace00;
+ }
+
+ /* Validate GTPU tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip4_0->dst_address.as_u32 ==
+ t0->src.ip4.as_u32))
+ goto next00; /* valid packet */
+ if (PREDICT_FALSE (
+ ip4_address_is_multicast (&ip4_0->dst_address)))
+ {
+ key4_0.src = ip4_0->dst_address.as_u32;
+ key4_0.teid = gtpu0->teid;
+ /* Make sure mcast GTPU tunnel exist by packet DIP and teid
+ */
+ p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_TRUE (p0 != NULL))
+ {
+ mt0 = pool_elt_at_index (gtm->tunnels, p0[0]);
+ goto next00; /* valid packet */
+ }
+ }
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+ goto trace00;
+ }
+ else /* !is_ip4 */
+ {
+ key6_0.src.as_u64[0] = ip6_0->src_address.as_u64[0];
+ key6_0.src.as_u64[1] = ip6_0->src_address.as_u64[1];
+ key6_0.teid = gtpu0->teid;
+
+ /* Make sure a GTPU tunnel exists for this packet's SIP and teid:
+ * the SIP identifies a GTPU path, and the teid a tunnel in that
+ * path */
+ if (PREDICT_FALSE (
+ memcmp (&key6_0, &last_key6, sizeof (last_key6)) != 0))
+ {
+ p0 = hash_get_mem (gtm->gtpu6_tunnel_by_key, &key6_0);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 =
+ gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+ goto trace00;
+ }
+ clib_memcpy_fast (&last_key6, &key6_0, sizeof (key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+ goto trace00;
+ }
+
+ /* Validate GTPU tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (
+ ip6_address_is_equal (&ip6_0->dst_address, &t0->src.ip6)))
goto next00; /* valid packet */
if (PREDICT_FALSE (ip6_address_is_multicast (&ip6_0->dst_address)))
{
@@ -647,11 +1100,63 @@ gtpu_input (vlib_main_t * vm,
}
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
goto trace00;
- }
+ }
+
+ /* This can only be reached via goto */
+ forward00:
+ // Get the tunnel
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_ERROR_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ /* Clear the error, next0 will be overwritten by the tunnel */
+ error0 = 0;
+
+ if (is_ip4)
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len0 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip4_header_t));
+ /* Backup the IP4 checksum and address */
+ sum0 = ip4_0->checksum;
+ old0 = ip4_0->dst_address.as_u32;
+
+ /* Update IP address of the packet using the src from the tunnel
+ */
+ ip4_0->dst_address.as_u32 = t0->src.ip4.as_u32;
+
+ /* Fix the IP4 checksum */
+ sum0 = ip_csum_update (sum0, old0, ip4_0->dst_address.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip4_0->checksum = ip_csum_fold (sum0);
+ }
+ else
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len0 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip6_header_t));
+
+ /* IPv6 UDP checksum is mandatory */
+ int bogus = 0;
+ udp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6_0, &bogus);
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ }
next00:
- /* Pop gtpu header */
+ /* Pop gtpu header / push IP+UDP header */
vlib_buffer_advance (b0, gtpu_hdr_len0);
next0 = t0->decap_next_index;
@@ -697,7 +1202,13 @@ gtpu_input (vlib_main_t * vm,
tr->error = error0;
tr->tunnel_index = tunnel_index0;
tr->teid = has_space0 ? clib_net_to_host_u32(gtpu0->teid) : ~0;
- }
+ if (vlib_buffer_has_space (b0, 4))
+ {
+ tr->header.ver_flags = gtpu0->ver_flags;
+ tr->header.type = gtpu0->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu0->length);
+ }
+ }
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
bi0, next0);
@@ -790,6 +1301,8 @@ typedef enum {
IP_GTPU_BYPASS_N_NEXT,
} ip_vxan_bypass_next_t;
+/* This function determines whether a UDP packet is actually GTP-U and
+ must be forwarded to gtpu-input */
always_inline uword
ip_gtpu_bypass_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1356,128 +1869,183 @@ gtpu_flow_input (vlib_main_t * vm,
u32 sw_if_index0, sw_if_index1, len0, len1;
u8 has_space0 = 0, has_space1 = 0;
u8 ver0, ver1;
+ gtpu_ext_header_t ext = { .type = 0, .len = 0, .pad = 0 };
+ gtpu_ext_header_t *ext0, *ext1;
+ bool is_fast_track0, is_fast_track1;
+ ext0 = ext1 = &ext;
- /* Prefetch next iteration. */
- {
- vlib_buffer_t * p2, * p3;
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
- CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
- }
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
- bi0 = from[0];
- bi1 = from[1];
- to_next[0] = bi0;
- to_next[1] = bi1;
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
- /* udp leaves current_data pointing at the gtpu header */
- gtpu0 = vlib_buffer_get_current (b0);
- gtpu1 = vlib_buffer_get_current (b1);
+ /* udp leaves current_data pointing at the gtpu header */
+ gtpu0 = vlib_buffer_get_current (b0);
+ gtpu1 = vlib_buffer_get_current (b1);
- len0 = vlib_buffer_length_in_chain (vm, b0);
- len1 = vlib_buffer_length_in_chain (vm, b1);
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+ len1 = vlib_buffer_length_in_chain (vm, b1);
- tunnel_index0 = ~0;
- error0 = 0;
-
- tunnel_index1 = ~0;
- error1 = 0;
-
- ip_err0 = gtpu_check_ip (b0, len0);
- udp_err0 = gtpu_check_ip_udp_len (b0);
- ip_err1 = gtpu_check_ip (b1, len1);
- udp_err1 = gtpu_check_ip_udp_len (b1);
-
- if (PREDICT_FALSE (gtpu_local_need_csum_check (b0)))
- csum_err0 = !gtpu_validate_udp_csum (vm, b0);
- else
- csum_err0 = !gtpu_local_csum_is_valid (b0);
- if (PREDICT_FALSE (gtpu_local_need_csum_check (b1)))
- csum_err1 = !gtpu_validate_udp_csum (vm, b1);
- else
- csum_err1 = !gtpu_local_csum_is_valid (b1);
-
- if (ip_err0 || udp_err0 || csum_err0)
- {
- next0 = GTPU_INPUT_NEXT_DROP;
- error0 = gtpu_err_code (ip_err0, udp_err0, csum_err0);
- goto trace0;
- }
-
- /* speculatively load gtp header version field */
- ver0 = gtpu0->ver_flags;
-
- /*
- * Manipulate gtpu header
- * TBD: Manipulate Sequence Number and N-PDU Number
- * TBD: Manipulate Next Extension Header
- */
- gtpu_hdr_len0 = sizeof(gtpu_header_t) - (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4);
-
- has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
- if (PREDICT_FALSE (((ver0 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space0)))
- {
- error0 = has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace0;
- }
-
- /* Manipulate packet 0 */
- ASSERT (b0->flow_id != 0);
- tunnel_index0 = b0->flow_id - gtm->flow_id_start;
- t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
- b0->flow_id = 0;
-
- /* Pop gtpu header */
- vlib_buffer_advance (b0, gtpu_hdr_len0);
-
- /* assign the next node */
- if (PREDICT_FALSE (t0->decap_next_index != GTPU_INPUT_NEXT_IP4_INPUT) &&
- (t0->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
- {
- error0 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace0;
- }
- next0 = t0->decap_next_index;
+ tunnel_index0 = ~0;
+ error0 = 0;
- sw_if_index0 = t0->sw_if_index;
+ tunnel_index1 = ~0;
+ error1 = 0;
- /* Set packet input sw_if_index to unicast GTPU tunnel for learning */
- vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ ip_err0 = gtpu_check_ip (b0, len0);
+ udp_err0 = gtpu_check_ip_udp_len (b0);
+ ip_err1 = gtpu_check_ip (b1, len1);
+ udp_err1 = gtpu_check_ip_udp_len (b1);
- pkts_decapsulated ++;
- stats_n_packets += 1;
- stats_n_bytes += len0;
+ if (PREDICT_FALSE (gtpu_local_need_csum_check (b0)))
+ csum_err0 = !gtpu_validate_udp_csum (vm, b0);
+ else
+ csum_err0 = !gtpu_local_csum_is_valid (b0);
+ if (PREDICT_FALSE (gtpu_local_need_csum_check (b1)))
+ csum_err1 = !gtpu_validate_udp_csum (vm, b1);
+ else
+ csum_err1 = !gtpu_local_csum_is_valid (b1);
- /* Batch stats increment on the same gtpu tunnel so counter
- is not incremented per packet */
- if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
- {
- stats_n_packets -= 1;
- stats_n_bytes -= len0;
- if (stats_n_packets)
- vlib_increment_combined_counter
- (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
- thread_index, stats_sw_if_index,
- stats_n_packets, stats_n_bytes);
- stats_n_packets = 1;
- stats_n_bytes = len0;
- stats_sw_if_index = sw_if_index0;
- }
+ /* speculatively load gtp header version field */
+ ver0 = gtpu0->ver_flags;
+ ver1 = gtpu1->ver_flags;
+
+ /*
+ * Manipulate gtpu header
+ * TBD: Manipulate Sequence Number and N-PDU Number
+ * TBD: Manipulate Next Extension Header
+ */
+ is_fast_track0 =
+ ((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track0 = is_fast_track0 & (gtpu0->type == 255);
+
+ is_fast_track1 =
+ ((ver1 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track1 = is_fast_track1 & (gtpu1->type == 255);
+
+ ext0 = (ver0 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu0->next_ext_type :
+ &ext;
+ ext1 = (ver1 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu1->next_ext_type :
+ &ext;
+
+ gtpu_hdr_len0 = sizeof (gtpu_header_t) -
+ (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext0->len * 4;
+ gtpu_hdr_len1 = sizeof (gtpu_header_t) -
+ (((ver1 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext1->len * 4;
+
+ /* Only for clarity, will be optimized away */
+ ext0 += ext0->len * 4 / sizeof (*ext0);
+ ext1 += ext1->len * 4 / sizeof (*ext1);
+
+ has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
+ has_space1 = vlib_buffer_has_space (b1, gtpu_hdr_len1);
+
+ if (ip_err0 || udp_err0 || csum_err0)
+ {
+ next0 = GTPU_INPUT_NEXT_DROP;
+ error0 = gtpu_err_code (ip_err0, udp_err0, csum_err0);
+ goto trace0;
+ }
+
+ /* Diverge the packet paths for 0 and 1 */
+ if (PREDICT_FALSE ((!is_fast_track0) | (!has_space0)))
+ {
+ /* Not fast path. ext0 and gtpu_hdr_len0 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space0)))
+ {
+ /* The header or size is wrong */
+ error0 =
+ has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type.
+ */
+ error0 = GTPU_ERROR_UNSUPPORTED_TYPE;
+ next0 = GTPU_INPUT_NEXT_DROP;
+
+ /* The packet is not forwarded */
+ goto trace0;
+ }
+
+ /* Manipulate packet 0 */
+ ASSERT (b0->flow_id != 0);
+ tunnel_index0 = b0->flow_id - gtm->flow_id_start;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+ b0->flow_id = 0;
+
+ /* Pop gtpu header */
+ vlib_buffer_advance (b0, gtpu_hdr_len0);
+
+ /* assign the next node */
+ if (PREDICT_FALSE (t0->decap_next_index !=
+ GTPU_INPUT_NEXT_IP4_INPUT) &&
+ (t0->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
+ {
+ error0 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ next0 = t0->decap_next_index;
+
+ sw_if_index0 = t0->sw_if_index;
+
+ /* Set packet input sw_if_index to unicast GTPU tunnel for learning
+ */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same gtpu tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets,
+ stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
trace0:
b0->error = error0 ? node->errors[error0] : 0;
@@ -1490,81 +2058,103 @@ trace0:
tr->error = error0;
tr->tunnel_index = tunnel_index0;
tr->teid = has_space0 ? clib_net_to_host_u32(gtpu0->teid) : ~0;
- }
+ if (vlib_buffer_has_space (b0, 4))
+ {
+ tr->header.ver_flags = gtpu0->ver_flags;
+ tr->header.type = gtpu0->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu0->length);
+ }
+ }
- if (ip_err1 || udp_err1 || csum_err1)
- {
- next1 = GTPU_INPUT_NEXT_DROP;
- error1 = gtpu_err_code (ip_err1, udp_err1, csum_err1);
- goto trace1;
- }
+ if (ip_err1 || udp_err1 || csum_err1)
+ {
+ next1 = GTPU_INPUT_NEXT_DROP;
+ error1 = gtpu_err_code (ip_err1, udp_err1, csum_err1);
+ goto trace1;
+ }
- /* speculatively load gtp header version field */
- ver1 = gtpu1->ver_flags;
+ /*
+ * Manipulate gtpu header
+ * TBD: Manipulate Sequence Number and N-PDU Number
+ * TBD: Manipulate Next Extension Header
+ */
+ if (PREDICT_FALSE ((!is_fast_track1) | (!has_space1)))
+ {
+ /* Not fast path. ext1 and gtpu_hdr_len1 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver1 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space1)))
+ {
+ /* The header or size is wrong */
+ error1 =
+ has_space1 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type.
+ */
+ error1 = GTPU_ERROR_UNSUPPORTED_TYPE;
+ next1 = GTPU_INPUT_NEXT_DROP;
- /*
- * Manipulate gtpu header
- * TBD: Manipulate Sequence Number and N-PDU Number
- * TBD: Manipulate Next Extension Header
- */
- gtpu_hdr_len1 = sizeof(gtpu_header_t) - (((ver1 & GTPU_E_S_PN_BIT) == 0) * 4);
- has_space1 = vlib_buffer_has_space (b1, gtpu_hdr_len1);
- if (PREDICT_FALSE (((ver1 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space1)))
- {
- error1 = has_space1 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
- next1 = GTPU_INPUT_NEXT_DROP;
- goto trace1;
- }
+ /* The packet is not forwarded */
+ goto trace1;
+ }
- /* Manipulate packet 1 */
- ASSERT (b1->flow_id != 0);
- tunnel_index1 = b1->flow_id - gtm->flow_id_start;
- t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
- b1->flow_id = 0;
-
- /* Pop gtpu header */
- vlib_buffer_advance (b1, gtpu_hdr_len1);
-
- /* assign the next node */
- if (PREDICT_FALSE (t1->decap_next_index != GTPU_INPUT_NEXT_IP4_INPUT) &&
- (t1->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
- {
- error1 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
- next1 = GTPU_INPUT_NEXT_DROP;
- goto trace1;
- }
- next1 = t1->decap_next_index;
+ /* Manipulate packet 1 */
+ ASSERT (b1->flow_id != 0);
+ tunnel_index1 = b1->flow_id - gtm->flow_id_start;
+ t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
+ b1->flow_id = 0;
- sw_if_index1 = t1->sw_if_index;
+ /* Pop gtpu header */
+ vlib_buffer_advance (b1, gtpu_hdr_len1);
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- /* This won't happen in current implementation as only
- ipv4/udp/gtpu/IPV4 type packets can be matched */
- if (PREDICT_FALSE(next1 == GTPU_INPUT_NEXT_L2_INPUT))
- vnet_update_l2_len (b1);
+ /* assign the next node */
+ if (PREDICT_FALSE (t1->decap_next_index !=
+ GTPU_INPUT_NEXT_IP4_INPUT) &&
+ (t1->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
+ {
+ error1 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+ next1 = t1->decap_next_index;
- /* Set packet input sw_if_index to unicast GTPU tunnel for learning */
- vnet_buffer(b1)->sw_if_index[VLIB_RX] = sw_if_index1;
+ sw_if_index1 = t1->sw_if_index;
- pkts_decapsulated ++;
- stats_n_packets += 1;
- stats_n_bytes += len1;
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ /* This won't happen in current implementation as only
+ ipv4/udp/gtpu/IPV4 type packets can be matched */
+ if (PREDICT_FALSE (next1 == GTPU_INPUT_NEXT_L2_INPUT))
+ vnet_update_l2_len (b1);
+
+ /* Set packet input sw_if_index to unicast GTPU tunnel for learning
+ */
+ vnet_buffer (b1)->sw_if_index[VLIB_RX] = sw_if_index1;
- /* Batch stats increment on the same gtpu tunnel so counter
- is not incremented per packet */
- if (PREDICT_FALSE (sw_if_index1 != stats_sw_if_index))
- {
- stats_n_packets -= 1;
- stats_n_bytes -= len1;
- if (stats_n_packets)
- vlib_increment_combined_counter
- (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
- thread_index, stats_sw_if_index,
- stats_n_packets, stats_n_bytes);
- stats_n_packets = 1;
- stats_n_bytes = len1;
- stats_sw_if_index = sw_if_index1;
- }
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len1;
+
+ /* Batch stats increment on the same gtpu tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index1 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len1;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets,
+ stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len1;
+ stats_sw_if_index = sw_if_index1;
+ }
trace1:
b1->error = error1 ? node->errors[error1] : 0;
@@ -1577,12 +2167,18 @@ trace1:
tr->error = error1;
tr->tunnel_index = tunnel_index1;
tr->teid = has_space1 ? clib_net_to_host_u32(gtpu1->teid) : ~0;
- }
+ if (vlib_buffer_has_space (b1, 4))
+ {
+ tr->header.ver_flags = gtpu1->ver_flags;
+ tr->header.type = gtpu1->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu1->length);
+ }
+ }
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+}
while (n_left_from > 0 && n_left_to_next > 0)
{
@@ -1597,97 +2193,135 @@ trace1:
u32 sw_if_index0, len0;
u8 has_space0 = 0;
u8 ver0;
+ gtpu_ext_header_t ext = { .type = 0, .len = 0, .pad = 0 };
+ gtpu_ext_header_t *ext0;
+ bool is_fast_track0;
+ ext0 = &ext;
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- len0 = vlib_buffer_length_in_chain (vm, b0);
-
- tunnel_index0 = ~0;
- error0 = 0;
-
- ip_err0 = gtpu_check_ip (b0, len0);
- udp_err0 = gtpu_check_ip_udp_len (b0);
- if (PREDICT_FALSE (gtpu_local_need_csum_check (b0)))
- csum_err0 = !gtpu_validate_udp_csum (vm, b0);
- else
- csum_err0 = !gtpu_local_csum_is_valid (b0);
-
- if (ip_err0 || udp_err0 || csum_err0)
- {
- next0 = GTPU_INPUT_NEXT_DROP;
- error0 = gtpu_err_code (ip_err0, udp_err0, csum_err0);
- goto trace00;
- }
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
- /* udp leaves current_data pointing at the gtpu header */
- gtpu0 = vlib_buffer_get_current (b0);
+ b0 = vlib_get_buffer (vm, bi0);
+ len0 = vlib_buffer_length_in_chain (vm, b0);
- /* speculatively load gtp header version field */
- ver0 = gtpu0->ver_flags;
+ tunnel_index0 = ~0;
+ error0 = 0;
- /*
- * Manipulate gtpu header
- * TBD: Manipulate Sequence Number and N-PDU Number
- * TBD: Manipulate Next Extension Header
- */
- gtpu_hdr_len0 = sizeof(gtpu_header_t) - (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4);
+ ip_err0 = gtpu_check_ip (b0, len0);
+ udp_err0 = gtpu_check_ip_udp_len (b0);
+ if (PREDICT_FALSE (gtpu_local_need_csum_check (b0)))
+ csum_err0 = !gtpu_validate_udp_csum (vm, b0);
+ else
+ csum_err0 = !gtpu_local_csum_is_valid (b0);
- has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
- if (PREDICT_FALSE (((ver0 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space0)))
- {
- error0 = has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
-
- ASSERT (b0->flow_id != 0);
- tunnel_index0 = b0->flow_id - gtm->flow_id_start;
- t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
- b0->flow_id = 0;
-
- /* Pop gtpu header */
- vlib_buffer_advance (b0, gtpu_hdr_len0);
-
- /* assign the next node */
- if (PREDICT_FALSE (t0->decap_next_index != GTPU_INPUT_NEXT_IP4_INPUT) &&
- (t0->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
- {
- error0 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
- next0 = t0->decap_next_index;
+ /* udp leaves current_data pointing at the gtpu header */
+ gtpu0 = vlib_buffer_get_current (b0);
- sw_if_index0 = t0->sw_if_index;
+ /* speculatively load gtp header version field */
+ ver0 = gtpu0->ver_flags;
- /* Set packet input sw_if_index to unicast GTPU tunnel for learning */
- vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ /*
+ * Manipulate gtpu header
+ * TBD: Manipulate Sequence Number and N-PDU Number
+ * TBD: Manipulate Next Extension Header
+ */
+ is_fast_track0 =
+ ((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track0 = is_fast_track0 & (gtpu0->type == 255);
+
+ ext0 = (ver0 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu0->next_ext_type :
+ &ext;
+
+ gtpu_hdr_len0 = sizeof (gtpu_header_t) -
+ (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext0->len * 4;
+ ext0 += ext0->len * 4 / sizeof (*ext0);
+
+ has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
+
+ if (ip_err0 || udp_err0 || csum_err0)
+ {
+ next0 = GTPU_INPUT_NEXT_DROP;
+ error0 = gtpu_err_code (ip_err0, udp_err0, csum_err0);
+ goto trace00;
+ }
- pkts_decapsulated ++;
- stats_n_packets += 1;
- stats_n_bytes += len0;
+ if (PREDICT_FALSE ((!is_fast_track0) | (!has_space0)))
+ {
+ /* Not fast path. ext0 and gtpu_hdr_len0 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space0)))
+ {
+ /* The header or size is wrong */
+ error0 =
+ has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type.
+ */
+ error0 = GTPU_ERROR_UNSUPPORTED_TYPE;
+ next0 = GTPU_INPUT_NEXT_DROP;
- /* Batch stats increment on the same gtpu tunnel so counter
- is not incremented per packet */
- if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
- {
- stats_n_packets -= 1;
- stats_n_bytes -= len0;
- if (stats_n_packets)
- vlib_increment_combined_counter
- (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
- thread_index, stats_sw_if_index,
- stats_n_packets, stats_n_bytes);
- stats_n_packets = 1;
- stats_n_bytes = len0;
- stats_sw_if_index = sw_if_index0;
- }
+ /* The packet is not forwarded */
+ goto trace00;
+ }
+
+ ASSERT (b0->flow_id != 0);
+ tunnel_index0 = b0->flow_id - gtm->flow_id_start;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+ b0->flow_id = 0;
+
+ /* Pop gtpu header */
+ vlib_buffer_advance (b0, gtpu_hdr_len0);
+
+ /* assign the next node */
+ if (PREDICT_FALSE (t0->decap_next_index !=
+ GTPU_INPUT_NEXT_IP4_INPUT) &&
+ (t0->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
+ {
+ error0 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+ next0 = t0->decap_next_index;
+
+ sw_if_index0 = t0->sw_if_index;
+
+ /* Set packet input sw_if_index to unicast GTPU tunnel for learning
+ */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same gtpu tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets,
+ stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
trace00:
b0->error = error0 ? node->errors[error0] : 0;
@@ -1699,11 +2333,16 @@ trace1:
tr->error = error0;
tr->tunnel_index = tunnel_index0;
tr->teid = has_space0 ? clib_net_to_host_u32(gtpu0->teid) : ~0;
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
+ if (vlib_buffer_has_space (b0, 4))
+ {
+ tr->header.ver_flags = gtpu0->ver_flags;
+ tr->header.type = gtpu0->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu0->length);
+ }
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
@@ -1733,7 +2372,6 @@ VLIB_NODE_FN (gtpu4_flow_input_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
#ifndef CLIB_MULTIARCH_VARIANT
VLIB_REGISTER_NODE (gtpu4_flow_input_node) = {
.name = "gtpu4-flow-input",
@@ -1754,6 +2392,5 @@ VLIB_REGISTER_NODE (gtpu4_flow_input_node) = {
},
};
#endif
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
diff --git a/src/plugins/gtpu/gtpu_encap.c b/src/plugins/gtpu/gtpu_encap.c
index 4b7d98786f4..2c3c46a4be2 100644
--- a/src/plugins/gtpu/gtpu_encap.c
+++ b/src/plugins/gtpu/gtpu_encap.c
@@ -199,7 +199,8 @@ gtpu_encap_inline (vlib_main_t * vm,
copy_dst3 = (u64 *) ip4_3;
copy_src3 = (u64 *) t3->rewrite;
- /* Copy first 32 octets 8-bytes at a time */
+ /* Copy first 32 octets 8-bytes at a time (minimum size)
+ * TODO: check if clib_memcpy_fast is better */
#define _(offs) copy_dst0[offs] = copy_src0[offs];
foreach_fixed_header4_offset;
#undef _
@@ -212,19 +213,83 @@ gtpu_encap_inline (vlib_main_t * vm,
#define _(offs) copy_dst3[offs] = copy_src3[offs];
foreach_fixed_header4_offset;
#undef _
- /* Last 4 octets. Hopefully gcc will be our friend */
- copy_dst_last0 = (u32 *)(&copy_dst0[4]);
- copy_src_last0 = (u32 *)(&copy_src0[4]);
- copy_dst_last0[0] = copy_src_last0[0];
- copy_dst_last1 = (u32 *)(&copy_dst1[4]);
- copy_src_last1 = (u32 *)(&copy_src1[4]);
- copy_dst_last1[0] = copy_src_last1[0];
- copy_dst_last2 = (u32 *)(&copy_dst2[4]);
- copy_src_last2 = (u32 *)(&copy_src2[4]);
- copy_dst_last2[0] = copy_src_last2[0];
- copy_dst_last3 = (u32 *)(&copy_dst3[4]);
- copy_src_last3 = (u32 *)(&copy_src3[4]);
- copy_dst_last3[0] = copy_src_last3[0];
+
+ /* Copy last octets */
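+ /* Rewrite sizes (assumption, from how the rewrite is built):
+ * 36 = ip4 (20) + udp (8) + minimal gtpu (8). A longer rewrite
+ * also carries the optional gtpu word plus a 4-byte PDU session
+ * container, so 8 + 4 tail octets are copied instead of 4. */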
+ if (_vec_len (t0->rewrite) == 36)
+ {
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *) (&copy_dst0[4]);
+ copy_src_last0 = (u32 *) (&copy_src0[4]);
+ copy_dst_last0[0] = copy_src_last0[0];
+ }
+ else
+ {
+ /* Near last 8 octets. */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ _ (4);
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *) (&copy_dst0[5]);
+ copy_src_last0 = (u32 *) (&copy_src0[5]);
+ copy_dst_last0[0] = copy_src_last0[0];
+ }
+
+ if (_vec_len (t1->rewrite) == 36)
+ {
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last1 = (u32 *) (&copy_dst1[4]);
+ copy_src_last1 = (u32 *) (&copy_src1[4]);
+ copy_dst_last1[0] = copy_src_last1[0];
+ }
+ else
+ {
+ /* Near last 8 octets. */
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+ _ (4);
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last1 = (u32 *) (&copy_dst1[5]);
+ copy_src_last1 = (u32 *) (&copy_src1[5]);
+ copy_dst_last1[0] = copy_src_last1[0];
+ }
+
+ if (_vec_len (t2->rewrite) == 36)
+ {
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last2 = (u32 *) (&copy_dst2[4]);
+ copy_src_last2 = (u32 *) (&copy_src2[4]);
+ copy_dst_last2[0] = copy_src_last2[0];
+ }
+ else
+ {
+ /* Near last 8 octets. */
+#define _(offs) copy_dst2[offs] = copy_src2[offs];
+ _ (4);
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last2 = (u32 *) (&copy_dst2[5]);
+ copy_src_last2 = (u32 *) (&copy_src2[5]);
+ copy_dst_last2[0] = copy_src_last2[0];
+ }
+
+ if (_vec_len (t3->rewrite) == 36)
+ {
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last3 = (u32 *) (&copy_dst3[4]);
+ copy_src_last3 = (u32 *) (&copy_src3[4]);
+ copy_dst_last3[0] = copy_src_last3[0];
+ }
+ else
+ {
+ /* Near last 8 octets. */
+#define _(offs) copy_dst3[offs] = copy_src3[offs];
+ _ (4);
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last3 = (u32 *) (&copy_dst3[5]);
+ copy_src_last3 = (u32 *) (&copy_src3[5]);
+ copy_dst_last3[0] = copy_src_last3[0];
+ }
/* Fix the IP4 checksum and length */
sum0 = ip4_0->checksum;
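Context for the tail-copy branches above: the precomputed rewrite now comes in two sizes per address family, depending on whether the 8-octet PDU session container is appended (IPv4: 36 or 44 octets; IPv6: 56 or 64). A minimal equivalent sketch of the IPv4 tail copy, assuming the same u64-aligned rewrite layout (the helper name is illustrative, not part of this patch):

    /* Tail copy after the fixed 32-octet IPv4 prefix; rewrite_len
     * comes from vec_len (t->rewrite). u32/u64 are vppinfra types. */
    static inline void
    gtpu4_copy_rewrite_tail (u64 *dst, const u64 *src, u32 rewrite_len)
    {
      if (rewrite_len == 36) /* no PDU extension */
        ((u32 *) dst)[8] = ((const u32 *) src)[8]; /* octets 32..35 */
      else /* 44 octets: PDU session container appended */
        {
          dst[4] = src[4];                             /* octets 32..39 */
          ((u32 *) dst)[10] = ((const u32 *) src)[10]; /* octets 40..43 */
        }
    }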
@@ -318,7 +383,7 @@ gtpu_encap_inline (vlib_main_t * vm,
copy_src2 = (u64 *) t2->rewrite;
copy_dst3 = (u64 *) ip6_3;
copy_src3 = (u64 *) t3->rewrite;
- /* Copy first 56 (ip6) octets 8-bytes at a time */
+ /* Copy first 56 (ip6) octets 8 bytes at a time (the minimum rewrite size) */
#define _(offs) copy_dst0[offs] = copy_src0[offs];
foreach_fixed_header6_offset;
#undef _
@@ -331,6 +396,40 @@ gtpu_encap_inline (vlib_main_t * vm,
#define _(offs) copy_dst3[offs] = copy_src3[offs];
foreach_fixed_header6_offset;
#undef _
+
+ /* Copy last octets */
+ if (_vec_len (t0->rewrite) == 64)
+ {
+ /* Last 8 octets. */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ _ (7);
+#undef _
+ }
+
+ if (_vec_len (t1->rewrite) == 64)
+ {
+ /* Last 8 octets. */
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+ _ (7);
+#undef _
+ }
+
+ if (_vec_len (t2->rewrite) == 64)
+ {
+ /* Last 8 octets. */
+#define _(offs) copy_dst2[offs] = copy_src2[offs];
+ _ (7);
+#undef _
+ }
+
+ if (_vec_len (t3->rewrite) == 64)
+ {
+ /* Last 8 octets. */
+#define _(offs) copy_dst3[offs] = copy_src3[offs];
+ _ (7);
+#undef _
+ }
+
/* Fix IP6 payload length */
new_l0 =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
@@ -466,15 +565,19 @@ gtpu_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->tunnel_index = t0 - gtm->tunnels;
tr->tteid = t0->tteid;
- }
+ tr->pdu_extension = t0->pdu_extension;
+ tr->qfi = t0->qfi;
+ }
- if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
- {
- gtpu_encap_trace_t *tr =
- vlib_add_trace (vm, node, b1, sizeof (*tr));
- tr->tunnel_index = t1 - gtm->tunnels;
- tr->tteid = t1->tteid;
- }
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gtpu_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->tunnel_index = t1 - gtm->tunnels;
+ tr->tteid = t1->tteid;
+ tr->pdu_extension = t1->pdu_extension;
+ tr->qfi = t1->qfi;
+ }
if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -482,15 +585,19 @@ gtpu_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b2, sizeof (*tr));
tr->tunnel_index = t2 - gtm->tunnels;
tr->tteid = t2->tteid;
- }
+ tr->pdu_extension = t2->pdu_extension;
+ tr->qfi = t2->qfi;
+ }
- if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
- {
- gtpu_encap_trace_t *tr =
- vlib_add_trace (vm, node, b3, sizeof (*tr));
- tr->tunnel_index = t3 - gtm->tunnels;
- tr->tteid = t3->tteid;
- }
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gtpu_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ tr->tunnel_index = t3 - gtm->tunnels;
+ tr->tteid = t3->tteid;
+ tr->pdu_extension = t3->pdu_extension;
+ tr->qfi = t3->qfi;
+ }
vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
to_next, n_left_to_next,
@@ -532,8 +639,9 @@ gtpu_encap_inline (vlib_main_t * vm,
next0 = t0->next_dpo.dpoi_next_node;
vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index;
- /* Apply the rewrite string. $$$$ vnet_rewrite? */
- vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
+ /* Apply the rewrite string. $$$$ vnet_rewrite.
+ * The correct total size is set in ip_udp_gtpu_rewrite() */
+ vlib_buffer_advance (b0, -(word) _vec_len (t0->rewrite));
if (is_ip4)
{
@@ -546,10 +654,26 @@ gtpu_encap_inline (vlib_main_t * vm,
#define _(offs) copy_dst0[offs] = copy_src0[offs];
foreach_fixed_header4_offset;
#undef _
- /* Last 4 octets. Hopefully gcc will be our friend */
- copy_dst_last0 = (u32 *)(&copy_dst0[4]);
- copy_src_last0 = (u32 *)(&copy_src0[4]);
- copy_dst_last0[0] = copy_src_last0[0];
+
+ /* Copy last octets */
+ if (_vec_len (t0->rewrite) == 36)
+ {
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *) (&copy_dst0[4]);
+ copy_src_last0 = (u32 *) (&copy_src0[4]);
+ copy_dst_last0[0] = copy_src_last0[0];
+ }
+ else
+ {
+ /* Next-to-last 8 octets. */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ _ (4);
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *) (&copy_dst0[5]);
+ copy_src_last0 = (u32 *) (&copy_src0[5]);
+ copy_dst_last0[0] = copy_src_last0[0];
+ }
/* Fix the IP4 checksum and length */
sum0 = ip4_0->checksum;
@@ -587,6 +711,16 @@ gtpu_encap_inline (vlib_main_t * vm,
#define _(offs) copy_dst0[offs] = copy_src0[offs];
foreach_fixed_header6_offset;
#undef _
+
+ /* Copy last octets */
+ if (_vec_len (t0->rewrite) == 64)
+ {
+ /* Last 8 octets. */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ _ (7);
+#undef _
+ }
+
/* Fix IP6 payload length */
new_l0 =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
@@ -600,9 +734,9 @@ gtpu_encap_inline (vlib_main_t * vm,
/* Fix GTPU length */
gtpu0 = (gtpu_header_t *)(udp0+1);
- new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
- - sizeof (*ip4_0) - sizeof(*udp0)
- - GTPU_V1_HDR_LEN);
+ new_l0 = clib_host_to_net_u16 (
+ vlib_buffer_length_in_chain (vm, b0) - sizeof (*ip6_0) -
+ sizeof (*udp0) - GTPU_V1_HDR_LEN);
gtpu0->length = new_l0;
/* IPv6 UDP checksum is mandatory */
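The last hunk also fixes a copy/paste bug on the IPv6 single-buffer path: the GTP-U payload length was previously computed by subtracting the IPv4 header size. A worked example, assuming a 1000-octet buffer chain:

    new_l0 = 1000 - sizeof (ip6_header_t) /* 40 */
                  - sizeof (*udp0)        /*  8 */
                  - GTPU_V1_HDR_LEN;      /*  8  => 944 */
    /* the old expression subtracted sizeof (ip4_header_t) (20),
     * overstating the GTP-U length by 20 octets */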
@@ -644,7 +778,9 @@ gtpu_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->tunnel_index = t0 - gtm->tunnels;
tr->tteid = t0->tteid;
- }
+ tr->pdu_extension = t0->pdu_extension;
+ tr->qfi = t0->qfi;
+ }
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
bi0, next0);
diff --git a/src/plugins/gtpu/gtpu_error.def b/src/plugins/gtpu/gtpu_error.def
index 4351529ef25..6b521c8658a 100644
--- a/src/plugins/gtpu/gtpu_error.def
+++ b/src/plugins/gtpu/gtpu_error.def
@@ -17,3 +17,5 @@ gtpu_error (NO_SUCH_TUNNEL, "no such tunnel packets")
gtpu_error (BAD_VER, "packets with bad version in gtpu header")
gtpu_error (BAD_FLAGS, "packets with bad flags field in gtpu header")
gtpu_error (TOO_SMALL, "packet too small to fit a gtpu header")
+gtpu_error (UNSUPPORTED_TYPE, "packets with message type < 255 in gtpu header")
+gtpu_error (NO_ERROR_TUNNEL, "did not find a forward tunnel")
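The two new counters follow the usual VPP error .def convention: the file is re-included under different definitions of gtpu_error () to generate the enum and the string table. A sketch of the conventional consumer (shape only; the exact declaration lives in the plugin headers):

    typedef enum
    {
    #define gtpu_error(n, s) GTPU_ERROR_##n,
    #include <gtpu/gtpu_error.def>
    #undef gtpu_error
      GTPU_N_ERROR,
    } gtpu_input_error_t;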
diff --git a/src/plugins/gtpu/gtpu_test.c b/src/plugins/gtpu/gtpu_test.c
index 373e7888341..fadcb82cb88 100644
--- a/src/plugins/gtpu/gtpu_test.c
+++ b/src/plugins/gtpu/gtpu_test.c
@@ -91,24 +91,6 @@ static void vl_api_gtpu_add_del_tunnel_reply_t_handler
}
static uword
-api_unformat_sw_if_index (unformat_input_t * input, va_list * args)
-{
- vat_main_t *vam = va_arg (*args, vat_main_t *);
- u32 *result = va_arg (*args, u32 *);
- u8 *if_name;
- uword *p;
-
- if (!unformat (input, "%s", &if_name))
- return 0;
-
- p = hash_get_mem (vam->sw_if_index_by_interface_name, if_name);
- if (p == 0)
- return 0;
- *result = p[0];
- return 1;
-}
-
-static uword
api_unformat_hw_if_index (unformat_input_t * input, va_list * args)
{
return 0;
@@ -316,9 +298,9 @@ api_gtpu_add_del_tunnel (vat_main_t * vam)
unformat_gtpu_decap_next, &decap_next_index))
;
else if (unformat (line_input, "teid %d", &teid))
- ;
+ ;
else if (unformat (line_input, "tteid %d", &tteid))
- ;
+ ;
else
{
errmsg ("parse error '%U'", format_unformat_error, line_input);
@@ -378,6 +360,175 @@ api_gtpu_add_del_tunnel (vat_main_t * vam)
return ret;
}
+static void
+vl_api_gtpu_add_del_tunnel_v2_reply_t_handler (
+ vl_api_gtpu_add_del_tunnel_v2_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
+static int
+api_gtpu_add_del_tunnel_v2 (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_gtpu_add_del_tunnel_v2_t *mp;
+ ip46_address_t src, dst;
+ u8 is_add = 1;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 src_set = 0;
+ u8 dst_set = 0;
+ u8 grp_set = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 encap_vrf_id = 0;
+ u32 decap_next_index = ~0;
+ u32 teid = 0, tteid = 0;
+ u8 pdu_extension = 0;
+ u32 qfi = 0;
+ int ret;
+
+ /* Can't "universally zero init" (={0}) due to GCC bug 53119 */
+ clib_memset (&src, 0, sizeof src);
+ clib_memset (&dst, 0, sizeof dst);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "src %U", unformat_ip4_address, &src.ip4))
+ {
+ ipv4_set = 1;
+ src_set = 1;
+ }
+ else if (unformat (line_input, "dst %U", unformat_ip4_address, &dst.ip4))
+ {
+ ipv4_set = 1;
+ dst_set = 1;
+ }
+ else if (unformat (line_input, "src %U", unformat_ip6_address, &src.ip6))
+ {
+ ipv6_set = 1;
+ src_set = 1;
+ }
+ else if (unformat (line_input, "dst %U", unformat_ip6_address, &dst.ip6))
+ {
+ ipv6_set = 1;
+ dst_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U", unformat_ip4_address,
+ &dst.ip4, api_unformat_sw_if_index, vam,
+ &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U", unformat_ip4_address,
+ &dst.ip4))
+ {
+ grp_set = dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U", unformat_ip6_address,
+ &dst.ip6, api_unformat_sw_if_index, vam,
+ &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "group %U", unformat_ip6_address,
+ &dst.ip6))
+ {
+ grp_set = dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "mcast_sw_if_index %u",
+ &mcast_sw_if_index))
+ ;
+ else if (unformat (line_input, "encap-vrf-id %d", &encap_vrf_id))
+ ;
+ else if (unformat (line_input, "decap-next %U", unformat_gtpu_decap_next,
+ &decap_next_index))
+ ;
+ else if (unformat (line_input, "teid %d", &teid)) /* Change to %u ? */
+ ;
+ else if (unformat (line_input, "tteid %d", &tteid)) /* Change to %u ? */
+ ;
+ else if (unformat (line_input, "qfi %u", &qfi))
+ pdu_extension = 1;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (is_add && src_set == 0)
+ {
+ errmsg ("tunnel src address not specified");
+ return -99;
+ }
+ if (dst_set == 0)
+ {
+ errmsg ("tunnel dst address not specified");
+ return -99;
+ }
+
+ if (grp_set && !ip46_address_is_multicast (&dst))
+ {
+ errmsg ("tunnel group address not multicast");
+ return -99;
+ }
+ if (grp_set && mcast_sw_if_index == ~0)
+ {
+ errmsg ("tunnel nonexistent multicast device");
+ return -99;
+ }
+ if (grp_set == 0 && ip46_address_is_multicast (&dst))
+ {
+ errmsg ("tunnel dst address must be unicast");
+ return -99;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+ if (qfi > 31)
+ {
+ errmsg ("qfi max value is 31");
+ return -99;
+ }
+
+ M (GTPU_ADD_DEL_TUNNEL_V2, mp);
+
+ ip_address_encode (&src, ipv6_set ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &mp->src_address);
+ ip_address_encode (&dst, ipv6_set ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &mp->dst_address);
+ mp->encap_vrf_id = ntohl (encap_vrf_id);
+ mp->decap_next_index = ntohl (decap_next_index);
+ mp->mcast_sw_if_index = ntohl (mcast_sw_if_index);
+ mp->teid = ntohl (teid);
+ mp->tteid = ntohl (tteid);
+ mp->is_add = is_add;
+ mp->pdu_extension = pdu_extension;
+ mp->qfi = (u8) qfi; /* single octet on the wire; no byte swap */
+
+ S (mp);
+ W (ret);
+ return ret;
+}
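For reference, a hypothetical vat# invocation of the handler above (addresses and ids illustrative); note that supplying qfi implicitly sets pdu_extension:

    vat# gtpu_add_del_tunnel_v2 src 10.0.0.1 dst 10.0.0.2 teid 100 tteid 200 qfi 9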
static int
api_gtpu_tunnel_update_tteid (vat_main_t * vam)
{
@@ -454,6 +605,40 @@ static void vl_api_gtpu_tunnel_details_t_handler
ntohl (mp->mcast_sw_if_index));
}
+static void
+vl_api_gtpu_tunnel_v2_details_t_handler (vl_api_gtpu_tunnel_v2_details_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ ip46_address_t src;
+ ip46_address_t dst;
+ ip_address_decode (&mp->dst_address, &dst);
+ ip_address_decode (&mp->src_address, &src);
+ print (vam->ofp, "%11d%24U%24U%14d%18d%13d%13d%19d%15d%5d%15d%17d",
+ ntohl (mp->sw_if_index), format_ip46_address, &src, IP46_TYPE_ANY,
+ format_ip46_address, &dst, IP46_TYPE_ANY, ntohl (mp->encap_vrf_id),
+ ntohl (mp->decap_next_index), ntohl (mp->teid), ntohl (mp->tteid),
+ ntohl (mp->mcast_sw_if_index), mp->pdu_extension, mp->qfi,
+ mp->is_forwarding, ntohl (mp->forwarding_type));
+}
+
+static void
+vl_api_gtpu_add_del_forward_reply_t_handler (
+ vl_api_gtpu_add_del_forward_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
static int
api_gtpu_tunnel_dump (vat_main_t * vam)
{
@@ -498,4 +683,163 @@ api_gtpu_tunnel_dump (vat_main_t * vam)
return 0;
}
+static int
+api_gtpu_tunnel_v2_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_gtpu_tunnel_v2_dump_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ sw_if_index = ~0;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%11s%24s%24s%14s%18s%13s%13s%19s%15s%5s%15s%17s",
+ "sw_if_index", "src_address", "dst_address", "encap_vrf_id",
+ "decap_next_index", "teid", "tteid", "mcast_sw_if_index",
+ "pdu_extension", "qfi", "is_forwarding", "forwarding_type");
+ }
+
+ /* Get list of gtpu-tunnel interfaces */
+ M (GTPU_TUNNEL_V2_DUMP, mp);
+
+ mp->sw_if_index = htonl (sw_if_index);
+
+ S (mp);
+
+ /* No status response for this API call.
+ * Wait 1 sec for any dump output before returning to vat# */
+ sleep (1);
+
+ return 0;
+}
+
+static int
+api_gtpu_add_del_forward (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_gtpu_add_del_forward_t *mp;
+ int ret;
+ u32 decap_next_index = GTPU_INPUT_NEXT_L2_INPUT;
+ int is_add = 1;
+ ip46_address_t dst;
+ u8 dst_set = 0;
+ u8 type = 0;
+ u8 ipv6_set = 0;
+ u32 encap_vrf_id = 0;
+
+ /* Can't "universally zero init" (={0}) due to GCC bug 53119 */
+ clib_memset (&dst, 0, sizeof dst);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "dst %U", unformat_ip4_address, &dst.ip4))
+ dst_set = 1;
+ else if (unformat (line_input, "dst %U", unformat_ip6_address, &dst.ip6))
+ {
+ dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "decap-next %U", unformat_gtpu_decap_next,
+ &decap_next_index))
+ ;
+ else if (unformat (line_input, "encap-vrf-id %d", &encap_vrf_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "bad-header"))
+ type |= GTPU_FORWARD_BAD_HEADER;
+ else if (unformat (line_input, "unknown-teid"))
+ type |= GTPU_FORWARD_UNKNOWN_TEID;
+ else if (unformat (line_input, "unknown-type"))
+ type |= GTPU_FORWARD_UNKNOWN_TYPE;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (!dst_set)
+ {
+ errmsg ("dst must be set to a valid IP address");
+ return -99;
+ }
+
+ M (GTPU_ADD_DEL_FORWARD, mp);
+
+ mp->is_add = is_add;
+ ip_address_encode (&dst, ipv6_set ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &mp->dst_address);
+ mp->forwarding_type = type;
+ mp->encap_vrf_id = ntohl (encap_vrf_id);
+ mp->decap_next_index = ntohl (decap_next_index);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
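A hypothetical invocation of the forwarding-tunnel helper above (address illustrative); the keywords OR together into forwarding_type:

    vat# gtpu_add_del_forward dst 10.0.0.9 unknown-teid bad-header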
+static int
+api_gtpu_get_transfer_counts (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_gtpu_get_transfer_counts_t *mp;
+ u32 start_index = 0;
+ u32 capacity = 0;
+ int ret;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "start_index %u", &start_index))
+ ;
+ else if (unformat (line_input, "capacity %u", &capacity))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ M (GTPU_GET_TRANSFER_COUNTS, mp);
+ mp->sw_if_index_start = start_index;
+ mp->capacity = capacity;
+
+ S (mp); // TODO: print the returned counts from the reply handler
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_gtpu_get_transfer_counts_reply_t_handler (
+ vl_api_gtpu_get_transfer_counts_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ // TODO: copy the reply counts somewhere the caller can read them
+ vam->result_ready = 1;
+ }
+}
+
#include <gtpu/gtpu.api_test.c>
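All of the request helpers in this file rely on the standard VAT macro flow; a simplified sketch of what M/S/W do (paraphrased, not the literal macro bodies):

    M (GTPU_ADD_DEL_FORWARD, mp); /* allocate msg, set msg id + client index */
    S (mp);                       /* send it to VPP over the shared API queue */
    W (ret);                      /* block until the ..._reply_t handler runs,
                                     stores retval and sets result_ready */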
diff --git a/src/plugins/hs_apps/CMakeLists.txt b/src/plugins/hs_apps/CMakeLists.txt
index 1f474828b15..179c9c7a4c4 100644
--- a/src/plugins/hs_apps/CMakeLists.txt
+++ b/src/plugins/hs_apps/CMakeLists.txt
@@ -19,7 +19,9 @@ add_vpp_plugin(hs_apps
echo_client.c
echo_server.c
hs_apps.c
- http_server.c
+ http_cli.c
+ http_client_cli.c
+ http_tps.c
proxy.c
)
@@ -33,6 +35,7 @@ if(VPP_BUILD_HS_SAPI_APPS)
sapi/vpp_echo.c
sapi/vpp_echo_common.c
sapi/vpp_echo_bapi.c
+ sapi/vpp_echo_sapi.c
sapi/vpp_echo_proto_quic.c
sapi/vpp_echo_proto_tcp.c
sapi/vpp_echo_proto_udp.c
@@ -52,7 +55,7 @@ if(VPP_BUILD_VCL_TESTS)
)
add_vpp_executable(${test}
SOURCES "vcl/${test}.c"
- LINK_LIBRARIES vppcom pthread
+ LINK_LIBRARIES vppcom pthread ${EPOLL_LIB}
NO_INSTALL
)
endforeach()
@@ -65,7 +68,7 @@ if(VPP_BUILD_VCL_TESTS)
SOURCES
"vcl/${test}.c"
vcl/vcl_test_protos.c
- LINK_LIBRARIES vppcom pthread
+ LINK_LIBRARIES vppcom pthread ${EPOLL_LIB}
NO_INSTALL
)
endforeach()
diff --git a/src/plugins/hs_apps/echo_client.c b/src/plugins/hs_apps/echo_client.c
index d641a9ec14e..d1443e75e80 100644
--- a/src/plugins/hs_apps/echo_client.c
+++ b/src/plugins/hs_apps/echo_client.c
@@ -15,38 +15,69 @@
* limitations under the License.
*/
-#include <vnet/vnet.h>
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
#include <hs_apps/echo_client.h>
-echo_client_main_t echo_client_main;
+static ec_main_t ec_main;
-#define ECHO_CLIENT_DBG (0)
-#define DBG(_fmt, _args...) \
- if (ECHO_CLIENT_DBG) \
- clib_warning (_fmt, ##_args)
+#define ec_err(_fmt, _args...) clib_warning (_fmt, ##_args)
+
+#define ec_dbg(_fmt, _args...) \
+ do \
+ { \
+ if (ec_main.cfg.verbose) \
+ ec_err (_fmt, ##_args); \
+ } \
+ while (0)
+
+#define ec_cli(_fmt, _args...) vlib_cli_output (vm, _fmt, ##_args)
static void
-signal_evt_to_cli_i (int *code)
+signal_evt_to_cli_i (void *codep)
{
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
+ int code;
+
ASSERT (vlib_get_thread_index () == 0);
- vlib_process_signal_event (ecm->vlib_main, ecm->cli_node_index, *code, 0);
+ code = pointer_to_uword (codep);
+ vlib_process_signal_event (ecm->vlib_main, ecm->cli_node_index, code, 0);
}
static void
signal_evt_to_cli (int code)
{
if (vlib_get_thread_index () != 0)
- vl_api_rpc_call_main_thread (signal_evt_to_cli_i, (u8 *) & code,
- sizeof (code));
+ session_send_rpc_evt_to_thread_force (
+ 0, signal_evt_to_cli_i, uword_to_pointer ((uword) code, void *));
else
- signal_evt_to_cli_i (&code);
+ signal_evt_to_cli_i (uword_to_pointer ((uword) code, void *));
+}
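The reworked signalling passes the event code by value, packed into the RPC's pointer argument, rather than copying the bytes of a stack variable across threads. A self-contained sketch of the round trip, assuming a pointer-sized uword as in clib:

    typedef unsigned long uword;              /* pointer-sized, as in clib */
    #define uword_to_pointer(u, type) ((type) (uword) (u))
    #define pointer_to_uword(p)       ((uword) (p))

    static void
    rpc_fn (void *codep)
    {
      int code = pointer_to_uword (codep);    /* unpack by value, no deref */
      /* ... vlib_process_signal_event (..., code, 0); ... */
    }
    /* caller side: rpc (rpc_fn, uword_to_pointer ((uword) code, void *)); */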
+
+static inline ec_worker_t *
+ec_worker_get (u32 thread_index)
+{
+ return vec_elt_at_index (ec_main.wrk, thread_index);
+}
+
+static inline ec_session_t *
+ec_session_alloc (ec_worker_t *wrk)
+{
+ ec_session_t *ecs;
+
+ pool_get_zero (wrk->sessions, ecs);
+ ecs->session_index = ecs - wrk->sessions;
+ ecs->thread_index = wrk->thread_index;
+
+ return ecs;
+}
+
+static inline ec_session_t *
+ec_session_get (ec_worker_t *wrk, u32 ec_index)
+{
+ return pool_elt_at_index (wrk->sessions, ec_index);
}
static void
-send_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
+send_data_chunk (ec_main_t *ecm, ec_session_t *es)
{
u8 *test_data = ecm->connect_test_data;
int test_buf_len, test_buf_offset, rv;
@@ -54,27 +85,28 @@ send_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
test_buf_len = vec_len (test_data);
ASSERT (test_buf_len > 0);
- test_buf_offset = s->bytes_sent % test_buf_len;
- bytes_this_chunk = clib_min (test_buf_len - test_buf_offset,
- s->bytes_to_send);
+ test_buf_offset = es->bytes_sent % test_buf_len;
+ bytes_this_chunk =
+ clib_min (test_buf_len - test_buf_offset, es->bytes_to_send);
- if (!ecm->is_dgram)
+ if (!es->is_dgram)
{
if (ecm->no_copy)
{
- svm_fifo_t *f = s->data.tx_fifo;
+ svm_fifo_t *f = es->tx_fifo;
rv = clib_min (svm_fifo_max_enqueue_prod (f), bytes_this_chunk);
svm_fifo_enqueue_nocopy (f, rv);
session_send_io_evt_to_thread_custom (
- &f->shr->master_session_index, s->thread_index, SESSION_IO_EVT_TX);
+ &es->vpp_session_index, es->thread_index, SESSION_IO_EVT_TX);
}
else
- rv = app_send_stream (&s->data, test_data + test_buf_offset,
- bytes_this_chunk, 0);
+ rv =
+ app_send_stream ((app_session_t *) es, test_data + test_buf_offset,
+ bytes_this_chunk, 0);
}
else
{
- svm_fifo_t *f = s->data.tx_fifo;
+ svm_fifo_t *f = es->tx_fifo;
u32 max_enqueue = svm_fifo_max_enqueue_prod (f);
if (max_enqueue < sizeof (session_dgram_hdr_t))
@@ -85,7 +117,7 @@ send_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
if (ecm->no_copy)
{
session_dgram_hdr_t hdr;
- app_session_transport_t *at = &s->data.transport;
+ app_session_transport_t *at = &es->transport;
rv = clib_min (max_enqueue, bytes_this_chunk);
@@ -101,13 +133,15 @@ send_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
svm_fifo_enqueue (f, sizeof (hdr), (u8 *) & hdr);
svm_fifo_enqueue_nocopy (f, rv);
session_send_io_evt_to_thread_custom (
- &f->shr->master_session_index, s->thread_index, SESSION_IO_EVT_TX);
+ &es->vpp_session_index, es->thread_index, SESSION_IO_EVT_TX);
}
else
{
bytes_this_chunk = clib_min (bytes_this_chunk, max_enqueue);
- rv = app_send_dgram (&s->data, test_data + test_buf_offset,
- bytes_this_chunk, 0);
+ bytes_this_chunk = clib_min (bytes_this_chunk, 1460);
+ rv =
+ app_send_dgram ((app_session_t *) es, test_data + test_buf_offset,
+ bytes_this_chunk, 0);
}
}
@@ -115,45 +149,39 @@ send_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
if (rv > 0)
{
/* Account for it... */
- s->bytes_to_send -= rv;
- s->bytes_sent += rv;
+ es->bytes_to_send -= rv;
+ es->bytes_sent += rv;
- if (ECHO_CLIENT_DBG)
+ if (ecm->cfg.verbose)
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "tx-enq: xfer %d bytes, sent %u remain %u",
.format_args = "i4i4i4",
};
- /* *INDENT-ON* */
struct
{
u32 data[3];
} *ed;
ed = ELOG_DATA (&vlib_global_main.elog_main, e);
ed->data[0] = rv;
- ed->data[1] = s->bytes_sent;
- ed->data[2] = s->bytes_to_send;
+ ed->data[1] = es->bytes_sent;
+ ed->data[2] = es->bytes_to_send;
}
}
}
static void
-receive_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
+receive_data_chunk (ec_worker_t *wrk, ec_session_t *es)
{
- svm_fifo_t *rx_fifo = s->data.rx_fifo;
- u32 thread_index = vlib_get_thread_index ();
+ ec_main_t *ecm = &ec_main;
+ svm_fifo_t *rx_fifo = es->rx_fifo;
int n_read, i;
- if (ecm->test_bytes)
+ if (ecm->cfg.test_bytes)
{
- if (!ecm->is_dgram)
- n_read = app_recv_stream (&s->data, ecm->rx_buf[thread_index],
- vec_len (ecm->rx_buf[thread_index]));
- else
- n_read = app_recv_dgram (&s->data, ecm->rx_buf[thread_index],
- vec_len (ecm->rx_buf[thread_index]));
+ n_read =
+ app_recv ((app_session_t *) es, wrk->rx_buf, vec_len (wrk->rx_buf));
}
else
{
@@ -163,15 +191,13 @@ receive_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
if (n_read > 0)
{
- if (ECHO_CLIENT_DBG)
+ if (ecm->cfg.verbose)
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "rx-deq: %d bytes",
.format_args = "i4",
};
- /* *INDENT-ON* */
struct
{
u32 data[1];
@@ -180,102 +206,104 @@ receive_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
ed->data[0] = n_read;
}
- if (ecm->test_bytes)
+ if (ecm->cfg.test_bytes)
{
for (i = 0; i < n_read; i++)
{
- if (ecm->rx_buf[thread_index][i]
- != ((s->bytes_received + i) & 0xff))
+ if (wrk->rx_buf[i] != ((es->bytes_received + i) & 0xff))
{
- clib_warning ("read %d error at byte %lld, 0x%x not 0x%x",
- n_read, s->bytes_received + i,
- ecm->rx_buf[thread_index][i],
- ((s->bytes_received + i) & 0xff));
+ ec_err ("read %d error at byte %lld, 0x%x not 0x%x", n_read,
+ es->bytes_received + i, wrk->rx_buf[i],
+ ((es->bytes_received + i) & 0xff));
ecm->test_failed = 1;
}
}
}
- ASSERT (n_read <= s->bytes_to_receive);
- s->bytes_to_receive -= n_read;
- s->bytes_received += n_read;
+ ASSERT (n_read <= es->bytes_to_receive);
+ es->bytes_to_receive -= n_read;
+ es->bytes_received += n_read;
}
}
static uword
-echo_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+ec_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- echo_client_main_t *ecm = &echo_client_main;
- int my_thread_index = vlib_get_thread_index ();
- eclient_session_t *sp;
- int i;
- int delete_session;
- u32 *connection_indices;
- u32 *connections_this_batch;
- u32 nconnections_this_batch;
-
- connection_indices = ecm->connection_index_by_thread[my_thread_index];
- connections_this_batch =
- ecm->connections_this_batch_by_thread[my_thread_index];
-
- if ((ecm->run_test != ECHO_CLIENTS_RUNNING) ||
- ((vec_len (connection_indices) == 0)
- && vec_len (connections_this_batch) == 0))
+ u32 *conn_indices, *conns_this_batch, nconns_this_batch;
+ int thread_index = vm->thread_index, i, delete_session;
+ ec_main_t *ecm = &ec_main;
+ ec_worker_t *wrk;
+ ec_session_t *es;
+ session_t *s;
+
+ if (ecm->run_test != EC_RUNNING)
+ return 0;
+
+ wrk = ec_worker_get (thread_index);
+ conn_indices = wrk->conn_indices;
+ conns_this_batch = wrk->conns_this_batch;
+
+ if (vec_len (conn_indices) == 0 && vec_len (conns_this_batch) == 0)
return 0;
/* Grab another pile of connections */
- if (PREDICT_FALSE (vec_len (connections_this_batch) == 0))
+ if (PREDICT_FALSE (vec_len (conns_this_batch) == 0))
{
- nconnections_this_batch =
- clib_min (ecm->connections_per_batch, vec_len (connection_indices));
-
- ASSERT (nconnections_this_batch > 0);
- vec_validate (connections_this_batch, nconnections_this_batch - 1);
- clib_memcpy_fast (connections_this_batch,
- connection_indices + vec_len (connection_indices)
- - nconnections_this_batch,
- nconnections_this_batch * sizeof (u32));
- _vec_len (connection_indices) -= nconnections_this_batch;
+ nconns_this_batch =
+ clib_min (ecm->connections_per_batch, vec_len (conn_indices));
+
+ ASSERT (nconns_this_batch > 0);
+ vec_validate (conns_this_batch, nconns_this_batch - 1);
+ clib_memcpy_fast (conns_this_batch,
+ conn_indices + vec_len (conn_indices) -
+ nconns_this_batch,
+ nconns_this_batch * sizeof (u32));
+ vec_dec_len (conn_indices, nconns_this_batch);
}
- if (PREDICT_FALSE (ecm->prev_conns != ecm->connections_per_batch
- && ecm->prev_conns == vec_len (connections_this_batch)))
+ /*
+ * Track progress
+ */
+ if (PREDICT_FALSE (ecm->prev_conns != ecm->connections_per_batch &&
+ ecm->prev_conns == vec_len (conns_this_batch)))
{
ecm->repeats++;
- ecm->prev_conns = vec_len (connections_this_batch);
+ ecm->prev_conns = vec_len (conns_this_batch);
if (ecm->repeats == 500000)
{
- clib_warning ("stuck clients");
+ ec_err ("stuck clients");
}
}
else
{
- ecm->prev_conns = vec_len (connections_this_batch);
+ ecm->prev_conns = vec_len (conns_this_batch);
ecm->repeats = 0;
}
- for (i = 0; i < vec_len (connections_this_batch); i++)
+ /*
+ * Handle connections in this batch
+ */
+ for (i = 0; i < vec_len (conns_this_batch); i++)
{
- delete_session = 1;
+ es = ec_session_get (wrk, conns_this_batch[i]);
- sp = pool_elt_at_index (ecm->sessions, connections_this_batch[i]);
+ delete_session = 1;
- if (sp->bytes_to_send > 0)
+ if (es->bytes_to_send > 0)
{
- send_data_chunk (ecm, sp);
+ send_data_chunk (ecm, es);
delete_session = 0;
}
- if (sp->bytes_to_receive > 0)
+
+ if (es->bytes_to_receive > 0)
{
delete_session = 0;
}
+
if (PREDICT_FALSE (delete_session == 1))
{
- session_t *s;
-
- clib_atomic_fetch_add (&ecm->tx_total, sp->bytes_sent);
- clib_atomic_fetch_add (&ecm->rx_total, sp->bytes_received);
- s = session_get_from_handle_if_valid (sp->vpp_session_handle);
+ clib_atomic_fetch_add (&ecm->tx_total, es->bytes_sent);
+ clib_atomic_fetch_add (&ecm->rx_total, es->bytes_received);
+ s = session_get_from_handle_if_valid (es->vpp_session_handle);
if (s)
{
@@ -284,205 +312,327 @@ echo_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
a->app_index = ecm->app_index;
vnet_disconnect_session (a);
- vec_delete (connections_this_batch, 1, i);
+ vec_delete (conns_this_batch, 1, i);
i--;
clib_atomic_fetch_add (&ecm->ready_connections, -1);
}
else
{
- clib_warning ("session AWOL?");
- vec_delete (connections_this_batch, 1, i);
+ ec_err ("session AWOL?");
+ vec_delete (conns_this_batch, 1, i);
}
/* Kick the debug CLI process */
if (ecm->ready_connections == 0)
{
- signal_evt_to_cli (2);
+ signal_evt_to_cli (EC_CLI_TEST_DONE);
}
}
}
- ecm->connection_index_by_thread[my_thread_index] = connection_indices;
- ecm->connections_this_batch_by_thread[my_thread_index] =
- connections_this_batch;
+ wrk->conn_indices = conn_indices;
+ wrk->conns_this_batch = conns_this_batch;
return 0;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (echo_clients_node) =
-{
- .function = echo_client_node_fn,
+VLIB_REGISTER_NODE (echo_clients_node) = {
+ .function = ec_node_fn,
.name = "echo-clients",
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
+
+static void
+ec_reset_runtime_config (ec_main_t *ecm)
+{
+ hs_test_cfg_init (&ecm->cfg);
+ ecm->n_clients = 1;
+ ecm->quic_streams = 1;
+ ecm->bytes_to_send = 8192;
+ ecm->echo_bytes = 0;
+ ecm->fifo_size = 64 << 10;
+ ecm->connections_per_batch = 1000;
+ ecm->private_segment_count = 0;
+ ecm->private_segment_size = 256 << 20;
+ ecm->test_failed = 0;
+ ecm->tls_engine = CRYPTO_ENGINE_OPENSSL;
+ ecm->no_copy = 0;
+ ecm->run_test = EC_STARTING;
+ ecm->ready_connections = 0;
+ ecm->connect_conn_index = 0;
+ ecm->rx_total = 0;
+ ecm->tx_total = 0;
+ ecm->barrier_acq_needed = 0;
+ ecm->prealloc_sessions = 0;
+ ecm->prealloc_fifos = 0;
+ ecm->appns_id = 0;
+ ecm->appns_secret = 0;
+ ecm->attach_flags = 0;
+ ecm->syn_timeout = 20.0;
+ ecm->test_timeout = 20.0;
+ vec_free (ecm->connect_uri);
+}
static int
-echo_clients_init (vlib_main_t * vm)
+ec_init (vlib_main_t *vm)
{
- echo_client_main_t *ecm = &echo_client_main;
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ ec_main_t *ecm = &ec_main;
+ ec_worker_t *wrk;
u32 num_threads;
int i;
- num_threads = 1 /* main thread */ + vtm->n_threads;
+ ec_reset_runtime_config (ecm);
+
+ /* Store cli process node index for signaling */
+ ecm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
+ ecm->vlib_main = vm;
+
+ if (vlib_num_workers ())
+ {
+ /* The request came over the binary api and the inband cli handler
+ * is not mp_safe. Drop the barrier to make sure the workers are not
+ * blocked.
+ */
+ if (vlib_thread_is_main_w_barrier ())
+ {
+ ecm->barrier_acq_needed = 1;
+ vlib_worker_thread_barrier_release (vm);
+ }
+ /*
+ * There's a good chance that both the client and the server echo
+ * apps will be enabled so make sure the session queue node polls on
+ * the main thread as connections will probably be established on it.
+ */
+ vlib_node_set_state (vm, session_queue_node.index,
+ VLIB_NODE_STATE_POLLING);
+ }
+
+ /* App init done only once */
+ if (ecm->app_is_init)
+ return 0;
+
/* Init test data. Big buffer */
vec_validate (ecm->connect_test_data, 4 * 1024 * 1024 - 1);
for (i = 0; i < vec_len (ecm->connect_test_data); i++)
ecm->connect_test_data[i] = i & 0xff;
- vec_validate (ecm->rx_buf, num_threads - 1);
- for (i = 0; i < num_threads; i++)
- vec_validate (ecm->rx_buf[i], vec_len (ecm->connect_test_data) - 1);
+ num_threads = 1 /* main thread */ + vlib_num_workers ();
+ vec_validate (ecm->wrk, num_threads - 1);
+ vec_foreach (wrk, ecm->wrk)
+ {
+ vec_validate (wrk->rx_buf, vec_len (ecm->connect_test_data) - 1);
+ wrk->thread_index = wrk - ecm->wrk;
+ wrk->vpp_event_queue =
+ session_main_get_vpp_event_queue (wrk->thread_index);
+ }
- ecm->is_init = 1;
+ ecm->app_is_init = 1;
- vec_validate (ecm->connection_index_by_thread, vtm->n_vlib_mains);
- vec_validate (ecm->connections_this_batch_by_thread, vtm->n_vlib_mains);
- vec_validate (ecm->quic_session_index_by_thread, vtm->n_vlib_mains);
- vec_validate (ecm->vpp_event_queue, vtm->n_vlib_mains);
+ vlib_worker_thread_barrier_sync (vm);
+ vnet_session_enable_disable (vm, 1 /* turn on session and transports */);
+
+ /* Turn on the builtin client input nodes */
+ foreach_vlib_main ()
+ vlib_node_set_state (this_vlib_main, echo_clients_node.index,
+ VLIB_NODE_STATE_POLLING);
+
+ vlib_worker_thread_barrier_release (vm);
return 0;
}
+static void
+ec_prealloc_sessions (ec_main_t *ecm)
+{
+ u32 sessions_per_wrk, n_wrks;
+ ec_worker_t *wrk;
+
+ n_wrks = vlib_num_workers () ? vlib_num_workers () : 1;
+
+ sessions_per_wrk = ecm->n_clients / n_wrks;
+ vec_foreach (wrk, ecm->wrk)
+ pool_init_fixed (wrk->sessions, 1.1 * sessions_per_wrk);
+}
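The 1.1 factor leaves roughly 10% headroom per worker, since connections rarely land perfectly evenly across threads. For example, with n_clients = 10000 and 4 workers:

    sessions_per_wrk = 10000 / 4;                /* 2500 */
    pool_init_fixed (wrk->sessions, 1.1 * 2500); /* fixed pool of 2750 */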
+
+static void
+ec_worker_cleanup (ec_worker_t *wrk)
+{
+ pool_free (wrk->sessions);
+ vec_free (wrk->conn_indices);
+ vec_free (wrk->conns_this_batch);
+}
+
+static void
+ec_cleanup (ec_main_t *ecm)
+{
+ ec_worker_t *wrk;
+
+ vec_foreach (wrk, ecm->wrk)
+ ec_worker_cleanup (wrk);
+
+ vec_free (ecm->connect_uri);
+ vec_free (ecm->appns_id);
+
+ if (ecm->barrier_acq_needed)
+ vlib_worker_thread_barrier_sync (ecm->vlib_main);
+}
+
static int
-quic_echo_clients_qsession_connected_callback (u32 app_index, u32 api_context,
- session_t * s,
- session_error_t err)
+quic_ec_qsession_connected_callback (u32 app_index, u32 api_context,
+ session_t *s, session_error_t err)
{
- echo_client_main_t *ecm = &echo_client_main;
- vnet_connect_args_t *a = 0;
- int rv;
- u8 thread_index = vlib_get_thread_index ();
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
+ ec_main_t *ecm = &ec_main;
+ vnet_connect_args_t _a, *a = &_a;
u32 stream_n;
- session_handle_t handle;
+ int rv;
- DBG ("QUIC Connection handle %d", session_handle (s));
+ ec_dbg ("QUIC Connection handle %d", session_handle (s));
- vec_validate (a, 1);
a->uri = (char *) ecm->connect_uri;
if (parse_uri (a->uri, &sep))
return -1;
- sep.parent_handle = handle = session_handle (s);
+ sep.parent_handle = session_handle (s);
for (stream_n = 0; stream_n < ecm->quic_streams; stream_n++)
{
clib_memset (a, 0, sizeof (*a));
a->app_index = ecm->app_index;
- a->api_context = -1 - api_context;
+ a->api_context = -2 - api_context;
clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
- DBG ("QUIC opening stream %d", stream_n);
+ ec_dbg ("QUIC opening stream %d", stream_n);
if ((rv = vnet_connect (a)))
{
clib_error ("Stream session %d opening failed: %d", stream_n, rv);
return -1;
}
- DBG ("QUIC stream %d connected", stream_n);
+ ec_dbg ("QUIC stream %d connected", stream_n);
}
- /*
- * 's' is no longer valid, its underlying pool could have been moved in
- * vnet_connect()
- */
- vec_add1 (ecm->quic_session_index_by_thread[thread_index], handle);
- vec_free (a);
return 0;
}
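Each connected QUIC session above fans out into ecm->quic_streams stream connects, with sep.parent_handle carrying the QUIC session handle. The api_context offset also moved from -1 - api_context to -2 - api_context, presumably so a stream context can never alias HS_CTRL_HANDLE (likely ~0); the encoding is its own inverse:

    u32 orig_context = -2 - api_context;  /* recover the original index */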
static int
-quic_echo_clients_session_connected_callback (u32 app_index, u32 api_context,
- session_t * s,
- session_error_t err)
+ec_ctrl_send (hs_test_cmd_t cmd)
+{
+ ec_main_t *ecm = &ec_main;
+ session_t *s;
+ int rv;
+
+ ecm->cfg.cmd = cmd;
+ if (ecm->ctrl_session_handle == SESSION_INVALID_HANDLE)
+ {
+ ec_dbg ("ctrl session went away");
+ return -1;
+ }
+
+ s = session_get_from_handle_if_valid (ecm->ctrl_session_handle);
+ if (!s)
+ {
+ ec_err ("ctrl session not found");
+ return -1;
+ }
+
+ ec_dbg ("sending test paramters to the server..");
+ if (ecm->cfg.verbose)
+ hs_test_cfg_dump (&ecm->cfg, 1);
+
+ rv = svm_fifo_enqueue (s->tx_fifo, sizeof (ecm->cfg), (u8 *) &ecm->cfg);
+ ASSERT (rv == sizeof (ecm->cfg));
+ session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX);
+ return 0;
+}
+
+static int
+ec_ctrl_session_connected_callback (session_t *s)
+{
+ ec_main_t *ecm = &ec_main;
+
+ s->opaque = HS_CTRL_HANDLE;
+ ecm->ctrl_session_handle = session_handle (s);
+
+ /* send test parameters to the server */
+ ec_ctrl_send (HS_TEST_CMD_SYNC);
+ return 0;
+}
+
+static int
+quic_ec_session_connected_callback (u32 app_index, u32 api_context,
+ session_t *s, session_error_t err)
{
- echo_client_main_t *ecm = &echo_client_main;
- eclient_session_t *session;
- u32 session_index;
- u8 thread_index;
+ ec_main_t *ecm = &ec_main;
+ ec_session_t *es;
+ ec_worker_t *wrk;
+ u32 thread_index;
- if (PREDICT_FALSE (ecm->run_test != ECHO_CLIENTS_STARTING))
+ if (PREDICT_FALSE (api_context == HS_CTRL_HANDLE))
+ return ec_ctrl_session_connected_callback (s);
+
+ if (PREDICT_FALSE (ecm->run_test != EC_STARTING))
return -1;
if (err)
{
- clib_warning ("connection %d failed!", api_context);
- ecm->run_test = ECHO_CLIENTS_EXITING;
- signal_evt_to_cli (-1);
+ ec_err ("connection %d failed!", api_context);
+ ecm->run_test = EC_EXITING;
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
return 0;
}
if (s->listener_handle == SESSION_INVALID_HANDLE)
- return quic_echo_clients_qsession_connected_callback (app_index,
- api_context, s,
- err);
- DBG ("STREAM Connection callback %d", api_context);
+ return quic_ec_qsession_connected_callback (app_index, api_context, s,
+ err);
+ ec_dbg ("STREAM Connection callback %d", api_context);
thread_index = s->thread_index;
ASSERT (thread_index == vlib_get_thread_index ()
|| session_transport_service_type (s) == TRANSPORT_SERVICE_CL);
- if (!ecm->vpp_event_queue[thread_index])
- ecm->vpp_event_queue[thread_index] =
- session_main_get_vpp_event_queue (thread_index);
+ wrk = ec_worker_get (thread_index);
/*
* Setup session
*/
- clib_spinlock_lock_if_init (&ecm->sessions_lock);
- pool_get (ecm->sessions, session);
- clib_spinlock_unlock_if_init (&ecm->sessions_lock);
-
- clib_memset (session, 0, sizeof (*session));
- session_index = session - ecm->sessions;
- session->bytes_to_send = ecm->bytes_to_send;
- session->bytes_to_receive = ecm->no_return ? 0ULL : ecm->bytes_to_send;
- session->data.rx_fifo = s->rx_fifo;
- session->data.rx_fifo->shr->client_session_index = session_index;
- session->data.tx_fifo = s->tx_fifo;
- session->data.tx_fifo->shr->client_session_index = session_index;
- session->data.vpp_evt_q = ecm->vpp_event_queue[thread_index];
- session->vpp_session_handle = session_handle (s);
-
- if (ecm->is_dgram)
- {
- transport_connection_t *tc;
- tc = session_get_transport (s);
- clib_memcpy_fast (&session->data.transport, tc,
- sizeof (session->data.transport));
- session->data.is_dgram = 1;
- }
+ es = ec_session_alloc (wrk);
+ hs_test_app_session_init (es, s);
- vec_add1 (ecm->connection_index_by_thread[thread_index], session_index);
+ es->bytes_to_send = ecm->bytes_to_send;
+ es->bytes_to_receive = ecm->echo_bytes ? ecm->bytes_to_send : 0ULL;
+ es->vpp_session_handle = session_handle (s);
+ es->vpp_session_index = s->session_index;
+ s->opaque = es->session_index;
+
+ vec_add1 (wrk->conn_indices, es->session_index);
clib_atomic_fetch_add (&ecm->ready_connections, 1);
if (ecm->ready_connections == ecm->expected_connections)
{
- ecm->run_test = ECHO_CLIENTS_RUNNING;
+ ecm->run_test = EC_RUNNING;
/* Signal the CLI process that the action is starting... */
- signal_evt_to_cli (1);
+ signal_evt_to_cli (EC_CLI_CONNECTS_DONE);
}
return 0;
}
static int
-echo_clients_session_connected_callback (u32 app_index, u32 api_context,
- session_t * s, session_error_t err)
+ec_session_connected_callback (u32 app_index, u32 api_context, session_t *s,
+ session_error_t err)
{
- echo_client_main_t *ecm = &echo_client_main;
- eclient_session_t *session;
- u32 session_index;
- u8 thread_index;
+ ec_main_t *ecm = &ec_main;
+ ec_session_t *es;
+ u32 thread_index;
+ ec_worker_t *wrk;
- if (PREDICT_FALSE (ecm->run_test != ECHO_CLIENTS_STARTING))
+ if (PREDICT_FALSE (ecm->run_test != EC_STARTING))
return -1;
if (err)
{
- clib_warning ("connection %d failed!", api_context);
- ecm->run_test = ECHO_CLIENTS_EXITING;
- signal_evt_to_cli (-1);
+ ec_err ("connection %d failed! %U", api_context, format_session_error,
+ err);
+ ecm->run_test = EC_EXITING;
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
return 0;
}
@@ -490,57 +640,43 @@ echo_clients_session_connected_callback (u32 app_index, u32 api_context,
ASSERT (thread_index == vlib_get_thread_index ()
|| session_transport_service_type (s) == TRANSPORT_SERVICE_CL);
- if (!ecm->vpp_event_queue[thread_index])
- ecm->vpp_event_queue[thread_index] =
- session_main_get_vpp_event_queue (thread_index);
+ if (PREDICT_FALSE (api_context == HS_CTRL_HANDLE))
+ return ec_ctrl_session_connected_callback (s);
+
+ wrk = ec_worker_get (thread_index);
/*
* Setup session
*/
- clib_spinlock_lock_if_init (&ecm->sessions_lock);
- pool_get (ecm->sessions, session);
- clib_spinlock_unlock_if_init (&ecm->sessions_lock);
-
- clib_memset (session, 0, sizeof (*session));
- session_index = session - ecm->sessions;
- session->bytes_to_send = ecm->bytes_to_send;
- session->bytes_to_receive = ecm->no_return ? 0ULL : ecm->bytes_to_send;
- session->data.rx_fifo = s->rx_fifo;
- session->data.rx_fifo->shr->client_session_index = session_index;
- session->data.tx_fifo = s->tx_fifo;
- session->data.tx_fifo->shr->client_session_index = session_index;
- session->data.vpp_evt_q = ecm->vpp_event_queue[thread_index];
- session->vpp_session_handle = session_handle (s);
-
- if (ecm->is_dgram)
- {
- transport_connection_t *tc;
- tc = session_get_transport (s);
- clib_memcpy_fast (&session->data.transport, tc,
- sizeof (session->data.transport));
- session->data.is_dgram = 1;
- }
+ es = ec_session_alloc (wrk);
+ hs_test_app_session_init (es, s);
+
+ es->bytes_to_send = ecm->bytes_to_send;
+ es->bytes_to_receive = ecm->echo_bytes ? ecm->bytes_to_send : 0ULL;
+ es->vpp_session_handle = session_handle (s);
+ es->vpp_session_index = s->session_index;
+ s->opaque = es->session_index;
- vec_add1 (ecm->connection_index_by_thread[thread_index], session_index);
+ vec_add1 (wrk->conn_indices, es->session_index);
clib_atomic_fetch_add (&ecm->ready_connections, 1);
if (ecm->ready_connections == ecm->expected_connections)
{
- ecm->run_test = ECHO_CLIENTS_RUNNING;
+ ecm->run_test = EC_RUNNING;
/* Signal the CLI process that the action is starting... */
- signal_evt_to_cli (1);
+ signal_evt_to_cli (EC_CLI_CONNECTS_DONE);
}
return 0;
}
static void
-echo_clients_session_reset_callback (session_t * s)
+ec_session_reset_callback (session_t *s)
{
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
if (s->session_state == SESSION_STATE_READY)
- clib_warning ("Reset active connection %U", format_session, s, 2);
+ ec_err ("Reset active connection %U", format_session, s, 2);
a->handle = session_handle (s);
a->app_index = ecm->app_index;
@@ -549,16 +685,23 @@ echo_clients_session_reset_callback (session_t * s)
}
static int
-echo_clients_session_create_callback (session_t * s)
+ec_session_accept_callback (session_t *s)
{
return 0;
}
static void
-echo_clients_session_disconnect_callback (session_t * s)
+ec_session_disconnect_callback (session_t *s)
{
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ if (session_handle (s) == ecm->ctrl_session_handle)
+ {
+ ec_dbg ("ctrl session disconnect");
+ ecm->ctrl_session_handle = SESSION_INVALID_HANDLE;
+ }
+
a->handle = session_handle (s);
a->app_index = ecm->app_index;
vnet_disconnect_session (a);
@@ -566,9 +709,9 @@ echo_clients_session_disconnect_callback (session_t * s)
}
void
-echo_clients_session_disconnect (session_t * s)
+ec_session_disconnect (session_t *s)
{
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
a->handle = session_handle (s);
a->app_index = ecm->app_index;
@@ -576,54 +719,124 @@ echo_clients_session_disconnect (session_t * s)
}
static int
-echo_clients_rx_callback (session_t * s)
+ec_ctrl_session_rx_callback (session_t *s)
{
- echo_client_main_t *ecm = &echo_client_main;
- eclient_session_t *sp;
+ ec_main_t *ecm = &ec_main;
+ int rx_bytes;
+ hs_test_cfg_t cfg = { 0 };
- if (PREDICT_FALSE (ecm->run_test != ECHO_CLIENTS_RUNNING))
+ rx_bytes = svm_fifo_dequeue (s->rx_fifo, sizeof (cfg), (u8 *) &cfg);
+ if (rx_bytes != sizeof (cfg))
{
- echo_clients_session_disconnect (s);
+ ec_err ("invalid cfg length %d (expected %d)", rx_bytes, sizeof (cfg));
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
return -1;
}
- sp =
- pool_elt_at_index (ecm->sessions, s->rx_fifo->shr->client_session_index);
- receive_data_chunk (ecm, sp);
+ ec_dbg ("control message received:");
+ if (ecm->cfg.verbose)
+ hs_test_cfg_dump (&cfg, 1);
- if (svm_fifo_max_dequeue_cons (s->rx_fifo))
+ switch (cfg.cmd)
{
- if (svm_fifo_set_event (s->rx_fifo))
- session_send_io_evt_to_thread (s->rx_fifo, SESSION_IO_EVT_BUILTIN_RX);
+ case HS_TEST_CMD_SYNC:
+ switch (ecm->run_test)
+ {
+ case EC_STARTING:
+ if (!hs_test_cfg_verify (&cfg, &ecm->cfg))
+ {
+ ec_err ("invalid config received from server!");
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
+ return -1;
+ }
+ signal_evt_to_cli (EC_CLI_CFG_SYNC);
+ break;
+
+ case EC_RUNNING:
+ ec_dbg ("test running..");
+ break;
+
+ case EC_EXITING:
+ /* post test sync */
+ signal_evt_to_cli (EC_CLI_CFG_SYNC);
+ break;
+
+ default:
+ ec_err ("unexpected test state! %d", ecm->run_test);
+ break;
+ }
+ break;
+ case HS_TEST_CMD_START:
+ signal_evt_to_cli (EC_CLI_START);
+ break;
+ case HS_TEST_CMD_STOP:
+ signal_evt_to_cli (EC_CLI_STOP);
+ break;
+ default:
+ ec_err ("unexpected cmd! %d", cfg.cmd);
+ break;
}
+
return 0;
}
-int
-echo_client_add_segment_callback (u32 client_index, u64 segment_handle)
+static int
+ec_session_rx_callback (session_t *s)
{
- /* New heaps may be added */
+ ec_main_t *ecm = &ec_main;
+ ec_worker_t *wrk;
+ ec_session_t *es;
+
+ if (PREDICT_FALSE (s->opaque == HS_CTRL_HANDLE))
+ return ec_ctrl_session_rx_callback (s);
+
+ if (PREDICT_FALSE (ecm->run_test != EC_RUNNING))
+ {
+ ec_session_disconnect (s);
+ return -1;
+ }
+
+ wrk = ec_worker_get (s->thread_index);
+ es = ec_session_get (wrk, s->opaque);
+
+ receive_data_chunk (wrk, es);
+
+ if (svm_fifo_max_dequeue_cons (s->rx_fifo))
+ session_enqueue_notify (s);
+
return 0;
}
-/* *INDENT-OFF* */
-static session_cb_vft_t echo_clients = {
- .session_reset_callback = echo_clients_session_reset_callback,
- .session_connected_callback = echo_clients_session_connected_callback,
- .session_accept_callback = echo_clients_session_create_callback,
- .session_disconnect_callback = echo_clients_session_disconnect_callback,
- .builtin_app_rx_callback = echo_clients_rx_callback,
- .add_segment_callback = echo_client_add_segment_callback
+static int
+ec_add_segment_callback (u32 app_index, u64 segment_handle)
+{
+ /* New segments may be added */
+ return 0;
+}
+
+static int
+ec_del_segment_callback (u32 app_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static session_cb_vft_t ec_cb_vft = {
+ .session_reset_callback = ec_session_reset_callback,
+ .session_connected_callback = ec_session_connected_callback,
+ .session_accept_callback = ec_session_accept_callback,
+ .session_disconnect_callback = ec_session_disconnect_callback,
+ .builtin_app_rx_callback = ec_session_rx_callback,
+ .add_segment_callback = ec_add_segment_callback,
+ .del_segment_callback = ec_del_segment_callback,
};
-/* *INDENT-ON* */
static clib_error_t *
-echo_clients_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
+ec_attach ()
{
vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
- u32 prealloc_fifos, segment_size = 256 << 20;
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
vnet_app_attach_args_t _a, *a = &_a;
+ u32 prealloc_fifos;
u64 options[18];
int rv;
@@ -633,18 +846,14 @@ echo_clients_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
a->api_client_index = ~0;
a->name = format (0, "echo_client");
if (ecm->transport_proto == TRANSPORT_PROTO_QUIC)
- echo_clients.session_connected_callback =
- quic_echo_clients_session_connected_callback;
- a->session_cb_vft = &echo_clients;
+ ec_cb_vft.session_connected_callback = quic_ec_session_connected_callback;
+ a->session_cb_vft = &ec_cb_vft;
prealloc_fifos = ecm->prealloc_fifos ? ecm->expected_connections : 1;
- if (ecm->private_segment_size)
- segment_size = ecm->private_segment_size;
-
options[APP_OPTIONS_ACCEPT_COOKIE] = 0x12345678;
- options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
- options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+ options[APP_OPTIONS_SEGMENT_SIZE] = ecm->private_segment_size;
+ options[APP_OPTIONS_ADD_SEGMENT_SIZE] = ecm->private_segment_size;
options[APP_OPTIONS_RX_FIFO_SIZE] = ecm->fifo_size;
options[APP_OPTIONS_TX_FIFO_SIZE] = ecm->fifo_size;
options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = ecm->private_segment_count;
@@ -652,13 +861,13 @@ echo_clients_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
options[APP_OPTIONS_TLS_ENGINE] = ecm->tls_engine;
options[APP_OPTIONS_PCT_FIRST_ALLOC] = 100;
- if (appns_id)
+ options[APP_OPTIONS_FLAGS] |= ecm->attach_flags;
+ if (ecm->appns_id)
{
- options[APP_OPTIONS_FLAGS] |= appns_flags;
- options[APP_OPTIONS_NAMESPACE_SECRET] = appns_secret;
+ options[APP_OPTIONS_NAMESPACE_SECRET] = ecm->appns_secret;
+ a->namespace_id = ecm->appns_id;
}
a->options = options;
- a->namespace_id = appns_id;
if ((rv = vnet_application_attach (a)))
return clib_error_return (0, "attach returned %d", rv);
@@ -674,16 +883,21 @@ echo_clients_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
vnet_app_add_cert_key_pair (ck_pair);
ecm->ckpair_index = ck_pair->index;
+ ecm->test_client_attached = 1;
+
return 0;
}
static int
-echo_clients_detach ()
+ec_detach ()
{
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
vnet_app_detach_args_t _da, *da = &_da;
int rv;
+ if (!ecm->test_client_attached)
+ return 0;
+
da->app_index = ecm->app_index;
da->api_client_index = ~0;
rv = vnet_application_detach (da);
@@ -694,412 +908,450 @@ echo_clients_detach ()
return rv;
}
-static void *
-echo_client_thread_fn (void *arg)
-{
- return 0;
-}
-
-/** Start a transmit thread */
-int
-echo_clients_start_tx_pthread (echo_client_main_t * ecm)
-{
- if (ecm->client_thread_handle == 0)
- {
- int rv = pthread_create (&ecm->client_thread_handle,
- NULL /*attr */ ,
- echo_client_thread_fn, 0);
- if (rv)
- {
- ecm->client_thread_handle = 0;
- return -1;
- }
- }
- return 0;
-}
-
static int
-echo_client_transport_needs_crypto (transport_proto_t proto)
+ec_transport_needs_crypto (transport_proto_t proto)
{
return proto == TRANSPORT_PROTO_TLS || proto == TRANSPORT_PROTO_DTLS ||
proto == TRANSPORT_PROTO_QUIC;
}
-clib_error_t *
-echo_clients_connect (vlib_main_t * vm, u32 n_clients)
+static int
+ec_connect_rpc (void *args)
{
- session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
- echo_client_main_t *ecm = &echo_client_main;
- vnet_connect_args_t _a, *a = &_a;
- int i, rv;
-
- clib_memset (a, 0, sizeof (*a));
+ ec_main_t *ecm = &ec_main;
+ vnet_connect_args_t _a = {}, *a = &_a;
+ int rv, needs_crypto;
+ u32 n_clients, ci;
+
+ n_clients = ecm->n_clients;
+ needs_crypto = ec_transport_needs_crypto (ecm->transport_proto);
+ clib_memcpy (&a->sep_ext, &ecm->connect_sep, sizeof (ecm->connect_sep));
+ a->sep_ext.transport_flags |= TRANSPORT_CFG_F_CONNECTED;
+ a->app_index = ecm->app_index;
- if (parse_uri ((char *) ecm->connect_uri, &sep))
- return clib_error_return (0, "invalid uri");
+ ci = ecm->connect_conn_index;
- for (i = 0; i < n_clients; i++)
+ while (ci < n_clients)
{
- clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
- a->api_context = i;
- a->app_index = ecm->app_index;
- if (echo_client_transport_needs_crypto (a->sep_ext.transport_proto))
+ /* Crude pacing for call setups */
+ if (ci - ecm->ready_connections > 128)
+ {
+ ecm->connect_conn_index = ci;
+ break;
+ }
+
+ a->api_context = ci;
+ if (needs_crypto)
{
session_endpoint_alloc_ext_cfg (&a->sep_ext,
TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
a->sep_ext.ext_cfg->crypto.ckpair_index = ecm->ckpair_index;
}
- vlib_worker_thread_barrier_sync (vm);
rv = vnet_connect (a);
- if (a->sep_ext.ext_cfg)
+
+ if (needs_crypto)
clib_mem_free (a->sep_ext.ext_cfg);
+
if (rv)
{
- vlib_worker_thread_barrier_release (vm);
- return clib_error_return (0, "connect returned: %d", rv);
+ ec_err ("connect returned: %U", format_session_error, rv);
+ ecm->run_test = EC_EXITING;
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
+ break;
}
- vlib_worker_thread_barrier_release (vm);
- /* Crude pacing for call setups */
- if ((i % 16) == 0)
- vlib_process_suspend (vm, 100e-6);
- ASSERT (i + 1 >= ecm->ready_connections);
- while (i + 1 - ecm->ready_connections > 128)
- vlib_process_suspend (vm, 1e-3);
+ ci += 1;
}
+
+ if (ci < ecm->expected_connections && ecm->run_test != EC_EXITING)
+ ec_program_connects ();
+
return 0;
}
-#define ec_cli_output(_fmt, _args...) \
- if (!ecm->no_output) \
- vlib_cli_output(vm, _fmt, ##_args)
+void
+ec_program_connects (void)
+{
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (), ec_connect_rpc,
+ 0);
+}
static clib_error_t *
-echo_clients_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
+ec_ctrl_connect_rpc ()
{
- echo_client_main_t *ecm = &echo_client_main;
- vlib_thread_main_t *thread_main = vlib_get_thread_main ();
- u64 tmp, total_bytes, appns_flags = 0, appns_secret = 0;
- session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
- f64 test_timeout = 20.0, syn_timeout = 20.0, delta;
- char *default_uri = "tcp://6.0.1.1/1234";
- u8 *appns_id = 0, barrier_acq_needed = 0;
- int preallocate_sessions = 0, i, rv;
+ session_error_t rv;
+ ec_main_t *ecm = &ec_main;
+ vnet_connect_args_t _a = {}, *a = &_a;
+
+ a->api_context = HS_CTRL_HANDLE;
+ ecm->cfg.cmd = HS_TEST_CMD_SYNC;
+ clib_memcpy (&a->sep_ext, &ecm->connect_sep, sizeof (ecm->connect_sep));
+ a->sep_ext.transport_proto = TRANSPORT_PROTO_TCP;
+ a->app_index = ecm->app_index;
+
+ rv = vnet_connect (a);
+ if (rv)
+ {
+ ec_err ("ctrl connect returned: %U", format_session_error, rv);
+ ecm->run_test = EC_EXITING;
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
+ }
+ return 0;
+}
+
+static void
+ec_ctrl_connect (void)
+{
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ ec_ctrl_connect_rpc, 0);
+}
+
+static void
+ec_ctrl_session_disconnect ()
+{
+ ec_main_t *ecm = &ec_main;
+ vnet_disconnect_args_t _a, *a = &_a;
+ session_error_t err;
+
+ a->handle = ecm->ctrl_session_handle;
+ a->app_index = ecm->app_index;
+ err = vnet_disconnect_session (a);
+ if (err)
+ ec_err ("vnet_disconnect_session: %U", format_session_error, err);
+}
+
+static int
+ec_ctrl_test_sync ()
+{
+ ec_main_t *ecm = &ec_main;
+ ecm->cfg.test = HS_TEST_TYPE_ECHO;
+ return ec_ctrl_send (HS_TEST_CMD_SYNC);
+}
+
+static int
+ec_ctrl_test_start ()
+{
+ return ec_ctrl_send (HS_TEST_CMD_START);
+}
+
+static int
+ec_ctrl_test_stop ()
+{
+ return ec_ctrl_send (HS_TEST_CMD_STOP);
+}
+
+#define ec_wait_for_signal(_sig) \
+ vlib_process_wait_for_event_or_clock (vm, ecm->syn_timeout); \
+ event_type = vlib_process_get_events (vm, &event_data); \
+ switch (event_type) \
+ { \
+ case ~0: \
+ ec_cli ("Timeout while waiting for " #_sig); \
+ error = \
+ clib_error_return (0, "failed: timeout while waiting for " #_sig); \
+ goto cleanup; \
+ case _sig: \
+ break; \
+ default: \
+ ec_cli ("unexpected event while waiting for " #_sig ": %d", \
+ event_type); \
+ error = \
+ clib_error_return (0, "failed: unexpected event: %d", event_type); \
+ goto cleanup; \
+ }
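A hypothetical call site for the macro above, inside the CLI process function (vm, ecm, event_data, event_type, error and the cleanup label are all expected to be in scope):

    ec_ctrl_connect ();
    ec_wait_for_signal (EC_CLI_CFG_SYNC); /* waits up to ecm->syn_timeout */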
+
+static clib_error_t *
+ec_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ char *default_uri = "tcp://6.0.1.1/1234", *transfer_type;
+ ec_main_t *ecm = &ec_main;
uword *event_data = 0, event_type;
- f64 time_before_connects;
- u32 n_clients = 1;
- char *transfer_type;
clib_error_t *error = 0;
+ int rv, had_config = 1;
+ u64 tmp, total_bytes;
+ f64 delta;
- ecm->quic_streams = 1;
- ecm->bytes_to_send = 8192;
- ecm->no_return = 0;
- ecm->fifo_size = 64 << 10;
- ecm->connections_per_batch = 1000;
- ecm->private_segment_count = 0;
- ecm->private_segment_size = 0;
- ecm->no_output = 0;
- ecm->test_bytes = 0;
- ecm->test_failed = 0;
- ecm->vlib_main = vm;
- ecm->tls_engine = CRYPTO_ENGINE_OPENSSL;
- ecm->no_copy = 0;
- ecm->run_test = ECHO_CLIENTS_STARTING;
+ if (ecm->test_client_attached)
+ return clib_error_return (0, "failed: already running!");
- if (vlib_num_workers ())
+ if (ec_init (vm))
{
- /* The request came over the binary api and the inband cli handler
- * is not mp_safe. Drop the barrier to make sure the workers are not
- * blocked.
- */
- if (vlib_thread_is_main_w_barrier ())
- {
- barrier_acq_needed = 1;
- vlib_worker_thread_barrier_release (vm);
- }
- /*
- * There's a good chance that both the client and the server echo
- * apps will be enabled so make sure the session queue node polls on
- * the main thread as connections will probably be established on it.
- */
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_POLLING);
+ error = clib_error_return (0, "failed init");
+ goto cleanup;
}
- if (thread_main->n_vlib_mains > 1)
- clib_spinlock_init (&ecm->sessions_lock);
- vec_free (ecm->connect_uri);
+ if (!unformat_user (input, unformat_line_input, line_input))
+ {
+ had_config = 0;
+ goto parse_config;
+ }
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "uri %s", &ecm->connect_uri))
+ if (unformat (line_input, "uri %s", &ecm->connect_uri))
;
- else if (unformat (input, "nclients %d", &n_clients))
+ else if (unformat (line_input, "nclients %d", &ecm->n_clients))
;
- else if (unformat (input, "quic-streams %d", &ecm->quic_streams))
+ else if (unformat (line_input, "quic-streams %d", &ecm->quic_streams))
;
- else if (unformat (input, "mbytes %lld", &tmp))
+ else if (unformat (line_input, "mbytes %lld", &tmp))
ecm->bytes_to_send = tmp << 20;
- else if (unformat (input, "gbytes %lld", &tmp))
+ else if (unformat (line_input, "gbytes %lld", &tmp))
ecm->bytes_to_send = tmp << 30;
- else if (unformat (input, "bytes %lld", &ecm->bytes_to_send))
+ else if (unformat (line_input, "bytes %U", unformat_memory_size,
+ &ecm->bytes_to_send))
+ ;
+ else if (unformat (line_input, "test-timeout %f", &ecm->test_timeout))
;
- else if (unformat (input, "test-timeout %f", &test_timeout))
+ else if (unformat (line_input, "syn-timeout %f", &ecm->syn_timeout))
;
- else if (unformat (input, "syn-timeout %f", &syn_timeout))
+ else if (unformat (line_input, "echo-bytes"))
+ ecm->echo_bytes = 1;
+ else if (unformat (line_input, "fifo-size %U", unformat_memory_size,
+ &ecm->fifo_size))
;
- else if (unformat (input, "no-return"))
- ecm->no_return = 1;
- else if (unformat (input, "fifo-size %d", &ecm->fifo_size))
- ecm->fifo_size <<= 10;
- else if (unformat (input, "private-segment-count %d",
+ else if (unformat (line_input, "private-segment-count %d",
&ecm->private_segment_count))
;
- else if (unformat (input, "private-segment-size %U",
- unformat_memory_size, &tmp))
- {
- if (tmp >= 0x100000000ULL)
- {
- error = clib_error_return (
- 0, "private segment size %lld (%llu) too large", tmp, tmp);
- goto cleanup;
- }
- ecm->private_segment_size = tmp;
- }
- else if (unformat (input, "preallocate-fifos"))
+ else if (unformat (line_input, "private-segment-size %U",
+ unformat_memory_size, &ecm->private_segment_size))
+ ;
+ else if (unformat (line_input, "preallocate-fifos"))
ecm->prealloc_fifos = 1;
- else if (unformat (input, "preallocate-sessions"))
- preallocate_sessions = 1;
- else
- if (unformat (input, "client-batch %d", &ecm->connections_per_batch))
+ else if (unformat (line_input, "preallocate-sessions"))
+ ecm->prealloc_sessions = 1;
+ else if (unformat (line_input, "client-batch %d",
+ &ecm->connections_per_batch))
;
- else if (unformat (input, "appns %_%v%_", &appns_id))
+ else if (unformat (line_input, "appns %_%v%_", &ecm->appns_id))
;
- else if (unformat (input, "all-scope"))
- appns_flags |= (APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE
- | APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE);
- else if (unformat (input, "local-scope"))
- appns_flags = APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE;
- else if (unformat (input, "global-scope"))
- appns_flags = APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
- else if (unformat (input, "secret %lu", &appns_secret))
+ else if (unformat (line_input, "all-scope"))
+ ecm->attach_flags |= (APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE |
+ APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE);
+ else if (unformat (line_input, "local-scope"))
+ ecm->attach_flags = APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE;
+ else if (unformat (line_input, "global-scope"))
+ ecm->attach_flags = APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
+ else if (unformat (line_input, "secret %lu", &ecm->appns_secret))
;
- else if (unformat (input, "no-output"))
- ecm->no_output = 1;
- else if (unformat (input, "test-bytes"))
- ecm->test_bytes = 1;
- else if (unformat (input, "tls-engine %d", &ecm->tls_engine))
+ else if (unformat (line_input, "verbose"))
+ ecm->cfg.verbose = 1;
+ else if (unformat (line_input, "test-bytes"))
+ ecm->cfg.test_bytes = 1;
+ else if (unformat (line_input, "tls-engine %d", &ecm->tls_engine))
;
else
{
error = clib_error_return (0, "failed: unknown input `%U'",
- format_unformat_error, input);
+ format_unformat_error, line_input);
goto cleanup;
}
}
- /* Store cli process node index for signalling */
- ecm->cli_node_index =
- vlib_get_current_process (vm)->node_runtime.node_index;
-
- if (ecm->is_init == 0)
- {
- if (echo_clients_init (vm))
- {
- error = clib_error_return (0, "failed init");
- goto cleanup;
- }
- }
+parse_config:
-
- ecm->ready_connections = 0;
- ecm->expected_connections = n_clients * ecm->quic_streams;
- ecm->rx_total = 0;
- ecm->tx_total = 0;
+ ecm->cfg.num_test_sessions = ecm->expected_connections =
+ ecm->n_clients * ecm->quic_streams;
if (!ecm->connect_uri)
{
- clib_warning ("No uri provided. Using default: %s", default_uri);
+ ec_cli ("No uri provided. Using default: %s", default_uri);
ecm->connect_uri = format (0, "%s%c", default_uri, 0);
}
- if ((rv = parse_uri ((char *) ecm->connect_uri, &sep)))
+ if ((rv = parse_uri ((char *) ecm->connect_uri, &ecm->connect_sep)))
{
error = clib_error_return (0, "Uri parse error: %d", rv);
goto cleanup;
}
- ecm->transport_proto = sep.transport_proto;
- ecm->is_dgram = (sep.transport_proto == TRANSPORT_PROTO_UDP);
+ ecm->transport_proto = ecm->connect_sep.transport_proto;
-#if ECHO_CLIENT_PTHREAD
- echo_clients_start_tx_pthread ();
-#endif
+ if (ecm->prealloc_sessions)
+ ec_prealloc_sessions (ecm);
- vlib_worker_thread_barrier_sync (vm);
- vnet_session_enable_disable (vm, 1 /* turn on session and transports */ );
- vlib_worker_thread_barrier_release (vm);
-
- if (ecm->test_client_attached == 0)
+ if ((error = ec_attach ()))
{
- if ((error = echo_clients_attach (appns_id, appns_flags, appns_secret)))
- {
- vec_free (appns_id);
- clib_error_report (error);
- goto cleanup;
- }
- vec_free (appns_id);
+ clib_error_report (error);
+ goto cleanup;
}
- ecm->test_client_attached = 1;
- /* Turn on the builtin client input nodes */
- for (i = 0; i < thread_main->n_vlib_mains; i++)
- vlib_node_set_state (vlib_get_main_by_index (i), echo_clients_node.index,
- VLIB_NODE_STATE_POLLING);
+ if (ecm->echo_bytes)
+ ecm->cfg.test = HS_TEST_TYPE_BI;
+ else
+ ecm->cfg.test = HS_TEST_TYPE_UNI;
- if (preallocate_sessions)
- pool_init_fixed (ecm->sessions, 1.1 * n_clients);
+ ec_ctrl_connect ();
+ ec_wait_for_signal (EC_CLI_CFG_SYNC);
- /* Fire off connect requests */
- time_before_connects = vlib_time_now (vm);
- if ((error = echo_clients_connect (vm, n_clients)))
+ if (ec_ctrl_test_start () < 0)
{
+ ec_cli ("failed to send start command");
goto cleanup;
}
+ ec_wait_for_signal (EC_CLI_START);
- /* Park until the sessions come up, or ten seconds elapse... */
- vlib_process_wait_for_event_or_clock (vm, syn_timeout);
+ /*
+ * Start. Fire off connect requests
+ */
+
+  /* Update data port: data sessions use the control port + 1 */
+ ecm->connect_sep.port = hs_make_data_port (ecm->connect_sep.port);
+
+ ecm->syn_start_time = vlib_time_now (vm);
+ ec_program_connects ();
+
+ /*
+ * Park until the sessions come up, or syn_timeout seconds pass
+ */
+
+ vlib_process_wait_for_event_or_clock (vm, ecm->syn_timeout);
event_type = vlib_process_get_events (vm, &event_data);
switch (event_type)
{
case ~0:
- ec_cli_output ("Timeout with only %d sessions active...",
- ecm->ready_connections);
+ ec_cli ("Timeout with only %d sessions active...",
+ ecm->ready_connections);
error = clib_error_return (0, "failed: syn timeout with %d sessions",
ecm->ready_connections);
- goto cleanup;
+ goto stop_test;
- case 1:
- delta = vlib_time_now (vm) - time_before_connects;
+ case EC_CLI_CONNECTS_DONE:
+ delta = vlib_time_now (vm) - ecm->syn_start_time;
if (delta != 0.0)
- ec_cli_output ("%d three-way handshakes in %.2f seconds %.2f/s",
- n_clients, delta, ((f64) n_clients) / delta);
-
- ecm->test_start_time = vlib_time_now (ecm->vlib_main);
- ec_cli_output ("Test started at %.6f", ecm->test_start_time);
+ ec_cli ("%d three-way handshakes in %.2f seconds %.2f/s",
+ ecm->n_clients, delta, ((f64) ecm->n_clients) / delta);
break;
+ case EC_CLI_CONNECTS_FAILED:
+      error = clib_error_return (0, "failed: connect returned error");
+ goto stop_test;
+
default:
- ec_cli_output ("unexpected event(1): %d", event_type);
- error = clib_error_return (0, "failed: unexpected event(1): %d",
- event_type);
- goto cleanup;
+ ec_cli ("unexpected event(2): %d", event_type);
+ error =
+ clib_error_return (0, "failed: unexpected event(2): %d", event_type);
+ goto stop_test;
}
- /* Now wait for the sessions to finish... */
- vlib_process_wait_for_event_or_clock (vm, test_timeout);
+ /*
+ * Wait for the sessions to finish or test_timeout seconds pass
+ */
+ ecm->test_start_time = vlib_time_now (ecm->vlib_main);
+ ec_cli ("Test started at %.6f", ecm->test_start_time);
+ vlib_process_wait_for_event_or_clock (vm, ecm->test_timeout);
event_type = vlib_process_get_events (vm, &event_data);
switch (event_type)
{
case ~0:
- ec_cli_output ("Timeout with %d sessions still active...",
- ecm->ready_connections);
+ ec_cli ("Timeout at %.6f with %d sessions still active...",
+ vlib_time_now (ecm->vlib_main), ecm->ready_connections);
error = clib_error_return (0, "failed: timeout with %d sessions",
ecm->ready_connections);
- goto cleanup;
+ goto stop_test;
- case 2:
+ case EC_CLI_TEST_DONE:
ecm->test_end_time = vlib_time_now (vm);
- ec_cli_output ("Test finished at %.6f", ecm->test_end_time);
+ ec_cli ("Test finished at %.6f", ecm->test_end_time);
break;
default:
- ec_cli_output ("unexpected event(2): %d", event_type);
- error = clib_error_return (0, "failed: unexpected event(2): %d",
- event_type);
- goto cleanup;
+ ec_cli ("unexpected event(3): %d", event_type);
+ error =
+ clib_error_return (0, "failed: unexpected event(3): %d", event_type);
+ goto stop_test;
}
+ /*
+ * Done. Compute stats
+ */
delta = ecm->test_end_time - ecm->test_start_time;
- if (delta != 0.0)
+ if (delta == 0.0)
{
- total_bytes = (ecm->no_return ? ecm->tx_total : ecm->rx_total);
- transfer_type = ecm->no_return ? "half-duplex" : "full-duplex";
- ec_cli_output ("%lld bytes (%lld mbytes, %lld gbytes) in %.2f seconds",
- total_bytes, total_bytes / (1ULL << 20),
- total_bytes / (1ULL << 30), delta);
- ec_cli_output ("%.2f bytes/second %s", ((f64) total_bytes) / (delta),
- transfer_type);
- ec_cli_output ("%.4f gbit/second %s",
- (((f64) total_bytes * 8.0) / delta / 1e9),
- transfer_type);
- }
- else
- {
- ec_cli_output ("zero delta-t?");
+ ec_cli ("zero delta-t?");
error = clib_error_return (0, "failed: zero delta-t");
- goto cleanup;
+ goto stop_test;
}
- if (ecm->test_bytes && ecm->test_failed)
+ total_bytes = (ecm->echo_bytes ? ecm->rx_total : ecm->tx_total);
+ transfer_type = ecm->echo_bytes ? "full-duplex" : "half-duplex";
+ ec_cli ("%lld bytes (%lld mbytes, %lld gbytes) in %.2f seconds", total_bytes,
+ total_bytes / (1ULL << 20), total_bytes / (1ULL << 30), delta);
+ ec_cli ("%.2f bytes/second %s", ((f64) total_bytes) / (delta),
+ transfer_type);
+ ec_cli ("%.4f gbit/second %s", (((f64) total_bytes * 8.0) / delta / 1e9),
+ transfer_type);
+
+ if (ecm->cfg.test_bytes && ecm->test_failed)
error = clib_error_return (0, "failed: test bytes");
-cleanup:
- ecm->run_test = ECHO_CLIENTS_EXITING;
- vlib_process_wait_for_event_or_clock (vm, 10e-3);
- for (i = 0; i < vec_len (ecm->connection_index_by_thread); i++)
+stop_test:
+ ecm->run_test = EC_EXITING;
+
+ /* send stop test command to the server */
+ if (ec_ctrl_test_stop () < 0)
{
- vec_reset_length (ecm->connection_index_by_thread[i]);
- vec_reset_length (ecm->connections_this_batch_by_thread[i]);
- vec_reset_length (ecm->quic_session_index_by_thread[i]);
+ ec_cli ("failed to send stop command");
+ goto cleanup;
}
+ ec_wait_for_signal (EC_CLI_STOP);
- pool_free (ecm->sessions);
+ /* post test sync */
+ if (ec_ctrl_test_sync () < 0)
+ {
+ ec_cli ("failed to send post sync command");
+ goto cleanup;
+ }
+ ec_wait_for_signal (EC_CLI_CFG_SYNC);
+
+ /* disconnect control session */
+ ec_ctrl_session_disconnect ();
+
+cleanup:
+
+ ecm->run_test = EC_EXITING;
+ vlib_process_wait_for_event_or_clock (vm, 10e-3);
/* Detach the application, so we can use different fifo sizes next time */
- if (ecm->test_client_attached)
+ if (ec_detach ())
{
- if (echo_clients_detach ())
- {
- error = clib_error_return (0, "failed: app detach");
- ec_cli_output ("WARNING: app detach failed...");
- }
+ error = clib_error_return (0, "failed: app detach");
+ ec_cli ("WARNING: app detach failed...");
}
- if (error)
- ec_cli_output ("test failed");
- vec_free (ecm->connect_uri);
- clib_spinlock_free (&ecm->sessions_lock);
- if (barrier_acq_needed)
- vlib_worker_thread_barrier_sync (vm);
+ ec_cleanup (ecm);
+ if (had_config)
+ unformat_free (line_input);
+
+ if (error)
+ ec_cli ("test failed");
return error;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (echo_clients_command, static) =
-{
+VLIB_CLI_COMMAND (ec_command, static) = {
.path = "test echo clients",
- .short_help = "test echo clients [nclients %d][[m|g]bytes <bytes>]"
- "[test-timeout <time>][syn-timeout <time>][no-return][fifo-size <size>]"
- "[private-segment-count <count>][private-segment-size <bytes>[m|g]]"
- "[preallocate-fifos][preallocate-sessions][client-batch <batch-size>]"
- "[uri <tcp://ip/port>][test-bytes][no-output]",
- .function = echo_clients_command_fn,
+ .short_help =
+ "test echo clients [nclients %d][[m|g]bytes <bytes>]"
+ "[test-timeout <time>][syn-timeout <time>][echo-bytes][fifo-size <size>]"
+ "[private-segment-count <count>][private-segment-size <bytes>[m|g]]"
+ "[preallocate-fifos][preallocate-sessions][client-batch <batch-size>]"
+ "[uri <tcp://ip/port>][test-bytes][verbose]",
+ .function = ec_command_fn,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
-echo_clients_main_init (vlib_main_t * vm)
+ec_main_init (vlib_main_t *vm)
{
- echo_client_main_t *ecm = &echo_client_main;
- ecm->is_init = 0;
+ ec_main_t *ecm = &ec_main;
+ ecm->app_is_init = 0;
return 0;
}
-VLIB_INIT_FUNCTION (echo_clients_main_init);
+VLIB_INIT_FUNCTION (ec_main_init);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/hs_apps/echo_client.h b/src/plugins/hs_apps/echo_client.h
index c4983ca78d8..5868c3652ce 100644
--- a/src/plugins/hs_apps/echo_client.h
+++ b/src/plugins/hs_apps/echo_client.h
@@ -18,105 +18,121 @@
#ifndef __included_echo_client_h__
#define __included_echo_client_h__
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-
-#include <vppinfra/hash.h>
-#include <vppinfra/error.h>
+#include <hs_apps/hs_test.h>
#include <vnet/session/session.h>
#include <vnet/session/application_interface.h>
-typedef struct
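+/* Leading fields mirror app_session_t so an ec_session_t can be cast to
+ * the generic app session type */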
+typedef struct ec_session_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- app_session_t data;
+#define _(type, name) type name;
+ foreach_app_session_field
+#undef _
+ u32 vpp_session_index;
+ u32 thread_index;
u64 bytes_to_send;
u64 bytes_sent;
u64 bytes_to_receive;
u64 bytes_received;
u64 vpp_session_handle;
- u8 thread_index;
-} eclient_session_t;
+} ec_session_t;
+
+typedef struct ec_worker_
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ ec_session_t *sessions; /**< session pool */
+ u8 *rx_buf; /**< prealloced rx buffer */
+ u32 *conn_indices; /**< sessions handled by worker */
+ u32 *conns_this_batch; /**< sessions handled in batch */
+ svm_msg_q_t *vpp_event_queue; /**< session layer worker mq */
+ u32 thread_index; /**< thread index for worker */
+} ec_worker_t;
typedef struct
{
+ ec_worker_t *wrk; /**< Per-thread state */
+ u8 *connect_test_data; /**< Pre-computed test data */
+
+ volatile u32 ready_connections;
+ volatile u64 rx_total;
+ volatile u64 tx_total;
+ volatile int run_test; /**< Signal start of test */
+
+ f64 syn_start_time;
+ f64 test_start_time;
+ f64 test_end_time;
+ u32 prev_conns;
+ u32 repeats;
+
+ u32 connect_conn_index; /**< Connects attempted progress */
+
/*
* Application setup parameters
*/
- svm_queue_t *vl_input_queue; /**< vpe input queue */
- svm_msg_q_t **vpp_event_queue;
u32 cli_node_index; /**< cli process node index */
- u32 my_client_index; /**< loopback API client handle */
u32 app_index; /**< app index after attach */
+ session_handle_t ctrl_session_handle; /**< control session handle */
/*
* Configuration params
*/
+ hs_test_cfg_t cfg;
+ u32 n_clients; /**< Number of clients */
u8 *connect_uri; /**< URI for slave's connect */
+  session_endpoint_cfg_t connect_sep; /**< Server session endpoint */
u64 bytes_to_send; /**< Bytes to send */
u32 configured_segment_size;
u32 fifo_size;
u32 expected_connections; /**< Number of clients/connections */
u32 connections_per_batch; /**< Connections to rx/tx at once */
u32 private_segment_count; /**< Number of private fifo segs */
- u32 private_segment_size; /**< size of private fifo segs */
+ u64 private_segment_size; /**< size of private fifo segs */
u32 tls_engine; /**< TLS engine mbedtls/openssl */
- u8 is_dgram;
u32 no_copy; /**< Don't memcpy data to tx fifo */
u32 quic_streams; /**< QUIC streams per connection */
u32 ckpair_index; /**< Cert key pair for tls/quic */
+ u64 attach_flags; /**< App attach flags */
+  u8 *appns_id;		   /**< App namespace id */
+ u64 appns_secret; /**< App namespace secret */
+ f64 syn_timeout; /**< Test syn timeout (s) */
+ f64 test_timeout; /**< Test timeout (s) */
/*
- * Test state variables
- */
- eclient_session_t *sessions; /**< Session pool, shared */
- clib_spinlock_t sessions_lock;
- u8 **rx_buf; /**< intermediate rx buffers */
- u8 *connect_test_data; /**< Pre-computed test data */
- u32 **quic_session_index_by_thread;
- u32 **connection_index_by_thread;
- u32 **connections_this_batch_by_thread; /**< active connection batch */
- pthread_t client_thread_handle;
-
- volatile u32 ready_connections;
- volatile u32 finished_connections;
- volatile u64 rx_total;
- volatile u64 tx_total;
- volatile int run_test; /**< Signal start of test */
-
- f64 test_start_time;
- f64 test_end_time;
- u32 prev_conns;
- u32 repeats;
- /*
* Flags
*/
- u8 is_init;
+ u8 app_is_init;
u8 test_client_attached;
- u8 no_return;
+ u8 echo_bytes;
u8 test_return_packets;
- int i_am_master;
int drop_packets; /**< drop all packets */
u8 prealloc_fifos; /**< Request fifo preallocation */
- u8 no_output;
- u8 test_bytes;
+ u8 prealloc_sessions;
u8 test_failed;
u8 transport_proto;
+ u8 barrier_acq_needed;
vlib_main_t *vlib_main;
-} echo_client_main_t;
+} ec_main_t;
+
+typedef enum ec_state_
+{
+ EC_STARTING,
+ EC_RUNNING,
+ EC_EXITING
+} ec_state_t;
-enum
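+/* Events signalled to the CLI process while a test is in progress */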
+typedef enum ec_cli_signal_
{
- ECHO_CLIENTS_STARTING,
- ECHO_CLIENTS_RUNNING,
- ECHO_CLIENTS_EXITING
-} echo_clients_test_state_e;
-extern echo_client_main_t echo_client_main;
+ EC_CLI_CONNECTS_DONE = 1,
+ EC_CLI_CONNECTS_FAILED,
+ EC_CLI_CFG_SYNC,
+ EC_CLI_START,
+ EC_CLI_STOP,
+ EC_CLI_TEST_DONE
+} ec_cli_signal_t;
-vlib_node_registration_t echo_clients_node;
+void ec_program_connects (void);
#endif /* __included_echo_client_h__ */
diff --git a/src/plugins/hs_apps/echo_server.c b/src/plugins/hs_apps/echo_server.c
index b75a3667e83..0243252434a 100644
--- a/src/plugins/hs_apps/echo_server.c
+++ b/src/plugins/hs_apps/echo_server.c
@@ -13,79 +13,143 @@
* limitations under the License.
*/
+#include <hs_apps/hs_test.h>
#include <vnet/vnet.h>
#include <vlibmemory/api.h>
#include <vnet/session/application.h>
#include <vnet/session/application_interface.h>
#include <vnet/session/session.h>
-#define ECHO_SERVER_DBG (0)
-#define DBG(_fmt, _args...) \
- if (ECHO_SERVER_DBG) \
- clib_warning (_fmt, ##_args)
+static void es_set_echo_rx_callbacks (u8 no_echo);
typedef struct
{
- /*
- * Server app parameters
- */
- svm_msg_q_t **vpp_queue;
- svm_queue_t *vl_input_queue; /**< Sever's event queue */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
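+  /* leading fields mirror app_session_t (see the app_send/app_recv casts) */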
+#define _(type, name) type name;
+ foreach_app_session_field
+#undef _
+ u64 vpp_session_handle;
+ u32 vpp_session_index;
+ u32 rx_retries;
+ u8 byte_index;
+} es_session_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ es_session_t *sessions;
+ u8 *rx_buf; /**< Per-thread RX buffer */
+ svm_msg_q_t *vpp_event_queue;
+ u32 thread_index;
+} es_worker_t;
+typedef struct
+{
u32 app_index; /**< Server app index */
- u32 my_client_index; /**< API client handle */
- u32 node_index; /**< process node index for event scheduling */
/*
* Config params
*/
- u8 no_echo; /**< Don't echo traffic */
+ hs_test_cfg_t cfg;
u32 fifo_size; /**< Fifo size */
u32 rcv_buffer_size; /**< Rcv buffer size */
u32 prealloc_fifos; /**< Preallocate fifos */
u32 private_segment_count; /**< Number of private segments */
- u32 private_segment_size; /**< Size of private segments */
+ u64 private_segment_size; /**< Size of private segments */
char *server_uri; /**< Server URI */
u32 tls_engine; /**< TLS engine: mbedtls/openssl */
u32 ckpair_index; /**< Cert and key for tls/quic */
- u8 is_dgram; /**< set if transport is dgram */
/*
* Test state
*/
- u8 **rx_buf; /**< Per-thread RX buffer */
- u64 byte_index;
- u32 **rx_retries;
+ es_worker_t *wrk;
+ int (*rx_callback) (session_t *session);
u8 transport_proto;
u64 listener_handle; /**< Session handle of the root listener */
+ u64 ctrl_listener_handle;
vlib_main_t *vlib_main;
} echo_server_main_t;
echo_server_main_t echo_server_main;
+#define es_err(_fmt, _args...) clib_warning (_fmt, ##_args)
+
+#define es_dbg(_fmt, _args...) \
+ do \
+ { \
+ if (PREDICT_FALSE (echo_server_main.cfg.verbose)) \
+ es_err (_fmt, ##_args); \
+ } \
+ while (0)
+
+#define es_cli(_fmt, _args...) vlib_cli_output (vm, _fmt, ##_args)
+
+static inline es_worker_t *
+es_worker_get (u32 thread_index)
+{
+ return vec_elt_at_index (echo_server_main.wrk, thread_index);
+}
+
+static inline es_session_t *
+es_session_alloc (es_worker_t *wrk)
+{
+ es_session_t *es;
+
+ pool_get_zero (wrk->sessions, es);
+ es->session_index = es - wrk->sessions;
+ return es;
+}
+
+static inline es_session_t *
+es_session_get (es_worker_t *wrk, u32 es_index)
+{
+ return pool_elt_at_index (wrk->sessions, es_index);
+}
+
int
quic_echo_server_qsession_accept_callback (session_t * s)
{
- DBG ("QSession %u accept w/opaque %d", s->session_index, s->opaque);
+ es_dbg ("QSession %u accept w/opaque %d", s->session_index, s->opaque);
return 0;
}
+static int
+echo_server_ctrl_session_accept_callback (session_t *s)
+{
+ s->session_state = SESSION_STATE_READY;
+ return 0;
+}
+
+static void
+es_session_alloc_and_init (session_t *s)
+{
+ es_session_t *es;
+ es_worker_t *wrk = es_worker_get (s->thread_index);
+
+ es = es_session_alloc (wrk);
+ hs_test_app_session_init (es, s);
+ es->vpp_session_index = s->session_index;
+ es->vpp_session_handle = session_handle (s);
+ s->opaque = es->session_index;
+}
+
int
quic_echo_server_session_accept_callback (session_t * s)
{
echo_server_main_t *esm = &echo_server_main;
+
+ if (PREDICT_FALSE (esm->ctrl_listener_handle == s->listener_handle))
+ return echo_server_ctrl_session_accept_callback (s);
+
if (s->listener_handle == esm->listener_handle)
return quic_echo_server_qsession_accept_callback (s);
- DBG ("SSESSION %u accept w/opaque %d", s->session_index, s->opaque);
- esm->vpp_queue[s->thread_index] =
- session_main_get_vpp_event_queue (s->thread_index);
+ es_dbg ("SSESSION %u accept w/opaque %d", s->session_index, s->opaque);
+
s->session_state = SESSION_STATE_READY;
- esm->byte_index = 0;
- ASSERT (vec_len (esm->rx_retries) > s->thread_index);
- vec_validate (esm->rx_retries[s->thread_index], s->session_index);
- esm->rx_retries[s->thread_index][s->session_index] = 0;
+ es_session_alloc_and_init (s);
return 0;
}
@@ -93,13 +157,12 @@ int
echo_server_session_accept_callback (session_t * s)
{
echo_server_main_t *esm = &echo_server_main;
- esm->vpp_queue[s->thread_index] =
- session_main_get_vpp_event_queue (s->thread_index);
+
+ if (PREDICT_FALSE (esm->ctrl_listener_handle == s->listener_handle))
+ return echo_server_ctrl_session_accept_callback (s);
+
s->session_state = SESSION_STATE_READY;
- esm->byte_index = 0;
- ASSERT (vec_len (esm->rx_retries) > s->thread_index);
- vec_validate (esm->rx_retries[s->thread_index], s->session_index);
- esm->rx_retries[s->thread_index][s->session_index] = 0;
+ es_session_alloc_and_init (s);
return 0;
}
@@ -119,7 +182,7 @@ echo_server_session_reset_callback (session_t * s)
{
echo_server_main_t *esm = &echo_server_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- clib_warning ("Reset session %U", format_session, s, 2);
+ es_dbg ("Reset session %U", format_session, s, 2);
a->handle = session_handle (s);
a->app_index = esm->app_index;
vnet_disconnect_session (a);
@@ -129,7 +192,7 @@ int
echo_server_session_connected_callback (u32 app_index, u32 api_context,
session_t * s, session_error_t err)
{
- clib_warning ("called...");
+ es_err ("called...");
return -1;
}
@@ -143,26 +206,135 @@ echo_server_add_segment_callback (u32 client_index, u64 segment_handle)
int
echo_server_redirect_connect_callback (u32 client_index, void *mp)
{
- clib_warning ("called...");
+ es_err ("called...");
return -1;
}
-void
-test_bytes (echo_server_main_t * esm, int actual_transfer)
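+/* Queue fp to run once on every session worker thread */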
+static void
+es_foreach_thread (void *fp)
{
- int i;
- u32 my_thread_id = vlib_get_thread_index ();
+ echo_server_main_t *esm = &echo_server_main;
+ uword thread_index;
+ for (thread_index = 0; thread_index < vec_len (esm->wrk); thread_index++)
+ {
+ session_send_rpc_evt_to_thread (thread_index, fp,
+ uword_to_pointer (thread_index, void *));
+ }
+}
- for (i = 0; i < actual_transfer; i++)
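+/* Per-worker RPC: preallocate this worker's share of the configured test
+ * sessions, with ~10% headroom */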
+static int
+es_wrk_prealloc_sessions (void *args)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ u32 sessions_per_wrk, n_wrks, thread_index;
+
+ thread_index = pointer_to_uword (args);
+ es_worker_t *wrk = es_worker_get (thread_index);
+ n_wrks = vlib_num_workers () ? vlib_num_workers () : 1;
+ sessions_per_wrk = esm->cfg.num_test_sessions / n_wrks;
+ pool_alloc (wrk->sessions, 1.1 * sessions_per_wrk);
+ return 0;
+}
+
+static int
+echo_server_setup_test (hs_test_cfg_t *c)
+{
+ if (c->test == HS_TEST_TYPE_UNI)
+ es_set_echo_rx_callbacks (1 /* no echo */);
+ else
+    es_set_echo_rx_callbacks (0 /* echo */);
+
+ es_foreach_thread (es_wrk_prealloc_sessions);
+ return 0;
+}
+
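+/* Reply on the control session by echoing the current config back */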
+static void
+echo_server_ctrl_reply (session_t *s)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ int rv;
+
+ rv = svm_fifo_enqueue (s->tx_fifo, sizeof (esm->cfg), (u8 *) &esm->cfg);
+ ASSERT (rv == sizeof (esm->cfg));
+ session_send_io_evt_to_thread_custom (&s->session_index, s->thread_index,
+ SESSION_IO_EVT_TX);
+}
+
+static int
+es_test_cmd_sync (echo_server_main_t *esm, session_t *s)
+{
+ int rv;
+
+ rv = echo_server_setup_test (&esm->cfg);
+ if (rv)
+ es_err ("setup test error!");
+
+ echo_server_ctrl_reply (s);
+ return 0;
+}
+
+static int
+es_wrk_cleanup_sessions (void *args)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ vnet_disconnect_args_t _a = {}, *a = &_a;
+ u32 thread_index = pointer_to_uword (args);
+ es_session_t *es;
+ es_worker_t *wrk;
+
+ wrk = es_worker_get (thread_index);
+ a->app_index = esm->app_index;
+
+ pool_foreach (es, wrk->sessions)
+ {
+ a->handle = es->vpp_session_handle;
+ vnet_disconnect_session (a);
+ }
+ pool_free (wrk->sessions);
+
+ return 0;
+}
+
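+/* Control-session rx: dequeue the client's hs_test_cfg_t and dispatch on
+ * the embedded command */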
+static int
+echo_server_rx_ctrl_callback (session_t *s)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ int rv;
+
+ rv = svm_fifo_dequeue (s->rx_fifo, sizeof (esm->cfg), (u8 *) &esm->cfg);
+ ASSERT (rv == sizeof (esm->cfg));
+
+ es_dbg ("control message received:");
+ if (esm->cfg.verbose)
+ hs_test_cfg_dump (&esm->cfg, 0);
+
+ switch (esm->cfg.cmd)
{
- if (esm->rx_buf[my_thread_id][i] != ((esm->byte_index + i) & 0xff))
+ case HS_TEST_CMD_SYNC:
+ switch (esm->cfg.test)
{
- clib_warning ("at %lld expected %d got %d", esm->byte_index + i,
- (esm->byte_index + i) & 0xff,
- esm->rx_buf[my_thread_id][i]);
+ case HS_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_NONE:
+ es_foreach_thread (es_wrk_cleanup_sessions);
+ echo_server_ctrl_reply (s);
+ break;
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
+ return es_test_cmd_sync (esm, s);
+	default:
+	  es_err ("unknown test type! %d", esm->cfg.test);
}
+ break;
+ case HS_TEST_CMD_START:
+ case HS_TEST_CMD_STOP:
+ echo_server_ctrl_reply (s);
+ break;
+ default:
+ es_err ("unknown command! %d", esm->cfg.cmd);
+ break;
}
- esm->byte_index += actual_transfer;
+ return 0;
}
/*
@@ -171,11 +343,30 @@ test_bytes (echo_server_main_t * esm, int actual_transfer)
int
echo_server_builtin_server_rx_callback_no_echo (session_t * s)
{
+ echo_server_main_t *esm = &echo_server_main;
+ if (PREDICT_FALSE (esm->ctrl_listener_handle == s->listener_handle))
+ return echo_server_rx_ctrl_callback (s);
+
svm_fifo_t *rx_fifo = s->rx_fifo;
svm_fifo_dequeue_drop (rx_fifo, svm_fifo_max_dequeue_cons (rx_fifo));
return 0;
}
+static void
+es_test_bytes (es_worker_t *wrk, es_session_t *es, int actual_transfer)
+{
+ int i;
+ for (i = 0; i < actual_transfer; i++)
+ {
+ if (wrk->rx_buf[i] != ((es->byte_index + i) & 0xff))
+ {
+	  es_err ("at %d expected %d got %d", es->byte_index + i,
+		  (es->byte_index + i) & 0xff, wrk->rx_buf[i]);
+ }
+ }
+ es->byte_index += actual_transfer;
+}
+
int
echo_server_rx_callback (session_t * s)
{
@@ -184,7 +375,8 @@ echo_server_rx_callback (session_t * s)
svm_fifo_t *tx_fifo, *rx_fifo;
echo_server_main_t *esm = &echo_server_main;
u32 thread_index = vlib_get_thread_index ();
- app_session_transport_t at;
+ es_worker_t *wrk;
+ es_session_t *es;
ASSERT (s->thread_index == thread_index);
@@ -194,24 +386,25 @@ echo_server_rx_callback (session_t * s)
ASSERT (rx_fifo->master_thread_index == thread_index);
ASSERT (tx_fifo->master_thread_index == thread_index);
+ if (PREDICT_FALSE (esm->ctrl_listener_handle == s->listener_handle))
+ return echo_server_rx_ctrl_callback (s);
+
+ wrk = es_worker_get (thread_index);
max_enqueue = svm_fifo_max_enqueue_prod (tx_fifo);
- if (!esm->is_dgram)
- {
- max_dequeue = svm_fifo_max_dequeue_cons (rx_fifo);
- }
- else
+ es = es_session_get (wrk, s->opaque);
+
+ if (es->is_dgram)
{
session_dgram_pre_hdr_t ph;
svm_fifo_peek (rx_fifo, 0, sizeof (ph), (u8 *) & ph);
max_dequeue = ph.data_length - ph.data_offset;
- if (!esm->vpp_queue[s->thread_index])
- {
- svm_msg_q_t *mq;
- mq = session_main_get_vpp_event_queue (s->thread_index);
- esm->vpp_queue[s->thread_index] = mq;
- }
+ ASSERT (wrk->vpp_event_queue);
max_enqueue -= sizeof (session_dgram_hdr_t);
}
+ else
+ {
+ max_dequeue = svm_fifo_max_dequeue_cons (rx_fifo);
+ }
if (PREDICT_FALSE (max_dequeue == 0))
return 0;
@@ -228,65 +421,40 @@ echo_server_rx_callback (session_t * s)
/* Program self-tap to retry */
if (svm_fifo_set_event (rx_fifo))
{
+	  /* TODO: this should be session_enqueue_notify (s), but quic tests
+	   * seem to fail when it is used */
if (session_send_io_evt_to_thread (rx_fifo,
SESSION_IO_EVT_BUILTIN_RX))
- clib_warning ("failed to enqueue self-tap");
+ es_err ("failed to enqueue self-tap");
- vec_validate (esm->rx_retries[s->thread_index], s->session_index);
- if (esm->rx_retries[thread_index][s->session_index] == 500000)
+ if (es->rx_retries == 500000)
{
- clib_warning ("session stuck: %U", format_session, s, 2);
+ es_err ("session stuck: %U", format_session, s, 2);
}
- if (esm->rx_retries[thread_index][s->session_index] < 500001)
- esm->rx_retries[thread_index][s->session_index]++;
+ if (es->rx_retries < 500001)
+ es->rx_retries++;
}
return 0;
}
- vec_validate (esm->rx_buf[thread_index], max_transfer);
- if (!esm->is_dgram)
- {
- actual_transfer = app_recv_stream_raw (rx_fifo,
- esm->rx_buf[thread_index],
- max_transfer,
- 0 /* don't clear event */ ,
- 0 /* peek */ );
- }
- else
+ vec_validate (wrk->rx_buf, max_transfer);
+ actual_transfer = app_recv ((app_session_t *) es, wrk->rx_buf, max_transfer);
+ ASSERT (actual_transfer == max_transfer);
+
+ if (esm->cfg.test_bytes)
{
- actual_transfer = app_recv_dgram_raw (rx_fifo,
- esm->rx_buf[thread_index],
- max_transfer, &at,
- 0 /* don't clear event */ ,
- 0 /* peek */ );
+ es_test_bytes (wrk, es, actual_transfer);
}
- ASSERT (actual_transfer == max_transfer);
- /* test_bytes (esm, actual_transfer); */
/*
* Echo back
*/
- if (!esm->is_dgram)
- {
- n_written = app_send_stream_raw (tx_fifo,
- esm->vpp_queue[thread_index],
- esm->rx_buf[thread_index],
- actual_transfer, SESSION_IO_EVT_TX,
- 1 /* do_evt */ , 0);
- }
- else
- {
- n_written = app_send_dgram_raw (tx_fifo, &at,
- esm->vpp_queue[s->thread_index],
- esm->rx_buf[thread_index],
- actual_transfer, SESSION_IO_EVT_TX,
- 1 /* do_evt */ , 0);
- }
+ n_written = app_send ((app_session_t *) es, wrk->rx_buf, actual_transfer, 0);
if (n_written != max_transfer)
- clib_warning ("short trout! written %u read %u", n_written, max_transfer);
+ es_err ("short trout! written %u read %u", n_written, max_transfer);
if (PREDICT_FALSE (svm_fifo_max_dequeue_cons (rx_fifo)))
goto rx_event;
@@ -294,15 +462,32 @@ echo_server_rx_callback (session_t * s)
return 0;
}
+int
+echo_server_rx_callback_common (session_t *s)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ return esm->rx_callback (s);
+}
+
static session_cb_vft_t echo_server_session_cb_vft = {
.session_accept_callback = echo_server_session_accept_callback,
.session_disconnect_callback = echo_server_session_disconnect_callback,
.session_connected_callback = echo_server_session_connected_callback,
.add_segment_callback = echo_server_add_segment_callback,
- .builtin_app_rx_callback = echo_server_rx_callback,
+ .builtin_app_rx_callback = echo_server_rx_callback_common,
.session_reset_callback = echo_server_session_reset_callback
};
+static void
+es_set_echo_rx_callbacks (u8 no_echo)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ if (no_echo)
+ esm->rx_callback = echo_server_builtin_server_rx_callback_no_echo;
+ else
+ esm->rx_callback = echo_server_rx_callback;
+}
+
static int
echo_server_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
{
@@ -310,30 +495,22 @@ echo_server_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
echo_server_main_t *esm = &echo_server_main;
vnet_app_attach_args_t _a, *a = &_a;
u64 options[APP_OPTIONS_N_OPTIONS];
- u32 segment_size = 512 << 20;
clib_memset (a, 0, sizeof (*a));
clib_memset (options, 0, sizeof (options));
- if (esm->no_echo)
- echo_server_session_cb_vft.builtin_app_rx_callback =
- echo_server_builtin_server_rx_callback_no_echo;
- else
- echo_server_session_cb_vft.builtin_app_rx_callback =
- echo_server_rx_callback;
+ esm->rx_callback = echo_server_rx_callback;
+
if (esm->transport_proto == TRANSPORT_PROTO_QUIC)
echo_server_session_cb_vft.session_accept_callback =
quic_echo_server_session_accept_callback;
- if (esm->private_segment_size)
- segment_size = esm->private_segment_size;
-
a->api_client_index = ~0;
a->name = format (0, "echo_server");
a->session_cb_vft = &echo_server_session_cb_vft;
a->options = options;
- a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
- a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = esm->private_segment_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = esm->private_segment_size;
a->options[APP_OPTIONS_RX_FIFO_SIZE] = esm->fifo_size;
a->options[APP_OPTIONS_TX_FIFO_SIZE] = esm->fifo_size;
a->options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = esm->private_segment_count;
@@ -352,7 +529,7 @@ echo_server_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
if (vnet_application_attach (a))
{
- clib_warning ("failed to attach server");
+ es_err ("failed to attach server");
return -1;
}
esm->app_index = a->app_index;
@@ -392,19 +569,35 @@ echo_client_transport_needs_crypto (transport_proto_t proto)
}
static int
+echo_server_listen_ctrl ()
+{
+ echo_server_main_t *esm = &echo_server_main;
+ vnet_listen_args_t _args = {}, *args = &_args;
+ session_error_t rv;
+
+ if ((rv = parse_uri (esm->server_uri, &args->sep_ext)))
+ return -1;
+ args->sep_ext.transport_proto = TRANSPORT_PROTO_TCP;
+ args->app_index = esm->app_index;
+
+ rv = vnet_listen (args);
+ esm->ctrl_listener_handle = args->handle;
+ return rv;
+}
+
+static int
echo_server_listen ()
{
i32 rv;
echo_server_main_t *esm = &echo_server_main;
- vnet_listen_args_t _args = { 0 }, *args = &_args;
-
- args->sep_ext.app_wrk_index = 0;
+ vnet_listen_args_t _args = {}, *args = &_args;
if ((rv = parse_uri (esm->server_uri, &args->sep_ext)))
{
return -1;
}
args->app_index = esm->app_index;
+ args->sep_ext.port = hs_make_data_port (args->sep_ext.port);
if (echo_client_transport_needs_crypto (args->sep_ext.transport_proto))
{
session_endpoint_alloc_ext_cfg (&args->sep_ext,
@@ -430,30 +623,36 @@ echo_server_create (vlib_main_t * vm, u8 * appns_id, u64 appns_flags,
{
echo_server_main_t *esm = &echo_server_main;
vlib_thread_main_t *vtm = vlib_get_thread_main ();
- u32 num_threads;
- int i;
+ es_worker_t *wrk;
- num_threads = 1 /* main thread */ + vtm->n_threads;
- vec_validate (echo_server_main.vpp_queue, num_threads - 1);
- vec_validate (esm->rx_buf, num_threads - 1);
- vec_validate (esm->rx_retries, num_threads - 1);
- for (i = 0; i < vec_len (esm->rx_retries); i++)
- vec_validate (esm->rx_retries[i],
- pool_elts (session_main.wrk[i].sessions));
esm->rcv_buffer_size = clib_max (esm->rcv_buffer_size, esm->fifo_size);
- for (i = 0; i < num_threads; i++)
- vec_validate (esm->rx_buf[i], esm->rcv_buffer_size);
+ vec_validate (esm->wrk, vtm->n_threads);
+
+ vec_foreach (wrk, esm->wrk)
+ {
+ wrk->thread_index = wrk - esm->wrk;
+ vec_validate (wrk->rx_buf, esm->rcv_buffer_size);
+ wrk->vpp_event_queue =
+ session_main_get_vpp_event_queue (wrk->thread_index);
+ }
if (echo_server_attach (appns_id, appns_flags, appns_secret))
{
- clib_warning ("failed to attach server");
+ es_err ("failed to attach server");
+ return -1;
+ }
+ if (echo_server_listen_ctrl ())
+ {
+ es_err ("failed to start listening on ctrl session");
+ if (echo_server_detach ())
+ es_err ("failed to detach");
return -1;
}
if (echo_server_listen ())
{
- clib_warning ("failed to start listening");
+ es_err ("failed to start listening");
if (echo_server_detach ())
- clib_warning ("failed to detach");
+ es_err ("failed to detach");
return -1;
}
return 0;
@@ -466,27 +665,16 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
echo_server_main_t *esm = &echo_server_main;
u8 server_uri_set = 0, *appns_id = 0;
- u64 tmp, appns_flags = 0, appns_secret = 0;
+ u64 appns_flags = 0, appns_secret = 0;
char *default_uri = "tcp://0.0.0.0/1234";
- int rv, is_stop = 0, barrier_acq_needed = 0;
+ int rv, is_stop = 0;
clib_error_t *error = 0;
- /* The request came over the binary api and the inband cli handler
- * is not mp_safe. Drop the barrier to make sure the workers are not
- * blocked.
- */
- if (vlib_num_workers () && vlib_thread_is_main_w_barrier ())
- {
- barrier_acq_needed = 1;
- vlib_worker_thread_barrier_release (vm);
- }
-
- esm->no_echo = 0;
esm->fifo_size = 64 << 10;
esm->rcv_buffer_size = 128 << 10;
esm->prealloc_fifos = 0;
esm->private_segment_count = 0;
- esm->private_segment_size = 0;
+ esm->private_segment_size = 512 << 20;
esm->tls_engine = CRYPTO_ENGINE_OPENSSL;
vec_free (esm->server_uri);
@@ -494,10 +682,9 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
if (unformat (input, "uri %s", &esm->server_uri))
server_uri_set = 1;
- else if (unformat (input, "no-echo"))
- esm->no_echo = 1;
- else if (unformat (input, "fifo-size %d", &esm->fifo_size))
- esm->fifo_size <<= 10;
+ else if (unformat (input, "fifo-size %U", unformat_memory_size,
+ &esm->fifo_size))
+ ;
else if (unformat (input, "rcv-buf-size %d", &esm->rcv_buffer_size))
;
else if (unformat (input, "prealloc-fifos %d", &esm->prealloc_fifos))
@@ -506,16 +693,8 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
&esm->private_segment_count))
;
else if (unformat (input, "private-segment-size %U",
- unformat_memory_size, &tmp))
- {
- if (tmp >= 0x100000000ULL)
- {
- error = clib_error_return (
- 0, "private segment size %lld (%llu) too large", tmp, tmp);
- goto cleanup;
- }
- esm->private_segment_size = tmp;
- }
+ unformat_memory_size, &esm->private_segment_size))
+ ;
else if (unformat (input, "appns %_%v%_", &appns_id))
;
else if (unformat (input, "all-scope"))
@@ -543,14 +722,14 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
if (esm->app_index == (u32) ~ 0)
{
- clib_warning ("server not running");
+ es_cli ("server not running");
error = clib_error_return (0, "failed: server not running");
goto cleanup;
}
rv = echo_server_detach ();
if (rv)
{
- clib_warning ("failed: detach");
+ es_cli ("failed: detach");
error = clib_error_return (0, "failed: server detach %d", rv);
goto cleanup;
}
@@ -561,7 +740,7 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (!server_uri_set)
{
- clib_warning ("No uri provided! Using default: %s", default_uri);
+ es_cli ("No uri provided! Using default: %s", default_uri);
esm->server_uri = (char *) format (0, "%s%c", default_uri, 0);
}
@@ -571,7 +750,6 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
goto cleanup;
}
esm->transport_proto = sep.transport_proto;
- esm->is_dgram = (sep.transport_proto == TRANSPORT_PROTO_UDP);
rv = echo_server_create (vm, appns_id, appns_flags, appns_secret);
if (rv)
@@ -584,29 +762,22 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
cleanup:
vec_free (appns_id);
- if (barrier_acq_needed)
- vlib_worker_thread_barrier_sync (vm);
-
return error;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (echo_server_create_command, static) =
-{
+VLIB_CLI_COMMAND (echo_server_create_command, static) = {
.path = "test echo server",
- .short_help = "test echo server proto <proto> [no echo][fifo-size <mbytes>]"
- "[rcv-buf-size <bytes>][prealloc-fifos <count>]"
- "[private-segment-count <count>][private-segment-size <bytes[m|g]>]"
- "[uri <tcp://ip/port>]",
+ .short_help =
+ "test echo server proto <proto> [fifo-size <mbytes>]"
+ "[rcv-buf-size <bytes>][prealloc-fifos <count>]"
+ "[private-segment-count <count>][private-segment-size <bytes[m|g]>]"
+ "[uri <tcp://ip/port>]",
.function = echo_server_create_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
echo_server_main_init (vlib_main_t * vm)
{
- echo_server_main_t *esm = &echo_server_main;
- esm->my_client_index = ~0;
return 0;
}
diff --git a/src/plugins/hs_apps/hs_apps.c b/src/plugins/hs_apps/hs_apps.c
index 5067919cc28..8e991954c7e 100644
--- a/src/plugins/hs_apps/hs_apps.c
+++ b/src/plugins/hs_apps/hs_apps.c
@@ -17,13 +17,11 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Host Stack Applications",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/hs_apps/hs_test.h b/src/plugins/hs_apps/hs_test.h
new file mode 100644
index 00000000000..167c7957229
--- /dev/null
+++ b/src/plugins/hs_apps/hs_test.h
@@ -0,0 +1,212 @@
+/*
+ * hs_test.h
+ *
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_hs_test_t__
+#define __included_hs_test_t__
+
+#include <vnet/session/application_interface.h>
+#include <vnet/session/session.h>
+
+#define HS_TEST_CFG_CTRL_MAGIC 0xfeedface
+#define HS_TEST_CFG_TXBUF_SIZE_DEF 8192
+#define HS_TEST_CFG_RXBUF_SIZE_DEF (64 * HS_TEST_CFG_TXBUF_SIZE_DEF)
+#define HS_TEST_CFG_NUM_WRITES_DEF 1000000
+
+#define VCL_TEST_TOKEN_HELP "#H"
+#define VCL_TEST_TOKEN_EXIT "#X"
+#define VCL_TEST_TOKEN_VERBOSE "#V"
+#define VCL_TEST_TOKEN_TXBUF_SIZE "#T:"
+#define VCL_TEST_TOKEN_NUM_TEST_SESS "#I:"
+#define VCL_TEST_TOKEN_NUM_WRITES "#N:"
+#define VCL_TEST_TOKEN_RXBUF_SIZE "#R:"
+#define VCL_TEST_TOKEN_SHOW_CFG "#C"
+#define HS_TEST_TOKEN_RUN_UNI "#U"
+#define HS_TEST_TOKEN_RUN_BI "#B"
+
+#define HS_TEST_SEPARATOR_STRING " -----------------------------\n"
+
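+/* api_context value that identifies the control-session connect */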
+#define HS_CTRL_HANDLE (~0)
+
+typedef enum
+{
+ HS_TEST_CMD_SYNC,
+ HS_TEST_CMD_START,
+ HS_TEST_CMD_STOP,
+} hs_test_cmd_t;
+
+typedef enum
+{
+ HS_TEST_TYPE_NONE,
+ HS_TEST_TYPE_ECHO,
+ HS_TEST_TYPE_UNI,
+ HS_TEST_TYPE_BI,
+ HS_TEST_TYPE_EXIT,
+ HS_TEST_TYPE_EXIT_CLIENT,
+} hs_test_t;
+
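+/* Wire format of the test config exchanged over the control session;
+ * packed so client and server agree on the layout */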
+typedef struct __attribute__ ((packed))
+{
+ uint32_t magic;
+ uint32_t seq_num;
+ uint32_t test;
+ uint32_t cmd;
+ uint32_t ctrl_handle;
+ uint32_t num_test_sessions;
+ uint32_t num_test_sessions_perq;
+ uint32_t num_test_qsessions;
+ uint32_t verbose;
+ uint32_t address_ip6;
+ uint32_t transport_udp;
+ uint64_t rxbuf_size;
+ uint64_t txbuf_size;
+ uint64_t num_writes;
+ uint64_t total_bytes;
+ uint32_t test_bytes;
+} hs_test_cfg_t;
+
+static inline char *
+hs_test_type_str (hs_test_t t)
+{
+ switch (t)
+ {
+ case HS_TEST_TYPE_NONE:
+ return "NONE";
+
+ case HS_TEST_TYPE_ECHO:
+ return "ECHO";
+
+ case HS_TEST_TYPE_UNI:
+ return "UNI";
+
+ case HS_TEST_TYPE_BI:
+ return "BI";
+
+ case HS_TEST_TYPE_EXIT:
+ return "EXIT";
+
+ default:
+ return "Unknown";
+ }
+}
+
+static inline int
+hs_test_cfg_verify (hs_test_cfg_t *cfg, hs_test_cfg_t *valid_cfg)
+{
+ /* Note: txbuf & rxbuf on server are the same buffer,
+ * so txbuf_size is not included in this check.
+ */
+ return ((cfg->magic == valid_cfg->magic) && (cfg->test == valid_cfg->test) &&
+ (cfg->verbose == valid_cfg->verbose) &&
+ (cfg->rxbuf_size == valid_cfg->rxbuf_size) &&
+ (cfg->num_writes == valid_cfg->num_writes) &&
+ (cfg->total_bytes == valid_cfg->total_bytes));
+}
+
+static inline void
+hs_test_cfg_init (hs_test_cfg_t *cfg)
+{
+ cfg->magic = HS_TEST_CFG_CTRL_MAGIC;
+ cfg->test = HS_TEST_TYPE_UNI;
+ cfg->ctrl_handle = ~0;
+ cfg->num_test_sessions = 1;
+ cfg->num_test_sessions_perq = 1;
+ cfg->verbose = 0;
+ cfg->rxbuf_size = HS_TEST_CFG_RXBUF_SIZE_DEF;
+ cfg->num_writes = HS_TEST_CFG_NUM_WRITES_DEF;
+ cfg->txbuf_size = HS_TEST_CFG_TXBUF_SIZE_DEF;
+ cfg->total_bytes = cfg->num_writes * cfg->txbuf_size;
+ cfg->test_bytes = 0;
+}
+
+static inline char *
+hs_test_cmd_to_str (int cmd)
+{
+ switch (cmd)
+ {
+ case HS_TEST_CMD_SYNC:
+ return "SYNC";
+ case HS_TEST_CMD_START:
+ return "START";
+ case HS_TEST_CMD_STOP:
+ return "STOP";
+ }
+ return "";
+}
+
+static inline void
+hs_test_cfg_dump (hs_test_cfg_t *cfg, uint8_t is_client)
+{
+ char *spc = " ";
+
+ printf (" test config (%p):\n" HS_TEST_SEPARATOR_STRING
+ " command: %s\n"
+ " magic: 0x%08x\n"
+ " seq_num: 0x%08x\n"
+ " test bytes: %s\n"
+ "%-5s test: %s (%d)\n"
+ " ctrl handle: %d (0x%x)\n"
+ "%-5s num test sockets: %u (0x%08x)\n"
+ "%-5s verbose: %s (%d)\n"
+ "%-5s rxbuf size: %lu (0x%08lx)\n"
+ "%-5s txbuf size: %lu (0x%08lx)\n"
+ "%-5s num writes: %lu (0x%08lx)\n"
+ " client tx bytes: %lu (0x%08lx)\n" HS_TEST_SEPARATOR_STRING,
+ (void *) cfg, hs_test_cmd_to_str (cfg->cmd), cfg->magic,
+ cfg->seq_num, cfg->test_bytes ? "yes" : "no",
+ is_client && (cfg->test == HS_TEST_TYPE_UNI) ?
+ "'" HS_TEST_TOKEN_RUN_UNI "'" :
+ is_client && (cfg->test == HS_TEST_TYPE_BI) ?
+ "'" HS_TEST_TOKEN_RUN_BI "'" :
+ spc,
+ hs_test_type_str (cfg->test), cfg->test, cfg->ctrl_handle,
+ cfg->ctrl_handle,
+ is_client ? "'" VCL_TEST_TOKEN_NUM_TEST_SESS "'" : spc,
+ cfg->num_test_sessions, cfg->num_test_sessions,
+ is_client ? "'" VCL_TEST_TOKEN_VERBOSE "'" : spc,
+ cfg->verbose ? "on" : "off", cfg->verbose,
+ is_client ? "'" VCL_TEST_TOKEN_RXBUF_SIZE "'" : spc, cfg->rxbuf_size,
+ cfg->rxbuf_size, is_client ? "'" VCL_TEST_TOKEN_TXBUF_SIZE "'" : spc,
+ cfg->txbuf_size, cfg->txbuf_size,
+ is_client ? "'" VCL_TEST_TOKEN_NUM_WRITES "'" : spc, cfg->num_writes,
+ cfg->num_writes, cfg->total_bytes, cfg->total_bytes);
+}
+
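+/* Data sessions use the control port + 1; ports are carried in network
+ * byte order */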
+static inline u16
+hs_make_data_port (u16 p)
+{
+ p = clib_net_to_host_u16 (p);
+ return clib_host_to_net_u16 (p + 1);
+}
+
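+/* Initialize the app-session view of a vpp session; for UDP, also capture
+ * the transport so dgram sends target the right peer */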
+static inline void
+hs_test_app_session_init_ (app_session_t *as, session_t *s)
+{
+ as->rx_fifo = s->rx_fifo;
+ as->tx_fifo = s->tx_fifo;
+ as->vpp_evt_q = session_main_get_vpp_event_queue (s->thread_index);
+ if (session_get_transport_proto (s) == TRANSPORT_PROTO_UDP)
+ {
+ transport_connection_t *tc;
+ tc = session_get_transport (s);
+ clib_memcpy_fast (&as->transport, tc, sizeof (as->transport));
+ as->is_dgram = 1;
+ }
+}
+
+#define hs_test_app_session_init(_as, _s) \
+ hs_test_app_session_init_ ((app_session_t *) (_as), (_s))
+
+#endif /* __included_hs_test_t__ */
diff --git a/src/plugins/hs_apps/http_cli.c b/src/plugins/hs_apps/http_cli.c
new file mode 100644
index 00000000000..5d4d49c0fba
--- /dev/null
+++ b/src/plugins/hs_apps/http_cli.c
@@ -0,0 +1,676 @@
+/*
+* Copyright (c) 2017-2019 Cisco and/or its affiliates.
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at:
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+#include <vnet/session/session.h>
+#include <http/http.h>
+
+typedef struct
+{
+ u32 hs_index;
+ u32 thread_index;
+ u64 node_index;
+ u8 *buf;
+} hcs_cli_args_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 session_index;
+ u32 thread_index;
+ u8 *tx_buf;
+ u32 tx_offset;
+ u32 vpp_session_index;
+} hcs_session_t;
+
+typedef struct
+{
+ hcs_session_t **sessions;
+ u32 *free_http_cli_process_node_indices;
+ u32 app_index;
+
+ /* Cert key pair for tls */
+ u32 ckpair_index;
+
+ u32 prealloc_fifos;
+ u32 private_segment_size;
+ u32 fifo_size;
+ u8 *uri;
+ vlib_main_t *vlib_main;
+} hcs_main_t;
+
+static hcs_main_t hcs_main;
+
+static hcs_session_t *
+hcs_session_alloc (u32 thread_index)
+{
+ hcs_main_t *hcm = &hcs_main;
+ hcs_session_t *hs;
+ pool_get (hcm->sessions[thread_index], hs);
+ memset (hs, 0, sizeof (*hs));
+ hs->session_index = hs - hcm->sessions[thread_index];
+ hs->thread_index = thread_index;
+ return hs;
+}
+
+static hcs_session_t *
+hcs_session_get (u32 thread_index, u32 hs_index)
+{
+ hcs_main_t *hcm = &hcs_main;
+ if (pool_is_free_index (hcm->sessions[thread_index], hs_index))
+ return 0;
+ return pool_elt_at_index (hcm->sessions[thread_index], hs_index);
+}
+
+static void
+hcs_session_free (hcs_session_t *hs)
+{
+ hcs_main_t *hcm = &hcs_main;
+ u32 thread = hs->thread_index;
+ if (CLIB_DEBUG)
+ memset (hs, 0xfa, sizeof (*hs));
+ pool_put (hcm->sessions[thread], hs);
+}
+
+static void
+hcs_cli_process_free (hcs_cli_args_t *args)
+{
+ vlib_main_t *vm = vlib_get_first_main ();
+ hcs_main_t *hcm = &hcs_main;
+ hcs_cli_args_t **save_args;
+ vlib_node_runtime_t *rt;
+ vlib_node_t *n;
+ u32 node_index;
+
+ node_index = args->node_index;
+ ASSERT (node_index != 0);
+
+ n = vlib_get_node (vm, node_index);
+ rt = vlib_node_get_runtime (vm, n->index);
+ save_args = vlib_node_get_runtime_data (vm, n->index);
+
+ /* Reset process session pointer */
+ clib_mem_free (*save_args);
+ *save_args = 0;
+
+ /* Turn off the process node */
+ vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+
+ /* add node index to the freelist */
+ vec_add1 (hcm->free_http_cli_process_node_indices, node_index);
+}
+
+/* Header, including incantation to suppress favicon.ico requests */
+static const char *html_header_template =
+ "<html><head><title>%v</title></head>"
+ "<link rel=\"icon\" href=\"data:,\">"
+ "<body><pre>";
+
+static const char *html_footer =
+ "</pre></body></html>\r\n";
+
+static void
+hcs_cli_output (uword arg, u8 *buffer, uword buffer_bytes)
+{
+ u8 **output_vecp = (u8 **) arg;
+ u8 *output_vec;
+ u32 offset;
+
+ output_vec = *output_vecp;
+
+ offset = vec_len (output_vec);
+ vec_validate (output_vec, offset + buffer_bytes - 1);
+ clib_memcpy_fast (output_vec + offset, buffer, buffer_bytes);
+
+ *output_vecp = output_vec;
+}
+
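+/* Enqueue the reply header and as much of the body as fits; if the fifo
+ * fills up, remember the offset and request a dequeue notification */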
+static void
+start_send_data (hcs_session_t *hs, http_status_code_t status)
+{
+ http_msg_t msg;
+ session_t *ts;
+ int rv;
+
+ msg.type = HTTP_MSG_REPLY;
+ msg.code = status;
+ msg.content_type = HTTP_CONTENT_TEXT_HTML;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = vec_len (hs->tx_buf);
+
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (!msg.data.len)
+ goto done;
+
+ rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (hs->tx_buf), hs->tx_buf);
+
+ if (rv != vec_len (hs->tx_buf))
+ {
+ hs->tx_offset = rv;
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ }
+ else
+ {
+ vec_free (hs->tx_buf);
+ }
+
+done:
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+}
+
+static void
+send_data_to_http (void *rpc_args)
+{
+ hcs_cli_args_t *args = (hcs_cli_args_t *) rpc_args;
+ hcs_session_t *hs;
+
+ hs = hcs_session_get (args->thread_index, args->hs_index);
+ if (!hs)
+ {
+ vec_free (args->buf);
+ goto cleanup;
+ }
+
+ hs->tx_buf = args->buf;
+ start_send_data (hs, HTTP_STATUS_OK);
+
+cleanup:
+
+ clib_mem_free (rpc_args);
+}
+
+static uword
+hcs_cli_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ u8 *request = 0, *reply = 0, *html = 0;
+ hcs_cli_args_t *args, *rpc_args;
+ hcs_main_t *hcm = &hcs_main;
+ hcs_cli_args_t **save_args;
+ unformat_input_t input;
+ int i;
+
+ save_args = vlib_node_get_runtime_data (hcm->vlib_main, rt->node_index);
+ args = *save_args;
+
+ request = args->buf;
+
+ /* Replace slashes with spaces, stop at the end of the path */
+ i = 0;
+ while (i < vec_len (request))
+ {
+ if (request[i] == '/')
+ request[i] = ' ';
+ else if (request[i] == ' ')
+ {
+ /* vlib_cli_input is vector-based, no need for a NULL */
+ vec_set_len (request, i);
+ break;
+ }
+ i++;
+ }
+
+ /* Generate the html header */
+ html = format (0, html_header_template, request /* title */ );
+
+ /* Run the command */
+ unformat_init_vector (&input, vec_dup (request));
+ vlib_cli_input (vm, &input, hcs_cli_output, (uword) &reply);
+ unformat_free (&input);
+ request = 0;
+
+ /* Generate the html page */
+ html = format (html, "%v", reply);
+ html = format (html, html_footer);
+
+ /* Send it */
+ rpc_args = clib_mem_alloc (sizeof (*args));
+ clib_memcpy_fast (rpc_args, args, sizeof (*args));
+ rpc_args->buf = html;
+
+ session_send_rpc_evt_to_thread_force (args->thread_index, send_data_to_http,
+ rpc_args);
+
+ vec_free (reply);
+ vec_free (args->buf);
+ hcs_cli_process_free (args);
+
+ return (0);
+}
+
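+/* Reuse a parked http-cli process node when one is free; otherwise
+ * register a fresh one */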
+static void
+alloc_cli_process (hcs_cli_args_t *args)
+{
+ hcs_main_t *hcm = &hcs_main;
+ vlib_main_t *vm = hcm->vlib_main;
+ hcs_cli_args_t **save_args;
+ vlib_node_t *n;
+ uword l;
+
+ l = vec_len (hcm->free_http_cli_process_node_indices);
+ if (l > 0)
+ {
+ n = vlib_get_node (vm, hcm->free_http_cli_process_node_indices[l - 1]);
+ vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
+ vec_set_len (hcm->free_http_cli_process_node_indices, l - 1);
+ }
+ else
+ {
+ static vlib_node_registration_t r = {
+ .function = hcs_cli_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 16,
+ .runtime_data_bytes = sizeof (void *),
+ };
+
+ vlib_register_node (vm, &r, "http-cli-%d", l);
+
+ n = vlib_get_node (vm, r.index);
+ }
+
+ /* Save the node index in the args. It won't be zero. */
+ args->node_index = n->index;
+
+ /* Save the args (pointer) in the node runtime */
+ save_args = vlib_node_get_runtime_data (vm, n->index);
+ *save_args = clib_mem_alloc (sizeof (*args));
+ clib_memcpy_fast (*save_args, args, sizeof (*args));
+
+ vlib_start_process (vm, n->runtime_index);
+}
+
+static void
+alloc_cli_process_callback (void *cb_args)
+{
+ alloc_cli_process ((hcs_cli_args_t *) cb_args);
+}
+
+static int
+hcs_ts_rx_callback (session_t *ts)
+{
+ hcs_cli_args_t args = {};
+ hcs_session_t *hs;
+ http_msg_t msg;
+ int rv;
+
+ hs = hcs_session_get (ts->thread_index, ts->opaque);
+
+ /* Read the http message header */
+ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET)
+ {
+ hs->tx_buf = 0;
+ start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
+ return 0;
+ }
+
+ /* send the command to a new/recycled vlib process */
+ vec_validate (args.buf, msg.data.len - 1);
+ rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, args.buf);
+ ASSERT (rv == msg.data.len);
+ vec_set_len (args.buf, rv);
+
+ args.hs_index = hs->session_index;
+ args.thread_index = ts->thread_index;
+
+ /* Send RPC request to main thread */
+ if (vlib_get_thread_index () != 0)
+ vlib_rpc_call_main_thread (alloc_cli_process_callback, (u8 *) &args,
+ sizeof (args));
+ else
+ alloc_cli_process (&args);
+ return 0;
+}
+
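+/*
+ * Builtin tx callback: resume sending tx_buf. If the fifo fills up, request
+ * a dequeue notification so transmission continues from tx_offset.
+ */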
+static int
+hcs_ts_tx_callback (session_t *ts)
+{
+ hcs_session_t *hs;
+ u32 to_send;
+ int rv;
+
+ hs = hcs_session_get (ts->thread_index, ts->opaque);
+ if (!hs || !hs->tx_buf)
+ return 0;
+
+ to_send = vec_len (hs->tx_buf) - hs->tx_offset;
+ rv = svm_fifo_enqueue (ts->tx_fifo, to_send, hs->tx_buf + hs->tx_offset);
+
+ if (rv <= 0)
+ {
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ return 0;
+ }
+
+ if (rv < to_send)
+ {
+ hs->tx_offset += rv;
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ }
+ else
+ {
+ vec_free (hs->tx_buf);
+ }
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+
+ return 0;
+}
+
+static int
+hcs_ts_accept_callback (session_t *ts)
+{
+ hcs_session_t *hs;
+
+ hs = hcs_session_alloc (ts->thread_index);
+ hs->vpp_session_index = ts->session_index;
+
+ ts->opaque = hs->session_index;
+ ts->session_state = SESSION_STATE_READY;
+
+ return 0;
+}
+
+static int
+hcs_ts_connected_callback (u32 app_index, u32 api_context, session_t *s,
+ session_error_t err)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+static void
+hcs_ts_disconnect_callback (session_t *s)
+{
+ hcs_main_t *hcm = &hcs_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static void
+hcs_ts_reset_callback (session_t *s)
+{
+ hcs_main_t *hcm = &hcs_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static void
+hcs_ts_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf)
+{
+ hcs_session_t *hs;
+
+ if (ntf == SESSION_CLEANUP_TRANSPORT)
+ return;
+
+ hs = hcs_session_get (s->thread_index, s->opaque);
+ if (!hs)
+ return;
+
+ vec_free (hs->tx_buf);
+ hcs_session_free (hs);
+}
+
+static int
+hcs_add_segment_callback (u32 client_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static int
+hcs_del_segment_callback (u32 client_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static session_cb_vft_t hcs_session_cb_vft = {
+ .session_accept_callback = hcs_ts_accept_callback,
+ .session_disconnect_callback = hcs_ts_disconnect_callback,
+ .session_connected_callback = hcs_ts_connected_callback,
+ .add_segment_callback = hcs_add_segment_callback,
+ .del_segment_callback = hcs_del_segment_callback,
+ .builtin_app_rx_callback = hcs_ts_rx_callback,
+ .builtin_app_tx_callback = hcs_ts_tx_callback,
+ .session_reset_callback = hcs_ts_reset_callback,
+ .session_cleanup_callback = hcs_ts_cleanup_callback,
+};
+
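+/*
+ * Attach the CLI server as a builtin application and register the test
+ * cert/key pair used when listening on crypto transports.
+ */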
+static int
+hcs_attach ()
+{
+ vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
+ hcs_main_t *hcm = &hcs_main;
+ u64 options[APP_OPTIONS_N_OPTIONS];
+ vnet_app_attach_args_t _a, *a = &_a;
+ u32 segment_size = 128 << 20;
+
+ clib_memset (a, 0, sizeof (*a));
+ clib_memset (options, 0, sizeof (options));
+
+ if (hcm->private_segment_size)
+ segment_size = hcm->private_segment_size;
+
+ a->api_client_index = ~0;
+ a->name = format (0, "http_cli_server");
+ a->session_cb_vft = &hcs_session_cb_vft;
+ a->options = options;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_RX_FIFO_SIZE] =
+ hcm->fifo_size ? hcm->fifo_size : 8 << 10;
+ a->options[APP_OPTIONS_TX_FIFO_SIZE] =
+ hcm->fifo_size ? hcm->fifo_size : 32 << 10;
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+ a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos;
+
+ if (vnet_application_attach (a))
+ {
+ vec_free (a->name);
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+ vec_free (a->name);
+ hcm->app_index = a->app_index;
+
+ clib_memset (ck_pair, 0, sizeof (*ck_pair));
+ ck_pair->cert = (u8 *) test_srv_crt_rsa;
+ ck_pair->key = (u8 *) test_srv_key_rsa;
+ ck_pair->cert_len = test_srv_crt_rsa_len;
+ ck_pair->key_len = test_srv_key_rsa_len;
+ vnet_app_add_cert_key_pair (ck_pair);
+ hcm->ckpair_index = ck_pair->index;
+
+ return 0;
+}
+
+static int
+hcs_transport_needs_crypto (transport_proto_t proto)
+{
+ return proto == TRANSPORT_PROTO_TLS || proto == TRANSPORT_PROTO_DTLS ||
+ proto == TRANSPORT_PROTO_QUIC;
+}
+
+static int
+hcs_listen ()
+{
+ session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
+ hcs_main_t *hcm = &hcs_main;
+ vnet_listen_args_t _a, *a = &_a;
+ char *uri = "tcp://0.0.0.0/80";
+ u8 need_crypto;
+ int rv;
+
+ clib_memset (a, 0, sizeof (*a));
+ a->app_index = hcm->app_index;
+
+ if (hcm->uri)
+ uri = (char *) hcm->uri;
+
+ if (parse_uri (uri, &sep))
+ return -1;
+
+ need_crypto = hcs_transport_needs_crypto (sep.transport_proto);
+
+ sep.transport_proto = TRANSPORT_PROTO_HTTP;
+ clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
+
+ if (need_crypto)
+ {
+ session_endpoint_alloc_ext_cfg (&a->sep_ext,
+ TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+ a->sep_ext.ext_cfg->crypto.ckpair_index = hcm->ckpair_index;
+ }
+
+ rv = vnet_listen (a);
+
+ if (need_crypto)
+ clib_mem_free (a->sep_ext.ext_cfg);
+
+ return rv;
+}
+
+static void
+hcs_detach ()
+{
+ vnet_app_detach_args_t _a, *a = &_a;
+ hcs_main_t *hcm = &hcs_main;
+ a->app_index = hcm->app_index;
+ a->api_client_index = APP_INVALID_INDEX;
+ hcm->app_index = ~0;
+ vnet_application_detach (a);
+}
+
+static int
+hcs_create (vlib_main_t *vm)
+{
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ hcs_main_t *hcm = &hcs_main;
+ u32 num_threads;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (hcm->sessions, num_threads - 1);
+
+ if (hcs_attach ())
+ {
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+ if (hcs_listen ())
+ {
+ hcs_detach ();
+ clib_warning ("failed to start listening");
+ return -1;
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+hcs_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ hcs_main_t *hcm = &hcs_main;
+ u64 seg_size;
+ int rv;
+
+ hcm->prealloc_fifos = 0;
+ hcm->private_segment_size = 0;
+ hcm->fifo_size = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ goto start_server;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "prealloc-fifos %d", &hcm->prealloc_fifos))
+ ;
+ else if (unformat (line_input, "private-segment-size %U",
+ unformat_memory_size, &seg_size))
+ hcm->private_segment_size = seg_size;
+ else if (unformat (line_input, "fifo-size %d", &hcm->fifo_size))
+ hcm->fifo_size <<= 10;
+ else if (unformat (line_input, "uri %s", &hcm->uri))
+ ;
+ else
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ }
+
+ unformat_free (line_input);
+
+start_server:
+
+ if (hcm->app_index != (u32) ~0)
+ return clib_error_return (0, "test http server is already running");
+
+ vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+
+ rv = hcs_create (vm);
+ switch (rv)
+ {
+ case 0:
+ break;
+ default:
+ return clib_error_return (0, "server_create returned %d", rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (hcs_create_command, static) = {
+ .path = "http cli server",
+ .short_help = "http cli server [uri <uri>] [fifo-size <nbytes>] "
+ "[private-segment-size <nMG>] [prealloc-fifos <n>]",
+ .function = hcs_create_command_fn,
+};
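+
+/*
+ * Illustrative use (addresses are examples): start the server with
+ *   vpp# http cli server uri tcp://0.0.0.0/80
+ * then a GET for /show/version runs "show version", since slashes in the
+ * request target are mapped to spaces.
+ */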
+
+static clib_error_t *
+hcs_main_init (vlib_main_t *vm)
+{
+ hcs_main_t *hcs = &hcs_main;
+
+ hcs->app_index = ~0;
+ hcs->vlib_main = vm;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (hcs_main_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/hs_apps/http_cli.h b/src/plugins/hs_apps/http_cli.h
new file mode 100644
index 00000000000..f774552d60f
--- /dev/null
+++ b/src/plugins/hs_apps/http_cli.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
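+
+/* Header, including incantation to suppress favicon.ico requests */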
+const char *html_header_template = "<html><head><title>%v</title></head>"
+ "<link rel=\"icon\" href=\"data:,\">"
+ "<body><pre>";
+
+const char *html_footer = "</pre></body></html>\r\n";
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c
new file mode 100644
index 00000000000..085a2b69bf7
--- /dev/null
+++ b/src/plugins/hs_apps/http_client_cli.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+#include <vnet/session/session.h>
+#include <http/http.h>
+#include <hs_apps/http_cli.h>
+
+#define HCC_DEBUG 0
+
+#if HCC_DEBUG
+#define HCC_DBG(_fmt, _args...) clib_warning (_fmt, ##_args)
+#else
+#define HCC_DBG(_fmt, _args...)
+#endif
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 session_index;
+ u32 thread_index;
+ u32 rx_offset;
+ u32 vpp_session_index;
+ u32 to_recv;
+ u8 is_closed;
+} hcc_session_t;
+
+typedef struct
+{
+ hcc_session_t *sessions;
+ u8 *rx_buf;
+ u32 thread_index;
+} hcc_worker_t;
+
+typedef struct
+{
+ hcc_worker_t *wrk;
+ u32 app_index;
+
+ u32 prealloc_fifos;
+ u32 private_segment_size;
+ u32 fifo_size;
+ u8 *uri;
+ u8 *http_query;
+ session_endpoint_cfg_t connect_sep;
+
+ u8 test_client_attached;
+ vlib_main_t *vlib_main;
+ u32 cli_node_index;
+ u8 *http_response;
+ u8 *appns_id;
+ u64 appns_secret;
+} hcc_main_t;
+
+typedef enum
+{
+ HCC_REPLY_RECEIVED = 100,
+} hcc_cli_signal_t;
+
+static hcc_main_t hcc_main;
+
+static hcc_worker_t *
+hcc_worker_get (u32 thread_index)
+{
+ return vec_elt_at_index (hcc_main.wrk, thread_index);
+}
+
+static hcc_session_t *
+hcc_session_alloc (hcc_worker_t *wrk)
+{
+ hcc_session_t *hs;
+ pool_get_zero (wrk->sessions, hs);
+ hs->session_index = hs - wrk->sessions;
+ hs->thread_index = wrk->thread_index;
+ return hs;
+}
+
+static hcc_session_t *
+hcc_session_get (u32 hs_index, u32 thread_index)
+{
+ hcc_worker_t *wrk = hcc_worker_get (thread_index);
+ return pool_elt_at_index (wrk->sessions, hs_index);
+}
+
+static void
+hcc_session_free (u32 thread_index, hcc_session_t *hs)
+{
+ hcc_worker_t *wrk = hcc_worker_get (thread_index);
+ pool_put (wrk->sessions, hs);
+}
+
+static int
+hcc_ts_accept_callback (session_t *ts)
+{
+ clib_warning ("bug");
+ return -1;
+}
+
+static void
+hcc_ts_disconnect_callback (session_t *s)
+{
+ hcc_main_t *hcm = &hcc_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static int
+hcc_ts_connected_callback (u32 app_index, u32 hc_index, session_t *as,
+ session_error_t err)
+{
+ hcc_main_t *hcm = &hcc_main;
+ hcc_session_t *hs, *new_hs;
+ hcc_worker_t *wrk;
+ http_msg_t msg;
+ int rv;
+
+ HCC_DBG ("hc_index: %d", hc_index);
+
+ if (err)
+ {
+ clib_warning ("connected error: hc_index(%d): %U", hc_index,
+ format_session_error, err);
+ return -1;
+ }
+
+  /* TODO: delete the half-open session once support is added in the http
+   * layer */
+ hs = hcc_session_get (hc_index, 0);
+ wrk = hcc_worker_get (as->thread_index);
+ new_hs = hcc_session_alloc (wrk);
+ clib_memcpy_fast (new_hs, hs, sizeof (*hs));
+
+ hs->vpp_session_index = as->session_index;
+
+ msg.type = HTTP_MSG_REQUEST;
+ msg.method_type = HTTP_REQ_GET;
+ msg.content_type = HTTP_CONTENT_TEXT_HTML;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = vec_len (hcm->http_query);
+
+ svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) },
+ { hcm->http_query, vec_len (hcm->http_query) } };
+
+ rv = svm_fifo_enqueue_segments (as->tx_fifo, segs, 2, 0 /* allow partial */);
+ if (rv < 0 || rv != sizeof (msg) + vec_len (hcm->http_query))
+ {
+ clib_warning ("failed app enqueue");
+ return -1;
+ }
+
+ if (svm_fifo_set_event (as->tx_fifo))
+ session_send_io_evt_to_thread (as->tx_fifo, SESSION_IO_EVT_TX);
+
+ return 0;
+}
+
+static void
+hcc_ts_reset_callback (session_t *s)
+{
+ hcc_main_t *hcm = &hcc_main;
+ hcc_session_t *hs;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ hs = hcc_session_get (s->opaque, s->thread_index);
+ hs->is_closed = 1;
+
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static int
+hcc_ts_tx_callback (session_t *ts)
+{
+ clib_warning ("bug");
+ return -1;
+}
+
+static void
+hcc_session_disconnect (session_t *s)
+{
+ hcc_main_t *hcm = &hcc_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static int
+hcc_ts_rx_callback (session_t *ts)
+{
+ hcc_main_t *hcm = &hcc_main;
+ hcc_session_t *hs;
+ http_msg_t msg;
+ int rv;
+
+ hs = hcc_session_get (ts->opaque, ts->thread_index);
+
+ if (hs->is_closed)
+ {
+ clib_warning ("session is closed");
+ return 0;
+ }
+
+ if (hs->to_recv == 0)
+ {
+ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.type != HTTP_MSG_REPLY || msg.code != HTTP_STATUS_OK)
+ {
+	  clib_warning ("unexpected msg type %d, code %d", msg.type,
+			msg.code);
+ return 0;
+ }
+ vec_validate (hcm->http_response, msg.data.len - 1);
+ vec_reset_length (hcm->http_response);
+ hs->to_recv = msg.data.len;
+ }
+
+ u32 max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
+
+ u32 n_deq = clib_min (hs->to_recv, max_deq);
+ u32 curr = vec_len (hcm->http_response);
+ rv = svm_fifo_dequeue (ts->rx_fifo, n_deq, hcm->http_response + curr);
+ if (rv < 0)
+ {
+ clib_warning ("app dequeue(n=%d) failed; rv = %d", n_deq, rv);
+ return -1;
+ }
+
+ if (rv != n_deq)
+ return -1;
+
+ vec_set_len (hcm->http_response, curr + n_deq);
+ ASSERT (hs->to_recv >= rv);
+ hs->to_recv -= rv;
+ HCC_DBG ("app rcvd %d, remains %d", rv, hs->to_recv);
+
+ if (hs->to_recv == 0)
+ {
+ hcc_session_disconnect (ts);
+ vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index,
+ HCC_REPLY_RECEIVED, 0);
+ }
+
+ return 0;
+}
+
+static void
+hcc_ts_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf)
+{
+ hcc_session_t *hs;
+
+  hs = hcc_session_get (s->opaque, s->thread_index);
+ if (!hs)
+ return;
+
+ hcc_session_free (s->thread_index, hs);
+}
+
+static session_cb_vft_t hcc_session_cb_vft = {
+ .session_accept_callback = hcc_ts_accept_callback,
+ .session_disconnect_callback = hcc_ts_disconnect_callback,
+ .session_connected_callback = hcc_ts_connected_callback,
+ .builtin_app_rx_callback = hcc_ts_rx_callback,
+ .builtin_app_tx_callback = hcc_ts_tx_callback,
+ .session_reset_callback = hcc_ts_reset_callback,
+ .session_cleanup_callback = hcc_ts_cleanup_callback,
+};
+
+static clib_error_t *
+hcc_attach ()
+{
+ hcc_main_t *hcm = &hcc_main;
+ vnet_app_attach_args_t _a, *a = &_a;
+  u64 options[APP_OPTIONS_N_OPTIONS];
+ u32 segment_size = 128 << 20;
+ int rv;
+
+ if (hcm->private_segment_size)
+ segment_size = hcm->private_segment_size;
+
+ clib_memset (a, 0, sizeof (*a));
+ clib_memset (options, 0, sizeof (options));
+
+ a->api_client_index = ~0;
+ a->name = format (0, "http_cli_client");
+ a->session_cb_vft = &hcc_session_cb_vft;
+ a->options = options;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_RX_FIFO_SIZE] =
+ hcm->fifo_size ? hcm->fifo_size : 8 << 10;
+ a->options[APP_OPTIONS_TX_FIFO_SIZE] =
+ hcm->fifo_size ? hcm->fifo_size : 32 << 10;
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+ a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos;
+ if (hcm->appns_id)
+ {
+ a->namespace_id = hcm->appns_id;
+ a->options[APP_OPTIONS_NAMESPACE_SECRET] = hcm->appns_secret;
+ }
+
+  if ((rv = vnet_application_attach (a)))
+    {
+      vec_free (a->name);
+      return clib_error_return (0, "attach returned %d", rv);
+    }
+
+  vec_free (a->name);
+  hcm->app_index = a->app_index;
+ hcm->test_client_attached = 1;
+ return 0;
+}
+
+static int
+hcc_connect_rpc (void *rpc_args)
+{
+ vnet_connect_args_t *a = rpc_args;
+ int rv;
+
+ rv = vnet_connect (a);
+ if (rv)
+    clib_warning ("connect returned: %U", format_session_error, rv);
+
+ vec_free (a);
+ return rv;
+}
+
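+/*
+ * Connects are programmed on the transport connect-loop thread
+ * (transport_cl_thread) via RPC rather than calling vnet_connect inline.
+ */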
+static void
+hcc_program_connect (vnet_connect_args_t *a)
+{
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ hcc_connect_rpc, a);
+}
+
+static clib_error_t *
+hcc_connect ()
+{
+ vnet_connect_args_t *a = 0;
+ hcc_main_t *hcm = &hcc_main;
+ hcc_worker_t *wrk;
+ hcc_session_t *hs;
+
+ vec_validate (a, 0);
+ clib_memset (a, 0, sizeof (a[0]));
+
+ clib_memcpy (&a->sep_ext, &hcm->connect_sep, sizeof (hcm->connect_sep));
+ a->app_index = hcm->app_index;
+
+ /* allocate http session on main thread */
+ wrk = hcc_worker_get (0);
+ hs = hcc_session_alloc (wrk);
+ a->api_context = hs->session_index;
+
+ hcc_program_connect (a);
+ return 0;
+}
+
+static clib_error_t *
+hcc_run (vlib_main_t *vm, int print_output)
+{
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ hcc_main_t *hcm = &hcc_main;
+ uword event_type, *event_data = 0;
+ u32 num_threads;
+ clib_error_t *err = 0;
+ hcc_worker_t *wrk;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+  vec_validate (hcm->wrk, num_threads - 1);
+ vec_foreach (wrk, hcm->wrk)
+ {
+ wrk->thread_index = wrk - hcm->wrk;
+ }
+
+ if ((err = hcc_attach ()))
+ {
+ return clib_error_return (0, "http client attach: %U", format_clib_error,
+ err);
+ }
+
+ if ((err = hcc_connect ()))
+ {
+ return clib_error_return (0, "http client connect: %U",
+ format_clib_error, err);
+ }
+
+ vlib_process_wait_for_event_or_clock (vm, 10);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0:
+ err = clib_error_return (0, "timeout");
+ goto cleanup;
+
+ case HCC_REPLY_RECEIVED:
+ if (print_output)
+ vlib_cli_output (vm, "%v", hcm->http_response);
+ vec_free (hcm->http_response);
+ break;
+ default:
+ err = clib_error_return (0, "unexpected event %d", event_type);
+ break;
+ }
+
+cleanup:
+ vec_free (event_data);
+ return err;
+}
+
+static int
+hcc_detach ()
+{
+ hcc_main_t *hcm = &hcc_main;
+ vnet_app_detach_args_t _da, *da = &_da;
+ int rv;
+
+ if (!hcm->test_client_attached)
+ return 0;
+
+ da->app_index = hcm->app_index;
+ da->api_client_index = ~0;
+ rv = vnet_application_detach (da);
+ hcm->test_client_attached = 0;
+ hcm->app_index = ~0;
+
+ return rv;
+}
+
+static clib_error_t *
+hcc_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ hcc_main_t *hcm = &hcc_main;
+ u64 seg_size;
+ u8 *appns_id = 0;
+ clib_error_t *err = 0;
+ int rv, print_output = 1;
+
+ hcm->prealloc_fifos = 0;
+ hcm->private_segment_size = 0;
+ hcm->fifo_size = 0;
+
+ if (hcm->test_client_attached)
+ return clib_error_return (0, "failed: already running!");
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, "expected URI");
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "prealloc-fifos %d", &hcm->prealloc_fifos))
+ ;
+ else if (unformat (line_input, "private-segment-size %U",
+ unformat_memory_size, &seg_size))
+ hcm->private_segment_size = seg_size;
+ else if (unformat (line_input, "fifo-size %d", &hcm->fifo_size))
+ hcm->fifo_size <<= 10;
+ else if (unformat (line_input, "uri %s", &hcm->uri))
+ ;
+ else if (unformat (line_input, "no-output"))
+ print_output = 0;
+ else if (unformat (line_input, "appns %_%v%_", &appns_id))
+ ;
+ else if (unformat (line_input, "secret %lu", &hcm->appns_secret))
+ ;
+ else if (unformat (line_input, "query %s", &hcm->http_query))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ vec_free (hcm->appns_id);
+ hcm->appns_id = appns_id;
+ hcm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
+
+ if (!hcm->uri)
+ {
+ err = clib_error_return (0, "URI not defined");
+ goto done;
+ }
+
+ if ((rv = parse_uri ((char *) hcm->uri, &hcm->connect_sep)))
+ {
+      err = clib_error_return (0, "URI parse error: %d", rv);
+ goto done;
+ }
+
+ vlib_worker_thread_barrier_sync (vm);
+ vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */);
+ vlib_worker_thread_barrier_release (vm);
+
+ err = hcc_run (vm, print_output);
+
+ if (hcc_detach ())
+ {
+ /* don't override last error */
+ if (!err)
+ err = clib_error_return (0, "failed: app detach");
+ clib_warning ("WARNING: app detach failed...");
+ }
+
+done:
+ vec_free (hcm->uri);
+ vec_free (hcm->http_query);
+ unformat_free (line_input);
+ return err;
+}
+
+VLIB_CLI_COMMAND (hcc_command, static) = {
+ .path = "http cli client",
+  .short_help = "http cli client [appns <app-ns> secret <appns-secret>] "
+		"uri http://<ip-addr> query <query-string> [no-output]",
+ .function = hcc_command_fn,
+ .is_mp_safe = 1,
+};
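+
+/*
+ * Illustrative use (address is an example), against a peer running the
+ * http cli server:
+ *   vpp# http cli client uri http://10.10.10.1/80 query /show/version
+ * The command output is printed unless no-output is given.
+ */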
+
+static clib_error_t *
+hcc_main_init (vlib_main_t *vm)
+{
+ hcc_main_t *hcm = &hcc_main;
+
+ hcm->app_index = ~0;
+ hcm->vlib_main = vm;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (hcc_main_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/hs_apps/http_server.c b/src/plugins/hs_apps/http_server.c
deleted file mode 100644
index a46e0a4ae13..00000000000
--- a/src/plugins/hs_apps/http_server.c
+++ /dev/null
@@ -1,1004 +0,0 @@
-/*
-* Copyright (c) 2017-2019 Cisco and/or its affiliates.
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at:
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-#include <vnet/vnet.h>
-#include <vnet/session/application.h>
-#include <vnet/session/application_interface.h>
-#include <vnet/session/session.h>
-#include <vppinfra/tw_timer_2t_1w_2048sl.h>
-
-typedef enum
-{
- EVENT_WAKEUP = 1,
-} http_process_event_t;
-
-typedef struct
-{
- u32 hs_index;
- u32 thread_index;
- u64 node_index;
-} http_server_args;
-
-typedef enum
-{
- HTTP_STATE_CLOSED,
- HTTP_STATE_ESTABLISHED,
- HTTP_STATE_OK_SENT,
-} http_session_state_t;
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-#define _(type, name) type name;
- foreach_app_session_field
-#undef _
- u32 thread_index;
- u8 *rx_buf;
- u32 vpp_session_index;
- u64 vpp_session_handle;
- u32 timer_handle;
-} http_session_t;
-
-typedef struct
-{
- http_session_t **sessions;
- clib_rwlock_t sessions_lock;
- u32 **session_to_http_session;
-
- svm_msg_q_t **vpp_queue;
-
- uword *handler_by_get_request;
-
- u32 *free_http_cli_process_node_indices;
-
- /* Sever's event queue */
- svm_queue_t *vl_input_queue;
-
- /* API client handle */
- u32 my_client_index;
-
- u32 app_index;
-
- /* process node index for evnt scheduling */
- u32 node_index;
-
- /* Cert key pair for tls */
- u32 ckpair_index;
-
- tw_timer_wheel_2t_1w_2048sl_t tw;
- clib_spinlock_t tw_lock;
-
- u32 prealloc_fifos;
- u32 private_segment_size;
- u32 fifo_size;
- u8 *uri;
- u32 is_static;
- vlib_main_t *vlib_main;
-} http_server_main_t;
-
-http_server_main_t http_server_main;
-
-static void
-http_server_sessions_reader_lock (void)
-{
- clib_rwlock_reader_lock (&http_server_main.sessions_lock);
-}
-
-static void
-http_server_sessions_reader_unlock (void)
-{
- clib_rwlock_reader_unlock (&http_server_main.sessions_lock);
-}
-
-static void
-http_server_sessions_writer_lock (void)
-{
- clib_rwlock_writer_lock (&http_server_main.sessions_lock);
-}
-
-static void
-http_server_sessions_writer_unlock (void)
-{
- clib_rwlock_writer_unlock (&http_server_main.sessions_lock);
-}
-
-static http_session_t *
-http_server_session_alloc (u32 thread_index)
-{
- http_server_main_t *hsm = &http_server_main;
- http_session_t *hs;
- pool_get (hsm->sessions[thread_index], hs);
- memset (hs, 0, sizeof (*hs));
- hs->session_index = hs - hsm->sessions[thread_index];
- hs->thread_index = thread_index;
- hs->timer_handle = ~0;
- return hs;
-}
-
-static http_session_t *
-http_server_session_get (u32 thread_index, u32 hs_index)
-{
- http_server_main_t *hsm = &http_server_main;
- if (pool_is_free_index (hsm->sessions[thread_index], hs_index))
- return 0;
- return pool_elt_at_index (hsm->sessions[thread_index], hs_index);
-}
-
-static void
-http_server_session_free (http_session_t * hs)
-{
- http_server_main_t *hsm = &http_server_main;
- u32 thread = hs->thread_index;
- if (CLIB_DEBUG)
- memset (hs, 0xfa, sizeof (*hs));
- pool_put (hsm->sessions[thread], hs);
-}
-
-static void
-http_server_session_lookup_add (u32 thread_index, u32 s_index, u32 hs_index)
-{
- http_server_main_t *hsm = &http_server_main;
- vec_validate (hsm->session_to_http_session[thread_index], s_index);
- hsm->session_to_http_session[thread_index][s_index] = hs_index;
-}
-
-static void
-http_server_session_lookup_del (u32 thread_index, u32 s_index)
-{
- http_server_main_t *hsm = &http_server_main;
- hsm->session_to_http_session[thread_index][s_index] = ~0;
-}
-
-static http_session_t *
-http_server_session_lookup (u32 thread_index, u32 s_index)
-{
- http_server_main_t *hsm = &http_server_main;
- u32 hs_index;
-
- if (s_index < vec_len (hsm->session_to_http_session[thread_index]))
- {
- hs_index = hsm->session_to_http_session[thread_index][s_index];
- return http_server_session_get (thread_index, hs_index);
- }
- return 0;
-}
-
-
-static void
-http_server_session_timer_start (http_session_t * hs)
-{
- u32 hs_handle;
- hs_handle = hs->thread_index << 24 | hs->session_index;
- clib_spinlock_lock (&http_server_main.tw_lock);
- hs->timer_handle = tw_timer_start_2t_1w_2048sl (&http_server_main.tw,
- hs_handle, 0, 60);
- clib_spinlock_unlock (&http_server_main.tw_lock);
-}
-
-static void
-http_server_session_timer_stop (http_session_t * hs)
-{
- if (hs->timer_handle == ~0)
- return;
- clib_spinlock_lock (&http_server_main.tw_lock);
- tw_timer_stop_2t_1w_2048sl (&http_server_main.tw, hs->timer_handle);
- clib_spinlock_unlock (&http_server_main.tw_lock);
-}
-
-static void
-http_server_session_disconnect (http_session_t * hs)
-{
- vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- a->handle = hs->vpp_session_handle;
- a->app_index = http_server_main.app_index;
- vnet_disconnect_session (a);
-}
-
-static void
-http_process_free (http_server_args * args)
-{
- vlib_node_runtime_t *rt;
- vlib_main_t *vm = vlib_get_first_main ();
- http_server_main_t *hsm = &http_server_main;
- vlib_node_t *n;
- u32 node_index;
- http_server_args **save_args;
-
- node_index = args->node_index;
- ASSERT (node_index != 0);
-
- n = vlib_get_node (vm, node_index);
- rt = vlib_node_get_runtime (vm, n->index);
- save_args = vlib_node_get_runtime_data (vm, n->index);
-
- /* Reset process session pointer */
- clib_mem_free (*save_args);
- *save_args = 0;
-
- /* Turn off the process node */
- vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
-
- /* add node index to the freelist */
- vec_add1 (hsm->free_http_cli_process_node_indices, node_index);
-}
-
-/* *INDENT-OFF* */
-static const char *http_ok =
- "HTTP/1.1 200 OK\r\n";
-
-static const char *http_response =
- "Content-Type: text/html\r\n"
- "Expires: Mon, 11 Jan 1970 10:10:10 GMT\r\n"
- "Connection: close \r\n"
- "Pragma: no-cache\r\n"
- "Content-Length: %d\r\n\r\n%v";
-
-static const char *http_error_template =
- "HTTP/1.1 %s\r\n"
- "Content-Type: text/html\r\n"
- "Expires: Mon, 11 Jan 1970 10:10:10 GMT\r\n"
- "Connection: close\r\n"
- "Pragma: no-cache\r\n"
- "Content-Length: 0\r\n\r\n";
-
-/* Header, including incantation to suppress favicon.ico requests */
-static const char *html_header_template =
- "<html><head><title>%v</title></head>"
- "<link rel=\"icon\" href=\"data:,\">"
- "<body><pre>";
-
-static const char *html_footer =
- "</pre></body></html>\r\n";
-
-static const char *html_header_static =
- "<html><head><title>static reply</title></head>"
- "<link rel=\"icon\" href=\"data:,\">"
- "<body><pre>hello</pre></body></html>\r\n";
-/* *INDENT-ON* */
-
-static u8 *static_http;
-static u8 *static_ok;
-
-static void
-http_cli_output (uword arg, u8 * buffer, uword buffer_bytes)
-{
- u8 **output_vecp = (u8 **) arg;
- u8 *output_vec;
- u32 offset;
-
- output_vec = *output_vecp;
-
- offset = vec_len (output_vec);
- vec_validate (output_vec, offset + buffer_bytes - 1);
- clib_memcpy_fast (output_vec + offset, buffer, buffer_bytes);
-
- *output_vecp = output_vec;
-}
-
-void
-send_data (http_session_t * hs, u8 * data)
-{
- http_server_main_t *hsm = &http_server_main;
- vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- vlib_main_t *vm = vlib_get_main ();
- f64 last_sent_timer = vlib_time_now (vm);
- u32 offset, bytes_to_send;
- f64 delay = 10e-3;
-
- bytes_to_send = vec_len (data);
- offset = 0;
-
- while (bytes_to_send > 0)
- {
- int actual_transfer;
-
- actual_transfer = svm_fifo_enqueue
- (hs->tx_fifo, bytes_to_send, data + offset);
-
- /* Made any progress? */
- if (actual_transfer <= 0)
- {
- http_server_sessions_reader_unlock ();
- vlib_process_suspend (vm, delay);
- http_server_sessions_reader_lock ();
-
- /* 10s deadman timer */
- if (vlib_time_now (vm) > last_sent_timer + 10.0)
- {
- a->handle = hs->vpp_session_handle;
- a->app_index = hsm->app_index;
- vnet_disconnect_session (a);
- break;
- }
- /* Exponential backoff, within reason */
- if (delay < 1.0)
- delay = delay * 2.0;
- }
- else
- {
- last_sent_timer = vlib_time_now (vm);
- offset += actual_transfer;
- bytes_to_send -= actual_transfer;
-
- if (svm_fifo_set_event (hs->tx_fifo))
- session_send_io_evt_to_thread (hs->tx_fifo,
- SESSION_IO_EVT_TX_FLUSH);
- delay = 10e-3;
- }
- }
-}
-
-static void
-send_error (http_session_t * hs, char *str)
-{
- u8 *data;
-
- data = format (0, http_error_template, str);
- send_data (hs, data);
- vec_free (data);
-}
-
-static uword
-http_cli_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
- vlib_frame_t * f)
-{
- u8 *request = 0, *reply = 0, *http = 0, *html = 0;
- http_server_main_t *hsm = &http_server_main;
- http_server_args **save_args;
- http_server_args *args;
- unformat_input_t input;
- http_session_t *hs;
- int i;
-
- save_args = vlib_node_get_runtime_data (hsm->vlib_main, rt->node_index);
- args = *save_args;
-
- http_server_sessions_reader_lock ();
-
- hs = http_server_session_get (args->thread_index, args->hs_index);
- ASSERT (hs);
-
- request = hs->rx_buf;
- if (vec_len (request) < 7)
- {
- send_error (hs, "400 Bad Request");
- goto out;
- }
-
- for (i = 0; i < vec_len (request) - 4; i++)
- {
- if (request[i] == 'G' &&
- request[i + 1] == 'E' &&
- request[i + 2] == 'T' && request[i + 3] == ' ')
- goto found;
- }
-bad_request:
- send_error (hs, "400 Bad Request");
- goto out;
-
-found:
- /* Lose "GET " */
- vec_delete (request, i + 5, 0);
-
- /* Replace slashes with spaces, stop at the end of the path */
- i = 0;
- while (1)
- {
- if (request[i] == '/')
- request[i] = ' ';
- else if (request[i] == ' ')
- {
- /* vlib_cli_input is vector-based, no need for a NULL */
- _vec_len (request) = i;
- break;
- }
- i++;
- /* Should never happen */
- if (i == vec_len (request))
- goto bad_request;
- }
-
- /* Generate the html header */
- html = format (0, html_header_template, request /* title */ );
-
- /* Run the command */
- unformat_init_vector (&input, vec_dup (request));
- vlib_cli_input (vm, &input, http_cli_output, (uword) & reply);
- unformat_free (&input);
- request = 0;
-
- /* Generate the html page */
- html = format (html, "%v", reply);
- html = format (html, html_footer);
- /* And the http reply */
- http = format (0, http_ok);
- http = format (http, http_response, vec_len (html), html);
-
- /* Send it */
- send_data (hs, http);
-
-out:
- /* Cleanup */
- http_server_sessions_reader_unlock ();
- vec_free (reply);
- vec_free (html);
- vec_free (http);
-
- http_process_free (args);
- return (0);
-}
-
-static void
-alloc_http_process (http_server_args * args)
-{
- char *name;
- vlib_node_t *n;
- http_server_main_t *hsm = &http_server_main;
- vlib_main_t *vm = hsm->vlib_main;
- uword l = vec_len (hsm->free_http_cli_process_node_indices);
- http_server_args **save_args;
-
- if (vec_len (hsm->free_http_cli_process_node_indices) > 0)
- {
- n = vlib_get_node (vm, hsm->free_http_cli_process_node_indices[l - 1]);
- vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
- _vec_len (hsm->free_http_cli_process_node_indices) = l - 1;
- }
- else
- {
- static vlib_node_registration_t r = {
- .function = http_cli_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .process_log2_n_stack_bytes = 16,
- .runtime_data_bytes = sizeof (void *),
- };
-
- name = (char *) format (0, "http-cli-%d", l);
- r.name = name;
- vlib_register_node (vm, &r);
- vec_free (name);
-
- n = vlib_get_node (vm, r.index);
- }
-
- /* Save the node index in the args. It won't be zero. */
- args->node_index = n->index;
-
- /* Save the args (pointer) in the node runtime */
- save_args = vlib_node_get_runtime_data (vm, n->index);
- *save_args = clib_mem_alloc (sizeof (*args));
- clib_memcpy_fast (*save_args, args, sizeof (*args));
-
- vlib_start_process (vm, n->runtime_index);
-}
-
-static void
-alloc_http_process_callback (void *cb_args)
-{
- alloc_http_process ((http_server_args *) cb_args);
-}
-
-static int
-session_rx_request (http_session_t * hs)
-{
- u32 max_dequeue, cursize;
- int n_read;
-
- cursize = vec_len (hs->rx_buf);
- max_dequeue = svm_fifo_max_dequeue_cons (hs->rx_fifo);
- if (PREDICT_FALSE (max_dequeue == 0))
- return -1;
-
- vec_validate (hs->rx_buf, cursize + max_dequeue - 1);
- n_read = app_recv_stream_raw (hs->rx_fifo, hs->rx_buf + cursize,
- max_dequeue, 0, 0 /* peek */ );
- ASSERT (n_read == max_dequeue);
- if (svm_fifo_is_empty_cons (hs->rx_fifo))
- svm_fifo_unset_event (hs->rx_fifo);
-
- _vec_len (hs->rx_buf) = cursize + n_read;
- return 0;
-}
-
-static int
-http_server_rx_callback (session_t * s)
-{
- http_server_args args;
- http_session_t *hs;
- int rv;
-
- http_server_sessions_reader_lock ();
-
- hs = http_server_session_lookup (s->thread_index, s->session_index);
- if (!hs || hs->session_state != HTTP_STATE_ESTABLISHED)
- return -1;
-
- rv = session_rx_request (hs);
- if (rv)
- return rv;
-
- /* send the command to a new/recycled vlib process */
- args.hs_index = hs->session_index;
- args.thread_index = hs->thread_index;
-
- http_server_sessions_reader_unlock ();
-
- /* Send RPC request to main thread */
- if (vlib_get_thread_index () != 0)
- vlib_rpc_call_main_thread (alloc_http_process_callback, (u8 *) & args,
- sizeof (args));
- else
- alloc_http_process (&args);
- return 0;
-}
-
-static int
-http_server_rx_callback_static (session_t * s)
-{
- http_session_t *hs;
- u32 request_len;
- u8 *request = 0;
- int i, rv;
-
- hs = http_server_session_lookup (s->thread_index, s->session_index);
- if (!hs || hs->session_state == HTTP_STATE_CLOSED)
- return 0;
-
- /* ok 200 was sent */
- if (hs->session_state == HTTP_STATE_OK_SENT)
- goto send_data;
-
- rv = session_rx_request (hs);
- if (rv)
- goto wait_for_data;
-
- request = hs->rx_buf;
- request_len = vec_len (request);
- if (vec_len (request) < 7)
- {
- send_error (hs, "400 Bad Request");
- goto close_session;
- }
-
- for (i = 0; i < request_len - 4; i++)
- {
- if (request[i] == 'G' &&
- request[i + 1] == 'E' &&
- request[i + 2] == 'T' && request[i + 3] == ' ')
- goto find_end;
- }
- send_error (hs, "400 Bad Request");
- goto close_session;
-
-find_end:
-
- /* check for the end sequence: /r/n/r/n */
- if (request[request_len - 1] != 0xa || request[request_len - 3] != 0xa
- || request[request_len - 2] != 0xd || request[request_len - 4] != 0xd)
- goto wait_for_data;
-
- /* send 200 OK first */
- send_data (hs, static_ok);
- hs->session_state = HTTP_STATE_OK_SENT;
- goto postpone;
-
-send_data:
- send_data (hs, static_http);
-
-close_session:
- http_server_session_disconnect (hs);
- return 0;
-
-postpone:
- (void) svm_fifo_set_event (hs->rx_fifo);
- session_send_io_evt_to_thread (hs->rx_fifo, SESSION_IO_EVT_BUILTIN_RX);
- return 0;
-
-wait_for_data:
- return 0;
-}
-
-static int
-http_server_session_accept_callback (session_t * s)
-{
- http_server_main_t *hsm = &http_server_main;
- http_session_t *hs;
-
- hsm->vpp_queue[s->thread_index] =
- session_main_get_vpp_event_queue (s->thread_index);
-
- if (!hsm->is_static)
- http_server_sessions_writer_lock ();
-
- hs = http_server_session_alloc (s->thread_index);
- http_server_session_lookup_add (s->thread_index, s->session_index,
- hs->session_index);
- hs->rx_fifo = s->rx_fifo;
- hs->tx_fifo = s->tx_fifo;
- hs->vpp_session_index = s->session_index;
- hs->vpp_session_handle = session_handle (s);
- hs->session_state = HTTP_STATE_ESTABLISHED;
- http_server_session_timer_start (hs);
-
- if (!hsm->is_static)
- http_server_sessions_writer_unlock ();
-
- s->session_state = SESSION_STATE_READY;
- return 0;
-}
-
-static void
-http_server_session_disconnect_callback (session_t * s)
-{
- http_server_main_t *hsm = &http_server_main;
- vnet_disconnect_args_t _a = { 0 }, *a = &_a;
-
- a->handle = session_handle (s);
- a->app_index = hsm->app_index;
- vnet_disconnect_session (a);
-}
-
-static void
-http_server_session_reset_callback (session_t * s)
-{
- http_server_main_t *hsm = &http_server_main;
- vnet_disconnect_args_t _a = { 0 }, *a = &_a;
-
- a->handle = session_handle (s);
- a->app_index = hsm->app_index;
- vnet_disconnect_session (a);
-}
-
-static int
-http_server_session_connected_callback (u32 app_index, u32 api_context,
- session_t * s, session_error_t err)
-{
- clib_warning ("called...");
- return -1;
-}
-
-static int
-http_server_add_segment_callback (u32 client_index, u64 segment_handle)
-{
- return 0;
-}
-
-static void
-http_server_cleanup_callback (session_t * s, session_cleanup_ntf_t ntf)
-{
- http_server_main_t *hsm = &http_server_main;
- http_session_t *hs;
-
- if (ntf == SESSION_CLEANUP_TRANSPORT)
- return;
-
- if (!hsm->is_static)
- http_server_sessions_writer_lock ();
-
- hs = http_server_session_lookup (s->thread_index, s->session_index);
- if (!hs)
- goto done;
-
- http_server_session_lookup_del (hs->thread_index, hs->vpp_session_index);
- vec_free (hs->rx_buf);
- http_server_session_timer_stop (hs);
- http_server_session_free (hs);
-
-done:
-
- if (!hsm->is_static)
- http_server_sessions_writer_unlock ();
-}
-
-static session_cb_vft_t http_server_session_cb_vft = {
- .session_accept_callback = http_server_session_accept_callback,
- .session_disconnect_callback = http_server_session_disconnect_callback,
- .session_connected_callback = http_server_session_connected_callback,
- .add_segment_callback = http_server_add_segment_callback,
- .builtin_app_rx_callback = http_server_rx_callback,
- .session_reset_callback = http_server_session_reset_callback,
- .session_cleanup_callback = http_server_cleanup_callback,
-};
-
-static int
-http_server_attach ()
-{
- vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
- http_server_main_t *hsm = &http_server_main;
- u64 options[APP_OPTIONS_N_OPTIONS];
- vnet_app_attach_args_t _a, *a = &_a;
- u32 segment_size = 128 << 20;
-
- clib_memset (a, 0, sizeof (*a));
- clib_memset (options, 0, sizeof (options));
-
- if (hsm->private_segment_size)
- segment_size = hsm->private_segment_size;
-
- a->api_client_index = ~0;
- a->name = format (0, "test_http_server");
- a->session_cb_vft = &http_server_session_cb_vft;
- a->options = options;
- a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
- a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
- a->options[APP_OPTIONS_RX_FIFO_SIZE] =
- hsm->fifo_size ? hsm->fifo_size : 8 << 10;
- a->options[APP_OPTIONS_TX_FIFO_SIZE] =
- hsm->fifo_size ? hsm->fifo_size : 32 << 10;
- a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
- a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hsm->prealloc_fifos;
-
- if (vnet_application_attach (a))
- {
- vec_free (a->name);
- clib_warning ("failed to attach server");
- return -1;
- }
- vec_free (a->name);
- hsm->app_index = a->app_index;
-
- clib_memset (ck_pair, 0, sizeof (*ck_pair));
- ck_pair->cert = (u8 *) test_srv_crt_rsa;
- ck_pair->key = (u8 *) test_srv_key_rsa;
- ck_pair->cert_len = test_srv_crt_rsa_len;
- ck_pair->key_len = test_srv_key_rsa_len;
- vnet_app_add_cert_key_pair (ck_pair);
- hsm->ckpair_index = ck_pair->index;
-
- return 0;
-}
-
-static int
-http_transport_needs_crypto (transport_proto_t proto)
-{
- return proto == TRANSPORT_PROTO_TLS || proto == TRANSPORT_PROTO_DTLS ||
- proto == TRANSPORT_PROTO_QUIC;
-}
-
-static int
-http_server_listen ()
-{
- session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
- http_server_main_t *hsm = &http_server_main;
- vnet_listen_args_t _a, *a = &_a;
- char *uri = "tcp://0.0.0.0/80";
- int rv;
-
- clib_memset (a, 0, sizeof (*a));
- a->app_index = hsm->app_index;
-
- if (hsm->uri)
- uri = (char *) hsm->uri;
-
- if (parse_uri (uri, &sep))
- return -1;
-
- clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
- if (http_transport_needs_crypto (a->sep_ext.transport_proto))
- {
- session_endpoint_alloc_ext_cfg (&a->sep_ext,
- TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
- a->sep_ext.ext_cfg->crypto.ckpair_index = hsm->ckpair_index;
- }
-
- rv = vnet_listen (a);
- if (a->sep_ext.ext_cfg)
- clib_mem_free (a->sep_ext.ext_cfg);
- return rv;
-}
-
-static void
-http_server_session_close_cb (void *hs_handlep)
-{
- http_session_t *hs;
- uword hs_handle;
- hs_handle = pointer_to_uword (hs_handlep);
- hs = http_server_session_get (hs_handle >> 24, hs_handle & 0x00FFFFFF);
- if (!hs)
- return;
- hs->timer_handle = ~0;
- http_server_session_disconnect (hs);
-}
-
-static void
-http_expired_timers_dispatch (u32 * expired_timers)
-{
- u32 hs_handle;
- int i;
-
- for (i = 0; i < vec_len (expired_timers); i++)
- {
- /* Get session handle. The first bit is the timer id */
- hs_handle = expired_timers[i] & 0x7FFFFFFF;
- session_send_rpc_evt_to_thread (hs_handle >> 24,
- http_server_session_close_cb,
- uword_to_pointer (hs_handle, void *));
- }
-}
-
-static uword
-http_server_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
- vlib_frame_t * f)
-{
- http_server_main_t *hsm = &http_server_main;
- f64 now, timeout = 1.0;
- uword *event_data = 0;
- uword __clib_unused event_type;
-
- while (1)
- {
- vlib_process_wait_for_event_or_clock (vm, timeout);
- now = vlib_time_now (vm);
- event_type = vlib_process_get_events (vm, (uword **) & event_data);
-
- /* expire timers */
- clib_spinlock_lock (&http_server_main.tw_lock);
- tw_timer_expire_timers_2t_1w_2048sl (&hsm->tw, now);
- clib_spinlock_unlock (&http_server_main.tw_lock);
-
- vec_reset_length (event_data);
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (http_server_process_node) =
-{
- .function = http_server_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "http-server-process",
- .state = VLIB_NODE_STATE_DISABLED,
-};
-/* *INDENT-ON* */
-
-static int
-http_server_create (vlib_main_t * vm)
-{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
- http_server_main_t *hsm = &http_server_main;
- u32 num_threads;
- vlib_node_t *n;
-
- num_threads = 1 /* main thread */ + vtm->n_threads;
- vec_validate (hsm->vpp_queue, num_threads - 1);
- vec_validate (hsm->sessions, num_threads - 1);
- vec_validate (hsm->session_to_http_session, num_threads - 1);
-
- clib_rwlock_init (&hsm->sessions_lock);
- clib_spinlock_init (&hsm->tw_lock);
-
- if (http_server_attach ())
- {
- clib_warning ("failed to attach server");
- return -1;
- }
- if (http_server_listen ())
- {
- clib_warning ("failed to start listening");
- return -1;
- }
-
- /* Init timer wheel and process */
- tw_timer_wheel_init_2t_1w_2048sl (&hsm->tw, http_expired_timers_dispatch,
- 1 /* timer interval */ , ~0);
- vlib_node_set_state (vm, http_server_process_node.index,
- VLIB_NODE_STATE_POLLING);
- n = vlib_get_node (vm, http_server_process_node.index);
- vlib_start_process (vm, n->runtime_index);
-
- return 0;
-}
-
-static clib_error_t *
-http_server_create_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- http_server_main_t *hsm = &http_server_main;
- unformat_input_t _line_input, *line_input = &_line_input;
- u64 seg_size;
- u8 *html;
- int rv;
-
- hsm->prealloc_fifos = 0;
- hsm->private_segment_size = 0;
- hsm->fifo_size = 0;
- hsm->is_static = 0;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- goto start_server;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "static"))
- hsm->is_static = 1;
- else
- if (unformat (line_input, "prealloc-fifos %d", &hsm->prealloc_fifos))
- ;
- else if (unformat (line_input, "private-segment-size %U",
- unformat_memory_size, &seg_size))
- {
- if (seg_size >= 0x100000000ULL)
- {
- vlib_cli_output (vm, "private segment size %llu, too large",
- seg_size);
- return 0;
- }
- hsm->private_segment_size = seg_size;
- }
- else if (unformat (line_input, "fifo-size %d", &hsm->fifo_size))
- hsm->fifo_size <<= 10;
- else if (unformat (line_input, "uri %s", &hsm->uri))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- }
- unformat_free (line_input);
-
-start_server:
-
- if (hsm->my_client_index != (u32) ~ 0)
- return clib_error_return (0, "test http server is already running");
-
- vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
-
- if (hsm->is_static)
- {
- http_server_session_cb_vft.builtin_app_rx_callback =
- http_server_rx_callback_static;
- html = format (0, html_header_static);
- static_http = format (0, http_response, vec_len (html), html);
- static_ok = format (0, http_ok);
- }
- rv = http_server_create (vm);
- switch (rv)
- {
- case 0:
- break;
- default:
- return clib_error_return (0, "server_create returned %d", rv);
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (http_server_create_command, static) =
-{
- .path = "test http server",
- .short_help = "test http server",
- .function = http_server_create_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-http_server_main_init (vlib_main_t * vm)
-{
- http_server_main_t *hsm = &http_server_main;
-
- hsm->my_client_index = ~0;
- hsm->vlib_main = vm;
- return 0;
-}
-
-VLIB_INIT_FUNCTION (http_server_main_init);
-
-/*
-* fd.io coding-style-patch-verification: ON
-*
-* Local Variables:
-* eval: (c-set-style "gnu")
-* End:
-*/
diff --git a/src/plugins/hs_apps/http_tps.c b/src/plugins/hs_apps/http_tps.c
new file mode 100644
index 00000000000..920f7ea731f
--- /dev/null
+++ b/src/plugins/hs_apps/http_tps.c
@@ -0,0 +1,839 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+#include <vnet/session/session.h>
+#include <http/http.h>
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 session_index;
+ u32 thread_index;
+ u64 data_len;
+ u64 data_offset;
+ u32 vpp_session_index;
+ union
+ {
+ /** threshold after which connection is closed */
+ f64 close_threshold;
+ /** rate at which accepted sessions are marked for random close */
+ u32 close_rate;
+ };
+ u8 *uri;
+} hts_session_t;
+
+typedef struct hts_listen_cfg_
+{
+ u8 *uri;
+ u32 vrf;
+ f64 rnd_close;
+ u8 is_del;
+} hts_listen_cfg_t;
+
+typedef struct hs_main_
+{
+ hts_session_t **sessions;
+ u32 app_index;
+
+ u32 ckpair_index;
+ u8 *test_data;
+
+ /** Hash table of listener uris to handles */
+ uword *uri_to_handle;
+
+ /*
+ * Configs
+ */
+ u8 *uri;
+ u32 fifo_size;
+ u64 segment_size;
+ u8 debug_level;
+ u8 no_zc;
+ u8 *default_uri;
+ u32 seed;
+} hts_main_t;
+
+static hts_main_t hts_main;
+
+static hts_session_t *
+hts_session_alloc (u32 thread_index)
+{
+ hts_main_t *htm = &hts_main;
+ hts_session_t *hs;
+
+ pool_get_zero (htm->sessions[thread_index], hs);
+ hs->session_index = hs - htm->sessions[thread_index];
+ hs->thread_index = thread_index;
+
+ return hs;
+}
+
+static hts_session_t *
+hts_session_get (u32 thread_index, u32 hts_index)
+{
+ hts_main_t *htm = &hts_main;
+
+ if (pool_is_free_index (htm->sessions[thread_index], hts_index))
+ return 0;
+
+ return pool_elt_at_index (htm->sessions[thread_index], hts_index);
+}
+
+static void
+hts_session_free (hts_session_t *hs)
+{
+ hts_main_t *htm = &hts_main;
+ u32 thread = hs->thread_index;
+
+ if (htm->debug_level > 0)
+ clib_warning ("Freeing session %u", hs->session_index);
+
+ if (CLIB_DEBUG)
+ clib_memset (hs, 0xfa, sizeof (*hs));
+
+ pool_put (htm->sessions[thread], hs);
+}
+
+static void
+hts_disconnect_transport (hts_session_t *hs)
+{
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+ hts_main_t *htm = &hts_main;
+ session_t *ts;
+
+ if (htm->debug_level > 0)
+ clib_warning ("Actively closing session %u", hs->session_index);
+
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
+ a->handle = session_handle (ts);
+ a->app_index = htm->app_index;
+ vnet_disconnect_session (a);
+}
+
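+/*
+ * Zero-copy tx: grow the fifo's chunk list and advance the enqueue pointer
+ * with svm_fifo_enqueue_nocopy, i.e., no payload bytes are copied.
+ */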
+static void
+hts_session_tx_zc (hts_session_t *hs, session_t *ts)
+{
+ u32 to_send, space;
+ u64 max_send;
+ int rv;
+
+ rv = svm_fifo_fill_chunk_list (ts->tx_fifo);
+ if (rv < 0)
+ {
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ return;
+ }
+
+ max_send = hs->data_len - hs->data_offset;
+ space = svm_fifo_max_enqueue (ts->tx_fifo);
+ ASSERT (space != 0);
+ to_send = clib_min (space, max_send);
+
+ svm_fifo_enqueue_nocopy (ts->tx_fifo, to_send);
+
+ hs->data_offset += to_send;
+
+ if (to_send < max_send)
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+}
+
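+/*
+ * Copy tx fallback: stream from the static test_data buffer, using a
+ * two-segment enqueue to wrap when the send window crosses the buffer end.
+ */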
+static void
+hts_session_tx_no_zc (hts_session_t *hs, session_t *ts)
+{
+ u32 n_segs, buf_offset, buf_left;
+ u64 max_send = 32 << 10, left;
+ hts_main_t *htm = &hts_main;
+ svm_fifo_seg_t seg[2];
+ int sent;
+
+ left = hs->data_len - hs->data_offset;
+ max_send = clib_min (left, max_send);
+ buf_offset = hs->data_offset % vec_len (htm->test_data);
+ buf_left = vec_len (htm->test_data) - buf_offset;
+
+ if (buf_left < max_send)
+ {
+ seg[0].data = htm->test_data + buf_offset;
+ seg[0].len = buf_left;
+ seg[1].data = htm->test_data;
+ seg[1].len = max_send - buf_left;
+ n_segs = 2;
+ }
+ else
+ {
+ seg[0].data = htm->test_data + buf_offset;
+ seg[0].len = max_send;
+ n_segs = 1;
+ }
+
+ sent = svm_fifo_enqueue_segments (ts->tx_fifo, seg, n_segs,
+ 1 /* allow partial */);
+
+ if (sent <= 0)
+ {
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ return;
+ }
+
+ hs->data_offset += sent;
+
+ if (sent < left)
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+}
+
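+/*
+ * After each tx burst, sessions marked for random close disconnect once
+ * the fraction of data sent exceeds their close_threshold.
+ */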
+static inline void
+hts_session_tx (hts_session_t *hs, session_t *ts)
+{
+ hts_main_t *htm = &hts_main;
+
+ if (!htm->no_zc)
+ hts_session_tx_zc (hs, ts);
+ else
+ hts_session_tx_no_zc (hs, ts);
+
+ if (hs->close_threshold > 0)
+ {
+ if ((f64) hs->data_offset / hs->data_len > hs->close_threshold)
+ hts_disconnect_transport (hs);
+ }
+}
+
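+/*
+ * Enqueue the http reply header; any payload is then streamed by
+ * hts_session_tx.
+ */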
+static void
+hts_start_send_data (hts_session_t *hs, http_status_code_t status)
+{
+ http_msg_t msg;
+ session_t *ts;
+ int rv;
+
+ msg.type = HTTP_MSG_REPLY;
+ msg.code = status;
+ msg.content_type = HTTP_CONTENT_APP_OCTET_STREAM;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = hs->data_len;
+
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (!msg.data.len)
+ {
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+ return;
+ }
+
+ hts_session_tx (hs, ts);
+}
+
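+/*
+ * Request targets of the form "test_file_<size>" (parsed with
+ * unformat_memory_size, e.g. test_file_64m) select how many bytes the
+ * reply carries.
+ */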
+static int
+try_test_file (hts_session_t *hs, u8 *request)
+{
+ char *test_str = "test_file";
+ hts_main_t *htm = &hts_main;
+ unformat_input_t input;
+ uword file_size;
+ int rc = 0;
+
+ if (memcmp (request, test_str, clib_strnlen (test_str, 9)))
+ return -1;
+
+ unformat_init_vector (&input, vec_dup (request));
+ if (!unformat (&input, "test_file_%U", unformat_memory_size, &file_size))
+ {
+ rc = -1;
+ goto done;
+ }
+
+ if (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ rc = -1;
+ goto done;
+ }
+
+ if (htm->debug_level)
+ clib_warning ("Requested file size %U", format_memory_size, file_size);
+
+ hs->data_len = file_size;
+ hs->data_offset = 0;
+
+ if (hs->close_threshold > 0)
+ {
+ /* Disconnect if the header is already enough to fill the quota */
+ if ((f64) 30 / hs->data_len > hs->close_threshold)
+ {
+ hts_disconnect_transport (hs);
+ goto done;
+ }
+ }
+
+ hts_start_send_data (hs, HTTP_STATUS_OK);
+
+done:
+ unformat_free (&input);
+
+ return rc;
+}
+
+static int
+hts_ts_rx_callback (session_t *ts)
+{
+ hts_session_t *hs;
+ u8 *request = 0;
+ http_msg_t msg;
+ int rv;
+
+ hs = hts_session_get (ts->thread_index, ts->opaque);
+
+ /* Read the http message header */
+ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET)
+ {
+ hts_start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
+ goto done;
+ }
+
+ if (!msg.data.len)
+ {
+ hts_start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
+ goto done;
+ }
+
+ vec_validate (request, msg.data.len - 1);
+  rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, request);
+  ASSERT (rv == msg.data.len);
+
+  if (try_test_file (hs, request))
+    hts_start_send_data (hs, HTTP_STATUS_NOT_FOUND);
+
+done:
+
+  vec_free (request);
+  return 0;
+}
+
+static int
+hs_ts_tx_callback (session_t *ts)
+{
+ hts_session_t *hs;
+
+ hs = hts_session_get (ts->thread_index, ts->opaque);
+ if (!hs)
+ return 0;
+
+ hts_session_tx (hs, ts);
+
+ return 0;
+}
+
+static int
+hts_ts_accept_callback (session_t *ts)
+{
+ hts_main_t *htm = &hts_main;
+ hts_session_t *hs, *lhs;
+ session_t *ls;
+
+ hs = hts_session_alloc (ts->thread_index);
+ hs->vpp_session_index = ts->session_index;
+
+ ts->opaque = hs->session_index;
+ ts->session_state = SESSION_STATE_READY;
+
+ /* Check if listener configured for random closes */
+ ls = listen_session_get_from_handle (ts->listener_handle);
+ lhs = hts_session_get (0, ls->opaque);
+
+ if (lhs->close_rate)
+ {
+      /* overload the listener's data_offset as a session counter */
+ u32 cnt = __atomic_add_fetch (&lhs->data_offset, 1, __ATOMIC_RELEASE);
+ if ((cnt % lhs->close_rate) == 0)
+ hs->close_threshold = random_f64 (&htm->seed);
+ }
+
+ if (htm->debug_level > 0)
+ clib_warning ("Accepted session %u close threshold %.2f", ts->opaque,
+ hs->close_threshold);
+
+ return 0;
+}
+
+static int
+hts_ts_connected_callback (u32 app_index, u32 api_context, session_t *s,
+ session_error_t err)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+static void
+hts_ts_disconnect_callback (session_t *ts)
+{
+ hts_main_t *htm = &hts_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ if (htm->debug_level > 0)
+ clib_warning ("Transport closing session %u", ts->opaque);
+
+ a->handle = session_handle (ts);
+ a->app_index = htm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static void
+hts_ts_reset_callback (session_t *ts)
+{
+ hts_main_t *htm = &hts_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ if (htm->debug_level > 0)
+ clib_warning ("Transport reset session %u", ts->opaque);
+
+ a->handle = session_handle (ts);
+ a->app_index = htm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static void
+hts_ts_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf)
+{
+ hts_session_t *hs;
+
+ if (ntf == SESSION_CLEANUP_TRANSPORT)
+ return;
+
+ hs = hts_session_get (s->thread_index, s->opaque);
+ if (!hs)
+ return;
+
+ hts_session_free (hs);
+}
+
+static int
+hts_add_segment_callback (u32 client_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static int
+hts_del_segment_callback (u32 client_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static session_cb_vft_t hs_session_cb_vft = {
+ .session_accept_callback = hts_ts_accept_callback,
+ .session_disconnect_callback = hts_ts_disconnect_callback,
+ .session_connected_callback = hts_ts_connected_callback,
+ .add_segment_callback = hts_add_segment_callback,
+ .del_segment_callback = hts_del_segment_callback,
+ .builtin_app_rx_callback = hts_ts_rx_callback,
+ .builtin_app_tx_callback = hs_ts_tx_callback,
+ .session_reset_callback = hts_ts_reset_callback,
+ .session_cleanup_callback = hts_ts_cleanup_callback,
+};
+
+static int
+hts_attach (hts_main_t *hm)
+{
+ vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
+ u64 options[APP_OPTIONS_N_OPTIONS];
+ vnet_app_attach_args_t _a, *a = &_a;
+
+ clib_memset (a, 0, sizeof (*a));
+ clib_memset (options, 0, sizeof (options));
+
+ a->api_client_index = ~0;
+ a->name = format (0, "http_tps");
+ a->session_cb_vft = &hs_session_cb_vft;
+ a->options = options;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = hm->segment_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = hm->segment_size;
+ a->options[APP_OPTIONS_RX_FIFO_SIZE] = hm->fifo_size;
+ a->options[APP_OPTIONS_TX_FIFO_SIZE] = hm->fifo_size;
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+
+ if (vnet_application_attach (a))
+ {
+ vec_free (a->name);
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+ vec_free (a->name);
+ hm->app_index = a->app_index;
+
+ clib_memset (ck_pair, 0, sizeof (*ck_pair));
+ ck_pair->cert = (u8 *) test_srv_crt_rsa;
+ ck_pair->key = (u8 *) test_srv_key_rsa;
+ ck_pair->cert_len = test_srv_crt_rsa_len;
+ ck_pair->key_len = test_srv_key_rsa_len;
+ vnet_app_add_cert_key_pair (ck_pair);
+ hm->ckpair_index = ck_pair->index;
+
+ return 0;
+}
+
+static int
+hts_transport_needs_crypto (transport_proto_t proto)
+{
+ return proto == TRANSPORT_PROTO_TLS || proto == TRANSPORT_PROTO_DTLS ||
+ proto == TRANSPORT_PROTO_QUIC;
+}
+
+static int
+hts_start_listen (hts_main_t *htm, session_endpoint_cfg_t *sep, u8 *uri,
+ f64 rnd_close)
+{
+ vnet_listen_args_t _a, *a = &_a;
+ u8 need_crypto;
+ hts_session_t *hls;
+ session_t *ls;
+ u32 thread_index = 0;
+ int rv;
+
+ clib_memset (a, 0, sizeof (*a));
+ a->app_index = htm->app_index;
+
+ need_crypto = hts_transport_needs_crypto (sep->transport_proto);
+
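+  /* The uri's transport (e.g. tls) decides whether a crypto context is
+   * needed; check before the proto is overwritten with HTTP below. */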
+ sep->transport_proto = TRANSPORT_PROTO_HTTP;
+ clib_memcpy (&a->sep_ext, sep, sizeof (*sep));
+
+ if (need_crypto)
+ {
+ session_endpoint_alloc_ext_cfg (&a->sep_ext,
+ TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+ a->sep_ext.ext_cfg->crypto.ckpair_index = htm->ckpair_index;
+ }
+
+ rv = vnet_listen (a);
+
+ if (need_crypto)
+ clib_mem_free (a->sep_ext.ext_cfg);
+
+ if (rv)
+ return rv;
+
+ hls = hts_session_alloc (thread_index);
+ hls->uri = vec_dup (uri);
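+  /* rnd_close is the fraction of sessions to close early, i.e., every
+   * close_rate-th accepted session gets a random close threshold */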
+  hls->close_rate = rnd_close ? (f64) 1 / rnd_close : 0;
+ ls = listen_session_get_from_handle (a->handle);
+ hls->vpp_session_index = ls->session_index;
+ hash_set_mem (htm->uri_to_handle, hls->uri, hls->session_index);
+
+  /* opaque holds the index of hls; `hts_ts_accept_callback` uses it to
+   * get back the pointer to hls */
+ ls->opaque = hls - htm->sessions[thread_index];
+
+ return 0;
+}
+
+static int
+hts_stop_listen (hts_main_t *htm, u32 hls_index)
+{
+ hts_session_t *hls;
+ session_t *ls;
+
+ hls = hts_session_get (0, hls_index);
+ ls = listen_session_get (hls->vpp_session_index);
+
+ vnet_unlisten_args_t ua = {
+ .handle = listen_session_get_handle (ls),
+ .app_index = htm->app_index,
+ .wrk_map_index = 0 /* default wrk */
+ };
+
+ hash_unset_mem (htm->uri_to_handle, hls->uri);
+
+ if (vnet_unlisten (&ua))
+ return -1;
+
+ vec_free (hls->uri);
+ hts_session_free (hls);
+
+ return 0;
+}
+
+static clib_error_t *
+hts_listen (hts_main_t *htm, hts_listen_cfg_t *lcfg)
+{
+ session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
+ clib_error_t *error = 0;
+ u8 *uri, *uri_key;
+ uword *p;
+ int rv;
+
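+  /* Listeners are keyed by vrf-prefixed uri, so the same uri may be
+   * bound in multiple fib tables */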
+ uri = lcfg->uri ? lcfg->uri : htm->default_uri;
+ uri_key = format (0, "vrf%u-%s", lcfg->vrf, uri);
+ p = hash_get_mem (htm->uri_to_handle, uri_key);
+
+ if (lcfg->is_del)
+ {
+ if (!p)
+ error = clib_error_return (0, "not listening on %v", uri);
+ else if (hts_stop_listen (htm, p[0]))
+ error = clib_error_return (0, "failed to unlisten");
+ goto done;
+ }
+
+ if (p)
+ {
+ error = clib_error_return (0, "already listening %v", uri);
+ goto done;
+ }
+
+ if (parse_uri ((char *) uri, &sep))
+ {
+ error = clib_error_return (0, "failed to parse uri %v", uri);
+ goto done;
+ }
+
+ if (lcfg->vrf)
+ {
+ fib_protocol_t fp;
+ u32 fib_index;
+
+ fp = sep.is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
+ fib_index = fib_table_find (fp, lcfg->vrf);
+ if (fib_index == ~0)
+ {
+ error = clib_error_return (0, "no such vrf %u", lcfg->vrf);
+ goto done;
+ }
+ sep.fib_index = fib_index;
+ }
+
+ if ((rv = hts_start_listen (htm, &sep, uri_key, lcfg->rnd_close)))
+ {
+ error = clib_error_return (0, "failed to listen on %v: %U", uri,
+ format_session_error, rv);
+ }
+
+done:
+
+ vec_free (uri_key);
+ return error;
+}
+
+static int
+hts_create (vlib_main_t *vm)
+{
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ hts_main_t *htm = &hts_main;
+ u32 num_threads;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (htm->sessions, num_threads - 1);
+
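+  /* When zero-copy is disabled, sends are sourced from this preallocated
+   * 64kB test data buffer */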
+ if (htm->no_zc)
+ vec_validate (htm->test_data, (64 << 10) - 1);
+
+ if (hts_attach (htm))
+ {
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+
+ htm->default_uri = format (0, "tcp://0.0.0.0/80%c", 0);
+ htm->uri_to_handle = hash_create_vec (0, sizeof (u8), sizeof (uword));
+
+ return 0;
+}
+
+static clib_error_t *
+hts_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ hts_main_t *htm = &hts_main;
+ hts_listen_cfg_t lcfg = {};
+ clib_error_t *error = 0;
+ u64 mem_size;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ goto start_server;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "private-segment-size %U",
+ unformat_memory_size, &mem_size))
+ htm->segment_size = mem_size;
+ else if (unformat (line_input, "fifo-size %U", unformat_memory_size,
+ &mem_size))
+ htm->fifo_size = mem_size;
+ else if (unformat (line_input, "no-zc"))
+ htm->no_zc = 1;
+ else if (unformat (line_input, "debug"))
+ htm->debug_level = 1;
+ else if (unformat (line_input, "vrf %u", &lcfg.vrf))
+ ;
+ else if (unformat (line_input, "uri %s", &lcfg.uri))
+ ;
+ else if (unformat (line_input, "rnd-close %f", &lcfg.rnd_close))
+ {
+ if (lcfg.rnd_close > 1.0)
+ {
+ error = clib_error_return (0, "invalid rnd close value %f",
+ lcfg.rnd_close);
+ break;
+ }
+ }
+ else if (unformat (line_input, "del"))
+ lcfg.is_del = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (error)
+ goto done;
+
+start_server:
+
+ if (htm->app_index == (u32) ~0)
+ {
+ vnet_session_enable_disable (vm, 1 /* is_enable */);
+
+ if (hts_create (vm))
+ {
+ error = clib_error_return (0, "http tps create failed");
+ goto done;
+ }
+ }
+
+ error = hts_listen (htm, &lcfg);
+
+done:
+
+ vec_free (lcfg.uri);
+ return error;
+}
+
+VLIB_CLI_COMMAND (http_tps_command, static) = {
+ .path = "http tps",
+  .short_help = "http tps [uri <uri>] [fifo-size <nbytes>] "
+		"[private-segment-size <nMG>] [vrf <nn>] [rnd-close <f>] "
+		"[debug] [no-zc] [del]",
+ .function = hts_create_command_fn,
+};
+
+static clib_error_t *
+hts_show_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ hts_main_t *htm = &hts_main;
+ clib_error_t *error = 0;
+ u8 do_listeners = 0;
+ hts_session_t **sessions;
+ u32 n_listeners = 0, n_sessions = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ goto no_input;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "listeners"))
+ do_listeners = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+
+ if (error)
+ return error;
+
+no_input:
+
+ if (htm->app_index == ~0)
+ {
+ vlib_cli_output (vm, "http tps not enabled");
+ goto done;
+ }
+
+ if (do_listeners)
+ {
+ uword handle;
+ u8 *s = 0, *uri;
+
+ /* clang-format off */
+ hash_foreach (uri, handle, htm->uri_to_handle, ({
+ s = format (s, "%-30v%lx\n", uri, handle);
+ }));
+ /* clang-format on */
+
+ if (s)
+ {
+ vlib_cli_output (vm, "%-29s%s", "URI", "Index");
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ }
+ goto done;
+ }
+
+ n_listeners = hash_elts (htm->uri_to_handle);
+ vec_foreach (sessions, htm->sessions)
+ n_sessions += pool_elts (*sessions);
+
+  vlib_cli_output (vm, " app index: %u\n listeners: %u\n sessions: %u",
+ htm->app_index, n_listeners, n_sessions - n_listeners);
+
+done:
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_http_tps_command, static) = {
+ .path = "show http tps",
+  .short_help = "show http tps [listeners]",
+ .function = hts_show_command_fn,
+};
+
+static clib_error_t *
+hs_main_init (vlib_main_t *vm)
+{
+ hts_main_t *htm = &hts_main;
+
+ htm->app_index = ~0;
+ htm->segment_size = 128 << 20;
+ htm->fifo_size = 64 << 10;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (hs_main_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/hs_apps/proxy.c b/src/plugins/hs_apps/proxy.c
index eb0d70277da..e8fedf921a5 100644
--- a/src/plugins/hs_apps/proxy.c
+++ b/src/plugins/hs_apps/proxy.c
@@ -66,27 +66,69 @@ proxy_call_main_thread (vnet_connect_args_t * a)
}
static proxy_session_t *
-proxy_get_active_open (proxy_main_t * pm, session_handle_t handle)
+proxy_session_alloc (void)
{
- proxy_session_t *ps = 0;
- uword *p;
+ proxy_main_t *pm = &proxy_main;
+ proxy_session_t *ps;
+
+ pool_get_zero (pm->sessions, ps);
+ ps->ps_index = ps - pm->sessions;
- p = hash_get (pm->proxy_session_by_active_open_handle, handle);
- if (p)
- ps = pool_elt_at_index (pm->sessions, p[0]);
return ps;
}
-static proxy_session_t *
-proxy_get_passive_open (proxy_main_t * pm, session_handle_t handle)
+static inline proxy_session_t *
+proxy_session_get (u32 ps_index)
+{
+ proxy_main_t *pm = &proxy_main;
+
+ return pool_elt_at_index (pm->sessions, ps_index);
+}
+
+static inline proxy_session_t *
+proxy_session_get_if_valid (u32 ps_index)
+{
+ proxy_main_t *pm = &proxy_main;
+
+ if (pool_is_free_index (pm->sessions, ps_index))
+ return 0;
+ return pool_elt_at_index (pm->sessions, ps_index);
+}
+
+static void
+proxy_session_free (proxy_session_t *ps)
{
+ proxy_main_t *pm = &proxy_main;
+
+ if (CLIB_DEBUG > 0)
+ clib_memset (ps, 0xFE, sizeof (*ps));
+ pool_put (pm->sessions, ps);
+}
+
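+/* Runs on the passive open's thread via rpc: fifos must be deallocated on
+ * the thread that owns them (see proxy_try_delete_session) */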
+static int
+proxy_session_postponed_free_rpc (void *arg)
+{
+ uword ps_index = pointer_to_uword (arg);
+ proxy_main_t *pm = &proxy_main;
proxy_session_t *ps = 0;
- uword *p;
- p = hash_get (pm->proxy_session_by_server_handle, handle);
- if (p)
- ps = pool_elt_at_index (pm->sessions, p[0]);
- return ps;
+ clib_spinlock_lock_if_init (&pm->sessions_lock);
+
+ ps = proxy_session_get (ps_index);
+ segment_manager_dealloc_fifos (ps->server_rx_fifo, ps->server_tx_fifo);
+ proxy_session_free (ps);
+
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+
+ return 0;
+}
+
+static void
+proxy_session_postponed_free (proxy_session_t *ps)
+{
+ session_send_rpc_evt_to_thread (ps->po_thread_index,
+ proxy_session_postponed_free_rpc,
+ uword_to_pointer (ps->ps_index, void *));
}
static void
@@ -95,17 +137,13 @@ proxy_try_close_session (session_t * s, int is_active_open)
proxy_main_t *pm = &proxy_main;
proxy_session_t *ps = 0;
vnet_disconnect_args_t _a, *a = &_a;
- session_handle_t handle;
-
- handle = session_handle (s);
clib_spinlock_lock_if_init (&pm->sessions_lock);
+ ps = proxy_session_get (s->opaque);
+
if (is_active_open)
{
- ps = proxy_get_active_open (pm, handle);
- ASSERT (ps != 0);
-
a->handle = ps->vpp_active_open_handle;
a->app_index = pm->active_open_app_index;
vnet_disconnect_session (a);
@@ -122,9 +160,6 @@ proxy_try_close_session (session_t * s, int is_active_open)
}
else
{
- ps = proxy_get_passive_open (pm, handle);
- ASSERT (ps != 0);
-
a->handle = ps->vpp_server_handle;
a->app_index = pm->server_app_index;
vnet_disconnect_session (a);
@@ -146,43 +181,42 @@ proxy_try_close_session (session_t * s, int is_active_open)
}
static void
-proxy_session_free (proxy_session_t * ps)
-{
- proxy_main_t *pm = &proxy_main;
- if (CLIB_DEBUG > 0)
- clib_memset (ps, 0xFE, sizeof (*ps));
- pool_put (pm->sessions, ps);
-}
-
-static void
proxy_try_delete_session (session_t * s, u8 is_active_open)
{
proxy_main_t *pm = &proxy_main;
proxy_session_t *ps = 0;
- session_handle_t handle;
-
- handle = session_handle (s);
clib_spinlock_lock_if_init (&pm->sessions_lock);
+ ps = proxy_session_get (s->opaque);
+
if (is_active_open)
{
- ps = proxy_get_active_open (pm, handle);
- ASSERT (ps != 0);
-
ps->vpp_active_open_handle = SESSION_INVALID_HANDLE;
- hash_unset (pm->proxy_session_by_active_open_handle, handle);
+ /* Revert master thread index change on connect notification */
+ ps->server_rx_fifo->master_thread_index = ps->po_thread_index;
+
+ /* Passive open already cleaned up */
if (ps->vpp_server_handle == SESSION_INVALID_HANDLE)
- proxy_session_free (ps);
+ {
+ ASSERT (s->rx_fifo->refcnt == 1);
+
+ /* The two sides of the proxy on different threads */
+ if (ps->po_thread_index != s->thread_index)
+ {
+ /* This is not the right thread to delete the fifos */
+ s->rx_fifo = 0;
+ s->tx_fifo = 0;
+ proxy_session_postponed_free (ps);
+ }
+ else
+ proxy_session_free (ps);
+ }
}
else
{
- ps = proxy_get_passive_open (pm, handle);
- ASSERT (ps != 0);
-
ps->vpp_server_handle = SESSION_INVALID_HANDLE;
- hash_unset (pm->proxy_session_by_server_handle, handle);
if (ps->vpp_active_open_handle == SESSION_INVALID_HANDLE)
{
@@ -245,12 +279,12 @@ proxy_accept_callback (session_t * s)
clib_spinlock_lock_if_init (&pm->sessions_lock);
- pool_get_zero (pm->sessions, ps);
+ ps = proxy_session_alloc ();
ps->vpp_server_handle = session_handle (s);
ps->vpp_active_open_handle = SESSION_INVALID_HANDLE;
+ ps->po_thread_index = s->thread_index;
- hash_set (pm->proxy_session_by_server_handle, ps->vpp_server_handle,
- ps - pm->sessions);
+ s->opaque = ps->ps_index;
clib_spinlock_unlock_if_init (&pm->sessions_lock);
@@ -303,8 +337,7 @@ proxy_rx_callback (session_t * s)
clib_spinlock_lock_if_init (&pm->sessions_lock);
- ps = proxy_get_passive_open (pm, session_handle (s));
- ASSERT (ps != 0);
+ ps = proxy_session_get (s->opaque);
if (PREDICT_TRUE (ps->vpp_active_open_handle != SESSION_INVALID_HANDLE))
{
@@ -332,7 +365,7 @@ proxy_rx_callback (session_t * s)
{
vnet_connect_args_t _a, *a = &_a;
svm_fifo_t *tx_fifo, *rx_fifo;
- u32 max_dequeue, proxy_index;
+ u32 max_dequeue, ps_index;
int actual_transfer __attribute__ ((unused));
rx_fifo = s->rx_fifo;
@@ -344,7 +377,10 @@ proxy_rx_callback (session_t * s)
max_dequeue = svm_fifo_max_dequeue_cons (s->rx_fifo);
if (PREDICT_FALSE (max_dequeue == 0))
- return 0;
+ {
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+ return 0;
+ }
max_dequeue = clib_min (pm->rcv_buffer_size, max_dequeue);
actual_transfer = svm_fifo_peek (rx_fifo, 0 /* relative_offset */ ,
@@ -357,12 +393,12 @@ proxy_rx_callback (session_t * s)
ps->server_rx_fifo = rx_fifo;
ps->server_tx_fifo = tx_fifo;
ps->active_open_establishing = 1;
- proxy_index = ps - pm->sessions;
+ ps_index = ps->ps_index;
clib_spinlock_unlock_if_init (&pm->sessions_lock);
clib_memcpy (&a->sep_ext, &pm->client_sep, sizeof (pm->client_sep));
- a->api_context = proxy_index;
+ a->api_context = ps_index;
a->app_index = pm->active_open_app_index;
if (proxy_transport_needs_crypto (a->sep.transport_proto))
@@ -407,11 +443,10 @@ proxy_tx_callback (session_t * proxy_s)
clib_spinlock_lock_if_init (&pm->sessions_lock);
- ps = proxy_get_passive_open (pm, session_handle (proxy_s));
- ASSERT (ps != 0);
+ ps = proxy_session_get (proxy_s->opaque);
if (ps->vpp_active_open_handle == SESSION_INVALID_HANDLE)
- return 0;
+ goto unlock;
/* Force ack on active open side to update rcv wnd. Make sure it's done on
* the right thread */
@@ -419,6 +454,7 @@ proxy_tx_callback (session_t * proxy_s)
session_send_rpc_evt_to_thread (ps->server_rx_fifo->master_thread_index,
proxy_force_ack, arg);
+unlock:
clib_spinlock_unlock_if_init (&pm->sessions_lock);
return 0;
@@ -442,10 +478,47 @@ static session_cb_vft_t proxy_session_cb_vft = {
.builtin_app_tx_callback = proxy_tx_callback,
.session_reset_callback = proxy_reset_callback,
.session_cleanup_callback = proxy_cleanup_callback,
- .fifo_tuning_callback = common_fifo_tuning_callback
+ .fifo_tuning_callback = common_fifo_tuning_callback,
};
static int
+active_open_alloc_session_fifos (session_t *s)
+{
+ proxy_main_t *pm = &proxy_main;
+ svm_fifo_t *rxf, *txf;
+ proxy_session_t *ps;
+
+ clib_spinlock_lock_if_init (&pm->sessions_lock);
+
+ ps = proxy_session_get (s->opaque);
+
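+  /* Cross-connect the two sides: the active open transmits into the
+   * server's rx fifo and reads from the server's tx fifo */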
+ txf = ps->server_rx_fifo;
+ rxf = ps->server_tx_fifo;
+
+ /*
+ * Reset the active-open tx-fifo master indices so the active-open session
+ * will receive data, etc.
+ */
+ txf->shr->master_session_index = s->session_index;
+ txf->master_thread_index = s->thread_index;
+
+ /*
+ * Account for the active-open session's use of the fifos
+ * so they won't disappear until the last session which uses
+ * them disappears
+ */
+ rxf->refcnt++;
+ txf->refcnt++;
+
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+
+ s->rx_fifo = rxf;
+ s->tx_fifo = txf;
+
+ return 0;
+}
+
+static int
active_open_connected_callback (u32 app_index, u32 opaque,
session_t * s, session_error_t err)
{
@@ -458,7 +531,7 @@ active_open_connected_callback (u32 app_index, u32 opaque,
*/
clib_spinlock_lock_if_init (&pm->sessions_lock);
- ps = pool_elt_at_index (pm->sessions, opaque);
+ ps = proxy_session_get (opaque);
/* Connection failed */
if (err)
@@ -480,33 +553,12 @@ active_open_connected_callback (u32 app_index, u32 opaque,
if (ps->po_disconnected)
{
/* Setup everything for the cleanup notification */
- hash_set (pm->proxy_session_by_active_open_handle,
- ps->vpp_active_open_handle, opaque);
ps->ao_disconnected = 1;
clib_spinlock_unlock_if_init (&pm->sessions_lock);
return -1;
}
- s->tx_fifo = ps->server_rx_fifo;
- s->rx_fifo = ps->server_tx_fifo;
-
- /*
- * Reset the active-open tx-fifo master indices so the active-open session
- * will receive data, etc.
- */
- s->tx_fifo->shr->master_session_index = s->session_index;
- s->tx_fifo->master_thread_index = s->thread_index;
-
- /*
- * Account for the active-open session's use of the fifos
- * so they won't disappear until the last session which uses
- * them disappears
- */
- s->tx_fifo->refcnt++;
- s->rx_fifo->refcnt++;
-
- hash_set (pm->proxy_session_by_active_open_handle,
- ps->vpp_active_open_handle, opaque);
+ s->opaque = opaque;
clib_spinlock_unlock_if_init (&pm->sessions_lock);
@@ -568,11 +620,9 @@ active_open_tx_callback (session_t * ao_s)
{
proxy_main_t *pm = &proxy_main;
transport_connection_t *tc;
- session_handle_t handle;
proxy_session_t *ps;
session_t *proxy_s;
u32 min_free;
- uword *p;
min_free = clib_min (svm_fifo_size (ao_s->tx_fifo) >> 3, 128 << 10);
if (svm_fifo_max_enqueue (ao_s->tx_fifo) < min_free)
@@ -583,17 +633,12 @@ active_open_tx_callback (session_t * ao_s)
clib_spinlock_lock_if_init (&pm->sessions_lock);
- handle = session_handle (ao_s);
- p = hash_get (pm->proxy_session_by_active_open_handle, handle);
- if (!p)
- return 0;
-
- if (pool_is_free_index (pm->sessions, p[0]))
- return 0;
+ ps = proxy_session_get_if_valid (ao_s->opaque);
+ if (!ps)
+ goto unlock;
- ps = pool_elt_at_index (pm->sessions, p[0]);
if (ps->vpp_server_handle == ~0)
- return 0;
+ goto unlock;
proxy_s = session_get_from_handle (ps->vpp_server_handle);
@@ -601,6 +646,7 @@ active_open_tx_callback (session_t * ao_s)
tc = session_get_transport (proxy_s);
tcp_send_ack ((tcp_connection_t *) tc);
+unlock:
clib_spinlock_unlock_if_init (&pm->sessions_lock);
return 0;
@@ -615,7 +661,6 @@ active_open_cleanup_callback (session_t * s, session_cleanup_ntf_t ntf)
proxy_try_delete_session (s, 1 /* is_active_open */ );
}
-/* *INDENT-OFF* */
static session_cb_vft_t active_open_clients = {
.session_reset_callback = active_open_reset_callback,
.session_connected_callback = active_open_connected_callback,
@@ -624,9 +669,9 @@ static session_cb_vft_t active_open_clients = {
.session_cleanup_callback = active_open_cleanup_callback,
.builtin_app_rx_callback = active_open_rx_callback,
.builtin_app_tx_callback = active_open_tx_callback,
- .fifo_tuning_callback = common_fifo_tuning_callback
+ .fifo_tuning_callback = common_fifo_tuning_callback,
+ .proxy_alloc_session_fifos = active_open_alloc_session_fifos,
};
-/* *INDENT-ON* */
static int
proxy_server_attach ()
@@ -634,19 +679,16 @@ proxy_server_attach ()
proxy_main_t *pm = &proxy_main;
u64 options[APP_OPTIONS_N_OPTIONS];
vnet_app_attach_args_t _a, *a = &_a;
- u32 segment_size = 512 << 20;
clib_memset (a, 0, sizeof (*a));
clib_memset (options, 0, sizeof (options));
- if (pm->private_segment_size)
- segment_size = pm->private_segment_size;
a->name = format (0, "proxy-server");
a->api_client_index = pm->server_client_index;
a->session_cb_vft = &proxy_session_cb_vft;
a->options = options;
- a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
- a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = pm->segment_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = pm->segment_size;
a->options[APP_OPTIONS_RX_FIFO_SIZE] = pm->fifo_size;
a->options[APP_OPTIONS_TX_FIFO_SIZE] = pm->fifo_size;
a->options[APP_OPTIONS_MAX_FIFO_SIZE] = pm->max_fifo_size;
@@ -753,14 +795,12 @@ proxy_server_add_ckpair (void)
static int
proxy_server_create (vlib_main_t * vm)
{
- proxy_main_t *pm = &proxy_main;
vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ proxy_main_t *pm = &proxy_main;
u32 num_threads;
int i;
num_threads = 1 /* main thread */ + vtm->n_threads;
- vec_validate (proxy_main.server_event_queue, num_threads - 1);
- vec_validate (proxy_main.active_open_event_queue, num_threads - 1);
vec_validate (pm->rx_buf, num_threads - 1);
for (i = 0; i < num_threads; i++)
@@ -784,15 +824,6 @@ proxy_server_create (vlib_main_t * vm)
return -1;
}
- for (i = 0; i < num_threads; i++)
- {
- pm->active_open_event_queue[i] = session_main_get_vpp_event_queue (i);
-
- ASSERT (pm->active_open_event_queue[i]);
-
- pm->server_event_queue[i] = session_main_get_vpp_event_queue (i);
- }
-
return 0;
}
@@ -816,7 +847,7 @@ proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
pm->rcv_buffer_size = 1024;
pm->prealloc_fifos = 0;
pm->private_segment_count = 0;
- pm->private_segment_size = 0;
+ pm->segment_size = 512 << 20;
if (vlib_num_workers ())
clib_spinlock_init (&pm->sessions_lock);
@@ -846,13 +877,7 @@ proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (line_input, "private-segment-size %U",
unformat_memory_size, &tmp64))
{
- if (tmp64 >= 0x100000000ULL)
- {
- error = clib_error_return (
- 0, "private segment size %lld (%llu) too large", tmp64, tmp64);
- goto done;
- }
- pm->private_segment_size = tmp64;
+ pm->segment_size = tmp64;
}
else if (unformat (line_input, "server-uri %s", &server_uri))
vec_add1 (server_uri, 0);
@@ -908,7 +933,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (proxy_create_command, static) =
{
.path = "test proxy server",
@@ -919,7 +943,6 @@ VLIB_CLI_COMMAND (proxy_create_command, static) =
"[private-segment-size <mem>][private-segment-count <nn>]",
.function = proxy_server_create_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
proxy_main_init (vlib_main_t * vm)
@@ -927,8 +950,6 @@ proxy_main_init (vlib_main_t * vm)
proxy_main_t *pm = &proxy_main;
pm->server_client_index = ~0;
pm->active_open_client_index = ~0;
- pm->proxy_session_by_active_open_handle = hash_create (0, sizeof (uword));
- pm->proxy_session_by_server_handle = hash_create (0, sizeof (uword));
return 0;
}
diff --git a/src/plugins/hs_apps/proxy.h b/src/plugins/hs_apps/proxy.h
index aef23e1e556..26f4de2f729 100644
--- a/src/plugins/hs_apps/proxy.h
+++ b/src/plugins/hs_apps/proxy.h
@@ -36,54 +36,41 @@ typedef struct
volatile int active_open_establishing;
volatile int po_disconnected;
volatile int ao_disconnected;
+
+ u32 ps_index;
+ u32 po_thread_index;
} proxy_session_t;
typedef struct
{
- svm_queue_t *vl_input_queue; /**< vpe input queue */
- /** per-thread vectors */
- svm_msg_q_t **server_event_queue;
- svm_msg_q_t **active_open_event_queue;
+ proxy_session_t *sessions; /**< session pool, shared */
+ clib_spinlock_t sessions_lock; /**< lock for session pool */
u8 **rx_buf; /**< intermediate rx buffers */
- u32 cli_node_index; /**< cli process node index */
u32 server_client_index; /**< server API client handle */
u32 server_app_index; /**< server app index */
u32 active_open_client_index; /**< active open API client handle */
u32 active_open_app_index; /**< active open index after attach */
-
- uword *proxy_session_by_server_handle;
- uword *proxy_session_by_active_open_handle;
+ u32 ckpair_index; /**< certkey pair index for tls */
/*
* Configuration params
*/
- u8 *connect_uri; /**< URI for slave's connect */
- u32 configured_segment_size;
u32 fifo_size; /**< initial fifo size */
u32 max_fifo_size; /**< max fifo size */
u8 high_watermark; /**< high watermark (%) */
u8 low_watermark; /**< low watermark (%) */
u32 private_segment_count; /**< Number of private fifo segs */
- u32 private_segment_size; /**< size of private fifo segs */
+ u64 segment_size; /**< size of fifo segs */
+ u8 prealloc_fifos; /**< Request fifo preallocation */
int rcv_buffer_size;
session_endpoint_cfg_t server_sep;
session_endpoint_cfg_t client_sep;
- u32 ckpair_index;
- /*
- * Test state variables
- */
- proxy_session_t *sessions; /**< Session pool, shared */
- clib_spinlock_t sessions_lock;
- u32 **connection_index_by_thread;
- pthread_t client_thread_handle;
-
/*
* Flags
*/
u8 is_init;
- u8 prealloc_fifos; /**< Request fifo preallocation */
} proxy_main_t;
extern proxy_main_t proxy_main;
diff --git a/src/plugins/hs_apps/sapi/vpp_echo.c b/src/plugins/hs_apps/sapi/vpp_echo.c
index 80d274db5b0..08fd4e175e9 100644
--- a/src/plugins/hs_apps/sapi/vpp_echo.c
+++ b/src/plugins/hs_apps/sapi/vpp_echo.c
@@ -84,16 +84,19 @@ stop_signal (int signum)
em->time_to_stop = 1;
}
-int
-connect_to_vpp (char *name)
+static int
+connect_to_vpp (echo_main_t *em)
{
- echo_main_t *em = &echo_main;
api_main_t *am = vlibapi_get_main ();
+ if (em->use_app_socket_api)
+ return echo_api_connect_app_socket (em);
+
if (em->use_sock_api)
{
- if (vl_socket_client_connect ((char *) em->socket_name, name,
- 0 /* default rx, tx buffer */ ))
+ if (vl_socket_client_connect ((char *) em->socket_name,
+ (char *) em->app_name,
+ 0 /* default rx, tx buffer */))
{
ECHO_FAIL (ECHO_FAIL_SOCKET_CONNECT, "socket connect failed");
return -1;
@@ -107,7 +110,8 @@ connect_to_vpp (char *name)
}
else
{
- if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0)
+ if (vl_client_connect_to_vlib ("/vpe-api", (char *) em->app_name, 32) <
+ 0)
{
ECHO_FAIL (ECHO_FAIL_SHMEM_CONNECT, "shmem connect failed");
return -1;
@@ -286,13 +290,11 @@ echo_free_sessions (echo_main_t * em)
echo_session_t *s;
u32 *session_indexes = 0, *session_index;
- /* *INDENT-OFF* */
pool_foreach (s, em->sessions)
{
if (s->session_state == ECHO_SESSION_STATE_CLOSED)
vec_add1 (session_indexes, s->session_index);
}
- /* *INDENT-ON* */
vec_foreach (session_index, session_indexes)
{
/* Free session */
@@ -725,9 +727,18 @@ session_reset_handler (session_reset_msg_t * mp)
app_send_ctrl_evt_to_vpp (s->vpp_evt_q, app_evt);
}
+static int
+echo_recv_fd (echo_main_t *em, int *fds, int n_fds)
+{
+ if (em->use_app_socket_api)
+ return echo_sapi_recv_fd (em, fds, n_fds);
+ return echo_bapi_recv_fd (em, fds, n_fds);
+}
+
static void
add_segment_handler (session_app_add_segment_msg_t * mp)
{
+ echo_main_t *em = &echo_main;
fifo_segment_main_t *sm = &echo_main.segment_main;
fifo_segment_create_args_t _a, *a = &_a;
int *fds = 0, i;
@@ -737,10 +748,10 @@ add_segment_handler (session_app_add_segment_msg_t * mp)
if (mp->fd_flags & SESSION_FD_F_MEMFD_SEGMENT)
{
vec_validate (fds, 1);
- if (vl_socket_client_recv_fd_msg (fds, 1, 5))
+ if (echo_recv_fd (em, fds, 1))
{
- ECHO_FAIL (ECHO_FAIL_VL_API_RECV_FD_MSG,
- "vl_socket_client_recv_fd_msg failed");
+ ECHO_LOG (0, "echo_recv_fd failed");
+ em->time_to_stop = 1;
goto failed;
}
@@ -1112,6 +1123,8 @@ echo_process_opts (int argc, char **argv)
em->test_return_packets = RETURN_PACKETS_LOG_WRONG;
else if (unformat (a, "socket-name %s", &em->socket_name))
;
+ else if (unformat (a, "use-app-socket-api"))
+ em->use_app_socket_api = 1;
else if (unformat (a, "use-svm-api"))
em->use_sock_api = 0;
else if (unformat (a, "fifo-size %U", unformat_memory_size, &tmp))
@@ -1228,6 +1241,15 @@ echo_process_opts (int argc, char **argv)
}
}
+static int
+echo_needs_crypto (echo_main_t *em)
+{
+ u8 tr = em->uri_elts.transport_proto;
+ if (tr == TRANSPORT_PROTO_QUIC || tr == TRANSPORT_PROTO_TLS)
+ return 1;
+ return 0;
+}
+
void
echo_process_uri (echo_main_t * em)
{
@@ -1260,13 +1282,91 @@ vpp_echo_init ()
clib_memset (em, 0, sizeof (*em));
}
+static int
+echo_detach (echo_main_t *em)
+{
+ if (em->use_app_socket_api)
+ return echo_sapi_detach (em);
+
+ echo_send_detach (em);
+ if (wait_for_state_change (em, STATE_DETACHED, TIMEOUT))
+ {
+ ECHO_FAIL (ECHO_FAIL_DETACH, "Couldn't detach from vpp");
+ return -1;
+ }
+ return 0;
+}
+
+static void
+echo_add_cert_key (echo_main_t *em)
+{
+ if (em->use_app_socket_api)
+ echo_sapi_add_cert_key (em);
+ else
+ {
+ echo_send_add_cert_key (em);
+ if (wait_for_state_change (em, STATE_ATTACHED, TIMEOUT))
+ {
+ ECHO_FAIL (ECHO_FAIL_APP_ATTACH,
+ "Couldn't add crypto context to vpp\n");
+ exit (1);
+ }
+ }
+}
+
+static int
+echo_del_cert_key (echo_main_t *em)
+{
+ if (em->use_app_socket_api)
+ return echo_sapi_del_cert_key (em);
+
+ echo_send_del_cert_key (em);
+ if (wait_for_state_change (em, STATE_CLEANED_CERT_KEY, TIMEOUT))
+ {
+ ECHO_FAIL (ECHO_FAIL_DEL_CERT_KEY, "Couldn't cleanup cert and key");
+ return -1;
+ }
+ return 0;
+}
+
+static void
+echo_disconnect (echo_main_t *em)
+{
+ if (em->use_app_socket_api)
+ return;
+
+ if (em->use_sock_api)
+ vl_socket_client_disconnect ();
+ else
+ vl_client_disconnect_from_vlib ();
+}
+
+static int
+echo_attach (echo_main_t *em)
+{
+ if (em->use_app_socket_api)
+ return echo_sapi_attach (em);
+ else
+ {
+ echo_api_hookup (em);
+ echo_send_attach (em);
+ if (wait_for_state_change (em, STATE_ATTACHED_NO_CERT, TIMEOUT))
+ {
+ ECHO_FAIL (ECHO_FAIL_ATTACH_TO_VPP,
+ "Couldn't attach to vpp, did you run <session enable> ?");
+ return -1;
+ }
+ }
+ return 0;
+}
+
int
main (int argc, char **argv)
{
echo_main_t *em = &echo_main;
fifo_segment_main_t *sm = &em->segment_main;
- char *app_name;
u64 i;
+ int *rv;
svm_msg_q_cfg_t _cfg, *cfg = &_cfg;
u32 rpc_queue_size = 256 << 10;
@@ -1329,11 +1429,9 @@ main (int argc, char **argv)
for (i = 0; i < em->tx_buf_size; i++)
em->connect_test_data[i] = i & 0xff;
- /* *INDENT-OFF* */
svm_msg_q_ring_cfg_t rc[1] = {
{rpc_queue_size, sizeof (echo_rpc_msg_t), 0},
};
- /* *INDENT-ON* */
cfg->consumer_pid = getpid ();
cfg->n_rings = 1;
cfg->q_nitems = rpc_queue_size;
@@ -1344,8 +1442,10 @@ main (int argc, char **argv)
signal (SIGQUIT, stop_signal);
signal (SIGTERM, stop_signal);
- app_name = em->i_am_master ? "echo_server" : "echo_client";
- if (connect_to_vpp (app_name))
+ em->app_name =
+ format (0, "%s%c", em->i_am_master ? "echo_server" : "echo_client", 0);
+
+ if (connect_to_vpp (em))
{
svm_region_exit ();
ECHO_FAIL (ECHO_FAIL_CONNECT_TO_VPP, "Couldn't connect to vpp");
@@ -1355,34 +1455,22 @@ main (int argc, char **argv)
echo_session_prealloc (em);
echo_notify_event (em, ECHO_EVT_START);
- echo_api_hookup (em);
+ if (echo_attach (em))
+ goto exit_on_error;
- echo_send_attach (em);
- if (wait_for_state_change (em, STATE_ATTACHED_NO_CERT, TIMEOUT))
+ if (echo_needs_crypto (em))
{
- ECHO_FAIL (ECHO_FAIL_ATTACH_TO_VPP,
- "Couldn't attach to vpp, did you run <session enable> ?");
- goto exit_on_error;
+ ECHO_LOG (2, "Adding crypto context %U", echo_format_crypto_engine,
+ em->crypto_engine);
+ echo_add_cert_key (em);
}
-
- if (em->uri_elts.transport_proto != TRANSPORT_PROTO_QUIC
- && em->uri_elts.transport_proto != TRANSPORT_PROTO_TLS)
- em->state = STATE_ATTACHED;
else
{
- ECHO_LOG (2, "Adding crypto context %U", echo_format_crypto_engine,
- em->crypto_engine);
- echo_send_add_cert_key (em);
- if (wait_for_state_change (em, STATE_ATTACHED, TIMEOUT))
- {
- ECHO_FAIL (ECHO_FAIL_APP_ATTACH,
- "Couldn't add crypto context to vpp\n");
- exit (1);
- }
+ em->state = STATE_ATTACHED;
}
- if (pthread_create (&em->mq_thread_handle,
- NULL /*attr */ , echo_mq_thread_fn, 0))
+ if (pthread_create (&em->mq_thread_handle, NULL /*attr */, echo_mq_thread_fn,
+ 0))
{
ECHO_FAIL (ECHO_FAIL_PTHREAD_CREATE, "pthread create errored");
goto exit_on_error;
@@ -1402,30 +1490,22 @@ main (int argc, char **argv)
clients_run (em);
echo_notify_event (em, ECHO_EVT_EXIT);
echo_free_sessions (em);
- echo_send_del_cert_key (em);
- if (wait_for_state_change (em, STATE_CLEANED_CERT_KEY, TIMEOUT))
+ if (echo_needs_crypto (em))
{
- ECHO_FAIL (ECHO_FAIL_DEL_CERT_KEY, "Couldn't cleanup cert and key");
- goto exit_on_error;
+ if (echo_del_cert_key (em))
+ goto exit_on_error;
}
- echo_send_detach (em);
- if (wait_for_state_change (em, STATE_DETACHED, TIMEOUT))
- {
- ECHO_FAIL (ECHO_FAIL_DETACH, "Couldn't detach from vpp");
- goto exit_on_error;
- }
- int *rv;
+ if (echo_detach (em))
+ goto exit_on_error;
+
pthread_join (em->mq_thread_handle, (void **) &rv);
if (rv)
{
ECHO_FAIL (ECHO_FAIL_MQ_PTHREAD, "mq pthread errored %d", rv);
goto exit_on_error;
}
- if (em->use_sock_api)
- vl_socket_client_disconnect ();
- else
- vl_client_disconnect_from_vlib ();
+ echo_disconnect (em);
echo_assert_test_suceeded (em);
exit_on_error:
ECHO_LOG (1, "Test complete !\n");
diff --git a/src/plugins/hs_apps/sapi/vpp_echo_bapi.c b/src/plugins/hs_apps/sapi/vpp_echo_bapi.c
index 38fb522351c..868cc3a0591 100644
--- a/src/plugins/hs_apps/sapi/vpp_echo_bapi.c
+++ b/src/plugins/hs_apps/sapi/vpp_echo_bapi.c
@@ -103,6 +103,19 @@ echo_send_del_cert_key (echo_main_t * em)
vl_msg_api_send_shmem (em->vl_input_queue, (u8 *) & bmp);
}
+int
+echo_bapi_recv_fd (echo_main_t *em, int *fds, int n_fds)
+{
+ clib_error_t *err;
+ err = vl_socket_client_recv_fd_msg (fds, n_fds, 5);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+ return 0;
+}
+
static u8
echo_transport_needs_crypto (transport_proto_t proto)
{
@@ -265,11 +278,11 @@ echo_segment_lookup (u64 segment_handle)
clib_spinlock_lock (&em->segment_handles_lock);
segment_idxp = hash_get (em->shared_segment_handles, segment_handle);
clib_spinlock_unlock (&em->segment_handles_lock);
- if (!segment_idxp)
- return ~0;
+ if (segment_idxp)
+ return ((u32) *segment_idxp);
ECHO_LOG (2, "Segment not mapped (0x%lx)", segment_handle);
- return ((u32) *segment_idxp);
+ return ~0;
}
void
@@ -543,11 +556,14 @@ _(APPLICATION_DETACH_REPLY, application_detach_reply) \
_(APP_ADD_CERT_KEY_PAIR_REPLY, app_add_cert_key_pair_reply) \
_(APP_DEL_CERT_KEY_PAIR_REPLY, app_del_cert_key_pair_reply)
-#define vl_print(handle, ...) fformat (handle, __VA_ARGS__)
#define vl_endianfun
#include <vnet/session/session.api.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vnet/session/session.api.h>
+#undef vl_calcsizefun
+
#define vl_printfun
#include <vnet/session/session.api.h>
#undef vl_printfun
@@ -569,10 +585,18 @@ echo_api_hookup (echo_main_t * em)
return;
#define _(N, n) \
- vl_msg_api_set_handlers (REPLY_MSG_ID_BASE + VL_API_##N, #n, \
- vl_api_##n##_t_handler, vl_noop_handler, \
- vl_api_##n##_t_endian, vl_api_##n##_t_print, \
- sizeof (vl_api_##n##_t), 1);
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = REPLY_MSG_ID_BASE + VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = 1, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ });
foreach_quic_echo_msg;
#undef _
}
diff --git a/src/plugins/hs_apps/sapi/vpp_echo_common.h b/src/plugins/hs_apps/sapi/vpp_echo_common.h
index 80c539ccb0f..9b2a2c677b5 100644
--- a/src/plugins/hs_apps/sapi/vpp_echo_common.h
+++ b/src/plugins/hs_apps/sapi/vpp_echo_common.h
@@ -26,6 +26,9 @@
#define LOG_EVERY_N_IDLE_CYCLES (1e8)
#define ECHO_MQ_SEG_HANDLE ((u64) ~0 - 1)
+#define ECHO_INVALID_SEGMENT_INDEX ((u32) ~0)
+#define ECHO_INVALID_SEGMENT_HANDLE ((u64) ~0)
+
#define foreach_echo_fail_code \
_(ECHO_FAIL_NONE, "ECHO_FAIL_NONE") \
_(ECHO_FAIL_USAGE, "ECHO_FAIL_USAGE") \
@@ -269,6 +272,7 @@ typedef struct
svm_queue_t *vl_input_queue; /* vpe input queue */
u32 my_client_index; /* API client handle */
u8 *uri; /* The URI we're playing with */
+ u8 *app_name;
u32 n_uris; /* Cycle through adjacent ips */
ip46_address_t lcl_ip; /* Local ip for client */
u8 lcl_ip_set;
@@ -277,6 +281,8 @@ typedef struct
svm_msg_q_t *ctrl_mq; /* Our control queue (towards vpp) */
clib_time_t clib_time; /* For deadman timers */
u8 *socket_name;
+ u8 use_app_socket_api;
+ clib_socket_t app_api_sock;
int i_am_master;
u32 *listen_session_indexes; /* vec of vpp listener sessions */
volatile u32 listen_session_cnt;
@@ -449,6 +455,15 @@ void echo_send_disconnect_session (echo_main_t * em, void *args);
void echo_api_hookup (echo_main_t * em);
void echo_send_add_cert_key (echo_main_t * em);
void echo_send_del_cert_key (echo_main_t * em);
+int echo_bapi_recv_fd (echo_main_t *em, int *fds, int n_fds);
+
+/* Session socket API */
+int echo_sapi_attach (echo_main_t *em);
+int echo_sapi_add_cert_key (echo_main_t *em);
+int echo_sapi_del_cert_key (echo_main_t *em);
+int echo_api_connect_app_socket (echo_main_t *em);
+int echo_sapi_detach (echo_main_t *em);
+int echo_sapi_recv_fd (echo_main_t *em, int *fds, int n_fds);
#endif /* __included_vpp_echo_common_h__ */
diff --git a/src/plugins/hs_apps/sapi/vpp_echo_proto_quic.c b/src/plugins/hs_apps/sapi/vpp_echo_proto_quic.c
index c67b35fd8e6..1b0dbf33e29 100644
--- a/src/plugins/hs_apps/sapi/vpp_echo_proto_quic.c
+++ b/src/plugins/hs_apps/sapi/vpp_echo_proto_quic.c
@@ -239,7 +239,6 @@ quic_echo_initiate_qsession_close_no_stream (echo_main_t * em)
/* Close Quic session without streams */
echo_session_t *s;
- /* *INDENT-OFF* */
pool_foreach (s, em->sessions)
{
if (s->session_type == ECHO_SESSION_TYPE_QUIC)
@@ -261,7 +260,6 @@ quic_echo_initiate_qsession_close_no_stream (echo_main_t * em)
ECHO_LOG (2,"%U: PASSIVE close", echo_format_session, s);
}
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/plugins/hs_apps/sapi/vpp_echo_sapi.c b/src/plugins/hs_apps/sapi/vpp_echo_sapi.c
new file mode 100644
index 00000000000..a21fbea6183
--- /dev/null
+++ b/src/plugins/hs_apps/sapi/vpp_echo_sapi.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <hs_apps/sapi/vpp_echo_common.h>
+
+int
+echo_api_connect_app_socket (echo_main_t *em)
+{
+ clib_socket_t *cs = &em->app_api_sock;
+ clib_error_t *err;
+ int rv = 0;
+
+ cs->config = (char *) em->socket_name;
+ cs->flags =
+ CLIB_SOCKET_F_IS_CLIENT | CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_BLOCKING;
+
+ if ((err = clib_socket_init (cs)))
+ {
+ clib_error_report (err);
+ rv = -1;
+ }
+
+ return rv;
+}
+
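+/* vpp's own mq segments have no app-assigned handle, so address them with
+ * sentinel handles carved out from the top of the u64 space */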
+static inline u64
+echo_vpp_worker_segment_handle (u32 wrk_index)
+{
+ return (ECHO_INVALID_SEGMENT_HANDLE - wrk_index - 1);
+}
+
+static int
+echo_segment_discover_mqs (uword segment_handle, int *fds, u32 n_fds)
+{
+ echo_main_t *em = &echo_main;
+ fifo_segment_t *fs;
+ u32 fs_index;
+
+ fs_index = echo_segment_lookup (segment_handle);
+ if (fs_index == ECHO_INVALID_SEGMENT_INDEX)
+ {
+ ECHO_LOG (0, "ERROR: mq segment %lx for is not attached!",
+ segment_handle);
+ return -1;
+ }
+
+ clib_spinlock_lock (&em->segment_handles_lock);
+
+ fs = fifo_segment_get_segment (&em->segment_main, fs_index);
+ fifo_segment_msg_qs_discover (fs, fds, n_fds);
+
+ clib_spinlock_unlock (&em->segment_handles_lock);
+
+ return 0;
+}
+
+static int
+echo_api_attach_reply_handler (app_sapi_attach_reply_msg_t *mp, int *fds)
+{
+ echo_main_t *em = &echo_main;
+ int i, rv, n_fds_used = 0;
+ u64 segment_handle;
+ u8 *segment_name;
+
+ if (mp->retval)
+ {
+ ECHO_LOG (0, "attach failed: %U", format_session_error, mp->retval);
+ goto failed;
+ }
+
+ em->my_client_index = mp->api_client_handle;
+ segment_handle = mp->segment_handle;
+ if (segment_handle == ECHO_INVALID_SEGMENT_HANDLE)
+ {
+ ECHO_LOG (0, "invalid segment handle");
+ goto failed;
+ }
+
+ if (!mp->n_fds)
+ goto failed;
+
+ if (mp->fd_flags & SESSION_FD_F_VPP_MQ_SEGMENT)
+ if (echo_segment_attach (echo_vpp_worker_segment_handle (0), "vpp-mq-seg",
+ SSVM_SEGMENT_MEMFD, fds[n_fds_used++]))
+ goto failed;
+
+ if (mp->fd_flags & SESSION_FD_F_MEMFD_SEGMENT)
+ {
+ segment_name = format (0, "memfd-%ld%c", segment_handle, 0);
+ rv = echo_segment_attach (segment_handle, (char *) segment_name,
+ SSVM_SEGMENT_MEMFD, fds[n_fds_used++]);
+ vec_free (segment_name);
+ if (rv != 0)
+ goto failed;
+ }
+
+ echo_segment_attach_mq (segment_handle, mp->app_mq, 0, &em->app_mq);
+
+ if (mp->fd_flags & SESSION_FD_F_MQ_EVENTFD)
+ {
+ ECHO_LOG (0, "SESSION_FD_F_MQ_EVENTFD unsupported!");
+ goto failed;
+ }
+
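+  /* Remaining fds carry the mqs of vpp's own segment; discover them
+   * before attaching the ctrl mq */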
+ echo_segment_discover_mqs (echo_vpp_worker_segment_handle (0),
+ fds + n_fds_used, mp->n_fds - n_fds_used);
+ echo_segment_attach_mq (echo_vpp_worker_segment_handle (0), mp->vpp_ctrl_mq,
+ mp->vpp_ctrl_mq_thread, &em->ctrl_mq);
+
+ em->state = STATE_ATTACHED_NO_CERT;
+ return 0;
+
+failed:
+
+ for (i = clib_max (n_fds_used - 1, 0); i < mp->n_fds; i++)
+ close (fds[i]);
+
+ return -1;
+}
+
+static int
+echo_api_send_attach (clib_socket_t *cs)
+{
+ echo_main_t *em = &echo_main;
+ app_sapi_msg_t msg = { 0 };
+ app_sapi_attach_msg_t *mp = &msg.attach;
+ clib_error_t *err;
+
+ clib_memcpy (&mp->name, em->app_name, vec_len (em->app_name));
+ mp->options[APP_OPTIONS_FLAGS] =
+ APP_OPTIONS_FLAGS_ACCEPT_REDIRECT | APP_OPTIONS_FLAGS_ADD_SEGMENT;
+ mp->options[APP_OPTIONS_SEGMENT_SIZE] = 256 << 20;
+ mp->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = 128 << 20;
+ mp->options[APP_OPTIONS_RX_FIFO_SIZE] = em->fifo_size;
+ mp->options[APP_OPTIONS_TX_FIFO_SIZE] = em->fifo_size;
+ mp->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = em->prealloc_fifo_pairs;
+ mp->options[APP_OPTIONS_EVT_QUEUE_SIZE] = em->evt_q_size;
+
+ msg.type = APP_SAPI_MSG_TYPE_ATTACH;
+ err = clib_socket_sendmsg (cs, &msg, sizeof (msg), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+echo_sapi_attach (echo_main_t *em)
+{
+ app_sapi_msg_t _rmp, *rmp = &_rmp;
+ clib_error_t *err;
+ clib_socket_t *cs;
+ int fds[32];
+
+ cs = &em->app_api_sock;
+ if (echo_api_send_attach (cs))
+ return -1;
+
+ /*
+ * Wait for attach reply
+ */
+ err = clib_socket_recvmsg (cs, rmp, sizeof (*rmp), fds, ARRAY_LEN (fds));
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+
+ if (rmp->type != APP_SAPI_MSG_TYPE_ATTACH_REPLY)
+ return -1;
+
+ return echo_api_attach_reply_handler (&rmp->attach_reply, fds);
+}
+
+int
+echo_sapi_add_cert_key (echo_main_t *em)
+{
+ u32 cert_len = test_srv_crt_rsa_len;
+ u32 key_len = test_srv_key_rsa_len;
+ u32 certkey_len = cert_len + key_len;
+ app_sapi_msg_t _msg = { 0 }, *msg = &_msg;
+ app_sapi_cert_key_add_del_msg_t *mp;
+ app_sapi_msg_t _rmp, *rmp = &_rmp;
+ clib_error_t *err;
+ clib_socket_t *cs;
+ u8 *certkey = 0;
+ int rv = -1;
+
+ msg->type = APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY;
+ mp = &msg->cert_key_add_del;
+ mp->context = ntohl (0xfeedface);
+ mp->cert_len = cert_len;
+ mp->certkey_len = certkey_len;
+ mp->is_add = 1;
+
+ vec_validate (certkey, certkey_len - 1);
+ clib_memcpy_fast (certkey, test_srv_crt_rsa, cert_len);
+ clib_memcpy_fast (certkey + cert_len, test_srv_key_rsa, key_len);
+
+ cs = &em->app_api_sock;
+ err = clib_socket_sendmsg (cs, msg, sizeof (*msg), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ goto done;
+ }
+
+ err = clib_socket_sendmsg (cs, certkey, certkey_len, 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ goto done;
+ }
+
+ /*
+ * Wait for reply and process it
+ */
+ err = clib_socket_recvmsg (cs, rmp, sizeof (*rmp), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ goto done;
+ }
+
+ if (rmp->type != APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY)
+ {
+ ECHO_LOG (0, "unexpected reply type %u", rmp->type);
+ goto done;
+ }
+
+ if (!rmp->cert_key_add_del_reply.retval)
+ rv = rmp->cert_key_add_del_reply.index;
+
+ em->state = STATE_ATTACHED;
+ em->ckpair_index = rv;
+
+done:
+
+ return rv;
+}
+
+int
+echo_sapi_recv_fd (echo_main_t *em, int *fds, int n_fds)
+{
+ app_sapi_msg_t _msg, *msg = &_msg;
+ clib_error_t *err =
+ clib_socket_recvmsg (&em->app_api_sock, msg, sizeof (*msg), fds, n_fds);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+ return 0;
+}
+
+int
+echo_sapi_detach (echo_main_t *em)
+{
+ clib_socket_t *cs = &em->app_api_sock;
+ clib_socket_close (cs);
+ em->state = STATE_DETACHED;
+ return 0;
+}
+
+int
+echo_sapi_del_cert_key (echo_main_t *em)
+{
+ app_sapi_msg_t _msg = { 0 }, *msg = &_msg;
+ app_sapi_cert_key_add_del_msg_t *mp;
+ app_sapi_msg_t _rmp, *rmp = &_rmp;
+ clib_error_t *err;
+ clib_socket_t *cs;
+
+ msg->type = APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY;
+ mp = &msg->cert_key_add_del;
+ mp->index = em->ckpair_index;
+
+ cs = &em->app_api_sock;
+ err = clib_socket_sendmsg (cs, msg, sizeof (*msg), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+
+ /*
+ * Wait for reply and process it
+ */
+ err = clib_socket_recvmsg (cs, rmp, sizeof (*rmp), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+
+ if (rmp->type != APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY)
+ {
+ ECHO_LOG (0, "unexpected reply type %u", rmp->type);
+ return -1;
+ }
+
+ if (rmp->cert_key_add_del_reply.retval)
+ return -1;
+
+ em->state = STATE_CLEANED_CERT_KEY;
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/hs_apps/vcl/sock_test_client.c b/src/plugins/hs_apps/vcl/sock_test_client.c
index 35252da21bc..c8815692184 100644
--- a/src/plugins/hs_apps/vcl/sock_test_client.c
+++ b/src/plugins/hs_apps/vcl/sock_test_client.c
@@ -46,17 +46,17 @@ sock_test_cfg_sync (vcl_test_session_t * socket)
{
sock_client_main_t *scm = &sock_client_main;
vcl_test_session_t *ctrl = &scm->ctrl_socket;
- vcl_test_cfg_t *rl_cfg = (vcl_test_cfg_t *) socket->rxbuf;
+ hs_test_cfg_t *rl_cfg = (hs_test_cfg_t *) socket->rxbuf;
int rx_bytes, tx_bytes;
if (socket->cfg.verbose)
- vcl_test_cfg_dump (&socket->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&socket->cfg, 1 /* is_client */);
ctrl->cfg.seq_num = ++scm->cfg_seq_num;
if (socket->cfg.verbose)
{
stinf ("(fd %d): Sending config sent to server.\n", socket->fd);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
tx_bytes = sock_test_write (socket->fd, (uint8_t *) & ctrl->cfg,
sizeof (ctrl->cfg), NULL, ctrl->cfg.verbose);
@@ -64,21 +64,21 @@ sock_test_cfg_sync (vcl_test_session_t * socket)
stabrt ("(fd %d): write test cfg failed (%d)!", socket->fd, tx_bytes);
rx_bytes = sock_test_read (socket->fd, (uint8_t *) socket->rxbuf,
- sizeof (vcl_test_cfg_t), NULL);
+ sizeof (hs_test_cfg_t), NULL);
if (rx_bytes < 0)
return rx_bytes;
- if (rl_cfg->magic != VCL_TEST_CFG_CTRL_MAGIC)
+ if (rl_cfg->magic != HS_TEST_CFG_CTRL_MAGIC)
stabrt ("(fd %d): Bad server reply cfg -- aborting!\n", socket->fd);
- if ((rx_bytes != sizeof (vcl_test_cfg_t))
- || !vcl_test_cfg_verify (rl_cfg, &ctrl->cfg))
+ if ((rx_bytes != sizeof (hs_test_cfg_t)) ||
+ !hs_test_cfg_verify (rl_cfg, &ctrl->cfg))
stabrt ("(fd %d): Invalid config received from server!\n", socket->fd);
if (socket->cfg.verbose)
{
stinf ("(fd %d): Got config back from server.", socket->fd);
- vcl_test_cfg_dump (rl_cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (rl_cfg, 1 /* is_client */);
}
ctrl->cfg.ctrl_handle = ((ctrl->cfg.ctrl_handle == ~0) ?
rl_cfg->ctrl_handle : ctrl->cfg.ctrl_handle);
@@ -263,27 +263,25 @@ echo_test_client (void)
vcl_test_stats_dump ("CLIENT RESULTS", &ctrl->stats,
1 /* show_rx */ , 1 /* show tx */ ,
ctrl->cfg.verbose);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
if (ctrl->cfg.verbose > 1)
{
- stinf (" ctrl socket info\n"
- VCL_TEST_SEPARATOR_STRING
+ stinf (" ctrl socket info\n" HS_TEST_SEPARATOR_STRING
" fd: %d (0x%08x)\n"
" rxbuf: %p\n"
" rxbuf size: %u (0x%08x)\n"
" txbuf: %p\n"
- " txbuf size: %u (0x%08x)\n"
- VCL_TEST_SEPARATOR_STRING,
- ctrl->fd, (uint32_t) ctrl->fd,
- ctrl->rxbuf, ctrl->rxbuf_size, ctrl->rxbuf_size,
- ctrl->txbuf, ctrl->txbuf_size, ctrl->txbuf_size);
+ " txbuf size: %u (0x%08x)\n" HS_TEST_SEPARATOR_STRING,
+ ctrl->fd, (uint32_t) ctrl->fd, ctrl->rxbuf, ctrl->rxbuf_size,
+ ctrl->rxbuf_size, ctrl->txbuf, ctrl->txbuf_size,
+ ctrl->txbuf_size);
}
}
}
static void
-stream_test_client (vcl_test_t test)
+stream_test_client (hs_test_t test)
{
sock_client_main_t *scm = &sock_client_main;
vcl_test_session_t *ctrl = &scm->ctrl_socket;
@@ -292,7 +290,7 @@ stream_test_client (vcl_test_t test)
uint32_t i, n;
fd_set wr_fdset, rd_fdset;
fd_set _wfdset, *wfdset = &_wfdset;
- fd_set _rfdset, *rfdset = (test == VCL_TEST_TYPE_BI) ? &_rfdset : 0;
+ fd_set _rfdset, *rfdset = (test == HS_TEST_TYPE_BI) ? &_rfdset : 0;
ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
ctrl->cfg.ctrl_handle = ~0;
@@ -300,7 +298,7 @@ stream_test_client (vcl_test_t test)
stinf ("\n" SOCK_TEST_BANNER_STRING
"CLIENT (fd %d): %s-directional Stream Test!\n\n"
"CLIENT (fd %d): Sending config to server on ctrl socket...\n",
- ctrl->fd, test == VCL_TEST_TYPE_BI ? "Bi" : "Uni", ctrl->fd);
+ ctrl->fd, test == HS_TEST_TYPE_BI ? "Bi" : "Uni", ctrl->fd);
if (sock_test_cfg_sync (ctrl))
stabrt ("test cfg sync failed -- aborting!");
@@ -352,8 +350,7 @@ stream_test_client (vcl_test_t test)
(tsock->stats.stop.tv_nsec == 0)))
continue;
- if ((test == VCL_TEST_TYPE_BI) &&
- FD_ISSET (tsock->fd, rfdset) &&
+ if ((test == HS_TEST_TYPE_BI) && FD_ISSET (tsock->fd, rfdset) &&
(tsock->stats.rx_bytes < ctrl->cfg.total_bytes))
{
(void) sock_test_read (tsock->fd,
@@ -372,9 +369,9 @@ stream_test_client (vcl_test_t test)
tsock->fd);
}
- if (((test == VCL_TEST_TYPE_UNI) &&
+ if (((test == HS_TEST_TYPE_UNI) &&
(tsock->stats.tx_bytes >= ctrl->cfg.total_bytes)) ||
- ((test == VCL_TEST_TYPE_BI) &&
+ ((test == HS_TEST_TYPE_BI) &&
(tsock->stats.rx_bytes >= ctrl->cfg.total_bytes)))
{
clock_gettime (CLOCK_REALTIME, &tsock->stats.stop);
@@ -399,40 +396,39 @@ stream_test_client (vcl_test_t test)
snprintf (buf, sizeof (buf), "CLIENT (fd %d) RESULTS", tsock->fd);
vcl_test_stats_dump (buf, &tsock->stats,
- test == VCL_TEST_TYPE_BI /* show_rx */ ,
- 1 /* show tx */ , ctrl->cfg.verbose);
+ test == HS_TEST_TYPE_BI /* show_rx */,
+ 1 /* show tx */, ctrl->cfg.verbose);
}
vcl_test_stats_accumulate (&ctrl->stats, &tsock->stats);
}
vcl_test_stats_dump ("CLIENT RESULTS", &ctrl->stats,
- test == VCL_TEST_TYPE_BI /* show_rx */ ,
- 1 /* show tx */ , ctrl->cfg.verbose);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ test == HS_TEST_TYPE_BI /* show_rx */, 1 /* show tx */,
+ ctrl->cfg.verbose);
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
if (ctrl->cfg.verbose)
{
- stinf (" ctrl socket info\n"
- VCL_TEST_SEPARATOR_STRING
+ stinf (" ctrl socket info\n" HS_TEST_SEPARATOR_STRING
" fd: %d (0x%08x)\n"
" rxbuf: %p\n"
" rxbuf size: %u (0x%08x)\n"
" txbuf: %p\n"
- " txbuf size: %u (0x%08x)\n"
- VCL_TEST_SEPARATOR_STRING,
- ctrl->fd, (uint32_t) ctrl->fd,
- ctrl->rxbuf, ctrl->rxbuf_size, ctrl->rxbuf_size,
- ctrl->txbuf, ctrl->txbuf_size, ctrl->txbuf_size);
+ " txbuf size: %u (0x%08x)\n" HS_TEST_SEPARATOR_STRING,
+ ctrl->fd, (uint32_t) ctrl->fd, ctrl->rxbuf, ctrl->rxbuf_size,
+ ctrl->rxbuf_size, ctrl->txbuf, ctrl->txbuf_size,
+ ctrl->txbuf_size);
}
- ctrl->cfg.test = VCL_TEST_TYPE_ECHO;
+ ctrl->cfg.test = HS_TEST_TYPE_ECHO;
if (sock_test_cfg_sync (ctrl))
stabrt ("post-test cfg sync failed!");
- stinf ("(fd %d): %s-directional Stream Test Complete!\n"
- SOCK_TEST_BANNER_STRING "\n", ctrl->fd,
- test == VCL_TEST_TYPE_BI ? "Bi" : "Uni");
+ stinf (
+ "(fd %d): %s-directional Stream Test Complete!\n" SOCK_TEST_BANNER_STRING
+ "\n",
+ ctrl->fd, test == HS_TEST_TYPE_BI ? "Bi" : "Uni");
}
static void
@@ -448,24 +444,24 @@ exit_client (void)
for (i = 0; i < ctrl->cfg.num_test_sessions; i++)
{
tsock = &scm->test_socket[i];
- tsock->cfg.test = VCL_TEST_TYPE_EXIT;
+ tsock->cfg.test = HS_TEST_TYPE_EXIT;
/* coverity[COPY_PASTE_ERROR] */
if (ctrl->cfg.verbose)
{
stinf ("\(fd %d): Sending exit cfg to server...\n", tsock->fd);
- vcl_test_cfg_dump (&tsock->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&tsock->cfg, 1 /* is_client */);
}
(void) sock_test_write (tsock->fd, (uint8_t *) & tsock->cfg,
sizeof (tsock->cfg), &tsock->stats,
ctrl->cfg.verbose);
}
- ctrl->cfg.test = VCL_TEST_TYPE_EXIT;
+ ctrl->cfg.test = HS_TEST_TYPE_EXIT;
if (ctrl->cfg.verbose)
{
stinf ("\n(fd %d): Sending exit cfg to server...\n", ctrl->fd);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
(void) sock_test_write (ctrl->fd, (uint8_t *) & ctrl->cfg,
sizeof (ctrl->cfg), &ctrl->stats,
@@ -557,7 +553,7 @@ cfg_txbuf_size_set (void)
ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
vcl_test_buf_alloc (&ctrl->cfg, 0 /* is_rxbuf */ ,
(uint8_t **) & ctrl->txbuf, &ctrl->txbuf_size);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
stabrt ("Invalid txbuf size (%lu) < minimum buf size (%u)!",
@@ -576,7 +572,7 @@ cfg_num_writes_set (void)
{
ctrl->cfg.num_writes = num_writes;
ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
stabrt ("Invalid num writes: %u", num_writes);
@@ -596,7 +592,7 @@ cfg_num_test_sockets_set (void)
ctrl->cfg.num_test_sessions = num_test_sockets;
sock_test_connect_test_sockets (num_test_sockets);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
stabrt ("Invalid num test sockets: %u, (%d max)\n", num_test_sockets,
@@ -616,7 +612,7 @@ cfg_rxbuf_size_set (void)
ctrl->cfg.rxbuf_size = rxbuf_size;
vcl_test_buf_alloc (&ctrl->cfg, 1 /* is_rxbuf */ ,
(uint8_t **) & ctrl->rxbuf, &ctrl->rxbuf_size);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
stabrt ("Invalid rxbuf size (%lu) < minimum buf size (%u)!",
@@ -630,19 +626,19 @@ cfg_verbose_toggle (void)
vcl_test_session_t *ctrl = &scm->ctrl_socket;
ctrl->cfg.verbose = ctrl->cfg.verbose ? 0 : 1;
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
-static vcl_test_t
+static hs_test_t
parse_input ()
{
sock_client_main_t *scm = &sock_client_main;
vcl_test_session_t *ctrl = &scm->ctrl_socket;
- vcl_test_t rv = VCL_TEST_TYPE_NONE;
+ hs_test_t rv = HS_TEST_TYPE_NONE;
if (!strncmp (VCL_TEST_TOKEN_EXIT, ctrl->txbuf,
strlen (VCL_TEST_TOKEN_EXIT)))
- rv = VCL_TEST_TYPE_EXIT;
+ rv = HS_TEST_TYPE_EXIT;
else if (!strncmp (VCL_TEST_TOKEN_HELP, ctrl->txbuf,
strlen (VCL_TEST_TOKEN_HELP)))
@@ -672,16 +668,16 @@ parse_input ()
strlen (VCL_TEST_TOKEN_RXBUF_SIZE)))
cfg_rxbuf_size_set ();
- else if (!strncmp (VCL_TEST_TOKEN_RUN_UNI, ctrl->txbuf,
- strlen (VCL_TEST_TOKEN_RUN_UNI)))
- rv = ctrl->cfg.test = VCL_TEST_TYPE_UNI;
+ else if (!strncmp (HS_TEST_TOKEN_RUN_UNI, ctrl->txbuf,
+ strlen (HS_TEST_TOKEN_RUN_UNI)))
+ rv = ctrl->cfg.test = HS_TEST_TYPE_UNI;
- else if (!strncmp (VCL_TEST_TOKEN_RUN_BI, ctrl->txbuf,
- strlen (VCL_TEST_TOKEN_RUN_BI)))
- rv = ctrl->cfg.test = VCL_TEST_TYPE_BI;
+ else if (!strncmp (HS_TEST_TOKEN_RUN_BI, ctrl->txbuf,
+ strlen (HS_TEST_TOKEN_RUN_BI)))
+ rv = ctrl->cfg.test = HS_TEST_TYPE_BI;
else
- rv = VCL_TEST_TYPE_ECHO;
+ rv = HS_TEST_TYPE_ECHO;
return rv;
}
@@ -713,9 +709,9 @@ main (int argc, char **argv)
sock_client_main_t *scm = &sock_client_main;
vcl_test_session_t *ctrl = &scm->ctrl_socket;
int c, rv;
- vcl_test_t post_test = VCL_TEST_TYPE_NONE;
+ hs_test_t post_test = HS_TEST_TYPE_NONE;
- vcl_test_cfg_init (&ctrl->cfg);
+ hs_test_cfg_init (&ctrl->cfg);
vcl_test_session_buf_alloc (ctrl);
opterr = 0;
@@ -749,7 +745,7 @@ main (int argc, char **argv)
break;
case 'X':
- post_test = VCL_TEST_TYPE_EXIT;
+ post_test = HS_TEST_TYPE_EXIT;
break;
case 'E':
@@ -760,7 +756,7 @@ main (int argc, char **argv)
print_usage_and_exit ();
}
strncpy (ctrl->txbuf, optarg, ctrl->txbuf_size);
- ctrl->cfg.test = VCL_TEST_TYPE_ECHO;
+ ctrl->cfg.test = HS_TEST_TYPE_ECHO;
break;
case 'I':
@@ -836,11 +832,11 @@ main (int argc, char **argv)
break;
case 'U':
- ctrl->cfg.test = VCL_TEST_TYPE_UNI;
+ ctrl->cfg.test = HS_TEST_TYPE_UNI;
break;
case 'B':
- ctrl->cfg.test = VCL_TEST_TYPE_BI;
+ ctrl->cfg.test = HS_TEST_TYPE_BI;
break;
case 'V':
@@ -928,54 +924,54 @@ main (int argc, char **argv)
sock_test_connect_test_sockets (ctrl->cfg.num_test_sessions);
- while (ctrl->cfg.test != VCL_TEST_TYPE_EXIT)
+ while (ctrl->cfg.test != HS_TEST_TYPE_EXIT)
{
if (scm->dump_cfg)
{
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
scm->dump_cfg = 0;
}
switch (ctrl->cfg.test)
{
- case VCL_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_ECHO:
echo_test_client ();
break;
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
stream_test_client (ctrl->cfg.test);
break;
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
continue;
- case VCL_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_NONE:
default:
break;
}
switch (post_test)
{
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
switch (ctrl->cfg.test)
{
- case VCL_TEST_TYPE_EXIT:
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
- case VCL_TEST_TYPE_ECHO:
- ctrl->cfg.test = VCL_TEST_TYPE_EXIT;
+ case HS_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
+ case HS_TEST_TYPE_ECHO:
+ ctrl->cfg.test = HS_TEST_TYPE_EXIT;
continue;
- case VCL_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_NONE:
default:
break;
}
break;
- case VCL_TEST_TYPE_NONE:
- case VCL_TEST_TYPE_ECHO:
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
+ case HS_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
default:
break;
}
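
Note on the hunks above: the client's control-channel handshake is unchanged by
the VCL_TEST_* to HS_TEST_* rename — the client writes its config struct, the
server echoes its view back, and the magic field is what separates a config
reply from stream payload. A minimal sketch of that round-trip over a plain
socket, assuming hs_test_cfg_t keeps the packed layout of the removed
vcl_test_cfg_t (the helper name is hypothetical, not part of this patch):

    #include <unistd.h>

    static int
    cfg_sync_sketch (int fd, hs_test_cfg_t *cfg)
    {
      hs_test_cfg_t reply;

      if (write (fd, cfg, sizeof (*cfg)) != (ssize_t) sizeof (*cfg))
        return -1;
      if (read (fd, &reply, sizeof (reply)) != (ssize_t) sizeof (reply))
        return -1;
      /* the magic field separates a config reply from payload bytes */
      if (reply.magic != HS_TEST_CFG_CTRL_MAGIC)
        return -1;
      return 0;
    }
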
diff --git a/src/plugins/hs_apps/vcl/sock_test_server.c b/src/plugins/hs_apps/vcl/sock_test_server.c
index d516c1722db..2356a4eadca 100644
--- a/src/plugins/hs_apps/vcl/sock_test_server.c
+++ b/src/plugins/hs_apps/vcl/sock_test_server.c
@@ -37,7 +37,7 @@ typedef struct
int fd;
uint8_t *buf;
uint32_t buf_size;
- vcl_test_cfg_t cfg;
+ hs_test_cfg_t cfg;
vcl_test_stats_t stats;
} sock_server_conn_t;
@@ -87,7 +87,7 @@ conn_pool_expand (size_t expand_size)
{
sock_server_conn_t *conn = &conn_pool[i];
memset (conn, 0, sizeof (*conn));
- vcl_test_cfg_init (&conn->cfg);
+ hs_test_cfg_init (&conn->cfg);
vcl_test_buf_alloc (&conn->cfg, 1 /* is_rxbuf */ , &conn->buf,
&conn->buf_size);
conn->cfg.txbuf_size = conn->cfg.rxbuf_size;
@@ -123,7 +123,7 @@ conn_pool_free (sock_server_conn_t * conn)
}
static inline void
-sync_config_and_reply (sock_server_conn_t * conn, vcl_test_cfg_t * rx_cfg)
+sync_config_and_reply (sock_server_conn_t *conn, hs_test_cfg_t *rx_cfg)
{
conn->cfg = *rx_cfg;
vcl_test_buf_alloc (&conn->cfg, 1 /* is_rxbuf */ ,
@@ -133,19 +133,18 @@ sync_config_and_reply (sock_server_conn_t * conn, vcl_test_cfg_t * rx_cfg)
if (conn->cfg.verbose)
{
stinf ("(fd %d): Replying to cfg message!\n", conn->fd);
- vcl_test_cfg_dump (&conn->cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (&conn->cfg, 0 /* is_client */);
}
(void) sock_test_write (conn->fd, (uint8_t *) & conn->cfg,
sizeof (conn->cfg), NULL, conn->cfg.verbose);
}
static void
-stream_test_server_start_stop (sock_server_conn_t * conn,
- vcl_test_cfg_t * rx_cfg)
+stream_test_server_start_stop (sock_server_conn_t *conn, hs_test_cfg_t *rx_cfg)
{
sock_server_main_t *ssm = &sock_server_main;
int client_fd = conn->fd;
- vcl_test_t test = rx_cfg->test;
+ hs_test_t test = rx_cfg->test;
if (rx_cfg->ctrl_handle == conn->fd)
{
@@ -166,39 +165,37 @@ stream_test_server_start_stop (sock_server_conn_t * conn,
snprintf (buf, sizeof (buf), "SERVER (fd %d) RESULTS",
tc->fd);
- vcl_test_stats_dump (buf, &tc->stats, 1 /* show_rx */ ,
- test == VCL_TEST_TYPE_BI
- /* show tx */ ,
+ vcl_test_stats_dump (buf, &tc->stats, 1 /* show_rx */,
+ test == HS_TEST_TYPE_BI
+ /* show tx */,
conn->cfg.verbose);
}
}
}
- vcl_test_stats_dump ("SERVER RESULTS", &conn->stats, 1 /* show_rx */ ,
- (test == VCL_TEST_TYPE_BI) /* show_tx */ ,
+ vcl_test_stats_dump ("SERVER RESULTS", &conn->stats, 1 /* show_rx */,
+ (test == HS_TEST_TYPE_BI) /* show_tx */,
conn->cfg.verbose);
- vcl_test_cfg_dump (&conn->cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (&conn->cfg, 0 /* is_client */);
if (conn->cfg.verbose)
{
- stinf (" sock server main\n"
- VCL_TEST_SEPARATOR_STRING
+ stinf (" sock server main\n" HS_TEST_SEPARATOR_STRING
" buf: %p\n"
- " buf size: %u (0x%08x)\n"
- VCL_TEST_SEPARATOR_STRING,
+ " buf size: %u (0x%08x)\n" HS_TEST_SEPARATOR_STRING,
conn->buf, conn->buf_size, conn->buf_size);
}
sync_config_and_reply (conn, rx_cfg);
- stinf ("SERVER (fd %d): %s-directional Stream Test Complete!\n"
- SOCK_TEST_BANNER_STRING "\n", conn->fd,
- test == VCL_TEST_TYPE_BI ? "Bi" : "Uni");
+ stinf ("SERVER (fd %d): %s-directional Stream Test "
+ "Complete!\n" SOCK_TEST_BANNER_STRING "\n",
+ conn->fd, test == HS_TEST_TYPE_BI ? "Bi" : "Uni");
}
else
{
stinf (SOCK_TEST_BANNER_STRING
"SERVER (fd %d): %s-directional Stream Test!\n"
" Sending client the test cfg to start streaming data...\n",
- client_fd, test == VCL_TEST_TYPE_BI ? "Bi" : "Uni");
+ client_fd, test == HS_TEST_TYPE_BI ? "Bi" : "Uni");
rx_cfg->ctrl_handle = (rx_cfg->ctrl_handle == ~0) ? conn->fd :
rx_cfg->ctrl_handle;
@@ -216,9 +213,9 @@ static inline void
stream_test_server (sock_server_conn_t * conn, int rx_bytes)
{
int client_fd = conn->fd;
- vcl_test_t test = conn->cfg.test;
+ hs_test_t test = conn->cfg.test;
- if (test == VCL_TEST_TYPE_BI)
+ if (test == HS_TEST_TYPE_BI)
(void) sock_test_write (client_fd, conn->buf, rx_bytes, &conn->stats,
conn->cfg.verbose);
@@ -373,15 +370,14 @@ sts_server_echo (sock_server_conn_t * conn, int rx_bytes)
}
static int
-sts_handle_cfg (vcl_test_cfg_t * rx_cfg, sock_server_conn_t * conn,
- int rx_bytes)
+sts_handle_cfg (hs_test_cfg_t *rx_cfg, sock_server_conn_t *conn, int rx_bytes)
{
sock_server_main_t *ssm = &sock_server_main;
if (rx_cfg->verbose)
{
stinf ("(fd %d): Received a cfg message!\n", conn->fd);
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
}
if (rx_bytes != sizeof (*rx_cfg))
@@ -393,7 +389,7 @@ sts_handle_cfg (vcl_test_cfg_t * rx_cfg, sock_server_conn_t * conn,
if (conn->cfg.verbose)
{
stinf ("(fd %d): Replying to cfg message!\n", conn->fd);
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
}
sock_test_write (conn->fd, (uint8_t *) & conn->cfg, sizeof (conn->cfg),
NULL, conn->cfg.verbose);
@@ -402,23 +398,23 @@ sts_handle_cfg (vcl_test_cfg_t * rx_cfg, sock_server_conn_t * conn,
switch (rx_cfg->test)
{
- case VCL_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_NONE:
sync_config_and_reply (conn, rx_cfg);
break;
- case VCL_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_ECHO:
if (socket_server_echo_af_unix_init (ssm))
goto done;
sync_config_and_reply (conn, rx_cfg);
break;
- case VCL_TEST_TYPE_BI:
- case VCL_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
+ case HS_TEST_TYPE_UNI:
stream_test_server_start_stop (conn, rx_cfg);
break;
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
stinf ("Have a great day connection %d!", conn->fd);
close (conn->fd);
conn_pool_free (conn);
@@ -428,7 +424,7 @@ sts_handle_cfg (vcl_test_cfg_t * rx_cfg, sock_server_conn_t * conn,
default:
stinf ("ERROR: Unknown test type!\n");
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
break;
}
@@ -439,7 +435,7 @@ done:
static int
sts_conn_expect_config (sock_server_conn_t * conn)
{
- if (conn->cfg.test == VCL_TEST_TYPE_ECHO)
+ if (conn->cfg.test == HS_TEST_TYPE_ECHO)
return 1;
return (conn->stats.rx_bytes < 128
@@ -452,7 +448,7 @@ main (int argc, char **argv)
int client_fd, rv, main_rv = 0, rx_bytes, c, v, i;
sock_server_main_t *ssm = &sock_server_main;
sock_server_conn_t *conn;
- vcl_test_cfg_t *rx_cfg;
+ hs_test_cfg_t *rx_cfg;
struct sockaddr_storage servaddr;
uint16_t port = VCL_TEST_SERVER_PORT;
uint32_t servaddr_size;
@@ -605,8 +601,8 @@ main (int argc, char **argv)
if (sts_conn_expect_config (conn))
{
- rx_cfg = (vcl_test_cfg_t *) conn->buf;
- if (rx_cfg->magic == VCL_TEST_CFG_CTRL_MAGIC)
+ rx_cfg = (hs_test_cfg_t *) conn->buf;
+ if (rx_cfg->magic == HS_TEST_CFG_CTRL_MAGIC)
{
sts_handle_cfg (rx_cfg, conn, rx_bytes);
if (!ssm->nfds)
@@ -619,8 +615,8 @@ main (int argc, char **argv)
}
}
- if ((conn->cfg.test == VCL_TEST_TYPE_UNI)
- || (conn->cfg.test == VCL_TEST_TYPE_BI))
+ if ((conn->cfg.test == HS_TEST_TYPE_UNI) ||
+ (conn->cfg.test == HS_TEST_TYPE_BI))
{
stream_test_server (conn, rx_bytes);
if (ioctl (conn->fd, FIONREAD))
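
The server-side rename follows the same pattern, and the dispatch logic is
unchanged: a receive is treated as a control message only when the connection
state expects config (sts_conn_expect_config) and the buffer starts with the
control magic; everything else is stream payload. A compact sketch of that
predicate, assuming the hs_test_cfg_t field names mirror the removed
vcl_test_cfg_t (the real code checks the magic first, then validates size):

    static int
    is_ctrl_msg (sock_server_conn_t *conn, int rx_bytes)
    {
      hs_test_cfg_t *rx_cfg = (hs_test_cfg_t *) conn->buf;

      return sts_conn_expect_config (conn) &&
             rx_bytes >= (int) sizeof (*rx_cfg) &&
             rx_cfg->magic == HS_TEST_CFG_CTRL_MAGIC;
    }
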
diff --git a/src/plugins/hs_apps/vcl/vcl_test.h b/src/plugins/hs_apps/vcl/vcl_test.h
index 4f67e03f72b..0ce27ef84e2 100644
--- a/src/plugins/hs_apps/vcl/vcl_test.h
+++ b/src/plugins/hs_apps/vcl/vcl_test.h
@@ -16,6 +16,7 @@
#ifndef __vcl_test_h__
#define __vcl_test_h__
+#include <hs_apps/hs_test.h>
#include <netdb.h>
#include <errno.h>
#include <stdlib.h>
@@ -46,67 +47,18 @@
#define vt_atomic_add(_ptr, _val) \
__atomic_fetch_add (_ptr, _val, __ATOMIC_RELEASE)
-#define VCL_TEST_TOKEN_HELP "#H"
-#define VCL_TEST_TOKEN_EXIT "#X"
-#define VCL_TEST_TOKEN_VERBOSE "#V"
-#define VCL_TEST_TOKEN_TXBUF_SIZE "#T:"
-#define VCL_TEST_TOKEN_NUM_TEST_SESS "#I:"
-#define VCL_TEST_TOKEN_NUM_WRITES "#N:"
-#define VCL_TEST_TOKEN_RXBUF_SIZE "#R:"
-#define VCL_TEST_TOKEN_SHOW_CFG "#C"
-#define VCL_TEST_TOKEN_RUN_UNI "#U"
-#define VCL_TEST_TOKEN_RUN_BI "#B"
-
#define VCL_TEST_SERVER_PORT 22000
#define VCL_TEST_LOCALHOST_IPADDR "127.0.0.1"
-#define VCL_TEST_CFG_CTRL_MAGIC 0xfeedface
-#define VCL_TEST_CFG_NUM_WRITES_DEF 1000000
-#define VCL_TEST_CFG_TXBUF_SIZE_DEF 8192
-#define VCL_TEST_CFG_RXBUF_SIZE_DEF (64*VCL_TEST_CFG_TXBUF_SIZE_DEF)
#define VCL_TEST_CFG_BUF_SIZE_MIN 128
-#define VCL_TEST_CFG_MAX_TEST_SESS 512
+#define VCL_TEST_CFG_MAX_TEST_SESS ((uint32_t) 1e6)
+#define VCL_TEST_CFG_MAX_SELECT_SESS 512
+#define VCL_TEST_CFG_INIT_TEST_SESS 512
#define VCL_TEST_CFG_MAX_EPOLL_EVENTS 16
#define VCL_TEST_CTRL_LISTENER (~0 - 1)
#define VCL_TEST_DATA_LISTENER (~0)
#define VCL_TEST_DELAY_DISCONNECT 1
-#define VCL_TEST_SEPARATOR_STRING \
- " -----------------------------\n"
-typedef enum
-{
- VCL_TEST_TYPE_NONE,
- VCL_TEST_TYPE_ECHO,
- VCL_TEST_TYPE_UNI,
- VCL_TEST_TYPE_BI,
- VCL_TEST_TYPE_EXIT,
-} vcl_test_t;
-
-typedef enum
-{
- VCL_TEST_CMD_SYNC,
- VCL_TEST_CMD_START,
- VCL_TEST_CMD_STOP,
-} vcl_test_cmd_t;
-
-typedef struct __attribute__ ((packed))
-{
- uint32_t magic;
- uint32_t seq_num;
- uint32_t test;
- uint32_t cmd;
- uint32_t ctrl_handle;
- uint32_t num_test_sessions;
- uint32_t num_test_sessions_perq;
- uint32_t num_test_qsessions;
- uint32_t verbose;
- uint32_t address_ip6;
- uint32_t transport_udp;
- uint64_t rxbuf_size;
- uint64_t txbuf_size;
- uint64_t num_writes;
- uint64_t total_bytes;
-} vcl_test_cfg_t;
typedef struct
{
@@ -124,9 +76,10 @@ typedef struct
typedef struct vcl_test_session
{
- uint8_t is_alloc;
- uint8_t is_open;
uint8_t is_done;
+ uint8_t is_alloc : 1;
+ uint8_t is_open : 1;
+ uint8_t noblk_connect : 1;
int fd;
int (*read) (struct vcl_test_session *ts, void *buf, uint32_t buflen);
int (*write) (struct vcl_test_session *ts, void *buf, uint32_t buflen);
@@ -134,10 +87,11 @@ typedef struct vcl_test_session
uint32_t rxbuf_size;
char *txbuf;
char *rxbuf;
- vcl_test_cfg_t cfg;
+ hs_test_cfg_t cfg;
vcl_test_stats_t stats;
vcl_test_stats_t old_stats;
int session_index;
+ struct vcl_test_session *next;
vppcom_endpt_t endpt;
uint8_t ip[16];
vppcom_data_segment_t ds[2];
@@ -154,7 +108,7 @@ vcl_test_worker_index (void)
typedef struct
{
- int (*init) (vcl_test_cfg_t *cfg);
+ int (*init) (hs_test_cfg_t *cfg);
int (*open) (vcl_test_session_t *ts, vppcom_endpt_t *endpt);
int (*listen) (vcl_test_session_t *ts, vppcom_endpt_t *endpt);
int (*accept) (int listen_fd, vcl_test_session_t *ts);
@@ -172,7 +126,7 @@ typedef struct
{
const vcl_test_proto_vft_t *protos[VPPCOM_PROTO_SRTP + 1];
uint32_t ckpair_index;
- vcl_test_cfg_t cfg;
+ hs_test_cfg_t cfg;
vcl_test_wrk_t *wrk;
} vcl_test_main_t;
@@ -198,37 +152,8 @@ vcl_test_stats_accumulate (vcl_test_stats_t * accum, vcl_test_stats_t * incr)
}
static inline void
-vcl_test_cfg_init (vcl_test_cfg_t * cfg)
-{
- cfg->magic = VCL_TEST_CFG_CTRL_MAGIC;
- cfg->test = VCL_TEST_TYPE_NONE;
- cfg->ctrl_handle = ~0;
- cfg->num_test_sessions = 1;
- cfg->num_test_sessions_perq = 1;
- cfg->verbose = 0;
- cfg->rxbuf_size = VCL_TEST_CFG_RXBUF_SIZE_DEF;
- cfg->num_writes = VCL_TEST_CFG_NUM_WRITES_DEF;
- cfg->txbuf_size = VCL_TEST_CFG_TXBUF_SIZE_DEF;
- cfg->total_bytes = cfg->num_writes * cfg->txbuf_size;
-}
-
-static inline int
-vcl_test_cfg_verify (vcl_test_cfg_t * cfg, vcl_test_cfg_t * valid_cfg)
-{
- /* Note: txbuf & rxbuf on server are the same buffer,
- * so txbuf_size is not included in this check.
- */
- return ((cfg->magic == valid_cfg->magic)
- && (cfg->test == valid_cfg->test)
- && (cfg->verbose == valid_cfg->verbose)
- && (cfg->rxbuf_size == valid_cfg->rxbuf_size)
- && (cfg->num_writes == valid_cfg->num_writes)
- && (cfg->total_bytes == valid_cfg->total_bytes));
-}
-
-static inline void
-vcl_test_buf_alloc (vcl_test_cfg_t * cfg, uint8_t is_rxbuf, uint8_t ** buf,
- uint32_t * bufsize)
+vcl_test_buf_alloc (hs_test_cfg_t *cfg, uint8_t is_rxbuf, uint8_t **buf,
+ uint32_t *bufsize)
{
uint32_t alloc_size = is_rxbuf ? cfg->rxbuf_size : cfg->txbuf_size;
uint8_t *lb = realloc (*buf, (size_t) alloc_size);
@@ -269,69 +194,6 @@ vcl_test_session_buf_free (vcl_test_session_t *ts)
ts->txbuf = 0;
}
-static inline char *
-vcl_test_type_str (vcl_test_t t)
-{
- switch (t)
- {
- case VCL_TEST_TYPE_NONE:
- return "NONE";
-
- case VCL_TEST_TYPE_ECHO:
- return "ECHO";
-
- case VCL_TEST_TYPE_UNI:
- return "UNI";
-
- case VCL_TEST_TYPE_BI:
- return "BI";
-
- case VCL_TEST_TYPE_EXIT:
- return "EXIT";
-
- default:
- return "Unknown";
- }
-}
-
-static inline void
-vcl_test_cfg_dump (vcl_test_cfg_t * cfg, uint8_t is_client)
-{
- char *spc = " ";
-
- printf (" test config (%p):\n"
- VCL_TEST_SEPARATOR_STRING
- " magic: 0x%08x\n"
- " seq_num: 0x%08x\n"
- "%-5s test: %s (%d)\n"
- " ctrl handle: %d (0x%x)\n"
- "%-5s num test sockets: %u (0x%08x)\n"
- "%-5s verbose: %s (%d)\n"
- "%-5s rxbuf size: %lu (0x%08lx)\n"
- "%-5s txbuf size: %lu (0x%08lx)\n"
- "%-5s num writes: %lu (0x%08lx)\n"
- " client tx bytes: %lu (0x%08lx)\n"
- VCL_TEST_SEPARATOR_STRING,
- (void *) cfg, cfg->magic, cfg->seq_num,
- is_client && (cfg->test == VCL_TEST_TYPE_UNI) ?
- "'" VCL_TEST_TOKEN_RUN_UNI "'" :
- is_client && (cfg->test == VCL_TEST_TYPE_BI) ?
- "'" VCL_TEST_TOKEN_RUN_BI "'" : spc,
- vcl_test_type_str (cfg->test), cfg->test,
- cfg->ctrl_handle, cfg->ctrl_handle,
- is_client ? "'" VCL_TEST_TOKEN_NUM_TEST_SESS "'" : spc,
- cfg->num_test_sessions, cfg->num_test_sessions,
- is_client ? "'" VCL_TEST_TOKEN_VERBOSE "'" : spc,
- cfg->verbose ? "on" : "off", cfg->verbose,
- is_client ? "'" VCL_TEST_TOKEN_RXBUF_SIZE "'" : spc,
- cfg->rxbuf_size, cfg->rxbuf_size,
- is_client ? "'" VCL_TEST_TOKEN_TXBUF_SIZE "'" : spc,
- cfg->txbuf_size, cfg->txbuf_size,
- is_client ? "'" VCL_TEST_TOKEN_NUM_WRITES "'" : spc,
- cfg->num_writes, cfg->num_writes,
- cfg->total_bytes, cfg->total_bytes);
-}
-
static inline void
vcl_test_stats_dump (char *header, vcl_test_stats_t * stats,
uint8_t show_rx, uint8_t show_tx, uint8_t verbose)
@@ -361,31 +223,27 @@ vcl_test_stats_dump (char *header, vcl_test_stats_t * stats,
if (show_tx)
{
- printf (VCL_TEST_SEPARATOR_STRING
- " tx stats (0x%p):\n"
- VCL_TEST_SEPARATOR_STRING
+ printf (HS_TEST_SEPARATOR_STRING
+ " tx stats (0x%p):\n" HS_TEST_SEPARATOR_STRING
" writes: %lu (0x%08lx)\n"
" tx bytes: %lu (0x%08lx)\n"
" tx eagain: %u (0x%08x)\n"
" tx incomplete: %u (0x%08x)\n",
(void *) stats, stats->tx_xacts, stats->tx_xacts,
- stats->tx_bytes, stats->tx_bytes,
- stats->tx_eagain, stats->tx_eagain,
- stats->tx_incomp, stats->tx_incomp);
+ stats->tx_bytes, stats->tx_bytes, stats->tx_eagain,
+ stats->tx_eagain, stats->tx_incomp, stats->tx_incomp);
}
if (show_rx)
{
- printf (VCL_TEST_SEPARATOR_STRING
- " rx stats (0x%p):\n"
- VCL_TEST_SEPARATOR_STRING
+ printf (HS_TEST_SEPARATOR_STRING
+ " rx stats (0x%p):\n" HS_TEST_SEPARATOR_STRING
" reads: %lu (0x%08lx)\n"
" rx bytes: %lu (0x%08lx)\n"
" rx eagain: %u (0x%08x)\n"
" rx incomplete: %u (0x%08x)\n",
(void *) stats, stats->rx_xacts, stats->rx_xacts,
- stats->rx_bytes, stats->rx_bytes,
- stats->rx_eagain, stats->rx_eagain,
- stats->rx_incomp, stats->rx_incomp);
+ stats->rx_bytes, stats->rx_bytes, stats->rx_eagain,
+ stats->rx_eagain, stats->rx_incomp, stats->rx_incomp);
}
if (verbose)
printf (" start.tv_sec: %ld\n"
@@ -395,7 +253,7 @@ vcl_test_stats_dump (char *header, vcl_test_stats_t * stats,
stats->start.tv_sec, stats->start.tv_nsec,
stats->stop.tv_sec, stats->stop.tv_nsec);
- printf (VCL_TEST_SEPARATOR_STRING);
+ printf (HS_TEST_SEPARATOR_STRING);
}
static inline double
@@ -567,25 +425,18 @@ dump_help (void)
{
#define INDENT "\n "
- printf ("CLIENT: Test configuration commands:"
- INDENT VCL_TEST_TOKEN_HELP
- "\t\t\tDisplay help."
- INDENT VCL_TEST_TOKEN_EXIT
- "\t\t\tExit test client & server."
- INDENT VCL_TEST_TOKEN_SHOW_CFG
- "\t\t\tShow the current test cfg."
- INDENT VCL_TEST_TOKEN_RUN_UNI
- "\t\t\tRun the Uni-directional test."
- INDENT VCL_TEST_TOKEN_RUN_BI
- "\t\t\tRun the Bi-directional test."
- INDENT VCL_TEST_TOKEN_VERBOSE
- "\t\t\tToggle verbose setting."
- INDENT VCL_TEST_TOKEN_RXBUF_SIZE
- "<rxbuf size>\tRx buffer size (bytes)."
- INDENT VCL_TEST_TOKEN_TXBUF_SIZE
- "<txbuf size>\tTx buffer size (bytes)."
- INDENT VCL_TEST_TOKEN_NUM_WRITES
- "<# of writes>\tNumber of txbuf writes to server." "\n");
+ printf (
+ "CLIENT: Test configuration commands:" INDENT VCL_TEST_TOKEN_HELP
+ "\t\t\tDisplay help." INDENT VCL_TEST_TOKEN_EXIT
+ "\t\t\tExit test client & server." INDENT VCL_TEST_TOKEN_SHOW_CFG
+ "\t\t\tShow the current test cfg." INDENT HS_TEST_TOKEN_RUN_UNI
+ "\t\t\tRun the Uni-directional test." INDENT HS_TEST_TOKEN_RUN_BI
+ "\t\t\tRun the Bi-directional test." INDENT VCL_TEST_TOKEN_VERBOSE
+ "\t\t\tToggle verbose setting." INDENT VCL_TEST_TOKEN_RXBUF_SIZE
+ "<rxbuf size>\tRx buffer size (bytes)." INDENT VCL_TEST_TOKEN_TXBUF_SIZE
+ "<txbuf size>\tTx buffer size (bytes)." INDENT VCL_TEST_TOKEN_NUM_WRITES
+ "<# of writes>\tNumber of txbuf writes to server."
+ "\n");
}
#endif /* __vcl_test_h__ */
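
The config type, command tokens, test-type enums and the cfg
init/verify/dump helpers removed above move to <hs_apps/hs_test.h> under the
hs_test_ prefix, so the socket and VCL test apps can share them with the rest
of the host-stack test infra. A plausible shape for the renamed initializer,
assuming it mirrors the removed vcl_test_cfg_init (the default values are the
ones deleted above; the exact HS_TEST_CFG_* macro names are assumptions):

    static inline void
    hs_test_cfg_init (hs_test_cfg_t *cfg)
    {
      cfg->magic = HS_TEST_CFG_CTRL_MAGIC;
      cfg->test = HS_TEST_TYPE_NONE;
      cfg->ctrl_handle = ~0;
      cfg->num_test_sessions = 1;
      cfg->verbose = 0;
      cfg->txbuf_size = 8192;              /* old VCL_TEST_CFG_TXBUF_SIZE_DEF */
      cfg->rxbuf_size = 64 * cfg->txbuf_size;
      cfg->num_writes = 1000000;           /* old VCL_TEST_CFG_NUM_WRITES_DEF */
      cfg->total_bytes = cfg->num_writes * cfg->txbuf_size;
    }
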
diff --git a/src/plugins/hs_apps/vcl/vcl_test_client.c b/src/plugins/hs_apps/vcl/vcl_test_client.c
index 4a9fb46e5b8..a4a10b562ff 100644
--- a/src/plugins/hs_apps/vcl/vcl_test_client.c
+++ b/src/plugins/hs_apps/vcl/vcl_test_client.c
@@ -26,18 +26,34 @@
#include <pthread.h>
#include <signal.h>
-typedef struct
+typedef struct vtc_worker_ vcl_test_client_worker_t;
+typedef int (vtc_worker_run_fn) (vcl_test_client_worker_t *wrk);
+
+struct vtc_worker_
{
vcl_test_session_t *sessions;
vcl_test_session_t *qsessions;
uint32_t n_sessions;
uint32_t wrk_index;
- fd_set wr_fdset;
- fd_set rd_fdset;
- int max_fd_index;
+ union
+ {
+ struct
+ {
+ fd_set wr_fdset;
+ fd_set rd_fdset;
+ int max_fd_index;
+ };
+ struct
+ {
+ uint32_t epoll_sh;
+ struct epoll_event ep_evts[VCL_TEST_CFG_MAX_EPOLL_EVENTS];
+ vcl_test_session_t *next_to_send;
+ };
+ };
pthread_t thread_handle;
- vcl_test_cfg_t cfg;
-} vcl_test_client_worker_t;
+ vtc_worker_run_fn *wrk_run_fn;
+ hs_test_cfg_t cfg;
+};
typedef struct
{
@@ -46,13 +62,17 @@ typedef struct
vppcom_endpt_t server_endpt;
uint32_t cfg_seq_num;
uint8_t dump_cfg;
- vcl_test_t post_test;
+ hs_test_t post_test;
uint8_t proto;
uint8_t incremental_stats;
uint32_t n_workers;
volatile int active_workers;
volatile int test_running;
- struct sockaddr_storage server_addr;
+ union
+ {
+ struct in_addr v4;
+ struct in6_addr v6;
+ } server_addr;
} vcl_test_client_main_t;
vcl_test_client_main_t vcl_client_main;
@@ -65,14 +85,14 @@ vcl_test_main_t vcl_test_main;
static int
vtc_cfg_sync (vcl_test_session_t * ts)
{
- vcl_test_cfg_t *rx_cfg = (vcl_test_cfg_t *) ts->rxbuf;
+ hs_test_cfg_t *rx_cfg = (hs_test_cfg_t *) ts->rxbuf;
int rx_bytes, tx_bytes;
vt_atomic_add (&ts->cfg.seq_num, 1);
if (ts->cfg.verbose)
{
vtinf ("(fd %d): Sending config to server.", ts->fd);
- vcl_test_cfg_dump (&ts->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ts->cfg, 1 /* is_client */);
}
tx_bytes = ts->write (ts, &ts->cfg, sizeof (ts->cfg));
if (tx_bytes < 0)
@@ -81,50 +101,48 @@ vtc_cfg_sync (vcl_test_session_t * ts)
return tx_bytes;
}
- rx_bytes = ts->read (ts, ts->rxbuf, sizeof (vcl_test_cfg_t));
+ rx_bytes = ts->read (ts, ts->rxbuf, sizeof (hs_test_cfg_t));
if (rx_bytes < 0)
return rx_bytes;
- if (rx_cfg->magic != VCL_TEST_CFG_CTRL_MAGIC)
+ if (rx_cfg->magic != HS_TEST_CFG_CTRL_MAGIC)
{
vtwrn ("(fd %d): Bad server reply cfg -- aborting!", ts->fd);
return -1;
}
- if ((rx_bytes != sizeof (vcl_test_cfg_t))
- || !vcl_test_cfg_verify (rx_cfg, &ts->cfg))
+ if ((rx_bytes != sizeof (hs_test_cfg_t)) ||
+ !hs_test_cfg_verify (rx_cfg, &ts->cfg))
{
vtwrn ("(fd %d): Invalid config received from server!", ts->fd);
- if (rx_bytes != sizeof (vcl_test_cfg_t))
+ if (rx_bytes != sizeof (hs_test_cfg_t))
{
vtinf ("\tRx bytes %d != cfg size %lu", rx_bytes,
- sizeof (vcl_test_cfg_t));
+ sizeof (hs_test_cfg_t));
}
else
{
- vcl_test_cfg_dump (rx_cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 1 /* is_client */);
vtinf ("(fd %d): Valid config sent to server.", ts->fd);
- vcl_test_cfg_dump (&ts->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ts->cfg, 1 /* is_client */);
}
return -1;
}
if (ts->cfg.verbose)
{
vtinf ("(fd %d): Got config back from server.", ts->fd);
- vcl_test_cfg_dump (rx_cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 1 /* is_client */);
}
return 0;
}
static int
-vtc_connect_test_sessions (vcl_test_client_worker_t * wrk)
+vtc_worker_alloc_sessions (vcl_test_client_worker_t *wrk)
{
- vcl_test_client_main_t *vcm = &vcl_client_main;
- vcl_test_main_t *vt = &vcl_test_main;
- const vcl_test_proto_vft_t *tp;
vcl_test_session_t *ts;
uint32_t n_test_sessions;
- int i, rv;
+ struct timespec now;
+ int i, j;
n_test_sessions = wrk->cfg.num_test_sessions;
if (n_test_sessions < 1)
@@ -148,62 +166,33 @@ vtc_connect_test_sessions (vcl_test_client_worker_t * wrk)
return errno;
}
- tp = vt->protos[vcm->proto];
+ clock_gettime (CLOCK_REALTIME, &now);
for (i = 0; i < n_test_sessions; i++)
{
ts = &wrk->sessions[i];
memset (ts, 0, sizeof (*ts));
ts->session_index = i;
+ ts->old_stats.stop = now;
ts->cfg = wrk->cfg;
vcl_test_session_buf_alloc (ts);
- rv = tp->open (&wrk->sessions[i], &vcm->server_endpt);
- if (rv < 0)
- return rv;
- }
- wrk->n_sessions = n_test_sessions;
-
-done:
- vtinf ("All test sessions (%d) connected!", n_test_sessions);
- return 0;
-}
-
-static int
-vtc_worker_test_setup (vcl_test_client_worker_t * wrk)
-{
- vcl_test_cfg_t *cfg = &wrk->cfg;
- vcl_test_session_t *ts;
- struct timespec now;
- uint32_t sidx;
- int i, j;
-
- FD_ZERO (&wrk->wr_fdset);
- FD_ZERO (&wrk->rd_fdset);
-
- clock_gettime (CLOCK_REALTIME, &now);
-
- for (i = 0; i < cfg->num_test_sessions; i++)
- {
- ts = &wrk->sessions[i];
- ts->old_stats.stop = now;
- switch (cfg->test)
+ switch (ts->cfg.test)
{
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
for (j = 0; j < ts->txbuf_size; j++)
ts->txbuf[j] = j & 0xff;
break;
default:
break;
}
-
- FD_SET (vppcom_session_index (ts->fd), &wrk->wr_fdset);
- FD_SET (vppcom_session_index (ts->fd), &wrk->rd_fdset);
- sidx = vppcom_session_index (ts->fd);
- wrk->max_fd_index = vtc_max (sidx, wrk->max_fd_index);
}
- wrk->max_fd_index += 1;
+ wrk->n_sessions = n_test_sessions;
+
+done:
+
+ vtinf ("All test sessions (%d) initialized!", n_test_sessions);
return 0;
}
@@ -227,16 +216,13 @@ vtc_worker_init (vcl_test_client_worker_t * wrk)
}
vt_atomic_add (&vcm->active_workers, 1);
}
- rv = vtc_connect_test_sessions (wrk);
+ rv = vtc_worker_alloc_sessions (wrk);
if (rv)
{
- vterr ("vtc_connect_test_sessions ()", rv);
+ vterr ("vtc_worker_alloc_sessions ()", rv);
return rv;
}
- if (vtc_worker_test_setup (wrk))
- return -1;
-
return 0;
}
@@ -253,8 +239,7 @@ vtc_accumulate_stats (vcl_test_client_worker_t * wrk,
while (__sync_lock_test_and_set (&stats_lock, 1))
;
- if (ctrl->cfg.test == VCL_TEST_TYPE_BI
- || ctrl->cfg.test == VCL_TEST_TYPE_ECHO)
+ if (ctrl->cfg.test == HS_TEST_TYPE_BI || ctrl->cfg.test == HS_TEST_TYPE_ECHO)
show_rx = 1;
for (i = 0; i < wrk->cfg.num_test_sessions; i++)
@@ -308,32 +293,90 @@ vtc_inc_stats_check (vcl_test_session_t *ts)
}
}
-static void *
-vtc_worker_loop (void *arg)
+static void
+vtc_worker_start_transfer (vcl_test_client_worker_t *wrk)
+{
+ vtinf ("Worker %u starting transfer ...", wrk->wrk_index);
+
+ if (wrk->wrk_index == 0)
+ {
+ vcl_test_client_main_t *vcm = &vcl_client_main;
+ vcl_test_session_t *ctrl = &vcm->ctrl_session;
+
+ clock_gettime (CLOCK_REALTIME, &ctrl->stats.start);
+ }
+}
+
+static int
+vtc_session_check_is_done (vcl_test_session_t *ts, uint8_t check_rx)
+{
+ if ((!check_rx && ts->stats.tx_bytes >= ts->cfg.total_bytes) ||
+ (check_rx && ts->stats.rx_bytes >= ts->cfg.total_bytes))
+ {
+ clock_gettime (CLOCK_REALTIME, &ts->stats.stop);
+ ts->is_done = 1;
+ return 1;
+ }
+ return 0;
+}
+
+static int
+vtc_worker_connect_sessions_select (vcl_test_client_worker_t *wrk)
+{
+ vcl_test_client_main_t *vcm = &vcl_client_main;
+ vcl_test_main_t *vt = &vcl_test_main;
+ const vcl_test_proto_vft_t *tp;
+ vcl_test_session_t *ts;
+ uint32_t sidx;
+ int i, rv;
+
+ tp = vt->protos[vcm->proto];
+
+ FD_ZERO (&wrk->wr_fdset);
+ FD_ZERO (&wrk->rd_fdset);
+
+ for (i = 0; i < wrk->cfg.num_test_sessions; i++)
+ {
+ ts = &wrk->sessions[i];
+
+ rv = tp->open (&wrk->sessions[i], &vcm->server_endpt);
+ if (rv < 0)
+ return rv;
+
+ FD_SET (vppcom_session_index (ts->fd), &wrk->wr_fdset);
+ FD_SET (vppcom_session_index (ts->fd), &wrk->rd_fdset);
+ sidx = vppcom_session_index (ts->fd);
+ wrk->max_fd_index = vtc_max (sidx, wrk->max_fd_index);
+ }
+ wrk->max_fd_index += 1;
+
+ vtinf ("All test sessions (%d) connected!", wrk->cfg.num_test_sessions);
+
+ return 0;
+}
+
+static int
+vtc_worker_run_select (vcl_test_client_worker_t *wrk)
{
vcl_test_client_main_t *vcm = &vcl_client_main;
- vcl_test_session_t *ctrl = &vcm->ctrl_session;
- vcl_test_client_worker_t *wrk = arg;
- uint32_t n_active_sessions;
fd_set _wfdset, *wfdset = &_wfdset;
fd_set _rfdset, *rfdset = &_rfdset;
+ uint32_t n_active_sessions;
vcl_test_session_t *ts;
int i, rv, check_rx = 0;
- rv = vtc_worker_init (wrk);
+ rv = vtc_worker_connect_sessions_select (wrk);
if (rv)
{
- vterr ("vtc_worker_init()", rv);
- return 0;
+ vterr ("vtc_worker_connect_sessions()", rv);
+ return rv;
}
- vtinf ("Starting test ...");
+ check_rx = wrk->cfg.test != HS_TEST_TYPE_UNI;
+ n_active_sessions = wrk->cfg.num_test_sessions;
- if (wrk->wrk_index == 0)
- clock_gettime (CLOCK_REALTIME, &ctrl->stats.start);
+ vtc_worker_start_transfer (wrk);
- check_rx = wrk->cfg.test != VCL_TEST_TYPE_UNI;
- n_active_sessions = wrk->cfg.num_test_sessions;
while (n_active_sessions && vcm->test_running)
{
_wfdset = wrk->wr_fdset;
@@ -344,7 +387,7 @@ vtc_worker_loop (void *arg)
if (rv < 0)
{
vterr ("vppcom_select()", rv);
- goto exit;
+ break;
}
else if (rv == 0)
continue;
@@ -355,29 +398,29 @@ vtc_worker_loop (void *arg)
if (ts->is_done)
continue;
- if (FD_ISSET (vppcom_session_index (ts->fd), rfdset)
- && ts->stats.rx_bytes < ts->cfg.total_bytes)
+ if (FD_ISSET (vppcom_session_index (ts->fd), rfdset) &&
+ ts->stats.rx_bytes < ts->cfg.total_bytes)
{
rv = ts->read (ts, ts->rxbuf, ts->rxbuf_size);
if (rv < 0)
- goto exit;
+ break;
}
- if (FD_ISSET (vppcom_session_index (ts->fd), wfdset)
- && ts->stats.tx_bytes < ts->cfg.total_bytes)
+ if (FD_ISSET (vppcom_session_index (ts->fd), wfdset) &&
+ ts->stats.tx_bytes < ts->cfg.total_bytes)
{
rv = ts->write (ts, ts->txbuf, ts->cfg.txbuf_size);
if (rv < 0)
{
vtwrn ("vppcom_test_write (%d) failed -- aborting test",
ts->fd);
- goto exit;
+ break;
}
if (vcm->incremental_stats)
vtc_inc_stats_check (ts);
}
- if ((!check_rx && ts->stats.tx_bytes >= ts->cfg.total_bytes)
- || (check_rx && ts->stats.rx_bytes >= ts->cfg.total_bytes))
+ if ((!check_rx && ts->stats.tx_bytes >= ts->cfg.total_bytes) ||
+ (check_rx && ts->stats.rx_bytes >= ts->cfg.total_bytes))
{
clock_gettime (CLOCK_REALTIME, &ts->stats.stop);
ts->is_done = 1;
@@ -385,59 +428,343 @@ vtc_worker_loop (void *arg)
}
}
}
-exit:
+
+ return 0;
+}
+
+static void
+vtc_worker_epoll_send_add (vcl_test_client_worker_t *wrk,
+ vcl_test_session_t *ts)
+{
+ if (!wrk->next_to_send)
+ {
+ wrk->next_to_send = ts;
+ }
+ else
+ {
+ ts->next = wrk->next_to_send;
+ wrk->next_to_send = ts;
+ }
+}
+
+static void
+vtc_worker_epoll_send_del (vcl_test_client_worker_t *wrk,
+ vcl_test_session_t *ts, vcl_test_session_t *prev)
+{
+ if (!prev)
+ {
+ wrk->next_to_send = ts->next;
+ }
+ else
+ {
+ prev->next = ts->next;
+ }
+}
+
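+
+/*
+ * The two helpers above keep a pending-send list threaded through the new
+ * 'next' pointer in vcl_test_session_t, so the epoll worker can round-robin
+ * writes without allocating list nodes. The idiom, reduced to its generic
+ * form (illustration only, not part of the patch):
+ *
+ *   typedef struct node_ { struct node_ *next; } node_t;
+ *
+ *   static void
+ *   list_prepend (node_t **head, node_t *n)
+ *   {
+ *     n->next = *head;  // new node points at the old head ...
+ *     *head = n;        // ... and becomes the new head
+ *   }
+ *
+ *   static void
+ *   list_unlink (node_t **head, node_t *n, node_t *prev)
+ *   {
+ *     if (!prev)
+ *       *head = n->next;
+ *     else
+ *       prev->next = n->next;
+ *   }
+ */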
+static int
+vtc_worker_connect_sessions_epoll (vcl_test_client_worker_t *wrk)
+{
+ vcl_test_client_main_t *vcm = &vcl_client_main;
+ vcl_test_main_t *vt = &vcl_test_main;
+ const vcl_test_proto_vft_t *tp;
+ struct timespec start, end;
+ uint32_t n_connected = 0;
+ vcl_test_session_t *ts;
+ struct epoll_event ev;
+ int i, ci = 0, rv, n_ev;
+ double diff;
+
+ tp = vt->protos[vcm->proto];
+ wrk->epoll_sh = vppcom_epoll_create ();
+
+ ev.events = EPOLLET | EPOLLOUT;
+
+ clock_gettime (CLOCK_REALTIME, &start);
+
+ while (n_connected < wrk->cfg.num_test_sessions)
+ {
+ /*
+ * Try to connect more sessions if under pending threshold
+ */
+ while ((ci - n_connected) < 16 && ci < wrk->cfg.num_test_sessions)
+ {
+ ts = &wrk->sessions[ci];
+ ts->noblk_connect = 1;
+ rv = tp->open (&wrk->sessions[ci], &vcm->server_endpt);
+ if (rv < 0)
+ {
+ vtwrn ("open: %d", rv);
+ return rv;
+ }
+
+ ev.data.u64 = ci;
+ rv = vppcom_epoll_ctl (wrk->epoll_sh, EPOLL_CTL_ADD, ts->fd, &ev);
+ if (rv < 0)
+ {
+ vtwrn ("vppcom_epoll_ctl: %d", rv);
+ return rv;
+ }
+ ci += 1;
+ }
+
+ /*
+ * Handle connected events
+ */
+ n_ev =
+ vppcom_epoll_wait (wrk->epoll_sh, wrk->ep_evts,
+ VCL_TEST_CFG_MAX_EPOLL_EVENTS, 0 /* timeout */);
+ if (n_ev < 0)
+ {
+ vterr ("vppcom_epoll_wait() returned", n_ev);
+ return -1;
+ }
+ else if (n_ev == 0)
+ {
+ continue;
+ }
+
+ for (i = 0; i < n_ev; i++)
+ {
+ ts = &wrk->sessions[wrk->ep_evts[i].data.u32];
+ if (!(wrk->ep_evts[i].events & EPOLLOUT))
+ {
+ vtwrn ("connect failed");
+ return -1;
+ }
+ if (ts->is_open)
+ {
+ vtwrn ("connection already open?");
+ return -1;
+ }
+ ts->is_open = 1;
+ n_connected += 1;
+ }
+ }
+
+ clock_gettime (CLOCK_REALTIME, &end);
+
+ diff = vcl_test_time_diff (&start, &end);
+ vtinf ("Connected (%u) connected in %.2f seconds (%u CPS)!",
+ wrk->cfg.num_test_sessions, diff,
+ (uint32_t) ((double) wrk->cfg.num_test_sessions / diff));
+
+ ev.events = EPOLLET | EPOLLIN | EPOLLOUT;
+
+ for (i = 0; i < wrk->cfg.num_test_sessions; i++)
+ {
+ ts = &wrk->sessions[i];
+
+ /* No data to be sent */
+ if (ts->cfg.total_bytes == 0)
+ {
+ n_connected -= 1;
+ clock_gettime (CLOCK_REALTIME, &ts->stats.stop);
+ ts->is_done = 1;
+ continue;
+ }
+
+ ev.data.u64 = i;
+ rv = vppcom_epoll_ctl (wrk->epoll_sh, EPOLL_CTL_MOD, ts->fd, &ev);
+ if (rv < 0)
+ {
+ vtwrn ("vppcom_epoll_ctl: %d", rv);
+ return rv;
+ }
+ vtc_worker_epoll_send_add (wrk, ts);
+ }
+
+ return n_connected;
+}
+
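
The connect loop above keeps at most 16 nonblocking connects in flight and
treats an edge-triggered EPOLLOUT as "handshake complete" before re-arming
the session for the data phase. The same pattern with plain POSIX sockets,
for comparison (a sketch; a production version would also read SO_ERROR on
the EPOLLOUT wakeup before declaring the connect successful):

    #include <sys/epoll.h>
    #include <sys/socket.h>
    #include <stdint.h>
    #include <errno.h>

    static int
    nb_connect (int epfd, const struct sockaddr *sa, socklen_t len, uint64_t id)
    {
      struct epoll_event ev = { .events = EPOLLET | EPOLLOUT, .data.u64 = id };
      int fd = socket (sa->sa_family, SOCK_STREAM | SOCK_NONBLOCK, 0);

      if (fd < 0)
        return -1;
      if (connect (fd, sa, len) < 0 && errno != EINPROGRESS)
        return -1;      /* immediate failure */
      /* EPOLLOUT fires once the three-way handshake completes */
      return epoll_ctl (epfd, EPOLL_CTL_ADD, fd, &ev) < 0 ? -1 : fd;
    }
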
+static int
+vtc_worker_run_epoll (vcl_test_client_worker_t *wrk)
+{
+ vcl_test_client_main_t *vcm = &vcl_client_main;
+ uint32_t n_active_sessions, max_writes = 16, n_writes = 0;
+ vcl_test_session_t *ts, *prev = 0;
+ int i, rv, check_rx = 0, n_ev;
+
+ rv = vtc_worker_connect_sessions_epoll (wrk);
+ if (rv < 0)
+ {
+ vterr ("vtc_worker_connect_sessions()", rv);
+ return rv;
+ }
+
+ n_active_sessions = rv;
+ check_rx = wrk->cfg.test != HS_TEST_TYPE_UNI;
+
+ vtc_worker_start_transfer (wrk);
+ ts = wrk->next_to_send;
+
+ while (n_active_sessions && vcm->test_running)
+ {
+ /*
+ * Try to write
+ */
+ if (!ts)
+ {
+ ts = wrk->next_to_send;
+ if (!ts)
+ goto get_epoll_evts;
+ }
+
+ rv = ts->write (ts, ts->txbuf, ts->cfg.txbuf_size);
+ if (rv > 0)
+ {
+ if (vcm->incremental_stats)
+ vtc_inc_stats_check (ts);
+ if (vtc_session_check_is_done (ts, check_rx))
+ n_active_sessions -= 1;
+ }
+ else if (rv == 0)
+ {
+ vtc_worker_epoll_send_del (wrk, ts, prev);
+ }
+ else
+ {
+ vtwrn ("vppcom_test_write (%d) failed -- aborting test", ts->fd);
+ return -1;
+ }
+ prev = ts;
+ ts = ts->next;
+ n_writes += 1;
+
+ if (rv > 0 && n_writes < max_writes)
+ continue;
+
+ get_epoll_evts:
+
+ /*
+ * Grab new events
+ */
+
+ n_ev =
+ vppcom_epoll_wait (wrk->epoll_sh, wrk->ep_evts,
+ VCL_TEST_CFG_MAX_EPOLL_EVENTS, 0 /* timeout */);
+ if (n_ev < 0)
+ {
+ vterr ("vppcom_epoll_wait()", n_ev);
+ break;
+ }
+ else if (n_ev == 0)
+ {
+ continue;
+ }
+
+ for (i = 0; i < n_ev; i++)
+ {
+ ts = &wrk->sessions[wrk->ep_evts[i].data.u32];
+
+ if (ts->is_done)
+ continue;
+
+ if (wrk->ep_evts[i].events & (EPOLLERR | EPOLLHUP | EPOLLRDHUP))
+ {
+ vtinf ("%u finished before reading all data?", ts->fd);
+ break;
+ }
+ if ((wrk->ep_evts[i].events & EPOLLIN) &&
+ ts->stats.rx_bytes < ts->cfg.total_bytes)
+ {
+ rv = ts->read (ts, ts->rxbuf, ts->rxbuf_size);
+ if (rv < 0)
+ break;
+ if (vtc_session_check_is_done (ts, check_rx))
+ n_active_sessions -= 1;
+ }
+ if ((wrk->ep_evts[i].events & EPOLLOUT) &&
+ ts->stats.tx_bytes < ts->cfg.total_bytes)
+ {
+ vtc_worker_epoll_send_add (wrk, ts);
+ }
+ }
+
+ n_writes = 0;
+ }
+
+ return 0;
+}
+
+static inline int
+vtc_worker_run (vcl_test_client_worker_t *wrk)
+{
+ int rv;
+
+ vtinf ("Worker %u starting test ...", wrk->wrk_index);
+
+ rv = wrk->wrk_run_fn (wrk);
+
vtinf ("Worker %d done ...", wrk->wrk_index);
+
+ return rv;
+}
+
+static void *
+vtc_worker_loop (void *arg)
+{
+ vcl_test_client_main_t *vcm = &vcl_client_main;
+ vcl_test_session_t *ctrl = &vcm->ctrl_session;
+ vcl_test_client_worker_t *wrk = arg;
+
+ if (vtc_worker_init (wrk))
+ goto done;
+
+ if (vtc_worker_run (wrk))
+ goto done;
+
vtc_accumulate_stats (wrk, ctrl);
sleep (VCL_TEST_DELAY_DISCONNECT);
vtc_worker_sessions_exit (wrk);
+
+done:
+
if (wrk->wrk_index)
vt_atomic_add (&vcm->active_workers, -1);
+
return 0;
}
static void
vtc_print_stats (vcl_test_session_t * ctrl)
{
- int is_echo = ctrl->cfg.test == VCL_TEST_TYPE_ECHO;
+ int is_echo = ctrl->cfg.test == HS_TEST_TYPE_ECHO;
int show_rx = 0;
char buf[64];
- if (ctrl->cfg.test == VCL_TEST_TYPE_BI
- || ctrl->cfg.test == VCL_TEST_TYPE_ECHO)
+ if (ctrl->cfg.test == HS_TEST_TYPE_BI || ctrl->cfg.test == HS_TEST_TYPE_ECHO)
show_rx = 1;
vcl_test_stats_dump ("CLIENT RESULTS", &ctrl->stats,
show_rx, 1 /* show tx */ ,
ctrl->cfg.verbose);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
if (ctrl->cfg.verbose)
{
- vtinf (" ctrl session info\n"
- VCL_TEST_SEPARATOR_STRING
+ vtinf (" ctrl session info\n" HS_TEST_SEPARATOR_STRING
" fd: %d (0x%08x)\n"
" rxbuf: %p\n"
" rxbuf size: %u (0x%08x)\n"
" txbuf: %p\n"
- " txbuf size: %u (0x%08x)\n"
- VCL_TEST_SEPARATOR_STRING,
- ctrl->fd, (uint32_t) ctrl->fd,
- ctrl->rxbuf, ctrl->rxbuf_size, ctrl->rxbuf_size,
- ctrl->txbuf, ctrl->txbuf_size, ctrl->txbuf_size);
+ " txbuf size: %u (0x%08x)\n" HS_TEST_SEPARATOR_STRING,
+ ctrl->fd, (uint32_t) ctrl->fd, ctrl->rxbuf, ctrl->rxbuf_size,
+ ctrl->rxbuf_size, ctrl->txbuf, ctrl->txbuf_size,
+ ctrl->txbuf_size);
}
if (is_echo)
snprintf (buf, sizeof (buf), "Echo");
else
snprintf (buf, sizeof (buf), "%s-directional Stream",
- ctrl->cfg.test == VCL_TEST_TYPE_BI ? "Bi" : "Uni");
+ ctrl->cfg.test == HS_TEST_TYPE_BI ? "Bi" : "Uni");
}
static void
vtc_echo_client (vcl_test_client_main_t * vcm)
{
vcl_test_session_t *ctrl = &vcm->ctrl_session;
- vcl_test_cfg_t *cfg = &ctrl->cfg;
+ hs_test_cfg_t *cfg = &ctrl->cfg;
int rv;
cfg->total_bytes = strlen (ctrl->txbuf) + 1;
@@ -457,12 +784,12 @@ static void
vtc_stream_client (vcl_test_client_main_t * vcm)
{
vcl_test_session_t *ctrl = &vcm->ctrl_session;
- vcl_test_cfg_t *cfg = &ctrl->cfg;
+ hs_test_cfg_t *cfg = &ctrl->cfg;
vcl_test_client_worker_t *wrk;
uint32_t i, n_conn, n_conn_per_wrk;
vtinf ("%s-directional Stream Test Starting!",
- ctrl->cfg.test == VCL_TEST_TYPE_BI ? "Bi" : "Uni");
+ ctrl->cfg.test == HS_TEST_TYPE_BI ? "Bi" : "Uni");
memset (&ctrl->stats, 0, sizeof (vcl_test_stats_t));
cfg->total_bytes = cfg->num_writes * cfg->txbuf_size;
@@ -480,7 +807,7 @@ vtc_stream_client (vcl_test_client_main_t * vcm)
}
vcm->test_running = 1;
- ctrl->cfg.cmd = VCL_TEST_CMD_START;
+ ctrl->cfg.cmd = HS_TEST_CMD_START;
if (vtc_cfg_sync (ctrl))
{
vtwrn ("test cfg sync failed -- aborting!");
@@ -490,8 +817,12 @@ vtc_stream_client (vcl_test_client_main_t * vcm)
for (i = 1; i < vcm->n_workers; i++)
{
wrk = &vcm->workers[i];
- pthread_create (&wrk->thread_handle, NULL, vtc_worker_loop,
- (void *) wrk);
+ if (pthread_create (&wrk->thread_handle, NULL, vtc_worker_loop,
+ (void *) wrk))
+ {
+ vtwrn ("pthread_create failed -- aborting!");
+ return;
+ }
}
vtc_worker_loop (&vcm->workers[0]);
@@ -499,7 +830,7 @@ vtc_stream_client (vcl_test_client_main_t * vcm)
;
vtinf ("Sending config on ctrl session (fd %d) for stats...", ctrl->fd);
- ctrl->cfg.cmd = VCL_TEST_CMD_STOP;
+ ctrl->cfg.cmd = HS_TEST_CMD_STOP;
if (vtc_cfg_sync (ctrl))
{
vtwrn ("test cfg sync failed -- aborting!");
@@ -508,8 +839,8 @@ vtc_stream_client (vcl_test_client_main_t * vcm)
vtc_print_stats (ctrl);
- ctrl->cfg.cmd = VCL_TEST_CMD_SYNC;
- ctrl->cfg.test = VCL_TEST_TYPE_ECHO;
+ ctrl->cfg.cmd = HS_TEST_CMD_SYNC;
+ ctrl->cfg.test = HS_TEST_TYPE_ECHO;
ctrl->cfg.total_bytes = 0;
if (vtc_cfg_sync (ctrl))
vtwrn ("post-test cfg sync failed!");
@@ -529,7 +860,7 @@ cfg_txbuf_size_set (void)
ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
vcl_test_buf_alloc (&ctrl->cfg, 0 /* is_rxbuf */ ,
(uint8_t **) & ctrl->txbuf, &ctrl->txbuf_size);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
vtwrn ("Invalid txbuf size (%lu) < minimum buf size (%u)!",
@@ -548,7 +879,7 @@ cfg_num_writes_set (void)
{
ctrl->cfg.num_writes = num_writes;
ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
{
@@ -568,7 +899,7 @@ cfg_num_test_sessions_set (void)
(num_test_sessions <= VCL_TEST_CFG_MAX_TEST_SESS))
{
ctrl->cfg.num_test_sessions = num_test_sessions;
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
{
@@ -590,7 +921,7 @@ cfg_rxbuf_size_set (void)
ctrl->cfg.rxbuf_size = rxbuf_size;
vcl_test_buf_alloc (&ctrl->cfg, 1 /* is_rxbuf */ ,
(uint8_t **) & ctrl->rxbuf, &ctrl->rxbuf_size);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
vtwrn ("Invalid rxbuf size (%lu) < minimum buf size (%u)!",
@@ -604,20 +935,19 @@ cfg_verbose_toggle (void)
vcl_test_session_t *ctrl = &vcm->ctrl_session;
ctrl->cfg.verbose = ctrl->cfg.verbose ? 0 : 1;
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
-
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
-static vcl_test_t
+static hs_test_t
parse_input ()
{
vcl_test_client_main_t *vcm = &vcl_client_main;
vcl_test_session_t *ctrl = &vcm->ctrl_session;
- vcl_test_t rv = VCL_TEST_TYPE_NONE;
+ hs_test_t rv = HS_TEST_TYPE_NONE;
if (!strncmp (VCL_TEST_TOKEN_EXIT, ctrl->txbuf,
strlen (VCL_TEST_TOKEN_EXIT)))
- rv = VCL_TEST_TYPE_EXIT;
+ rv = HS_TEST_TYPE_EXIT;
else if (!strncmp (VCL_TEST_TOKEN_HELP, ctrl->txbuf,
strlen (VCL_TEST_TOKEN_HELP)))
@@ -647,16 +977,16 @@ parse_input ()
strlen (VCL_TEST_TOKEN_RXBUF_SIZE)))
cfg_rxbuf_size_set ();
- else if (!strncmp (VCL_TEST_TOKEN_RUN_UNI, ctrl->txbuf,
- strlen (VCL_TEST_TOKEN_RUN_UNI)))
- rv = ctrl->cfg.test = VCL_TEST_TYPE_UNI;
+ else if (!strncmp (HS_TEST_TOKEN_RUN_UNI, ctrl->txbuf,
+ strlen (HS_TEST_TOKEN_RUN_UNI)))
+ rv = ctrl->cfg.test = HS_TEST_TYPE_UNI;
- else if (!strncmp (VCL_TEST_TOKEN_RUN_BI, ctrl->txbuf,
- strlen (VCL_TEST_TOKEN_RUN_BI)))
- rv = ctrl->cfg.test = VCL_TEST_TYPE_BI;
+ else if (!strncmp (HS_TEST_TOKEN_RUN_BI, ctrl->txbuf,
+ strlen (HS_TEST_TOKEN_RUN_BI)))
+ rv = ctrl->cfg.test = HS_TEST_TYPE_BI;
else
- rv = VCL_TEST_TYPE_ECHO;
+ rv = HS_TEST_TYPE_ECHO;
return rv;
}
@@ -682,6 +1012,7 @@ print_usage_and_exit (void)
" -T <txbuf-size> Test Cfg: tx buffer size.\n"
" -U Run Uni-directional test.\n"
" -B Run Bi-directional test.\n"
+ " -b <bytes> Total number of bytes transferred\n"
" -V Verbose mode.\n"
" -I <N> Use N sessions.\n"
" -s <N> Use N sessions.\n"
@@ -697,7 +1028,7 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
int c, v;
opterr = 0;
- while ((c = getopt (argc, argv, "chnp:w:XE:I:N:R:T:UBV6DLs:q:S")) != -1)
+ while ((c = getopt (argc, argv, "chnp:w:xXE:I:N:R:T:b:UBV6DLs:q:S")) != -1)
switch (c)
{
case 'c':
@@ -752,7 +1083,11 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
break;
case 'X':
- vcm->post_test = VCL_TEST_TYPE_EXIT;
+ vcm->post_test = HS_TEST_TYPE_EXIT;
+ break;
+
+ case 'x':
+ vcm->post_test = HS_TEST_TYPE_NONE;
break;
case 'E':
@@ -763,7 +1098,7 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
print_usage_and_exit ();
}
strncpy (ctrl->txbuf, optarg, ctrl->txbuf_size);
- ctrl->cfg.test = VCL_TEST_TYPE_ECHO;
+ ctrl->cfg.test = HS_TEST_TYPE_ECHO;
break;
case 'N':
@@ -822,13 +1157,28 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
print_usage_and_exit ();
}
break;
+ case 'b':
+ if (sscanf (optarg, "0x%lu", &ctrl->cfg.total_bytes) != 1)
+ if (sscanf (optarg, "%ld", &ctrl->cfg.total_bytes) != 1)
+ {
+ vtwrn ("Invalid value for option -%c!", c);
+ print_usage_and_exit ();
+ }
+ if (ctrl->cfg.total_bytes % ctrl->cfg.txbuf_size)
+ {
+ vtwrn ("total bytes must be mutliple of txbuf size(0x%lu)!",
+ ctrl->cfg.txbuf_size);
+ print_usage_and_exit ();
+ }
+ ctrl->cfg.num_writes = ctrl->cfg.total_bytes / ctrl->cfg.txbuf_size;
+ break;
case 'U':
- ctrl->cfg.test = VCL_TEST_TYPE_UNI;
+ ctrl->cfg.test = HS_TEST_TYPE_UNI;
break;
case 'B':
- ctrl->cfg.test = VCL_TEST_TYPE_BI;
+ ctrl->cfg.test = HS_TEST_TYPE_BI;
break;
case 'V':
@@ -882,9 +1232,9 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
print_usage_and_exit ();
}
- if (argc < (optind + 2))
+ if (argc > (optind + 2))
{
- vtwrn ("Insufficient number of arguments!");
+ vtwrn ("Invalid number of arguments!");
print_usage_and_exit ();
}
@@ -895,26 +1245,25 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
memset (&vcm->server_addr, 0, sizeof (vcm->server_addr));
if (ctrl->cfg.address_ip6)
{
- struct sockaddr_in6 *sddr6 = (struct sockaddr_in6 *) &vcm->server_addr;
- sddr6->sin6_family = AF_INET6;
- inet_pton (AF_INET6, argv[optind++], &(sddr6->sin6_addr));
- sddr6->sin6_port = htons (atoi (argv[optind]));
+ struct in6_addr *in6 = &vcm->server_addr.v6;
+ inet_pton (AF_INET6, argv[optind++], in6);
vcm->server_endpt.is_ip4 = 0;
- vcm->server_endpt.ip = (uint8_t *) & sddr6->sin6_addr;
- vcm->server_endpt.port = (uint16_t) sddr6->sin6_port;
+ vcm->server_endpt.ip = (uint8_t *) in6;
}
else
{
- struct sockaddr_in *saddr4 = (struct sockaddr_in *) &vcm->server_addr;
- saddr4->sin_family = AF_INET;
- inet_pton (AF_INET, argv[optind++], &(saddr4->sin_addr));
- saddr4->sin_port = htons (atoi (argv[optind]));
+ struct in_addr *in4 = &vcm->server_addr.v4;
+ inet_pton (AF_INET, argv[optind++], in4);
vcm->server_endpt.is_ip4 = 1;
- vcm->server_endpt.ip = (uint8_t *) & saddr4->sin_addr;
- vcm->server_endpt.port = (uint16_t) saddr4->sin_port;
+ vcm->server_endpt.ip = (uint8_t *) in4;
}
+
+ if (argc == optind + 1)
+ vcm->server_endpt.port = htons (atoi (argv[optind]));
+ else
+ vcm->server_endpt.port = htons (VCL_TEST_SERVER_PORT);
}
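
With this change the server address lands in a bare in_addr/in6_addr union
rather than a sockaddr_storage, and the port argument becomes optional,
falling back to VCL_TEST_SERVER_PORT (22000) when omitted. The parse step in
isolation (hypothetical helper mirroring the logic above):

    #include <arpa/inet.h>

    static int
    parse_server_addr (vcl_test_client_main_t *vcm, int is_ip6, const char *host)
    {
      void *dst = is_ip6 ? (void *) &vcm->server_addr.v6 :
                           (void *) &vcm->server_addr.v4;

      if (inet_pton (is_ip6 ? AF_INET6 : AF_INET, host, dst) != 1)
        return -1;      /* not a valid literal address */
      vcm->server_endpt.is_ip4 = !is_ip6;
      vcm->server_endpt.ip = (uint8_t *) dst;
      return 0;
    }
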
static void
@@ -944,10 +1293,14 @@ vtc_ctrl_session_exit (void)
vcl_test_session_t *ctrl = &vcm->ctrl_session;
int verbose = ctrl->cfg.verbose;
- ctrl->cfg.test = VCL_TEST_TYPE_EXIT;
+ /* Only the client exits; the server can accept new connections */
+ if (vcm->post_test == HS_TEST_TYPE_EXIT_CLIENT)
+ return;
+
+ ctrl->cfg.test = HS_TEST_TYPE_EXIT;
vtinf ("(fd %d): Sending exit cfg to server...", ctrl->fd);
if (verbose)
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
(void) vcl_test_write (ctrl, (uint8_t *) &ctrl->cfg, sizeof (ctrl->cfg));
sleep (1);
}
@@ -976,7 +1329,7 @@ vtc_ctrl_session_init (vcl_test_client_main_t *vcm, vcl_test_session_t *ctrl)
ctrl->read = vcl_test_read;
ctrl->write = vcl_test_write;
- ctrl->cfg.cmd = VCL_TEST_CMD_SYNC;
+ ctrl->cfg.cmd = HS_TEST_CMD_SYNC;
rv = vtc_cfg_sync (ctrl);
if (rv)
{
@@ -984,7 +1337,7 @@ vtc_ctrl_session_init (vcl_test_client_main_t *vcm, vcl_test_session_t *ctrl)
return rv;
}
- ctrl->cfg.ctrl_handle = ((vcl_test_cfg_t *) ctrl->rxbuf)->ctrl_handle;
+ ctrl->cfg.ctrl_handle = ((hs_test_cfg_t *) ctrl->rxbuf)->ctrl_handle;
memset (&ctrl->stats, 0, sizeof (ctrl->stats));
return 0;
@@ -1015,6 +1368,24 @@ vt_incercept_sigs (void)
}
}
+static void
+vtc_alloc_workers (vcl_test_client_main_t *vcm)
+{
+ vcl_test_main_t *vt = &vcl_test_main;
+ vtc_worker_run_fn *run_fn;
+
+ vcm->workers = calloc (vcm->n_workers, sizeof (vcl_test_client_worker_t));
+ vt->wrk = calloc (vcm->n_workers, sizeof (vcl_test_wrk_t));
+
+ if (vcm->ctrl_session.cfg.num_test_sessions > VCL_TEST_CFG_MAX_SELECT_SESS)
+ run_fn = vtc_worker_run_epoll;
+ else
+ run_fn = vtc_worker_run_select;
+
+ for (int i = 0; i < vcm->n_workers; i++)
+ vcm->workers[i].wrk_run_fn = run_fn;
+}
+
int
main (int argc, char **argv)
{
@@ -1024,13 +1395,14 @@ main (int argc, char **argv)
int rv;
vcm->n_workers = 1;
- vcl_test_cfg_init (&ctrl->cfg);
+ vcm->post_test = HS_TEST_TYPE_EXIT_CLIENT;
+
+ hs_test_cfg_init (&ctrl->cfg);
+ vt_incercept_sigs ();
vcl_test_session_buf_alloc (ctrl);
vtc_process_opts (vcm, argc, argv);
- vt_incercept_sigs ();
- vcm->workers = calloc (vcm->n_workers, sizeof (vcl_test_client_worker_t));
- vt->wrk = calloc (vcm->n_workers, sizeof (vcl_test_wrk_t));
+ vtc_alloc_workers (vcm);
rv = vppcom_app_create ("vcl_test_client");
if (rv < 0)
@@ -1038,62 +1410,67 @@ main (int argc, char **argv)
/* Protos like tls/dtls/quic need init */
if (vt->protos[vcm->proto]->init)
- vt->protos[vcm->proto]->init (&ctrl->cfg);
+ {
+ rv = vt->protos[vcm->proto]->init (&ctrl->cfg);
+ if (rv)
+ vtfail ("client init failed", rv);
+ }
if ((rv = vtc_ctrl_session_init (vcm, ctrl)))
vtfail ("vppcom_session_create() ctrl session", rv);
/* Update ctrl port to data port */
- vcm->server_endpt.port += 1;
+ vcm->server_endpt.port = hs_make_data_port (vcm->server_endpt.port);
- while (ctrl->cfg.test != VCL_TEST_TYPE_EXIT)
+ while (ctrl->cfg.test != HS_TEST_TYPE_EXIT)
{
if (vcm->dump_cfg)
{
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
vcm->dump_cfg = 0;
}
switch (ctrl->cfg.test)
{
- case VCL_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_ECHO:
vtc_echo_client (vcm);
break;
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
vtc_stream_client (vcm);
break;
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
continue;
- case VCL_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_NONE:
default:
break;
}
switch (vcm->post_test)
{
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT_CLIENT:
switch (ctrl->cfg.test)
{
- case VCL_TEST_TYPE_EXIT:
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
- case VCL_TEST_TYPE_ECHO:
- ctrl->cfg.test = VCL_TEST_TYPE_EXIT;
+ case HS_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
+ case HS_TEST_TYPE_ECHO:
+ ctrl->cfg.test = HS_TEST_TYPE_EXIT;
continue;
- case VCL_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_NONE:
default:
break;
}
break;
- case VCL_TEST_TYPE_NONE:
- case VCL_TEST_TYPE_ECHO:
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
+ case HS_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
default:
break;
}
diff --git a/src/plugins/hs_apps/vcl/vcl_test_protos.c b/src/plugins/hs_apps/vcl/vcl_test_protos.c
index 60ee09265c9..cd1ac2b24f4 100644
--- a/src/plugins/hs_apps/vcl/vcl_test_protos.c
+++ b/src/plugins/hs_apps/vcl/vcl_test_protos.c
@@ -21,16 +21,15 @@ vt_tcp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
uint32_t flags, flen;
int rv;
- ts->fd = vppcom_session_create (VPPCOM_PROTO_TCP, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_TCP, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
return ts->fd;
}
- /* Connect is blocking */
rv = vppcom_session_connect (ts->fd, endpt);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_connect()", rv);
return rv;
@@ -38,10 +37,14 @@ vt_tcp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
ts->read = vcl_test_read;
ts->write = vcl_test_write;
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
- vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+ }
return 0;
}
@@ -108,16 +111,15 @@ vt_udp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
uint32_t flags, flen;
int rv;
- ts->fd = vppcom_session_create (VPPCOM_PROTO_UDP, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_UDP, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
return ts->fd;
}
- /* Connect is blocking */
rv = vppcom_session_connect (ts->fd, endpt);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_connect()", rv);
return rv;
@@ -125,10 +127,14 @@ vt_udp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
ts->read = vcl_test_read;
ts->write = vcl_test_write;
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
- vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+ }
return 0;
}
@@ -270,7 +276,7 @@ vt_add_cert_key_pair ()
}
static int
-vt_tls_init (vcl_test_cfg_t *cfg)
+vt_tls_init (hs_test_cfg_t *cfg)
{
return vt_add_cert_key_pair ();
}
@@ -282,7 +288,7 @@ vt_tls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
uint32_t flags, flen, ckp_len;
int rv;
- ts->fd = vppcom_session_create (VPPCOM_PROTO_TLS, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_TLS, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
@@ -293,9 +299,8 @@ vt_tls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_CKPAIR, &vt->ckpair_index,
&ckp_len);
- /* Connect is blocking */
rv = vppcom_session_connect (ts->fd, endpt);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_connect()", rv);
return rv;
@@ -303,10 +308,14 @@ vt_tls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
ts->read = vcl_test_read;
ts->write = vcl_test_write;
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
- vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+ }
return 0;
}
@@ -375,7 +384,7 @@ static const vcl_test_proto_vft_t vcl_test_tls = {
VCL_TEST_REGISTER_PROTO (VPPCOM_PROTO_TLS, vcl_test_tls);
static int
-vt_dtls_init (vcl_test_cfg_t *cfg)
+vt_dtls_init (hs_test_cfg_t *cfg)
{
return vt_add_cert_key_pair ();
}
@@ -387,7 +396,7 @@ vt_dtls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
uint32_t flags, flen, ckp_len;
int rv;
- ts->fd = vppcom_session_create (VPPCOM_PROTO_DTLS, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_DTLS, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
@@ -398,9 +407,8 @@ vt_dtls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_CKPAIR, &vt->ckpair_index,
&ckp_len);
- /* Connect is blocking */
rv = vppcom_session_connect (ts->fd, endpt);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_connect()", rv);
return rv;
@@ -408,10 +416,14 @@ vt_dtls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
ts->read = vcl_test_read;
ts->write = vcl_test_write;
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
- vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+ }
return 0;
}
@@ -480,7 +492,7 @@ static const vcl_test_proto_vft_t vcl_test_dtls = {
VCL_TEST_REGISTER_PROTO (VPPCOM_PROTO_DTLS, vcl_test_dtls);
static int
-vt_quic_init (vcl_test_cfg_t *cfg)
+vt_quic_init (hs_test_cfg_t *cfg)
{
vcl_test_main_t *vt = &vcl_test_main;
@@ -568,7 +580,7 @@ vt_quic_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
/* Make sure qsessions are initialized */
vt_quic_maybe_init_wrk (vt, wrk, endpt);
- ts->fd = vppcom_session_create (VPPCOM_PROTO_QUIC, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_QUIC, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
@@ -579,21 +591,23 @@ vt_quic_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
tq = &wrk->qsessions[ts->session_index / vt->cfg.num_test_sessions_perq];
rv = vppcom_session_stream_connect (ts->fd, tq->fd);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_stream_connect()", rv);
return rv;
}
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
-
ts->read = vcl_test_read;
ts->write = vcl_test_write;
- vtinf ("Test (quic stream) session %d (fd %d) connected.", ts->session_index,
- ts->fd);
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test (quic stream) session %d (fd %d) connected.",
+ ts->session_index, ts->fd);
+ }
return 0;
}
@@ -864,7 +878,7 @@ vt_srtp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
uint32_t flags, flen;
int rv;
- ts->fd = vppcom_session_create (VPPCOM_PROTO_SRTP, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_SRTP, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
@@ -873,9 +887,8 @@ vt_srtp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
vt_session_add_srtp_policy (ts, 1 /* is connect */);
- /* Connect is blocking */
rv = vppcom_session_connect (ts->fd, endpt);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_connect()", rv);
return rv;
@@ -883,10 +896,14 @@ vt_srtp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
ts->read = vt_srtp_read;
ts->write = vt_srtp_write;
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
- vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+ }
vt_srtp_session_init (ts, 1 /* is connect */);
diff --git a/src/plugins/hs_apps/vcl/vcl_test_server.c b/src/plugins/hs_apps/vcl/vcl_test_server.c
index 93c244484c8..d17a2089ba7 100644
--- a/src/plugins/hs_apps/vcl/vcl_test_server.c
+++ b/src/plugins/hs_apps/vcl/vcl_test_server.c
@@ -28,6 +28,17 @@
#include <vppinfra/mem.h>
#include <pthread.h>
+/*
+ * XXX: Unfortunately, libepoll-shim requires some hacks to work, one of
+ * which defines 'close' as a macro. This collides with the vcl test
+ * callback 'close'. Undef the 'close' macro on FreeBSD if it exists.
+ */
+#ifdef __FreeBSD__
+#ifdef close
+#undef close
+#endif
+#endif /* __FreeBSD__ */
+
typedef struct
{
uint16_t port;
@@ -106,7 +117,7 @@ again:
conn->endpt.ip = wrk->conn_pool[i].ip;
conn->is_alloc = 1;
conn->session_index = i;
- vcl_test_cfg_init (&conn->cfg);
+ hs_test_cfg_init (&conn->cfg);
return (&wrk->conn_pool[i]);
}
}
@@ -130,7 +141,7 @@ conn_pool_free (vcl_test_session_t *ts)
}
static inline void
-sync_config_and_reply (vcl_test_session_t *conn, vcl_test_cfg_t *rx_cfg)
+sync_config_and_reply (vcl_test_session_t *conn, hs_test_cfg_t *rx_cfg)
{
conn->cfg = *rx_cfg;
vcl_test_buf_alloc (&conn->cfg, 1 /* is_rxbuf */, (uint8_t **) &conn->rxbuf,
@@ -140,7 +151,7 @@ sync_config_and_reply (vcl_test_session_t *conn, vcl_test_cfg_t *rx_cfg)
if (conn->cfg.verbose)
{
vtinf ("(fd %d): Replying to cfg message!\n", conn->fd);
- vcl_test_cfg_dump (&conn->cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (&conn->cfg, 0 /* is_client */);
}
(void) vcl_test_write (conn, &conn->cfg, sizeof (conn->cfg));
}
@@ -185,14 +196,14 @@ vts_wrk_cleanup_all (vcl_test_server_worker_t *wrk)
static void
vts_test_cmd (vcl_test_server_worker_t *wrk, vcl_test_session_t *conn,
- vcl_test_cfg_t *rx_cfg)
+ hs_test_cfg_t *rx_cfg)
{
- u8 is_bi = rx_cfg->test == VCL_TEST_TYPE_BI;
+ u8 is_bi = rx_cfg->test == HS_TEST_TYPE_BI;
vcl_test_session_t *tc;
char buf[64];
int i;
- if (rx_cfg->cmd == VCL_TEST_CMD_STOP)
+ if (rx_cfg->cmd == HS_TEST_CMD_STOP)
{
struct timespec stop;
clock_gettime (CLOCK_REALTIME, &stop);
@@ -232,25 +243,25 @@ vts_test_cmd (vcl_test_server_worker_t *wrk, vcl_test_session_t *conn,
vcl_test_stats_dump ("SERVER RESULTS", &conn->stats, 1 /* show_rx */ ,
is_bi /* show_tx */ , conn->cfg.verbose);
- vcl_test_cfg_dump (&conn->cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (&conn->cfg, 0 /* is_client */);
if (conn->cfg.verbose)
{
- vtinf (" vcl server main\n" VCL_TEST_SEPARATOR_STRING
+ vtinf (" vcl server main\n" HS_TEST_SEPARATOR_STRING
" buf: %p\n"
- " buf size: %u (0x%08x)\n" VCL_TEST_SEPARATOR_STRING,
+ " buf size: %u (0x%08x)\n" HS_TEST_SEPARATOR_STRING,
conn->rxbuf, conn->rxbuf_size, conn->rxbuf_size);
}
sync_config_and_reply (conn, rx_cfg);
memset (&conn->stats, 0, sizeof (conn->stats));
}
- else if (rx_cfg->cmd == VCL_TEST_CMD_SYNC)
+ else if (rx_cfg->cmd == HS_TEST_CMD_SYNC)
{
rx_cfg->ctrl_handle = conn->fd;
vtinf ("Set control fd %d for test!", conn->fd);
sync_config_and_reply (conn, rx_cfg);
}
- else if (rx_cfg->cmd == VCL_TEST_CMD_START)
+ else if (rx_cfg->cmd == HS_TEST_CMD_START)
{
vtinf ("Starting %s-directional Stream Test (fd %d)!",
is_bi ? "Bi" : "Uni", conn->fd);
@@ -268,7 +279,7 @@ vts_server_process_rx (vcl_test_session_t *conn, int rx_bytes)
{
vcl_test_server_main_t *vsm = &vcl_server_main;
- if (conn->cfg.test == VCL_TEST_TYPE_BI)
+ if (conn->cfg.test == HS_TEST_TYPE_BI)
{
if (vsm->use_ds)
{
@@ -373,8 +384,9 @@ vts_accept_client (vcl_test_server_worker_t *wrk, int listen_fd)
if (tp->accept (listen_fd, conn))
return 0;
- vtinf ("Got a connection -- fd = %d (0x%08x) on listener fd = %d (0x%08x)",
- conn->fd, conn->fd, listen_fd, listen_fd);
+ if (conn->cfg.num_test_sessions < VCL_TEST_CFG_MAX_SELECT_SESS)
+ vtinf ("Got a connection -- fd = %d (0x%08x) on listener fd = %d (0x%08x)",
+ conn->fd, conn->fd, listen_fd, listen_fd);
ev.events = EPOLLET | EPOLLIN;
ev.data.u64 = conn - wrk->conn_pool;
@@ -502,31 +514,33 @@ vcl_test_server_process_opts (vcl_test_server_main_t * vsm, int argc,
print_usage_and_exit ();
}
- if (argc < (optind + 1))
+ if (argc > (optind + 1))
{
- fprintf (stderr, "SERVER: ERROR: Insufficient number of arguments!\n");
+ fprintf (stderr, "Incorrect number of arguments!\n");
print_usage_and_exit ();
}
-
- if (sscanf (argv[optind], "%d", &v) == 1)
- vsm->server_cfg.port = (uint16_t) v;
- else
+ else if (argc > 1 && argc == (optind + 1))
{
- fprintf (stderr, "SERVER: ERROR: Invalid port (%s)!\n", argv[optind]);
- print_usage_and_exit ();
+ if (sscanf (argv[optind], "%d", &v) == 1)
+ vsm->server_cfg.port = (uint16_t) v;
+ else
+ {
+ fprintf (stderr, "Invalid port (%s)!\n", argv[optind]);
+ print_usage_and_exit ();
+ }
}
vcl_test_init_endpoint_addr (vsm);
}
int
-vts_handle_ctrl_cfg (vcl_test_server_worker_t *wrk, vcl_test_cfg_t *rx_cfg,
+vts_handle_ctrl_cfg (vcl_test_server_worker_t *wrk, hs_test_cfg_t *rx_cfg,
vcl_test_session_t *conn, int rx_bytes)
{
if (rx_cfg->verbose)
{
vtinf ("(fd %d): Received a cfg msg!", conn->fd);
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
}
if (rx_bytes != sizeof (*rx_cfg))
@@ -538,7 +552,7 @@ vts_handle_ctrl_cfg (vcl_test_server_worker_t *wrk, vcl_test_cfg_t *rx_cfg,
if (conn->cfg.verbose)
{
vtinf ("(fd %d): Replying to cfg msg", conn->fd);
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
}
conn->write (conn, &conn->cfg, sizeof (conn->cfg));
return -1;
@@ -546,27 +560,28 @@ vts_handle_ctrl_cfg (vcl_test_server_worker_t *wrk, vcl_test_cfg_t *rx_cfg,
switch (rx_cfg->test)
{
- case VCL_TEST_TYPE_NONE:
- case VCL_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_ECHO:
sync_config_and_reply (conn, rx_cfg);
break;
- case VCL_TEST_TYPE_BI:
- case VCL_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
+ case HS_TEST_TYPE_UNI:
vts_test_cmd (wrk, conn, rx_cfg);
break;
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
vtinf ("Ctrl session fd %d closing!", conn->fd);
vts_session_cleanup (conn);
wrk->nfds--;
if (wrk->nfds)
vts_wrk_cleanup_all (wrk);
+ vcl_server_main.ctrl = 0;
break;
default:
vtwrn ("Unknown test type %d", rx_cfg->test);
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
break;
}
@@ -586,7 +601,7 @@ vts_worker_init (vcl_test_server_worker_t * wrk)
vtinf ("Initializing worker ...");
- conn_pool_expand (wrk, VCL_TEST_CFG_MAX_TEST_SESS + 1);
+ conn_pool_expand (wrk, VCL_TEST_CFG_INIT_TEST_SESS + 1);
if (wrk->wrk_index)
if (vppcom_worker_register ())
vtfail ("vppcom_worker_register()", 1);
@@ -648,7 +663,7 @@ vts_worker_loop (void *arg)
vcl_test_server_worker_t *wrk = arg;
vcl_test_session_t *conn;
int i, rx_bytes, num_ev;
- vcl_test_cfg_t *rx_cfg;
+ hs_test_cfg_t *rx_cfg;
if (wrk->wrk_index)
vts_worker_init (wrk);
@@ -675,13 +690,13 @@ vts_worker_loop (void *arg)
*/
if (ep_evts[i].events & (EPOLLHUP | EPOLLRDHUP))
{
- vts_session_cleanup (conn);
- wrk->nfds--;
- if (!wrk->nfds)
+ if (conn == vsm->ctrl)
{
- vtinf ("All client connections closed\n");
- goto done;
+ vtinf ("ctrl session went away");
+ vsm->ctrl = 0;
}
+ vts_session_cleanup (conn);
+ wrk->nfds--;
continue;
}
@@ -699,6 +714,10 @@ vts_worker_loop (void *arg)
vsm->ctrl = vts_accept_ctrl (wrk, vsm->ctrl_listen_fd);
continue;
}
+
+ /* at this point ctrl session must be valid */
+ ASSERT (vsm->ctrl);
+
if (ep_evts[i].data.u32 == VCL_TEST_DATA_LISTENER)
{
conn = vts_accept_client (wrk, wrk->listener.fd);
@@ -718,8 +737,8 @@ vts_worker_loop (void *arg)
if (!wrk->wrk_index && conn->fd == vsm->ctrl->fd)
{
rx_bytes = conn->read (conn, conn->rxbuf, conn->rxbuf_size);
- rx_cfg = (vcl_test_cfg_t *) conn->rxbuf;
- if (rx_cfg->magic == VCL_TEST_CFG_CTRL_MAGIC)
+ rx_cfg = (hs_test_cfg_t *) conn->rxbuf;
+ if (rx_cfg->magic == HS_TEST_CFG_CTRL_MAGIC)
{
vts_handle_ctrl_cfg (wrk, rx_cfg, conn, rx_bytes);
if (!wrk->nfds)
@@ -847,13 +866,15 @@ main (int argc, char **argv)
vts_ctrl_session_init (&vsm->workers[0]);
/* Update ctrl port to data port */
- vsm->server_cfg.endpt.port += 1;
+ vsm->server_cfg.endpt.port = hs_make_data_port (vsm->server_cfg.endpt.port);
vts_worker_init (&vsm->workers[0]);
for (i = 1; i < vsm->server_cfg.workers; i++)
{
vsm->workers[i].wrk_index = i;
rv = pthread_create (&vsm->workers[i].thread_handle, NULL,
vts_worker_loop, (void *) &vsm->workers[i]);
+ if (rv)
+ vtfail ("pthread_create()", rv);
}
vts_worker_loop (&vsm->workers[0]);
diff --git a/src/plugins/hsi/CMakeLists.txt b/src/plugins/hsi/CMakeLists.txt
new file mode 100644
index 00000000000..629f5e3762c
--- /dev/null
+++ b/src/plugins/hsi/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Copyright (c) 2021 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(hsi
+ SOURCES
+ hsi.c
+)
diff --git a/src/plugins/hsi/FEATURE.yaml b/src/plugins/hsi/FEATURE.yaml
new file mode 100644
index 00000000000..d6bf15fc25b
--- /dev/null
+++ b/src/plugins/hsi/FEATURE.yaml
@@ -0,0 +1,8 @@
+---
+name: HSI (Host Stack Intercept)
+maintainer: Florin Coras <fcoras@cisco.com>
+features:
+ - Host stack intercept feature
+description: "Feature that enables selective punting of flows to the host stack"
+state: experimental
+properties: [MULTITHREAD]
diff --git a/src/plugins/hsi/hsi.c b/src/plugins/hsi/hsi.c
new file mode 100644
index 00000000000..0fea0a3f288
--- /dev/null
+++ b/src/plugins/hsi/hsi.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+#include <hsi/hsi.h>
+#include <vnet/tcp/tcp_types.h>
+
+char *hsi_error_strings[] = {
+#define hsi_error(n, s) s,
+#include <hsi/hsi_error.def>
+#undef hsi_error
+};
+
+typedef enum hsi_input_next_
+{
+ HSI_INPUT_NEXT_UDP_INPUT,
+ HSI_INPUT_NEXT_TCP_INPUT,
+ HSI_INPUT_NEXT_TCP_INPUT_NOLOOKUP,
+ HSI_INPUT_N_NEXT
+} hsi_input_next_t;
+
+#define foreach_hsi4_input_next \
+ _ (UDP_INPUT, "udp4-input") \
+ _ (TCP_INPUT, "tcp4-input") \
+ _ (TCP_INPUT_NOLOOKUP, "tcp4-input-nolookup")
+
+#define foreach_hsi6_input_next \
+ _ (UDP_INPUT, "udp6-input") \
+ _ (TCP_INPUT, "tcp6-input") \
+ _ (TCP_INPUT_NOLOOKUP, "tcp6-input-nolookup")
+
+typedef struct
+{
+ u32 next_node;
+} hsi_trace_t;
+
+static u8 *
+format_hsi_trace (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ vlib_node_t *node = va_arg (*args, vlib_node_t *);
+ hsi_trace_t *t = va_arg (*args, hsi_trace_t *);
+ vlib_node_t *nn;
+
+ nn = vlib_get_next_node (vm, node->index, t->next_node);
+ s = format (s, "session %sfound, next node: %v",
+ t->next_node < HSI_INPUT_N_NEXT ? "" : "not ", nn->name);
+ return s;
+}
+
+always_inline u8
+hsi_udp_lookup (vlib_buffer_t *b, void *ip_hdr, u8 is_ip4)
+{
+ udp_header_t *hdr;
+ session_t *s;
+
+ if (is_ip4)
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) ip_hdr;
+ hdr = ip4_next_header (ip4);
+ s = session_lookup_safe4 (
+ vnet_buffer (b)->ip.fib_index, &ip4->dst_address, &ip4->src_address,
+ hdr->dst_port, hdr->src_port, TRANSPORT_PROTO_UDP);
+ }
+ else
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) ip_hdr;
+ hdr = ip6_next_header (ip6);
+ s = session_lookup_safe6 (
+ vnet_buffer (b)->ip.fib_index, &ip6->dst_address, &ip6->src_address,
+ hdr->dst_port, hdr->src_port, TRANSPORT_PROTO_UDP);
+ }
+
+ return s ? 1 : 0;
+}
+
+always_inline transport_connection_t *
+hsi_tcp_lookup (vlib_buffer_t *b, void *ip_hdr, tcp_header_t **rhdr, u8 is_ip4)
+{
+ transport_connection_t *tc;
+ tcp_header_t *hdr;
+ u8 result = 0;
+
+ if (is_ip4)
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) ip_hdr;
+ *rhdr = hdr = ip4_next_header (ip4);
+ tc = session_lookup_connection_wt4 (
+ vnet_buffer (b)->ip.fib_index, &ip4->dst_address, &ip4->src_address,
+ hdr->dst_port, hdr->src_port, TRANSPORT_PROTO_TCP,
+ vlib_get_thread_index (), &result);
+ }
+ else
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) ip_hdr;
+ *rhdr = hdr = ip6_next_header (ip6);
+ tc = session_lookup_connection_wt6 (
+ vnet_buffer (b)->ip.fib_index, &ip6->dst_address, &ip6->src_address,
+ hdr->dst_port, hdr->src_port, TRANSPORT_PROTO_TCP,
+ vlib_get_thread_index (), &result);
+ }
+
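+  /* A non-zero result means the lookup did not yield a usable connection */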
+ return result == 0 ? tc : 0;
+}
+
+always_inline void
+hsi_lookup_and_update (vlib_buffer_t *b, u32 *next, u8 is_ip4, u8 is_input)
+{
+ u8 proto, state, have_udp;
+ tcp_header_t *tcp_hdr = 0;
+ tcp_connection_t *tc;
+ u32 rw_len = 0;
+ void *ip_hdr;
+
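+  /* On the output arc, buffer data still starts at the L2 rewrite; skip
+   * save_rewrite_length bytes to reach the IP header */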
+ if (is_input)
+ {
+ ip_hdr = vlib_buffer_get_current (b);
+ if (is_ip4)
+ ip_lookup_set_buffer_fib_index (ip4_main.fib_index_by_sw_if_index, b);
+ else
+ ip_lookup_set_buffer_fib_index (ip6_main.fib_index_by_sw_if_index, b);
+ }
+ else
+ {
+ rw_len = vnet_buffer (b)->ip.save_rewrite_length;
+ ip_hdr = vlib_buffer_get_current (b) + rw_len;
+ }
+
+ if (is_ip4)
+ proto = ((ip4_header_t *) ip_hdr)->protocol;
+ else
+ proto = ((ip6_header_t *) ip_hdr)->protocol;
+
+ switch (proto)
+ {
+ case IP_PROTOCOL_TCP:
+ tc = (tcp_connection_t *) hsi_tcp_lookup (b, ip_hdr, &tcp_hdr, is_ip4);
+ if (tc)
+ {
+ state = tc->state;
+ if (state == TCP_STATE_LISTEN)
+ {
+	      /* Avoid processing non-SYN packets that match the listener */
+ if (!tcp_syn (tcp_hdr))
+ {
+ vnet_feature_next (next, b);
+ break;
+ }
+ *next = HSI_INPUT_NEXT_TCP_INPUT;
+ }
+ else if (state == TCP_STATE_SYN_SENT)
+ {
+ *next = HSI_INPUT_NEXT_TCP_INPUT;
+ }
+ else
+ {
+ /* Lookup already done, use result */
+ *next = HSI_INPUT_NEXT_TCP_INPUT_NOLOOKUP;
+ vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
+ }
+ vlib_buffer_advance (b, rw_len);
+ }
+ else
+ {
+ vnet_feature_next (next, b);
+ }
+ break;
+ case IP_PROTOCOL_UDP:
+ have_udp = hsi_udp_lookup (b, ip_hdr, is_ip4);
+ if (have_udp)
+ {
+ *next = HSI_INPUT_NEXT_UDP_INPUT;
+ /* Emulate udp-local and consume headers up to udp payload */
+ rw_len += is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
+ rw_len += sizeof (udp_header_t);
+ vlib_buffer_advance (b, rw_len);
+ }
+ else
+ {
+ vnet_feature_next (next, b);
+ }
+ break;
+ default:
+ vnet_feature_next (next, b);
+ break;
+ }
+}
+
+static void
+hsi_input_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t **bufs, u16 *nexts, u32 n_bufs, u8 is_ip4)
+{
+ vlib_buffer_t *b;
+ hsi_trace_t *t;
+ int i;
+
+ for (i = 0; i < n_bufs; i++)
+ {
+ b = bufs[i];
+ if (!(b->flags & VLIB_BUFFER_IS_TRACED))
+ continue;
+ t = vlib_add_trace (vm, node, b, sizeof (*t));
+ t->next_node = nexts[i];
+ }
+}
+
+always_inline uword
+hsi46_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u8 is_ip4, u8 is_input)
+{
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u16 nexts[VLIB_FRAME_SIZE], *next;
+ u32 n_left_from, *from;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+ b = bufs;
+ next = nexts;
+
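+  /* Dual loop: process two buffers per iteration while prefetching the
+   * following two */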
+ while (n_left_from >= 4)
+ {
+ u32 next0, next1;
+
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+
+ vlib_prefetch_buffer_header (b[3], LOAD);
+ CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+
+ hsi_lookup_and_update (b[0], &next0, is_ip4, is_input);
+ hsi_lookup_and_update (b[1], &next1, is_ip4, is_input);
+
+ next[0] = next0;
+ next[1] = next1;
+
+ b += 2;
+ next += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from)
+ {
+ u32 next0;
+
+ hsi_lookup_and_update (b[0], &next0, is_ip4, is_input);
+
+ next[0] = next0;
+
+ b += 1;
+ next += 1;
+ n_left_from -= 1;
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ hsi_input_trace_frame (vm, node, bufs, nexts, frame->n_vectors, is_ip4);
+
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (hsi4_in_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return hsi46_input_inline (vm, node, frame, 1 /* is_ip4 */,
+ 1 /* is_input */);
+}
+
+VLIB_REGISTER_NODE (hsi4_in_node) = {
+ .name = "hsi4-in",
+ .vector_size = sizeof (u32),
+ .format_trace = format_hsi_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = HSI_N_ERROR,
+ .error_strings = hsi_error_strings,
+ .n_next_nodes = HSI_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [HSI_INPUT_NEXT_##s] = n,
+ foreach_hsi4_input_next
+#undef _
+ },
+};
+
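+/* Runs after full reassembly, so transport headers are never fragmented,
+ * and before the ip4 lookup */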
+VNET_FEATURE_INIT (hsi4_in_feature, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "hsi4-in",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+ .runs_after = VNET_FEATURES ("ip4-full-reassembly-feature"),
+};
+
+VLIB_NODE_FN (hsi4_out_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return hsi46_input_inline (vm, node, frame, 1 /* is_ip4 */,
+ 0 /* is_input */);
+}
+
+VLIB_REGISTER_NODE (hsi4_out_node) = {
+ .name = "hsi4-out",
+ .vector_size = sizeof (u32),
+ .format_trace = format_hsi_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = HSI_N_ERROR,
+ .error_strings = hsi_error_strings,
+ .n_next_nodes = HSI_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [HSI_INPUT_NEXT_##s] = n,
+ foreach_hsi4_input_next
+#undef _
+ },
+};
+
+VNET_FEATURE_INIT (hsi4_out_feature, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "hsi4-out",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
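+
+/* The hsi nodes are regular arc features; assuming the standard feature
+ * CLI, they can be enabled per interface, e.g.:
+ *   set interface feature <ifname> hsi4-in arc ip4-unicast */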
+
+VLIB_NODE_FN (hsi6_in_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return hsi46_input_inline (vm, node, frame, 0 /* is_ip4 */,
+ 1 /* is_input */);
+}
+
+VLIB_REGISTER_NODE (hsi6_in_node) = {
+ .name = "hsi6-in",
+ .vector_size = sizeof (u32),
+ .format_trace = format_hsi_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = HSI_N_ERROR,
+ .error_strings = hsi_error_strings,
+ .n_next_nodes = HSI_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [HSI_INPUT_NEXT_##s] = n,
+ foreach_hsi6_input_next
+#undef _
+ },
+};
+
+VNET_FEATURE_INIT (hsi6_in_feature, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "hsi6-in",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+ .runs_after = VNET_FEATURES ("ip6-full-reassembly-feature"),
+};
+
+VLIB_NODE_FN (hsi6_out_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return hsi46_input_inline (vm, node, frame, 0 /* is_ip4 */,
+ 0 /* is_input */);
+}
+
+VLIB_REGISTER_NODE (hsi6_out_node) = {
+ .name = "hsi6-out",
+ .vector_size = sizeof (u32),
+ .format_trace = format_hsi_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = HSI_N_ERROR,
+ .error_strings = hsi_error_strings,
+ .n_next_nodes = HSI_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [HSI_INPUT_NEXT_##s] = n,
+ foreach_hsi6_input_next
+#undef _
+ },
+};
+
+VNET_FEATURE_INIT (hsi6_out_feature, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "hsi6-out",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Host Stack Intercept (HSI)",
+ .default_disabled = 0,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/hsi/hsi.h b/src/plugins/hsi/hsi.h
new file mode 100644
index 00000000000..1eee1565ef1
--- /dev/null
+++ b/src/plugins/hsi/hsi.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_HSI_HSI_H_
+#define SRC_PLUGINS_HSI_HSI_H_
+
+#include <vnet/session/session.h>
+
+typedef enum _hsi_error
+{
+#define hsi_error(n, s) HSI_ERROR_##n,
+#include <hsi/hsi_error.def>
+#undef hsi_error
+ HSI_N_ERROR,
+} hsi_error_t;
+
+#endif /* SRC_PLUGINS_HSI_HSI_H_ */
diff --git a/src/plugins/hsi/hsi_error.def b/src/plugins/hsi/hsi_error.def
new file mode 100644
index 00000000000..4e9d7f19238
--- /dev/null
+++ b/src/plugins/hsi/hsi_error.def
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+hsi_error (NONE, "no error")
\ No newline at end of file
diff --git a/src/plugins/http/CMakeLists.txt b/src/plugins/http/CMakeLists.txt
new file mode 100644
index 00000000000..d9cd84a3955
--- /dev/null
+++ b/src/plugins/http/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(http
+ SOURCES
+ http.c
+ http_buffer.c
+ http_timer.c
+)
diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c
new file mode 100644
index 00000000000..0fa113c8155
--- /dev/null
+++ b/src/plugins/http/http.c
@@ -0,0 +1,1504 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <http/http.h>
+#include <vnet/session/session.h>
+#include <http/http_timer.h>
+
+static http_main_t http_main;
+
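+/* 16KB threshold used for tx fifo dequeue notifications and for
+ * descheduling when the transport tx fifo is nearly full */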
+#define HTTP_FIFO_THRESH (16 << 10)
+#define CONTENT_LEN_STR "Content-Length: "
+
+/* HTTP state machine result */
+typedef enum http_sm_result_t_
+{
+ HTTP_SM_STOP = 0,
+ HTTP_SM_CONTINUE = 1,
+ HTTP_SM_ERROR = -1,
+} http_sm_result_t;
+
+const char *http_status_code_str[] = {
+#define _(c, s, str) str,
+ foreach_http_status_code
+#undef _
+};
+
+const char *http_content_type_str[] = {
+#define _(s, ext, str) str,
+ foreach_http_content_type
+#undef _
+};
+
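+/* Map app msg data type to the http buffer type used to stream the body */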
+const http_buffer_type_t msg_to_buf_type[] = {
+ [HTTP_MSG_DATA_INLINE] = HTTP_BUFFER_FIFO,
+ [HTTP_MSG_DATA_PTR] = HTTP_BUFFER_PTR,
+};
+
+u8 *
+format_http_state (u8 *s, va_list *va)
+{
+ http_state_t state = va_arg (*va, http_state_t);
+
+ switch (state)
+ {
+ case HTTP_STATE_IDLE:
+ return format (s, "idle");
+ case HTTP_STATE_WAIT_APP_METHOD:
+ return format (s, "wait app method");
+ case HTTP_STATE_WAIT_SERVER_REPLY:
+ return format (s, "wait server reply");
+ case HTTP_STATE_CLIENT_IO_MORE_DATA:
+ return format (s, "client io more data");
+ case HTTP_STATE_WAIT_CLIENT_METHOD:
+ return format (s, "wait client method");
+ case HTTP_STATE_WAIT_APP_REPLY:
+ return format (s, "wait app reply");
+ case HTTP_STATE_APP_IO_MORE_DATA:
+ return format (s, "app io more data");
+ default:
+ break;
+ }
+ return format (s, "unknown");
+}
+
+#define http_state_change(_hc, _state) \
+ do \
+ { \
+ HTTP_DBG (1, "changing http state %U -> %U", format_http_state, \
+ (_hc)->http_state, format_http_state, _state); \
+ (_hc)->http_state = _state; \
+ } \
+ while (0)
+
+static inline http_worker_t *
+http_worker_get (u32 thread_index)
+{
+ return &http_main.wrk[thread_index];
+}
+
+static inline u32
+http_conn_alloc_w_thread (u32 thread_index)
+{
+ http_worker_t *wrk = http_worker_get (thread_index);
+ http_conn_t *hc;
+
+ pool_get_aligned_safe (wrk->conn_pool, hc, CLIB_CACHE_LINE_BYTES);
+ clib_memset (hc, 0, sizeof (*hc));
+ hc->c_thread_index = thread_index;
+ hc->h_hc_index = hc - wrk->conn_pool;
+ hc->h_pa_session_handle = SESSION_INVALID_HANDLE;
+ hc->h_tc_session_handle = SESSION_INVALID_HANDLE;
+ return hc->h_hc_index;
+}
+
+static inline http_conn_t *
+http_conn_get_w_thread (u32 hc_index, u32 thread_index)
+{
+ http_worker_t *wrk = http_worker_get (thread_index);
+ return pool_elt_at_index (wrk->conn_pool, hc_index);
+}
+
+void
+http_conn_free (http_conn_t *hc)
+{
+ http_worker_t *wrk = http_worker_get (hc->c_thread_index);
+ pool_put (wrk->conn_pool, hc);
+}
+
+static u32
+http_listener_alloc (void)
+{
+ http_main_t *hm = &http_main;
+ http_conn_t *lhc;
+
+ pool_get_zero (hm->listener_pool, lhc);
+ lhc->c_c_index = lhc - hm->listener_pool;
+ return lhc->c_c_index;
+}
+
+http_conn_t *
+http_listener_get (u32 lhc_index)
+{
+ return pool_elt_at_index (http_main.listener_pool, lhc_index);
+}
+
+void
+http_listener_free (http_conn_t *lhc)
+{
+ http_main_t *hm = &http_main;
+
+ if (CLIB_DEBUG)
+ memset (lhc, 0xfc, sizeof (*lhc));
+ pool_put (hm->listener_pool, lhc);
+}
+
+void
+http_disconnect_transport (http_conn_t *hc)
+{
+ vnet_disconnect_args_t a = {
+ .handle = hc->h_tc_session_handle,
+ .app_index = http_main.app_index,
+ };
+
+ hc->state = HTTP_CONN_STATE_CLOSED;
+
+ if (vnet_disconnect_session (&a))
+ clib_warning ("disconnect returned");
+}
+
+static void
+http_conn_timeout_cb (void *hc_handlep)
+{
+ http_conn_t *hc;
+ uword hs_handle;
+
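+  /* Handle encodes thread index in the upper bits and connection index in
+   * the lower 24 bits */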
+ hs_handle = pointer_to_uword (hc_handlep);
+ hc = http_conn_get_w_thread (hs_handle & 0x00FFFFFF, hs_handle >> 24);
+
+ HTTP_DBG (1, "terminate thread %d index %d hs %llx", hs_handle >> 24,
+ hs_handle & 0x00FFFFFF, hc);
+ if (!hc)
+ return;
+
+ hc->timer_handle = ~0;
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+}
+
+int
+http_ts_accept_callback (session_t *ts)
+{
+ session_t *ts_listener, *as, *asl;
+ app_worker_t *app_wrk;
+ http_conn_t *lhc, *hc;
+ u32 hc_index, thresh;
+ int rv;
+
+ ts_listener = listen_session_get_from_handle (ts->listener_handle);
+ lhc = http_listener_get (ts_listener->opaque);
+
+ hc_index = http_conn_alloc_w_thread (ts->thread_index);
+ hc = http_conn_get_w_thread (hc_index, ts->thread_index);
+ clib_memcpy_fast (hc, lhc, sizeof (*lhc));
+ hc->c_thread_index = ts->thread_index;
+ hc->h_hc_index = hc_index;
+
+ hc->h_tc_session_handle = session_handle (ts);
+ hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
+
+ hc->state = HTTP_CONN_STATE_ESTABLISHED;
+ http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+
+ ts->session_state = SESSION_STATE_READY;
+ ts->opaque = hc_index;
+
+ /*
+ * Alloc session and initialize
+ */
+ as = session_alloc (hc->c_thread_index);
+ hc->c_s_index = as->session_index;
+
+ as->app_wrk_index = hc->h_pa_wrk_index;
+ as->connection_index = hc->c_c_index;
+ as->session_state = SESSION_STATE_ACCEPTING;
+
+ asl = listen_session_get_from_handle (lhc->h_pa_session_handle);
+ as->session_type = asl->session_type;
+ as->listener_handle = lhc->h_pa_session_handle;
+
+ /*
+ * Init session fifos and notify app
+ */
+ if ((rv = app_worker_init_accepted (as)))
+ {
+ HTTP_DBG (1, "failed to allocate fifos");
+ session_free (as);
+ return rv;
+ }
+
+ hc->h_pa_session_handle = session_handle (as);
+ hc->h_pa_wrk_index = as->app_wrk_index;
+ app_wrk = app_worker_get (as->app_wrk_index);
+
+ HTTP_DBG (1, "Accepted on listener %u new connection [%u]%x",
+ ts_listener->opaque, vlib_get_thread_index (), hc_index);
+
+ if ((rv = app_worker_accept_notify (app_wrk, as)))
+ {
+ HTTP_DBG (0, "app accept returned");
+ session_free (as);
+ return rv;
+ }
+
+ /* Avoid enqueuing small chunks of data on transport tx notifications. If
+   * the fifo is small (under 16K) we set the threshold to its size, meaning
+ * a notification will be given when the fifo empties.
+ */
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+ thresh = clib_min (svm_fifo_size (ts->tx_fifo), HTTP_FIFO_THRESH);
+ svm_fifo_set_deq_thresh (ts->tx_fifo, thresh);
+
+ http_conn_timer_start (hc);
+
+ return 0;
+}
+
+static int
+http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts,
+ session_error_t err)
+{
+ u32 new_hc_index;
+ session_t *as;
+ http_conn_t *hc, *ho_hc;
+ app_worker_t *app_wrk;
+ int rv;
+
+ if (err)
+ {
+ clib_warning ("ERROR: %d", err);
+ return 0;
+ }
+
+ new_hc_index = http_conn_alloc_w_thread (ts->thread_index);
+ hc = http_conn_get_w_thread (new_hc_index, ts->thread_index);
+ ho_hc = http_conn_get_w_thread (ho_hc_index, 0);
+
+ ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING);
+
+ clib_memcpy_fast (hc, ho_hc, sizeof (*hc));
+
+ hc->c_thread_index = ts->thread_index;
+ hc->h_tc_session_handle = session_handle (ts);
+ hc->c_c_index = new_hc_index;
+ hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
+ hc->state = HTTP_CONN_STATE_ESTABLISHED;
+ http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD);
+
+ ts->session_state = SESSION_STATE_READY;
+ ts->opaque = new_hc_index;
+
+ /* allocate app session and initialize */
+
+ as = session_alloc (hc->c_thread_index);
+ hc->c_s_index = as->session_index;
+ as->connection_index = hc->c_c_index;
+ as->app_wrk_index = hc->h_pa_wrk_index;
+ as->session_state = SESSION_STATE_READY;
+ as->opaque = hc->h_pa_app_api_ctx;
+ as->session_type = session_type_from_proto_and_ip (
+ TRANSPORT_PROTO_HTTP, session_type_is_ip4 (ts->session_type));
+
+ HTTP_DBG (1, "half-open hc index %d, hc index %d", ho_hc_index,
+ new_hc_index);
+
+ app_wrk = app_worker_get (hc->h_pa_wrk_index);
+ if (!app_wrk)
+ {
+ clib_warning ("no app worker");
+ return -1;
+ }
+
+ if ((rv = app_worker_init_connected (app_wrk, as)))
+ {
+ HTTP_DBG (1, "failed to allocate fifos");
+ session_free (as);
+ return rv;
+ }
+ app_worker_connect_notify (app_wrk, as, err, hc->h_pa_app_api_ctx);
+ hc->h_pa_session_handle = session_handle (as);
+ http_conn_timer_start (hc);
+
+ return 0;
+}
+
+static void
+http_ts_disconnect_callback (session_t *ts)
+{
+ http_conn_t *hc;
+
+ hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+
+ if (hc->state < HTTP_CONN_STATE_TRANSPORT_CLOSED)
+ hc->state = HTTP_CONN_STATE_TRANSPORT_CLOSED;
+
+ /* Nothing more to rx, propagate to app */
+ if (!svm_fifo_max_dequeue_cons (ts->rx_fifo))
+ session_transport_closing_notify (&hc->connection);
+}
+
+static void
+http_ts_reset_callback (session_t *ts)
+{
+ http_conn_t *hc;
+
+ hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+
+ hc->state = HTTP_CONN_STATE_CLOSED;
+ http_buffer_free (&hc->tx_buf);
+ http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+ session_transport_reset_notify (&hc->connection);
+
+ http_disconnect_transport (hc);
+}
+
+/**
+ * http error boilerplate
+ */
+static const char *http_error_template = "HTTP/1.1 %s\r\n"
+ "Date: %U GMT\r\n"
+ "Content-Type: text/html\r\n"
+ "Connection: close\r\n"
+ "Pragma: no-cache\r\n"
+ "Content-Length: 0\r\n\r\n";
+
+static const char *http_redirect_template = "HTTP/1.1 %s\r\n";
+
+/**
+ * http response boilerplate
+ */
+static const char *http_response_template = "HTTP/1.1 %s\r\n"
+ "Date: %U GMT\r\n"
+ "Expires: %U GMT\r\n"
+ "Server: VPP Static\r\n"
+ "Content-Type: %s\r\n"
+ "Content-Length: %lu\r\n\r\n";
+
+static const char *http_request_template = "GET %s HTTP/1.1\r\n"
+ "User-Agent: VPP HTTP client\r\n"
+ "Accept: */*\r\n";
+
+static u32
+http_send_data (http_conn_t *hc, u8 *data, u32 length, u32 offset)
+{
+ const u32 max_burst = 64 << 10;
+ session_t *ts;
+ u32 to_send;
+ int sent;
+
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+
+ to_send = clib_min (length - offset, max_burst);
+ sent = svm_fifo_enqueue (ts->tx_fifo, to_send, data + offset);
+
+ if (sent <= 0)
+ return offset;
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+
+ return (offset + sent);
+}
+
+static void
+http_send_error (http_conn_t *hc, http_status_code_t ec)
+{
+ http_main_t *hm = &http_main;
+ u8 *data;
+ f64 now;
+
+ if (ec >= HTTP_N_STATUS)
+ ec = HTTP_STATUS_INTERNAL_ERROR;
+
+ now = clib_timebase_now (&hm->timebase);
+ data = format (0, http_error_template, http_status_code_str[ec],
+ format_clib_timebase_time, now);
+ http_send_data (hc, data, vec_len (data), 0);
+ vec_free (data);
+}
+
+static int
+http_read_message (http_conn_t *hc)
+{
+ u32 max_deq, cursize;
+ session_t *ts;
+ int n_read;
+
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+
+ cursize = vec_len (hc->rx_buf);
+ max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
+ if (PREDICT_FALSE (max_deq == 0))
+ return -1;
+
+ vec_validate (hc->rx_buf, cursize + max_deq - 1);
+ n_read = svm_fifo_dequeue (ts->rx_fifo, max_deq, hc->rx_buf + cursize);
+ ASSERT (n_read == max_deq);
+
+ if (svm_fifo_is_empty (ts->rx_fifo))
+ svm_fifo_unset_event (ts->rx_fifo);
+
+ vec_set_len (hc->rx_buf, cursize + n_read);
+ return 0;
+}
+
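+/* Naive substring search: index of first occurrence of str at or after
+ * offset, -1 if not found */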
+static int
+v_find_index (u8 *vec, u32 offset, char *str)
+{
+ int start_index = offset;
+ u32 slen = (u32) strnlen_s_inline (str, 16);
+ u32 vlen = vec_len (vec);
+
+ ASSERT (slen > 0);
+
+ if (vlen <= slen)
+ return -1;
+
+ for (; start_index < (vlen - slen); start_index++)
+ {
+ if (!memcmp (vec + start_index, str, slen))
+ return start_index;
+ }
+
+ return -1;
+}
+
+static int
+http_parse_header (http_conn_t *hc, int *content_length)
+{
+ unformat_input_t input;
+ int i, len;
+ u8 *line;
+
+ i = v_find_index (hc->rx_buf, hc->rx_buf_offset, CONTENT_LEN_STR);
+ if (i < 0)
+ {
+ clib_warning ("cannot find '%s' in the header!", CONTENT_LEN_STR);
+ return -1;
+ }
+
+ hc->rx_buf_offset = i;
+
+ i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "\n");
+ if (i < 0)
+ {
+ clib_warning ("end of line missing; incomplete data");
+ return -1;
+ }
+
+ len = i - hc->rx_buf_offset;
+ line = vec_new (u8, len);
+ clib_memcpy (line, hc->rx_buf + hc->rx_buf_offset, len);
+
+ unformat_init_vector (&input, line);
+ if (!unformat (&input, CONTENT_LEN_STR "%d", content_length))
+ {
+ clib_warning ("failed to unformat content length!");
+ return -1;
+ }
+ unformat_free (&input);
+
+ /* skip rest of the header */
+ hc->rx_buf_offset += len;
+ i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "<html>");
+ if (i < 0)
+ {
+ clib_warning ("<html> tag not found");
+ return -1;
+ }
+ hc->rx_buf_offset = i;
+
+ return 0;
+}
+
+static http_sm_result_t
+http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp)
+{
+ int i, rv, content_length;
+ http_msg_t msg = {};
+ app_worker_t *app_wrk;
+ session_t *as;
+ http_status_code_t ec;
+
+ rv = http_read_message (hc);
+
+  /* Nothing yet, wait for more data or timer expiry */
+ if (rv)
+ return HTTP_SM_STOP;
+
+ if (vec_len (hc->rx_buf) < 8)
+ {
+ ec = HTTP_STATUS_BAD_REQUEST;
+ goto error;
+ }
+
+ if ((i = v_find_index (hc->rx_buf, 0, "200 OK")) >= 0)
+ {
+ msg.type = HTTP_MSG_REPLY;
+ msg.content_type = HTTP_CONTENT_TEXT_HTML;
+ msg.code = HTTP_STATUS_OK;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = 0;
+
+ rv = http_parse_header (hc, &content_length);
+ if (rv)
+ {
+ clib_warning ("failed to parse http reply");
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+ return -1;
+ }
+ msg.data.len = content_length;
+ u32 dlen = vec_len (hc->rx_buf) - hc->rx_buf_offset;
+ as = session_get_from_handle (hc->h_pa_session_handle);
+ svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) },
+ { &hc->rx_buf[hc->rx_buf_offset], dlen } };
+
+ rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2,
+ 0 /* allow partial */);
+ if (rv < 0)
+ {
+ clib_warning ("error enqueue");
+ return HTTP_SM_ERROR;
+ }
+
+ hc->rx_buf_offset += dlen;
+ hc->to_recv = content_length - dlen;
+
+ if (hc->rx_buf_offset == vec_len (hc->rx_buf))
+ {
+ vec_reset_length (hc->rx_buf);
+ hc->rx_buf_offset = 0;
+ }
+
+ if (hc->to_recv == 0)
+ {
+ hc->rx_buf_offset = 0;
+ vec_reset_length (hc->rx_buf);
+ http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD);
+ }
+ else
+ {
+ http_state_change (hc, HTTP_STATE_CLIENT_IO_MORE_DATA);
+ }
+
+ app_wrk = app_worker_get_if_valid (as->app_wrk_index);
+ if (app_wrk)
+ app_worker_rx_notify (app_wrk, as);
+ return HTTP_SM_STOP;
+ }
+ else
+ {
+      HTTP_DBG (0, "Unexpected server reply %v", hc->rx_buf);
+ ec = HTTP_STATUS_METHOD_NOT_ALLOWED;
+ goto error;
+ }
+
+error:
+
+ http_send_error (hc, ec);
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+
+ return HTTP_SM_ERROR;
+}
+
+static http_sm_result_t
+http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp)
+{
+ http_status_code_t ec;
+ app_worker_t *app_wrk;
+ http_msg_t msg;
+ session_t *as;
+ int i, rv;
+ u32 len;
+ u8 *buf;
+
+ rv = http_read_message (hc);
+
+  /* Nothing yet, wait for more data or timer expiry */
+ if (rv)
+ return HTTP_SM_STOP;
+
+ if (vec_len (hc->rx_buf) < 8)
+ {
+ ec = HTTP_STATUS_BAD_REQUEST;
+ goto error;
+ }
+
+ if ((i = v_find_index (hc->rx_buf, 0, "GET ")) >= 0)
+ {
+ hc->method = HTTP_REQ_GET;
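+      /* Skip past "GET /" so the request target is parsed without the
+       * leading slash */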
+ hc->rx_buf_offset = i + 5;
+
+ i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "HTTP");
+ if (i < 0)
+ {
+ ec = HTTP_STATUS_BAD_REQUEST;
+ goto error;
+ }
+
+ HTTP_DBG (0, "GET method %v", hc->rx_buf);
+ len = i - hc->rx_buf_offset - 1;
+ }
+ else if ((i = v_find_index (hc->rx_buf, 0, "POST ")) >= 0)
+ {
+ hc->method = HTTP_REQ_POST;
+ hc->rx_buf_offset = i + 6;
+ len = vec_len (hc->rx_buf) - hc->rx_buf_offset - 1;
+ HTTP_DBG (0, "POST method %v", hc->rx_buf);
+ }
+ else
+ {
+ HTTP_DBG (0, "Unknown http method %v", hc->rx_buf);
+ ec = HTTP_STATUS_METHOD_NOT_ALLOWED;
+ goto error;
+ }
+
+ buf = &hc->rx_buf[hc->rx_buf_offset];
+
+ msg.type = HTTP_MSG_REQUEST;
+ msg.method_type = hc->method;
+ msg.content_type = HTTP_CONTENT_TEXT_HTML;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = len;
+
+ svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, { buf, len } };
+
+ as = session_get_from_handle (hc->h_pa_session_handle);
+ rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */);
+ if (rv < 0 || rv != sizeof (msg) + len)
+ {
+ clib_warning ("failed app enqueue");
+      /* This should not happen as we only handle 1 request per session
+       * and the fifo is already allocated, but going forward we should
+       * consider rescheduling */
+ return HTTP_SM_ERROR;
+ }
+
+ vec_free (hc->rx_buf);
+ http_state_change (hc, HTTP_STATE_WAIT_APP_REPLY);
+
+ app_wrk = app_worker_get_if_valid (as->app_wrk_index);
+ if (app_wrk)
+ app_worker_rx_notify (app_wrk, as);
+
+ return HTTP_SM_STOP;
+
+error:
+
+ http_send_error (hc, ec);
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+
+ return HTTP_SM_ERROR;
+}
+
+static http_sm_result_t
+http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp)
+{
+ http_main_t *hm = &http_main;
+ u8 *header;
+ u32 offset;
+ f64 now;
+ session_t *as;
+ http_status_code_t sc;
+ http_msg_t msg;
+ int rv;
+
+ as = session_get_from_handle (hc->h_pa_session_handle);
+
+ rv = svm_fifo_dequeue (as->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.data.type > HTTP_MSG_DATA_PTR)
+ {
+ clib_warning ("no data");
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ goto error;
+ }
+
+ if (msg.type != HTTP_MSG_REPLY)
+ {
+ clib_warning ("unexpected message type %d", msg.type);
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ goto error;
+ }
+
+ http_buffer_init (&hc->tx_buf, msg_to_buf_type[msg.data.type], as->tx_fifo,
+ msg.data.len);
+
+ /*
+ * Add headers. For now:
+ * - current time
+ * - expiration time
+ * - content type
+ * - data length
+ */
+ now = clib_timebase_now (&hm->timebase);
+
+ switch (msg.code)
+ {
+ case HTTP_STATUS_OK:
+ header =
+ format (0, http_response_template, http_status_code_str[msg.code],
+ /* Date */
+ format_clib_timebase_time, now,
+ /* Expires */
+ format_clib_timebase_time, now + 600.0,
+ /* Content type */
+ http_content_type_str[msg.content_type],
+ /* Length */
+ msg.data.len);
+ break;
+ case HTTP_STATUS_MOVED:
+ header =
+ format (0, http_redirect_template, http_status_code_str[msg.code]);
+ /* Location: http(s)://new-place already queued up as data */
+ break;
+ default:
+ return HTTP_SM_ERROR;
+ }
+
+ offset = http_send_data (hc, header, vec_len (header), 0);
+ if (offset != vec_len (header))
+ {
+ clib_warning ("couldn't send response header!");
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ vec_free (header);
+ goto error;
+ }
+ vec_free (header);
+
+ /* Start sending the actual data */
+ http_state_change (hc, HTTP_STATE_APP_IO_MORE_DATA);
+
+ ASSERT (sp->max_burst_size >= offset);
+ sp->max_burst_size -= offset;
+ return HTTP_SM_CONTINUE;
+
+error:
+  clib_warning ("failed to process app reply, msg type %u", msg.type);
+ http_send_error (hc, sc);
+ http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+ return HTTP_SM_STOP;
+}
+
+static http_sm_result_t
+http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp)
+{
+ http_msg_t msg;
+ session_t *as;
+ u8 *buf = 0, *request;
+ u32 offset;
+ int rv;
+
+ as = session_get_from_handle (hc->h_pa_session_handle);
+
+ rv = svm_fifo_dequeue (as->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.data.type > HTTP_MSG_DATA_PTR)
+ {
+ clib_warning ("no data");
+ goto error;
+ }
+
+ if (msg.type != HTTP_MSG_REQUEST)
+ {
+ clib_warning ("unexpected message type %d", msg.type);
+ goto error;
+ }
+
+ vec_validate (buf, msg.data.len - 1);
+ rv = svm_fifo_dequeue (as->tx_fifo, msg.data.len, buf);
+ ASSERT (rv == msg.data.len);
+
+ request = format (0, http_request_template, buf);
+ offset = http_send_data (hc, request, vec_len (request), 0);
+ if (offset != vec_len (request))
+ {
+ clib_warning ("sending request failed!");
+ goto error;
+ }
+
+ http_state_change (hc, HTTP_STATE_WAIT_SERVER_REPLY);
+
+ vec_free (buf);
+ vec_free (request);
+
+ return HTTP_SM_STOP;
+
+error:
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+ return HTTP_SM_ERROR;
+}
+
+static http_sm_result_t
+http_state_client_io_more_data (http_conn_t *hc, transport_send_params_t *sp)
+{
+ session_t *as, *ts;
+ app_worker_t *app_wrk;
+ svm_fifo_seg_t _seg, *seg = &_seg;
+ u32 max_len, max_deq, max_enq, n_segs = 1;
+ int rv, len;
+
+ as = session_get_from_handle (hc->h_pa_session_handle);
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+
+ max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
+ if (max_deq == 0)
+ {
+ HTTP_DBG (1, "no data to deq");
+ return HTTP_SM_STOP;
+ }
+
+ max_enq = svm_fifo_max_enqueue (as->rx_fifo);
+ if (max_enq == 0)
+ {
+ HTTP_DBG (1, "app's rx fifo full");
+ svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ return HTTP_SM_STOP;
+ }
+
+ max_len = clib_min (max_enq, max_deq);
+ len = svm_fifo_segments (ts->rx_fifo, 0, seg, &n_segs, max_len);
+ if (len < 0)
+ {
+ HTTP_DBG (1, "svm_fifo_segments() len %d", len);
+ return HTTP_SM_STOP;
+ }
+
+ rv = svm_fifo_enqueue_segments (as->rx_fifo, seg, 1, 0 /* allow partial */);
+ if (rv < 0)
+ {
+ clib_warning ("data enqueue failed, rv: %d", rv);
+ return HTTP_SM_ERROR;
+ }
+
+ svm_fifo_dequeue_drop (ts->rx_fifo, rv);
+ if (rv > hc->to_recv)
+ {
+ clib_warning ("http protocol error: received more data than expected");
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+ http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD);
+ return HTTP_SM_ERROR;
+ }
+ hc->to_recv -= rv;
+ HTTP_DBG (1, "drained %d from ts; remains %d", rv, hc->to_recv);
+
+ app_wrk = app_worker_get_if_valid (as->app_wrk_index);
+ if (app_wrk)
+ app_worker_rx_notify (app_wrk, as);
+
+ if (svm_fifo_max_dequeue_cons (ts->rx_fifo))
+ session_enqueue_notify (ts);
+
+ return HTTP_SM_STOP;
+}
+
+static http_sm_result_t
+http_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp)
+{
+ u32 max_send = 64 << 10, n_segs;
+ http_buffer_t *hb = &hc->tx_buf;
+ svm_fifo_seg_t *seg;
+ session_t *ts;
+ int sent = 0;
+
+ max_send = clib_min (max_send, sp->max_burst_size);
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+ if ((seg = http_buffer_get_segs (hb, max_send, &n_segs)))
+ sent = svm_fifo_enqueue_segments (ts->tx_fifo, seg, n_segs,
+ 1 /* allow partial */);
+
+ if (sent > 0)
+ {
+ /* Ask scheduler to notify app of deq event if needed */
+ sp->bytes_dequeued += http_buffer_drain (hb, sent);
+ sp->max_burst_size -= sent;
+ }
+
+ /* Not finished sending all data */
+ if (!http_buffer_is_drained (hb))
+ {
+ if (sent && svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+
+ if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH)
+ {
+ /* Deschedule http session and wait for deq notification if
+ * underlying ts tx fifo almost full */
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ transport_connection_deschedule (&hc->connection);
+ sp->flags |= TRANSPORT_SND_F_DESCHED;
+ }
+ }
+ else
+ {
+ if (sent && svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX_FLUSH);
+
+ /* Finished transaction, back to HTTP_STATE_WAIT_METHOD */
+ http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+ http_buffer_free (&hc->tx_buf);
+ }
+
+ return HTTP_SM_STOP;
+}
+
+typedef http_sm_result_t (*http_sm_handler) (http_conn_t *,
+ transport_send_params_t *sp);
+
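+/* Positional initializers: order must match the http_state_t enum */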
+static http_sm_handler state_funcs[HTTP_N_STATES] = {
+ 0, /* idle state */
+ http_state_wait_app_method,
+ http_state_wait_client_method,
+ http_state_wait_server_reply,
+ http_state_wait_app_reply,
+ http_state_client_io_more_data,
+ http_state_app_io_more_data,
+};
+
+static void
+http_req_run_state_machine (http_conn_t *hc, transport_send_params_t *sp)
+{
+ http_sm_result_t res;
+
+ do
+ {
+ res = state_funcs[hc->http_state](hc, sp);
+ if (res == HTTP_SM_ERROR)
+ {
+ HTTP_DBG (1, "error in state machine %d", res);
+ return;
+ }
+ }
+ while (res == HTTP_SM_CONTINUE);
+
+ /* Reset the session expiration timer */
+ http_conn_timer_update (hc);
+}
+
+static int
+http_ts_rx_callback (session_t *ts)
+{
+ http_conn_t *hc;
+
+ hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+ if (!hc)
+ {
+ clib_warning ("http connection not found (ts %d)", ts->opaque);
+ return -1;
+ }
+
+ if (hc->state == HTTP_CONN_STATE_CLOSED)
+ {
+ svm_fifo_dequeue_drop_all (ts->tx_fifo);
+ return 0;
+ }
+
+ http_req_run_state_machine (hc, 0);
+
+ if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED)
+ {
+ if (!svm_fifo_max_dequeue_cons (ts->rx_fifo))
+ session_transport_closing_notify (&hc->connection);
+ }
+ return 0;
+}
+
+int
+http_ts_builtin_tx_callback (session_t *ts)
+{
+ http_conn_t *hc;
+
+ hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+ transport_connection_reschedule (&hc->connection);
+
+ return 0;
+}
+
+static void
+http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf)
+{
+ http_conn_t *hc;
+
+ if (ntf == SESSION_CLEANUP_TRANSPORT)
+ return;
+
+ hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+ if (!hc)
+ {
+ clib_warning ("no http connection for %u", ts->session_index);
+ return;
+ }
+
+ vec_free (hc->rx_buf);
+
+ http_buffer_free (&hc->tx_buf);
+ http_conn_timer_stop (hc);
+
+ session_transport_delete_notify (&hc->connection);
+ http_conn_free (hc);
+}
+
+int
+http_add_segment_callback (u32 client_index, u64 segment_handle)
+{
+ /* No-op for builtin */
+ return 0;
+}
+
+int
+http_del_segment_callback (u32 client_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static session_cb_vft_t http_app_cb_vft = {
+ .session_accept_callback = http_ts_accept_callback,
+ .session_disconnect_callback = http_ts_disconnect_callback,
+ .session_connected_callback = http_ts_connected_callback,
+ .session_reset_callback = http_ts_reset_callback,
+ .session_cleanup_callback = http_ts_cleanup_callback,
+ .add_segment_callback = http_add_segment_callback,
+ .del_segment_callback = http_del_segment_callback,
+ .builtin_app_rx_callback = http_ts_rx_callback,
+ .builtin_app_tx_callback = http_ts_builtin_tx_callback,
+};
+
+static clib_error_t *
+http_transport_enable (vlib_main_t *vm, u8 is_en)
+{
+ vnet_app_detach_args_t _da, *da = &_da;
+ vnet_app_attach_args_t _a, *a = &_a;
+ u64 options[APP_OPTIONS_N_OPTIONS];
+ http_main_t *hm = &http_main;
+
+ if (!is_en)
+ {
+ da->app_index = hm->app_index;
+ da->api_client_index = APP_INVALID_INDEX;
+ vnet_application_detach (da);
+ return 0;
+ }
+
+ vec_validate (hm->wrk, vlib_num_workers ());
+
+ clib_memset (a, 0, sizeof (*a));
+ clib_memset (options, 0, sizeof (options));
+
+ a->session_cb_vft = &http_app_cb_vft;
+ a->api_client_index = APP_INVALID_INDEX;
+ a->options = options;
+ a->name = format (0, "http");
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = hm->first_seg_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = hm->add_seg_size;
+ a->options[APP_OPTIONS_RX_FIFO_SIZE] = hm->fifo_size;
+ a->options[APP_OPTIONS_TX_FIFO_SIZE] = hm->fifo_size;
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+ a->options[APP_OPTIONS_FLAGS] |= APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
+ a->options[APP_OPTIONS_FLAGS] |= APP_OPTIONS_FLAGS_IS_TRANSPORT_APP;
+
+ if (vnet_application_attach (a))
+ return clib_error_return (0, "failed to attach http app");
+
+ hm->app_index = a->app_index;
+ vec_free (a->name);
+
+ clib_timebase_init (&hm->timebase, 0 /* GMT */, CLIB_TIMEBASE_DAYLIGHT_NONE,
+ &vm->clib_time /* share the system clock */);
+
+ http_timers_init (vm, http_conn_timeout_cb);
+
+ return 0;
+}
+
+static int
+http_transport_connect (transport_endpoint_cfg_t *tep)
+{
+ vnet_connect_args_t _cargs, *cargs = &_cargs;
+ http_main_t *hm = &http_main;
+ session_endpoint_cfg_t *sep = (session_endpoint_cfg_t *) tep;
+ application_t *app;
+ http_conn_t *hc;
+ int error;
+ u32 hc_index;
+ app_worker_t *app_wrk = app_worker_get (sep->app_wrk_index);
+
+ clib_memset (cargs, 0, sizeof (*cargs));
+ clib_memcpy (&cargs->sep_ext, sep, sizeof (session_endpoint_cfg_t));
+ cargs->sep.transport_proto = TRANSPORT_PROTO_TCP;
+ cargs->app_index = hm->app_index;
+ app = application_get (app_wrk->app_index);
+ cargs->sep_ext.ns_index = app->ns_index;
+
+ hc_index = http_conn_alloc_w_thread (0 /* ts->thread_index */);
+ hc = http_conn_get_w_thread (hc_index, 0);
+ hc->h_pa_wrk_index = sep->app_wrk_index;
+ hc->h_pa_app_api_ctx = sep->opaque;
+ hc->state = HTTP_CONN_STATE_CONNECTING;
+ cargs->api_context = hc_index;
+
+ HTTP_DBG (1, "hc ho_index %x", hc_index);
+
+ if ((error = vnet_connect (cargs)))
+ return error;
+
+ return 0;
+}
+
+static u32
+http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
+{
+ vnet_listen_args_t _args = {}, *args = &_args;
+ session_t *ts_listener, *app_listener;
+ http_main_t *hm = &http_main;
+ session_endpoint_cfg_t *sep;
+ app_worker_t *app_wrk;
+ transport_proto_t tp;
+ app_listener_t *al;
+ application_t *app;
+ http_conn_t *lhc;
+ u32 lhc_index;
+
+ sep = (session_endpoint_cfg_t *) tep;
+
+ app_wrk = app_worker_get (sep->app_wrk_index);
+ app = application_get (app_wrk->app_index);
+
+ args->app_index = hm->app_index;
+ args->sep_ext = *sep;
+ args->sep_ext.ns_index = app->ns_index;
+ tp = sep->ext_cfg ? TRANSPORT_PROTO_TLS : TRANSPORT_PROTO_TCP;
+ args->sep_ext.transport_proto = tp;
+
+ if (vnet_listen (args))
+ return SESSION_INVALID_INDEX;
+
+ lhc_index = http_listener_alloc ();
+ lhc = http_listener_get (lhc_index);
+
+ /* Grab transport connection listener and link to http listener */
+ lhc->h_tc_session_handle = args->handle;
+ al = app_listener_get_w_handle (lhc->h_tc_session_handle);
+ ts_listener = app_listener_get_session (al);
+ ts_listener->opaque = lhc_index;
+
+ /* Grab application listener and link to http listener */
+ app_listener = listen_session_get (app_listener_index);
+ lhc->h_pa_wrk_index = sep->app_wrk_index;
+ lhc->h_pa_session_handle = listen_session_get_handle (app_listener);
+ lhc->c_s_index = app_listener_index;
+ lhc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
+
+ return lhc_index;
+}
+
+static u32
+http_stop_listen (u32 listener_index)
+{
+ http_conn_t *lhc;
+ int rv;
+
+ lhc = http_listener_get (listener_index);
+
+ vnet_unlisten_args_t a = {
+ .handle = lhc->h_tc_session_handle,
+ .app_index = http_main.app_index,
+ .wrk_map_index = 0 /* default wrk */
+ };
+
+ if ((rv = vnet_unlisten (&a)))
+ clib_warning ("unlisten returned %d", rv);
+
+ http_listener_free (lhc);
+
+ return 0;
+}
+
+static void
+http_transport_close (u32 hc_index, u32 thread_index)
+{
+ session_t *as;
+ http_conn_t *hc;
+
+ HTTP_DBG (1, "App disconnecting %x", hc_index);
+
+ hc = http_conn_get_w_thread (hc_index, thread_index);
+ if (hc->state == HTTP_CONN_STATE_CONNECTING)
+ {
+ hc->state = HTTP_CONN_STATE_APP_CLOSED;
+ http_disconnect_transport (hc);
+ return;
+ }
+
+ as = session_get_from_handle (hc->h_pa_session_handle);
+
+ /* Nothing more to send, confirm close */
+ if (!svm_fifo_max_dequeue_cons (as->tx_fifo))
+ {
+ session_transport_closed_notify (&hc->connection);
+ http_disconnect_transport (hc);
+ }
+ else
+ {
+ /* Wait for all data to be written to ts */
+ hc->state = HTTP_CONN_STATE_APP_CLOSED;
+ }
+}
+
+static transport_connection_t *
+http_transport_get_connection (u32 hc_index, u32 thread_index)
+{
+ http_conn_t *hc = http_conn_get_w_thread (hc_index, thread_index);
+ return &hc->connection;
+}
+
+static transport_connection_t *
+http_transport_get_listener (u32 listener_index)
+{
+ http_conn_t *lhc = http_listener_get (listener_index);
+ return &lhc->connection;
+}
+
+static int
+http_app_tx_callback (void *session, transport_send_params_t *sp)
+{
+ session_t *as = (session_t *) session;
+ u32 max_burst_sz, sent;
+ http_conn_t *hc;
+
+ HTTP_DBG (1, "app session conn index %x", as->connection_index);
+
+ hc = http_conn_get_w_thread (as->connection_index, as->thread_index);
+ if (!http_state_is_tx_valid (hc))
+ {
+ if (hc->state != HTTP_CONN_STATE_CLOSED)
+ clib_warning ("app data req state '%U' session state %u",
+ format_http_state, hc->http_state, hc->state);
+ svm_fifo_dequeue_drop_all (as->tx_fifo);
+ return 0;
+ }
+
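+  /* The scheduler expresses burst size in MSS-sized units; convert to
+   * bytes for the state machine and back for the return value */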
+ max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS;
+ sp->max_burst_size = max_burst_sz;
+
+ http_req_run_state_machine (hc, sp);
+
+ if (hc->state == HTTP_CONN_STATE_APP_CLOSED)
+ {
+ if (!svm_fifo_max_dequeue_cons (as->tx_fifo))
+ http_disconnect_transport (hc);
+ }
+
+ sent = max_burst_sz - sp->max_burst_size;
+
+ return sent > 0 ? clib_max (sent / TRANSPORT_PACER_MIN_MSS, 1) : 0;
+}
+
+static void
+http_transport_get_endpoint (u32 hc_index, u32 thread_index,
+ transport_endpoint_t *tep, u8 is_lcl)
+{
+ http_conn_t *hc = http_conn_get_w_thread (hc_index, thread_index);
+ session_t *ts;
+
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+ session_get_endpoint (ts, tep, is_lcl);
+}
+
+static u8 *
+format_http_connection (u8 *s, va_list *args)
+{
+ http_conn_t *hc = va_arg (*args, http_conn_t *);
+ session_t *ts;
+
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+ s = format (s, "[%d:%d][H] app_wrk %u ts %d:%d", hc->c_thread_index,
+ hc->c_s_index, hc->h_pa_wrk_index, ts->thread_index,
+ ts->session_index);
+
+ return s;
+}
+
+static u8 *
+format_http_listener (u8 *s, va_list *args)
+{
+ http_conn_t *lhc = va_arg (*args, http_conn_t *);
+ app_listener_t *al;
+ session_t *lts;
+
+ al = app_listener_get_w_handle (lhc->h_tc_session_handle);
+ lts = app_listener_get_session (al);
+ s = format (s, "[%d:%d][H] app_wrk %u ts %d:%d", lhc->c_thread_index,
+ lhc->c_s_index, lhc->h_pa_wrk_index, lts->thread_index,
+ lts->session_index);
+
+ return s;
+}
+
+static u8 *
+format_http_conn_state (u8 *s, va_list *args)
+{
+ http_conn_t *hc = va_arg (*args, http_conn_t *);
+
+ switch (hc->state)
+ {
+ case HTTP_CONN_STATE_LISTEN:
+ s = format (s, "LISTEN");
+ break;
+ case HTTP_CONN_STATE_CONNECTING:
+ s = format (s, "CONNECTING");
+ break;
+ case HTTP_CONN_STATE_ESTABLISHED:
+ s = format (s, "ESTABLISHED");
+ break;
+ case HTTP_CONN_STATE_TRANSPORT_CLOSED:
+ s = format (s, "TRANSPORT_CLOSED");
+ break;
+ case HTTP_CONN_STATE_APP_CLOSED:
+ s = format (s, "APP_CLOSED");
+ break;
+ case HTTP_CONN_STATE_CLOSED:
+ s = format (s, "CLOSED");
+ break;
+ }
+
+ return s;
+}
+
+static u8 *
+format_http_transport_connection (u8 *s, va_list *args)
+{
+ u32 tc_index = va_arg (*args, u32);
+ u32 thread_index = va_arg (*args, u32);
+ u32 verbose = va_arg (*args, u32);
+ http_conn_t *hc;
+
+ hc = http_conn_get_w_thread (tc_index, thread_index);
+
+ s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_http_connection, hc);
+ if (verbose)
+ {
+ s =
+ format (s, "%-" SESSION_CLI_STATE_LEN "U", format_http_conn_state, hc);
+ if (verbose > 1)
+ s = format (s, "\n");
+ }
+
+ return s;
+}
+
+static u8 *
+format_http_transport_listener (u8 *s, va_list *args)
+{
+ u32 tc_index = va_arg (*args, u32);
+ u32 __clib_unused thread_index = va_arg (*args, u32);
+  u32 verbose = va_arg (*args, u32);
+ http_conn_t *lhc = http_listener_get (tc_index);
+
+ s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_http_listener, lhc);
+ if (verbose)
+ s =
+ format (s, "%-" SESSION_CLI_STATE_LEN "U", format_http_conn_state, lhc);
+ return s;
+}
+
+static const transport_proto_vft_t http_proto = {
+ .enable = http_transport_enable,
+ .connect = http_transport_connect,
+ .start_listen = http_start_listen,
+ .stop_listen = http_stop_listen,
+ .close = http_transport_close,
+ .custom_tx = http_app_tx_callback,
+ .get_connection = http_transport_get_connection,
+ .get_listener = http_transport_get_listener,
+ .get_transport_endpoint = http_transport_get_endpoint,
+ .format_connection = format_http_transport_connection,
+ .format_listener = format_http_transport_listener,
+ .transport_options = {
+ .name = "http",
+ .short_name = "H",
+ .tx_type = TRANSPORT_TX_INTERNAL,
+ .service_type = TRANSPORT_SERVICE_APP,
+ },
+};
+
+static clib_error_t *
+http_transport_init (vlib_main_t *vm)
+{
+ http_main_t *hm = &http_main;
+
+ transport_register_protocol (TRANSPORT_PROTO_HTTP, &http_proto,
+ FIB_PROTOCOL_IP4, ~0);
+ transport_register_protocol (TRANSPORT_PROTO_HTTP, &http_proto,
+ FIB_PROTOCOL_IP6, ~0);
+
+ /* Default values, configurable via startup conf */
+ hm->add_seg_size = 256 << 20;
+ hm->first_seg_size = 32 << 20;
+ hm->fifo_size = 512 << 10;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (http_transport_init);
+
+static clib_error_t *
+http_config_fn (vlib_main_t *vm, unformat_input_t *input)
+{
+ http_main_t *hm = &http_main;
+ uword mem_sz;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "first-segment-size %U", unformat_memory_size,
+ &mem_sz))
+ {
+ hm->first_seg_size = clib_max (mem_sz, 1 << 20);
+ if (hm->first_seg_size != mem_sz)
+	    clib_warning ("first seg size too small %lu", mem_sz);
+ }
+ else if (unformat (input, "add-segment-size %U", unformat_memory_size,
+ &mem_sz))
+ {
+ hm->add_seg_size = clib_max (mem_sz, 1 << 20);
+ if (hm->add_seg_size != mem_sz)
+	    clib_warning ("add seg size too small %lu", mem_sz);
+ }
+ else if (unformat (input, "fifo-size %U", unformat_memory_size, &mem_sz))
+ {
+ hm->fifo_size = clib_clamp (mem_sz, 4 << 10, 2 << 30);
+ if (hm->fifo_size != mem_sz)
+ clib_warning ("invalid fifo size %lu", mem_sz);
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (http_config_fn, "http");
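+
+/*
+ * Example startup.conf stanza consumed by http_config_fn above; the
+ * values are illustrative only (defaults are set in http_transport_init):
+ *
+ *   http {
+ *     first-segment-size 64m
+ *     add-segment-size 128m
+ *     fifo-size 1m
+ *   }
+ */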
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Hypertext Transfer Protocol (HTTP)",
+ .default_disabled = 0,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http/http.h b/src/plugins/http/http.h
new file mode 100644
index 00000000000..dbae5ac4611
--- /dev/null
+++ b/src/plugins/http/http.h
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP_H_
+#define SRC_PLUGINS_HTTP_HTTP_H_
+
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+#include <vppinfra/time_range.h>
+
+#include <vnet/session/application_interface.h>
+#include <vnet/session/application.h>
+#include <http/http_buffer.h>
+
+#define HTTP_DEBUG 0
+
+#if HTTP_DEBUG
+#define HTTP_DBG(_lvl, _fmt, _args...) \
+ if (_lvl <= HTTP_DEBUG) \
+ clib_warning (_fmt, ##_args)
+#else
+#define HTTP_DBG(_lvl, _fmt, _args...)
+#endif
+
+typedef struct http_conn_id_
+{
+ union
+ {
+ session_handle_t app_session_handle;
+ u32 parent_app_api_ctx;
+ };
+ session_handle_t tc_session_handle;
+ u32 parent_app_wrk_index;
+} http_conn_id_t;
+
+STATIC_ASSERT (sizeof (http_conn_id_t) <= TRANSPORT_CONN_ID_LEN,
+ "ctx id must be less than TRANSPORT_CONN_ID_LEN");
+
+typedef enum http_conn_state_
+{
+ HTTP_CONN_STATE_LISTEN,
+ HTTP_CONN_STATE_CONNECTING,
+ HTTP_CONN_STATE_ESTABLISHED,
+ HTTP_CONN_STATE_TRANSPORT_CLOSED,
+ HTTP_CONN_STATE_APP_CLOSED,
+ HTTP_CONN_STATE_CLOSED
+} http_conn_state_t;
+
+typedef enum http_state_
+{
+ HTTP_STATE_IDLE = 0,
+ HTTP_STATE_WAIT_APP_METHOD,
+ HTTP_STATE_WAIT_CLIENT_METHOD,
+ HTTP_STATE_WAIT_SERVER_REPLY,
+ HTTP_STATE_WAIT_APP_REPLY,
+ HTTP_STATE_CLIENT_IO_MORE_DATA,
+ HTTP_STATE_APP_IO_MORE_DATA,
+ HTTP_N_STATES,
+} http_state_t;
+
+typedef enum http_req_method_
+{
+ HTTP_REQ_GET = 0,
+ HTTP_REQ_POST,
+} http_req_method_t;
+
+typedef enum http_msg_type_
+{
+ HTTP_MSG_REQUEST,
+ HTTP_MSG_REPLY
+} http_msg_type_t;
+
+#define foreach_http_content_type \
+ _ (APP_7Z, ".7z", "application / x - 7z - compressed") \
+ _ (APP_DOC, ".doc", "application / msword") \
+ _ (APP_DOCX, ".docx", \
+ "application / vnd.openxmlformats - " \
+ "officedocument.wordprocessingml.document") \
+ _ (APP_EPUB, ".epub", "application / epub + zip") \
+ _ (APP_FONT, ".eot", "application / vnd.ms - fontobject") \
+ _ (APP_JAR, ".jar", "application / java - archive") \
+ _ (APP_JSON, ".json", "application / json") \
+ _ (APP_JSON_LD, ".jsonld", "application / ld + json") \
+ _ (APP_MPKG, ".mpkg", "application / vnd.apple.installer + xml") \
+ _ (APP_ODP, ".odp", "application / vnd.oasis.opendocument.presentation") \
+ _ (APP_ODS, ".ods", "application / vnd.oasis.opendocument.spreadsheet") \
+ _ (APP_ODT, ".odt", "application / vnd.oasis.opendocument.text") \
+ _ (APP_OGX, ".ogx", "application / ogg") \
+ _ (APP_PDF, ".pdf", "application / pdf") \
+ _ (APP_PHP, ".php", "application / x - httpd - php") \
+ _ (APP_PPT, ".ppt", "application / vnd.ms - powerpoint") \
+ _ (APP_PPTX, ".pptx", "application / vnd.ms - powerpoint") \
+ _ (APP_RAR, ".rar", "application / vnd.rar") \
+ _ (APP_RTF, ".rtf", "application / rtf") \
+ _ (APP_SH, ".sh", "application / x - sh") \
+ _ (APP_TAR, ".tar", "application / x - tar") \
+ _ (APP_VSD, ".vsd", "application / vnd.visio") \
+ _ (APP_XHTML, ".xhtml", "application / xhtml + xml") \
+ _ (APP_XLS, ".xls", "application / vnd.ms - excel") \
+ _ (APP_XML, ".xml", "application / xml") \
+ _ (APP_XSLX, ".xlsx", \
+ "application / vnd.openxmlformats - officedocument.spreadsheetml.sheet") \
+ _ (APP_XUL, ".xul", "application / vnd.mozilla.xul + xml") \
+ _ (APP_ZIP, ".zip", "application / zip") \
+ _ (AUDIO_AAC, ".aac", "audio / aac") \
+ _ (AUDIO_CD, ".cda", "application / x - cdf") \
+ _ (AUDIO_WAV, ".wav", "audio / wav") \
+ _ (AUDIO_WEBA, ".weba", "audio / webm") \
+ _ (AUDO_MIDI, ".midi", "audio / midi") \
+ _ (AUDO_MID, ".mid", "audo / midi") \
+ _ (AUDO_MP3, ".mp3", "audio / mpeg") \
+ _ (AUDO_OGA, ".oga", "audio / ogg") \
+ _ (AUDO_OPUS, ".opus", "audio / opus") \
+ _ (APP_OCTET_STREAM, ".bin", "application / octet - stream") \
+ _ (BZIP2, ".bz2", "application / x - bzip2") \
+ _ (BZIP, ".bz", "application / x - bzip") \
+ _ (FONT_OTF, ".otf", "font / otf") \
+ _ (FONT_TTF, ".ttf", "font / ttf") \
+ _ (FONT_WOFF2, ".woff2", "font / woff2") \
+ _ (FONT_WOFF, ".woff", "font / woff") \
+ _ (GZIP, ".gz", "application / gzip") \
+ _ (IMAGE_AVIF, ".avif", "image / avif") \
+ _ (IMAGE_BMP, ".bmp", "image / bmp") \
+ _ (IMAGE_GIF, ".gif", "image / gif") \
+ _ (IMAGE_ICON, ".ico", "image / vnd.microsoft.icon") \
+ _ (IMAGE_JPEG, ".jpeg", "image / jpeg") \
+ _ (IMAGE_JPG, ".jpg", "image / jpeg") \
+ _ (IMAGE_PNG, ".png", "image / png") \
+ _ (IMAGE_SVG, ".svg", "image / svg + xml") \
+ _ (IMAGE_TIFF, ".tiff", "image / tiff") \
+ _ (IMAGE_TIF, ".tif", "image / tiff") \
+ _ (IMAGE_WEBP, ".webp", "image / webp") \
+ _ (SCRIPT_CSH, ".csh", "application / x - csh") \
+ _ (TEXT_ABIWORD, ".abw", "application / x - abiword") \
+ _ (TEXT_ARCHIVE, ".arc", "application / x - freearc") \
+ _ (TEXT_AZW, ".azw", "application / vnd.amazon.ebook") \
+ _ (TEXT_CALENDAR, ".ics", "text / calendar") \
+ _ (TEXT_CSS, ".css", "text / css") \
+ _ (TEXT_CSV, ".csv", "text / csv") \
+ _ (TEXT_HTM, ".htm", "text / html") \
+ _ (TEXT_HTML, ".html", "text / html") \
+ _ (TEXT_JS, ".js", "text / javascript") \
+ _ (TEXT_MJS, ".mjs", "text / javascript") \
+ _ (TEXT_PLAIN, ".txt", "text / plain") \
+ _ (VIDEO_3GP2, ".3g2", "video / 3gpp2") \
+ _ (VIDEO_3GP, ".3gp", "video / 3gpp") \
+ _ (VIDEO_AVI, ".avi", "video / x - msvideo") \
+ _ (VIDEO_MP4, ".mp4", "video / mp4") \
+ _ (VIDEO_MPEG, ".mpeg", "video / mpeg") \
+ _ (VIDEO_OGG, ".ogv", "video / ogg") \
+ _ (VIDEO_TS, ".ts", "video / mp2t") \
+ _ (VIDEO_WEBM, ".webm", "video / webm")
+
+typedef enum http_content_type_
+{
+#define _(s, ext, str) HTTP_CONTENT_##s,
+ foreach_http_content_type
+#undef _
+} http_content_type_t;
+
+#define foreach_http_status_code \
+ _ (200, OK, "200 OK") \
+ _ (301, MOVED, "301 Moved Permanently") \
+ _ (400, BAD_REQUEST, "400 Bad Request") \
+ _ (404, NOT_FOUND, "404 Not Found") \
+ _ (405, METHOD_NOT_ALLOWED, "405 Method Not Allowed") \
+ _ (500, INTERNAL_ERROR, "500 Internal Server Error")
+
+typedef enum http_status_code_
+{
+#define _(c, s, str) HTTP_STATUS_##s,
+ foreach_http_status_code
+#undef _
+ HTTP_N_STATUS
+} http_status_code_t;
+
+typedef enum http_msg_data_type_
+{
+ HTTP_MSG_DATA_INLINE,
+ HTTP_MSG_DATA_PTR
+} http_msg_data_type_t;
+
+typedef struct http_msg_data_
+{
+ http_msg_data_type_t type;
+ u64 len;
+ u8 data[0];
+} http_msg_data_t;
+
+typedef struct http_msg_
+{
+ http_msg_type_t type;
+ union
+ {
+ http_req_method_t method_type;
+ http_status_code_t code;
+ };
+ http_content_type_t content_type;
+ http_msg_data_t data;
+} http_msg_t;
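+
+/*
+ * A minimal sketch of how an application could stage a reply toward the
+ * transport; "as" (the app session) and "body" (a u8 * vector) are
+ * hypothetical, and error handling is elided:
+ *
+ *   http_msg_t msg = { .type = HTTP_MSG_REPLY,
+ *                      .code = HTTP_STATUS_OK,
+ *                      .content_type = HTTP_CONTENT_TEXT_HTML,
+ *                      .data.type = HTTP_MSG_DATA_INLINE,
+ *                      .data.len = vec_len (body) };
+ *   svm_fifo_enqueue (as->tx_fifo, sizeof (msg), (u8 *) &msg);
+ *   svm_fifo_enqueue (as->tx_fifo, vec_len (body), body);
+ */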
+
+typedef struct http_tc_
+{
+ union
+ {
+ transport_connection_t connection;
+ http_conn_id_t c_http_conn_id;
+ };
+#define h_tc_session_handle c_http_conn_id.tc_session_handle
+#define h_pa_wrk_index c_http_conn_id.parent_app_wrk_index
+#define h_pa_session_handle c_http_conn_id.app_session_handle
+#define h_pa_app_api_ctx c_http_conn_id.parent_app_api_ctx
+#define h_hc_index connection.c_index
+
+ http_conn_state_t state;
+ u32 timer_handle;
+
+ /*
+ * Current request
+ */
+ http_state_t http_state;
+ http_req_method_t method;
+ u8 *rx_buf;
+ u32 rx_buf_offset;
+ http_buffer_t tx_buf;
+ u32 to_recv;
+ u32 bytes_dequeued;
+} http_conn_t;
+
+typedef struct http_worker_
+{
+ http_conn_t *conn_pool;
+} http_worker_t;
+
+typedef struct http_main_
+{
+ http_worker_t *wrk;
+ http_conn_t *listener_pool;
+ u32 app_index;
+
+ clib_timebase_t timebase;
+
+ /*
+ * Runtime config
+ */
+ u8 debug_level;
+
+ /*
+ * Config
+ */
+ u64 first_seg_size;
+ u64 add_seg_size;
+ u32 fifo_size;
+} http_main_t;
+
+static inline int
+http_state_is_tx_valid (http_conn_t *hc)
+{
+ http_state_t state = hc->http_state;
+ return (state == HTTP_STATE_APP_IO_MORE_DATA ||
+ state == HTTP_STATE_CLIENT_IO_MORE_DATA ||
+ state == HTTP_STATE_WAIT_APP_REPLY ||
+ state == HTTP_STATE_WAIT_APP_METHOD);
+}
+
+#endif /* SRC_PLUGINS_HTTP_HTTP_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http/http_buffer.c b/src/plugins/http/http_buffer.c
new file mode 100644
index 00000000000..f3dc308dbf8
--- /dev/null
+++ b/src/plugins/http/http_buffer.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <http/http_buffer.h>
+#include <http/http.h>
+
+static http_buffer_vft_t buf_vfts[HTTP_BUFFER_PTR + 1];
+
+#define HTTP_BUFFER_REGISTER_VFT(type, vft) \
+ static void __attribute__ ((constructor)) http_buf_init_##type (void) \
+ { \
+ buf_vfts[type] = vft; \
+ }
+
+typedef struct http_buffer_fifo_
+{
+ svm_fifo_t *src;
+ svm_fifo_seg_t *segs;
+ u64 len;
+ u64 offset;
+} http_buffer_fifo_t;
+
+STATIC_ASSERT (sizeof (http_buffer_fifo_t) <= HTTP_BUFFER_DATA_SZ, "buf data");
+
+static void
+buf_fifo_init (http_buffer_t *hb, void *data, u64 len)
+{
+ svm_fifo_t *f = (svm_fifo_t *) data;
+ http_buffer_fifo_t *bf;
+
+ bf = (http_buffer_fifo_t *) &hb->data;
+
+ bf->len = len;
+ bf->offset = 0;
+ bf->src = f;
+ bf->segs = 0;
+}
+
+static void
+buf_fifo_free (http_buffer_t *hb)
+{
+ http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data;
+
+ bf->src = 0;
+ vec_free (bf->segs);
+}
+
+static svm_fifo_seg_t *
+buf_fifo_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs)
+{
+ http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data;
+
+ u32 _n_segs = 5;
+ int len;
+
+ max_len = clib_min (bf->len - bf->offset, (u64) max_len);
+
+ vec_validate (bf->segs, _n_segs);
+
+ len = svm_fifo_segments (bf->src, 0, bf->segs, &_n_segs, max_len);
+ if (len < 0)
+ return 0;
+
+ *n_segs = _n_segs;
+
+ HTTP_DBG (1, "available to send %u n_segs %u", len, *n_segs);
+
+ return bf->segs;
+}
+
+static u32
+buf_fifo_drain (http_buffer_t *hb, u32 len)
+{
+ http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data;
+
+ bf->offset += len;
+ svm_fifo_dequeue_drop (bf->src, len);
+ HTTP_DBG (1, "drained %u len %u offset %u", len, bf->len, bf->offset);
+
+ return len;
+}
+
+static u8
+buf_fifo_is_drained (http_buffer_t *hb)
+{
+ http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data;
+
+ ASSERT (bf->offset <= bf->len);
+ return (bf->offset == bf->len);
+}
+
+static const http_buffer_vft_t buf_fifo_vft = {
+ .init = buf_fifo_init,
+ .free = buf_fifo_free,
+ .get_segs = buf_fifo_get_segs,
+ .drain = buf_fifo_drain,
+ .is_drained = buf_fifo_is_drained,
+};
+
+HTTP_BUFFER_REGISTER_VFT (HTTP_BUFFER_FIFO, buf_fifo_vft);
+
+typedef struct http_buffer_ptr_
+{
+ svm_fifo_seg_t *segs;
+ svm_fifo_t *f;
+} http_buffer_ptr_t;
+
+STATIC_ASSERT (sizeof (http_buffer_ptr_t) <= HTTP_BUFFER_DATA_SZ, "buf data");
+
+static void
+buf_ptr_init (http_buffer_t *hb, void *data, u64 len)
+{
+ svm_fifo_t *f = (svm_fifo_t *) data;
+ http_buffer_ptr_t *bf;
+ uword ptr;
+ int rv;
+
+ bf = (http_buffer_ptr_t *) &hb->data;
+
+ /* Peek the pointer, do not drain the fifo until done with transfer */
+ rv = svm_fifo_peek (f, 0, sizeof (ptr), (u8 *) &ptr);
+ ASSERT (rv == sizeof (ptr));
+
+ bf->f = f;
+ bf->segs = 0;
+ vec_validate (bf->segs, 1);
+
+ bf->segs[0].data = uword_to_pointer (ptr, u8 *);
+ bf->segs[0].len = len;
+
+ bf->segs[1] = bf->segs[0];
+}
+
+static void
+buf_ptr_free (http_buffer_t *hb)
+{
+ http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data;
+
+ bf->f = 0;
+ vec_free (bf->segs);
+}
+
+static svm_fifo_seg_t *
+buf_ptr_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs)
+{
+ http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data;
+
+ *n_segs = 1;
+ bf->segs[1].len = clib_min (bf->segs[0].len, max_len);
+
+ return &bf->segs[1];
+}
+
+static u32
+buf_ptr_drain (http_buffer_t *hb, u32 len)
+{
+ http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data;
+
+ ASSERT (bf->segs[0].len >= len);
+
+ bf->segs[1].data += len;
+ bf->segs[0].len -= len;
+
+ HTTP_DBG (1, "drained %u left %u", len, bf->segs[1].len);
+
+ if (!bf->segs[0].len)
+ {
+ svm_fifo_dequeue_drop (bf->f, sizeof (uword));
+ return sizeof (uword);
+ }
+
+ return 0;
+}
+
+static u8
+buf_ptr_is_drained (http_buffer_t *hb)
+{
+ http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data;
+
+ return (bf->segs[0].len == 0);
+}
+
+static const http_buffer_vft_t buf_ptr_vft = {
+ .init = buf_ptr_init,
+ .free = buf_ptr_free,
+ .get_segs = buf_ptr_get_segs,
+ .drain = buf_ptr_drain,
+ .is_drained = buf_ptr_is_drained,
+};
+
+HTTP_BUFFER_REGISTER_VFT (HTTP_BUFFER_PTR, buf_ptr_vft);
+
+void
+http_buffer_init (http_buffer_t *hb, http_buffer_type_t type, svm_fifo_t *f,
+ u64 data_len)
+{
+ hb->vft = &buf_vfts[type];
+ hb->vft->init (hb, f, data_len);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http/http_buffer.h b/src/plugins/http/http_buffer.h
new file mode 100644
index 00000000000..1140be42d6e
--- /dev/null
+++ b/src/plugins/http/http_buffer.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP_BUFFER_H_
+#define SRC_PLUGINS_HTTP_HTTP_BUFFER_H_
+
+#include <svm/svm_fifo.h>
+
+#define HTTP_BUFFER_DATA_SZ 32
+
+typedef enum http_buffer_type_
+{
+ HTTP_BUFFER_FIFO,
+ HTTP_BUFFER_PTR,
+} http_buffer_type_t;
+
+typedef struct http_buffer_vft_ http_buffer_vft_t;
+
+typedef struct http_buffer_
+{
+ http_buffer_vft_t *vft;
+ u8 data[HTTP_BUFFER_DATA_SZ];
+} http_buffer_t;
+
+struct http_buffer_vft_
+{
+ void (*init) (http_buffer_t *, void *data, u64 len);
+ void (*free) (http_buffer_t *);
+ svm_fifo_seg_t *(*get_segs) (http_buffer_t *, u32 max_len, u32 *n_segs);
+ u32 (*drain) (http_buffer_t *, u32 len);
+ u8 (*is_drained) (http_buffer_t *);
+};
+
+void http_buffer_init (http_buffer_t *hb, http_buffer_type_t type,
+ svm_fifo_t *f, u64 data_len);
+
+static inline void
+http_buffer_free (http_buffer_t *hb)
+{
+ if (hb->vft)
+ hb->vft->free (hb);
+}
+
+static inline svm_fifo_seg_t *
+http_buffer_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs)
+{
+ return hb->vft->get_segs (hb, max_len, n_segs);
+}
+
+static inline u32
+http_buffer_drain (http_buffer_t *hb, u32 len)
+{
+ return hb->vft->drain (hb, len);
+}
+
+static inline u8
+http_buffer_is_drained (http_buffer_t *hb)
+{
+ return hb->vft->is_drained (hb);
+}
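+
+/*
+ * Sketch of the intended producer loop (in-tree, the transfer is driven by
+ * the request state machine in http.c); "ts", "max_len" and "body_len" are
+ * hypothetical, and the last enqueue argument allows partial writes:
+ *
+ *   http_buffer_init (&hb, HTTP_BUFFER_FIFO, as->tx_fifo, body_len);
+ *   while (!http_buffer_is_drained (&hb))
+ *     {
+ *       segs = http_buffer_get_segs (&hb, max_len, &n_segs);
+ *       rv = svm_fifo_enqueue_segments (ts->tx_fifo, segs, n_segs, 1);
+ *       if (rv > 0)
+ *         http_buffer_drain (&hb, rv);
+ *       else
+ *         break;
+ *     }
+ *   http_buffer_free (&hb);
+ */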
+
+#endif /* SRC_PLUGINS_HTTP_HTTP_BUFFER_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http/http_timer.c b/src/plugins/http/http_timer.c
new file mode 100644
index 00000000000..42fe69076fe
--- /dev/null
+++ b/src/plugins/http/http_timer.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <http/http_timer.h>
+#include <vnet/session/session.h>
+
+http_tw_ctx_t http_tw_ctx;
+
+static void
+http_timer_process_expired_cb (u32 *expired_timers)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+ u32 hs_handle;
+ int i;
+
+ for (i = 0; i < vec_len (expired_timers); i++)
+ {
+      /* Get session handle. The top bit is the timer id */
+ hs_handle = expired_timers[i] & 0x7FFFFFFF;
+ session_send_rpc_evt_to_thread (hs_handle >> 24, twc->cb_fn,
+ uword_to_pointer (hs_handle, void *));
+ }
+}
+
+static uword
+http_timer_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+ f64 now, timeout = 1.0;
+ uword *event_data = 0;
+ uword __clib_unused event_type;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ now = vlib_time_now (vm);
+ event_type = vlib_process_get_events (vm, (uword **) &event_data);
+
+ /* expire timers */
+ clib_spinlock_lock (&twc->tw_lock);
+ tw_timer_expire_timers_2t_1w_2048sl (&twc->tw, now);
+ clib_spinlock_unlock (&twc->tw_lock);
+
+ vec_reset_length (event_data);
+ }
+ return 0;
+}
+
+VLIB_REGISTER_NODE (http_timer_process_node) = {
+ .function = http_timer_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "http-timer-process",
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+void
+http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *cb_fn)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+ vlib_node_t *n;
+
+ tw_timer_wheel_init_2t_1w_2048sl (&twc->tw, http_timer_process_expired_cb,
+ 1.0 /* timer interval */, ~0);
+ clib_spinlock_init (&twc->tw_lock);
+ twc->cb_fn = cb_fn;
+
+ vlib_node_set_state (vm, http_timer_process_node.index,
+ VLIB_NODE_STATE_POLLING);
+ n = vlib_get_node (vm, http_timer_process_node.index);
+ vlib_start_process (vm, n->runtime_index);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http/http_timer.h b/src/plugins/http/http_timer.h
new file mode 100644
index 00000000000..eec5a4595fe
--- /dev/null
+++ b/src/plugins/http/http_timer.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP_TIMER_H_
+#define SRC_PLUGINS_HTTP_HTTP_TIMER_H_
+
+#include <http/http.h>
+#include <vppinfra/tw_timer_2t_1w_2048sl.h>
+
+#define HTTP_CONN_TIMEOUT 60
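+
+/* Timer user handles pack the owning thread into bits [30:24] and the
+ * connection index into the low 24 bits; the wheel's top bit carries the
+ * timer id, hence the 0x7FFFFFFF mask applied on expiration. */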
+
+typedef void (http_conn_timeout_fn) (void *);
+
+typedef struct http_tw_ctx_
+{
+ tw_timer_wheel_2t_1w_2048sl_t tw;
+ clib_spinlock_t tw_lock;
+ http_conn_timeout_fn *cb_fn;
+} http_tw_ctx_t;
+
+extern http_tw_ctx_t http_tw_ctx;
+
+void http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *cb_fn);
+
+static inline void
+http_conn_timer_start (http_conn_t *hc)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+ u32 hs_handle;
+ u64 timeout;
+
+ timeout = HTTP_CONN_TIMEOUT;
+ hs_handle = hc->c_thread_index << 24 | hc->c_c_index;
+
+ clib_spinlock_lock (&twc->tw_lock);
+ hc->timer_handle =
+ tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, timeout);
+ clib_spinlock_unlock (&twc->tw_lock);
+}
+
+static inline void
+http_conn_timer_stop (http_conn_t *hc)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+
+ if (hc->timer_handle == ~0)
+ return;
+
+ clib_spinlock_lock (&twc->tw_lock);
+ tw_timer_stop_2t_1w_2048sl (&twc->tw, hc->timer_handle);
+ hc->timer_handle = ~0;
+ clib_spinlock_unlock (&twc->tw_lock);
+}
+
+static inline void
+http_conn_timer_update (http_conn_t *hc)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+ u64 timeout;
+
+ if (hc->timer_handle == ~0)
+ return;
+
+ timeout = HTTP_CONN_TIMEOUT;
+
+ clib_spinlock_lock (&twc->tw_lock);
+ tw_timer_update_2t_1w_2048sl (&twc->tw, hc->timer_handle, timeout);
+ clib_spinlock_unlock (&twc->tw_lock);
+}
+
+#endif /* SRC_PLUGINS_HTTP_HTTP_TIMER_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http_static/CMakeLists.txt b/src/plugins/http_static/CMakeLists.txt
index f9ccb15beae..5e51704dc96 100644
--- a/src/plugins/http_static/CMakeLists.txt
+++ b/src/plugins/http_static/CMakeLists.txt
@@ -14,9 +14,11 @@
add_vpp_plugin(http_static
SOURCES
+ http_cache.c
+ http_cache.h
http_static.c
static_server.c
- http_static.h
+ builtinurl/json_urls.c
API_FILES
http_static.api
diff --git a/src/plugins/http_static/FEATURE.yaml b/src/plugins/http_static/FEATURE.yaml
index d40855f2de2..ff4e147c495 100644
--- a/src/plugins/http_static/FEATURE.yaml
+++ b/src/plugins/http_static/FEATURE.yaml
@@ -1,10 +1,18 @@
---
-name: Static http https server
-maintainer: Dave Barach <dave@barachs.net>
+name: Static HTTP(S) Server
+maintainer:
+ - Dave Barach <dave@barachs.net>
+ - Florin Coras <fcoras@cisco.com>
features:
- - An extensible static http/https server with caching
-description: "A simple caching static http / https server
- A built-in vpp host stack application.
- Supports HTTP GET and HTTP POST methods."
+ - HTTP GET/POST handling
+ - LRU file caching
+ - pluggable URL handlers
+  - builtin JSON URL handlers:
+    - version.json - vpp version info
+    - interface_list.json - list of interfaces
+    - interface_stats.json - single interface via HTTP POST
+    - interface_stats.json - all interfaces via HTTP GET
+description: "Static HTTP(S) server implemented as a
+  built-in vpp host stack application."
state: production
properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/http_static/builtinurl/json_urls.c b/src/plugins/http_static/builtinurl/json_urls.c
new file mode 100644
index 00000000000..808893aac79
--- /dev/null
+++ b/src/plugins/http_static/builtinurl/json_urls.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <http_static/http_static.h>
+#include <vpp/app/version.h>
+
+hss_url_handler_rc_t
+handle_get_version (hss_url_handler_args_t *args)
+{
+ u8 *s = 0;
+
+ s = format (s, "{\"vpp_details\": {");
+ s = format (s, " \"version\": \"%s\",", VPP_BUILD_VER);
+ s = format (s, " \"build_date\": \"%s\"}}\r\n", VPP_BUILD_DATE);
+
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ return HSS_URL_HANDLER_OK;
+}
+
+void
+trim_path_from_request (u8 *s, char *path)
+{
+ u8 *cp;
+ int trim_length = strlen (path) + 1 /* remove '?' */;
+
+ /* Get rid of the path and question-mark */
+ vec_delete (s, trim_length, 0);
+
+ /* Tail trim irrelevant browser info */
+ cp = s;
+ while ((cp - s) < vec_len (s))
+ {
+ if (*cp == ' ')
+ {
+ /*
+ * Makes request a vector which happens to look
+ * like a c-string.
+ */
+ *cp = 0;
+ vec_set_len (s, cp - s);
+ break;
+ }
+ cp++;
+ }
+}
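+
+/*
+ * Illustrative example: given path "interface_stats.json" and the request
+ * vector "interface_stats.json?GigabitEthernet0/8/0 HTTP/1.1", the request
+ * is rewritten in place to "GigabitEthernet0/8/0", a vector that also
+ * reads as a C string thanks to the trailing NUL written above its length.
+ */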
+
+hss_url_handler_rc_t
+handle_get_interface_stats (hss_url_handler_args_t *args)
+{
+ u8 *s = 0, *stats = 0;
+ uword *p;
+ u32 *sw_if_indices = 0;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ char *q = "\"";
+ int i;
+ int need_comma = 0;
+ u8 *format_vnet_sw_interface_cntrs (u8 * s, vnet_interface_main_t * im,
+ vnet_sw_interface_t * si, int json);
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+
+ /* Get stats for a single interface via http POST */
+ if (args->reqtype == HTTP_REQ_POST)
+ {
+ trim_path_from_request (args->request, "interface_stats.json");
+
+ /* Find the sw_if_index */
+ p = hash_get (im->hw_interface_by_name, args->request);
+ if (!p)
+ {
+	  s = format (s, "{\"interface_stats\": [{\n");
+	  s = format (s, "   \"name\": \"%s\",", args->request);
+	  s = format (s, "   \"error\": \"%s\"", "UnknownInterface");
+	  s = format (s, "}]}\n");
+ goto out;
+ }
+
+ vec_add1 (sw_if_indices, p[0]);
+ }
+  else /* default, HTTP_REQ_GET */
+ {
+ pool_foreach (hi, im->hw_interfaces)
+ {
+ vec_add1 (sw_if_indices, hi->sw_if_index);
+ }
+ }
+
+ s = format (s, "{%sinterface_stats%s: [\n", q, q);
+
+ for (i = 0; i < vec_len (sw_if_indices); i++)
+ {
+ si = vnet_get_sw_interface (vnm, sw_if_indices[i]);
+ if (need_comma)
+ s = format (s, ",\n");
+
+ need_comma = 1;
+
+ s = format (s, "{%sname%s: %s%U%s, ", q, q, q,
+ format_vnet_sw_if_index_name, vnm, sw_if_indices[i], q);
+
+ stats = format_vnet_sw_interface_cntrs (stats, &vnm->interface_main, si,
+ 1 /* want json */);
+ if (vec_len (stats))
+ s = format (s, "%v}", stats);
+ else
+ s = format (s, "%snone%s: %strue%s}", q, q, q, q);
+ vec_reset_length (stats);
+ }
+
+ s = format (s, "]}\n");
+
+out:
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ vec_free (sw_if_indices);
+ vec_free (stats);
+ return HSS_URL_HANDLER_OK;
+}
+
+hss_url_handler_rc_t
+handle_get_interface_list (hss_url_handler_args_t *args)
+{
+ u8 *s = 0;
+ int i;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_hw_interface_t *hi;
+ u32 *hw_if_indices = 0;
+ int need_comma = 0;
+
+ /* Construct vector of active hw_if_indexes ... */
+ pool_foreach (hi, im->hw_interfaces)
+ {
+ /* No point in mentioning "local0"... */
+ if (hi - im->hw_interfaces)
+ vec_add1 (hw_if_indices, hi - im->hw_interfaces);
+ }
+
+ /* Build answer */
+ s = format (s, "{\"interface_list\": [\n");
+ for (i = 0; i < vec_len (hw_if_indices); i++)
+ {
+ if (need_comma)
+ s = format (s, ",\n");
+ hi = pool_elt_at_index (im->hw_interfaces, hw_if_indices[i]);
+ s = format (s, "\"%v\"", hi->name);
+ need_comma = 1;
+ }
+ s = format (s, "]}\n");
+ vec_free (hw_if_indices);
+
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ return HSS_URL_HANDLER_OK;
+}
+
+void
+hss_builtinurl_json_handlers_init (void)
+{
+ hss_register_url_handler (handle_get_version, "version.json", HTTP_REQ_GET);
+ hss_register_url_handler (handle_get_interface_list, "interface_list.json",
+ HTTP_REQ_GET);
+ hss_register_url_handler (handle_get_interface_stats, "interface_stats.json",
+ HTTP_REQ_GET);
+ hss_register_url_handler (handle_get_interface_stats, "interface_stats.json",
+ HTTP_REQ_POST);
+}
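+
+/*
+ * Once registered, the built-in URLs can be exercised with any HTTP
+ * client, e.g. (server address illustrative):
+ *
+ *   curl http://10.10.10.10/version.json
+ *   curl http://10.10.10.10/interface_list.json
+ */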
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http_static/http_cache.c b/src/plugins/http_static/http_cache.c
new file mode 100644
index 00000000000..8b9751b7f78
--- /dev/null
+++ b/src/plugins/http_static/http_cache.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <http_static/http_cache.h>
+#include <vppinfra/bihash_template.c>
+#include <vppinfra/unix.h>
+#include <vlib/vlib.h>
+
+static void
+hss_cache_lock (hss_cache_t *hc)
+{
+ clib_spinlock_lock (&hc->cache_lock);
+}
+
+static void
+hss_cache_unlock (hss_cache_t *hc)
+{
+ clib_spinlock_unlock (&hc->cache_lock);
+}
+
+/** \brief Sanity-check the forward and reverse LRU lists
+ */
+static inline void
+lru_validate (hss_cache_t *hc)
+{
+#if CLIB_DEBUG > 0
+ f64 last_timestamp;
+ u32 index;
+ int i;
+ hss_cache_entry_t *ce;
+
+ last_timestamp = 1e70;
+ for (i = 1, index = hc->first_index; index != ~0;)
+ {
+ ce = pool_elt_at_index (hc->cache_pool, index);
+ /* Timestamps should be smaller (older) as we walk the fwd list */
+ if (ce->last_used > last_timestamp)
+ {
+ clib_warning ("%d[%d]: last used %.6f, last_timestamp %.6f", index,
+ i, ce->last_used, last_timestamp);
+ }
+ index = ce->next_index;
+ last_timestamp = ce->last_used;
+ i++;
+ }
+
+ last_timestamp = 0.0;
+ for (i = 1, index = hc->last_index; index != ~0;)
+ {
+ ce = pool_elt_at_index (hc->cache_pool, index);
+ /* Timestamps should be larger (newer) as we walk the rev list */
+ if (ce->last_used < last_timestamp)
+ {
+ clib_warning ("%d[%d]: last used %.6f, last_timestamp %.6f", index,
+ i, ce->last_used, last_timestamp);
+ }
+ index = ce->prev_index;
+ last_timestamp = ce->last_used;
+ i++;
+ }
+#endif
+}
+
+/** \brief Remove a data cache entry from the LRU lists
+ */
+static inline void
+lru_remove (hss_cache_t *hc, hss_cache_entry_t *ce)
+{
+ hss_cache_entry_t *next_ep, *prev_ep;
+ u32 ce_index;
+
+ lru_validate (hc);
+
+ ce_index = ce - hc->cache_pool;
+
+ /* Deal with list heads */
+ if (ce_index == hc->first_index)
+ hc->first_index = ce->next_index;
+ if (ce_index == hc->last_index)
+ hc->last_index = ce->prev_index;
+
+ /* Fix next->prev */
+ if (ce->next_index != ~0)
+ {
+ next_ep = pool_elt_at_index (hc->cache_pool, ce->next_index);
+ next_ep->prev_index = ce->prev_index;
+ }
+ /* Fix prev->next */
+ if (ce->prev_index != ~0)
+ {
+ prev_ep = pool_elt_at_index (hc->cache_pool, ce->prev_index);
+ prev_ep->next_index = ce->next_index;
+ }
+ lru_validate (hc);
+}
+
+/** \brief Add an entry to the LRU lists, tag w/ supplied timestamp
+ */
+static inline void
+lru_add (hss_cache_t *hc, hss_cache_entry_t *ce, f64 now)
+{
+ hss_cache_entry_t *next_ce;
+ u32 ce_index;
+
+ lru_validate (hc);
+
+ ce_index = ce - hc->cache_pool;
+
+ /*
+ * Re-add at the head of the forward LRU list,
+ * tail of the reverse LRU list
+ */
+ if (hc->first_index != ~0)
+ {
+ next_ce = pool_elt_at_index (hc->cache_pool, hc->first_index);
+ next_ce->prev_index = ce_index;
+ }
+
+ ce->prev_index = ~0;
+
+ /* ep now the new head of the LRU forward list */
+ ce->next_index = hc->first_index;
+ hc->first_index = ce_index;
+
+ /* single session case: also the tail of the reverse LRU list */
+ if (hc->last_index == ~0)
+ hc->last_index = ce_index;
+ ce->last_used = now;
+
+ lru_validate (hc);
+}
+
+/** \brief Remove and re-add a cache entry from/to the LRU lists
+ */
+static inline void
+lru_update (hss_cache_t *hc, hss_cache_entry_t *ep, f64 now)
+{
+ lru_remove (hc, ep);
+ lru_add (hc, ep, now);
+}
+
+static void
+hss_cache_attach_entry (hss_cache_t *hc, u32 ce_index, u8 **data,
+ u64 *data_len)
+{
+ hss_cache_entry_t *ce;
+
+ /* Expect ce_index to be validated outside */
+ ce = pool_elt_at_index (hc->cache_pool, ce_index);
+ ce->inuse++;
+ *data = ce->data;
+ *data_len = vec_len (ce->data);
+
+ /* Update the cache entry, mark it in-use */
+ lru_update (hc, ce, vlib_time_now (vlib_get_main ()));
+
+ if (hc->debug_level > 1)
+ clib_warning ("index %d refcnt now %d", ce_index, ce->inuse);
+}
+
+/** \brief Detach cache entry from session
+ */
+void
+hss_cache_detach_entry (hss_cache_t *hc, u32 ce_index)
+{
+ hss_cache_entry_t *ce;
+
+ hss_cache_lock (hc);
+
+ ce = pool_elt_at_index (hc->cache_pool, ce_index);
+ ce->inuse--;
+
+ if (hc->debug_level > 1)
+ clib_warning ("index %d refcnt now %d", ce_index, ce->inuse);
+
+ hss_cache_unlock (hc);
+}
+
+static u32
+hss_cache_lookup (hss_cache_t *hc, u8 *path)
+{
+ BVT (clib_bihash_kv) kv;
+ int rv;
+
+ kv.key = (u64) path;
+ kv.value = ~0;
+
+ /* Value updated only if lookup succeeds */
+ rv = BV (clib_bihash_search) (&hc->name_to_data, &kv, &kv);
+ ASSERT (!rv || kv.value == ~0);
+
+ if (hc->debug_level > 1)
+ clib_warning ("lookup '%s' %s", kv.key, kv.value == ~0 ? "fail" : "found");
+
+ return kv.value;
+}
+
+u32
+hss_cache_lookup_and_attach (hss_cache_t *hc, u8 *path, u8 **data,
+ u64 *data_len)
+{
+ u32 ce_index;
+
+ /* Make sure nobody removes the entry while we look it up */
+ hss_cache_lock (hc);
+
+ ce_index = hss_cache_lookup (hc, path);
+ if (ce_index != ~0)
+ hss_cache_attach_entry (hc, ce_index, data, data_len);
+
+ hss_cache_unlock (hc);
+
+ return ce_index;
+}
+
+static void
+hss_cache_do_evictions (hss_cache_t *hc)
+{
+ BVT (clib_bihash_kv) kv;
+ hss_cache_entry_t *ce;
+ u32 free_index;
+
+ free_index = hc->last_index;
+
+ while (free_index != ~0)
+ {
+ /* pick the LRU */
+ ce = pool_elt_at_index (hc->cache_pool, free_index);
+ /* Which could be in use... */
+ if (ce->inuse)
+ {
+ if (hc->debug_level > 1)
+ clib_warning ("index %d in use refcnt %d", free_index, ce->inuse);
+ }
+ free_index = ce->prev_index;
+ kv.key = (u64) (ce->filename);
+ kv.value = ~0ULL;
+ if (BV (clib_bihash_add_del) (&hc->name_to_data, &kv, 0 /* is_add */) <
+ 0)
+ {
+ clib_warning ("LRU delete '%s' FAILED!", ce->filename);
+ }
+ else if (hc->debug_level > 1)
+ clib_warning ("LRU delete '%s' ok", ce->filename);
+
+ lru_remove (hc, ce);
+ hc->cache_size -= vec_len (ce->data);
+ hc->cache_evictions++;
+ vec_free (ce->filename);
+ vec_free (ce->data);
+
+ if (hc->debug_level > 1)
+ clib_warning ("pool put index %d", ce - hc->cache_pool);
+
+ pool_put (hc->cache_pool, ce);
+ if (hc->cache_size < hc->cache_limit)
+ break;
+ }
+}
+
+u32
+hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data, u64 *data_len)
+{
+ BVT (clib_bihash_kv) kv;
+ hss_cache_entry_t *ce;
+ clib_error_t *error;
+ u8 *file_data;
+ u32 ce_index;
+
+ hss_cache_lock (hc);
+
+ /* Need to recycle one (or more cache) entries? */
+ if (hc->cache_size > hc->cache_limit)
+ hss_cache_do_evictions (hc);
+
+ /* Read the file */
+ error = clib_file_contents ((char *) path, &file_data);
+  if (error)
+    {
+      clib_warning ("Error reading '%s'", path);
+      clib_error_report (error);
+      /* Drop the cache lock taken above before bailing out */
+      hss_cache_unlock (hc);
+      return ~0;
+    }
+
+ /* Create a cache entry for it */
+ pool_get_zero (hc->cache_pool, ce);
+ ce->filename = vec_dup (path);
+ ce->data = file_data;
+
+ /* Attach cache entry without additional lock */
+ ce->inuse++;
+ *data = file_data;
+ *data_len = vec_len (file_data);
+ lru_add (hc, ce, vlib_time_now (vlib_get_main ()));
+
+ hc->cache_size += vec_len (ce->data);
+ ce_index = ce - hc->cache_pool;
+
+ if (hc->debug_level > 1)
+ clib_warning ("index %d refcnt now %d", ce_index, ce->inuse);
+
+ /* Add to the lookup table */
+
+ kv.key = (u64) vec_dup (path);
+ kv.value = ce_index;
+
+ if (hc->debug_level > 1)
+ clib_warning ("add '%s' value %lld", kv.key, kv.value);
+
+ if (BV (clib_bihash_add_del) (&hc->name_to_data, &kv, 1 /* is_add */) < 0)
+ {
+ clib_warning ("BUG: add failed!");
+ }
+
+ hss_cache_unlock (hc);
+
+ return ce_index;
+}
+
+u32
+hss_cache_clear (hss_cache_t *hc)
+{
+ u32 free_index, busy_items = 0;
+ hss_cache_entry_t *ce;
+ BVT (clib_bihash_kv) kv;
+
+ hss_cache_lock (hc);
+
+ /* Walk the LRU list to find active entries */
+ free_index = hc->last_index;
+ while (free_index != ~0)
+ {
+ ce = pool_elt_at_index (hc->cache_pool, free_index);
+ free_index = ce->prev_index;
+ /* Which could be in use... */
+ if (ce->inuse)
+ {
+ busy_items++;
+ free_index = ce->next_index;
+ continue;
+ }
+ kv.key = (u64) (ce->filename);
+ kv.value = ~0ULL;
+ if (BV (clib_bihash_add_del) (&hc->name_to_data, &kv, 0 /* is_add */) <
+ 0)
+ {
+ clib_warning ("BUG: cache clear delete '%s' FAILED!", ce->filename);
+ }
+
+ lru_remove (hc, ce);
+ hc->cache_size -= vec_len (ce->data);
+ hc->cache_evictions++;
+ vec_free (ce->filename);
+ vec_free (ce->data);
+ if (hc->debug_level > 1)
+ clib_warning ("pool put index %d", ce - hc->cache_pool);
+ pool_put (hc->cache_pool, ce);
+ free_index = hc->last_index;
+ }
+
+ hss_cache_unlock (hc);
+
+ return busy_items;
+}
+
+void
+hss_cache_init (hss_cache_t *hc, uword cache_size, u8 debug_level)
+{
+ clib_spinlock_init (&hc->cache_lock);
+
+ /* Init path-to-cache hash table */
+ BV (clib_bihash_init) (&hc->name_to_data, "http cache", 128, 32 << 20);
+
+ hc->cache_limit = cache_size;
+ hc->debug_level = debug_level;
+ hc->first_index = hc->last_index = ~0;
+}
+
+/** \brief format a file cache entry
+ */
+static u8 *
+format_hss_cache_entry (u8 *s, va_list *args)
+{
+ hss_cache_entry_t *ep = va_arg (*args, hss_cache_entry_t *);
+ f64 now = va_arg (*args, f64);
+
+ /* Header */
+ if (ep == 0)
+ {
+ s = format (s, "%40s%12s%20s", "File", "Size", "Age");
+ return s;
+ }
+ s = format (s, "%40s%12lld%20.2f", ep->filename, vec_len (ep->data),
+ now - ep->last_used);
+ return s;
+}
+
+u8 *
+format_hss_cache (u8 *s, va_list *args)
+{
+ hss_cache_t *hc = va_arg (*args, hss_cache_t *);
+ u32 verbose = va_arg (*args, u32);
+ hss_cache_entry_t *ce;
+ vlib_main_t *vm;
+ u32 index;
+ f64 now;
+
+ if (verbose == 0)
+ {
+ s = format (s, "cache size %lld bytes, limit %lld bytes, evictions %lld",
+ hc->cache_size, hc->cache_limit, hc->cache_evictions);
+      return s;
+ }
+
+ vm = vlib_get_main ();
+ now = vlib_time_now (vm);
+
+ s = format (s, "%U", format_hss_cache_entry, 0 /* header */, now);
+
+ for (index = hc->first_index; index != ~0;)
+ {
+ ce = pool_elt_at_index (hc->cache_pool, index);
+ index = ce->next_index;
+ s = format (s, "%U", format_hss_cache_entry, ce, now);
+ }
+
+ s = format (s, "%40s%12lld", "Total Size", hc->cache_size);
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http_static/http_cache.h b/src/plugins/http_static/http_cache.h
new file mode 100644
index 00000000000..a89ed5e7e94
--- /dev/null
+++ b/src/plugins/http_static/http_cache.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_STATIC_HTTP_CACHE_H_
+#define SRC_PLUGINS_HTTP_STATIC_HTTP_CACHE_H_
+
+#include <vppinfra/bihash_vec8_8.h>
+
+typedef struct hss_cache_entry_
+{
+ /** Name of the file */
+ u8 *filename;
+ /** Contents of the file, as a u8 * vector */
+ u8 *data;
+ /** Last time the cache entry was used */
+ f64 last_used;
+ /** Cache LRU links */
+ u32 next_index;
+ u32 prev_index;
+ /** Reference count, so we don't recycle while referenced */
+ int inuse;
+} hss_cache_entry_t;
+
+typedef struct hss_cache_
+{
+ /** Unified file data cache pool */
+ hss_cache_entry_t *cache_pool;
+ /** Hash table which maps file name to file data */
+ BVT (clib_bihash) name_to_data;
+
+ /** Session pool lock */
+ clib_spinlock_t cache_lock;
+
+ /** Current cache size */
+ u64 cache_size;
+ /** Max cache size in bytes */
+ u64 cache_limit;
+ /** Number of cache evictions */
+ u64 cache_evictions;
+
+ /** Cache LRU listheads */
+ u32 first_index;
+ u32 last_index;
+
+ u8 debug_level;
+} hss_cache_t;
+
+u32 hss_cache_lookup_and_attach (hss_cache_t *hc, u8 *path, u8 **data,
+ u64 *data_len);
+u32 hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data,
+ u64 *data_len);
+void hss_cache_detach_entry (hss_cache_t *hc, u32 ce_index);
+u32 hss_cache_clear (hss_cache_t *hc);
+void hss_cache_init (hss_cache_t *hc, uword cache_size, u8 debug_level);
+
+u8 *format_hss_cache (u8 *s, va_list *args);
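+
+/*
+ * Typical lookup-or-populate pattern (a sketch; every successful attach
+ * must be paired with hss_cache_detach_entry once the data has been sent):
+ *
+ *   ce_index = hss_cache_lookup_and_attach (hc, path, &data, &data_len);
+ *   if (ce_index == ~0)
+ *     ce_index = hss_cache_add_and_attach (hc, path, &data, &data_len);
+ *   if (ce_index != ~0)
+ *     {
+ *       ... transmit data ...
+ *       hss_cache_detach_entry (hc, ce_index);
+ *     }
+ */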
+
+#endif /* SRC_PLUGINS_HTTP_STATIC_HTTP_CACHE_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http_static/http_static.c b/src/plugins/http_static/http_static.c
index 48ae593718a..8f8fe37b7c1 100644
--- a/src/plugins/http_static/http_static.c
+++ b/src/plugins/http_static/http_static.c
@@ -1,7 +1,5 @@
/*
- * http_static.c - skeleton vpp engine plug-in
- *
- * Copyright (c) <current-year> <your-organization>
+ * Copyright (c) 2017-2022 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -29,57 +27,116 @@
#include <vpp/api/types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define REPLY_MSG_ID_BASE hmp->msg_id_base
+#define REPLY_MSG_ID_BASE hsm->msg_id_base
#include <vlibapi/api_helper_macros.h>
-http_static_main_t http_static_main;
+__clib_export void
+hss_register_url_handler (hss_url_handler_fn fp, const char *url,
+ http_req_method_t request_type)
+{
+ hss_main_t *hsm = &hss_main;
+ uword *p, *url_table;
+
+ url_table = (request_type == HTTP_REQ_GET) ? hsm->get_url_handlers :
+ hsm->post_url_handlers;
+
+ p = hash_get_mem (url_table, url);
+
+ if (p)
+ {
+ clib_warning ("WARNING: attempt to replace handler for %s '%s' ignored",
+ (request_type == HTTP_REQ_GET) ? "GET" : "POST", url);
+ return;
+ }
+
+ hash_set_mem (url_table, url, (uword) fp);
+
+ /*
+ * Need to update the hash table pointer in http_static_server_main
+ * in case we just expanded it...
+ */
+ if (request_type == HTTP_REQ_GET)
+ hsm->get_url_handlers = url_table;
+ else
+ hsm->post_url_handlers = url_table;
+}
+
+/** \brief API helper function for vl_api_http_static_enable_t messages
+ */
+static int
+hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos,
+ u32 private_segment_size, u8 *www_root, u8 *uri)
+{
+ hss_main_t *hsm = &hss_main;
+ int rv;
+
+ hsm->fifo_size = fifo_size;
+ hsm->cache_size = cache_limit;
+ hsm->prealloc_fifos = prealloc_fifos;
+ hsm->private_segment_size = private_segment_size;
+ hsm->www_root = format (0, "%s%c", www_root, 0);
+ hsm->uri = format (0, "%s%c", uri, 0);
+
+ if (vec_len (hsm->www_root) < 2)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ if (hsm->app_index != ~0)
+ return VNET_API_ERROR_APP_ALREADY_ATTACHED;
+
+ vnet_session_enable_disable (hsm->vlib_main, 1 /* turn on TCP, etc. */);
+
+ rv = hss_create (hsm->vlib_main);
+ switch (rv)
+ {
+ case 0:
+ break;
+ default:
+ vec_free (hsm->www_root);
+ vec_free (hsm->uri);
+ return VNET_API_ERROR_INIT_FAILED;
+ }
+ return 0;
+}
/* API message handler */
static void vl_api_http_static_enable_t_handler
(vl_api_http_static_enable_t * mp)
{
vl_api_http_static_enable_reply_t *rmp;
- http_static_main_t *hmp = &http_static_main;
+ hss_main_t *hsm = &hss_main;
int rv;
mp->uri[ARRAY_LEN (mp->uri) - 1] = 0;
mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0;
- rv = http_static_server_enable_api
- (ntohl (mp->fifo_size),
- ntohl (mp->cache_size_limit),
- ntohl (mp->prealloc_fifos),
- ntohl (mp->private_segment_size), mp->www_root, mp->uri);
+ rv =
+ hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit),
+ ntohl (mp->prealloc_fifos),
+ ntohl (mp->private_segment_size), mp->www_root, mp->uri);
REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_REPLY);
}
#include <http_static/http_static.api.c>
static clib_error_t *
-http_static_init (vlib_main_t * vm)
+hss_api_init (vlib_main_t *vm)
{
- http_static_main_t *hmp = &http_static_main;
-
- hmp->vlib_main = vm;
- hmp->vnet_main = vnet_get_main ();
+ hss_main_t *hsm = &hss_main;
/* Ask for a correctly-sized block of API message decode slots */
- hmp->msg_id_base = setup_message_id_table ();
+ hsm->msg_id_base = setup_message_id_table ();
return 0;
}
-VLIB_INIT_FUNCTION (http_static_init);
+VLIB_INIT_FUNCTION (hss_api_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "HTTP Static Server"
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/http_static/http_static.h b/src/plugins/http_static/http_static.h
index 8ee0f92cd44..2850d356b74 100644
--- a/src/plugins/http_static/http_static.h
+++ b/src/plugins/http_static/http_static.h
@@ -1,8 +1,5 @@
-
/*
- * http_static.h - skeleton vpp engine plug-in header file
- *
- * Copyright (c) <current-year> <your-organization>
+ * Copyright (c) 2017-2022 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -18,199 +15,160 @@
#ifndef __included_http_static_h__
#define __included_http_static_h__
-#include <vnet/vnet.h>
-#include <vnet/session/application.h>
#include <vnet/session/application_interface.h>
#include <vnet/session/session.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
+#include <http/http.h>
#include <vppinfra/hash.h>
#include <vppinfra/error.h>
-#include <vppinfra/time_range.h>
-#include <vppinfra/tw_timer_2t_1w_2048sl.h>
-#include <vppinfra/bihash_vec8_8.h>
+#include <http_static/http_cache.h>
/** @file http_static.h
* Static http server definitions
*/
-typedef struct
-{
- /* API message ID base */
- u16 msg_id_base;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-} http_static_main_t;
-
-extern http_static_main_t http_static_main;
-
-/** \brief Session States
- */
-
-typedef enum
-{
- /** Session is closed */
- HTTP_STATE_CLOSED,
- /** Session is established */
- HTTP_STATE_ESTABLISHED,
- /** Session has sent an OK response */
- HTTP_STATE_OK_SENT,
- /** Session has sent an HTML response */
- HTTP_STATE_SEND_MORE_DATA,
- /** Number of states */
- HTTP_STATE_N_STATES,
-} http_session_state_t;
-
-typedef enum
-{
- CALLED_FROM_RX,
- CALLED_FROM_TX,
- CALLED_FROM_TIMER,
-} http_state_machine_called_from_t;
-
-typedef enum
-{
- HTTP_BUILTIN_METHOD_GET = 0,
- HTTP_BUILTIN_METHOD_POST,
-} http_builtin_method_type_t;
-
-
/** \brief Application session
*/
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- /** Base class instance variables */
-#define _(type, name) type name;
- foreach_app_session_field
-#undef _
+ u32 session_index;
/** rx thread index */
u32 thread_index;
- /** rx buffer */
- u8 *rx_buf;
/** vpp session index, handle */
u32 vpp_session_index;
- u64 vpp_session_handle;
- /** Timeout timer handle */
- u32 timer_handle;
+ session_handle_t vpp_session_handle;
/** Fully-resolved file path */
u8 *path;
- /** File data, a vector */
+ /** Data to send */
u8 *data;
+ /** Data length */
+ u64 data_len;
/** Current data send offset */
u32 data_offset;
/** Need to free data in detach_cache_entry */
int free_data;
-
/** File cache pool index */
u32 cache_pool_index;
- /** state machine called from... */
- http_state_machine_called_from_t called_from;
-} http_session_t;
+ /** Content type, e.g. text, text/javascript, etc. */
+ http_content_type_t content_type;
+} hss_session_t;
-/** \brief In-memory file data cache entry
- */
-typedef struct
+typedef struct hss_session_handle_
{
- /** Name of the file */
- u8 *filename;
- /** Contents of the file, as a u8 * vector */
- u8 *data;
- /** Last time the cache entry was used */
- f64 last_used;
- /** Cache LRU links */
- u32 next_index;
- u32 prev_index;
- /** Reference count, so we don't recycle while referenced */
- int inuse;
-} file_data_cache_t;
+ union
+ {
+ struct
+ {
+ u32 session_index;
+ u32 thread_index;
+ };
+ u64 as_u64;
+ };
+} hss_session_handle_t;
+
+STATIC_ASSERT_SIZEOF (hss_session_handle_t, sizeof (u64));
+
+
+typedef struct hss_url_handler_args_
+{
+ hss_session_handle_t sh;
+
+ union
+ {
+ /* Request args */
+ struct
+ {
+ u8 *request;
+ http_req_method_t reqtype;
+ };
+
+ /* Reply args */
+ struct
+ {
+ u8 *data;
+ uword data_len;
+ u8 free_vec_data;
+ http_status_code_t sc;
+ };
+ };
+} hss_url_handler_args_t;
+
+typedef enum hss_url_handler_rc_
+{
+ HSS_URL_HANDLER_OK,
+ HSS_URL_HANDLER_ERROR,
+ HSS_URL_HANDLER_ASYNC,
+} hss_url_handler_rc_t;
+
+typedef hss_url_handler_rc_t (*hss_url_handler_fn) (hss_url_handler_args_t *);
+typedef void (*hss_register_url_fn) (hss_url_handler_fn, char *, int);
+typedef void (*hss_session_send_fn) (hss_url_handler_args_t *args);
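+
+/*
+ * Minimal GET handler sketch (handler name and URL are hypothetical; see
+ * builtinurl/json_urls.c for in-tree examples):
+ *
+ *   static hss_url_handler_rc_t
+ *   handle_hello (hss_url_handler_args_t *a)
+ *   {
+ *     a->data = format (0, "{\"hello\": \"world\"}\r\n");
+ *     a->data_len = vec_len (a->data);
+ *     a->free_vec_data = 1;
+ *     return HSS_URL_HANDLER_OK;
+ *   }
+ *
+ *   hss_register_url_handler (handle_hello, "hello.json", HTTP_REQ_GET);
+ */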
/** \brief Main data structure
*/
-
typedef struct
{
/** Per thread vector of session pools */
- http_session_t **sessions;
- /** Session pool reader writer lock */
- clib_rwlock_t sessions_lock;
- /** vpp session to http session index map */
- u32 **session_to_http_session;
-
- /** Enable debug messages */
- int debug_level;
-
- /** vpp message/event queue */
- svm_msg_q_t **vpp_queue;
-
- /** Unified file data cache pool */
- file_data_cache_t *cache_pool;
- /** Hash table which maps file name to file data */
- BVT (clib_bihash) name_to_data;
+ hss_session_t **sessions;
/** Hash tables for built-in GET and POST handlers */
uword *get_url_handlers;
uword *post_url_handlers;
- /** Current cache size */
- u64 cache_size;
- /** Max cache size in bytes */
- u64 cache_limit;
- /** Number of cache evictions */
- u64 cache_evictions;
-
- /** Cache LRU listheads */
- u32 first_index;
- u32 last_index;
+ hss_cache_t cache;
/** root path to be served */
u8 *www_root;
- /** Server's event queue */
- svm_queue_t *vl_input_queue;
-
- /** API client handle */
- u32 my_client_index;
-
/** Application index */
u32 app_index;
- /** Process node index for event scheduling */
- u32 node_index;
-
/** Cert and key pair for tls */
u32 ckpair_index;
- /** Session cleanup timer wheel */
- tw_timer_wheel_2t_1w_2048sl_t tw;
- clib_spinlock_t tw_lock;
+ /* API message ID base */
+ u16 msg_id_base;
+
+ vlib_main_t *vlib_main;
- /** Time base, so we can generate browser cache control http spew */
- clib_timebase_t timebase;
+ /*
+ * Config
+ */
+ /** Enable debug messages */
+ int debug_level;
/** Number of preallocated fifos, usually 0 */
u32 prealloc_fifos;
/** Private segment size, usually 0 */
- u32 private_segment_size;
+ u64 private_segment_size;
/** Size of the allocated rx, tx fifos, roughly 8K or so */
u32 fifo_size;
/** The bind URI, defaults to tcp://0.0.0.0/80 */
u8 *uri;
- vlib_main_t *vlib_main;
-} http_static_server_main_t;
+ /** Threshold for switching to ptr data in http msgs */
+ u64 use_ptr_thresh;
+ /** Enable the use of builtinurls */
+ u8 enable_url_handlers;
+ /** Max cache size before LRU occurs */
+ u64 cache_size;
+
+  /** Hash table mapping file extension to MIME type string index */
+ uword *mime_type_indices_by_file_extensions;
+} hss_main_t;
-extern http_static_server_main_t http_static_server_main;
+extern hss_main_t hss_main;
-int http_static_server_enable_api (u32 fifo_size, u32 cache_limit,
- u32 prealloc_fifos,
- u32 private_segment_size,
- u8 * www_root, u8 * uri);
+int hss_create (vlib_main_t *vm);
-void http_static_server_register_builtin_handler
- (void *fp, char *url, int type);
+/**
+ * Register a GET or POST URL handler
+ */
+void hss_register_url_handler (hss_url_handler_fn fp, const char *url,
+ http_req_method_t type);
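+/* Illustrative sketch (handler name hypothetical): a plugin could register
+ * a handler for GET /version.json with
+ *   hss_register_url_handler (my_version_handler, "version.json",
+ *                             HTTP_REQ_GET);
+ * returning HSS_URL_HANDLER_ASYNC and replying later via
+ * hss_session_send_data().
+ */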
+void hss_session_send_data (hss_url_handler_args_t *args);
+void hss_builtinurl_json_handlers_init (void);
+hss_session_t *hss_session_get (u32 thread_index, u32 hs_index);
#endif /* __included_http_static_h__ */
diff --git a/src/plugins/http_static/static_server.c b/src/plugins/http_static/static_server.c
index c715dfa6fb8..040cdca9d7a 100644
--- a/src/plugins/http_static/static_server.c
+++ b/src/plugins/http_static/static_server.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Cisco and/or its affiliates.
+ * Copyright (c) 2017-2022 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -13,158 +13,48 @@
* limitations under the License.
*/
-#include <vnet/vnet.h>
-#include <vnet/session/application.h>
-#include <vnet/session/application_interface.h>
-#include <vnet/session/session.h>
-#include <vppinfra/unix.h>
+#include <http_static/http_static.h>
+
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
-#include <http_static/http_static.h>
-
-#include <vppinfra/bihash_template.c>
/** @file static_server.c
- * Static http server, sufficient to
- * serve .html / .css / .js content.
+ * Static http server, sufficient to serve .html / .css / .js content.
*/
/*? %%clicmd:group_label Static HTTP Server %% ?*/
-http_static_server_main_t http_static_server_main;
-
-/** \brief Format the called-from enum
- */
-
-static u8 *
-format_state_machine_called_from (u8 * s, va_list * args)
-{
- http_state_machine_called_from_t cf =
- va_arg (*args, http_state_machine_called_from_t);
- char *which = "bogus!";
-
- switch (cf)
- {
- case CALLED_FROM_RX:
- which = "from rx";
- break;
- case CALLED_FROM_TX:
- which = "from tx";
- break;
- case CALLED_FROM_TIMER:
- which = "from timer";
- break;
-
- default:
- break;
- }
-
- s = format (s, "%s", which);
- return s;
-}
-
-
-/** \brief Acquire reader lock on the sessions pools
- */
-static void
-http_static_server_sessions_reader_lock (void)
-{
- clib_rwlock_reader_lock (&http_static_server_main.sessions_lock);
-}
-
-/** \brief Drop reader lock on the sessions pools
- */
-static void
-http_static_server_sessions_reader_unlock (void)
-{
- clib_rwlock_reader_unlock (&http_static_server_main.sessions_lock);
-}
-
-/** \brief Acquire writer lock on the sessions pools
- */
-static void
-http_static_server_sessions_writer_lock (void)
-{
- clib_rwlock_writer_lock (&http_static_server_main.sessions_lock);
-}
-
-/** \brief Drop writer lock on the sessions pools
- */
-static void
-http_static_server_sessions_writer_unlock (void)
-{
- clib_rwlock_writer_unlock (&http_static_server_main.sessions_lock);
-}
-
-/** \brief Start a session cleanup timer
- */
-static void
-http_static_server_session_timer_start (http_session_t * hs)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- u32 hs_handle;
+#define HSS_FIFO_THRESH (16 << 10)
- /* The session layer may fire a callback at a later date... */
- if (!pool_is_free (hsm->sessions[hs->thread_index], hs))
- {
- hs_handle = hs->thread_index << 24 | hs->session_index;
- clib_spinlock_lock (&http_static_server_main.tw_lock);
- hs->timer_handle = tw_timer_start_2t_1w_2048sl
- (&http_static_server_main.tw, hs_handle, 0, 60);
- clib_spinlock_unlock (&http_static_server_main.tw_lock);
- }
-}
+hss_main_t hss_main;
-/** \brief stop a session cleanup timer
- */
-static void
-http_static_server_session_timer_stop (http_session_t * hs)
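+/** \brief Allocate an http session
+ */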
+static hss_session_t *
+hss_session_alloc (u32 thread_index)
{
- if (hs->timer_handle == ~0)
- return;
- clib_spinlock_lock (&http_static_server_main.tw_lock);
- tw_timer_stop_2t_1w_2048sl (&http_static_server_main.tw, hs->timer_handle);
- clib_spinlock_unlock (&http_static_server_main.tw_lock);
-}
+ hss_main_t *hsm = &hss_main;
+ hss_session_t *hs;
-/** \brief Allocate an http session
- */
-static http_session_t *
-http_static_server_session_alloc (u32 thread_index)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- http_session_t *hs;
- pool_get_aligned_zero_numa (hsm->sessions[thread_index], hs,
- 0 /* not aligned */ ,
- 1 /* zero */ ,
- os_get_numa_index ());
+ pool_get_zero (hsm->sessions[thread_index], hs);
hs->session_index = hs - hsm->sessions[thread_index];
hs->thread_index = thread_index;
- hs->timer_handle = ~0;
hs->cache_pool_index = ~0;
return hs;
}
-/** \brief Get an http session by index
- */
-static http_session_t *
-http_static_server_session_get (u32 thread_index, u32 hs_index)
+__clib_export hss_session_t *
+hss_session_get (u32 thread_index, u32 hs_index)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
if (pool_is_free_index (hsm->sessions[thread_index], hs_index))
return 0;
return pool_elt_at_index (hsm->sessions[thread_index], hs_index);
}
-/** \brief Free an http session
- */
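+/** \brief Free an http session
+ */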
static void
-http_static_server_session_free (http_session_t * hs)
+hss_session_free (hss_session_t *hs)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
- /* Make sure the timer is stopped... */
- http_static_server_session_timer_stop (hs);
pool_put (hsm->sessions[hs->thread_index], hs);
if (CLIB_DEBUG)
@@ -173,974 +63,571 @@ http_static_server_session_free (http_session_t * hs)
save_thread_index = hs->thread_index;
-  /* Poison the entry, preserve timer state and thread index */
+  /* Poison the entry, preserve the thread index */
memset (hs, 0xfa, sizeof (*hs));
- hs->timer_handle = ~0;
hs->thread_index = save_thread_index;
}
}
-/** \brief add a session to the vpp < -- > http session index map
+/** \brief Disconnect a transport session
*/
static void
-http_static_server_session_lookup_add (u32 thread_index, u32 s_index,
- u32 hs_index)
+hss_session_disconnect_transport (hss_session_t *hs)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- vec_validate (hsm->session_to_http_session[thread_index], s_index);
- hsm->session_to_http_session[thread_index][s_index] = hs_index;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+ a->handle = hs->vpp_session_handle;
+ a->app_index = hss_main.app_index;
+ vnet_disconnect_session (a);
}
-/** \brief Remove a session from the vpp < -- > http session index map
- */
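+/** \brief Send an http reply; data is passed by pointer instead of being
+ * copied once it exceeds the use_ptr_thresh configuration
+ */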
static void
-http_static_server_session_lookup_del (u32 thread_index, u32 s_index)
+start_send_data (hss_session_t *hs, http_status_code_t status)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- hsm->session_to_http_session[thread_index][s_index] = ~0;
-}
+ http_msg_t msg;
+ session_t *ts;
+ int rv;
-/** \brief lookup a session in the vpp < -- > http session index map
- */
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
-static http_session_t *
-http_static_server_session_lookup (u32 thread_index, u32 s_index)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- u32 hs_index;
+ msg.type = HTTP_MSG_REPLY;
+ msg.code = status;
+ msg.content_type = hs->content_type;
+ msg.data.len = hs->data_len;
- if (s_index < vec_len (hsm->session_to_http_session[thread_index]))
+ if (hs->data_len > hss_main.use_ptr_thresh)
{
- hs_index = hsm->session_to_http_session[thread_index][s_index];
- return http_static_server_session_get (thread_index, hs_index);
- }
- return 0;
-}
-
-/** \brief Detach cache entry from session
- */
+ msg.data.type = HTTP_MSG_DATA_PTR;
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
-static void
-http_static_server_detach_cache_entry (http_session_t * hs)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- file_data_cache_t *ep;
+ uword data = pointer_to_uword (hs->data);
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (data), (u8 *) &data);
+      ASSERT (rv == sizeof (data));
- /*
- * Decrement cache pool entry reference count
- * Note that if e.g. a file lookup fails, the cache pool index
- * won't be set
- */
- if (hs->cache_pool_index != ~0)
- {
- ep = pool_elt_at_index (hsm->cache_pool, hs->cache_pool_index);
- ep->inuse--;
- if (hsm->debug_level > 1)
- clib_warning ("index %d refcnt now %d", hs->cache_pool_index,
- ep->inuse);
+ goto done;
}
- hs->cache_pool_index = ~0;
- if (hs->free_data)
- vec_free (hs->data);
- hs->data = 0;
- hs->data_offset = 0;
- hs->free_data = 0;
- vec_free (hs->path);
-}
-/** \brief Disconnect a session
- */
-static void
-http_static_server_session_disconnect (http_session_t * hs)
-{
- vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- a->handle = hs->vpp_session_handle;
- a->app_index = http_static_server_main.app_index;
- vnet_disconnect_session (a);
-}
+ msg.data.type = HTTP_MSG_DATA_INLINE;
-/* *INDENT-OFF* */
-/** \brief http error boilerplate
- */
-static const char *http_error_template =
- "HTTP/1.1 %s\r\n"
- "Date: %U GMT\r\n"
- "Content-Type: text/html\r\n"
- "Connection: close\r\n"
- "Pragma: no-cache\r\n"
- "Content-Length: 0\r\n\r\n";
-
-/** \brief http response boilerplate
- */
-static const char *http_response_template =
- "Date: %U GMT\r\n"
- "Expires: %U GMT\r\n"
- "Server: VPP Static\r\n"
- "Content-Type: %s\r\n"
- "Content-Length: %d\r\n\r\n";
-
-/* *INDENT-ON* */
-
-/** \brief send http data
- @param hs - http session
- @param data - the data vector to transmit
- @param length - length of data
- @param offset - transmit offset for this operation
- @return offset for next transmit operation, may be unchanged w/ full fifo
-*/
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
-static u32
-static_send_data (http_session_t * hs, u8 * data, u32 length, u32 offset)
-{
- u32 bytes_to_send;
- http_static_server_main_t *hsm = &http_static_server_main;
+ if (!msg.data.len)
+ goto done;
- bytes_to_send = length - offset;
+ rv = svm_fifo_enqueue (ts->tx_fifo, hs->data_len, hs->data);
- while (bytes_to_send > 0)
+ if (rv != hs->data_len)
{
- int actual_transfer;
-
- actual_transfer = svm_fifo_enqueue
- (hs->tx_fifo, bytes_to_send, data + offset);
-
- /* Made any progress? */
- if (actual_transfer <= 0)
- {
- if (hsm->debug_level > 0 && bytes_to_send > 0)
- clib_warning ("WARNING: still %d bytes to send", bytes_to_send);
- return offset;
- }
- else
- {
- offset += actual_transfer;
- bytes_to_send -= actual_transfer;
-
- if (hsm->debug_level && bytes_to_send > 0)
- clib_warning ("WARNING: still %d bytes to send", bytes_to_send);
-
- if (svm_fifo_set_event (hs->tx_fifo))
- session_send_io_evt_to_thread (hs->tx_fifo,
- SESSION_IO_EVT_TX_FLUSH);
- return offset;
- }
+ hs->data_offset = rv;
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
}
- /* NOTREACHED */
- return ~0;
-}
-/** \brief Send an http error string
- @param hs - the http session
- @param str - the error string, e.g. "404 Not Found"
-*/
-static void
-send_error (http_session_t * hs, char *str)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- u8 *data;
- f64 now;
-
- now = clib_timebase_now (&hsm->timebase);
- data = format (0, http_error_template, str, format_clib_timebase_time, now);
- static_send_data (hs, data, vec_len (data), 0);
- vec_free (data);
+done:
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
}
-/** \brief Retrieve data from the application layer
- */
-static int
-session_rx_request (http_session_t * hs)
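+/** \brief Send reply data on behalf of a (possibly asynchronous) url handler
+ */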
+__clib_export void
+hss_session_send_data (hss_url_handler_args_t *args)
{
- u32 max_dequeue, cursize;
- int n_read;
+ hss_session_t *hs;
- cursize = vec_len (hs->rx_buf);
- max_dequeue = svm_fifo_max_dequeue (hs->rx_fifo);
- if (PREDICT_FALSE (max_dequeue == 0))
- return -1;
+ hs = hss_session_get (args->sh.thread_index, args->sh.session_index);
+ if (!hs)
+ return;
- vec_validate (hs->rx_buf, cursize + max_dequeue - 1);
- n_read = app_recv_stream_raw (hs->rx_fifo, hs->rx_buf + cursize,
- max_dequeue, 0, 0 /* peek */ );
- ASSERT (n_read == max_dequeue);
- if (svm_fifo_is_empty (hs->rx_fifo))
- svm_fifo_unset_event (hs->rx_fifo);
+ if (hs->data && hs->free_data)
+ vec_free (hs->data);
- _vec_len (hs->rx_buf) = cursize + n_read;
- return 0;
+ hs->data = args->data;
+ hs->data_len = args->data_len;
+ hs->free_data = args->free_vec_data;
+ start_send_data (hs, args->sc);
}
-/** \brief Sanity-check the forward and reverse LRU lists
+/*
+ * path_has_known_suffix()
+ * Returns 1 if the request ends with a known suffix, like .htm or .ico
+ * Used to avoid looking for "/favicon.ico/index.html" or similar.
*/
-static inline void
-lru_validate (http_static_server_main_t * hsm)
+
+static int
+path_has_known_suffix (u8 *request)
{
-#if CLIB_DEBUG > 0
- f64 last_timestamp;
- u32 index;
- int i;
- file_data_cache_t *ep;
-
- last_timestamp = 1e70;
- for (i = 1, index = hsm->first_index; index != ~0;)
- {
- ep = pool_elt_at_index (hsm->cache_pool, index);
- index = ep->next_index;
- /* Timestamps should be smaller (older) as we walk the fwd list */
- if (ep->last_used > last_timestamp)
- {
- clib_warning ("%d[%d]: last used %.6f, last_timestamp %.6f",
- ep - hsm->cache_pool, i,
- ep->last_used, last_timestamp);
- }
- last_timestamp = ep->last_used;
- i++;
- }
+ u8 *ext;
+ uword *p;
- last_timestamp = 0.0;
- for (i = 1, index = hsm->last_index; index != ~0;)
+ if (vec_len (request) == 0)
{
- ep = pool_elt_at_index (hsm->cache_pool, index);
- index = ep->prev_index;
- /* Timestamps should be larger (newer) as we walk the rev list */
- if (ep->last_used < last_timestamp)
- {
- clib_warning ("%d[%d]: last used %.6f, last_timestamp %.6f",
- ep - hsm->cache_pool, i,
- ep->last_used, last_timestamp);
- }
- last_timestamp = ep->last_used;
- i++;
+ return 0;
}
-#endif
-}
-/** \brief Remove a data cache entry from the LRU lists
- */
-static inline void
-lru_remove (http_static_server_main_t * hsm, file_data_cache_t * ep)
-{
- file_data_cache_t *next_ep, *prev_ep;
- u32 ep_index;
+ ext = request + vec_len (request) - 1;
- lru_validate (hsm);
+ while (ext > request && ext[0] != '.')
+ ext--;
- ep_index = ep - hsm->cache_pool;
+ if (ext == request)
+ return 0;
- /* Deal with list heads */
- if (ep_index == hsm->first_index)
- hsm->first_index = ep->next_index;
- if (ep_index == hsm->last_index)
- hsm->last_index = ep->prev_index;
+ p = hash_get_mem (hss_main.mime_type_indices_by_file_extensions, ext);
+ if (p)
+ return 1;
- /* Fix next->prev */
- if (ep->next_index != ~0)
- {
- next_ep = pool_elt_at_index (hsm->cache_pool, ep->next_index);
- next_ep->prev_index = ep->prev_index;
- }
- /* Fix prev->next */
- if (ep->prev_index != ~0)
- {
- prev_ep = pool_elt_at_index (hsm->cache_pool, ep->prev_index);
- prev_ep->next_index = ep->next_index;
- }
- lru_validate (hsm);
+ return 0;
}
-/** \brief Add an entry to the LRU lists, tag w/ supplied timestamp
+/*
+ * content_type_from_request
+ * Returns the index of the request's suffix in the
+ * http-layer http_content_type_str[] array.
*/
-static inline void
-lru_add (http_static_server_main_t * hsm, file_data_cache_t * ep, f64 now)
+static http_content_type_t
+content_type_from_request (u8 *request)
{
- file_data_cache_t *next_ep;
- u32 ep_index;
+ u8 *ext;
+ uword *p;
+ /* default to text/html */
+ http_content_type_t rv = HTTP_CONTENT_TEXT_HTML;
- lru_validate (hsm);
+ ASSERT (vec_len (request) > 0);
- ep_index = ep - hsm->cache_pool;
+ ext = request + vec_len (request) - 1;
- /*
- * Re-add at the head of the forward LRU list,
- * tail of the reverse LRU list
- */
- if (hsm->first_index != ~0)
- {
- next_ep = pool_elt_at_index (hsm->cache_pool, hsm->first_index);
- next_ep->prev_index = ep_index;
- }
+ while (ext > request && ext[0] != '.')
+ ext--;
- ep->prev_index = ~0;
+ if (ext == request)
+ return rv;
- /* ep now the new head of the LRU forward list */
- ep->next_index = hsm->first_index;
- hsm->first_index = ep_index;
+ p = hash_get_mem (hss_main.mime_type_indices_by_file_extensions, ext);
- /* single session case: also the tail of the reverse LRU list */
- if (hsm->last_index == ~0)
- hsm->last_index = ep_index;
- ep->last_used = now;
+ if (p == 0)
+ return rv;
- lru_validate (hsm);
+ rv = p[0];
+ return rv;
}
-/** \brief Remove and re-add a cache entry from/to the LRU lists
- */
-
-static inline void
-lru_update (http_static_server_main_t * hsm, file_data_cache_t * ep, f64 now)
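+/** \brief Try to dispatch the request to a registered GET/POST url handler
+ */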
+static int
+try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
+ u8 *request)
{
- lru_remove (hsm, ep);
- lru_add (hsm, ep, now);
-}
+ http_status_code_t sc = HTTP_STATUS_OK;
+ hss_url_handler_args_t args = {};
+ uword *p, *url_table;
+ http_content_type_t type;
+ int rv;
-/** \brief Session-layer (main) data rx callback.
- Parse the http request, and reply to it.
- Future extensions might include POST processing, active content, etc.
-*/
+ if (!hsm->enable_url_handlers || !request)
+ return -1;
-/* svm_fifo_add_want_deq_ntf (tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL)
-get shoulder-tap when transport dequeues something, set in
-xmit routine. */
+ /* zero-length? try "index.html" */
+ if (vec_len (request) == 0)
+ {
+ request = format (request, "index.html");
+ }
-/** \brief closed state - should never really get here
- */
-static int
-state_closed (session_t * s, http_session_t * hs,
- http_state_machine_called_from_t cf)
-{
- clib_warning ("WARNING: http session %d, called from %U",
- hs->session_index, format_state_machine_called_from, cf);
- return -1;
-}
+ type = content_type_from_request (request);
-static void
-close_session (http_session_t * hs)
-{
- http_static_server_session_disconnect (hs);
-}
+ /* Look for built-in GET / POST handlers */
+ url_table =
+ (rt == HTTP_REQ_GET) ? hsm->get_url_handlers : hsm->post_url_handlers;
-/** \brief Register a builtin GET or POST handler
- */
-__clib_export void http_static_server_register_builtin_handler
- (void *fp, char *url, int request_type)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- uword *p, *builtin_table;
+ p = hash_get_mem (url_table, request);
+ if (!p)
+ return -1;
- builtin_table = (request_type == HTTP_BUILTIN_METHOD_GET)
- ? hsm->get_url_handlers : hsm->post_url_handlers;
+ hs->path = 0;
+ hs->data_offset = 0;
+ hs->cache_pool_index = ~0;
- p = hash_get_mem (builtin_table, url);
+ if (hsm->debug_level > 0)
+ clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST", request);
- if (p)
+ args.reqtype = rt;
+ args.request = request;
+ args.sh.thread_index = hs->thread_index;
+ args.sh.session_index = hs->session_index;
+
+ rv = ((hss_url_handler_fn) p[0]) (&args);
+
+ /* Wait for data from handler */
+ if (rv == HSS_URL_HANDLER_ASYNC)
+ return 0;
+
+ if (rv == HSS_URL_HANDLER_ERROR)
{
- clib_warning ("WARNING: attempt to replace handler for %s '%s' ignored",
- (request_type == HTTP_BUILTIN_METHOD_GET) ?
- "GET" : "POST", url);
- return;
+ clib_warning ("builtin handler %llx hit on %s '%s' but failed!", p[0],
+ (rt == HTTP_REQ_GET) ? "GET" : "POST", request);
+ sc = HTTP_STATUS_NOT_FOUND;
}
- hash_set_mem (builtin_table, url, (uword) fp);
+ hs->data = args.data;
+ hs->data_len = args.data_len;
+ hs->free_data = args.free_vec_data;
+ hs->content_type = type;
- /*
- * Need to update the hash table pointer in http_static_server_main
- * in case we just expanded it...
- */
- if (request_type == HTTP_BUILTIN_METHOD_GET)
- hsm->get_url_handlers = builtin_table;
- else
- hsm->post_url_handlers = builtin_table;
+ start_send_data (hs, sc);
+
+ if (!hs->data)
+ hss_session_disconnect_transport (hs);
+
+ return 0;
}
-static int
-v_find_index (u8 * vec, char *str)
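+/** \brief Returns 1 if path names an existing regular file
+ */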
+static u8
+file_path_is_valid (u8 *path)
{
- int start_index;
- u32 slen = (u32) strnlen_s_inline (str, 8);
- u32 vlen = vec_len (vec);
+ struct stat _sb, *sb = &_sb;
- ASSERT (slen > 0);
+ if (stat ((char *) path, sb) < 0 /* can't stat the file */
+ || (sb->st_mode & S_IFMT) != S_IFREG /* not a regular file */)
+ return 0;
- if (vlen <= slen)
- return -1;
+ return 1;
+}
- for (start_index = 0; start_index < (vlen - slen); start_index++)
- {
- if (!memcmp (vec, str, slen))
- return start_index;
- }
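+/** \brief Look for an index.html file under path and, if found, build a
+ * redirect to it
+ */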
+static u32
+try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path)
+{
+ u8 *port_str = 0, *redirect;
+ transport_endpoint_t endpt;
+ transport_proto_t proto;
+ int print_port = 0;
+ u16 local_port;
+ session_t *ts;
+ u32 plen;
+
+  /* Strip the terminating NUL byte */
+ vec_dec_len (path, 1);
+ plen = vec_len (path);
+
+ /* Append "index.html" */
+ if (path[plen - 1] != '/')
+ path = format (path, "/index.html%c", 0);
+ else
+ path = format (path, "index.html%c", 0);
- return -1;
-}
+ if (hsm->debug_level > 0)
+ clib_warning ("trying to find index: %s", path);
-/** \brief established state - waiting for GET, POST, etc.
- */
-static int
-state_established (session_t * s, http_session_t * hs,
- http_state_machine_called_from_t cf)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- u8 *request = 0;
- u8 *path;
- int i, rv;
- struct stat _sb, *sb = &_sb;
- clib_error_t *error;
- u8 request_type = HTTP_BUILTIN_METHOD_GET;
- u8 save_byte = 0;
- uword *p, *builtin_table;
+ if (!file_path_is_valid (path))
+ return HTTP_STATUS_NOT_FOUND;
- /* Read data from the sessison layer */
- rv = session_rx_request (hs);
+ /*
+ * We found an index.html file, build a redirect
+ */
+ vec_delete (path, vec_len (hsm->www_root) - 1, 0);
- /* No data? Odd, but stay in this state and await further instructions */
- if (rv)
- return 0;
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
+ session_get_endpoint (ts, &endpt, 1 /* is_local */);
- /* Process the client request */
- request = hs->rx_buf;
- if (vec_len (request) < 8)
- {
- send_error (hs, "400 Bad Request");
- close_session (hs);
- return -1;
- }
+ local_port = clib_net_to_host_u16 (endpt.port);
+ proto = session_type_transport_proto (ts->session_type);
- if ((i = v_find_index (request, "GET ")) >= 0)
- goto find_end;
- else if ((i = v_find_index (request, "POST ")) >= 0)
+ if ((proto == TRANSPORT_PROTO_TCP && local_port != 80) ||
+ (proto == TRANSPORT_PROTO_TLS && local_port != 443))
{
- request_type = HTTP_BUILTIN_METHOD_POST;
- goto find_end;
+ print_port = 1;
+ port_str = format (0, ":%u", (u32) local_port);
}
- if (hsm->debug_level > 1)
- clib_warning ("Unknown http method");
+ redirect =
+ format (0,
+ "Location: http%s://%U%s%s\r\n\r\n",
+ proto == TRANSPORT_PROTO_TLS ? "s" : "", format_ip46_address,
+ &endpt.ip, endpt.is_ip4, print_port ? port_str : (u8 *) "", path);
- send_error (hs, "405 Method Not Allowed");
- close_session (hs);
- return -1;
+ if (hsm->debug_level > 0)
+ clib_warning ("redirect: %s", redirect);
-find_end:
+ vec_free (port_str);
- /* Lose "GET " or "POST " */
- vec_delete (request, i + 5 + request_type, 0);
+ hs->data = redirect;
+ hs->data_len = vec_len (redirect);
+ hs->free_data = 1;
- /* Temporarily drop in a NULL byte for lookup purposes */
- for (i = 0; i < vec_len (request); i++)
- {
- if (request[i] == ' ' || request[i] == '?')
- {
- save_byte = request[i];
- request[i] = 0;
- break;
- }
- }
+ return HTTP_STATUS_MOVED;
+}
+
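+/** \brief Try to serve the request from www_root, via the file cache
+ */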
+static int
+try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
+ u8 *request)
+{
+ http_status_code_t sc = HTTP_STATUS_OK;
+ u8 *path;
+ u32 ce_index;
+ http_content_type_t type;
+
+ /* Feature not enabled */
+ if (!hsm->www_root)
+ return -1;
+
+ type = content_type_from_request (request);
/*
- * Now we can construct the file to open
+ * Construct the file to open
* Browsers are capable of sporadically including a leading '/'
*/
- if (request[0] == '/')
+ if (!request)
+ path = format (0, "%s%c", hsm->www_root, 0);
+ else if (request[0] == '/')
path = format (0, "%s%s%c", hsm->www_root, request, 0);
else
path = format (0, "%s/%s%c", hsm->www_root, request, 0);
if (hsm->debug_level > 0)
- clib_warning ("%s '%s'", (request_type) == HTTP_BUILTIN_METHOD_GET ?
- "GET" : "POST", path);
+ clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST", path);
- /* Look for built-in GET / POST handlers */
- builtin_table = (request_type == HTTP_BUILTIN_METHOD_GET) ?
- hsm->get_url_handlers : hsm->post_url_handlers;
-
- p = hash_get_mem (builtin_table, request);
-
- if (save_byte != 0)
- request[i] = save_byte;
+ if (hs->data && hs->free_data)
+ vec_free (hs->data);
- if (p)
- {
- int rv;
- int (*fp) (http_builtin_method_type_t, u8 *, http_session_t *);
- fp = (void *) p[0];
- hs->path = path;
- rv = (*fp) (request_type, request, hs);
- if (rv)
- {
- clib_warning ("builtin handler %llx hit on %s '%s' but failed!",
- p[0], (request_type == HTTP_BUILTIN_METHOD_GET) ?
- "GET" : "POST", request);
- send_error (hs, "404 Not Found");
- close_session (hs);
- return -1;
- }
- vec_reset_length (hs->rx_buf);
- goto send_ok;
- }
- vec_reset_length (hs->rx_buf);
- /* poison request, it's not valid anymore */
- request = 0;
- /* The static server itself doesn't do POSTs */
- if (request_type == HTTP_BUILTIN_METHOD_POST)
- {
- send_error (hs, "404 Not Found");
- close_session (hs);
- return -1;
- }
+ hs->data_offset = 0;
- /* Try to find the file. 2x special cases to find index.html */
- if (stat ((char *) path, sb) < 0 /* cant even stat the file */
- || sb->st_size < 20 /* file too small */
- || (sb->st_mode & S_IFMT) != S_IFREG /* not a regular file */ )
+ ce_index =
+ hss_cache_lookup_and_attach (&hsm->cache, path, &hs->data, &hs->data_len);
+ if (ce_index == ~0)
{
- u32 save_length = vec_len (path) - 1;
- /* Try appending "index.html"... */
- _vec_len (path) -= 1;
- path = format (path, "index.html%c", 0);
- if (stat ((char *) path, sb) < 0 /* cant even stat the file */
- || sb->st_size < 20 /* file too small */
- || (sb->st_mode & S_IFMT) != S_IFREG /* not a regular file */ )
+ if (!file_path_is_valid (path))
{
- _vec_len (path) = save_length;
- path = format (path, "/index.html%c", 0);
-
- /* Send a redirect, otherwise the browser will confuse itself */
- if (stat ((char *) path, sb) < 0 /* cant even stat the file */
- || sb->st_size < 20 /* file too small */
- || (sb->st_mode & S_IFMT) != S_IFREG /* not a regular file */ )
- {
- vec_free (path);
- send_error (hs, "404 Not Found");
- close_session (hs);
- return -1;
- }
- else
+ /*
+ * Generate error 404 right now if we can't find a path with
+ * a known file extension. It's silly to look for
+ * "favicon.ico/index.html" if you can't find
+	   * "favicon.ico"; a realistic example which used to happen.
+ */
+ if (path_has_known_suffix (path))
{
- transport_endpoint_t endpoint;
- transport_proto_t proto;
- u16 local_port;
- int print_port = 0;
- u8 *port_str = 0;
-
- /*
- * To make this bit work correctly, we need to know our local
- * IP address, etc. and send it in the redirect...
- */
- u8 *redirect;
-
- vec_delete (path, vec_len (hsm->www_root) - 1, 0);
-
- session_get_endpoint (s, &endpoint, 1 /* is_local */ );
-
- local_port = clib_net_to_host_u16 (endpoint.port);
-
- proto = session_type_transport_proto (s->session_type);
-
- if ((proto == TRANSPORT_PROTO_TCP && local_port != 80)
- || (proto == TRANSPORT_PROTO_TLS && local_port != 443))
- {
- print_port = 1;
- port_str = format (0, ":%u", (u32) local_port);
- }
-
- redirect = format (0, "HTTP/1.1 301 Moved Permanently\r\n"
- "Location: http%s://%U%s%s\r\n\r\n",
- proto == TRANSPORT_PROTO_TLS ? "s" : "",
- format_ip46_address, &endpoint.ip,
- endpoint.is_ip4,
- print_port ? port_str : (u8 *) "", path);
- if (hsm->debug_level > 0)
- clib_warning ("redirect: %s", redirect);
-
- vec_free (port_str);
-
- static_send_data (hs, redirect, vec_len (redirect), 0);
- hs->session_state = HTTP_STATE_CLOSED;
- hs->path = 0;
- vec_free (redirect);
- vec_free (path);
- close_session (hs);
- return -1;
+ sc = HTTP_STATUS_NOT_FOUND;
+ goto done;
}
+ sc = try_index_file (hsm, hs, path);
+ goto done;
+ }
+ ce_index =
+ hss_cache_add_and_attach (&hsm->cache, path, &hs->data, &hs->data_len);
+ if (ce_index == ~0)
+ {
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ goto done;
}
}
- /* find or read the file if we haven't done so yet. */
- if (hs->data == 0)
- {
- BVT (clib_bihash_kv) kv;
- file_data_cache_t *dp;
+ hs->path = path;
+ hs->cache_pool_index = ce_index;
- hs->path = path;
+done:
- /* First, try the cache */
- kv.key = (u64) hs->path;
- if (BV (clib_bihash_search) (&hsm->name_to_data, &kv, &kv) == 0)
- {
- if (hsm->debug_level > 1)
- clib_warning ("lookup '%s' returned %lld", kv.key, kv.value);
-
- /* found the data.. */
- dp = pool_elt_at_index (hsm->cache_pool, kv.value);
- hs->data = dp->data;
- /* Update the cache entry, mark it in-use */
- lru_update (hsm, dp, vlib_time_now (vlib_get_main ()));
- hs->cache_pool_index = dp - hsm->cache_pool;
- dp->inuse++;
- if (hsm->debug_level > 1)
- clib_warning ("index %d refcnt now %d", hs->cache_pool_index,
- dp->inuse);
- }
- else
- {
- if (hsm->debug_level > 1)
- clib_warning ("lookup '%s' failed", kv.key, kv.value);
- /* Need to recycle one (or more cache) entries? */
- if (hsm->cache_size > hsm->cache_limit)
- {
- int free_index = hsm->last_index;
-
- while (free_index != ~0)
- {
- /* pick the LRU */
- dp = pool_elt_at_index (hsm->cache_pool, free_index);
- free_index = dp->prev_index;
- /* Which could be in use... */
- if (dp->inuse)
- {
- if (hsm->debug_level > 1)
- clib_warning ("index %d in use refcnt %d",
- dp - hsm->cache_pool, dp->inuse);
-
- }
- kv.key = (u64) (dp->filename);
- kv.value = ~0ULL;
- if (BV (clib_bihash_add_del) (&hsm->name_to_data, &kv,
- 0 /* is_add */ ) < 0)
- {
- clib_warning ("LRU delete '%s' FAILED!", dp->filename);
- }
- else if (hsm->debug_level > 1)
- clib_warning ("LRU delete '%s' ok", dp->filename);
-
- lru_remove (hsm, dp);
- hsm->cache_size -= vec_len (dp->data);
- hsm->cache_evictions++;
- vec_free (dp->filename);
- vec_free (dp->data);
- if (hsm->debug_level > 1)
- clib_warning ("pool put index %d", dp - hsm->cache_pool);
- pool_put (hsm->cache_pool, dp);
- if (hsm->cache_size < hsm->cache_limit)
- break;
- }
- }
+ hs->content_type = type;
+ start_send_data (hs, sc);
+ if (!hs->data)
+ hss_session_disconnect_transport (hs);
- /* Read the file */
- error = clib_file_contents ((char *) (hs->path), &hs->data);
- if (error)
- {
- clib_warning ("Error reading '%s'", hs->path);
- clib_error_report (error);
- vec_free (hs->path);
- close_session (hs);
- return -1;
- }
- /* Create a cache entry for it */
- pool_get (hsm->cache_pool, dp);
- memset (dp, 0, sizeof (*dp));
- dp->filename = vec_dup (hs->path);
- dp->data = hs->data;
- hs->cache_pool_index = dp - hsm->cache_pool;
- dp->inuse++;
- if (hsm->debug_level > 1)
- clib_warning ("index %d refcnt now %d", hs->cache_pool_index,
- dp->inuse);
- lru_add (hsm, dp, vlib_time_now (vlib_get_main ()));
- kv.key = (u64) vec_dup (hs->path);
- kv.value = dp - hsm->cache_pool;
- /* Add to the lookup table */
- if (hsm->debug_level > 1)
- clib_warning ("add '%s' value %lld", kv.key, kv.value);
-
- if (BV (clib_bihash_add_del) (&hsm->name_to_data, &kv,
- 1 /* is_add */ ) < 0)
- {
- clib_warning ("BUG: add failed!");
- }
- hsm->cache_size += vec_len (dp->data);
- }
- hs->data_offset = 0;
- }
- /* send 200 OK first */
-send_ok:
- static_send_data (hs, (u8 *) "HTTP/1.1 200 OK\r\n", 17, 0);
- hs->session_state = HTTP_STATE_OK_SENT;
- return 1;
+ return 0;
}
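+/** \brief Dispatch a request to the url handlers first, then to the file
+ * handler
+ */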
static int
-state_send_more_data (session_t * s, http_session_t * hs,
- http_state_machine_called_from_t cf)
+handle_request (hss_session_t *hs, http_req_method_t rt, u8 *request)
{
+ hss_main_t *hsm = &hss_main;
- /* Start sending data */
- hs->data_offset = static_send_data (hs, hs->data, vec_len (hs->data),
- hs->data_offset);
+ if (!try_url_handler (hsm, hs, rt, request))
+ return 0;
- /* Did we finish? */
- if (hs->data_offset < vec_len (hs->data))
- {
- /* No: ask for a shoulder-tap when the tx fifo has space */
- svm_fifo_add_want_deq_ntf (hs->tx_fifo,
- SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
- hs->session_state = HTTP_STATE_SEND_MORE_DATA;
- return 0;
- }
- /* Finished with this transaction, back to HTTP_STATE_ESTABLISHED */
+ if (!try_file_handler (hsm, hs, rt, request))
+ return 0;
+
+  /* Neither handler found anything, return 404 */
+ start_send_data (hs, HTTP_STATUS_NOT_FOUND);
+ hss_session_disconnect_transport (hs);
- /* Let go of the file cache entry */
- http_static_server_detach_cache_entry (hs);
- hs->session_state = HTTP_STATE_ESTABLISHED;
return 0;
}
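+/** \brief Transport session rx callback: read the http request and reply
+ * to it
+ */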
static int
-state_sent_ok (session_t * s, http_session_t * hs,
- http_state_machine_called_from_t cf)
+hss_ts_rx_callback (session_t *ts)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- char *suffix;
- char *http_type;
- u8 *http_response;
- f64 now;
- u32 offset;
-
- /* What kind of dog food are we serving? */
- suffix = (char *) (hs->path + vec_len (hs->path) - 1);
- while ((u8 *) suffix >= hs->path && *suffix != '.')
- suffix--;
- suffix++;
- http_type = "text/html";
- if (!clib_strcmp (suffix, "css"))
- http_type = "text/css";
- else if (!clib_strcmp (suffix, "js"))
- http_type = "text/javascript";
- else if (!clib_strcmp (suffix, "json"))
- http_type = "application/json";
-
- if (hs->data == 0)
+ hss_session_t *hs;
+ u8 *request = 0;
+ http_msg_t msg;
+ int rv;
+
+ hs = hss_session_get (ts->thread_index, ts->opaque);
+
+ /* Read the http message header */
+ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.type != HTTP_MSG_REQUEST ||
+ (msg.method_type != HTTP_REQ_GET && msg.method_type != HTTP_REQ_POST))
{
- clib_warning ("BUG: hs->data not set for session %d",
- hs->session_index);
- close_session (hs);
+ hs->data = 0;
+ start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
return 0;
}
- /*
- * Send an http response, which needs the current time,
- * the expiration time, and the data length
- */
- now = clib_timebase_now (&hsm->timebase);
- http_response = format (0, http_response_template,
- /* Date */
- format_clib_timebase_time, now,
- /* Expires */
- format_clib_timebase_time, now + 600.0,
- http_type, vec_len (hs->data));
- offset = static_send_data (hs, http_response, vec_len (http_response), 0);
- if (offset != vec_len (http_response))
+ /* Read request */
+ if (msg.data.len)
{
- clib_warning ("BUG: couldn't send response header!");
- close_session (hs);
- return 0;
+ vec_validate (request, msg.data.len - 1);
+ rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, request);
+ ASSERT (rv == msg.data.len);
+ /* request must be a proper C-string in addition to a vector */
+ vec_add1 (request, 0);
}
- vec_free (http_response);
- /* Send data from the beginning... */
- hs->data_offset = 0;
- hs->session_state = HTTP_STATE_SEND_MORE_DATA;
- return 1;
-}
+ /* Find and send data */
+ handle_request (hs, msg.method_type, request);
-static void *state_funcs[HTTP_STATE_N_STATES] = {
- state_closed,
- /* Waiting for GET, POST, etc. */
- state_established,
- /* Sent OK */
- state_sent_ok,
- /* Send more data */
- state_send_more_data,
-};
+ vec_free (request);
-static inline int
-http_static_server_rx_tx_callback (session_t * s,
- http_state_machine_called_from_t cf)
+ return 0;
+}
+
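+/** \brief Transport session tx callback: resume sending queued data once
+ * fifo space is available
+ */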
+static int
+hss_ts_tx_callback (session_t *ts)
{
- http_session_t *hs;
- int (*fp) (session_t *, http_session_t *, http_state_machine_called_from_t);
+ hss_session_t *hs;
+ u32 to_send;
int rv;
- /* Acquire a reader lock on the session table */
- http_static_server_sessions_reader_lock ();
- hs = http_static_server_session_lookup (s->thread_index, s->session_index);
+ hs = hss_session_get (ts->thread_index, ts->opaque);
+ if (!hs || !hs->data)
+ return 0;
- if (!hs)
+ to_send = hs->data_len - hs->data_offset;
+ rv = svm_fifo_enqueue (ts->tx_fifo, to_send, hs->data + hs->data_offset);
+
+ if (rv <= 0)
{
- clib_warning ("No http session for thread %d session_index %d",
- s->thread_index, s->session_index);
- http_static_server_sessions_reader_unlock ();
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
return 0;
}
- /* Execute state machine for this session */
- do
+ if (rv < to_send)
{
- fp = state_funcs[hs->session_state];
- rv = (*fp) (s, hs, cf);
- if (rv < 0)
- goto session_closed;
+ hs->data_offset += rv;
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
}
- while (rv);
- /* Reset the session expiration timer */
- http_static_server_session_timer_stop (hs);
- http_static_server_session_timer_start (hs);
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
-session_closed:
- http_static_server_sessions_reader_unlock ();
return 0;
}
-static int
-http_static_server_rx_callback (session_t * s)
-{
- return http_static_server_rx_tx_callback (s, CALLED_FROM_RX);
-}
-
-static int
-http_static_server_tx_callback (session_t * s)
-{
- return http_static_server_rx_tx_callback (s, CALLED_FROM_TX);
-}
-
-
/** \brief Session accept callback
*/
-
static int
-http_static_server_session_accept_callback (session_t * s)
+hss_ts_accept_callback (session_t *ts)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- http_session_t *hs;
-
- hsm->vpp_queue[s->thread_index] =
- session_main_get_vpp_event_queue (s->thread_index);
+ hss_session_t *hs;
+ u32 thresh;
- http_static_server_sessions_writer_lock ();
+ hs = hss_session_alloc (ts->thread_index);
- hs = http_static_server_session_alloc (s->thread_index);
- http_static_server_session_lookup_add (s->thread_index, s->session_index,
- hs->session_index);
- hs->rx_fifo = s->rx_fifo;
- hs->tx_fifo = s->tx_fifo;
- hs->vpp_session_index = s->session_index;
- hs->vpp_session_handle = session_handle (s);
- hs->session_state = HTTP_STATE_ESTABLISHED;
- http_static_server_session_timer_start (hs);
+ hs->vpp_session_index = ts->session_index;
+ hs->vpp_session_handle = session_handle (ts);
- http_static_server_sessions_writer_unlock ();
+  /* The application sets a dequeue threshold on its tx fifo to be notified
+   * when additional data can be enqueued. We want to keep the tx fifo
+   * reasonably full, but avoid a state where the fifo is full all the time
+   * and only small chunks of data can be enqueued on each notification.
+   * If the fifo is small (under 16K), set the threshold to its size,
+   * meaning a notification will be delivered only when the fifo empties.
+   */
+ thresh = clib_min (svm_fifo_size (ts->tx_fifo), HSS_FIFO_THRESH);
+ svm_fifo_set_deq_thresh (ts->tx_fifo, thresh);
- s->session_state = SESSION_STATE_READY;
+ ts->opaque = hs->session_index;
+ ts->session_state = SESSION_STATE_READY;
return 0;
}
-/** \brief Session disconnect callback
- */
-
static void
-http_static_server_session_disconnect_callback (session_t * s)
+hss_ts_disconnect_callback (session_t *ts)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- a->handle = session_handle (s);
+ a->handle = session_handle (ts);
a->app_index = hsm->app_index;
vnet_disconnect_session (a);
}
-/** \brief Session reset callback
- */
-
static void
-http_static_server_session_reset_callback (session_t * s)
+hss_ts_reset_callback (session_t *ts)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- a->handle = session_handle (s);
+ a->handle = session_handle (ts);
a->app_index = hsm->app_index;
vnet_disconnect_session (a);
}
static int
-http_static_server_session_connected_callback (u32 app_index, u32 api_context,
- session_t * s,
- session_error_t err)
+hss_ts_connected_callback (u32 app_index, u32 api_context, session_t *ts,
+ session_error_t err)
{
clib_warning ("called...");
return -1;
}
static int
-http_static_server_add_segment_callback (u32 client_index, u64 segment_handle)
+hss_add_segment_callback (u32 client_index, u64 segment_handle)
{
return 0;
}
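+/** \brief Session cleanup callback: detach the cache entry and free the
+ * http session
+ */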
static void
-http_static_session_cleanup (session_t * s, session_cleanup_ntf_t ntf)
+hss_ts_cleanup (session_t *s, session_cleanup_ntf_t ntf)
{
- http_session_t *hs;
+ hss_main_t *hsm = &hss_main;
+ hss_session_t *hs;
if (ntf == SESSION_CLEANUP_TRANSPORT)
return;
- http_static_server_sessions_writer_lock ();
-
- hs = http_static_server_session_lookup (s->thread_index, s->session_index);
+ hs = hss_session_get (s->thread_index, s->opaque);
if (!hs)
- goto done;
+ return;
- http_static_server_detach_cache_entry (hs);
- http_static_server_session_lookup_del (hs->thread_index,
- hs->vpp_session_index);
- vec_free (hs->rx_buf);
- http_static_server_session_free (hs);
+ if (hs->cache_pool_index != ~0)
+ {
+ hss_cache_detach_entry (&hsm->cache, hs->cache_pool_index);
+ hs->cache_pool_index = ~0;
+ }
-done:
- http_static_server_sessions_writer_unlock ();
+ if (hs->free_data)
+ vec_free (hs->data);
+ hs->data = 0;
+ hs->data_offset = 0;
+ hs->free_data = 0;
+ vec_free (hs->path);
+
+ hss_session_free (hs);
}
-/** \brief Session-layer virtual function table
- */
-static session_cb_vft_t http_static_server_session_cb_vft = {
- .session_accept_callback = http_static_server_session_accept_callback,
- .session_disconnect_callback =
- http_static_server_session_disconnect_callback,
- .session_connected_callback = http_static_server_session_connected_callback,
- .add_segment_callback = http_static_server_add_segment_callback,
- .builtin_app_rx_callback = http_static_server_rx_callback,
- .builtin_app_tx_callback = http_static_server_tx_callback,
- .session_reset_callback = http_static_server_session_reset_callback,
- .session_cleanup_callback = http_static_session_cleanup,
+static session_cb_vft_t hss_cb_vft = {
+ .session_accept_callback = hss_ts_accept_callback,
+ .session_disconnect_callback = hss_ts_disconnect_callback,
+ .session_connected_callback = hss_ts_connected_callback,
+ .add_segment_callback = hss_add_segment_callback,
+ .builtin_app_rx_callback = hss_ts_rx_callback,
+ .builtin_app_tx_callback = hss_ts_tx_callback,
+ .session_reset_callback = hss_ts_reset_callback,
+ .session_cleanup_callback = hss_ts_cleanup,
};
static int
-http_static_server_attach ()
+hss_attach ()
{
vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
u64 options[APP_OPTIONS_N_OPTIONS];
vnet_app_attach_args_t _a, *a = &_a;
u32 segment_size = 128 << 20;
@@ -1152,8 +639,8 @@ http_static_server_attach ()
segment_size = hsm->private_segment_size;
a->api_client_index = ~0;
- a->name = format (0, "test_http_static_server");
- a->session_cb_vft = &http_static_server_session_cb_vft;
+ a->name = format (0, "http_static_server");
+ a->session_cb_vft = &hss_cb_vft;
a->options = options;
a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
@@ -1186,19 +673,20 @@ http_static_server_attach ()
}
static int
-http_static_transport_needs_crypto (transport_proto_t proto)
+hss_transport_needs_crypto (transport_proto_t proto)
{
return proto == TRANSPORT_PROTO_TLS || proto == TRANSPORT_PROTO_DTLS ||
proto == TRANSPORT_PROTO_QUIC;
}
static int
-http_static_server_listen ()
+hss_listen (void)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
vnet_listen_args_t _a, *a = &_a;
char *uri = "tcp://0.0.0.0/80";
+ u8 need_crypto;
int rv;
clib_memset (a, 0, sizeof (*a));
@@ -1210,8 +698,12 @@ http_static_server_listen ()
if (parse_uri (uri, &sep))
return -1;
+ need_crypto = hss_transport_needs_crypto (sep.transport_proto);
+
+ sep.transport_proto = TRANSPORT_PROTO_HTTP;
clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
- if (http_static_transport_needs_crypto (a->sep_ext.transport_proto))
+
+ if (need_crypto)
{
session_endpoint_alloc_ext_cfg (&a->sep_ext,
TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
@@ -1219,257 +711,142 @@ http_static_server_listen ()
}
rv = vnet_listen (a);
- if (a->sep_ext.ext_cfg)
+
+ if (need_crypto)
clib_mem_free (a->sep_ext.ext_cfg);
- return rv;
-}
-static void
-http_static_server_session_close_cb (void *hs_handlep)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- http_session_t *hs;
- uword hs_handle;
- hs_handle = pointer_to_uword (hs_handlep);
- hs =
- http_static_server_session_get (hs_handle >> 24, hs_handle & 0x00FFFFFF);
-
- if (hsm->debug_level > 1)
- clib_warning ("terminate thread %d index %d hs %llx",
- hs_handle >> 24, hs_handle & 0x00FFFFFF, hs);
- if (!hs)
- return;
- hs->timer_handle = ~0;
- http_static_server_session_disconnect (hs);
+ return rv;
}
-/** \brief Expired session timer-wheel callback
- */
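+/** \brief Allocate the url handler hash tables and register the built-in
+ * json handlers
+ */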
static void
-http_expired_timers_dispatch (u32 * expired_timers)
+hss_url_handlers_init (hss_main_t *hsm)
{
- u32 hs_handle;
- int i;
-
- for (i = 0; i < vec_len (expired_timers); i++)
+ if (!hsm->get_url_handlers)
{
- /* Get session handle. The first bit is the timer id */
- hs_handle = expired_timers[i] & 0x7FFFFFFF;
- session_send_rpc_evt_to_thread (hs_handle >> 24,
- http_static_server_session_close_cb,
- uword_to_pointer (hs_handle, void *));
+ hsm->get_url_handlers = hash_create_string (0, sizeof (uword));
+ hsm->post_url_handlers = hash_create_string (0, sizeof (uword));
}
-}
-
-/** \brief Timer-wheel expiration process
- */
-static uword
-http_static_server_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
- vlib_frame_t * f)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- f64 now, timeout = 1.0;
- uword *event_data = 0;
- uword __clib_unused event_type;
-
- while (1)
- {
- vlib_process_wait_for_event_or_clock (vm, timeout);
- now = vlib_time_now (vm);
- event_type = vlib_process_get_events (vm, (uword **) & event_data);
-
- /* expire timers */
- clib_spinlock_lock (&http_static_server_main.tw_lock);
- tw_timer_expire_timers_2t_1w_2048sl (&hsm->tw, now);
- clib_spinlock_unlock (&http_static_server_main.tw_lock);
- vec_reset_length (event_data);
- }
- return 0;
+ hss_builtinurl_json_handlers_init ();
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (http_static_server_process_node) =
-{
- .function = http_static_server_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "static-http-server-process",
- .state = VLIB_NODE_STATE_DISABLED,
-};
-/* *INDENT-ON* */
-
-static int
-http_static_server_create (vlib_main_t * vm)
+int
+hss_create (vlib_main_t *vm)
{
vlib_thread_main_t *vtm = vlib_get_thread_main ();
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
u32 num_threads;
- vlib_node_t *n;
num_threads = 1 /* main thread */ + vtm->n_threads;
- vec_validate (hsm->vpp_queue, num_threads - 1);
vec_validate (hsm->sessions, num_threads - 1);
- vec_validate (hsm->session_to_http_session, num_threads - 1);
- clib_rwlock_init (&hsm->sessions_lock);
- clib_spinlock_init (&hsm->tw_lock);
-
- if (http_static_server_attach ())
+ if (hss_attach ())
{
clib_warning ("failed to attach server");
return -1;
}
- if (http_static_server_listen ())
+ if (hss_listen ())
{
clib_warning ("failed to start listening");
return -1;
}
- /* Init path-to-cache hash table */
- BV (clib_bihash_init) (&hsm->name_to_data, "http cache", 128, 32 << 20);
-
- hsm->get_url_handlers = hash_create_string (0, sizeof (uword));
- hsm->post_url_handlers = hash_create_string (0, sizeof (uword));
+ if (hsm->www_root)
+ hss_cache_init (&hsm->cache, hsm->cache_size, hsm->debug_level);
- /* Init timer wheel and process */
- tw_timer_wheel_init_2t_1w_2048sl (&hsm->tw, http_expired_timers_dispatch,
- 1.0 /* timer interval */ , ~0);
- vlib_node_set_state (vm, http_static_server_process_node.index,
- VLIB_NODE_STATE_POLLING);
- n = vlib_get_node (vm, http_static_server_process_node.index);
- vlib_start_process (vm, n->runtime_index);
+ if (hsm->enable_url_handlers)
+ hss_url_handlers_init (hsm);
return 0;
}
-/** \brief API helper function for vl_api_http_static_enable_t messages
- */
-int
-http_static_server_enable_api (u32 fifo_size, u32 cache_limit,
- u32 prealloc_fifos,
- u32 private_segment_size,
- u8 * www_root, u8 * uri)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- int rv;
-
- hsm->fifo_size = fifo_size;
- hsm->cache_limit = cache_limit;
- hsm->prealloc_fifos = prealloc_fifos;
- hsm->private_segment_size = private_segment_size;
- hsm->www_root = format (0, "%s%c", www_root, 0);
- hsm->uri = format (0, "%s%c", uri, 0);
-
- if (vec_len (hsm->www_root) < 2)
- return VNET_API_ERROR_INVALID_VALUE;
-
- if (hsm->my_client_index != ~0)
- return VNET_API_ERROR_APP_ALREADY_ATTACHED;
-
- vnet_session_enable_disable (hsm->vlib_main, 1 /* turn on TCP, etc. */ );
-
- rv = http_static_server_create (hsm->vlib_main);
- switch (rv)
- {
- case 0:
- break;
- default:
- vec_free (hsm->www_root);
- vec_free (hsm->uri);
- return VNET_API_ERROR_INIT_FAILED;
- }
- return 0;
-}
-
static clib_error_t *
-http_static_server_create_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
- http_static_server_main_t *hsm = &http_static_server_main;
unformat_input_t _line_input, *line_input = &_line_input;
+ hss_main_t *hsm = &hss_main;
+ clib_error_t *error = 0;
u64 seg_size;
- u8 *www_root = 0;
int rv;
+ if (hsm->app_index != (u32) ~0)
+ return clib_error_return (0, "http server already running...");
+
hsm->prealloc_fifos = 0;
hsm->private_segment_size = 0;
hsm->fifo_size = 0;
- /* 10mb cache limit, before LRU occurs */
- hsm->cache_limit = 10 << 20;
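+  /* Default to a 10mb cache size, before LRU eviction occurs */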
+ hsm->cache_size = 10 << 20;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
- goto no_wwwroot;
+ goto no_input;
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "www-root %s", &www_root))
+ if (unformat (line_input, "www-root %s", &hsm->www_root))
;
else
if (unformat (line_input, "prealloc-fifos %d", &hsm->prealloc_fifos))
;
else if (unformat (line_input, "private-segment-size %U",
unformat_memory_size, &seg_size))
- {
- if (seg_size >= 0x100000000ULL)
- {
- vlib_cli_output (vm, "private segment size %llu, too large",
- seg_size);
- return 0;
- }
- hsm->private_segment_size = seg_size;
- }
+ hsm->private_segment_size = seg_size;
else if (unformat (line_input, "fifo-size %d", &hsm->fifo_size))
hsm->fifo_size <<= 10;
else if (unformat (line_input, "cache-size %U", unformat_memory_size,
- &hsm->cache_limit))
- {
- if (hsm->cache_limit < (128 << 10))
- {
- return clib_error_return (0,
- "cache-size must be at least 128kb");
- }
- }
-
+ &hsm->cache_size))
+ ;
else if (unformat (line_input, "uri %s", &hsm->uri))
;
else if (unformat (line_input, "debug %d", &hsm->debug_level))
;
else if (unformat (line_input, "debug"))
hsm->debug_level = 1;
+ else if (unformat (line_input, "ptr-thresh %U", unformat_memory_size,
+ &hsm->use_ptr_thresh))
+ ;
+ else if (unformat (line_input, "url-handlers"))
+ hsm->enable_url_handlers = 1;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
}
+
unformat_free (line_input);
- if (www_root == 0)
+no_input:
+
+ if (error)
+ goto done;
+
+ if (hsm->www_root == 0 && !hsm->enable_url_handlers)
{
- no_wwwroot:
- return clib_error_return (0, "Must specify www-root <path>");
+ error = clib_error_return (0, "Must set www-root or url-handlers");
+ goto done;
}
- if (hsm->my_client_index != (u32) ~ 0)
+ if (hsm->cache_size < (128 << 10))
{
- vec_free (www_root);
- return clib_error_return (0, "http server already running...");
+ error = clib_error_return (0, "cache-size must be at least 128kb");
+ vec_free (hsm->www_root);
+ goto done;
}
- hsm->www_root = www_root;
-
vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
- rv = http_static_server_create (vm);
- switch (rv)
+ if ((rv = hss_create (vm)))
{
- case 0:
- break;
- default:
+ error = clib_error_return (0, "server_create returned %d", rv);
vec_free (hsm->www_root);
- return clib_error_return (0, "server_create returned %d", rv);
}
- return 0;
+
+done:
+
+ return error;
}
/*?
@@ -1484,92 +861,33 @@ http_static_server_create_command_fn (vlib_main_t * vm,
- * @cliexcmd{http static server www-root <path> [prealloc-fios <nn>]
+ * @cliexcmd{http static server www-root <path> [prealloc-fifos <nn>]
* [private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]}
?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (http_static_server_create_command, static) =
-{
+VLIB_CLI_COMMAND (hss_create_command, static) = {
.path = "http static server",
- .short_help = "http static server www-root <path> [prealloc-fifos <nn>]\n"
- "[private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]\n"
- "[debug [nn]]\n",
- .function = http_static_server_create_command_fn,
+ .short_help =
+ "http static server www-root <path> [prealloc-fifos <nn>]\n"
+ "[private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]\n"
+ "[ptr-thresh <nn>] [url-handlers] [debug [nn]]\n",
+ .function = hss_create_command_fn,
};
-/* *INDENT-ON* */
-
-/** \brief format a file cache entry
- */
-u8 *
-format_hsm_cache_entry (u8 * s, va_list * args)
-{
- file_data_cache_t *ep = va_arg (*args, file_data_cache_t *);
- f64 now = va_arg (*args, f64);
-
- /* Header */
- if (ep == 0)
- {
- s = format (s, "%40s%12s%20s", "File", "Size", "Age");
- return s;
- }
- s = format (s, "%40s%12lld%20.2f", ep->filename, vec_len (ep->data),
- now - ep->last_used);
- return s;
-}
-u8 *
-format_http_session_state (u8 * s, va_list * args)
-{
- http_session_state_t state = va_arg (*args, http_session_state_t);
- char *state_string = "bogus!";
-
- switch (state)
- {
- case HTTP_STATE_CLOSED:
- state_string = "closed";
- break;
- case HTTP_STATE_ESTABLISHED:
- state_string = "established";
- break;
- case HTTP_STATE_OK_SENT:
- state_string = "ok sent";
- break;
- case HTTP_STATE_SEND_MORE_DATA:
- state_string = "send more data";
- break;
- default:
- break;
- }
-
- return format (s, "%s", state_string);
-}
-
-u8 *
-format_http_session (u8 * s, va_list * args)
+static u8 *
+format_hss_session (u8 *s, va_list *args)
{
- http_session_t *hs = va_arg (*args, http_session_t *);
- int verbose = va_arg (*args, int);
+ hss_session_t *hs = va_arg (*args, hss_session_t *);
+ int __clib_unused verbose = va_arg (*args, int);
- s = format (s, "[%d]: state %U", hs->session_index,
- format_http_session_state, hs->session_state);
- if (verbose > 0)
- {
- s = format (s, "\n path %s, data length %u, data_offset %u",
- hs->path ? hs->path : (u8 *) "[none]",
- vec_len (hs->data), hs->data_offset);
- }
+ s = format (s, "\n path %s, data length %u, data_offset %u",
+ hs->path ? hs->path : (u8 *) "[none]", hs->data_len,
+ hs->data_offset);
return s;
}
static clib_error_t *
-http_show_static_server_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+hss_show_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- file_data_cache_t *ep, **entries = 0;
- int verbose = 0;
- int show_cache = 0;
- int show_sessions = 0;
- u32 index;
- f64 now;
+ int verbose = 0, show_cache = 0, show_sessions = 0;
+ hss_main_t *hsm = &hss_main;
if (hsm->www_root == 0)
return clib_error_return (0, "Static server disabled");
@@ -1592,61 +910,29 @@ http_show_static_server_command_fn (vlib_main_t * vm,
return clib_error_return (0, "specify one or more of cache, sessions");
if (show_cache)
- {
- if (verbose == 0)
- {
- vlib_cli_output
- (vm, "www_root %s, cache size %lld bytes, limit %lld bytes, "
- "evictions %lld",
- hsm->www_root, hsm->cache_size, hsm->cache_limit,
- hsm->cache_evictions);
- return 0;
- }
-
- now = vlib_time_now (vm);
-
- vlib_cli_output (vm, "%U", format_hsm_cache_entry, 0 /* header */ ,
- now);
-
- for (index = hsm->first_index; index != ~0;)
- {
- ep = pool_elt_at_index (hsm->cache_pool, index);
- index = ep->next_index;
- vlib_cli_output (vm, "%U", format_hsm_cache_entry, ep, now);
- }
-
- vlib_cli_output (vm, "%40s%12lld", "Total Size", hsm->cache_size);
-
- vec_free (entries);
- }
+ vlib_cli_output (vm, "%U", format_hss_cache, &hsm->cache, verbose);
if (show_sessions)
{
u32 *session_indices = 0;
- http_session_t *hs;
+ hss_session_t *hs;
int i, j;
- http_static_server_sessions_reader_lock ();
for (i = 0; i < vec_len (hsm->sessions); i++)
{
- /* *INDENT-OFF* */
pool_foreach (hs, hsm->sessions[i])
- {
vec_add1 (session_indices, hs - hsm->sessions[i]);
- }
- /* *INDENT-ON* */
for (j = 0; j < vec_len (session_indices); j++)
{
- vlib_cli_output (vm, "%U", format_http_session,
- pool_elt_at_index
- (hsm->sessions[i], session_indices[j]),
- verbose);
+ vlib_cli_output (
+ vm, "%U", format_hss_session,
+ pool_elt_at_index (hsm->sessions[i], session_indices[j]),
+ verbose);
}
vec_reset_length (session_indices);
}
- http_static_server_sessions_reader_unlock ();
vec_free (session_indices);
}
return 0;
@@ -1662,63 +948,24 @@ http_show_static_server_command_fn (vlib_main_t * vm,
* @cliend
* @cliexcmd{show http static server sessions cache [verbose [nn]]}
?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (http_show_static_server_command, static) =
-{
+VLIB_CLI_COMMAND (hss_show_command, static) = {
.path = "show http static server",
.short_help = "show http static server sessions cache [verbose [<nn>]]",
- .function = http_show_static_server_command_fn,
+ .function = hss_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
-http_clear_static_cache_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+hss_clear_cache_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- file_data_cache_t *dp;
- u32 free_index;
+ hss_main_t *hsm = &hss_main;
u32 busy_items = 0;
- BVT (clib_bihash_kv) kv;
if (hsm->www_root == 0)
return clib_error_return (0, "Static server disabled");
- http_static_server_sessions_reader_lock ();
-
- /* Walk the LRU list to find active entries */
- free_index = hsm->last_index;
- while (free_index != ~0)
- {
- dp = pool_elt_at_index (hsm->cache_pool, free_index);
- free_index = dp->prev_index;
- /* Which could be in use... */
- if (dp->inuse)
- {
- busy_items++;
- free_index = dp->next_index;
- continue;
- }
- kv.key = (u64) (dp->filename);
- kv.value = ~0ULL;
- if (BV (clib_bihash_add_del) (&hsm->name_to_data, &kv,
- 0 /* is_add */ ) < 0)
- {
- clib_warning ("BUG: cache clear delete '%s' FAILED!", dp->filename);
- }
+ busy_items = hss_cache_clear (&hsm->cache);
- lru_remove (hsm, dp);
- hsm->cache_size -= vec_len (dp->data);
- hsm->cache_evictions++;
- vec_free (dp->filename);
- vec_free (dp->data);
- if (hsm->debug_level > 1)
- clib_warning ("pool put index %d", dp - hsm->cache_pool);
- pool_put (hsm->cache_pool, dp);
- free_index = hsm->last_index;
- }
- http_static_server_sessions_reader_unlock ();
if (busy_items > 0)
vlib_cli_output (vm, "Note: %d busy items still in cache...", busy_items);
else
@@ -1737,32 +984,34 @@ http_clear_static_cache_command_fn (vlib_main_t * vm,
* @cliend
* @cliexcmd{clear http static cache}
?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (clear_http_static_cache_command, static) =
-{
+VLIB_CLI_COMMAND (clear_hss_cache_command, static) = {
.path = "clear http static cache",
.short_help = "clear http static cache",
- .function = http_clear_static_cache_command_fn,
+ .function = hss_clear_cache_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
-http_static_server_main_init (vlib_main_t * vm)
+hss_main_init (vlib_main_t *vm)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
- hsm->my_client_index = ~0;
+ hsm->app_index = ~0;
hsm->vlib_main = vm;
- hsm->first_index = hsm->last_index = ~0;
- clib_timebase_init (&hsm->timebase, 0 /* GMT */ ,
- CLIB_TIMEBASE_DAYLIGHT_NONE,
- &vm->clib_time /* share the system clock */ );
+ /* Set up file extension to mime type index map */
+ hsm->mime_type_indices_by_file_extensions =
+ hash_create_string (0, sizeof (uword));
+
+#define _(def, ext, str) \
+ hash_set_mem (hsm->mime_type_indices_by_file_extensions, ext, \
+ HTTP_CONTENT_##def);
+ foreach_http_content_type;
+#undef _
return 0;
}
-VLIB_INIT_FUNCTION (http_static_server_main_init);
+VLIB_INIT_FUNCTION (hss_main_init);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/idpf/CMakeLists.txt b/src/plugins/idpf/CMakeLists.txt
new file mode 100644
index 00000000000..1c7e5ec619c
--- /dev/null
+++ b/src/plugins/idpf/CMakeLists.txt
@@ -0,0 +1,28 @@
+# Copyright (c) 2023 Intel and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(idpf
+ SOURCES
+ cli.c
+ device.c
+ format.c
+ plugin.c
+ idpf_controlq.c
+ idpf_api.c
+
+ API_FILES
+ idpf.api
+
+ API_TEST_SOURCES
+ idpf_test.c
+)
diff --git a/src/plugins/idpf/README.rst b/src/plugins/idpf/README.rst
new file mode 100644
index 00000000000..7d4a6b93f3a
--- /dev/null
+++ b/src/plugins/idpf/README.rst
@@ -0,0 +1,59 @@
+Intel IDPF device driver
+========================
+
+Overview
+--------
+
+This plugin provides native device support for the Intel Infrastructure
+Data Path Function (IDPF). Currently, IDPF is a driver specification
+for future Intel Physical Function devices. IDPF defines the
+communication channel between the Data Plane (DP) and the Control
+Plane (CP).
+
+Prerequisites
+-------------
+
+- The driver requires MSI-X interrupt support, which the
+  uio_pci_generic driver does not provide, so vfio-pci must be used.
+  On systems without an IOMMU, the vfio driver can still be used with
+  recent kernels that support no-iommu mode.
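+
+As a minimal binding sketch (the PCI address below is illustrative;
+substitute your own device), the device can be handed to vfio-pci via
+sysfs before starting VPP:
+
+::
+
+   modprobe vfio-pci
+   echo vfio-pci > /sys/bus/pci/devices/0000:4b:00.0/driver_override
+   echo 0000:4b:00.0 > /sys/bus/pci/devices/0000:4b:00.0/driver/unbind
+   echo 0000:4b:00.0 > /sys/bus/pci/drivers_probe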
+
+Known issues
+------------
+
+- This driver is still in the experimental phase, and the
+  corresponding device is not released yet.
+
+- The current version only supports device initialization. Basic I/O
+  functions will be supported in the next release.
+
+Usage
+-----
+
+Interface Creation
+~~~~~~~~~~~~~~~~~~
+
+Interfaces can be created dynamically using the following CLI:
+
+::
+
+ create interface idpf 0000:4b:00.0 vport-num 1 rx-single 1 tx-single 1
+ set int state idpf-0/4b/0/0 up
+
+- ``vport-num``: number of vports to be created; each vport
+  corresponds to one netdev.
+- ``rx-single``: configure the Rx queue mode; split queue mode is the
+  default.
+- ``tx-single``: configure the Tx queue mode; split queue mode is the
+  default.
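+
+The CLI also accepts optional ``rxq-num``, ``txq-num``, ``rxq-size``,
+``txq-size`` and ``name`` parameters; see ``src/plugins/idpf/cli.c``
+for the full list.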
+
+Interface Deletion
+~~~~~~~~~~~~~~~~~~
+
+An interface can be deleted with the following CLI:
+
+::
+
+ delete interface idpf <interface name>
+
+Interface Statistics
+~~~~~~~~~~~~~~~~~~~~
+
+Interface statistics can be displayed with
+``sh hardware-interface <if-name>`` command.
diff --git a/src/plugins/idpf/cli.c b/src/plugins/idpf/cli.c
new file mode 100644
index 00000000000..592c2612c97
--- /dev/null
+++ b/src/plugins/idpf/cli.c
@@ -0,0 +1,135 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <idpf/idpf.h>
+
+static clib_error_t *
+idpf_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ idpf_create_if_args_t args;
+ u32 tmp;
+
+ clib_memset (&args, 0, sizeof (idpf_create_if_args_t));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vlib_pci_addr, &args.addr))
+ ;
+      else if (unformat (line_input, "rx-single %u", &tmp))
+	args.rxq_single = tmp ? 1 : 0;
+      else if (unformat (line_input, "tx-single %u", &tmp))
+	args.txq_single = tmp ? 1 : 0;
+ else if (unformat (line_input, "rxq-num %u", &tmp))
+ args.rxq_num = tmp;
+ else if (unformat (line_input, "txq-num %u", &tmp))
+ args.txq_num = tmp;
+ else if (unformat (line_input, "rxq-size %u", &tmp))
+ args.rxq_size = tmp;
+ else if (unformat (line_input, "txq-size %u", &tmp))
+ args.txq_size = tmp;
+ else if (unformat (line_input, "vport-num %u", &tmp))
+ args.req_vport_nb = tmp;
+ else if (unformat (line_input, "name %s", &args.name))
+ ;
+ else
+	return clib_error_return (0, "unknown input `%U'",
+				  format_unformat_error, line_input);
+ }
+ unformat_free (line_input);
+
+ idpf_create_if (vm, &args);
+
+ vec_free (args.name);
+
+ return args.error;
+}
+
+VLIB_CLI_COMMAND (idpf_create_command, static) = {
+ .path = "create interface idpf",
+  .short_help = "create interface idpf <pci-address> "
+		"[vport-num <n>] [rx-single <0|1>] [tx-single <0|1>]",
+ .function = idpf_create_command_fn,
+};
+
+static clib_error_t *
+idpf_delete_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 sw_if_index = ~0;
+ vnet_hw_interface_t *hw;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else
+	return clib_error_return (0, "unknown input `%U'",
+				  format_unformat_error, line_input);
+ }
+ unformat_free (line_input);
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0,
+ "please specify interface name or sw_if_index");
+
+ hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
+ if (hw == NULL || idpf_device_class.index != hw->dev_class_index)
+ return clib_error_return (0, "not an IDPF interface");
+
+ vlib_process_signal_event (vm, idpf_process_node.index,
+ IDPF_PROCESS_EVENT_DELETE_IF, hw->dev_instance);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (idpf_delete_command, static) = {
+ .path = "delete interface idpf",
+ .short_help = "delete interface idpf "
+ "{<interface> | sw_if_index <sw_idx>}",
+ .function = idpf_delete_command_fn,
+ .is_mp_safe = 1,
+};
+
+clib_error_t *
+idpf_cli_init (vlib_main_t *vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (idpf_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/device.c b/src/plugins/idpf/device.c
new file mode 100644
index 00000000000..44b8116d996
--- /dev/null
+++ b/src/plugins/idpf/device.c
@@ -0,0 +1,2265 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <idpf/idpf.h>
+#include <vpp/app/version.h>
+#include <vnet/plugin/plugin.h>
+
+#define IDPF_RXQ_SZ 512
+#define IDPF_TXQ_SZ 512
+
+#define PCI_VENDOR_ID_INTEL 0x8086
+#define PCI_DEVICE_ID_INTEL_IDPF_PF 0x1452
+#define PCI_DEVICE_ID_INTEL_IDPF_VF 0x1889
+
+VLIB_REGISTER_LOG_CLASS (idpf_log) = {
+ .class_name = "idpf",
+};
+
+VLIB_REGISTER_LOG_CLASS (idpf_stats_log) = {
+ .class_name = "idpf",
+ .subclass_name = "stats",
+};
+
+idpf_main_t idpf_main;
+void idpf_delete_if (vlib_main_t *vm, idpf_device_t *id, int with_barrier);
+
+static pci_device_id_t idpf_pci_device_ids[] = {
+ { .vendor_id = PCI_VENDOR_ID_INTEL,
+ .device_id = PCI_DEVICE_ID_INTEL_IDPF_PF },
+ { .vendor_id = PCI_VENDOR_ID_INTEL,
+ .device_id = PCI_DEVICE_ID_INTEL_IDPF_VF },
+ { 0 },
+};
+
+static int
+idpf_vc_clean (vlib_main_t *vm, idpf_device_t *id)
+{
+ idpf_ctlq_msg_t *q_msg[IDPF_CTLQ_LEN];
+ uint16_t num_q_msg = IDPF_CTLQ_LEN;
+ idpf_dma_mem_t *dma_mem;
+ uint32_t i;
+ int err;
+
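+  /* Poll the control send queue a few times; an empty queue simply
+   * means there is nothing left to clean */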
+ for (i = 0; i < 10; i++)
+ {
+ err = idpf_ctlq_clean_sq (id->asq, &num_q_msg, q_msg);
+ vlib_process_suspend (vm, 0.02);
+ if (num_q_msg > 0)
+ break;
+ }
+ if (err != 0)
+ return err;
+
+ /* Empty queue is not an error */
+ for (i = 0; i < num_q_msg; i++)
+ {
+ dma_mem = q_msg[i]->ctx.indirect.payload;
+ if (dma_mem != NULL)
+ idpf_free_dma_mem (id, dma_mem);
+ clib_mem_free (q_msg[i]);
+ }
+
+ return 0;
+}
+
+static idpf_vc_result_t
+idpf_read_msg_from_cp (idpf_device_t *id, u16 buf_len, u8 *buf)
+{
+ idpf_ctlq_msg_t ctlq_msg;
+ idpf_dma_mem_t *dma_mem = NULL;
+ idpf_vc_result_t result = IDPF_MSG_NON;
+ u32 opcode;
+ u16 pending = 1;
+ int ret;
+
+ ret = idpf_ctlq_recv (id->arq, &pending, &ctlq_msg);
+ if (ret != 0)
+ {
+ idpf_log_debug (id, "Can't read msg from AQ");
+ if (ret != -ENOMSG)
+ result = IDPF_MSG_ERR;
+ return result;
+ }
+
+ clib_memcpy_fast (buf, ctlq_msg.ctx.indirect.payload->va, buf_len);
+
+ opcode = ctlq_msg.cookie.mbx.chnl_opcode;
+ id->cmd_retval = ctlq_msg.cookie.mbx.chnl_retval;
+
+ idpf_log_debug (id, "CQ from CP carries opcode %u, retval %d", opcode,
+ id->cmd_retval);
+
+ if (opcode == VIRTCHNL2_OP_EVENT)
+ {
+ virtchnl2_event_t *ve =
+ (virtchnl2_event_t *) ctlq_msg.ctx.indirect.payload->va;
+
+ result = IDPF_MSG_SYS;
+ switch (ve->event)
+ {
+ case VIRTCHNL2_EVENT_LINK_CHANGE:
+ break;
+ default:
+ idpf_log_err (id, "%s: Unknown event %d from CP", __func__,
+ ve->event);
+ break;
+ }
+ }
+ else
+ {
+      /* async reply to a command previously issued by the PF driver */
+ result = IDPF_MSG_CMD;
+ if (opcode != id->pend_cmd)
+ {
+	  idpf_log_warn (id, "command mismatch, expected %u, got %u",
+ id->pend_cmd, opcode);
+ result = IDPF_MSG_ERR;
+ }
+ }
+
+ if (ctlq_msg.data_len != 0)
+ dma_mem = ctlq_msg.ctx.indirect.payload;
+ else
+ pending = 0;
+
+ ret = idpf_ctlq_post_rx_buffs (id, id->arq, &pending, &dma_mem);
+ if (ret != 0 && dma_mem != NULL)
+ idpf_free_dma_mem (id, dma_mem);
+
+ return result;
+}
+
+clib_error_t *
+idpf_send_vc_msg (vlib_main_t *vm, idpf_device_t *id, virtchnl2_op_t op,
+ u8 *in, u16 in_len)
+{
+ idpf_ctlq_msg_t *ctlq_msg;
+ idpf_dma_mem_t *dma_mem;
+ int error = 0;
+
+ error = idpf_vc_clean (vm, id);
+ if (error)
+ goto err;
+
+ ctlq_msg = clib_mem_alloc (sizeof (idpf_ctlq_msg_t));
+ if (ctlq_msg == NULL)
+ goto err;
+ clib_memset (ctlq_msg, 0, sizeof (idpf_ctlq_msg_t));
+
+ dma_mem = clib_mem_alloc (sizeof (idpf_dma_mem_t));
+ if (dma_mem == NULL)
+ goto dma_mem_error;
+ clib_memset (dma_mem, 0, sizeof (idpf_dma_mem_t));
+
+ dma_mem->va = idpf_alloc_dma_mem (vm, id, dma_mem, IDPF_DFLT_MBX_BUF_SIZE);
+ if (dma_mem->va == NULL)
+ {
+ clib_mem_free (dma_mem);
+ goto err;
+ }
+
+ clib_memcpy (dma_mem->va, in, in_len);
+
+ ctlq_msg->opcode = idpf_mbq_opc_send_msg_to_pf;
+ ctlq_msg->func_id = 0;
+ ctlq_msg->data_len = in_len;
+ ctlq_msg->cookie.mbx.chnl_opcode = op;
+ ctlq_msg->cookie.mbx.chnl_retval = VIRTCHNL2_STATUS_SUCCESS;
+ ctlq_msg->ctx.indirect.payload = dma_mem;
+
+ error = idpf_ctlq_send (id, id->asq, 1, ctlq_msg);
+ if (error)
+ goto send_error;
+
+ return 0;
+
+send_error:
+ idpf_free_dma_mem (id, dma_mem);
+dma_mem_error:
+ clib_mem_free (ctlq_msg);
+err:
+ return clib_error_return (0, "idpf send vc msg to PF failed");
+}
+
+clib_error_t *
+idpf_read_one_msg (vlib_main_t *vm, idpf_device_t *id, u32 ops, u8 *buf,
+ u16 buf_len)
+{
+ int i = 0, ret;
+ f64 suspend_time = IDPF_SEND_TO_PF_SUSPEND_TIME;
+
+ do
+ {
+ ret = idpf_read_msg_from_cp (id, buf_len, buf);
+ if (ret == IDPF_MSG_CMD)
+ break;
+ vlib_process_suspend (vm, suspend_time);
+ }
+ while (i++ < IDPF_SEND_TO_PF_MAX_TRY_TIMES);
+ if (i >= IDPF_SEND_TO_PF_MAX_TRY_TIMES ||
+ id->cmd_retval != VIRTCHNL2_STATUS_SUCCESS)
+ return clib_error_return (0, "idpf read one msg failed");
+
+ return 0;
+}
+
+clib_error_t *
+idpf_execute_vc_cmd (vlib_main_t *vm, idpf_device_t *id, idpf_cmd_info_t *args)
+{
+ clib_error_t *error = 0;
+ f64 suspend_time = IDPF_SEND_TO_PF_SUSPEND_TIME;
+ int i = 0;
+
+ if (id->pend_cmd == VIRTCHNL2_OP_UNKNOWN)
+ id->pend_cmd = args->ops;
+ else
+    return clib_error_return (0, "There is an incomplete cmd %d",
+			      id->pend_cmd);
+
+ if ((error = idpf_send_vc_msg (vm, id, args->ops, args->in_args,
+ args->in_args_size)))
+ return error;
+
+ switch (args->ops)
+ {
+ case VIRTCHNL2_OP_VERSION:
+ case VIRTCHNL2_OP_GET_CAPS:
+ case VIRTCHNL2_OP_CREATE_VPORT:
+ case VIRTCHNL2_OP_DESTROY_VPORT:
+ case VIRTCHNL2_OP_SET_RSS_KEY:
+ case VIRTCHNL2_OP_SET_RSS_LUT:
+ case VIRTCHNL2_OP_SET_RSS_HASH:
+ case VIRTCHNL2_OP_CONFIG_RX_QUEUES:
+ case VIRTCHNL2_OP_CONFIG_TX_QUEUES:
+ case VIRTCHNL2_OP_ENABLE_QUEUES:
+ case VIRTCHNL2_OP_DISABLE_QUEUES:
+ case VIRTCHNL2_OP_ENABLE_VPORT:
+ case VIRTCHNL2_OP_DISABLE_VPORT:
+ case VIRTCHNL2_OP_MAP_QUEUE_VECTOR:
+ case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR:
+ case VIRTCHNL2_OP_ALLOC_VECTORS:
+ case VIRTCHNL2_OP_DEALLOC_VECTORS:
+ case VIRTCHNL2_OP_GET_STATS:
+ /* for init virtchnl ops, need to poll the response */
+ error = idpf_read_one_msg (vm, id, args->ops, args->out_buffer,
+ args->out_size);
+ if (error)
+ return clib_error_return (0, "idpf read vc message from PF failed");
+ clear_cmd (id);
+ break;
+ case VIRTCHNL2_OP_GET_PTYPE_INFO:
+ break;
+ default:
+ do
+ {
+ if (id->pend_cmd == VIRTCHNL2_OP_UNKNOWN)
+ break;
+ vlib_process_suspend (vm, suspend_time);
+	  /* Keep polling if no reply was read or only a sys event arrived */
+ }
+ while (i++ < IDPF_SEND_TO_PF_MAX_TRY_TIMES);
+      /* No response received or the command returned failure */
+ if (i >= IDPF_SEND_TO_PF_MAX_TRY_TIMES ||
+ id->cmd_retval != VIRTCHNL2_STATUS_SUCCESS)
+ return clib_error_return (
+ 0, "No response or return failure (%d) for cmd %d", id->cmd_retval,
+ args->ops);
+ break;
+ }
+
+ return error;
+}
+
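+/* Return the DMA address of a buffer: the virtual address when the
+ * device supports VA DMA, otherwise the physical address */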
+static inline uword
+idpf_dma_addr (vlib_main_t *vm, idpf_device_t *id, void *p)
+{
+ return (id->flags & IDPF_DEVICE_F_VA_DMA) ? pointer_to_uword (p) :
+ vlib_physmem_get_pa (vm, p);
+}
+
+clib_error_t *
+idpf_vc_config_irq_map_unmap (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_t *vport, bool map)
+{
+ virtchnl2_queue_vector_maps_t *map_info;
+ virtchnl2_queue_vector_t *vecmap;
+ u16 nb_rxq = vport->id->n_rx_queues;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+ int len, i;
+
+ len = sizeof (virtchnl2_queue_vector_maps_t) +
+ (nb_rxq - 1) * sizeof (virtchnl2_queue_vector_t);
+
+ map_info = clib_mem_alloc_aligned (len, CLIB_CACHE_LINE_BYTES);
+ clib_memset (map_info, 0, len);
+
+ map_info->vport_id = vport->vport_id;
+ map_info->num_qv_maps = nb_rxq;
+ for (i = 0; i < nb_rxq; i++)
+ {
+ vecmap = &map_info->qv_maps[i];
+ vecmap->queue_id = vport->qv_map[i].queue_id;
+ vecmap->vector_id = vport->qv_map[i].vector_id;
+ vecmap->itr_idx = VIRTCHNL2_ITR_IDX_0;
+ vecmap->queue_type = VIRTCHNL2_QUEUE_TYPE_RX;
+ }
+
+ args.ops =
+ map ? VIRTCHNL2_OP_MAP_QUEUE_VECTOR : VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR;
+ args.in_args = (u8 *) map_info;
+ args.in_args_size = len;
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+  error = idpf_execute_vc_cmd (vm, id, &args);
+  clib_mem_free (map_info);
+  if (error != 0)
+    return clib_error_return (
+      0, "Failed to execute command of VIRTCHNL2_OP_%s_QUEUE_VECTOR",
+      map ? "MAP" : "UNMAP");
+
+  return error;
+}
+
+clib_error_t *
+idpf_config_rx_queues_irqs (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_t *vport)
+{
+ virtchnl2_queue_vector_t *qv_map;
+ clib_error_t *error = 0;
+ u32 dynctl_reg_start;
+ u32 itrn_reg_start;
+ u32 dynctl_val, itrn_val;
+ int i;
+
+ qv_map = clib_mem_alloc_aligned (id->n_rx_queues *
+ sizeof (virtchnl2_queue_vector_t),
+ CLIB_CACHE_LINE_BYTES);
+ clib_memset (qv_map, 0, id->n_rx_queues * sizeof (virtchnl2_queue_vector_t));
+
+ dynctl_reg_start = vport->recv_vectors->vchunks.vchunks->dynctl_reg_start;
+ itrn_reg_start = vport->recv_vectors->vchunks.vchunks->itrn_reg_start;
+ dynctl_val = idpf_reg_read (id, dynctl_reg_start);
+ idpf_log_debug (id, "Value of dynctl_reg_start is 0x%x", dynctl_val);
+ itrn_val = idpf_reg_read (id, itrn_reg_start);
+ idpf_log_debug (id, "Value of itrn_reg_start is 0x%x", itrn_val);
+
+ if (itrn_val != 0)
+ idpf_reg_write (id, dynctl_reg_start,
+ VIRTCHNL2_ITR_IDX_0 << PF_GLINT_DYN_CTL_ITR_INDX_S |
+ PF_GLINT_DYN_CTL_WB_ON_ITR_M |
+ itrn_val << PF_GLINT_DYN_CTL_INTERVAL_S);
+ else
+ idpf_reg_write (id, dynctl_reg_start,
+ VIRTCHNL2_ITR_IDX_0 << PF_GLINT_DYN_CTL_ITR_INDX_S |
+ PF_GLINT_DYN_CTL_WB_ON_ITR_M |
+ IDPF_DFLT_INTERVAL << PF_GLINT_DYN_CTL_INTERVAL_S);
+
+ for (i = 0; i < id->n_rx_queues; i++)
+ {
+ /* map all queues to the same vector */
+ qv_map[i].queue_id = vport->chunks_info.rx_start_qid + i;
+ qv_map[i].vector_id =
+ vport->recv_vectors->vchunks.vchunks->start_vector_id;
+ }
+ vport->qv_map = qv_map;
+
+ if ((error = idpf_vc_config_irq_map_unmap (vm, id, vport, true)))
+ {
+ idpf_log_err (id, "config interrupt mapping failed");
+ goto config_irq_map_err;
+ }
+
+ return error;
+
+config_irq_map_err:
+ clib_mem_free (vport->qv_map);
+ vport->qv_map = NULL;
+
+ return error;
+}
+
+clib_error_t *
+idpf_rx_split_bufq_setup (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_t *vport, idpf_rxq_t *bufq, u16 qid,
+ u16 rxq_size)
+{
+ clib_error_t *err;
+ u32 n_alloc, i;
+
+ bufq->size = rxq_size;
+ bufq->next = 0;
+ bufq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, bufq->size * sizeof (virtchnl2_rx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+
+  bufq->buffer_pool_index =
+    vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+  if (bufq->descs == 0)
+    return vlib_physmem_last_error (vm);
+
+  if ((err = vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) bufq->descs)))
+ return err;
+
+ clib_memset ((void *) bufq->descs, 0,
+ bufq->size * sizeof (virtchnl2_rx_desc_t));
+ vec_validate_aligned (bufq->bufs, bufq->size, CLIB_CACHE_LINE_BYTES);
+ bufq->qrx_tail = id->bar0 + (vport->chunks_info.rx_buf_qtail_start +
+ qid * vport->chunks_info.rx_buf_qtail_spacing);
+
+ n_alloc = vlib_buffer_alloc_from_pool (vm, bufq->bufs, bufq->size - 8,
+ bufq->buffer_pool_index);
+ if (n_alloc == 0)
+ return clib_error_return (0, "buffer allocation error");
+
+ bufq->n_enqueued = n_alloc;
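+  /* Prefill each descriptor with its buffer address: virtual when the
+   * device supports VA DMA, physical otherwise */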
+ virtchnl2_rx_desc_t *d = bufq->descs;
+ for (i = 0; i < n_alloc; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, bufq->bufs[i]);
+ if (id->flags & IDPF_DEVICE_F_VA_DMA)
+ d->qword[0] = vlib_buffer_get_va (b);
+ else
+ d->qword[0] = vlib_buffer_get_pa (vm, b);
+ d++;
+ }
+
+ return 0;
+}
+
+clib_error_t *
+idpf_split_rxq_init (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 rxq_size)
+{
+ clib_error_t *err;
+ idpf_rxq_t *rxq;
+ u32 n_alloc, i;
+
+ vec_validate_aligned (vport->rxqs, qid, CLIB_CACHE_LINE_BYTES);
+ rxq = vec_elt_at_index (vport->rxqs, qid);
+ rxq->size = rxq_size;
+ rxq->next = 0;
+ rxq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, rxq->size * sizeof (virtchnl2_rx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+
+ rxq->buffer_pool_index =
+ vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+ if (rxq->descs == 0)
+ return vlib_physmem_last_error (vm);
+
+ if ((err = vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) rxq->descs)))
+ return err;
+
+ clib_memset ((void *) rxq->descs, 0,
+ rxq->size * sizeof (virtchnl2_rx_desc_t));
+ vec_validate_aligned (rxq->bufs, rxq->size, CLIB_CACHE_LINE_BYTES);
+ rxq->qrx_tail = id->bar0 + (vport->chunks_info.rx_qtail_start +
+ qid * vport->chunks_info.rx_qtail_spacing);
+
+ n_alloc = vlib_buffer_alloc_from_pool (vm, rxq->bufs, rxq->size - 8,
+ rxq->buffer_pool_index);
+
+ if (n_alloc == 0)
+ return clib_error_return (0, "buffer allocation error");
+
+ rxq->n_enqueued = n_alloc;
+ virtchnl2_rx_desc_t *d = rxq->descs;
+ for (i = 0; i < n_alloc; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, rxq->bufs[i]);
+ if (id->flags & IDPF_DEVICE_F_VA_DMA)
+ d->qword[0] = vlib_buffer_get_va (b);
+ else
+ d->qword[0] = vlib_buffer_get_pa (vm, b);
+ d++;
+ }
+
+ err =
+ idpf_rx_split_bufq_setup (vm, id, vport, rxq->bufq1, 2 * qid, rxq_size);
+ if (err)
+ return err;
+  err = idpf_rx_split_bufq_setup (vm, id, vport, rxq->bufq2, 2 * qid + 1,
+				  rxq_size);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+clib_error_t *
+idpf_single_rxq_init (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 rxq_size)
+{
+ clib_error_t *err;
+ idpf_rxq_t *rxq;
+ u32 n_alloc, i;
+
+ vec_validate_aligned (vport->rxqs, qid, CLIB_CACHE_LINE_BYTES);
+ rxq = vec_elt_at_index (vport->rxqs, qid);
+ rxq->queue_index = vport->chunks_info.rx_start_qid + qid;
+ rxq->size = rxq_size;
+ rxq->next = 0;
+ rxq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, rxq->size * sizeof (virtchnl2_rx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+
+ rxq->buffer_pool_index =
+ vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+ if (rxq->descs == 0)
+ return vlib_physmem_last_error (vm);
+
+ err = vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) rxq->descs);
+ if (err)
+ return err;
+
+ clib_memset ((void *) rxq->descs, 0,
+ rxq->size * sizeof (virtchnl2_rx_desc_t));
+ vec_validate_aligned (rxq->bufs, rxq->size, CLIB_CACHE_LINE_BYTES);
+ rxq->qrx_tail = id->bar0 + (vport->chunks_info.rx_qtail_start +
+ qid * vport->chunks_info.rx_qtail_spacing);
+
+ n_alloc = vlib_buffer_alloc_from_pool (vm, rxq->bufs, rxq->size - 8,
+ rxq->buffer_pool_index);
+
+ if (n_alloc == 0)
+ return clib_error_return (0, "buffer allocation error");
+
+ rxq->n_enqueued = n_alloc;
+ virtchnl2_rx_desc_t *d = rxq->descs;
+ for (i = 0; i < n_alloc; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, rxq->bufs[i]);
+ if (id->flags & IDPF_DEVICE_F_VA_DMA)
+ d->qword[0] = vlib_buffer_get_va (b);
+ else
+ d->qword[0] = vlib_buffer_get_pa (vm, b);
+ d++;
+ }
+
+ return 0;
+}
+
+clib_error_t *
+idpf_rx_queue_setup (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 rxq_size)
+{
+ if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ return idpf_single_rxq_init (vm, id, vport, qid, rxq_size);
+ else
+ return idpf_split_rxq_init (vm, id, vport, qid, rxq_size);
+}
+
+clib_error_t *
+idpf_tx_split_complq_setup (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_t *vport, idpf_txq_t *complq, u16 qid,
+ u16 txq_size)
+{
+ clib_error_t *err;
+ u16 n;
+ u8 bpi = vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+ complq->size = txq_size;
+ complq->next = 0;
+ clib_spinlock_init (&complq->lock);
+
+ n = (complq->size / 510) + 1;
+ vec_validate_aligned (complq->ph_bufs, n, CLIB_CACHE_LINE_BYTES);
+
+ if (!vlib_buffer_alloc_from_pool (vm, complq->ph_bufs, n, bpi))
+ return clib_error_return (0, "buffer allocation error");
+
+ complq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, complq->size * sizeof (idpf_tx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+ if (complq->descs == 0)
+ return vlib_physmem_last_error (vm);
+
+ if ((err =
+ vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) complq->descs)))
+ return err;
+
+ vec_validate_aligned (complq->bufs, complq->size, CLIB_CACHE_LINE_BYTES);
+ complq->qtx_tail =
+ id->bar0 + (vport->chunks_info.tx_compl_qtail_start +
+ qid * vport->chunks_info.tx_compl_qtail_spacing);
+
+ /* initialize ring of pending RS slots */
+ clib_ring_new_aligned (complq->rs_slots, 32, CLIB_CACHE_LINE_BYTES);
+
+ vec_validate_aligned (complq->tmp_descs, complq->size,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (complq->tmp_bufs, complq->size, CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+clib_error_t *
+idpf_split_txq_init (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 txq_size)
+{
+ clib_error_t *err;
+ idpf_txq_t *txq;
+ u16 n, complq_qid;
+ u8 bpi = vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+ vec_validate_aligned (vport->txqs, qid, CLIB_CACHE_LINE_BYTES);
+ txq = vec_elt_at_index (vport->txqs, qid);
+ txq->size = txq_size;
+ txq->next = 0;
+ clib_spinlock_init (&txq->lock);
+
+ n = (txq->size / 510) + 1;
+ vec_validate_aligned (txq->ph_bufs, n, CLIB_CACHE_LINE_BYTES);
+
+ if (!vlib_buffer_alloc_from_pool (vm, txq->ph_bufs, n, bpi))
+ return clib_error_return (0, "buffer allocation error");
+
+ txq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, txq->size * sizeof (idpf_tx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+ if (txq->descs == 0)
+ return vlib_physmem_last_error (vm);
+
+ err = vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) txq->descs);
+ if (err)
+ return err;
+
+ vec_validate_aligned (txq->bufs, txq->size, CLIB_CACHE_LINE_BYTES);
+ txq->qtx_tail = id->bar0 + (vport->chunks_info.tx_qtail_start +
+ qid * vport->chunks_info.tx_qtail_spacing);
+
+ /* initialize ring of pending RS slots */
+ clib_ring_new_aligned (txq->rs_slots, 32, CLIB_CACHE_LINE_BYTES);
+
+ vec_validate_aligned (txq->tmp_descs, txq->size, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (txq->tmp_bufs, txq->size, CLIB_CACHE_LINE_BYTES);
+
+ complq_qid = vport->chunks_info.tx_compl_start_qid + qid;
+ err = idpf_tx_split_complq_setup (vm, id, vport, txq->complq, complq_qid,
+ 2 * txq_size);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+clib_error_t *
+idpf_single_txq_init (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 txq_size)
+{
+ clib_error_t *err;
+ idpf_txq_t *txq;
+ u16 n;
+ u8 bpi = vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+ vec_validate_aligned (vport->txqs, qid, CLIB_CACHE_LINE_BYTES);
+ txq = vec_elt_at_index (vport->txqs, qid);
+ txq->queue_index = vport->chunks_info.tx_start_qid + qid;
+ txq->size = txq_size;
+ txq->next = 0;
+ clib_spinlock_init (&txq->lock);
+
+ n = (txq->size / 510) + 1;
+ vec_validate_aligned (txq->ph_bufs, n, CLIB_CACHE_LINE_BYTES);
+
+ if (!vlib_buffer_alloc_from_pool (vm, txq->ph_bufs, n, bpi))
+ return clib_error_return (0, "buffer allocation error");
+
+ txq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, txq->size * sizeof (idpf_tx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+ if (txq->descs == 0)
+ return vlib_physmem_last_error (vm);
+
+ err = vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) txq->descs);
+ if (err)
+ return err;
+
+ vec_validate_aligned (txq->bufs, txq->size, CLIB_CACHE_LINE_BYTES);
+ txq->qtx_tail = id->bar0 + (vport->chunks_info.tx_qtail_start +
+ qid * vport->chunks_info.tx_qtail_spacing);
+
+ /* initialize ring of pending RS slots */
+ clib_ring_new_aligned (txq->rs_slots, 32, CLIB_CACHE_LINE_BYTES);
+
+ vec_validate_aligned (txq->tmp_descs, txq->size, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (txq->tmp_bufs, txq->size, CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+clib_error_t *
+idpf_tx_queue_setup (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 txq_size)
+{
+ if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ return idpf_single_txq_init (vm, id, vport, qid, txq_size);
+ else
+ return idpf_split_txq_init (vm, id, vport, qid, txq_size);
+}
+
+clib_error_t *
+idpf_vc_config_txq (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid)
+{
+ idpf_txq_t *txq;
+ virtchnl2_config_tx_queues_t *vc_txqs = NULL;
+ virtchnl2_txq_info_t *txq_info;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+ u16 num_qs;
+ int size;
+
+ vec_validate_aligned (vport->txqs, qid, CLIB_CACHE_LINE_BYTES);
+ txq = vec_elt_at_index (vport->txqs, qid);
+
+ if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ num_qs = IDPF_TXQ_PER_GRP;
+ else
+ num_qs = IDPF_TXQ_PER_GRP + IDPF_TX_COMPLQ_PER_GRP;
+
+ size = sizeof (*vc_txqs) + (num_qs - 1) * sizeof (virtchnl2_txq_info_t);
+ vc_txqs = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES);
+ clib_memset (vc_txqs, 0, size);
+
+ vc_txqs->vport_id = vport->vport_id;
+ vc_txqs->num_qinfo = num_qs;
+
+ if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ {
+ txq_info = &vc_txqs->qinfo[0];
+ txq_info->dma_ring_addr = idpf_dma_addr (vm, id, (void *) txq->descs);
+ txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX;
+ txq_info->queue_id = txq->queue_index;
+ txq_info->model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
+ txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE;
+ txq_info->ring_len = txq->size;
+ }
+ else
+ {
+ /* txq info */
+ txq_info = &vc_txqs->qinfo[0];
+ txq_info->dma_ring_addr = idpf_dma_addr (vm, id, (void *) txq->descs);
+ txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX;
+ txq_info->queue_id = txq->queue_index;
+ txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+ txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
+ txq_info->ring_len = txq->size;
+ txq_info->tx_compl_queue_id = txq->complq->queue_index;
+ txq_info->relative_queue_id = txq_info->queue_id;
+
+ /* tx completion queue info */
+ idpf_txq_t *complq = txq->complq;
+ txq_info = &vc_txqs->qinfo[1];
+ txq_info->dma_ring_addr = idpf_dma_addr (vm, id, (void *) complq->descs);
+ txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
+ txq_info->queue_id = complq->queue_index;
+ txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+ txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
+ txq_info->ring_len = complq->size;
+ }
+
+ clib_memset (&args, 0, sizeof (args));
+ args.ops = VIRTCHNL2_OP_CONFIG_TX_QUEUES;
+ args.in_args = (u8 *) vc_txqs;
+ args.in_args_size = size;
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ clib_mem_free (vc_txqs);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command VIRTCHNL2_OP_CONFIG_TX_QUEUES");
+
+ return error;
+}
+
+clib_error_t *
+idpf_vc_config_rxq (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid)
+{
+ idpf_rxq_t *rxq;
+ virtchnl2_config_rx_queues_t *vc_rxqs = NULL;
+ virtchnl2_rxq_info_t *rxq_info;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+ u16 num_qs;
+ int size, i;
+
+ vec_validate_aligned (vport->rxqs, qid, CLIB_CACHE_LINE_BYTES);
+ rxq = vec_elt_at_index (vport->rxqs, qid);
+
+ if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ num_qs = IDPF_RXQ_PER_GRP;
+ else
+ num_qs = IDPF_RXQ_PER_GRP + IDPF_RX_BUFQ_PER_GRP;
+
+ size = sizeof (*vc_rxqs) + (num_qs - 1) * sizeof (virtchnl2_rxq_info_t);
+ vc_rxqs = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES);
+ clib_memset (vc_rxqs, 0, size);
+
+ vc_rxqs->vport_id = vport->vport_id;
+ vc_rxqs->num_qinfo = num_qs;
+
+ if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ {
+ rxq_info = &vc_rxqs->qinfo[0];
+ rxq_info->dma_ring_addr = idpf_dma_addr (vm, id, (void *) rxq->descs);
+ rxq_info->type = VIRTCHNL2_QUEUE_TYPE_RX;
+ rxq_info->queue_id = rxq->queue_index;
+ rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
+ rxq_info->data_buffer_size = vlib_buffer_get_default_data_size (vm);
+ rxq_info->max_pkt_size = ETHERNET_MAX_PACKET_BYTES;
+
+ rxq_info->desc_ids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M;
+ rxq_info->qflags |= VIRTCHNL2_RX_DESC_SIZE_32BYTE;
+
+ rxq_info->ring_len = rxq->size;
+ }
+ else
+ {
+ /* Rx queue */
+ rxq_info = &vc_rxqs->qinfo[0];
+ rxq_info->dma_ring_addr = idpf_dma_addr (vm, id, (void *) rxq->descs);
+ rxq_info->type = VIRTCHNL2_QUEUE_TYPE_RX;
+ rxq_info->queue_id = rxq->queue_index;
+ rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
+ rxq_info->data_buffer_size = vlib_buffer_get_default_data_size (vm);
+ rxq_info->max_pkt_size = ETHERNET_MAX_PACKET_BYTES;
+
+ rxq_info->desc_ids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
+ rxq_info->qflags |= VIRTCHNL2_RX_DESC_SIZE_32BYTE;
+
+ rxq_info->ring_len = rxq->size;
+ rxq_info->rx_bufq1_id = rxq->bufq1->queue_index;
+ rxq_info->rx_bufq2_id = rxq->bufq2->queue_index;
+ rxq_info->rx_buffer_low_watermark = 64;
+
+ /* Buffer queue */
+ for (i = 1; i <= IDPF_RX_BUFQ_PER_GRP; i++)
+ {
+ idpf_rxq_t *bufq = (i == 1 ? rxq->bufq1 : rxq->bufq2);
+ rxq_info = &vc_rxqs->qinfo[i];
+ rxq_info->dma_ring_addr =
+ idpf_dma_addr (vm, id, (void *) bufq->descs);
+ rxq_info->type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
+ rxq_info->queue_id = bufq->queue_index;
+ rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+ rxq_info->data_buffer_size = vlib_buffer_get_default_data_size (vm);
+ rxq_info->desc_ids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
+ rxq_info->ring_len = bufq->size;
+
+ rxq_info->buffer_notif_stride = IDPF_RX_BUF_STRIDE;
+ rxq_info->rx_buffer_low_watermark = 64;
+ }
+ }
+
+ clib_memset (&args, 0, sizeof (args));
+ args.ops = VIRTCHNL2_OP_CONFIG_RX_QUEUES;
+ args.in_args = (u8 *) vc_rxqs;
+ args.in_args_size = size;
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ clib_mem_free (vc_rxqs);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command VIRTCHNL2_OP_CONFIG_RX_QUEUES");
+
+ return error;
+}
+
+clib_error_t *
+idpf_alloc_vectors (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ uint16_t num_vectors)
+{
+ virtchnl2_alloc_vectors_t *alloc_vec;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+ int len;
+
+ len = sizeof (virtchnl2_alloc_vectors_t) +
+ (num_vectors - 1) * sizeof (virtchnl2_vector_chunk_t);
+ alloc_vec = clib_mem_alloc_aligned (len, CLIB_CACHE_LINE_BYTES);
+ clib_memset (alloc_vec, 0, len);
+
+ alloc_vec->num_vectors = num_vectors;
+
+ args.ops = VIRTCHNL2_OP_ALLOC_VECTORS;
+ args.in_args = (u8 *) alloc_vec;
+ args.in_args_size = sizeof (virtchnl2_alloc_vectors_t);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+  error = idpf_execute_vc_cmd (vm, id, &args);
+  if (error != 0)
+    {
+      clib_mem_free (alloc_vec);
+      return clib_error_return (
+	0, "Failed to execute command VIRTCHNL2_OP_ALLOC_VECTORS");
+    }
+
+ if (vport->recv_vectors == NULL)
+ {
+ vport->recv_vectors =
+ clib_mem_alloc_aligned (len, CLIB_CACHE_LINE_BYTES);
+ clib_memset (vport->recv_vectors, 0, len);
+ }
+
+ clib_memcpy (vport->recv_vectors, args.out_buffer, len);
+ clib_mem_free (alloc_vec);
+ return error;
+}
+
+clib_error_t *
+idpf_vc_ena_dis_one_queue (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_t *vport, u16 qid, u32 type, bool on)
+{
+ virtchnl2_del_ena_dis_queues_t *queue_select;
+ virtchnl2_queue_chunk_t *queue_chunk;
+ idpf_cmd_info_t args;
+ clib_error_t *error = 0;
+ int len;
+
+ len = sizeof (virtchnl2_del_ena_dis_queues_t);
+ queue_select = clib_mem_alloc_aligned (len, CLIB_CACHE_LINE_BYTES);
+ clib_memset (queue_select, 0, len);
+
+ queue_chunk = queue_select->chunks.chunks;
+ queue_select->chunks.num_chunks = 1;
+ queue_select->vport_id = vport->vport_id;
+
+ queue_chunk->type = type;
+ queue_chunk->start_queue_id = qid;
+ queue_chunk->num_queues = 1;
+
+ args.ops = on ? VIRTCHNL2_OP_ENABLE_QUEUES : VIRTCHNL2_OP_DISABLE_QUEUES;
+ args.in_args = (u8 *) queue_select;
+ args.in_args_size = len;
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+  error = idpf_execute_vc_cmd (vm, id, &args);
+  clib_mem_free (queue_select);
+  if (error != 0)
+    return clib_error_return (
+      0, "Failed to execute command of VIRTCHNL2_OP_%s_QUEUES",
+      on ? "ENABLE" : "DISABLE");
+
+  return error;
+}
+
+clib_error_t *
+idpf_op_enable_queues (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, bool rx, bool on)
+{
+ clib_error_t *error;
+ u16 queue_index;
+ u32 type;
+
+ /* switch txq/rxq */
+ type = rx ? VIRTCHNL2_QUEUE_TYPE_RX : VIRTCHNL2_QUEUE_TYPE_TX;
+
+ if (type == VIRTCHNL2_QUEUE_TYPE_RX)
+ {
+ queue_index = vport->chunks_info.rx_start_qid + qid;
+ error = idpf_vc_ena_dis_one_queue (vm, id, vport, queue_index, type, on);
+ }
+ else
+ {
+ queue_index = vport->chunks_info.tx_start_qid + qid;
+ error = idpf_vc_ena_dis_one_queue (vm, id, vport, queue_index, type, on);
+ }
+ if (error != 0)
+ return error;
+
+ /* switch tx completion queue */
+ if (!rx && vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT)
+ {
+ type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
+ queue_index = vport->chunks_info.tx_compl_start_qid + qid;
+ error = idpf_vc_ena_dis_one_queue (vm, id, vport, queue_index, type, on);
+ if (error != 0)
+ return error;
+ }
+
+ /* switch rx buffer queue */
+ if (rx && vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT)
+ {
+ type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
+ queue_index = vport->chunks_info.rx_buf_start_qid + 2 * qid;
+ error = idpf_vc_ena_dis_one_queue (vm, id, vport, queue_index, type, on);
+ if (error != 0)
+ return error;
+ queue_index++;
+ error = idpf_vc_ena_dis_one_queue (vm, id, vport, queue_index, type, on);
+ if (error != 0)
+ return error;
+ }
+
+ return error;
+}
+
+clib_error_t *
+idpf_queue_init (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ idpf_create_if_args_t *args)
+{
+ clib_error_t *error = 0;
+ int i;
+
+ for (i = 0; i < id->n_rx_queues; i++)
+ {
+ if ((error = idpf_rx_queue_setup (vm, id, vport, i, args->rxq_size)))
+ return error;
+ if ((error = idpf_vc_config_rxq (vm, id, vport, i)))
+ return error;
+ if ((error = idpf_op_enable_queues (vm, id, vport, i, true, true)))
+ return error;
+ }
+
+ for (i = 0; i < id->n_tx_queues; i++)
+ {
+ if ((error = idpf_tx_queue_setup (vm, id, vport, i, args->txq_size)))
+ return error;
+ if ((error = idpf_vc_config_txq (vm, id, vport, i)))
+ return error;
+ if ((error = idpf_op_enable_queues (vm, id, vport, i, false, true)))
+ return error;
+ }
+
+ if ((error = idpf_alloc_vectors (vm, id, vport, IDPF_DFLT_Q_VEC_NUM)))
+ return error;
+
+ if ((error = idpf_config_rx_queues_irqs (vm, id, vport)))
+ return error;
+
+ return error;
+}
+
+clib_error_t *
+idpf_op_version (vlib_main_t *vm, idpf_device_t *id)
+{
+ clib_error_t *error = 0;
+ idpf_cmd_info_t args;
+ virtchnl2_version_info_t myver = {
+ .major = VIRTCHNL2_VERSION_MAJOR_2,
+ .minor = VIRTCHNL2_VERSION_MINOR_0,
+ };
+ virtchnl2_version_info_t ver = { 0 };
+
+ idpf_log_debug (id, "version: major %u minor %u", myver.major, myver.minor);
+
+ args.ops = VIRTCHNL2_OP_VERSION;
+ args.in_args = (u8 *) &myver;
+ args.in_args_size = sizeof (myver);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (0,
+ "Failed to execute command VIRTCHNL_OP_VERSION");
+
+ clib_memcpy (&ver, args.out_buffer, sizeof (ver));
+
+ if (ver.major != VIRTCHNL2_VERSION_MAJOR_2 ||
+ ver.minor != VIRTCHNL2_VERSION_MINOR_0)
+ return clib_error_return (0,
+ "incompatible virtchnl version "
+ "(remote %d.%d)",
+ ver.major, ver.minor);
+
+ return 0;
+}
+
+clib_error_t *
+idpf_op_get_caps (vlib_main_t *vm, idpf_device_t *id,
+ virtchnl2_get_capabilities_t *caps)
+{
+ virtchnl2_get_capabilities_t caps_msg = { 0 };
+ idpf_cmd_info_t args;
+ clib_error_t *error = 0;
+
+ caps_msg.csum_caps =
+ VIRTCHNL2_CAP_TX_CSUM_L3_IPV4 | VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP | VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP | VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP | VIRTCHNL2_CAP_TX_CSUM_GENERIC |
+ VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 | VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP | VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP | VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP | VIRTCHNL2_CAP_RX_CSUM_GENERIC;
+
+ caps_msg.other_caps = VIRTCHNL2_CAP_WB_ON_ITR;
+
+ args.ops = VIRTCHNL2_OP_GET_CAPS;
+ args.in_args = (u8 *) &caps_msg;
+ args.in_args_size = sizeof (caps_msg);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command VIRTCHNL2_OP_GET_CAPS");
+
+ clib_memcpy (caps, args.out_buffer, sizeof (*caps));
+ return error;
+}
+
+#define CTLQ_NUM 2
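+/* The mailbox consists of one TX and one RX control queue */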
+clib_error_t *
+idpf_mbx_init (vlib_main_t *vm, idpf_device_t *id)
+{
+ idpf_ctlq_create_info_t ctlq_info[CTLQ_NUM] = {
+ {
+ .type = IDPF_CTLQ_TYPE_MAILBOX_TX,
+ .id = IDPF_CTLQ_ID,
+ .len = IDPF_CTLQ_LEN,
+ .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
+ .reg = {
+ .head = PF_FW_ATQH,
+ .tail = PF_FW_ATQT,
+ .len = PF_FW_ATQLEN,
+ .bah = PF_FW_ATQBAH,
+ .bal = PF_FW_ATQBAL,
+ .len_mask = PF_FW_ATQLEN_ATQLEN_M,
+ .len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M,
+ .head_mask = PF_FW_ATQH_ATQH_M,
+ }
+ },
+ {
+ .type = IDPF_CTLQ_TYPE_MAILBOX_RX,
+ .id = IDPF_CTLQ_ID,
+ .len = IDPF_CTLQ_LEN,
+ .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
+ .reg = {
+ .head = PF_FW_ARQH,
+ .tail = PF_FW_ARQT,
+ .len = PF_FW_ARQLEN,
+ .bah = PF_FW_ARQBAH,
+ .bal = PF_FW_ARQBAL,
+ .len_mask = PF_FW_ARQLEN_ARQLEN_M,
+ .len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M,
+ .head_mask = PF_FW_ARQH_ARQH_M,
+ }
+ }
+ };
+ struct idpf_ctlq_info *ctlq;
+
+ if (idpf_ctlq_init (vm, id, CTLQ_NUM, ctlq_info))
+ return clib_error_return (0, "ctlq init failed");
+
+ LIST_FOR_EACH_ENTRY_SAFE (ctlq, NULL, &id->cq_list_head,
+ struct idpf_ctlq_info, cq_list)
+ {
+ if (ctlq->q_id == IDPF_CTLQ_ID &&
+ ctlq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_TX)
+ id->asq = ctlq;
+ if (ctlq->q_id == IDPF_CTLQ_ID &&
+ ctlq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_RX)
+ id->arq = ctlq;
+ }
+
+ if (!id->asq || !id->arq)
+ {
+ idpf_ctlq_deinit (id);
+ return clib_error_return (0, "ctlq deinit");
+ }
+
+ return 0;
+}
+
+clib_error_t *
+idpf_vc_query_ptype_info (vlib_main_t *vm, idpf_device_t *id)
+{
+ virtchnl2_get_ptype_info_t ptype_info;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+
+ ptype_info.start_ptype_id = 0;
+ ptype_info.num_ptypes = IDPF_MAX_PKT_TYPE;
+ args.ops = VIRTCHNL2_OP_GET_PTYPE_INFO;
+ args.in_args = (u8 *) &ptype_info;
+ args.in_args_size = sizeof (virtchnl2_get_ptype_info_t);
+ args.out_buffer = NULL;
+ args.out_size = 0;
+
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command VIRTCHNL2_OP_GET_PTYPE_INFO");
+
+ return error;
+}
+
+clib_error_t *
+idpf_get_pkt_type (vlib_main_t *vm, idpf_device_t *id)
+{
+ virtchnl2_get_ptype_info_t *ptype_info;
+ u16 ptype_recvd = 0, ptype_offset, i, j;
+ clib_error_t *error;
+
+ error = idpf_vc_query_ptype_info (vm, id);
+ if (error != 0)
+    return clib_error_return (0, "Failed to query packet type information");
+
+ ptype_info =
+ clib_mem_alloc_aligned (IDPF_DFLT_MBX_BUF_SIZE, CLIB_CACHE_LINE_BYTES);
+
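+  /* Drain GET_PTYPE_INFO replies until all IDPF_MAX_PKT_TYPE entries
+   * are received, translating each device ptype into the internal
+   * IDPF_PTYPE_* flags stored in id->ptype_tbl */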
+ while (ptype_recvd < IDPF_MAX_PKT_TYPE)
+ {
+ error = idpf_read_one_msg (vm, id, VIRTCHNL2_OP_GET_PTYPE_INFO,
+ (u8 *) ptype_info, IDPF_DFLT_MBX_BUF_SIZE);
+ if (error != 0)
+ {
+	  error = clib_error_return (0, "Failed to get packet type information");
+ goto free_ptype_info;
+ }
+
+ ptype_recvd += ptype_info->num_ptypes;
+ ptype_offset =
+ sizeof (virtchnl2_get_ptype_info_t) - sizeof (virtchnl2_ptype_t);
+
+ for (i = 0; i < ptype_info->num_ptypes; i++)
+ {
+ bool is_inner = false, is_ip = false;
+ virtchnl2_ptype_t *ptype;
+ u32 proto_hdr = 0;
+
+ ptype = (virtchnl2_ptype_t *) ((u8 *) ptype_info + ptype_offset);
+ ptype_offset += IDPF_GET_PTYPE_SIZE (ptype);
+ if (ptype_offset > IDPF_DFLT_MBX_BUF_SIZE)
+ {
+ error =
+ clib_error_return (0, "Ptype offset exceeds mbx buffer size");
+ goto free_ptype_info;
+ }
+
+ if (ptype->ptype_id_10 == 0xFFFF)
+ goto free_ptype_info;
+
+ for (j = 0; j < ptype->proto_id_count; j++)
+ {
+ switch (ptype->proto_id[j])
+ {
+ case VIRTCHNL2_PROTO_HDR_GRE:
+ case VIRTCHNL2_PROTO_HDR_VXLAN:
+ proto_hdr &= ~IDPF_PTYPE_L4_MASK;
+ proto_hdr |= IDPF_PTYPE_TUNNEL_GRENAT;
+ is_inner = true;
+ break;
+ case VIRTCHNL2_PROTO_HDR_MAC:
+ if (is_inner)
+ {
+ proto_hdr &= ~IDPF_PTYPE_INNER_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_INNER_L2_ETHER;
+ }
+ else
+ {
+ proto_hdr &= ~IDPF_PTYPE_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_L2_ETHER;
+ }
+ break;
+ case VIRTCHNL2_PROTO_HDR_VLAN:
+ if (is_inner)
+ {
+ proto_hdr &= ~IDPF_PTYPE_INNER_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_INNER_L2_ETHER_VLAN;
+ }
+ break;
+ case VIRTCHNL2_PROTO_HDR_PTP:
+ proto_hdr &= ~IDPF_PTYPE_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_L2_ETHER_TIMESYNC;
+ break;
+ case VIRTCHNL2_PROTO_HDR_LLDP:
+ proto_hdr &= ~IDPF_PTYPE_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_L2_ETHER_LLDP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_ARP:
+ proto_hdr &= ~IDPF_PTYPE_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_L2_ETHER_ARP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_PPPOE:
+ proto_hdr &= ~IDPF_PTYPE_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_L2_ETHER_PPPOE;
+ break;
+ case VIRTCHNL2_PROTO_HDR_IPV4:
+ if (!is_ip)
+ {
+ proto_hdr |= IDPF_PTYPE_L3_IPV4_EXT_UNKNOWN;
+ is_ip = true;
+ }
+ else
+ {
+ proto_hdr |= IDPF_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ IDPF_PTYPE_TUNNEL_IP;
+ is_inner = true;
+ }
+ break;
+ case VIRTCHNL2_PROTO_HDR_IPV6:
+ if (!is_ip)
+ {
+ proto_hdr |= IDPF_PTYPE_L3_IPV6_EXT_UNKNOWN;
+ is_ip = true;
+ }
+ else
+ {
+ proto_hdr |= IDPF_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ IDPF_PTYPE_TUNNEL_IP;
+ is_inner = true;
+ }
+ break;
+ case VIRTCHNL2_PROTO_HDR_IPV4_FRAG:
+ case VIRTCHNL2_PROTO_HDR_IPV6_FRAG:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_FRAG;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_FRAG;
+ break;
+ case VIRTCHNL2_PROTO_HDR_UDP:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_UDP;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_UDP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_TCP:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_TCP;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_TCP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_SCTP:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_SCTP;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_SCTP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_ICMP:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_ICMP;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_ICMP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_ICMPV6:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_ICMP;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_ICMP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_L2TPV2:
+ case VIRTCHNL2_PROTO_HDR_L2TPV2_CONTROL:
+ case VIRTCHNL2_PROTO_HDR_L2TPV3:
+ is_inner = true;
+ proto_hdr |= IDPF_PTYPE_TUNNEL_L2TP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_NVGRE:
+ is_inner = true;
+ proto_hdr |= IDPF_PTYPE_TUNNEL_NVGRE;
+ break;
+ case VIRTCHNL2_PROTO_HDR_GTPC_TEID:
+ is_inner = true;
+ proto_hdr |= IDPF_PTYPE_TUNNEL_GTPC;
+ break;
+ case VIRTCHNL2_PROTO_HDR_GTPU:
+ case VIRTCHNL2_PROTO_HDR_GTPU_UL:
+ case VIRTCHNL2_PROTO_HDR_GTPU_DL:
+ is_inner = true;
+ proto_hdr |= IDPF_PTYPE_TUNNEL_GTPU;
+ break;
+ case VIRTCHNL2_PROTO_HDR_PAY:
+ case VIRTCHNL2_PROTO_HDR_IPV6_EH:
+ case VIRTCHNL2_PROTO_HDR_PRE_MAC:
+ case VIRTCHNL2_PROTO_HDR_POST_MAC:
+ case VIRTCHNL2_PROTO_HDR_ETHERTYPE:
+ case VIRTCHNL2_PROTO_HDR_SVLAN:
+ case VIRTCHNL2_PROTO_HDR_CVLAN:
+ case VIRTCHNL2_PROTO_HDR_MPLS:
+ case VIRTCHNL2_PROTO_HDR_MMPLS:
+ case VIRTCHNL2_PROTO_HDR_CTRL:
+ case VIRTCHNL2_PROTO_HDR_ECP:
+ case VIRTCHNL2_PROTO_HDR_EAPOL:
+ case VIRTCHNL2_PROTO_HDR_PPPOD:
+ case VIRTCHNL2_PROTO_HDR_IGMP:
+ case VIRTCHNL2_PROTO_HDR_AH:
+ case VIRTCHNL2_PROTO_HDR_ESP:
+ case VIRTCHNL2_PROTO_HDR_IKE:
+ case VIRTCHNL2_PROTO_HDR_NATT_KEEP:
+ case VIRTCHNL2_PROTO_HDR_GTP:
+ case VIRTCHNL2_PROTO_HDR_GTP_EH:
+ case VIRTCHNL2_PROTO_HDR_GTPCV2:
+ case VIRTCHNL2_PROTO_HDR_ECPRI:
+ case VIRTCHNL2_PROTO_HDR_VRRP:
+ case VIRTCHNL2_PROTO_HDR_OSPF:
+ case VIRTCHNL2_PROTO_HDR_TUN:
+ case VIRTCHNL2_PROTO_HDR_VXLAN_GPE:
+ case VIRTCHNL2_PROTO_HDR_GENEVE:
+ case VIRTCHNL2_PROTO_HDR_NSH:
+ case VIRTCHNL2_PROTO_HDR_QUIC:
+ case VIRTCHNL2_PROTO_HDR_PFCP:
+ case VIRTCHNL2_PROTO_HDR_PFCP_NODE:
+ case VIRTCHNL2_PROTO_HDR_PFCP_SESSION:
+ case VIRTCHNL2_PROTO_HDR_RTP:
+ case VIRTCHNL2_PROTO_HDR_NO_PROTO:
+ default:
+ continue;
+ }
+ id->ptype_tbl[ptype->ptype_id_10] = proto_hdr;
+ }
+ }
+ }
+
+free_ptype_info:
+ clib_mem_free (ptype_info);
+ clear_cmd (id);
+ return error;
+}
+
+static void
+idpf_reset_pf (idpf_device_t *id)
+{
+ u32 reg;
+
+ reg = idpf_reg_read (id, PFGEN_CTRL);
+ idpf_reg_write (id, PFGEN_CTRL, (reg | PFGEN_CTRL_PFSWR));
+}
+
+#define IDPF_RESET_WAIT_CNT 100
+clib_error_t *
+idpf_check_pf_reset_done (vlib_main_t *vm, idpf_device_t *id)
+{
+ u32 reg;
+ int i;
+
+ for (i = 0; i < IDPF_RESET_WAIT_CNT; i++)
+ {
+ reg = idpf_reg_read (id, PFGEN_RSTAT);
+ if (reg != 0xFFFFFFFF && (reg & PFGEN_RSTAT_PFR_STATE_M))
+ return 0;
+ vlib_process_suspend (vm, 1.0);
+ }
+
+  return clib_error_return (0, "PF reset timed out");
+}
+
+void
+idpf_init_vport_req_info (idpf_device_t *id,
+ virtchnl2_create_vport_t *vport_info)
+{
+ vport_info->vport_type = VIRTCHNL2_VPORT_TYPE_DEFAULT;
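+  /* A txq_model/rxq_model of 1 selects the split queue model;
+   * otherwise the single queue model is used */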
+ if (id->txq_model == 1)
+ {
+ vport_info->txq_model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+ vport_info->num_tx_q = IDPF_DEFAULT_TXQ_NUM;
+ vport_info->num_tx_complq =
+ IDPF_DEFAULT_TXQ_NUM * IDPF_TX_COMPLQ_PER_GRP;
+ }
+ else
+ {
+ vport_info->txq_model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
+ vport_info->num_tx_q = IDPF_DEFAULT_TXQ_NUM;
+ vport_info->num_tx_complq = 0;
+ }
+ if (id->rxq_model == 1)
+ {
+ vport_info->rxq_model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+ vport_info->num_rx_q = IDPF_DEFAULT_RXQ_NUM;
+ vport_info->num_rx_bufq = IDPF_DEFAULT_RXQ_NUM * IDPF_RX_BUFQ_PER_GRP;
+ }
+ else
+ {
+ vport_info->rxq_model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
+ vport_info->num_rx_q = IDPF_DEFAULT_RXQ_NUM;
+ vport_info->num_rx_bufq = 0;
+ }
+
+ return;
+}
+
+clib_error_t *
+idpf_vc_create_vport (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ virtchnl2_create_vport_t *vport_req_info)
+{
+ virtchnl2_create_vport_t vport_msg = { 0 };
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+
+ vport_msg.vport_type = vport_req_info->vport_type;
+ vport_msg.txq_model = vport_req_info->txq_model;
+ vport_msg.rxq_model = vport_req_info->rxq_model;
+ vport_msg.num_tx_q = vport_req_info->num_tx_q;
+ vport_msg.num_tx_complq = vport_req_info->num_tx_complq;
+ vport_msg.num_rx_q = vport_req_info->num_rx_q;
+ vport_msg.num_rx_bufq = vport_req_info->num_rx_bufq;
+
+ clib_memset (&args, 0, sizeof (args));
+ args.ops = VIRTCHNL2_OP_CREATE_VPORT;
+ args.in_args = (u8 *) &vport_msg;
+ args.in_args_size = sizeof (vport_msg);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command of VIRTCHNL2_OP_CREATE_VPORT");
+
+ clib_memcpy (vport->vport_info, args.out_buffer, IDPF_DFLT_MBX_BUF_SIZE);
+ return error;
+}
+
+clib_error_t *
+idpf_vc_destroy_vport (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport)
+{
+ virtchnl2_vport_t vc_vport;
+ idpf_cmd_info_t args;
+ clib_error_t *error = 0;
+
+ vc_vport.vport_id = vport->vport_id;
+
+ clib_memset (&args, 0, sizeof (args));
+ args.ops = VIRTCHNL2_OP_DESTROY_VPORT;
+ args.in_args = (u8 *) &vc_vport;
+ args.in_args_size = sizeof (vc_vport);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command of VIRTCHNL2_OP_DESTROY_VPORT");
+
+ return error;
+}
+
+clib_error_t *
+idpf_init_vport (idpf_device_t *id, idpf_vport_t *vport)
+{
+ virtchnl2_create_vport_t *vport_info = vport->vport_info;
+ int i, type;
+
+ vport->vport_id = vport_info->vport_id;
+ vport->txq_model = vport_info->txq_model;
+ vport->rxq_model = vport_info->rxq_model;
+ vport->num_tx_q = vport_info->num_tx_q;
+ vport->num_tx_complq = vport_info->num_tx_complq;
+ vport->num_rx_q = vport_info->num_rx_q;
+ vport->num_rx_bufq = vport_info->num_rx_bufq;
+ vport->max_mtu = vport_info->max_mtu;
+ clib_memcpy (vport->default_mac_addr, vport_info->default_mac_addr,
+ IDPF_ETH_ALEN);
+
+ for (i = 0; i < vport_info->chunks.num_chunks; i++)
+ {
+ type = vport_info->chunks.chunks[i].type;
+ switch (type)
+ {
+ case VIRTCHNL2_QUEUE_TYPE_TX:
+ vport->chunks_info.tx_start_qid =
+ vport_info->chunks.chunks[i].start_queue_id;
+ vport->chunks_info.tx_qtail_start =
+ vport_info->chunks.chunks[i].qtail_reg_start;
+ vport->chunks_info.tx_qtail_spacing =
+ vport_info->chunks.chunks[i].qtail_reg_spacing;
+ break;
+ case VIRTCHNL2_QUEUE_TYPE_RX:
+ vport->chunks_info.rx_start_qid =
+ vport_info->chunks.chunks[i].start_queue_id;
+ vport->chunks_info.rx_qtail_start =
+ vport_info->chunks.chunks[i].qtail_reg_start;
+ vport->chunks_info.rx_qtail_spacing =
+ vport_info->chunks.chunks[i].qtail_reg_spacing;
+ break;
+ case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION:
+ vport->chunks_info.tx_compl_start_qid =
+ vport_info->chunks.chunks[i].start_queue_id;
+ vport->chunks_info.tx_compl_qtail_start =
+ vport_info->chunks.chunks[i].qtail_reg_start;
+ vport->chunks_info.tx_compl_qtail_spacing =
+ vport_info->chunks.chunks[i].qtail_reg_spacing;
+ break;
+ case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER:
+ vport->chunks_info.rx_buf_start_qid =
+ vport_info->chunks.chunks[i].start_queue_id;
+ vport->chunks_info.rx_buf_qtail_start =
+ vport_info->chunks.chunks[i].qtail_reg_start;
+ vport->chunks_info.rx_buf_qtail_spacing =
+ vport_info->chunks.chunks[i].qtail_reg_spacing;
+ break;
+ default:
+ return clib_error_return (0, "Unsupported queue type");
+ }
+ }
+
+ return 0;
+}
+
+clib_error_t *
+idpf_ena_dis_vport (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ bool enable)
+{
+ virtchnl2_vport_t vc_vport;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+
+ vc_vport.vport_id = vport->vport_id;
+ args.ops = enable ? VIRTCHNL2_OP_ENABLE_VPORT : VIRTCHNL2_OP_DISABLE_VPORT;
+ args.in_args = (u8 *) &vc_vport;
+ args.in_args_size = sizeof (vc_vport);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ {
+ return clib_error_return (
+ 0, "Failed to execute command of VIRTCHNL2_OP_%s_VPORT",
+ enable ? "ENABLE" : "DISABLE");
+ }
+
+ return error;
+}
+
+clib_error_t *
+idpf_dealloc_vectors (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport)
+{
+ virtchnl2_alloc_vectors_t *alloc_vec;
+ virtchnl2_vector_chunks_t *vcs;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+ int len;
+
+ alloc_vec = vport->recv_vectors;
+ vcs = &alloc_vec->vchunks;
+
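+  /* virtchnl2_vector_chunks_t already embeds one chunk, hence the
+   * num_vchunks - 1 additional entries */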
+ len = sizeof (virtchnl2_vector_chunks_t) +
+ (vcs->num_vchunks - 1) * sizeof (virtchnl2_vector_chunk_t);
+
+ args.ops = VIRTCHNL2_OP_DEALLOC_VECTORS;
+ args.in_args = (u8 *) vcs;
+ args.in_args_size = len;
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command VIRTCHNL2_OP_DEALLOC_VECTORS");
+
+ return error;
+}
+
+clib_error_t *
+idpf_dev_vport_init (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_param_t *param)
+{
+ idpf_vport_t *vport;
+ virtchnl2_create_vport_t vport_req_info = { 0 };
+ clib_error_t *error = 0;
+
+ vport = clib_mem_alloc (sizeof (idpf_vport_t));
+ clib_memset (vport, 0, sizeof (idpf_vport_t));
+
+ vport->vport_info = clib_mem_alloc (IDPF_DFLT_MBX_BUF_SIZE);
+ clib_memset (vport->vport_info, 0, IDPF_DFLT_MBX_BUF_SIZE);
+
+ id->vports[param->idx] = vport;
+ vport->id = id;
+ vport->idx = param->idx;
+
+ idpf_init_vport_req_info (id, &vport_req_info);
+
+ error = idpf_vc_create_vport (vm, id, vport, &vport_req_info);
+ if (error != 0)
+ {
+ idpf_log_err (id, "Failed to create vport.");
+ goto err_create_vport;
+ }
+
+ error = idpf_init_vport (id, vport);
+ if (error != 0)
+ {
+ idpf_log_err (id, "Failed to init vports.");
+ goto err_init_vport;
+ }
+
+ clib_memcpy (id->hwaddr, vport->default_mac_addr, IDPF_ETH_ALEN);
+
+ return error;
+
+err_init_vport:
+ id->vports[param->idx] = NULL; /* reset */
+ idpf_vc_destroy_vport (vm, id, vport);
+err_create_vport:
+ clib_mem_free (vport->vport_info);
+ clib_mem_free (vport);
+ return error;
+}
+
+/* device bring-up: reset, mailbox, api version, capabilities, vports, queues */
+clib_error_t *
+idpf_device_init (vlib_main_t *vm, idpf_main_t *im, idpf_device_t *id,
+ idpf_create_if_args_t *args)
+{
+ idpf_vport_t *vport;
+ idpf_vport_param_t vport_param = { 0 };
+ virtchnl2_get_capabilities_t caps = { 0 };
+ clib_error_t *error;
+ u16 rxq_num, txq_num;
+ int i;
+
+ idpf_reset_pf (id);
+ error = idpf_check_pf_reset_done (vm, id);
+ if (error)
+ return error;
+
+ /*
+ * Init mailbox configuration
+ */
+ if ((error = idpf_mbx_init (vm, id)))
+ return error;
+
+ /*
+ * Check API version
+ */
+ error = idpf_op_version (vm, id);
+ if (error)
+ return error;
+
+ /*
+ * Get pkt type table
+ */
+ error = idpf_get_pkt_type (vm, id);
+ if (error)
+ return error;
+
+ /* Get idpf capability */
+ error = idpf_op_get_caps (vm, id, &caps);
+ if (error)
+ return error;
+
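+  /* defaults: a single RX queue, and one TX queue per thread */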
+ rxq_num = args->rxq_num ? args->rxq_num : 1;
+ txq_num = args->txq_num ? args->txq_num : vlib_get_n_threads ();
+
+ /* Sync capabilities */
+ id->n_rx_queues = rxq_num;
+ id->n_tx_queues = txq_num;
+ id->csum_caps = caps.csum_caps;
+ id->seg_caps = caps.seg_caps;
+ id->hsplit_caps = caps.hsplit_caps;
+ id->rsc_caps = caps.rsc_caps;
+ id->rss_caps = caps.rss_caps;
+ id->other_caps = caps.other_caps;
+ id->max_rx_q = caps.max_rx_q;
+ id->max_tx_q = caps.max_tx_q;
+ id->max_rx_bufq = caps.max_rx_bufq;
+ id->max_tx_complq = caps.max_tx_complq;
+ id->max_sriov_vfs = caps.max_sriov_vfs;
+ id->max_vports = caps.max_vports;
+ id->default_num_vports = caps.default_num_vports;
+
+ id->vports = clib_mem_alloc (id->max_vports * sizeof (*id->vports));
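+  /* how many queue-info entries fit into a single mailbox message */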
+ id->max_rxq_per_msg =
+ (IDPF_DFLT_MBX_BUF_SIZE - sizeof (virtchnl2_config_rx_queues_t)) /
+ sizeof (virtchnl2_rxq_info_t);
+ id->max_txq_per_msg =
+ (IDPF_DFLT_MBX_BUF_SIZE - sizeof (virtchnl2_config_tx_queues_t)) /
+ sizeof (virtchnl2_txq_info_t);
+
+ id->cur_vport_idx = 0;
+ id->cur_vports = 0;
+ id->cur_vport_nb = 0;
+
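+  /* a zero *_single arg selects the split queue model (model 1) */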
+ if (!args->rxq_single)
+ id->rxq_model = 1;
+ if (!args->txq_single)
+ id->txq_model = 1;
+
+  /* Init and enable vports; a single requested vport is simply the
+   * one-iteration case */
+  for (i = 0; i < args->req_vport_nb; i++)
+    {
+      vport_param.id = id;
+      vport_param.idx = i;
+      if ((error = idpf_dev_vport_init (vm, id, &vport_param)))
+	return error;
+      vport = id->vports[vport_param.idx];
+      error = idpf_ena_dis_vport (vm, id, vport, true);
+      if (error)
+	return error;
+      id->cur_vports |= 1ULL << vport_param.idx;
+      id->cur_vport_nb++;
+      id->cur_vport_idx++;
+      error = idpf_queue_init (vm, id, vport, args);
+      if (error)
+	return error;
+    }
+
+ id->flags |= IDPF_DEVICE_F_INITIALIZED;
+ return error;
+}
+
+static u32
+idpf_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hw, u32 flags)
+{
+ idpf_device_t *id = idpf_get_device (hw->dev_instance);
+
+ switch (flags)
+ {
+ case ETHERNET_INTERFACE_FLAG_DEFAULT_L3:
+ id->flags &= ~IDPF_DEVICE_F_PROMISC;
+ break;
+ case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
+ id->flags |= IDPF_DEVICE_F_PROMISC;
+ break;
+ default:
+ return ~0;
+ }
+
+ return 0;
+}
+
+void
+idpf_delete_if (vlib_main_t *vm, idpf_device_t *id, int with_barrier)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ idpf_main_t *im = &idpf_main;
+ idpf_vport_t *vport;
+  int i, j;
+ u32 dev_instance;
+
+ id->flags &= ~IDPF_DEVICE_F_ADMIN_UP;
+
+ if (id->hw_if_index)
+ {
+ if (with_barrier)
+ vlib_worker_thread_barrier_sync (vm);
+ vnet_hw_interface_set_flags (vnm, id->hw_if_index, 0);
+ ethernet_delete_interface (vnm, id->hw_if_index);
+ if (with_barrier)
+ vlib_worker_thread_barrier_release (vm);
+ }
+
+ for (i = 0; i < id->cur_vport_nb; i++)
+ {
+ vport = id->vports[i];
+ if (vport->recv_vectors != NULL)
+ idpf_dealloc_vectors (vm, id, vport);
+ }
+
+ vlib_pci_device_close (vm, id->pci_dev_handle);
+
+ vlib_physmem_free (vm, id->asq);
+ vlib_physmem_free (vm, id->arq);
+
+ for (i = 0; i < id->cur_vport_nb; i++)
+ {
+ vport = id->vports[i];
+      /* use a separate index so the outer vport loop variable is not
+       * clobbered */
+      vec_foreach_index (j, vport->rxqs)
+	{
+	  idpf_rxq_t *rxq = vec_elt_at_index (vport->rxqs, j);
+ vlib_physmem_free (vm, (void *) rxq->descs);
+ if (rxq->n_enqueued)
+ vlib_buffer_free_from_ring (vm, rxq->bufs, rxq->next, rxq->size,
+ rxq->n_enqueued);
+ vec_free (rxq->bufs);
+ }
+
+ vec_free (vport->rxqs);
+
+      vec_foreach_index (j, vport->txqs)
+	{
+	  idpf_txq_t *txq = vec_elt_at_index (vport->txqs, j);
+ vlib_physmem_free (vm, (void *) txq->descs);
+ if (txq->n_enqueued)
+ {
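+	      /* queue sizes are powers of two, so masking wraps the ring
+	       * index */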
+ u16 first = (txq->next - txq->n_enqueued) & (txq->size - 1);
+ vlib_buffer_free_from_ring (vm, txq->bufs, first, txq->size,
+ txq->n_enqueued);
+ }
+ vec_free (txq->ph_bufs);
+ vec_free (txq->bufs);
+ clib_ring_free (txq->rs_slots);
+ vec_free (txq->tmp_bufs);
+ vec_free (txq->tmp_descs);
+ clib_spinlock_free (&txq->lock);
+ }
+ vec_free (vport->txqs);
+ }
+
+ vec_free (id->name);
+
+ clib_error_free (id->error);
+ dev_instance = id->dev_instance;
+ clib_mem_free (id->mbx_resp);
+ clib_memset (id, 0, sizeof (*id));
+ pool_put_index (im->devices, dev_instance);
+ clib_mem_free (id);
+}
+
+static u8
+idpf_validate_queue_size (idpf_create_if_args_t *args)
+{
+ clib_error_t *error = 0;
+
+ args->rxq_size = (args->rxq_size == 0) ? IDPF_RXQ_SZ : args->rxq_size;
+ args->txq_size = (args->txq_size == 0) ? IDPF_TXQ_SZ : args->txq_size;
+
+ if ((args->rxq_size > IDPF_QUEUE_SZ_MAX) ||
+ (args->txq_size > IDPF_QUEUE_SZ_MAX))
+ {
+ args->rv = VNET_API_ERROR_INVALID_VALUE;
+ args->error = clib_error_return (
+ error, "queue size must not be greater than %u", IDPF_QUEUE_SZ_MAX);
+ return 1;
+ }
+ if ((args->rxq_size < IDPF_QUEUE_SZ_MIN) ||
+ (args->txq_size < IDPF_QUEUE_SZ_MIN))
+ {
+ args->rv = VNET_API_ERROR_INVALID_VALUE;
+ args->error = clib_error_return (
+ error, "queue size must not be smaller than %u", IDPF_QUEUE_SZ_MIN);
+ return 1;
+ }
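+  /* ring index math elsewhere relies on sizes being powers of two */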
+ if ((args->rxq_size & (args->rxq_size - 1)) ||
+ (args->txq_size & (args->txq_size - 1)))
+ {
+ args->rv = VNET_API_ERROR_INVALID_VALUE;
+ args->error =
+ clib_error_return (error, "queue size must be a power of two");
+ return 1;
+ }
+ return 0;
+}
+
+void
+idpf_process_one_device (vlib_main_t *vm, idpf_device_t *id, int is_irq)
+{
+ /* placeholder */
+ return;
+}
+
+static uword
+idpf_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ idpf_main_t *im = &idpf_main;
+ uword *event_data = 0, event_type;
+ int enabled = 0, irq;
+ f64 last_run_duration = 0;
+ f64 last_periodic_time = 0;
+ idpf_device_t **dev_pointers = 0;
+ u32 i;
+
+ while (1)
+ {
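+      /* once enabled, wake up roughly every 5 seconds, minus the time the
+       * last run took */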
+ if (enabled)
+ vlib_process_wait_for_event_or_clock (vm, 5.0 - last_run_duration);
+ else
+ vlib_process_wait_for_event (vm);
+
+ event_type = vlib_process_get_events (vm, &event_data);
+ irq = 0;
+
+ switch (event_type)
+ {
+ case ~0:
+ last_periodic_time = vlib_time_now (vm);
+ break;
+ case IDPF_PROCESS_EVENT_START:
+ enabled = 1;
+ break;
+ case IDPF_PROCESS_EVENT_DELETE_IF:
+ for (int i = 0; i < vec_len (event_data); i++)
+ {
+ idpf_device_t *id = idpf_get_device (event_data[i]);
+ idpf_delete_if (vm, id, /* with_barrier */ 1);
+ }
+ if (pool_elts (im->devices) < 1)
+ enabled = 0;
+ break;
+ case IDPF_PROCESS_EVENT_AQ_INT:
+ irq = 1;
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ vec_reset_length (event_data);
+
+ if (enabled == 0)
+ continue;
+
+ /* create local list of device pointers as device pool may grow
+ * during suspend */
+ vec_reset_length (dev_pointers);
+
+ pool_foreach_index (i, im->devices)
+ {
+ vec_add1 (dev_pointers, idpf_get_device (i));
+ }
+
+ vec_foreach_index (i, dev_pointers)
+ {
+ idpf_process_one_device (vm, dev_pointers[i], irq);
+	}
+
+ last_run_duration = vlib_time_now (vm) - last_periodic_time;
+ }
+ return 0;
+}
+
+VLIB_REGISTER_NODE (idpf_process_node) = {
+ .function = idpf_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "idpf-process",
+};
+
+void
+idpf_create_if (vlib_main_t *vm, idpf_create_if_args_t *args)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_eth_interface_registration_t eir = {};
+ idpf_main_t *im = &idpf_main;
+ idpf_device_t *id, **idp;
+ vlib_pci_dev_handle_t h;
+ clib_error_t *error = 0;
+ int i, j, v;
+
+ /* check input args */
+ if (idpf_validate_queue_size (args) != 0)
+ return;
+
+ pool_foreach (idp, im->devices)
+ {
+ if ((*idp)->pci_addr.as_u32 == args->addr.as_u32)
+ {
+ args->rv = VNET_API_ERROR_ADDRESS_IN_USE;
+ args->error =
+ clib_error_return (error, "%U: %s", format_vlib_pci_addr,
+ &args->addr, "pci address in use");
+ return;
+ }
+ }
+
+ pool_get (im->devices, idp);
+ idp[0] = id =
+ clib_mem_alloc_aligned (sizeof (idpf_device_t), CLIB_CACHE_LINE_BYTES);
+ clib_memset (id, 0, sizeof (idpf_device_t));
+ id->mbx_resp = clib_mem_alloc (IDPF_DFLT_MBX_BUF_SIZE);
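+  /* the pool slot index doubles as the device instance number */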
+ id->dev_instance = idp - im->devices;
+ id->per_interface_next_index = ~0;
+ id->name = vec_dup (args->name);
+
+ if ((error =
+ vlib_pci_device_open (vm, &args->addr, idpf_pci_device_ids, &h)))
+ {
+      pool_put (im->devices, idp);
+      vec_free (id->name);
+      clib_mem_free (id->mbx_resp);
+      clib_mem_free (id);
+ args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+ args->error = clib_error_return (error, "pci-addr %U",
+ format_vlib_pci_addr, &args->addr);
+ return;
+ }
+ id->pci_dev_handle = h;
+ id->pci_addr = args->addr;
+ id->numa_node = vlib_pci_get_numa_node (vm, h);
+
+ vlib_pci_set_private_data (vm, h, id->dev_instance);
+
+ if ((error = vlib_pci_bus_master_enable (vm, h)))
+ goto error;
+
+ if ((error = vlib_pci_map_region (vm, h, 0, &id->bar0)))
+ goto error;
+
+ if (vlib_pci_supports_virtual_addr_dma (vm, h))
+ id->flags |= IDPF_DEVICE_F_VA_DMA;
+
+ if ((error = idpf_device_init (vm, im, id, args)))
+ goto error;
+
+ /* create interface */
+ eir.dev_class_index = idpf_device_class.index;
+ eir.dev_instance = id->dev_instance;
+ eir.address = id->hwaddr;
+ eir.cb.flag_change = idpf_flag_change;
+ id->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+
+ ethernet_set_flags (vnm, id->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+
+ vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, id->hw_if_index);
+ args->sw_if_index = id->sw_if_index = sw->sw_if_index;
+
+ vnet_hw_if_set_caps (vnm, id->hw_if_index,
+ VNET_HW_IF_CAP_INT_MODE | VNET_HW_IF_CAP_MAC_FILTER |
+ VNET_HW_IF_CAP_TX_CKSUM | VNET_HW_IF_CAP_TCP_GSO);
+
+ for (v = 0; v < id->cur_vport_nb; v++)
+ {
+ for (j = 0; j < id->n_rx_queues; j++)
+ {
+ u32 qi;
+ i = v * id->n_rx_queues + j;
+ qi = vnet_hw_if_register_rx_queue (vnm, id->hw_if_index, i,
+ VNET_HW_IF_RXQ_THREAD_ANY);
+ id->vports[v]->rxqs[j].queue_index = qi;
+ }
+ for (j = 0; j < id->n_tx_queues; j++)
+ {
+ u32 qi;
+ i = v * id->n_tx_queues + j;
+ qi = vnet_hw_if_register_tx_queue (vnm, id->hw_if_index, i);
+ id->vports[v]->txqs[j].queue_index = qi;
+ }
+ }
+
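+  /* distribute TX queues across threads round-robin */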
+ for (v = 0; v < id->cur_vport_nb; v++)
+ for (i = 0; i < vlib_get_n_threads (); i++)
+ {
+ u32 qi = id->vports[v]->txqs[i % id->n_tx_queues].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, i);
+ }
+
+ vnet_hw_if_update_runtime_data (vnm, id->hw_if_index);
+
+ if (pool_elts (im->devices) == 1)
+ vlib_process_signal_event (vm, idpf_process_node.index,
+ IDPF_PROCESS_EVENT_START, 0);
+
+ return;
+
+error:
+  args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+  args->error = clib_error_return (error, "pci-addr %U", format_vlib_pci_addr,
+				   &args->addr);
+  /* log while id is still valid; idpf_delete_if frees it */
+  idpf_log_err (id, "error: %U", format_clib_error, args->error);
+  idpf_delete_if (vm, id, /* with_barrier */ 0);
+}
+
+void *
+idpf_alloc_dma_mem (vlib_main_t *vm, idpf_device_t *id, idpf_dma_mem_t *mem,
+ u64 size)
+{
+ void *mz = NULL;
+ vlib_pci_dev_handle_t h = id->pci_dev_handle;
+
+ if (!mem)
+ return NULL;
+
+ /* Fixme */
+ mz = vlib_physmem_alloc_aligned_on_numa (vm, size, CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+ if (!mz)
+ return NULL;
+  if (vlib_pci_map_dma (vm, h, mz))
+    {
+      vlib_physmem_free (vm, mz);
+      return NULL;
+    }
+
+ mem->size = size;
+ if (id->flags & IDPF_DEVICE_F_VA_DMA)
+ {
+ mem->va = mz;
+ clib_memset (mem->va, 0, size);
+ }
+ else
+ {
+ mem->va = NULL;
+ }
+ mem->pa = idpf_dma_addr (vm, id, mz);
+
+ return mem->va;
+}
+
+void
+idpf_free_dma_mem (idpf_device_t *id, idpf_dma_mem_t *mem)
+{
+ mem->size = 0;
+ mem->va = NULL;
+ mem->pa = 0;
+
+ clib_mem_free (mem);
+}
+
+static clib_error_t *
+idpf_interface_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ idpf_device_t *id = idpf_get_device (hi->dev_instance);
+ uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ if (id->flags & IDPF_DEVICE_F_ERROR)
+ return clib_error_return (0, "device is in error state");
+
+ if (is_up)
+ {
+ vnet_hw_interface_set_flags (vnm, id->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ id->flags |= IDPF_DEVICE_F_ADMIN_UP;
+ }
+ else
+ {
+ vnet_hw_interface_set_flags (vnm, id->hw_if_index, 0);
+ id->flags &= ~IDPF_DEVICE_F_ADMIN_UP;
+ }
+ return 0;
+}
+
+VNET_DEVICE_CLASS (idpf_device_class, ) = {
+ .name = "Infrastructure Data Path Function (IDPF) interface",
+ .format_device_name = format_idpf_device_name,
+ .admin_up_down_function = idpf_interface_admin_up_down,
+};
+
+clib_error_t *
+idpf_init (vlib_main_t *vm)
+{
+ idpf_main_t *im = &idpf_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ vec_validate_aligned (im->per_thread_data, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (idpf_init) = {
+ .runs_after = VLIB_INITS ("pci_bus_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/format.c b/src/plugins/idpf/format.c
new file mode 100644
index 00000000000..86a4b884286
--- /dev/null
+++ b/src/plugins/idpf/format.c
@@ -0,0 +1,77 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <idpf/idpf.h>
+
+u8 *
+format_idpf_device_name (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 i = va_arg (*args, u32);
+ idpf_device_t *id = idpf_get_device (i);
+ vlib_pci_addr_t *addr = vlib_pci_get_addr (vm, id->pci_dev_handle);
+
+ if (id->name)
+ return format (s, "%s", id->name);
+
+ s = format (s, "idpf-%x/%x/%x/%x", addr->domain, addr->bus, addr->slot,
+ addr->function);
+ return s;
+}
+
+u8 *
+format_idpf_device_flags (u8 *s, va_list *args)
+{
+ idpf_device_t *id = va_arg (*args, idpf_device_t *);
+ u8 *t = 0;
+
+#define _(a, b, c) \
+ if (id->flags & (1 << a)) \
+ t = format (t, "%s%s", t ? " " : "", c);
+ foreach_idpf_device_flags
+#undef _
+ s = format (s, "%v", t);
+ vec_free (t);
+ return s;
+}
+
+u8 *
+format_idpf_checksum_cap_flags (u8 *s, va_list *args)
+{
+ u32 flags = va_arg (*args, u32);
+ int not_first = 0;
+
+ char *strs[32] = {
+#define _(a, b, c) [a] = c,
+ foreach_idpf_checksum_cap_flag
+#undef _
+ };
+
+ for (int i = 0; i < 32; i++)
+ {
+ if ((flags & (1 << i)) == 0)
+ continue;
+ if (not_first)
+ s = format (s, " ");
+ if (strs[i])
+ s = format (s, "%s", strs[i]);
+ else
+ s = format (s, "unknown(%u)", i);
+ not_first = 1;
+ }
+ return s;
+}
diff --git a/src/plugins/idpf/idpf.api b/src/plugins/idpf/idpf.api
new file mode 100644
index 00000000000..5d02957ac38
--- /dev/null
+++ b/src/plugins/idpf/idpf.api
@@ -0,0 +1,80 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+option version = "1.0.0";
+import "vnet/interface_types.api";
+
+/** \brief
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param pci_addr - pci address as unsigned 32bit integer:
+                      0-15 domain, 16-23 bus, 24-28 slot, 29-31 function
+                      ddddddddddddddddbbbbbbbbsssssfff
+    @param rxq_single - enable single queue model for receive (0 selects the
+                        split queue model)
+    @param txq_single - enable single queue model for transmit (0 selects the
+                        split queue model)
+    @param rxq_num - number of receive queues
+    @param txq_num - number of transmit queues
+    @param rxq_size - receive queue size
+    @param txq_size - transmit queue size
+    @param req_vport_nb - number of requested vports
+*/
+
+define idpf_create
+{
+ u32 client_index;
+ u32 context;
+
+ u32 pci_addr;
+ u16 rxq_single;
+ u16 txq_single;
+ u16 rxq_num;
+ u16 txq_num;
+ u16 rxq_size;
+ u16 txq_size;
+ u16 req_vport_nb;
+ option vat_help = "<pci-address> [vport-num <size>] [rx-single <size>] [tx-single <size>] [rxq-num <size>] [txq-num <size>] [rxq-size <size>] [txq-size <size>]";
+};
+
+/** \brief
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param sw_if_index - software index for the new idpf interface
+*/
+
+define idpf_create_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index
+*/
+
+autoreply define idpf_delete
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_interface_index_t sw_if_index;
+ option vat_help = "<sw_if_index>";
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/idpf.h b/src/plugins/idpf/idpf.h
new file mode 100644
index 00000000000..0bac575d4b4
--- /dev/null
+++ b/src/plugins/idpf/idpf.h
@@ -0,0 +1,929 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _IDPF_H_
+#define _IDPF_H_
+
+#include <vlib/vlib.h>
+#include <vppinfra/ring.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
+
+#include <vppinfra/types.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/lock.h>
+
+#include <vlib/log.h>
+
+#include <vnet/interface.h>
+
+#include <vnet/devices/devices.h>
+#include <vnet/flow/flow.h>
+
+#include <idpf/virtchnl2.h>
+#include <sys/queue.h>
+
+#define BIT(a) (1UL << (a))
+
+/*
+ * LAN PF register
+ */
+#define MAKEMASK(m, s) ((m) << (s))
+
+/* Receive queues */
+#define PF_QRX_BASE 0x00000000
+#define PF_QRX_TAIL(_QRX) (PF_QRX_BASE + (((_QRX) *0x1000)))
+#define PF_QRX_BUFFQ_BASE 0x03000000
+#define PF_QRX_BUFFQ_TAIL(_QRX) (PF_QRX_BUFFQ_BASE + (((_QRX) *0x1000)))
+
+/* Transmit queues */
+#define PF_QTX_BASE 0x05000000
+#define PF_QTX_COMM_DBELL(_DBQM) (PF_QTX_BASE + ((_DBQM) *0x1000))
+
+/* Control(PF Mailbox) Queue */
+#define PF_FW_BASE 0x08400000
+
+#define PF_FW_ARQBAL (PF_FW_BASE)
+#define PF_FW_ARQBAH (PF_FW_BASE + 0x4)
+#define PF_FW_ARQLEN (PF_FW_BASE + 0x8)
+#define PF_FW_ARQLEN_ARQLEN_S 0
+#define PF_FW_ARQLEN_ARQLEN_M MAKEMASK (0x1FFF, PF_FW_ARQLEN_ARQLEN_S)
+#define PF_FW_ARQLEN_ARQVFE_S 28
+#define PF_FW_ARQLEN_ARQVFE_M BIT (PF_FW_ARQLEN_ARQVFE_S)
+#define PF_FW_ARQLEN_ARQOVFL_S 29
+#define PF_FW_ARQLEN_ARQOVFL_M BIT (PF_FW_ARQLEN_ARQOVFL_S)
+#define PF_FW_ARQLEN_ARQCRIT_S 30
+#define PF_FW_ARQLEN_ARQCRIT_M BIT (PF_FW_ARQLEN_ARQCRIT_S)
+#define PF_FW_ARQLEN_ARQENABLE_S 31
+#define PF_FW_ARQLEN_ARQENABLE_M BIT (PF_FW_ARQLEN_ARQENABLE_S)
+#define PF_FW_ARQH (PF_FW_BASE + 0xC)
+#define PF_FW_ARQH_ARQH_S 0
+#define PF_FW_ARQH_ARQH_M MAKEMASK (0x1FFF, PF_FW_ARQH_ARQH_S)
+#define PF_FW_ARQT (PF_FW_BASE + 0x10)
+
+#define PF_FW_ATQBAL (PF_FW_BASE + 0x14)
+#define PF_FW_ATQBAH (PF_FW_BASE + 0x18)
+#define PF_FW_ATQLEN (PF_FW_BASE + 0x1C)
+#define PF_FW_ATQLEN_ATQLEN_S 0
+#define PF_FW_ATQLEN_ATQLEN_M MAKEMASK (0x3FF, PF_FW_ATQLEN_ATQLEN_S)
+#define PF_FW_ATQLEN_ATQVFE_S 28
+#define PF_FW_ATQLEN_ATQVFE_M BIT (PF_FW_ATQLEN_ATQVFE_S)
+#define PF_FW_ATQLEN_ATQOVFL_S 29
+#define PF_FW_ATQLEN_ATQOVFL_M BIT (PF_FW_ATQLEN_ATQOVFL_S)
+#define PF_FW_ATQLEN_ATQCRIT_S 30
+#define PF_FW_ATQLEN_ATQCRIT_M BIT (PF_FW_ATQLEN_ATQCRIT_S)
+#define PF_FW_ATQLEN_ATQENABLE_S 31
+#define PF_FW_ATQLEN_ATQENABLE_M BIT (PF_FW_ATQLEN_ATQENABLE_S)
+#define PF_FW_ATQH (PF_FW_BASE + 0x20)
+#define PF_FW_ATQH_ATQH_S 0
+#define PF_FW_ATQH_ATQH_M MAKEMASK (0x3FF, PF_FW_ATQH_ATQH_S)
+#define PF_FW_ATQT (PF_FW_BASE + 0x24)
+
+/* Interrupts */
+#define PF_GLINT_BASE 0x08900000
+#define PF_GLINT_DYN_CTL_ITR_INDX_S 3
+#define PF_GLINT_DYN_CTL_ITR_INDX_M MAKEMASK (0x3, PF_GLINT_DYN_CTL_ITR_INDX_S)
+#define PF_GLINT_DYN_CTL_INTERVAL_S 5
+#define PF_GLINT_DYN_CTL_INTERVAL_M BIT (PF_GLINT_DYN_CTL_INTERVAL_S)
+#define PF_GLINT_DYN_CTL_WB_ON_ITR_S 30
+#define PF_GLINT_DYN_CTL_WB_ON_ITR_M BIT (PF_GLINT_DYN_CTL_WB_ON_ITR_S)
+
+/* Generic registers */
+#define PFGEN_RSTAT 0x08407008 /* PFR Status */
+#define PFGEN_RSTAT_PFR_STATE_S 0
+#define PFGEN_RSTAT_PFR_STATE_M MAKEMASK (0x3, PFGEN_RSTAT_PFR_STATE_S)
+#define PFGEN_CTRL 0x0840700C
+#define PFGEN_CTRL_PFSWR BIT (0)
+
+#define IDPF_CTLQ_ID -1
+#define IDPF_CTLQ_LEN 64
+#define IDPF_DFLT_MBX_BUF_SIZE 4096
+
+#define IDPF_MAX_NUM_QUEUES 256
+#define IDPF_MIN_BUF_SIZE 1024
+#define IDPF_MAX_FRAME_SIZE 9728
+#define IDPF_MAX_PKT_TYPE 1024
+#define IDPF_QUEUE_SZ_MAX 4096
+#define IDPF_QUEUE_SZ_MIN 64
+
+#define IDPF_RESET_SUSPEND_TIME 20e-3
+#define IDPF_RESET_MAX_WAIT_TIME 1
+
+#define IDPF_SEND_TO_PF_SUSPEND_TIME 10e-3
+#define IDPF_SEND_TO_PF_MAX_WAIT_TIME 1
+#define IDPF_SEND_TO_PF_MAX_TRY_TIMES 200
+
+#define IDPF_RX_MAX_DESC_IN_CHAIN 5
+
+#define IDPF_MAX_VPORT_NUM 8
+#define IDPF_DFLT_Q_VEC_NUM 1
+#define IDPF_DFLT_INTERVAL 16
+
+#define IDPF_DEFAULT_RXQ_NUM 16
+#define IDPF_DEFAULT_TXQ_NUM 16
+
+#define IDPF_ETH_ALEN 6
+
+#define IDPF_INVALID_VPORT_IDX 0xffff
+#define IDPF_TXQ_PER_GRP 1
+#define IDPF_TX_COMPLQ_PER_GRP 1
+#define IDPF_RXQ_PER_GRP 1
+#define IDPF_RX_BUFQ_PER_GRP 2
+#define IDPF_RX_BUF_STRIDE 64
+
+/* Maximum buffer lengths for all control queue types */
+#define IDPF_CTLQ_MAX_RING_SIZE 1024
+#define IDPF_CTLQ_MAX_BUF_LEN 4096
+
+#define IDPF_HI_DWORD(x) ((u32) ((((x) >> 16) >> 16) & 0xFFFFFFFF))
+#define IDPF_LO_DWORD(x) ((u32) ((x) &0xFFFFFFFF))
+#define IDPF_HI_WORD(x) ((u16) (((x) >> 16) & 0xFFFF))
+#define IDPF_LO_WORD(x) ((u16) ((x) &0xFFFF))
+
+#define IDPF_CTLQ_DESC(R, i) (&(((idpf_ctlq_desc_t *) ((R)->desc_ring.va))[i]))
+
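+/* count of free descriptors in the ring; one slot is kept unused so a full
+ * ring can be told apart from an empty one */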
+#define IDPF_CTLQ_DESC_UNUSED(R) \
+ (u16) ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->ring_size) + \
+ (R)->next_to_clean - (R)->next_to_use - 1)
+
+#define IDPF_GET_PTYPE_SIZE(p) \
+ (sizeof (virtchnl2_ptype_t) + \
+ (((p)->proto_id_count ? ((p)->proto_id_count - 1) : 0) * \
+ sizeof ((p)->proto_id[0])))
+
+/* log configuration */
+extern vlib_log_class_registration_t idpf_log;
+extern vlib_log_class_registration_t idpf_stats_log;
+
+#define idpf_log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, idpf_log.class, "%U: " f, \
+ format_vlib_pci_addr, &dev->pci_addr, ##__VA_ARGS__)
+
+#define idpf_log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, idpf_log.class, "%U: " f, \
+ format_vlib_pci_addr, &dev->pci_addr, ##__VA_ARGS__)
+
+#define idpf_log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, idpf_log.class, "%U: " f, \
+ format_vlib_pci_addr, &dev->pci_addr, ##__VA_ARGS__)
+
+#define idpf_stats_log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, idpf_stats_log.class, "%U: " f, \
+ format_vlib_pci_addr, &dev->pci_addr, ##__VA_ARGS__)
+
+/* List handler */
+#ifndef LIST_HEAD_TYPE
+#define LIST_HEAD_TYPE(list_name, type) LIST_HEAD (list_name, type)
+#endif
+
+#ifndef LIST_ENTRY_TYPE
+#define LIST_ENTRY_TYPE(type) LIST_ENTRY (type)
+#endif
+
+#ifndef LIST_FOR_EACH_ENTRY_SAFE
+#define LIST_FOR_EACH_ENTRY_SAFE(pos, temp, head, entry_type, list) \
+ LIST_FOREACH (pos, head, list)
+#endif
+
+#ifndef LIST_FOR_EACH_ENTRY
+#define LIST_FOR_EACH_ENTRY(pos, head, entry_type, list) \
+ LIST_FOREACH (pos, head, list)
+#endif
+
+#define foreach_idpf_device_flags \
+ _ (0, INITIALIZED, "initialized") \
+ _ (1, ERROR, "error") \
+ _ (2, ADMIN_UP, "admin-up") \
+ _ (3, VA_DMA, "vaddr-dma") \
+ _ (4, LINK_UP, "link-up") \
+ _ (6, ELOG, "elog") \
+ _ (7, PROMISC, "promisc") \
+ _ (8, RX_INT, "rx-interrupts") \
+ _ (9, RX_FLOW_OFFLOAD, "rx-flow-offload")
+
+enum
+{
+#define _(a, b, c) IDPF_DEVICE_F_##b = (1 << a),
+ foreach_idpf_device_flags
+#undef _
+};
+
+#define IDPF_PTYPE_UNKNOWN 0x00000000
+#define IDPF_PTYPE_L2_ETHER 0x00000001
+#define IDPF_PTYPE_L2_ETHER_TIMESYNC 0x00000002
+#define IDPF_PTYPE_L2_ETHER_ARP 0x00000003
+#define IDPF_PTYPE_L2_ETHER_LLDP 0x00000004
+#define IDPF_PTYPE_L2_ETHER_NSH 0x00000005
+#define IDPF_PTYPE_L2_ETHER_VLAN 0x00000006
+#define IDPF_PTYPE_L2_ETHER_QINQ 0x00000007
+#define IDPF_PTYPE_L2_ETHER_PPPOE 0x00000008
+#define IDPF_PTYPE_L2_ETHER_FCOE 0x00000009
+#define IDPF_PTYPE_L2_ETHER_MPLS 0x0000000a
+#define IDPF_PTYPE_L2_MASK 0x0000000f
+#define IDPF_PTYPE_L3_IPV4 0x00000010
+#define IDPF_PTYPE_L3_IPV4_EXT 0x00000030
+#define IDPF_PTYPE_L3_IPV6 0x00000040
+#define IDPF_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090
+#define IDPF_PTYPE_L3_IPV6_EXT 0x000000c0
+#define IDPF_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0
+#define IDPF_PTYPE_L3_MASK 0x000000f0
+#define IDPF_PTYPE_L4_TCP 0x00000100
+#define IDPF_PTYPE_L4_UDP 0x00000200
+#define IDPF_PTYPE_L4_FRAG 0x00000300
+#define IDPF_PTYPE_L4_SCTP 0x00000400
+#define IDPF_PTYPE_L4_ICMP 0x00000500
+#define IDPF_PTYPE_L4_NONFRAG 0x00000600
+#define IDPF_PTYPE_L4_IGMP 0x00000700
+#define IDPF_PTYPE_L4_MASK 0x00000f00
+#define IDPF_PTYPE_TUNNEL_IP 0x00001000
+#define IDPF_PTYPE_TUNNEL_GRE 0x00002000
+#define IDPF_PTYPE_TUNNEL_VXLAN 0x00003000
+#define IDPF_PTYPE_TUNNEL_NVGRE 0x00004000
+#define IDPF_PTYPE_TUNNEL_GENEVE 0x00005000
+#define IDPF_PTYPE_TUNNEL_GRENAT 0x00006000
+#define IDPF_PTYPE_TUNNEL_GTPC 0x00007000
+#define IDPF_PTYPE_TUNNEL_GTPU 0x00008000
+#define IDPF_PTYPE_TUNNEL_ESP 0x00009000
+#define IDPF_PTYPE_TUNNEL_L2TP 0x0000a000
+#define IDPF_PTYPE_TUNNEL_VXLAN_GPE 0x0000b000
+#define IDPF_PTYPE_TUNNEL_MPLS_IN_GRE 0x0000c000
+#define IDPF_PTYPE_TUNNEL_MPLS_IN_UDP 0x0000d000
+#define IDPF_PTYPE_TUNNEL_MASK 0x0000f000
+#define IDPF_PTYPE_INNER_L2_ETHER 0x00010000
+#define IDPF_PTYPE_INNER_L2_ETHER_VLAN 0x00020000
+#define IDPF_PTYPE_INNER_L2_ETHER_QINQ 0x00030000
+#define IDPF_PTYPE_INNER_L2_MASK 0x000f0000
+#define IDPF_PTYPE_INNER_L3_IPV4 0x00100000
+#define IDPF_PTYPE_INNER_L3_IPV4_EXT 0x00200000
+#define IDPF_PTYPE_INNER_L3_IPV6 0x00300000
+#define IDPF_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000
+#define IDPF_PTYPE_INNER_L3_IPV6_EXT 0x00500000
+#define IDPF_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000
+#define IDPF_PTYPE_INNER_L3_MASK 0x00f00000
+#define IDPF_PTYPE_INNER_L4_TCP 0x01000000
+#define IDPF_PTYPE_INNER_L4_UDP 0x02000000
+#define IDPF_PTYPE_INNER_L4_FRAG 0x03000000
+#define IDPF_PTYPE_INNER_L4_SCTP 0x04000000
+#define IDPF_PTYPE_INNER_L4_ICMP 0x05000000
+#define IDPF_PTYPE_INNER_L4_NONFRAG 0x06000000
+#define IDPF_PTYPE_INNER_L4_MASK 0x0f000000
+#define IDPF_PTYPE_ALL_MASK 0x0fffffff
+
+/* Flags sub-structure
+ * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 |
+ * |DD |CMP|ERR| * RSV * |FTYPE | *RSV* |RD |VFC|BUF| HOST_ID |
+ */
+/* command flags and offsets */
+#define IDPF_CTLQ_FLAG_DD_S 0
+#define IDPF_CTLQ_FLAG_CMP_S 1
+#define IDPF_CTLQ_FLAG_ERR_S 2
+#define IDPF_CTLQ_FLAG_FTYPE_S 6
+#define IDPF_CTLQ_FLAG_RD_S 10
+#define IDPF_CTLQ_FLAG_VFC_S 11
+#define IDPF_CTLQ_FLAG_BUF_S 12
+#define IDPF_CTLQ_FLAG_HOST_ID_S 13
+
+#define IDPF_CTLQ_FLAG_DD BIT (IDPF_CTLQ_FLAG_DD_S) /* 0x1 */
+#define IDPF_CTLQ_FLAG_CMP BIT (IDPF_CTLQ_FLAG_CMP_S) /* 0x2 */
+#define IDPF_CTLQ_FLAG_ERR BIT (IDPF_CTLQ_FLAG_ERR_S) /* 0x4 */
+#define IDPF_CTLQ_FLAG_FTYPE_VM \
+ BIT (IDPF_CTLQ_FLAG_FTYPE_S) /* 0x40 */
+#define IDPF_CTLQ_FLAG_FTYPE_PF BIT (IDPF_CTLQ_FLAG_FTYPE_S + 1) /* 0x80 */
+#define IDPF_CTLQ_FLAG_RD BIT (IDPF_CTLQ_FLAG_RD_S) /* 0x400 */
+#define IDPF_CTLQ_FLAG_VFC BIT (IDPF_CTLQ_FLAG_VFC_S) /* 0x800 */
+#define IDPF_CTLQ_FLAG_BUF BIT (IDPF_CTLQ_FLAG_BUF_S) /* 0x1000 */
+
+/* Host ID is a special field that has 3b and not a 1b flag */
+#define IDPF_CTLQ_FLAG_HOST_ID_M MAKEMASK (0x7000UL, IDPF_CTLQ_FLAG_HOST_ID_S)
+
+#define IDPF_FLEX_TXD_QW1_DTYPE_S 0
+#define IDPF_FLEX_TXD_QW1_DTYPE_M MAKEMASK (0x1FUL, IDPF_FLEX_TXD_QW1_DTYPE_S)
+#define IDPF_FLEX_TXD_QW1_CMD_S 5
+#define IDPF_FLEX_TXD_QW1_CMD_M MAKEMASK (0x7FFUL, IDPF_FLEX_TXD_QW1_CMD_S)
+
+typedef struct idpf_vport idpf_vport_t;
+
+typedef volatile struct
+{
+ u64 buf_addr; /* Packet buffer address */
+ struct
+ {
+ u64 cmd_dtype;
+ union
+ {
+ /* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_DATA_(0x03) */
+ u8 raw[4];
+
+ /* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_TSYN_L2TAG1 (0x06) */
+ struct
+ {
+ u16 l2tag1;
+ u8 flex;
+ u8 tsync;
+ } tsync;
+
+ /* DTYPE=IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2 (0x07) */
+ struct
+ {
+ u16 l2tag1;
+ u16 l2tag2;
+ } l2tags;
+ } flex;
+ u16 buf_size;
+ } qw1;
+} idpf_flex_tx_desc_t;
+
+typedef struct
+{
+ union
+ {
+ u64 qword[2];
+ };
+} idpf_tx_desc_t;
+
+STATIC_ASSERT_SIZEOF (idpf_tx_desc_t, 16);
+
+typedef struct idpf_rxq
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile u32 *qrx_tail;
+ u16 next;
+ u16 size;
+ virtchnl2_rx_desc_t *descs;
+ u32 *bufs;
+ u16 n_enqueued;
+ u8 int_mode;
+ u8 buffer_pool_index;
+ u32 queue_index;
+
+ struct idpf_rxq *bufq1;
+ struct idpf_rxq *bufq2;
+} idpf_rxq_t;
+
+typedef struct idpf_txq
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile u32 *qtx_tail;
+ u16 next;
+ u16 size;
+ u32 *ph_bufs;
+ clib_spinlock_t lock;
+ idpf_tx_desc_t *descs;
+ u32 *bufs;
+ u16 n_enqueued;
+ u16 *rs_slots;
+
+ idpf_tx_desc_t *tmp_descs;
+ u32 *tmp_bufs;
+ u32 queue_index;
+
+ struct idpf_txq *complq;
+} idpf_txq_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 flags;
+ u32 per_interface_next_index;
+ u32 cmd_retval;
+ u8 *mbx_resp;
+ virtchnl2_op_t pend_cmd;
+
+ u32 dev_instance;
+ u32 sw_if_index;
+ u32 hw_if_index;
+ vlib_pci_dev_handle_t pci_dev_handle;
+ u32 numa_node;
+ void *bar0;
+ u8 *name;
+
+ /* queues */
+ u16 n_tx_queues;
+ u16 n_rx_queues;
+ u32 txq_model;
+ u32 rxq_model;
+
+ u16 vsi_id;
+ u8 hwaddr[6];
+ u16 max_mtu;
+ vlib_pci_addr_t pci_addr;
+
+ /* error */
+ clib_error_t *error;
+
+ /* hw info */
+ u8 *hw_addr;
+ u64 hw_addr_len;
+
+ /* control queue - send and receive */
+ struct idpf_ctlq_info *asq;
+ struct idpf_ctlq_info *arq;
+
+ /* pci info */
+ u16 device_id;
+ u16 vendor_id;
+ u16 subsystem_device_id;
+ u16 subsystem_vendor_id;
+
+ /* max config queue number per vc message */
+ u32 max_rxq_per_msg;
+ u32 max_txq_per_msg;
+
+ /* vport info */
+ idpf_vport_t **vports;
+ u16 max_vport_nb;
+ u16 req_vports[IDPF_MAX_VPORT_NUM];
+ u16 req_vport_nb;
+ u16 cur_vports;
+ u16 cur_vport_nb;
+ u16 cur_vport_idx;
+
+ u32 ptype_tbl[IDPF_MAX_PKT_TYPE];
+
+ /* device capability */
+ u32 csum_caps;
+ u32 seg_caps;
+ u32 hsplit_caps;
+ u32 rsc_caps;
+ u64 rss_caps;
+ u64 other_caps;
+
+ u16 max_rx_q;
+ u16 max_tx_q;
+ u16 max_rx_bufq;
+ u16 max_tx_complq;
+ u16 max_sriov_vfs;
+ u16 max_vports;
+ u16 default_num_vports;
+
+ u32 device_type;
+
+ LIST_HEAD_TYPE (list_head, idpf_ctlq_info) cq_list_head;
+} idpf_device_t;
+
+/* memory allocation tracking */
+typedef struct
+{
+ void *va;
+ u64 pa;
+ u32 size;
+} idpf_dma_mem_t;
+
+/* Message type read in virtual channel from PF */
+typedef enum
+{
+  IDPF_MSG_ERR = -1, /* Error while accessing admin queue */
+ IDPF_MSG_NON, /* Read nothing from admin queue */
+ IDPF_MSG_SYS, /* Read system msg from admin queue */
+ IDPF_MSG_CMD, /* Read async command result */
+} idpf_vc_result_t;
+
+typedef struct
+{
+ u32 tx_start_qid;
+ u32 rx_start_qid;
+ u32 tx_compl_start_qid;
+ u32 rx_buf_start_qid;
+
+ u64 tx_qtail_start;
+ u32 tx_qtail_spacing;
+ u64 rx_qtail_start;
+ u32 rx_qtail_spacing;
+ u64 tx_compl_qtail_start;
+ u32 tx_compl_qtail_spacing;
+ u64 rx_buf_qtail_start;
+ u32 rx_buf_qtail_spacing;
+} idpf_chunks_info_t;
+
+typedef struct
+{
+ u32 ops;
+ u8 *in_args; /* buffer for sending */
+ u32 in_args_size; /* buffer size for sending */
+ u8 *out_buffer; /* buffer for response */
+ u32 out_size; /* buffer size for response */
+} idpf_cmd_info_t;
+
+typedef struct
+{
+ idpf_device_t *id;
+ u16 idx;
+} idpf_vport_param_t;
+
+struct idpf_vport
+{
+ idpf_device_t *id;
+ virtchnl2_create_vport_t *vport_info;
+ u16 idx;
+ u16 vport_id;
+ u32 txq_model;
+ u32 rxq_model;
+ u32 num_tx_q;
+ idpf_txq_t *txqs;
+ u16 num_tx_complq;
+ u16 num_rx_q;
+ idpf_rxq_t *rxqs;
+ u16 num_rx_bufq;
+
+ u16 max_mtu;
+ u8 default_mac_addr[VIRTCHNL2_ETH_LENGTH_OF_ADDRESS];
+
+ u16 max_pkt_len; /* Maximum packet length */
+
+  /* MSIX info */
+ virtchnl2_queue_vector_t *qv_map; /* queue vector mapping */
+ u16 max_vectors;
+ virtchnl2_alloc_vectors_t *recv_vectors;
+
+ /* Chunk info */
+ idpf_chunks_info_t chunks_info;
+
+ virtchnl2_vport_stats_t eth_stats_offset;
+};
+
+#define IDPF_RX_VECTOR_SZ VLIB_FRAME_SIZE
+
+typedef enum
+{
+ IDPF_PROCESS_REQ_ADD_DEL_ETH_ADDR = 1,
+ IDPF_PROCESS_REQ_CONFIG_PROMISC_MDDE = 2,
+ IDPF_PROCESS_REQ_PROGRAM_FLOW = 3,
+} idpf_process_req_type_t;
+
+typedef struct
+{
+ idpf_process_req_type_t type;
+ u32 dev_instance;
+ u32 calling_process_index;
+ u8 eth_addr[6];
+ int is_add, is_enable;
+
+ /* below parameters are used for 'program flow' event */
+ u8 *rule;
+ u32 rule_len;
+ u8 *program_status;
+ u32 status_len;
+
+ clib_error_t *error;
+} idpf_process_req_t;
+
+typedef struct
+{
+ u64 qw1s[IDPF_RX_MAX_DESC_IN_CHAIN - 1];
+ u32 buffers[IDPF_RX_MAX_DESC_IN_CHAIN - 1];
+} idpf_rx_tail_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vlib_buffer_t *bufs[IDPF_RX_VECTOR_SZ];
+ u16 next[IDPF_RX_VECTOR_SZ];
+ u64 qw1s[IDPF_RX_VECTOR_SZ];
+ u32 flow_ids[IDPF_RX_VECTOR_SZ];
+ idpf_rx_tail_t tails[IDPF_RX_VECTOR_SZ];
+ vlib_buffer_t buffer_template;
+} idpf_per_thread_data_t;
+
+typedef struct
+{
+ u16 msg_id_base;
+
+ idpf_device_t **devices;
+ idpf_per_thread_data_t *per_thread_data;
+} idpf_main_t;
+
+extern idpf_main_t idpf_main;
+
+typedef struct
+{
+ vlib_pci_addr_t addr;
+ u8 *name;
+ u16 rxq_single;
+ u16 txq_single;
+ u16 rxq_num;
+ u16 txq_num;
+ u16 req_vport_nb;
+ u16 rxq_size;
+ u16 txq_size;
+ int rv;
+ u32 sw_if_index;
+ clib_error_t *error;
+} idpf_create_if_args_t;
+
+void idpf_create_if (vlib_main_t *vm, idpf_create_if_args_t *args);
+
+extern vlib_node_registration_t idpf_process_node;
+extern vnet_device_class_t idpf_device_class;
+
+/* format.c */
+format_function_t format_idpf_device_name;
+format_function_t format_idpf_device_flags;
+
+static inline void
+clear_cmd (idpf_device_t *id)
+{
+  /* Return value may be checked in another thread, so we need to ensure
+   * coherence. */
+ CLIB_MEMORY_BARRIER ();
+ id->pend_cmd = VIRTCHNL2_OP_UNKNOWN;
+ id->cmd_retval = VIRTCHNL2_STATUS_SUCCESS;
+}
+
+static_always_inline idpf_device_t *
+idpf_get_device (u32 dev_instance)
+{
+ return pool_elt_at_index (idpf_main.devices, dev_instance)[0];
+}
+
+static inline void
+idpf_reg_write (idpf_device_t *id, u32 addr, u32 val)
+{
+ *(volatile u32 *) ((u8 *) id->bar0 + addr) = val;
+}
+
+static inline u32
+idpf_reg_read (idpf_device_t *id, u32 addr)
+{
+ u32 val = *(volatile u32 *) (id->bar0 + addr);
+ return val;
+}
+
+static inline void
+idpf_reg_flush (idpf_device_t *id)
+{
+ idpf_reg_read (id, PFGEN_RSTAT);
+ asm volatile("" ::: "memory");
+}
+
+typedef struct
+{
+ u16 qid;
+ u16 next_index;
+ u32 hw_if_index;
+ u32 flow_id;
+ u64 qw1s[IDPF_RX_MAX_DESC_IN_CHAIN];
+} idpf_input_trace_t;
+
+/* Error Codes */
+/* Linux kernel driver can't directly use these. Instead, they are mapped to
+ * linux compatible error codes which get translated in the build script.
+ */
+#define IDPF_SUCCESS 0
+#define IDPF_ERR_PARAM -53 /* -EBADR */
+#define IDPF_ERR_NOT_IMPL -95 /* -EOPNOTSUPP */
+#define IDPF_ERR_NOT_READY -16 /* -EBUSY */
+#define IDPF_ERR_BAD_PTR -14 /* -EFAULT */
+#define IDPF_ERR_INVAL_SIZE -90 /* -EMSGSIZE */
+#define IDPF_ERR_DEVICE_NOT_SUPPORTED -19 /* -ENODEV */
+#define IDPF_ERR_FW_API_VER -13 /* -EACCESS */
+#define IDPF_ERR_NO_MEMORY -12 /* -ENOMEM */
+#define IDPF_ERR_CFG -22 /* -EINVAL */
+#define IDPF_ERR_OUT_OF_RANGE -34 /* -ERANGE */
+#define IDPF_ERR_ALREADY_EXISTS -17 /* -EEXIST */
+#define IDPF_ERR_DOES_NOT_EXIST -6 /* -ENXIO */
+#define IDPF_ERR_IN_USE -114 /* -EALREADY */
+#define IDPF_ERR_MAX_LIMIT -109 /* -ETOOMANYREFS */
+#define IDPF_ERR_RESET_ONGOING -104 /* -ECONNRESET */
+
+/* CRQ/CSQ specific error codes */
+#define IDPF_ERR_CTLQ_ERROR -74 /* -EBADMSG */
+#define IDPF_ERR_CTLQ_TIMEOUT -110 /* -ETIMEDOUT */
+#define IDPF_ERR_CTLQ_FULL -28 /* -ENOSPC */
+#define IDPF_ERR_CTLQ_NO_WORK -42 /* -ENOMSG */
+#define IDPF_ERR_CTLQ_EMPTY -105 /* -ENOBUFS */
+
+/* Used for queue init, response and events */
+typedef enum
+{
+ IDPF_CTLQ_TYPE_MAILBOX_TX = 0,
+ IDPF_CTLQ_TYPE_MAILBOX_RX = 1,
+ IDPF_CTLQ_TYPE_CONFIG_TX = 2,
+ IDPF_CTLQ_TYPE_CONFIG_RX = 3,
+ IDPF_CTLQ_TYPE_EVENT_RX = 4,
+ IDPF_CTLQ_TYPE_RDMA_TX = 5,
+ IDPF_CTLQ_TYPE_RDMA_RX = 6,
+ IDPF_CTLQ_TYPE_RDMA_COMPL = 7
+} idpf_ctlq_type_t;
+
+typedef enum
+{
+ IDPF_PROCESS_EVENT_START = 1,
+ IDPF_PROCESS_EVENT_DELETE_IF = 2,
+ IDPF_PROCESS_EVENT_AQ_INT = 3,
+ IDPF_PROCESS_EVENT_REQ = 4,
+} idpf_process_event_t;
+
+/*
+ * Generic Control Queue Structures
+ */
+typedef struct
+{
+ /* used for queue tracking */
+ u32 head;
+ u32 tail;
+ /* Below applies only to default mb (if present) */
+ u32 len;
+ u32 bah;
+ u32 bal;
+ u32 len_mask;
+ u32 len_ena_mask;
+ u32 head_mask;
+} idpf_ctlq_reg_t;
+
+/* Generic queue msg structure */
+typedef struct
+{
+ u8 vmvf_type; /* represents the source of the message on recv */
+#define IDPF_VMVF_TYPE_VF 0
+#define IDPF_VMVF_TYPE_VM 1
+#define IDPF_VMVF_TYPE_PF 2
+ u8 host_id;
+ /* 3b field used only when sending a message to peer - to be used in
+ * combination with target func_id to route the message
+ */
+#define IDPF_HOST_ID_MASK 0x7
+
+ u16 opcode;
+ u16 data_len; /* data_len = 0 when no payload is attached */
+ union
+ {
+ u16 func_id; /* when sending a message */
+ u16 status; /* when receiving a message */
+ };
+ union
+ {
+ struct
+ {
+ u32 chnl_retval;
+ u32 chnl_opcode;
+ } mbx;
+ u64 cookie;
+ } cookie;
+ union
+ {
+#define IDPF_DIRECT_CTX_SIZE 16
+#define IDPF_INDIRECT_CTX_SIZE 8
+ /* 16 bytes of context can be provided or 8 bytes of context
+ * plus the address of a DMA buffer
+ */
+ u8 direct[IDPF_DIRECT_CTX_SIZE];
+ struct
+ {
+ u8 context[IDPF_INDIRECT_CTX_SIZE];
+ idpf_dma_mem_t *payload;
+ } indirect;
+ } ctx;
+} idpf_ctlq_msg_t;
+
+/* Generic queue info structures */
+/* MB, CONFIG and EVENT q do not have extended info */
+typedef struct
+{
+ idpf_ctlq_type_t type;
+ int id; /* absolute queue offset passed as input
+ * -1 for default mailbox if present
+ */
+ u16 len; /* Queue length passed as input */
+ u16 buf_size; /* buffer size passed as input */
+ u64 base_address; /* output, HPA of the Queue start */
+ idpf_ctlq_reg_t reg; /* registers accessed by ctlqs */
+
+ int ext_info_size;
+ void *ext_info; /* Specific to q type */
+} idpf_ctlq_create_info_t;
+
+/* Control Queue information */
+typedef struct idpf_ctlq_info
+{
+ LIST_ENTRY_TYPE (idpf_ctlq_info) cq_list;
+
+ idpf_ctlq_type_t cq_type;
+ int q_id;
+ clib_spinlock_t cq_lock; /* queue lock */
+
+ /* used for interrupt processing */
+ u16 next_to_use;
+ u16 next_to_clean;
+ u16 next_to_post;
+
+ idpf_dma_mem_t desc_ring; /* descriptor ring memory */
+
+ union
+ {
+ idpf_dma_mem_t **rx_buff;
+ idpf_ctlq_msg_t **tx_msg;
+ } bi;
+
+ u16 buf_size; /* queue buffer size */
+ u16 ring_size; /* Number of descriptors */
+ idpf_ctlq_reg_t reg; /* registers accessed by ctlqs */
+} idpf_ctlq_info_t;
+
+/* PF/VF mailbox commands */
+enum idpf_mbx_opc
+{
+ /* idpf_mbq_opc_send_msg_to_pf:
+ * usage: used by PF or VF to send a message to its CPF
+ * target: RX queue and function ID of parent PF taken from HW
+ */
+ idpf_mbq_opc_send_msg_to_pf = 0x0801,
+
+ /* idpf_mbq_opc_send_msg_to_vf:
+ * usage: used by PF to send message to a VF
+ * target: VF control queue ID must be specified in descriptor
+ */
+ idpf_mbq_opc_send_msg_to_vf = 0x0802,
+
+ /* idpf_mbq_opc_send_msg_to_peer_pf:
+ * usage: used by any function to send message to any peer PF
+ * target: RX queue and host of parent PF taken from HW
+ */
+ idpf_mbq_opc_send_msg_to_peer_pf = 0x0803,
+
+ /* idpf_mbq_opc_send_msg_to_peer_drv:
+ * usage: used by any function to send message to any peer driver
+   * target: RX queue and target host must be specified in descriptor
+ */
+ idpf_mbq_opc_send_msg_to_peer_drv = 0x0804,
+};
+
+typedef struct
+{
+ u16 flags;
+ u16 opcode;
+ u16 datalen; /* 0 for direct commands */
+ union
+ {
+ u16 ret_val;
+ u16 pfid_vfid;
+ };
+ u32 cookie_high;
+ u32 cookie_low;
+ union
+ {
+ struct
+ {
+ u32 param0;
+ u32 param1;
+ u32 param2;
+ u32 param3;
+ } direct;
+ struct
+ {
+ u32 param0;
+ u32 param1;
+ u32 addr_high;
+ u32 addr_low;
+ } indirect;
+ u8 raw[16];
+ } params;
+} idpf_ctlq_desc_t;
+
+int idpf_ctlq_init (vlib_main_t *vm, idpf_device_t *id, u8 num_q,
+ idpf_ctlq_create_info_t *q_info);
+int idpf_ctlq_add (vlib_main_t *vm, idpf_device_t *id,
+ idpf_ctlq_create_info_t *qinfo, struct idpf_ctlq_info **cq);
+void idpf_ctlq_remove (idpf_device_t *id, struct idpf_ctlq_info *cq);
+int idpf_ctlq_send (idpf_device_t *id, struct idpf_ctlq_info *cq,
+ u16 num_q_msg, idpf_ctlq_msg_t q_msg[]);
+int idpf_ctlq_recv (struct idpf_ctlq_info *cq, u16 *num_q_msg,
+ idpf_ctlq_msg_t *q_msg);
+int idpf_ctlq_clean_sq (struct idpf_ctlq_info *cq, u16 *clean_count,
+ idpf_ctlq_msg_t *msg_status[]);
+int idpf_ctlq_post_rx_buffs (idpf_device_t *id, struct idpf_ctlq_info *cq,
+ u16 *buff_count, idpf_dma_mem_t **buffs);
+void idpf_ctlq_deinit (idpf_device_t *id);
+int idpf_ctlq_alloc_ring_res (vlib_main_t *vm, idpf_device_t *id,
+ struct idpf_ctlq_info *cq);
+void idpf_ctlq_dealloc_ring_res (idpf_device_t *id, struct idpf_ctlq_info *cq);
+void *idpf_alloc_dma_mem (vlib_main_t *vm, idpf_device_t *id,
+ idpf_dma_mem_t *mem, u64 size);
+void idpf_free_dma_mem (idpf_device_t *id, idpf_dma_mem_t *mem);
+
+#endif /* _IDPF_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/idpf_api.c b/src/plugins/idpf/idpf_api.c
new file mode 100644
index 00000000000..8ca78e62dc0
--- /dev/null
+++ b/src/plugins/idpf/idpf_api.c
@@ -0,0 +1,111 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <idpf/idpf.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <idpf/idpf.api_enum.h>
+#include <idpf/idpf.api_types.h>
+
+#define REPLY_MSG_ID_BASE (im->msg_id_base)
+#include <vlibapi/api_helper_macros.h>
+
+static void
+vl_api_idpf_create_t_handler (vl_api_idpf_create_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ idpf_main_t *im = &idpf_main;
+ vl_api_idpf_create_reply_t *rmp;
+ idpf_create_if_args_t args;
+ int rv;
+
+ clib_memset (&args, 0, sizeof (idpf_create_if_args_t));
+
+ args.addr.as_u32 = ntohl (mp->pci_addr);
+ args.rxq_single = ntohs (mp->rxq_single);
+ args.txq_single = ntohs (mp->txq_single);
+ args.rxq_num = ntohs (mp->rxq_num);
+ args.txq_num = ntohs (mp->txq_num);
+ args.rxq_size = ntohs (mp->rxq_size);
+ args.txq_size = ntohs (mp->txq_size);
+ args.req_vport_nb = ntohs (mp->req_vport_nb);
+
+ idpf_create_if (vm, &args);
+ rv = args.rv;
+
+ REPLY_MACRO2 (VL_API_IDPF_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
+}
+
+static void
+vl_api_idpf_delete_t_handler (vl_api_idpf_delete_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ idpf_main_t *im = &idpf_main;
+ vl_api_idpf_delete_reply_t *rmp;
+ vnet_hw_interface_t *hw;
+ int rv = 0;
+
+ hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm,
+ htonl (mp->sw_if_index));
+ if (hw == NULL || idpf_device_class.index != hw->dev_class_index)
+ {
+ rv = VNET_API_ERROR_INVALID_INTERFACE;
+ goto reply;
+ }
+
+ vlib_process_signal_event (vm, idpf_process_node.index,
+ IDPF_PROCESS_EVENT_DELETE_IF, hw->dev_instance);
+
+reply:
+ REPLY_MACRO (VL_API_IDPF_DELETE_REPLY);
+}
+
+/* set up the API message handling tables */
+#include <idpf/idpf.api.c>
+static clib_error_t *
+idpf_plugin_api_hookup (vlib_main_t *vm)
+{
+ idpf_main_t *ivm = &idpf_main;
+ api_main_t *am = vlibapi_get_main ();
+
+ /* ask for a correctly-sized block of API message decode slots */
+ ivm->msg_id_base = setup_message_id_table ();
+
+ vl_api_set_msg_thread_safe (am, ivm->msg_id_base + VL_API_IDPF_DELETE, 1);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (idpf_plugin_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/idpf_controlq.c b/src/plugins/idpf/idpf_controlq.c
new file mode 100644
index 00000000000..4887bf71c86
--- /dev/null
+++ b/src/plugins/idpf/idpf_controlq.c
@@ -0,0 +1,890 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <idpf/idpf.h>
+
+/**
+ * idpf_ctlq_alloc_desc_ring - Allocate Control Queue (CQ) rings
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ */
+static int
+idpf_ctlq_alloc_desc_ring (vlib_main_t *vm, idpf_device_t *id,
+ struct idpf_ctlq_info *cq)
+{
+ size_t size = cq->ring_size * sizeof (idpf_ctlq_desc_t);
+
+ /* Fixme: alloc dma va */
+ cq->desc_ring.va = idpf_alloc_dma_mem (vm, id, &cq->desc_ring, size);
+ if (!cq->desc_ring.va)
+ return IDPF_ERR_NO_MEMORY;
+
+ return IDPF_SUCCESS;
+}
+
+/**
+ * idpf_ctlq_alloc_bufs - Allocate Control Queue (CQ) buffers
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Allocate the buffer head for all control queues, and if it's a receive
+ * queue, allocate DMA buffers
+ */
+static int
+idpf_ctlq_alloc_bufs (vlib_main_t *vm, idpf_device_t *id,
+ struct idpf_ctlq_info *cq)
+{
+ int i = 0;
+ u16 len;
+
+ /* Do not allocate DMA buffers for transmit queues */
+ if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_TX)
+ return IDPF_SUCCESS;
+
+ /* We'll be allocating the buffer info memory first, then we can
+ * allocate the mapped buffers for the event processing
+ */
+ len = cq->ring_size * sizeof (idpf_dma_mem_t *);
+ cq->bi.rx_buff = (idpf_dma_mem_t **) clib_mem_alloc (len);
+ if (!cq->bi.rx_buff)
+ return IDPF_ERR_NO_MEMORY;
+ clib_memset (cq->bi.rx_buff, 0, len);
+
+ /* allocate the mapped buffers (except for the last one) */
+ for (i = 0; i < cq->ring_size - 1; i++)
+ {
+ idpf_dma_mem_t *bi;
+ int num = 1; /* number of idpf_dma_mem to be allocated */
+
+ cq->bi.rx_buff[i] =
+ (idpf_dma_mem_t *) clib_mem_alloc (num * sizeof (idpf_dma_mem_t));
+ if (!cq->bi.rx_buff[i])
+ goto unwind_alloc_cq_bufs;
+
+ bi = cq->bi.rx_buff[i];
+
+ bi->va = idpf_alloc_dma_mem (vm, id, bi, cq->buf_size);
+ if (!bi->va)
+ {
+ /* unwind will not free the failed entry */
+ clib_mem_free (cq->bi.rx_buff[i]);
+ goto unwind_alloc_cq_bufs;
+ }
+ }
+
+ return IDPF_SUCCESS;
+
+unwind_alloc_cq_bufs:
+ /* don't try to free the one that failed... */
+ i--;
+ for (; i >= 0; i--)
+ {
+ idpf_free_dma_mem (id, cq->bi.rx_buff[i]);
+ clib_mem_free (cq->bi.rx_buff[i]);
+ }
+ clib_mem_free (cq->bi.rx_buff);
+
+ return IDPF_ERR_NO_MEMORY;
+}
+
+/**
+ * idpf_ctlq_free_desc_ring - Free Control Queue (CQ) rings
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * This assumes the posted send buffers have already been cleaned
+ * and de-allocated
+ */
+static void
+idpf_ctlq_free_desc_ring (idpf_device_t *id, struct idpf_ctlq_info *cq)
+{
+ idpf_free_dma_mem (id, &cq->desc_ring);
+}
+
+/**
+ * idpf_ctlq_free_bufs - Free CQ buffer info elements
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Free the DMA buffers for RX queues, and DMA buffer header for both RX and TX
+ * queues. The upper layers are expected to manage freeing of TX DMA buffers
+ */
+static void
+idpf_ctlq_free_bufs (idpf_device_t *id, struct idpf_ctlq_info *cq)
+{
+ void *bi;
+
+ if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_RX)
+ {
+ int i;
+
+      /* free DMA buffers for rx queues */
+ for (i = 0; i < cq->ring_size; i++)
+ {
+ if (cq->bi.rx_buff[i])
+ {
+ idpf_free_dma_mem (id, cq->bi.rx_buff[i]);
+	      /* also free the buffer info element allocated in
+	       * idpf_ctlq_alloc_bufs */
+ clib_mem_free (cq->bi.rx_buff[i]);
+ }
+ }
+
+ bi = (void *) cq->bi.rx_buff;
+ }
+ else
+ {
+ bi = (void *) cq->bi.tx_msg;
+ }
+
+ /* free the buffer header */
+ clib_mem_free (bi);
+}
+
+/**
+ * idpf_ctlq_dealloc_ring_res - Free memory allocated for control queue
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Free the memory used by the ring, buffers and other related structures
+ */
+void
+idpf_ctlq_dealloc_ring_res (idpf_device_t *id, struct idpf_ctlq_info *cq)
+{
+ /* free ring buffers and the ring itself */
+ idpf_ctlq_free_bufs (id, cq);
+ idpf_ctlq_free_desc_ring (id, cq);
+}
+
+/**
+ * idpf_ctlq_alloc_ring_res - allocate memory for descriptor ring and bufs
+ * @hw: pointer to hw struct
+ * @cq: pointer to control queue struct
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+int
+idpf_ctlq_alloc_ring_res (vlib_main_t *vm, idpf_device_t *id,
+ struct idpf_ctlq_info *cq)
+{
+ int ret_code;
+
+ /* verify input for valid configuration */
+ if (!cq->ring_size || !cq->buf_size)
+ return IDPF_ERR_CFG;
+
+ /* allocate the ring memory */
+ ret_code = idpf_ctlq_alloc_desc_ring (vm, id, cq);
+ if (ret_code)
+ return ret_code;
+
+ /* allocate buffers in the rings */
+ ret_code = idpf_ctlq_alloc_bufs (vm, id, cq);
+ if (ret_code)
+ goto idpf_init_cq_free_ring;
+
+ /* success! */
+ return IDPF_SUCCESS;
+
+idpf_init_cq_free_ring:
+ idpf_free_dma_mem (id, &cq->desc_ring);
+ return ret_code;
+}
+
+/**
+ * idpf_ctlq_setup_regs - initialize control queue registers
+ * @cq: pointer to the specific control queue
+ * @q_create_info: structs containing info for each queue to be initialized
+ */
+static void
+idpf_ctlq_setup_regs (struct idpf_ctlq_info *cq,
+ idpf_ctlq_create_info_t *q_create_info)
+{
+ /* set head and tail registers in our local struct */
+ cq->reg.head = q_create_info->reg.head;
+ cq->reg.tail = q_create_info->reg.tail;
+ cq->reg.len = q_create_info->reg.len;
+ cq->reg.bah = q_create_info->reg.bah;
+ cq->reg.bal = q_create_info->reg.bal;
+ cq->reg.len_mask = q_create_info->reg.len_mask;
+ cq->reg.len_ena_mask = q_create_info->reg.len_ena_mask;
+ cq->reg.head_mask = q_create_info->reg.head_mask;
+}
+
+/**
+ * idpf_ctlq_init_regs - Initialize control queue registers
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ * @is_rxq: true if receive control queue, false otherwise
+ *
+ * Initialize registers. The caller is expected to have already initialized the
+ * descriptor ring memory and buffer memory
+ */
+static void
+idpf_ctlq_init_regs (vlib_main_t *vm, idpf_device_t *id,
+ struct idpf_ctlq_info *cq, bool is_rxq)
+{
+ /* Update tail to post pre-allocated buffers for rx queues */
+ if (is_rxq)
+ idpf_reg_write (id, cq->reg.tail, (u32) (cq->ring_size - 1));
+
+  /* For non-Mailbox control queues only TAIL needs to be set */
+ if (cq->q_id != -1)
+ return;
+
+ /* Clear Head for both send or receive */
+ idpf_reg_write (id, cq->reg.head, 0);
+
+ /* set starting point */
+ idpf_reg_write (id, cq->reg.bal, IDPF_LO_DWORD (cq->desc_ring.pa));
+ idpf_reg_write (id, cq->reg.bah, IDPF_HI_DWORD (cq->desc_ring.pa));
+ idpf_reg_write (id, cq->reg.len, (cq->ring_size | cq->reg.len_ena_mask));
+}
+
+/**
+ * idpf_ctlq_init_rxq_bufs - populate receive queue descriptors with buf
+ * @cq: pointer to the specific Control queue
+ *
+ * Record the address of the receive queue DMA buffers in the descriptors.
+ * The buffers must have been previously allocated.
+ */
+static void
+idpf_ctlq_init_rxq_bufs (struct idpf_ctlq_info *cq)
+{
+ int i = 0;
+
+ for (i = 0; i < cq->ring_size; i++)
+ {
+ idpf_ctlq_desc_t *desc = IDPF_CTLQ_DESC (cq, i);
+ idpf_dma_mem_t *bi = cq->bi.rx_buff[i];
+
+ /* No buffer to post to descriptor, continue */
+ if (!bi)
+ continue;
+
+ desc->flags = IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD;
+ desc->opcode = 0;
+ desc->datalen = (u16) bi->size;
+ desc->ret_val = 0;
+ desc->cookie_high = 0;
+ desc->cookie_low = 0;
+ desc->params.indirect.addr_high = IDPF_HI_DWORD (bi->pa);
+ desc->params.indirect.addr_low = IDPF_LO_DWORD (bi->pa);
+ desc->params.indirect.param0 = 0;
+ desc->params.indirect.param1 = 0;
+ }
+}
+
+/**
+ * idpf_ctlq_shutdown - shutdown the CQ
+ * @id: pointer to device struct
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for any control queue
+ */
+static void
+idpf_ctlq_shutdown (idpf_device_t *id, struct idpf_ctlq_info *cq)
+{
+ clib_spinlock_lock (&cq->cq_lock);
+
+ if (!cq->ring_size)
+ goto shutdown_sq_out;
+
+ /* free ring buffers and the ring itself */
+ idpf_ctlq_dealloc_ring_res (id, cq);
+
+ /* Set ring_size to 0 to indicate uninitialized queue */
+ cq->ring_size = 0;
+
+shutdown_sq_out:
+ clib_spinlock_unlock (&cq->cq_lock);
+ clib_spinlock_free (&cq->cq_lock);
+}
+
+/**
+ * idpf_ctlq_add - add one control queue
+ * @vm: pointer to vlib main
+ * @id: pointer to device struct
+ * @qinfo: info for queue to be created
+ * @cq_out: (output) double pointer to control queue to be created
+ *
+ * Allocate and initialize a control queue and add it to the control queue
+ * list. On success the newly allocated queue is passed back to the caller
+ * through cq_out.
+ *
+ * Note: idpf_ctlq_init must be called prior to any calls to idpf_ctlq_add
+ */
+int
+idpf_ctlq_add (vlib_main_t *vm, idpf_device_t *id,
+ idpf_ctlq_create_info_t *qinfo, struct idpf_ctlq_info **cq_out)
+{
+ bool is_rxq = false;
+ int status = IDPF_SUCCESS;
+
+ if (!qinfo->len || !qinfo->buf_size ||
+ qinfo->len > IDPF_CTLQ_MAX_RING_SIZE ||
+ qinfo->buf_size > IDPF_CTLQ_MAX_BUF_LEN)
+ return IDPF_ERR_CFG;
+
+ /* Fixme: memory allocation */
+ *cq_out = vlib_physmem_alloc_aligned_on_numa (
+ vm, sizeof (struct idpf_ctlq_info), CLIB_CACHE_LINE_BYTES, id->numa_node);
+ if (!(*cq_out))
+ return IDPF_ERR_NO_MEMORY;
+
+ if ((vlib_pci_map_dma (vm, id->pci_dev_handle, *cq_out)))
+ {
+ status = IDPF_ERR_NO_MEMORY;
+ goto init_free_q;
+ }
+
+ (*cq_out)->cq_type = qinfo->type;
+ (*cq_out)->q_id = qinfo->id;
+ (*cq_out)->buf_size = qinfo->buf_size;
+ (*cq_out)->ring_size = qinfo->len;
+
+ (*cq_out)->next_to_use = 0;
+ (*cq_out)->next_to_clean = 0;
+ (*cq_out)->next_to_post = (*cq_out)->ring_size - 1;
+
+ switch (qinfo->type)
+ {
+ case IDPF_CTLQ_TYPE_MAILBOX_RX:
+ is_rxq = true;
+ /* fallthrough */
+ case IDPF_CTLQ_TYPE_MAILBOX_TX:
+ status = idpf_ctlq_alloc_ring_res (vm, id, *cq_out);
+ break;
+ default:
+ status = IDPF_ERR_PARAM;
+ break;
+ }
+
+ if (status)
+ goto init_free_q;
+
+ if (is_rxq)
+ {
+ idpf_ctlq_init_rxq_bufs (*cq_out);
+ }
+ else
+ {
+ /* Allocate the array of msg pointers for TX queues */
+ (*cq_out)->bi.tx_msg = (idpf_ctlq_msg_t **) clib_mem_alloc (
+ qinfo->len * sizeof (idpf_ctlq_msg_t *));
+ if (!(*cq_out)->bi.tx_msg)
+ {
+ status = IDPF_ERR_NO_MEMORY;
+ goto init_dealloc_q_mem;
+ }
+ }
+
+ idpf_ctlq_setup_regs (*cq_out, qinfo);
+
+ idpf_ctlq_init_regs (vm, id, *cq_out, is_rxq);
+
+ /* Fixme: lock issue */
+ clib_spinlock_init (&(*cq_out)->cq_lock);
+
+ LIST_INSERT_HEAD (&id->cq_list_head, (*cq_out), cq_list);
+
+ return status;
+
+init_dealloc_q_mem:
+ /* free ring buffers and the ring itself */
+ idpf_ctlq_dealloc_ring_res (id, *cq_out);
+init_free_q:
+ clib_mem_free (*cq_out);
+
+ return status;
+}
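+
+/*
+ * Usage sketch (illustrative only, not exercised by this patch): create a
+ * mailbox send queue. Error handling and the register offsets in the create
+ * info are elided; the IDPF_CTLQ_* sizing constants are assumed to come
+ * from the plugin header.
+ *
+ *   idpf_ctlq_create_info_t ci = {
+ *     .type = IDPF_CTLQ_TYPE_MAILBOX_TX,
+ *     .id = IDPF_CTLQ_ID,
+ *     .len = IDPF_CTLQ_LEN,
+ *     .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
+ *   };
+ *   struct idpf_ctlq_info *cq;
+ *
+ *   if (idpf_ctlq_add (vm, id, &ci, &cq) != IDPF_SUCCESS)
+ *     return clib_error_return (0, "mailbox ctlq_add failed");
+ */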
+
+/**
+ * idpf_ctlq_remove - deallocate and remove specified control queue
+ * @id: pointer to device struct
+ * @cq: pointer to control queue to be removed
+ */
+void
+idpf_ctlq_remove (idpf_device_t *id, struct idpf_ctlq_info *cq)
+{
+ LIST_REMOVE (cq, cq_list);
+ idpf_ctlq_shutdown (id, cq);
+ clib_mem_free (cq);
+}
+
+/**
+ * idpf_ctlq_init - main initialization routine for all control queues
+ * @vm: pointer to vlib main
+ * @id: pointer to device struct
+ * @num_q: number of queues to initialize
+ * @q_info: array of structs containing info for each queue to be initialized
+ *
+ * This initializes any number and any type of control queues. This is an all
+ * or nothing routine; if one fails, all previously allocated queues will be
+ * destroyed. This must be called prior to using the individual add/remove
+ * APIs.
+ */
+int
+idpf_ctlq_init (vlib_main_t *vm, idpf_device_t *id, u8 num_q,
+ idpf_ctlq_create_info_t *q_info)
+{
+ struct idpf_ctlq_info *cq = NULL;
+ int ret_code = IDPF_SUCCESS;
+ int i = 0;
+
+ LIST_INIT (&id->cq_list_head);
+
+ for (i = 0; i < num_q; i++)
+ {
+ idpf_ctlq_create_info_t *qinfo = q_info + i;
+
+ ret_code = idpf_ctlq_add (vm, id, qinfo, &cq);
+ if (ret_code)
+ goto init_destroy_qs;
+ }
+
+ return ret_code;
+
+init_destroy_qs:
+ LIST_FOR_EACH_ENTRY_SAFE (cq, NULL, &id->cq_list_head, struct idpf_ctlq_info,
+ cq_list)
+ {
+ idpf_ctlq_remove (id, cq);
+ }
+
+ return ret_code;
+}
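+
+/*
+ * Usage sketch (illustrative only): bring up the TX/RX mailbox pair in one
+ * call. Sizes and ids are placeholders; real values and the mailbox
+ * register offsets come from device configuration. Because the routine is
+ * all-or-nothing, a failure here leaves no queues behind.
+ *
+ *   idpf_ctlq_create_info_t info[2] = {
+ *     { .type = IDPF_CTLQ_TYPE_MAILBOX_TX, .id = -1, .len = 64,
+ *       .buf_size = 4096 },
+ *     { .type = IDPF_CTLQ_TYPE_MAILBOX_RX, .id = -1, .len = 64,
+ *       .buf_size = 4096 },
+ *   };
+ *
+ *   if (idpf_ctlq_init (vm, id, 2, info) != IDPF_SUCCESS)
+ *     return clib_error_return (0, "control queue init failed");
+ */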
+
+/**
+ * idpf_ctlq_deinit - destroy all control queues
+ * @id: pointer to device struct
+ */
+void
+idpf_ctlq_deinit (idpf_device_t *id)
+{
+ struct idpf_ctlq_info *cq = NULL;
+
+ LIST_FOR_EACH_ENTRY_SAFE (cq, NULL, &id->cq_list_head, struct idpf_ctlq_info,
+ cq_list)
+ {
+ idpf_ctlq_remove (id, cq);
+ }
+}
+
+/**
+ * idpf_ctlq_send - send command to Control Queue (CTQ)
+ * @id: pointer to device struct
+ * @cq: handle to control queue struct to send on
+ * @num_q_msg: number of messages to send on control queue
+ * @q_msg: pointer to array of queue messages to be sent
+ *
+ * The caller is expected to allocate DMAable buffers and pass them to the
+ * send routine via the q_msg struct / control queue specific data struct.
+ * The control queue will hold a reference to each send message until
+ * the completion for that message has been cleaned.
+ */
+int
+idpf_ctlq_send (idpf_device_t *id, struct idpf_ctlq_info *cq, u16 num_q_msg,
+ idpf_ctlq_msg_t q_msg[])
+{
+ idpf_ctlq_desc_t *desc;
+ int num_desc_avail = 0;
+ int status = IDPF_SUCCESS;
+ int i = 0;
+
+ if (!cq || !cq->ring_size)
+ return -ENOBUFS;
+
+ clib_spinlock_lock (&cq->cq_lock);
+
+ /* Ensure there are enough descriptors to send all messages */
+ num_desc_avail = IDPF_CTLQ_DESC_UNUSED (cq);
+ if (num_desc_avail == 0 || num_desc_avail < num_q_msg)
+ {
+ status = -ENOSPC;
+ goto sq_send_command_out;
+ }
+
+ for (i = 0; i < num_q_msg; i++)
+ {
+ idpf_ctlq_msg_t *msg = &q_msg[i];
+ u64 msg_cookie;
+
+ desc = IDPF_CTLQ_DESC (cq, cq->next_to_use);
+
+ /* Note: the original driver wraps these in CPU_TO_LE16; a little-endian
+ * host is assumed here */
+ desc->opcode = msg->opcode;
+ desc->pfid_vfid = msg->func_id;
+
+ msg_cookie = msg->cookie.cookie;
+ desc->cookie_high = IDPF_HI_DWORD (msg_cookie);
+ desc->cookie_low = IDPF_LO_DWORD (msg_cookie);
+
+ desc->flags = (msg->host_id & IDPF_HOST_ID_MASK)
+ << IDPF_CTLQ_FLAG_HOST_ID_S;
+ if (msg->data_len)
+ {
+ idpf_dma_mem_t *buff = msg->ctx.indirect.payload;
+
+ desc->datalen |= msg->data_len;
+ desc->flags |= IDPF_CTLQ_FLAG_BUF;
+ desc->flags |= IDPF_CTLQ_FLAG_RD;
+
+ /* Update the address values in the desc with the pa
+ * value for respective buffer
+ */
+ desc->params.indirect.addr_high = IDPF_HI_DWORD (buff->pa);
+ desc->params.indirect.addr_low = IDPF_LO_DWORD (buff->pa);
+
+ clib_memcpy (&desc->params, msg->ctx.indirect.context,
+ IDPF_INDIRECT_CTX_SIZE);
+ }
+ else
+ {
+ clib_memcpy (&desc->params, msg->ctx.direct, IDPF_DIRECT_CTX_SIZE);
+ }
+
+ /* Store buffer info */
+ cq->bi.tx_msg[cq->next_to_use] = msg;
+
+ (cq->next_to_use)++;
+ if (cq->next_to_use == cq->ring_size)
+ cq->next_to_use = 0;
+ }
+
+ /* Force memory write to complete before letting hardware
+ * know that there are new descriptors to fetch.
+ */
+ CLIB_MEMORY_BARRIER ();
+
+ idpf_reg_write (id, cq->reg.tail, cq->next_to_use);
+
+sq_send_command_out:
+ clib_spinlock_unlock (&cq->cq_lock);
+
+ return status;
+}
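+
+/*
+ * Usage sketch (illustrative only): send one indirect message on the send
+ * queue asq. The opcode value and the pre-allocated DMA buffer dma_buf are
+ * assumptions; the queue holds a reference to msg until its completion is
+ * cleaned by idpf_ctlq_clean_sq, so msg must stay valid until then.
+ *
+ *   idpf_ctlq_msg_t msg = { 0 };
+ *
+ *   msg.opcode = mbx_opcode;             // device mailbox opcode
+ *   msg.data_len = payload_len;
+ *   msg.ctx.indirect.payload = &dma_buf; // idpf_dma_mem_t, DMAable
+ *
+ *   if (idpf_ctlq_send (id, asq, 1, &msg) != IDPF_SUCCESS)
+ *     ...
+ */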
+
+/**
+ * idpf_ctlq_clean_sq - reclaim send descriptors on HW write back for the
+ * requested queue
+ * @cq: pointer to the specific Control queue
+ * @clean_count: (input|output) number of descriptors to clean as input, and
+ * number of descriptors actually cleaned as output
+ * @msg_status: (output) pointer to msg pointer array to be populated; needs
+ * to be allocated by caller
+ *
+ * Returns an array of message pointers associated with the cleaned
+ * descriptors. The pointers are to the original ctlq_msgs sent on the cleaned
+ * descriptors. The status will be returned for each; any messages that failed
+ * to send will have a non-zero status. The caller is expected to free original
+ * ctlq_msgs and free or reuse the DMA buffers.
+ */
+int
+idpf_ctlq_clean_sq (struct idpf_ctlq_info *cq, u16 *clean_count,
+ idpf_ctlq_msg_t *msg_status[])
+{
+ idpf_ctlq_desc_t *desc;
+ u16 i = 0, num_to_clean;
+ u16 ntc, desc_err;
+ int ret = IDPF_SUCCESS;
+
+ if (!cq || !cq->ring_size)
+ return IDPF_ERR_CTLQ_EMPTY;
+
+ if (*clean_count == 0)
+ return IDPF_SUCCESS;
+ if (*clean_count > cq->ring_size)
+ return IDPF_ERR_PARAM;
+
+ /* Fixme: rte lock function in the original driver */
+ clib_spinlock_lock (&cq->cq_lock);
+
+ ntc = cq->next_to_clean;
+
+ num_to_clean = *clean_count;
+
+ for (i = 0; i < num_to_clean; i++)
+ {
+ /* Fetch next descriptor and check if marked as done */
+ desc = IDPF_CTLQ_DESC (cq, ntc);
+ if (!(desc->flags & IDPF_CTLQ_FLAG_DD))
+ break;
+
+ desc_err = desc->ret_val;
+ if (desc_err)
+ {
+ /* strip off FW internal code */
+ desc_err &= 0xff;
+ }
+
+ msg_status[i] = cq->bi.tx_msg[ntc];
+ msg_status[i]->status = desc_err;
+
+ cq->bi.tx_msg[ntc] = NULL;
+
+ /* Zero out any stale data */
+ clib_memset (desc, 0, sizeof (*desc));
+
+ ntc++;
+ if (ntc == cq->ring_size)
+ ntc = 0;
+ }
+
+ cq->next_to_clean = ntc;
+
+ clib_spinlock_unlock (&cq->cq_lock);
+
+ /* Return number of descriptors actually cleaned */
+ *clean_count = i;
+
+ return ret;
+}
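+
+/*
+ * Usage sketch (illustrative only): reclaim completed send descriptors.
+ * The returned pointers are the original ctlq_msgs passed to
+ * idpf_ctlq_send; the caller owns them and their DMA buffers afterwards.
+ *
+ *   idpf_ctlq_msg_t *done[1];
+ *   u16 n = 1;
+ *
+ *   if (idpf_ctlq_clean_sq (asq, &n, done) == IDPF_SUCCESS && n == 1)
+ *     {
+ *       // done[0]->status holds the (masked) firmware return value;
+ *       // done[0] and its payload buffer may now be freed or reused.
+ *     }
+ */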
+
+/**
+ * idpf_ctlq_post_rx_buffs - post buffers to descriptor ring
+ * @id: pointer to device struct
+ * @cq: pointer to control queue handle
+ * @buff_count: (input|output) input is number of buffers caller is trying to
+ * return; output is number of buffers that were not posted
+ * @buffs: array of pointers to dma mem structs to be given to hardware
+ *
+ * Caller uses this function to return DMA buffers to the descriptor ring after
+ * consuming them; buff_count will be the number of buffers.
+ *
+ * Note: this function needs to be called after a receive call even
+ * if there are no DMA buffers to be returned, i.e. buff_count = 0,
+ * buffs = NULL to support direct commands
+ */
+int
+idpf_ctlq_post_rx_buffs (idpf_device_t *id, struct idpf_ctlq_info *cq,
+ u16 *buff_count, idpf_dma_mem_t **buffs)
+{
+ idpf_ctlq_desc_t *desc;
+ u16 ntp = cq->next_to_post;
+ bool buffs_avail = false;
+ u16 tbp = ntp + 1;
+ int status = IDPF_SUCCESS;
+ int i = 0;
+
+ if (*buff_count > cq->ring_size)
+ return IDPF_ERR_PARAM;
+
+ if (*buff_count > 0)
+ buffs_avail = true;
+
+ clib_spinlock_lock (&cq->cq_lock);
+
+ if (tbp >= cq->ring_size)
+ tbp = 0;
+
+ if (tbp == cq->next_to_clean)
+ /* Nothing to do */
+ goto post_buffs_out;
+
+ /* Post buffers for as many as provided or up until the last one used */
+ while (ntp != cq->next_to_clean)
+ {
+ desc = IDPF_CTLQ_DESC (cq, ntp);
+
+ if (cq->bi.rx_buff[ntp])
+ goto fill_desc;
+ if (!buffs_avail)
+ {
+ /* If the caller hasn't given us any buffers or
+ * there are none left, search the ring itself
+ * for an available buffer to move to this
+ * entry starting at the next entry in the ring
+ */
+ tbp = ntp + 1;
+
+ /* Wrap ring if necessary */
+ if (tbp >= cq->ring_size)
+ tbp = 0;
+
+ while (tbp != cq->next_to_clean)
+ {
+ if (cq->bi.rx_buff[tbp])
+ {
+ cq->bi.rx_buff[ntp] = cq->bi.rx_buff[tbp];
+ cq->bi.rx_buff[tbp] = NULL;
+
+ /* Found a buffer, no need to
+ * search anymore
+ */
+ break;
+ }
+
+ /* Wrap ring if necessary */
+ tbp++;
+ if (tbp >= cq->ring_size)
+ tbp = 0;
+ }
+
+ if (tbp == cq->next_to_clean)
+ goto post_buffs_out;
+ }
+ else
+ {
+ /* Give back pointer to DMA buffer */
+ cq->bi.rx_buff[ntp] = buffs[i];
+ i++;
+
+ if (i >= *buff_count)
+ buffs_avail = false;
+ }
+
+ fill_desc:
+ desc->flags = IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD;
+
+ /* Post buffers to descriptor */
+ desc->datalen = cq->bi.rx_buff[ntp]->size;
+ desc->params.indirect.addr_high =
+ IDPF_HI_DWORD (cq->bi.rx_buff[ntp]->pa);
+ desc->params.indirect.addr_low = IDPF_LO_DWORD (cq->bi.rx_buff[ntp]->pa);
+
+ ntp++;
+ if (ntp == cq->ring_size)
+ ntp = 0;
+ }
+
+post_buffs_out:
+ /* Only update tail if buffers were actually posted */
+ if (cq->next_to_post != ntp)
+ {
+ if (ntp)
+ /* Update next_to_post to ntp - 1 since current ntp
+ * will not have a buffer
+ */
+ cq->next_to_post = ntp - 1;
+ else
+ /* Wrap to end of ring since current ntp is 0 */
+ cq->next_to_post = cq->ring_size - 1;
+
+ idpf_reg_write (id, cq->reg.tail, cq->next_to_post);
+ }
+
+ clib_spinlock_unlock (&cq->cq_lock);
+
+ /* return the number of buffers that were not posted */
+ *buff_count = *buff_count - i;
+
+ return status;
+}
+
+/**
+ * idpf_ctlq_recv - receive control queue message call back
+ * @cq: pointer to control queue handle to receive on
+ * @num_q_msg: (input|output) input number of messages that should be received;
+ * output number of messages actually received
+ * @q_msg: (output) array of received control queue messages on this q;
+ * needs to be pre-allocated by caller for as many messages as requested
+ *
+ * Called by interrupt handler or polling mechanism. Caller is expected
+ * to free buffers
+ */
+int
+idpf_ctlq_recv (struct idpf_ctlq_info *cq, u16 *num_q_msg,
+ idpf_ctlq_msg_t *q_msg)
+{
+ u16 num_to_clean, ntc, ret_val, flags;
+ idpf_ctlq_desc_t *desc;
+ int ret_code = 0;
+ u16 i = 0;
+
+ if (!cq || !cq->ring_size)
+ return -ENOBUFS;
+
+ if (*num_q_msg == 0)
+ return 0;
+ else if (*num_q_msg > cq->ring_size)
+ return -EINVAL;
+
+ /* take the lock before we start messing with the ring */
+ clib_spinlock_lock (&cq->cq_lock);
+
+ ntc = cq->next_to_clean;
+
+ num_to_clean = *num_q_msg;
+
+ for (i = 0; i < num_to_clean; i++)
+ {
+ u64 msg_cookie;
+
+ /* Fetch next descriptor and check if marked as done */
+ desc = IDPF_CTLQ_DESC (cq, ntc);
+ flags = desc->flags;
+
+ if (!(flags & IDPF_CTLQ_FLAG_DD))
+ break;
+
+ ret_val = desc->ret_val;
+
+ q_msg[i].vmvf_type =
+ (flags & (IDPF_CTLQ_FLAG_FTYPE_VM | IDPF_CTLQ_FLAG_FTYPE_PF)) >>
+ IDPF_CTLQ_FLAG_FTYPE_S;
+
+ if (flags & IDPF_CTLQ_FLAG_ERR)
+ ret_code = IDPF_ERR_CTLQ_ERROR;
+
+ msg_cookie = (u64) desc->cookie_high << 32;
+ msg_cookie |= (u64) desc->cookie_low;
+ clib_memcpy_fast (&q_msg[i].cookie, &msg_cookie, sizeof (u64));
+
+ q_msg[i].opcode = desc->opcode;
+ q_msg[i].data_len = desc->datalen;
+ q_msg[i].status = ret_val;
+
+ if (desc->datalen)
+ {
+ clib_memcpy_fast (q_msg[i].ctx.indirect.context,
+ &desc->params.indirect, IDPF_INDIRECT_CTX_SIZE);
+
+ /* Assign pointer to dma buffer to ctlq_msg array
+ * to be given to upper layer
+ */
+ q_msg[i].ctx.indirect.payload = cq->bi.rx_buff[ntc];
+
+ /* Zero out pointer to DMA buffer info;
+ * will be repopulated by post buffers API
+ */
+ cq->bi.rx_buff[ntc] = NULL;
+ }
+ else
+ {
+ clib_memcpy_fast (q_msg[i].ctx.direct, desc->params.raw,
+ IDPF_DIRECT_CTX_SIZE);
+ }
+
+ /* Zero out stale data in descriptor */
+ clib_memset (desc, 0, sizeof (idpf_ctlq_desc_t));
+
+ ntc++;
+ if (ntc == cq->ring_size)
+ ntc = 0;
+ }
+
+ cq->next_to_clean = ntc;
+
+ clib_spinlock_unlock (&cq->cq_lock);
+
+ *num_q_msg = i;
+ if (*num_q_msg == 0)
+ ret_code = -ENOMSG;
+
+ return ret_code;
+}
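+
+/*
+ * Polling sketch (illustrative only): drain the receive queue arq and
+ * return each DMA buffer to the ring once its payload is consumed. Note
+ * idpf_ctlq_post_rx_buffs must be called after every receive, even with
+ * zero buffers, so that direct commands keep flowing.
+ *
+ *   idpf_ctlq_msg_t q_msg;
+ *   u16 n_msg = 1, n_bufs;
+ *
+ *   while (idpf_ctlq_recv (arq, &n_msg, &q_msg) == 0)
+ *     {
+ *       // ... process q_msg / q_msg.ctx.indirect.payload ...
+ *       n_bufs = 1;
+ *       idpf_ctlq_post_rx_buffs (id, arq, &n_bufs,
+ *                                &q_msg.ctx.indirect.payload);
+ *       n_msg = 1;
+ *     }
+ */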
diff --git a/src/plugins/idpf/idpf_test.c b/src/plugins/idpf/idpf_test.c
new file mode 100644
index 00000000000..85b12966681
--- /dev/null
+++ b/src/plugins/idpf/idpf_test.c
@@ -0,0 +1,169 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#include <vppinfra/error.h>
+#include <idpf/idpf.h>
+
+#define __plugin_msg_base idpf_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* declare message IDs */
+#include <idpf/idpf.api_enum.h>
+#include <idpf/idpf.api_types.h>
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} idpf_test_main_t;
+
+idpf_test_main_t idpf_test_main;
+
+/* idpf create API */
+static int
+api_idpf_create (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_idpf_create_t *mp;
+ idpf_create_if_args_t args;
+ u32 tmp;
+ int ret;
+ u32 x[4];
+
+ clib_memset (&args, 0, sizeof (idpf_create_if_args_t));
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%x:%x:%x.%x", &x[0], &x[1], &x[2], &x[3]))
+ {
+ args.addr.domain = x[0];
+ args.addr.bus = x[1];
+ args.addr.slot = x[2];
+ args.addr.function = x[3];
+ }
+ else if (unformat (i, "rx-single %u", &tmp))
+ args.rxq_single = 1;
+ else if (unformat (i, "tx-single %u", &tmp))
+ args.txq_single = 1;
+ else if (unformat (i, "rxq-size %u", &tmp))
+ args.rxq_size = tmp;
+ else if (unformat (i, "txq-size %u", &tmp))
+ args.txq_size = tmp;
+ else if (unformat (i, "rxq-num %u", &tmp))
+ args.rxq_num = tmp;
+ else if (unformat (i, "txq-num %u", &tmp))
+ args.txq_num = tmp;
+ else if (unformat (i, "vport-num %u", &tmp))
+ args.req_vport_nb = tmp;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IDPF_CREATE, mp);
+
+ mp->pci_addr = clib_host_to_net_u32 (args.addr.as_u32);
+ mp->rxq_single = clib_host_to_net_u16 (args.rxq_single);
+ mp->txq_single = clib_host_to_net_u16 (args.txq_single);
+ mp->rxq_num = clib_host_to_net_u16 (args.rxq_num);
+ mp->txq_num = clib_host_to_net_u16 (args.txq_num);
+ mp->rxq_size = clib_host_to_net_u16 (args.rxq_size);
+ mp->txq_size = clib_host_to_net_u16 (args.txq_size);
+ mp->req_vport_nb = clib_host_to_net_u16 (args.req_vport_nb);
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
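+
+/*
+ * Example vpp_api_test invocation (illustrative; the PCI address is a
+ * placeholder):
+ *
+ *   vat# idpf_create 0000:4b:00.0 rxq-num 2 txq-num 2 rxq-size 512
+ *        txq-size 512 vport-num 1
+ */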
+
+/* idpf-create reply handler */
+static void
+vl_api_idpf_create_reply_t_handler (vl_api_idpf_create_reply_t *mp)
+{
+ vat_main_t *vam = idpf_test_main.vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created idpf with sw_if_index %d\n",
+ ntohl (mp->sw_if_index));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vam->regenerate_interface_table = 1;
+}
+
+/* idpf delete API */
+static int
+api_idpf_delete (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_idpf_delete_t *mp;
+ u32 sw_if_index = 0;
+ u8 index_defined = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %u", &sw_if_index))
+ index_defined = 1;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!index_defined)
+ {
+ errmsg ("missing sw_if_index\n");
+ return -99;
+ }
+
+ M (IDPF_DELETE, mp);
+
+ mp->sw_if_index = clib_host_to_net_u32 (sw_if_index);
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
+#include <idpf/idpf.api_test.c>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/plugin.c b/src/plugins/idpf/plugin.c
new file mode 100644
index 00000000000..745ba43f606
--- /dev/null
+++ b/src/plugins/idpf/plugin.c
@@ -0,0 +1,35 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description =
+ "Intel Infrastructure Data Path Function (IDPF) Device Driver",
+ .default_disabled = 1,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/virtchnl2.h b/src/plugins/idpf/virtchnl2.h
new file mode 100644
index 00000000000..8db68483f22
--- /dev/null
+++ b/src/plugins/idpf/virtchnl2.h
@@ -0,0 +1,855 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _IDPF_VIRTCHNL_H_
+#define _IDPF_VIRTCHNL_H_
+
+#include <idpf/virtchnl2_lan_desc.h>
+
+#define foreach_virtchnl2_status \
+ _ (0, SUCCESS) \
+ _ (-5, ERR_PARAM) \
+ _ (-38, ERR_OPCODE_MISMATCH)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_STATUS_##n = v,
+ foreach_virtchnl2_status
+#undef _
+} virtchnl2_status_t;
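+
+/*
+ * The foreach_* lists in this header follow the usual VPP X-macro pattern:
+ * the same list expands into the enum above and can be reused by any
+ * consumer, e.g. a status formatter (sketch, not part of this patch):
+ *
+ *   static const char *
+ *   virtchnl2_status_str (virtchnl2_status_t s)
+ *   {
+ *     switch (s)
+ *       {
+ *   #define _(v, n) case VIRTCHNL2_STATUS_##n: return #n;
+ *         foreach_virtchnl2_status
+ *   #undef _
+ *       }
+ *     return "unknown";
+ *   }
+ */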
+
+#define foreach_virtchnl2_op \
+ _ (0, UNKNOWN) \
+ _ (1, VERSION) \
+ _ (500, GET_CAPS) \
+ _ (501, CREATE_VPORT) \
+ _ (502, DESTROY_VPORT) \
+ _ (503, ENABLE_VPORT) \
+ _ (504, DISABLE_VPORT) \
+ _ (505, CONFIG_TX_QUEUES) \
+ _ (506, CONFIG_RX_QUEUES) \
+ _ (507, ENABLE_QUEUES) \
+ _ (508, DISABLE_QUEUES) \
+ _ (509, ADD_QUEUES) \
+ _ (510, DEL_QUEUES) \
+ _ (511, MAP_QUEUE_VECTOR) \
+ _ (512, UNMAP_QUEUE_VECTOR) \
+ _ (513, GET_RSS_KEY) \
+ _ (514, SET_RSS_KEY) \
+ _ (515, GET_RSS_LUT) \
+ _ (516, SET_RSS_LUT) \
+ _ (517, GET_RSS_HASH) \
+ _ (518, SET_RSS_HASH) \
+ _ (519, SET_SRIOV_VFS) \
+ _ (520, ALLOC_VECTORS) \
+ _ (521, DEALLOC_VECTORS) \
+ _ (522, EVENT) \
+ _ (523, GET_STATS) \
+ _ (524, RESET_VF) \
+ _ (526, GET_PTYPE_INFO) \
+ _ (532, CREATE_ADI) \
+ _ (533, DESTROY_ADI) \
+ _ (534, LOOPBACK) \
+ _ (535, ADD_MAC_ADDR) \
+ _ (536, DEL_MAC_ADDR) \
+ _ (537, CONFIG_PROMISCUOUS_MODE)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_OP_##n = v,
+ foreach_virtchnl2_op
+#undef _
+} virtchnl2_op_t;
+
+/* VIRTCHNL2_VPORT_TYPE
+ * Type of virtual port
+ */
+#define foreach_virtchnl2_vport_type \
+ _ (0, DEFAULT) \
+ _ (1, SRIOV) \
+ _ (2, SIOV) \
+ _ (3, SUBDEV) \
+ _ (4, MNG)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_VPORT_TYPE_##n = v,
+ foreach_virtchnl2_vport_type
+#undef _
+} virtchnl2_vport_type_t;
+
+/* VIRTCHNL2_QUEUE_MODEL
+ * Type of queue model
+ */
+#define VIRTCHNL2_QUEUE_MODEL_SINGLE 0
+#define VIRTCHNL2_QUEUE_MODEL_SPLIT 1
+
+#define foreach_idpf_checksum_cap_flag \
+ _ (0, TX_CSUM_L3_IPV4, "tx-csum-l3-ipv4") \
+ _ (1, TX_CSUM_L4_IPV4_TCP, "tx-csum-l4-ipv4-tcp") \
+ _ (2, TX_CSUM_L4_IPV4_UDP, "tx-csum-l4-ipv4-udp") \
+ _ (3, TX_CSUM_L4_IPV4_SCTP, "tx-csum-l4-ipv4-sctp") \
+ _ (4, TX_CSUM_L4_IPV6_TCP, "tx-csum-l4-ipv6-tcp") \
+ _ (5, TX_CSUM_L4_IPV6_UDP, "tx-csum-l4-ipv6-udp") \
+ _ (6, TX_CSUM_L4_IPV6_SCTP, "tx-csum-l4-ipv6-sctp") \
+ _ (7, TX_CSUM_GENERIC, "tx-csum-generic") \
+ _ (8, RX_CSUM_L3_IPV4, "rx-csum-l3-ipv4") \
+ _ (9, RX_CSUM_L4_IPV4_TCP, "rx-csum-l4-ipv4-tcp") \
+ _ (10, RX_CSUM_L4_IPV4_UDP, "rx-csum-l4-ipv4-udp") \
+ _ (11, RX_CSUM_L4_IPV4_SCTP, "rx-csum-l4-ipv4-sctp") \
+ _ (12, RX_CSUM_L4_IPV6_TCP, "rx-csum-l4-ipv6-tcp") \
+ _ (13, RX_CSUM_L4_IPV6_UDP, "rx-csum-l4-ipv6-udp") \
+ _ (14, RX_CSUM_L4_IPV6_SCTP, "rx-csum-l4-ipv6-sctp") \
+ _ (15, RX_CSUM_GENERIC, "rx-csum-generic") \
+ _ (16, TX_CSUM_L3_SINGLE_TUNNEL, "tx-csum-l3-single-tunnel") \
+ _ (17, TX_CSUM_L3_DOUBLE_TUNNEL, "tx-csum-l3-double-tunnel") \
+ _ (18, RX_CSUM_L3_SINGLE_TUNNEL, "rx-csum-l3-single-tunnel") \
+ _ (19, RX_CSUM_L3_DOUBLE_TUNNEL, "rx-csum-l3-double-tunnel") \
+ _ (20, TX_CSUM_L4_SINGLE_TUNNEL, "tx-csum-l4-single-tunnel") \
+ _ (21, TX_CSUM_L4_DOUBLE_TUNNEL, "tx-csum-l4-double-tunnel") \
+ _ (22, RX_CSUM_L4_SINGLE_TUNNEL, "rx-csum-l4-single-tunnel") \
+ _ (23, RX_CSUM_L4_DOUBLE_TUNNEL, "rx-csum-l4-double-tunnel")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_##b = (1 << a),
+ foreach_idpf_checksum_cap_flag
+#undef _
+} idpf_checksum_cap_flag_t;
+
+#define foreach_idpf_seg_cap_flag \
+ _ (0, IPV4_TCP, "ipv4-tcp") \
+ _ (1, IPV4_UDP, "ipv4-udp") \
+ _ (2, IPV4_SCTP, "ipv4-sctp") \
+ _ (3, IPV6_TCP, "ipv6-tcp") \
+ _ (4, IPV6_UDP, "ipv6-udp") \
+ _ (5, IPV6_SCTP, "ipv6-sctp") \
+ _ (6, GENERIC, "generic") \
+ _ (7, TX_SINGLE_TUNNEL, "tx-single-tunnel") \
+ _ (8, TX_DOUBLE_TUNNEL, "tx-double-tunnel")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_SEG_##b = (1 << a),
+ foreach_idpf_seg_cap_flag
+#undef _
+} idpf_seg_cap_flag_t;
+
+#define foreach_idpf_rss_cap_flag \
+ _ (0, IPV4_TCP, "ipv4-tcp") \
+ _ (1, IPV4_UDP, "ipv4-udp") \
+ _ (2, IPV4_SCTP, "ipv4-sctp") \
+ _ (3, IPV4_OTHER, "ipv4-other") \
+ _ (4, IPV6_TCP, "ipv6-tcp") \
+ _ (5, IPV6_UDP, "ipv6-udp") \
+ _ (6, IPV6_SCTP, "ipv6-sctp") \
+ _ (7, IPV6_OTHER, "ipv6-other") \
+ _ (8, IPV4_AH, "ipv4-ah") \
+ _ (9, IPV4_ESP, "ipv4-esp") \
+ _ (10, IPV4_AH_ESP, "ipv4-ah-esp") \
+ _ (11, IPV6_AH, "ipv6-ah") \
+ _ (12, IPV6_ESP, "ipv6-esp") \
+ _ (13, IPV6_AH_ESP, "ipv6-ah-esp")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_RSS_##b = (1 << a),
+ foreach_idpf_rss_cap_flag
+#undef _
+} idpf_rss_cap_flag_t;
+
+#define foreach_idpf_hsplit_cap_flag \
+ _ (0, AT_L2, "at-l2") \
+ _ (1, AT_L3, "at-l3") \
+ _ (2, AT_L4V4, "at-l4v4") \
+ _ (3, AT_L4V6, "at-l4v6")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_RX_HSPLIT_##b = (1 << a),
+ foreach_idpf_hsplit_cap_flag
+#undef _
+} idpf_hsplit_cap_flag_t;
+
+#define foreach_idpf_rsc_cap_flag \
+ _ (0, IPV4_TCP, "ipv4-tcp") \
+ _ (1, IPV4_SCTP, "ipv4-sctp") \
+ _ (2, IPV6_TCP, "ipv6-tcp") \
+ _ (3, IPV6_SCTP, "ipv6-sctp")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_RSC_##b = (1 << a),
+ foreach_idpf_rsc_cap_flag
+#undef _
+} idpf_rsc_cap_flag_t;
+
+#define foreach_idpf_other_cap_flag \
+ _ (0, RDMA, "rdma") \
+ _ (1, SRIOV, "sriov") \
+ _ (2, MACFILTER, "macfilter") \
+ _ (3, FLOW_DIRECTOR, "flow-director") \
+ _ (4, SPLITQ_QSCHED, "splitq-qsched") \
+ _ (5, CRC, "crc") \
+ _ (6, ADQ, "adq") \
+ _ (7, WB_ON_ITR, "wb-on-itr") \
+ _ (8, PROMISC, "promisc") \
+ _ (9, LINK_SPEED, "link-speed") \
+ _ (10, INLINE_IPSEC, "inline-ipsec") \
+ _ (11, LARGE_NUM_QUEUES, "large-num-queues") \
+ _ (12, VLAN, "vlan") \
+ _ (13, PTP, "ptp") \
+ _ (15, ADV_RSS, "adv-rss") \
+ _ (16, FDIR, "fdir") \
+ _ (17, RX_FLEX_DESC, "rx-flex-desc") \
+ _ (18, PTYPE, "ptype") \
+ _ (19, LOOPBACK, "loopback") \
+ _ (20, OEM, "oem")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_##b = (1 << a),
+ foreach_idpf_other_cap_flag
+#undef _
+} idpf_other_cap_flag_t;
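+
+/*
+ * The capability flags above arrive as bitmaps in
+ * virtchnl2_get_capabilities_t (defined later in this header); a driver
+ * checks them before enabling a feature (sketch):
+ *
+ *   if (caps.other_caps & VIRTCHNL2_CAP_MACFILTER)
+ *     ; // MAC filtering may be configured
+ *   if (caps.csum_caps & VIRTCHNL2_CAP_RX_CSUM_L3_IPV4)
+ *     ; // rx IPv4 checksum offload is available
+ */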
+
+#define VIRTCHNL2_TXQ_SCHED_MODE_QUEUE 0
+#define VIRTCHNL2_TXQ_SCHED_MODE_FLOW 1
+
+#define VIRTCHNL2_TXQ_ENABLE_MISS_COMPL BIT (0)
+
+#define VIRTCHNL2_RDMA_CPF 0
+#define VIRTCHNL2_NVME_CPF 1
+#define VIRTCHNL2_ATE_CPF 2
+#define VIRTCHNL2_LCE_CPF 3
+
+#define VIRTCHNL2_RXQ_RSC BIT (0)
+#define VIRTCHNL2_RXQ_HDR_SPLIT BIT (1)
+#define VIRTCHNL2_RXQ_IMMEDIATE_WRITE_BACK BIT (2)
+#define VIRTCHNL2_RX_DESC_SIZE_16BYTE BIT (3)
+#define VIRTCHNL2_RX_DESC_SIZE_32BYTE BIT (4)
+
+#define foreach_virtchnl2_rss_alg \
+ _ (0, TOEPLITZ_ASYMMETRIC) \
+ _ (1, R_ASYMMETRIC) \
+ _ (2, TOEPLITZ_SYMMETRIC) \
+ _ (3, XOR_SYMMETRIC)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RSS_ALG_##n = v,
+ foreach_virtchnl2_rss_alg
+#undef _
+} virtchnl2_rss_alg_t;
+
+#define foreach_virtchnl2_event \
+ _ (0, UNKNOWN) \
+ _ (1, LINK_CHANGE) \
+ _ (2, START_RESET_ADI) \
+ _ (3, FINISH_RESET_ADI)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_EVENT_##n = v,
+ foreach_virtchnl2_event
+#undef _
+} virtchnl2_event_name_t;
+
+#define foreach_idpf_queue_type \
+ _ (0, TX) \
+ _ (1, RX) \
+ _ (2, TX_COMPLETION) \
+ _ (3, RX_BUFFER) \
+ _ (4, CONFIG_TX) \
+ _ (5, CONFIG_RX) \
+ _ (6, P2P_TX) \
+ _ (7, P2P_RX) \
+ _ (8, P2P_TX_COMPLETION) \
+ _ (9, P2P_RX_BUFFER) \
+ _ (10, MBX_TX) \
+ _ (11, MBX_RX)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_QUEUE_TYPE_##n = v,
+ foreach_idpf_queue_type
+#undef _
+} idpf_queue_type_t;
+
+#define foreach_virtchnl2_itr_idx \
+ _ (0, 0) \
+ _ (1, 1) \
+ _ (2, 2) \
+ _ (3, NO_ITR)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_ITR_IDX_##n = v,
+ foreach_virtchnl2_itr_idx
+#undef _
+} virtchnl2_itr_idx_t;
+
+#define VIRTCHNL2_MAC_ADDR_PRIMARY 1
+#define VIRTCHNL2_MAC_ADDR_EXTRA 2
+
+#define VIRTCHNL2_UNICAST_PROMISC BIT (0)
+#define VIRTCHNL2_MULTICAST_PROMISC BIT (1)
+
+#define foreach_virtchnl2_proto_hdr \
+ _ (0, ANY) \
+ _ (1, PRE_MAC) \
+ _ (2, MAC) \
+ _ (3, POST_MAC) \
+ _ (4, ETHERTYPE) \
+ _ (5, VLAN) \
+ _ (6, SVLAN) \
+ _ (7, CVLAN) \
+ _ (8, MPLS) \
+ _ (9, UMPLS) \
+ _ (10, MMPLS) \
+ _ (11, PTP) \
+ _ (12, CTRL) \
+ _ (13, LLDP) \
+ _ (14, ARP) \
+ _ (15, ECP) \
+ _ (16, EAPOL) \
+ _ (17, PPPOD) \
+ _ (18, PPPOE) \
+ _ (19, IPV4) \
+ _ (20, IPV4_FRAG) \
+ _ (21, IPV6) \
+ _ (22, IPV6_FRAG) \
+ _ (23, IPV6_EH) \
+ _ (24, UDP) \
+ _ (25, TCP) \
+ _ (26, SCTP) \
+ _ (27, ICMP) \
+ _ (28, ICMPV6) \
+ _ (29, IGMP) \
+ _ (30, AH) \
+ _ (31, ESP) \
+ _ (32, IKE) \
+ _ (33, NATT_KEEP) \
+ _ (34, PAY) \
+ _ (35, L2TPV2) \
+ _ (36, L2TPV2_CONTROL) \
+ _ (37, L2TPV3) \
+ _ (38, GTP) \
+ _ (39, GTP_EH) \
+ _ (40, GTPCV2) \
+ _ (41, GTPC_TEID) \
+ _ (42, GTPU) \
+ _ (43, GTPU_UL) \
+ _ (44, GTPU_DL) \
+ _ (45, ECPRI) \
+ _ (46, VRRP) \
+ _ (47, OSPF) \
+ _ (48, TUN) \
+ _ (49, GRE) \
+ _ (50, NVGRE) \
+ _ (51, VXLAN) \
+ _ (52, VXLAN_GPE) \
+ _ (53, GENEVE) \
+ _ (54, NSH) \
+ _ (55, QUIC) \
+ _ (56, PFCP) \
+ _ (57, PFCP_NODE) \
+ _ (58, PFCP_SESSION) \
+ _ (59, RTP) \
+ _ (60, ROCE) \
+ _ (61, ROCEV1) \
+ _ (62, ROCEV2) \
+ _ (65535, NO_PROTO)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_PROTO_HDR_##n = v,
+ foreach_virtchnl2_proto_hdr
+#undef _
+} virtchnl2_proto_hdr_t;
+
+#define VIRTCHNL2_VERSION_MAJOR_2 2
+#define VIRTCHNL2_VERSION_MINOR_0 0
+
+typedef struct
+{
+ u32 major;
+ u32 minor;
+} virtchnl2_version_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_version_info_t, 8);
+
+typedef struct
+{
+ u32 csum_caps;
+ u32 seg_caps;
+ u32 hsplit_caps;
+ u32 rsc_caps;
+ u64 rss_caps;
+ u64 other_caps;
+
+ u32 mailbox_dyn_ctl;
+ u16 mailbox_vector_id;
+ u16 num_allocated_vectors;
+
+ u16 max_rx_q;
+ u16 max_tx_q;
+ u16 max_rx_bufq;
+ u16 max_tx_complq;
+
+ u16 max_sriov_vfs;
+
+ u16 max_vports;
+ u16 default_num_vports;
+
+ u16 max_tx_hdr_size;
+
+ u8 max_sg_bufs_per_tx_pkt;
+
+ u8 itr_idx_map;
+
+ u16 pad1;
+
+ u16 oem_cp_ver_major;
+ u16 oem_cp_ver_minor;
+ u32 device_type;
+
+ u8 reserved[12];
+} virtchnl2_get_capabilities_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_get_capabilities_t, 80);
+
+typedef struct
+{
+ /* see VIRTCHNL2_QUEUE_TYPE definitions */
+ u32 type;
+ u32 start_queue_id;
+ u32 num_queues;
+ u32 pad;
+
+ /* Queue tail register offset and spacing provided by CP */
+ u64 qtail_reg_start;
+ u32 qtail_reg_spacing;
+
+ u8 reserved[4];
+} virtchnl2_queue_reg_chunk_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_reg_chunk_t, 32);
+
+/* structure to specify several chunks of contiguous queues */
+typedef struct
+{
+ u16 num_chunks;
+ u8 reserved[6];
+ virtchnl2_queue_reg_chunk_t chunks[1];
+} virtchnl2_queue_reg_chunks_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_reg_chunks_t, 40);
+
+#define VIRTCHNL2_ETH_LENGTH_OF_ADDRESS 6
+
+typedef struct
+{
+ u16 vport_type;
+ u16 txq_model;
+ u16 rxq_model;
+ u16 num_tx_q;
+ u16 num_tx_complq;
+ u16 num_rx_q;
+ u16 num_rx_bufq;
+ u16 default_rx_q;
+ u16 vport_index;
+
+ u16 max_mtu;
+ u32 vport_id;
+ u8 default_mac_addr[VIRTCHNL2_ETH_LENGTH_OF_ADDRESS];
+ u16 pad;
+ u64 rx_desc_ids;
+ u64 tx_desc_ids;
+
+#define MAX_Q_REGIONS 16
+ u32 max_qs_per_qregion[MAX_Q_REGIONS];
+ u32 qregion_total_qs;
+ u16 qregion_type;
+ u16 pad2;
+
+ u32 rss_algorithm;
+ u16 rss_key_size;
+ u16 rss_lut_size;
+
+ u32 rx_split_pos;
+
+ u8 reserved[20];
+ virtchnl2_queue_reg_chunks_t chunks;
+} virtchnl2_create_vport_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_create_vport_t, 192);
+
+typedef struct
+{
+ u32 vport_id;
+ u8 reserved[4];
+} virtchnl2_vport_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_vport_t, 8);
+
+typedef struct
+{
+ u64 dma_ring_addr;
+ u32 type;
+ u32 queue_id;
+ u16 relative_queue_id;
+ u16 model;
+ u16 sched_mode;
+ u16 qflags;
+ u16 ring_len;
+
+ u16 tx_compl_queue_id;
+ u16 peer_type;
+ u16 peer_rx_queue_id;
+
+ u16 qregion_id;
+ u8 pad[2];
+
+ u32 egress_pasid;
+ u32 egress_hdr_pasid;
+ u32 egress_buf_pasid;
+
+ u8 reserved[8];
+} virtchnl2_txq_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_txq_info_t, 56);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 num_qinfo;
+
+ u8 reserved[10];
+ virtchnl2_txq_info_t qinfo[1];
+} virtchnl2_config_tx_queues_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_config_tx_queues_t, 72);
+
+/* Receive queue config info */
+typedef struct
+{
+ u64 desc_ids;
+ u64 dma_ring_addr;
+
+ u32 type;
+ u32 queue_id;
+
+ u16 model;
+
+ u16 hdr_buffer_size;
+ u32 data_buffer_size;
+ u32 max_pkt_size;
+
+ u16 ring_len;
+ u8 buffer_notif_stride;
+ u8 pad[1];
+
+ u64 dma_head_wb_addr;
+
+ u16 qflags;
+
+ u16 rx_buffer_low_watermark;
+
+ u16 rx_bufq1_id;
+ u16 rx_bufq2_id;
+ u8 bufq2_ena;
+ u8 pad2;
+
+ u16 qregion_id;
+
+ u32 ingress_pasid;
+ u32 ingress_hdr_pasid;
+ u32 ingress_buf_pasid;
+
+ u8 reserved[16];
+} virtchnl2_rxq_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_rxq_info_t, 88);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 num_qinfo;
+
+ u8 reserved[18];
+ virtchnl2_rxq_info_t qinfo[1];
+} virtchnl2_config_rx_queues_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_config_rx_queues_t, 112);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 num_tx_q;
+ u16 num_tx_complq;
+ u16 num_rx_q;
+ u16 num_rx_bufq;
+ u8 reserved[4];
+ virtchnl2_queue_reg_chunks_t chunks;
+} virtchnl2_add_queues_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_add_queues_t, 56);
+
+typedef struct
+{
+ u16 start_vector_id;
+ u16 start_evv_id;
+ u16 num_vectors;
+ u16 pad1;
+
+ u32 dynctl_reg_start;
+ u32 dynctl_reg_spacing;
+
+ u32 itrn_reg_start;
+ u32 itrn_reg_spacing;
+ u8 reserved[8];
+} virtchnl2_vector_chunk_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_vector_chunk_t, 32);
+
+typedef struct
+{
+ u16 num_vchunks;
+ u8 reserved[14];
+ virtchnl2_vector_chunk_t vchunks[1];
+} virtchnl2_vector_chunks_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_vector_chunks_t, 48);
+
+typedef struct
+{
+ u16 num_vectors;
+ u8 reserved[14];
+ virtchnl2_vector_chunks_t vchunks;
+} virtchnl2_alloc_vectors_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_alloc_vectors_t, 64);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 lut_entries_start;
+ u16 lut_entries;
+ u8 reserved[4];
+ u32 lut[1]; /* RSS lookup table */
+} virtchnl2_rss_lut_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_rss_lut_t, 16);
+
+typedef struct
+{
+ /* Packet Type Groups bitmap */
+ u64 ptype_groups;
+ u32 vport_id;
+ u8 reserved[4];
+} virtchnl2_rss_hash_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_rss_hash_t, 16);
+
+typedef struct
+{
+ u16 num_vfs;
+ u16 pad;
+} virtchnl2_sriov_vfs_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_sriov_vfs_info_t, 4);
+
+typedef struct
+{
+ u32 pasid;
+ u16 mbx_id;
+ u16 mbx_vec_id;
+ u16 adi_id;
+ u8 reserved[64];
+ u8 pad[6];
+ virtchnl2_queue_reg_chunks_t chunks;
+ virtchnl2_vector_chunks_t vchunks;
+} virtchnl2_create_adi_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_create_adi_t, 168);
+
+typedef struct
+{
+ u16 adi_id;
+ u8 reserved[2];
+} virtchnl2_destroy_adi_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_destroy_adi_t, 4);
+
+typedef struct
+{
+ u16 ptype_id_10;
+ u8 ptype_id_8;
+ u8 proto_id_count;
+ u16 pad;
+ u16 proto_id[1];
+} virtchnl2_ptype_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_ptype_t, 8);
+
+typedef struct
+{
+ u16 start_ptype_id;
+ u16 num_ptypes;
+ u32 pad;
+ virtchnl2_ptype_t ptype[1];
+} virtchnl2_get_ptype_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_get_ptype_info_t, 16);
+
+typedef struct
+{
+ u32 vport_id;
+ u8 pad[4];
+
+ u64 rx_bytes;
+ u64 rx_unicast;
+ u64 rx_multicast;
+ u64 rx_broadcast;
+ u64 rx_discards;
+ u64 rx_errors;
+ u64 rx_unknown_protocol;
+ u64 tx_bytes;
+ u64 tx_unicast;
+ u64 tx_multicast;
+ u64 tx_broadcast;
+ u64 tx_discards;
+ u64 tx_errors;
+ u64 rx_invalid_frame_length;
+ u64 rx_overflow_drop;
+} virtchnl2_vport_stats_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_vport_stats_t, 128);
+
+typedef struct
+{
+ u32 event;
+ u32 link_speed;
+ u32 vport_id;
+ u8 link_status;
+ u8 pad[1];
+ u16 adi_id;
+} virtchnl2_event_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_event_t, 16);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 key_len;
+ u8 pad;
+ u8 key[1];
+} virtchnl2_rss_key_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_rss_key_t, 8);
+
+typedef struct
+{
+ u32 type;
+ u32 start_queue_id;
+ u32 num_queues;
+ u8 reserved[4];
+} virtchnl2_queue_chunk_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_chunk_t, 16);
+
+typedef struct
+{
+ u16 num_chunks;
+ u8 reserved[6];
+ virtchnl2_queue_chunk_t chunks[1];
+} virtchnl2_queue_chunks_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_chunks_t, 24);
+
+typedef struct
+{
+ u32 vport_id;
+ u8 reserved[4];
+ virtchnl2_queue_chunks_t chunks;
+} virtchnl2_del_ena_dis_queues_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_del_ena_dis_queues_t, 32);
+
+typedef struct
+{
+ u32 queue_id;
+ u16 vector_id;
+ u8 pad[2];
+
+ u32 itr_idx;
+
+ u32 queue_type;
+ u8 reserved[8];
+} virtchnl2_queue_vector_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_vector_t, 24);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 num_qv_maps;
+ u8 pad[10];
+ virtchnl2_queue_vector_t qv_maps[1];
+} virtchnl2_queue_vector_maps_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_vector_maps_t, 40);
+
+typedef struct
+{
+ u32 vport_id;
+ u8 enable;
+ u8 pad[3];
+} virtchnl2_loopback_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_loopback_t, 8);
+
+typedef struct
+{
+ u8 addr[VIRTCHNL2_ETH_LENGTH_OF_ADDRESS];
+ u8 type;
+ u8 pad;
+} virtchnl2_mac_addr_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_mac_addr_t, 8);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 num_mac_addr;
+ u8 pad[2];
+ virtchnl2_mac_addr_t mac_addr_list[1];
+} virtchnl2_mac_addr_list_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_mac_addr_list_t, 16);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 flags;
+ u8 pad[2];
+} virtchnl2_promisc_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_promisc_info_t, 8);
+
+#endif /* _IDPF_VIRTCHNL_H_ */
diff --git a/src/plugins/idpf/virtchnl2_lan_desc.h b/src/plugins/idpf/virtchnl2_lan_desc.h
new file mode 100644
index 00000000000..31eff81fd81
--- /dev/null
+++ b/src/plugins/idpf/virtchnl2_lan_desc.h
@@ -0,0 +1,610 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _IDPF_VIRTCHNL_LAN_DESC_H_
+#define _IDPF_VIRTCHNL_LAN_DESC_H_
+
+/* VIRTCHNL2_TX_DESC_IDS
+ * Transmit descriptor ID flags
+ */
+#define foreach_idpf_txdid \
+ _ (0, DATA) \
+ _ (1, CTX) \
+ _ (2, REINJECT_CTX) \
+ _ (3, FLEX_DATA) \
+ _ (4, FLEX_CTX) \
+ _ (5, FLEX_TSO_CTX) \
+ _ (6, FLEX_TSYN_L2TAG1) \
+ _ (7, FLEX_L2TAG1_L2TAG2) \
+ _ (8, FLEX_TSO_L2TAG2_PARSTAG_CTX) \
+ _ (9, FLEX_HOSTSPLIT_SA_TSO_CTX) \
+ _ (10, FLEX_HOSTSPLIT_SA_CTX) \
+ _ (11, FLEX_L2TAG2_CTX) \
+ _ (12, FLEX_FLOW_SCHED) \
+ _ (13, FLEX_HOSTSPLIT_TSO_CTX) \
+ _ (14, FLEX_HOSTSPLIT_CTX) \
+ _ (15, DESC_DONE)
+
+typedef enum
+{
+#define _(a, b) VIRTCHNL2_TXDID_##b = (1 << a),
+ foreach_idpf_txdid
+#undef _
+} idpf_txdid_t;
+
+/* VIRTCHNL2_RX_DESC_IDS
+ * Receive descriptor IDs (range from 0 to 63)
+ */
+#define foreach_virtchnl2_rxdid \
+ _ (0, 0_16B_BASE) \
+ _ (1, 1_32B_BASE) \
+ _ (2, 2_FLEX_SPLITQ) \
+ _ (2, 2_FLEX_SQ_NIC) \
+ _ (3, 3_FLEX_SQ_SW) \
+ _ (4, 4_FLEX_SQ_NIC_VEB) \
+ _ (5, 5_FLEX_SQ_NIC_ACL) \
+ _ (6, 6_FLEX_SQ_NIC_2) \
+ _ (7, 7_HW_RSVD) \
+ _ (16, 16_COMMS_GENERIC) \
+ _ (17, 17_COMMS_AUX_VLAN) \
+ _ (18, 18_COMMS_AUX_IPV4) \
+ _ (19, 19_COMMS_AUX_IPV6) \
+ _ (20, 20_COMMS_AUX_FLOW) \
+ _ (21, 21_COMMS_AUX_TCP)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RXDID_##n = v,
+ foreach_virtchnl2_rxdid
+#undef _
+} virtchnl2_rxdid_t;
+
+/* VIRTCHNL2_RX_DESC_ID_BITMASKS
+ * Receive descriptor ID bitmasks
+ */
+#define VIRTCHNL2_RXDID_0_16B_BASE_M BIT (VIRTCHNL2_RXDID_0_16B_BASE)
+#define VIRTCHNL2_RXDID_1_32B_BASE_M BIT (VIRTCHNL2_RXDID_1_32B_BASE)
+#define VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M BIT (VIRTCHNL2_RXDID_2_FLEX_SPLITQ)
+#define VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M BIT (VIRTCHNL2_RXDID_2_FLEX_SQ_NIC)
+#define VIRTCHNL2_RXDID_3_FLEX_SQ_SW_M BIT (VIRTCHNL2_RXDID_3_FLEX_SQ_SW)
+#define VIRTCHNL2_RXDID_4_FLEX_SQ_NIC_VEB_M \
+ BIT (VIRTCHNL2_RXDID_4_FLEX_SQ_NIC_VEB)
+#define VIRTCHNL2_RXDID_5_FLEX_SQ_NIC_ACL_M \
+ BIT (VIRTCHNL2_RXDID_5_FLEX_SQ_NIC_ACL)
+#define VIRTCHNL2_RXDID_6_FLEX_SQ_NIC_2_M BIT (VIRTCHNL2_RXDID_6_FLEX_SQ_NIC_2)
+#define VIRTCHNL2_RXDID_7_HW_RSVD_M BIT (VIRTCHNL2_RXDID_7_HW_RSVD)
+/* 9 through 15 are reserved */
+#define VIRTCHNL2_RXDID_16_COMMS_GENERIC_M \
+ BIT (VIRTCHNL2_RXDID_16_COMMS_GENERIC)
+#define VIRTCHNL2_RXDID_17_COMMS_AUX_VLAN_M \
+ BIT (VIRTCHNL2_RXDID_17_COMMS_AUX_VLAN)
+#define VIRTCHNL2_RXDID_18_COMMS_AUX_IPV4_M \
+ BIT (VIRTCHNL2_RXDID_18_COMMS_AUX_IPV4)
+#define VIRTCHNL2_RXDID_19_COMMS_AUX_IPV6_M \
+ BIT (VIRTCHNL2_RXDID_19_COMMS_AUX_IPV6)
+#define VIRTCHNL2_RXDID_20_COMMS_AUX_FLOW_M \
+ BIT (VIRTCHNL2_RXDID_20_COMMS_AUX_FLOW)
+#define VIRTCHNL2_RXDID_21_COMMS_AUX_TCP_M \
+ BIT (VIRTCHNL2_RXDID_21_COMMS_AUX_TCP)
+/* 22 through 63 are reserved */
+
+/* Rx */
+/* For splitq virtchnl2_rx_flex_desc_adv desc members */
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M \
+ MAKEMASK (0xFUL, VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M \
+ MAKEMASK (0x3FFUL, VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_UMBCAST_S 10
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_UMBCAST_M \
+ MAKEMASK (0x3UL, VIRTCHNL2_RX_FLEX_DESC_ADV_UMBCAST_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF0_S 12
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF0_M \
+ MAKEMASK (0xFUL, VIRTCHNL2_RX_FLEX_DESC_ADV_FF0_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M \
+ MAKEMASK (0x3FFFUL, VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S 14
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M \
+ BIT_ULL (VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S 15
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M \
+ BIT_ULL (VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M \
+ MAKEMASK (0x3FFUL, VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S 10
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M \
+ BIT_ULL (VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S 11
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_M \
+ BIT_ULL (VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S 12
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_M \
+ BIT_ULL (VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_S 13
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_M \
+ MAKEMASK (0x7UL, VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_S)
+
+#define foreach_virtchnl2_rx_flex_desc_adv_status0_qw1 \
+ _ (0, DD_S) \
+ _ (1, EOF_S) \
+ _ (2, HBO_S) \
+ _ (3, L3L4P_S) \
+ _ (4, XSUM_IPE_S) \
+ _ (5, XSUM_L4E_S) \
+ _ (6, XSUM_EIPE_S) \
+ _ (7, XSUM_EUDPE_S)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_##n = v,
+ foreach_virtchnl2_rx_flex_desc_adv_status0_qw1
+#undef _
+} virtchnl2_rx_flex_desc_adv_status0_qw1_t;
+
+#define foreach_virtchnl2_rx_flex_desc_adv_status0_qw0 \
+ _ (0, LPBK_S) \
+ _ (1, IPV6EXADD_S) \
+ _ (2, RXE_S) \
+ _ (3, CRCP_S) \
+ _ (4, RSS_VALID_S) \
+ _ (5, L2TAG1P_S) \
+ _ (6, XTRMD0_VALID_S) \
+ _ (7, XTRMD1_VALID_S) \
+ _ (8, LAST)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_##n = v,
+ foreach_virtchnl2_rx_flex_desc_adv_status0_qw0
+#undef _
+} virtchnl2_rx_flex_desc_adv_status0_qw0_t;
+
+#define foreach_virtchnl2_rx_flex_desc_adv_status1 \
+ _ (0, RSVD_S) \
+ _ (2, ATRAEFAIL_S) \
+ _ (3, L2TAG2P_S) \
+ _ (4, XTRMD2_VALID_S) \
+ _ (5, XTRMD3_VALID_S) \
+ _ (6, XTRMD4_VALID_S) \
+ _ (7, XTRMD5_VALID_S) \
+ _ (8, LAST)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_##n = v,
+ foreach_virtchnl2_rx_flex_desc_adv_status1
+#undef _
+} virtchnl2_rx_flex_desc_adv_status1_t;
+
+#define VIRTCHNL2_RX_FLEX_DESC_PTYPE_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_PTYPE_M \
+ MAKEMASK (0x3FFUL, VIRTCHNL2_RX_FLEX_DESC_PTYPE_S) /* 10 bits */
+
+#define VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M \
+ MAKEMASK (0x3FFFUL, VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_S) /* 14 bits */
+
+#define foreach_virtchnl2_rx_flex_desc_status0 \
+ _ (0, DD_S) \
+ _ (1, EOF_S) \
+ _ (2, HBO_S) \
+ _ (3, L3L4P_S) \
+ _ (4, XSUM_IPE_S) \
+ _ (5, XSUM_L4E_S) \
+ _ (6, XSUM_EIPE_S) \
+ _ (7, XSUM_EUDPE_S) \
+ _ (8, LPBK_S) \
+ _ (9, IPV6EXADD_S) \
+ _ (10, RXE_S) \
+ _ (11, CRCP_S) \
+ _ (12, RSS_VALID_S) \
+ _ (13, L2TAG1P_S) \
+ _ (14, XTRMD0_VALID_S) \
+ _ (15, XTRMD1_VALID_S) \
+ _ (16, LAST)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_FLEX_DESC_STATUS0_##n = v,
+ foreach_virtchnl2_rx_flex_desc_status0
+#undef _
+} virtchnl2_rx_flex_desc_status0_t;
+
+#define foreach_virtchnl2_rx_flex_desc_status1 \
+ _ (0, CPM_S) \
+ _ (4, NAT_S) \
+ _ (5, CRYPTO_S) \
+ _ (11, L2TAG2P_S) \
+ _ (12, XTRMD2_VALID_S) \
+ _ (13, XTRMD3_VALID_S) \
+ _ (14, XTRMD4_VALID_S) \
+ _ (15, XTRMD5_VALID_S) \
+ _ (16, LAST)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_FLEX_DESC_STATUS1_##n = v,
+ foreach_virtchnl2_rx_flex_desc_status1
+#undef _
+} virtchnl2_rx_flex_desc_status1_t;
+
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_SPH_S 63
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_SPH_M \
+ BIT_ULL (VIRTCHNL2_RX_BASE_DESC_QW1_LEN_SPH_S)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_HBUF_S 52
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_HBUF_M \
+ MAKEMASK (0x7FFULL, VIRTCHNL2_RX_BASE_DESC_QW1_LEN_HBUF_S)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_S 38
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M \
+ MAKEMASK (0x3FFFULL, VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_S)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_S 30
+#define VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M \
+ MAKEMASK (0xFFULL, VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_S)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_S 19
+#define VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M \
+ MAKEMASK (0xFFUL, VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_S)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_S 0
+#define VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M \
+ MAKEMASK (0x7FFFFUL, VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_S)
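+
+/*
+ * Field extraction sketch (illustrative): pull the packet length and ptype
+ * out of qword1 of a written-back virtchnl2_singleq_base_rx_desc_t
+ * (defined later in this header).
+ *
+ *   u64 qw1 = desc->qword1.status_error_ptype_len;
+ *   u16 len = (qw1 & VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M) >>
+ *             VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_S;
+ *   u16 ptype = (qw1 & VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M) >>
+ *               VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_S;
+ */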
+
+#define foreach_virtchnl2_rx_base_desc_status \
+ _ (0, DD_S) \
+ _ (1, EOF_S) \
+ _ (2, L2TAG1P_S) \
+ _ (3, L3L4P_S) \
+ _ (4, CRCP_S) \
+ _ (5, RSVD_S) \
+ _ (8, EXT_UDP_0_S) \
+ _ (9, UMBCAST_S) \
+ _ (11, FLM_S) \
+ _ (12, FLTSTAT_S) \
+ _ (14, LPBK_S) \
+ _ (15, IPV6EXADD_S) \
+ _ (16, RSVD1_S) \
+ _ (18, INT_UDP_0_S) \
+ _ (19, LAST)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_BASE_DESC_STATUS_##n = v,
+ foreach_virtchnl2_rx_base_desc_status
+#undef _
+} virtchnl2_rx_base_desc_status_t;
+
+#define VIRTCHNL2_RX_BASE_DESC_EXT_STATUS_L2TAG2P_S 0
+
+#define foreach_virtchnl2_rx_base_desc_error \
+ _ (0, RXE_S) \
+ _ (1, ATRAEFAIL_S) \
+ _ (2, HBO_S) \
+ _ (3, L3L4E_S) \
+ _ (3, IPE_S) \
+ _ (4, L4E_S) \
+ _ (5, EIPE_S) \
+ _ (6, OVERSIZE_S) \
+ _ (7, PPRS_S)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_BASE_DESC_ERROR_##n = v,
+ foreach_virtchnl2_rx_base_desc_error
+#undef _
+} virtchnl2_rx_base_desc_error_t;
+
+#define foreach_virtchnl2_rx_base_desc_fltstat \
+ _ (0, NO_DATA) \
+ _ (1, FD_ID) \
+ _ (2, RSV) \
+ _ (3, RSS_HASH)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_BASE_DESC_FLTSTAT_##n = v,
+ foreach_virtchnl2_rx_base_desc_fltstat
+#undef _
+} virtchnl2_rx_base_desc_fltstat_t;
+
+/* Receive Descriptors */
+/* splitq buf
+ | 16| 0|
+ ----------------------------------------------------------------
+ | RSV | Buffer ID |
+ ----------------------------------------------------------------
 | Rx packet buffer address |
 ----------------------------------------------------------------
 | Rx header buffer address |
+ ----------------------------------------------------------------
+ | RSV |
+ ----------------------------------------------------------------
+ | 0|
+ */
+typedef struct
+{
+ struct
+ {
+ u16 buf_id;
+ u16 rsvd0;
+ u32 rsvd1;
+ } qword0;
+ u64 pkt_addr;
+ u64 hdr_addr;
+ u64 rsvd2;
+} virtchnl2_splitq_rx_buf_desc_t;
+
+typedef struct
+{
+ u64 pkt_addr;
+ u64 hdr_addr;
+ u64 rsvd1;
+ u64 rsvd2;
+} virtchnl2_singleq_rx_buf_desc_t;
+
+union virtchnl2_rx_buf_desc
+{
+ virtchnl2_singleq_rx_buf_desc_t read;
+ virtchnl2_splitq_rx_buf_desc_t split_rd;
+};
+
+typedef struct
+{
+ struct
+ {
+ struct
+ {
+ u16 mirroring_status;
+ u16 l2tag1;
+ } lo_dword;
+ union
+ {
+ u32 rss;
+ u32 fd_id;
+ } hi_dword;
+ } qword0;
+ struct
+ {
+ u64 status_error_ptype_len;
+ } qword1;
+ struct
+ {
+ u16 ext_status;
+ u16 rsvd;
+ u16 l2tag2_1;
+ u16 l2tag2_2;
+ } qword2;
+ struct
+ {
+ u32 reserved;
+ u32 fd_id;
+ } qword3;
+} virtchnl2_singleq_base_rx_desc_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ u16 ptype_flex_flags0;
+ u16 pkt_len;
+ u16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ u16 status_error0;
+ u16 l2tag1;
+ u16 flex_meta0;
+ u16 flex_meta1;
+
+ /* Qword 2 */
+ u16 status_error1;
+ u8 flex_flags2;
+ u8 time_stamp_low;
+ u16 l2tag2_1st;
+ u16 l2tag2_2nd;
+
+ /* Qword 3 */
+ u16 flex_meta2;
+ u16 flex_meta3;
+ union
+ {
+ struct
+ {
+ u16 flex_meta4;
+ u16 flex_meta5;
+ } flex;
+ u32 ts_high;
+ } flex_ts;
+} virtchnl2_rx_flex_desc_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ u16 ptype_flex_flags0;
+ u16 pkt_len;
+ u16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ u16 status_error0;
+ u16 l2tag1;
+ u32 rss_hash;
+
+ /* Qword 2 */
+ u16 status_error1;
+ u8 flexi_flags2;
+ u8 ts_low;
+ u16 l2tag2_1st;
+ u16 l2tag2_2nd;
+
+ /* Qword 3 */
+ u32 flow_id;
+ union
+ {
+ struct
+ {
+ u16 rsvd;
+ u16 flow_id_ipv6;
+ } flex;
+ u32 ts_high;
+ } flex_ts;
+} virtchnl2_rx_flex_desc_nic_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ u16 ptype_flex_flags0;
+ u16 pkt_len;
+ u16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ u16 status_error0;
+ u16 l2tag1;
+ u16 src_vsi;
+ u16 flex_md1_rsvd;
+
+ /* Qword 2 */
+ u16 status_error1;
+ u8 flex_flags2;
+ u8 ts_low;
+ u16 l2tag2_1st;
+ u16 l2tag2_2nd;
+
+ /* Qword 3 */
+ u32 rsvd;
+ u32 ts_high;
+} virtchnl2_rx_flex_desc_sw_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ u16 ptype_flex_flags0;
+ u16 pkt_len;
+ u16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ u16 status_error0;
+ u16 l2tag1;
+ u32 rss_hash;
+
+ /* Qword 2 */
+ u16 status_error1;
+ u8 flexi_flags2;
+ u8 ts_low;
+ u16 l2tag2_1st;
+ u16 l2tag2_2nd;
+
+ /* Qword 3 */
+ u16 flow_id;
+ u16 src_vsi;
+ union
+ {
+ struct
+ {
+ u16 rsvd;
+ u16 flow_id_ipv6;
+ } flex;
+ u32 ts_high;
+ } flex_ts;
+} virtchnl2_rx_flex_desc_nic_2_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid_ucast;
+ u8 status_err0_qw0;
+ u16 ptype_err_fflags0;
+ u16 pktlen_gen_bufq_id;
+ u16 hdrlen_flags;
+
+ /* Qword 1 */
+ u8 status_err0_qw1;
+ u8 status_err1;
+ u8 fflags1;
+ u8 ts_low;
+ u16 fmd0;
+ u16 fmd1;
+ /* Qword 2 */
+ u16 fmd2;
+ u8 fflags2;
+ u8 hash3;
+ u16 fmd3;
+ u16 fmd4;
+ /* Qword 3 */
+ u16 fmd5;
+ u16 fmd6;
+ u16 fmd7_0;
+ u16 fmd7_1;
+} virtchnl2_rx_flex_desc_adv_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid_ucast;
+ u8 status_err0_qw0;
+ u16 ptype_err_fflags0;
+ u16 pktlen_gen_bufq_id;
+ u16 hdrlen_flags;
+
+ /* Qword 1 */
+ u8 status_err0_qw1;
+ u8 status_err1;
+ u8 fflags1;
+ u8 ts_low;
+ u16 buf_id;
+ union
+ {
+ u16 raw_cs;
+ u16 l2tag1;
+ u16 rscseglen;
+ } misc;
+ /* Qword 2 */
+ u16 hash1;
+ union
+ {
+ u8 fflags2;
+ u8 mirrorid;
+ u8 hash2;
+ } ff2_mirrid_hash2;
+ u8 hash3;
+ u16 l2tag2;
+ u16 fmd4;
+ /* Qword 3 */
+ u16 l2tag1;
+ u16 fmd6;
+ u32 ts_high;
+} virtchnl2_rx_flex_desc_adv_nic_3_t;
+
+typedef union
+{
+ virtchnl2_singleq_rx_buf_desc_t read;
+ virtchnl2_singleq_base_rx_desc_t base_wb;
+ virtchnl2_rx_flex_desc_t flex_wb;
+ virtchnl2_rx_flex_desc_nic_t flex_nic_wb;
+ virtchnl2_rx_flex_desc_sw_t flex_sw_wb;
+ virtchnl2_rx_flex_desc_nic_2_t flex_nic_2_wb;
+ virtchnl2_rx_flex_desc_adv_t flex_adv_wb;
+ virtchnl2_rx_flex_desc_adv_nic_3_t flex_adv_nic_3_wb;
+ u64 qword[4];
+} virtchnl2_rx_desc_t;
+
+#endif /* _IDPF_VIRTCHNL_LAN_DESC_H_ */
diff --git a/src/plugins/igmp/igmp.c b/src/plugins/igmp/igmp.c
index 1c686e39cf2..30f167d483a 100644
--- a/src/plugins/igmp/igmp.c
+++ b/src/plugins/igmp/igmp.c
@@ -35,7 +35,6 @@
igmp_main_t igmp_main;
-/* *INDENT-OFF* */
/* General Query address */
const static mfib_prefix_t mpfx_general_query = {
.fp_proto = FIB_PROTOCOL_IP4,
@@ -57,7 +56,6 @@ const static mfib_prefix_t mpfx_report = {
},
},
};
-/* *INDENT-ON* */
/**
* @brief igmp send query (igmp_timer_function_t)
@@ -345,7 +343,6 @@ igmp_enable_disable (u32 sw_if_index, u8 enable, igmp_mode_t mode)
IGMP_DBG ("%s: %U", (enable ? "Enabled" : "Disabled"),
format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index);
- /* *INDENT-OFF* */
fib_route_path_t via_itf_path =
{
.frp_proto = fib_proto_to_dpo (FIB_PROTOCOL_IP4),
@@ -365,7 +362,6 @@ igmp_enable_disable (u32 sw_if_index, u8 enable, igmp_mode_t mode)
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
};
- /* *INDENT-ON* */
/* find configuration, if it doesn't exist, create new */
config = igmp_config_lookup (sw_if_index);
mfib_index = mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
@@ -408,18 +404,19 @@ igmp_enable_disable (u32 sw_if_index, u8 enable, igmp_mode_t mode)
{
/* first config in this FIB */
mfib_table_lock (mfib_index, FIB_PROTOCOL_IP4, MFIB_SOURCE_IGMP);
- mfib_table_entry_path_update (mfib_index,
- &mpfx_general_query,
- MFIB_SOURCE_IGMP, &for_us_path);
- mfib_table_entry_path_update (mfib_index,
- &mpfx_report,
- MFIB_SOURCE_IGMP, &for_us_path);
+ mfib_table_entry_path_update (mfib_index, &mpfx_general_query,
+ MFIB_SOURCE_IGMP,
+ MFIB_ENTRY_FLAG_NONE, &for_us_path);
+ mfib_table_entry_path_update (mfib_index, &mpfx_report,
+ MFIB_SOURCE_IGMP,
+ MFIB_ENTRY_FLAG_NONE, &for_us_path);
}
- mfib_table_entry_path_update (mfib_index,
- &mpfx_general_query,
- MFIB_SOURCE_IGMP, &via_itf_path);
+ mfib_table_entry_path_update (mfib_index, &mpfx_general_query,
+ MFIB_SOURCE_IGMP, MFIB_ENTRY_FLAG_NONE,
+ &via_itf_path);
mfib_table_entry_path_update (mfib_index, &mpfx_report,
- MFIB_SOURCE_IGMP, &via_itf_path);
+ MFIB_SOURCE_IGMP, MFIB_ENTRY_FLAG_NONE,
+ &via_itf_path);
}
}
else if (config && !enable)
@@ -487,7 +484,6 @@ igmp_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (igmp_init) =
{
.runs_after = VLIB_INITS("ip4_lookup_init"),
@@ -497,7 +493,6 @@ VLIB_PLUGIN_REGISTER () =
.version = VPP_BUILD_VER,
.description = "Internet Group Management Protocol (IGMP)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
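
For reference, a minimal sketch of the mfib API change driving the igmp.c
hunks above: mfib_table_entry_path_update() now takes an explicit
mfib_entry_flags_t argument, while the removal path keeps its old shape:

  /* new signature: entry flags are passed explicitly */
  mfib_table_entry_path_update (mfib_index, &mpfx_report, MFIB_SOURCE_IGMP,
                                MFIB_ENTRY_FLAG_NONE, &for_us_path);
  /* removal keeps the four-argument form */
  mfib_table_entry_path_remove (mfib_index, &mpfx_report, MFIB_SOURCE_IGMP,
                                &for_us_path);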
diff --git a/src/plugins/igmp/igmp.h b/src/plugins/igmp/igmp.h
index 9f9b611a649..4131d6d3b09 100644
--- a/src/plugins/igmp/igmp.h
+++ b/src/plugins/igmp/igmp.h
@@ -20,6 +20,7 @@
#include <vlib/vlib.h>
#include <vnet/ip/ip.h>
+#define REPLY_MSG_ID_BASE (igmp_main.msg_id_base)
#include <vlibapi/api_helper_macros.h>
#include <vnet/ip/igmp_packet.h>
#include <vnet/adj/adj_mcast.h>
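
Defining REPLY_MSG_ID_BASE before including api_helper_macros.h is what lets
the handlers in igmp_api.c below drop the explicit IGMP_MSG_ID() offset: the
shared REPLY_MACRO applies the base itself. Roughly (a sketch of the macro's
convention, not its verbatim expansion):

  /* REPLY_MACRO (VL_API_IGMP_LISTEN_REPLY) sets the message id as: */
  rmp->_vl_msg_id = htons (VL_API_IGMP_LISTEN_REPLY + REPLY_MSG_ID_BASE);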
diff --git a/src/plugins/igmp/igmp_api.c b/src/plugins/igmp/igmp_api.c
index 72c1b0394a8..3f743d8fee1 100644
--- a/src/plugins/igmp/igmp_api.c
+++ b/src/plugins/igmp/igmp_api.c
@@ -71,7 +71,7 @@ vl_api_igmp_listen_t_handler (vl_api_igmp_listen_t * mp)
BAD_SW_IF_INDEX_LABEL;
done:;
- REPLY_MACRO (IGMP_MSG_ID (VL_API_IGMP_LISTEN_REPLY));
+ REPLY_MACRO (VL_API_IGMP_LISTEN_REPLY);
}
static void
@@ -88,7 +88,7 @@ vl_api_igmp_enable_disable_t_handler (vl_api_igmp_enable_disable_t * mp)
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (IGMP_MSG_ID (VL_API_IGMP_ENABLE_DISABLE_REPLY));
+ REPLY_MACRO (VL_API_IGMP_ENABLE_DISABLE_REPLY);
}
static void
@@ -106,7 +106,7 @@ vl_api_igmp_proxy_device_add_del_t_handler (vl_api_igmp_proxy_device_add_del_t
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (IGMP_MSG_ID (VL_API_IGMP_PROXY_DEVICE_ADD_DEL_REPLY));
+ REPLY_MACRO (VL_API_IGMP_PROXY_DEVICE_ADD_DEL_REPLY);
}
static void
@@ -124,8 +124,7 @@ static void
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (IGMP_MSG_ID
- (VL_API_IGMP_PROXY_DEVICE_ADD_DEL_INTERFACE_REPLY));
+ REPLY_MACRO (VL_API_IGMP_PROXY_DEVICE_ADD_DEL_INTERFACE_REPLY);
}
static void
@@ -155,7 +154,6 @@ igmp_config_dump (igmp_main_t * im,
igmp_group_t *group;
igmp_src_t *src;
- /* *INDENT-OFF* */
FOR_EACH_GROUP (group, config,
({
FOR_EACH_SRC (src, group, IGMP_FILTER_MODE_INCLUDE,
@@ -163,7 +161,6 @@ igmp_config_dump (igmp_main_t * im,
send_igmp_details (rp, im, config, group, src, context);
}));
}));
- /* *INDENT-ON* */
}
static void
@@ -181,12 +178,10 @@ vl_api_igmp_dump_t_handler (vl_api_igmp_dump_t * mp)
sw_if_index = ntohl (mp->sw_if_index);
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (config, im->configs)
{
igmp_config_dump(im, rp, mp->context, config);
}
- /* *INDENT-ON* */
}
else
{
@@ -209,7 +204,7 @@ vl_api_igmp_clear_interface_t_handler (vl_api_igmp_clear_interface_t * mp)
if (config)
igmp_clear_config (config);
- REPLY_MACRO (IGMP_MSG_ID (VL_API_IGMP_CLEAR_INTERFACE_REPLY));
+ REPLY_MACRO (VL_API_IGMP_CLEAR_INTERFACE_REPLY);
}
static vl_api_group_prefix_type_t
@@ -250,7 +245,7 @@ vl_api_igmp_group_prefix_set_t_handler (vl_api_igmp_group_prefix_set_t * mp)
ip_prefix_decode (&mp->gp.prefix, &pfx);
igmp_group_prefix_set (&pfx, igmp_group_type_api_to_int (mp->gp.type));
- REPLY_MACRO (IGMP_MSG_ID (VL_API_IGMP_GROUP_PREFIX_SET_REPLY));
+ REPLY_MACRO (VL_API_IGMP_GROUP_PREFIX_SET_REPLY);
}
typedef struct igmp_ssm_range_walk_ctx_t_
@@ -343,7 +338,7 @@ vl_api_want_igmp_events_t_handler (vl_api_want_igmp_events_t * mp)
rv = VNET_API_ERROR_INVALID_REGISTRATION;
done:
- REPLY_MACRO (VL_API_WANT_IGMP_EVENTS_REPLY + im->msg_id_base);
+ REPLY_MACRO (VL_API_WANT_IGMP_EVENTS_REPLY);
}
static clib_error_t *
@@ -402,14 +397,12 @@ igmp_event (igmp_filter_mode_t filter,
vnet_get_main (), sw_if_index, format_igmp_filter_mode, filter);
- /* *INDENT-OFF* */
pool_foreach (api_client, im->api_clients)
{
rp = vl_api_client_index_to_registration (api_client->client_index);
if (rp)
send_igmp_event (rp, filter, sw_if_index, saddr, gaddr);
}
- /* *INDENT-ON* */
}
/* Set up the API message handling tables */
diff --git a/src/plugins/igmp/igmp_cli.c b/src/plugins/igmp/igmp_cli.c
index f84cdaf50f7..713f4c31bfb 100644
--- a/src/plugins/igmp/igmp_cli.c
+++ b/src/plugins/igmp/igmp_cli.c
@@ -69,13 +69,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_clear_interface_command, static) = {
.path = "clear igmp",
.short_help = "clear igmp int <interface>",
.function = igmp_clear_interface_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_listen_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -150,14 +148,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_listen_command, static) = {
.path = "igmp listen",
.short_help = "igmp listen [<enable|disable>] "
"int <interface> saddr <ip4-address> gaddr <ip4-address>",
.function = igmp_listen_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_enable_cli (vlib_main_t * vm,
@@ -211,13 +207,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_enable_command, static) = {
.path = "igmp",
.short_help = "igmp <enable|disable> <host|router> <interface>",
.function = igmp_enable_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_proxy_device_add_del_command_fn (vlib_main_t * vm,
@@ -275,13 +269,11 @@ done:
unformat_free (line_input);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_proxy_device_add_del_command, static) = {
.path = "igmp proxy-dev",
.short_help = "igmp proxy-dev <add|del> vrf-id <table-id> <interface>",
.function = igmp_proxy_device_add_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_proxy_device_add_del_interface_command_fn (vlib_main_t * vm,
@@ -339,13 +331,11 @@ done:
unformat_free (line_input);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_proxy_device_add_del_interface_command, static) = {
.path = "igmp proxy-dev itf",
.short_help = "igmp proxy-dev itf <add|del> vrf-id <table-id> <interface>",
.function = igmp_proxy_device_add_del_interface_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -355,23 +345,19 @@ igmp_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
igmp_main_t *im = &igmp_main;
igmp_config_t *config;
- /* *INDENT-OFF* */
pool_foreach (config, im->configs)
{
vlib_cli_output (vm, "%U", format_igmp_config, config);
}
- /* *INDENT-ON* */
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_show_command, static) = {
.path = "show igmp config",
.short_help = "show igmp config",
.function = igmp_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_show_timers_command_fn (vlib_main_t * vm,
@@ -384,13 +370,11 @@ igmp_show_timers_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_show_timers_command, static) = {
.path = "show igmp timers",
.short_help = "show igmp timers",
.function = igmp_show_timers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_igmp_command_fn (vlib_main_t * vm,
@@ -414,13 +398,11 @@ test_igmp_command_fn (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_igmp_command, static) = {
.path = "test igmp timers",
.short_help = "Change the default values for IGMP timers - only sensible during unit tests",
.function = test_igmp_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
diff --git a/src/plugins/igmp/igmp_config.c b/src/plugins/igmp/igmp_config.c
index 7637adba5bf..288d9c87222 100644
--- a/src/plugins/igmp/igmp_config.c
+++ b/src/plugins/igmp/igmp_config.c
@@ -28,12 +28,10 @@ igmp_clear_config (igmp_config_t * config)
format_vnet_sw_if_index_name,
vnet_get_main (), config->sw_if_index);
- /* *INDENT-OFF* */
FOR_EACH_GROUP (group, config,
({
igmp_group_clear (&group);
}));
- /* *INDENT-ON* */
for (ii = 0; ii < IGMP_CONFIG_N_TIMERS; ii++)
{
@@ -125,12 +123,10 @@ format_igmp_config (u8 * s, va_list * args)
format_igmp_timer_id, config->timers[ii]);
}
- /* *INDENT-OFF* */
FOR_EACH_GROUP (group, config,
({
s = format (s, "\n%U", format_igmp_group, group, 4);
}));
- /* *INDENT-ON* */
return (s);
}
diff --git a/src/plugins/igmp/igmp_group.c b/src/plugins/igmp/igmp_group.c
index eec4c9b8f81..be3d997cbeb 100644
--- a/src/plugins/igmp/igmp_group.c
+++ b/src/plugins/igmp/igmp_group.c
@@ -23,12 +23,10 @@ igmp_group_free_all_srcs (igmp_group_t * group)
{
igmp_src_t *src;
- /* *INDENT-OFF* */
FOR_EACH_SRC (src, group, IGMP_FILTER_MODE_INCLUDE,
({
igmp_src_free(src);
}));
- /* *INDENT-ON* */
hash_free (group->igmp_src_by_key[IGMP_FILTER_MODE_INCLUDE]);
hash_free (group->igmp_src_by_key[IGMP_FILTER_MODE_EXCLUDE]);
@@ -152,7 +150,6 @@ igmp_group_present_minus_new (igmp_group_t * group,
pmn = NULL;
- /* *INDENT-OFF* */
if (0 == vec_len(saddrs))
{
FOR_EACH_SRC(src, group, mode,
@@ -178,7 +175,6 @@ igmp_group_present_minus_new (igmp_group_t * group,
vec_add1(pmn, *src->key);
}));
}
- /* *INDENT-ON* */
return (pmn);
}
@@ -198,7 +194,6 @@ igmp_group_new_minus_present (igmp_group_t * group,
npm = NULL;
- /* *INDENT-OFF* */
vec_foreach(s1, saddrs)
{
found = 0;
@@ -214,7 +209,6 @@ igmp_group_new_minus_present (igmp_group_t * group,
if (!found)
vec_add1(npm, *s1);
}
- /* *INDENT-ON* */
return (npm);
}
@@ -230,7 +224,6 @@ igmp_group_new_intersect_present (igmp_group_t * group,
intersect = NULL;
- /* *INDENT-OFF* */
FOR_EACH_SRC(src, group, mode,
({
vec_foreach(s1, saddrs)
@@ -242,7 +235,6 @@ igmp_group_new_intersect_present (igmp_group_t * group,
}
}
}));
- /* *INDENT-ON* */
return (intersect);
}
@@ -311,12 +303,10 @@ format_igmp_group (u8 * s, va_list * args)
format_igmp_group_timer_type, ii,
format_igmp_timer_id, group->timers[ii]);
- /* *INDENT-OFF* */
FOR_EACH_SRC (src, group, IGMP_FILTER_MODE_INCLUDE,
({
s = format (s, "\n%U", format_igmp_src, src, indent+4);
}));
- /* *INDENT-ON* */
return (s);
}
diff --git a/src/plugins/igmp/igmp_input.c b/src/plugins/igmp/igmp_input.c
index 1858a1b4d66..012c22399de 100644
--- a/src/plugins/igmp/igmp_input.c
+++ b/src/plugins/igmp/igmp_input.c
@@ -219,7 +219,6 @@ igmp_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (igmp_input_node) =
{
.function = igmp_input,
@@ -239,7 +238,6 @@ VLIB_REGISTER_NODE (igmp_input_node) =
[IGMP_INPUT_NEXT_PARSE_REPORT] = "igmp-parse-report",
}
};
-/* *INDENT-ON* */
static uword
igmp_parse_query (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -325,7 +323,6 @@ igmp_parse_query (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (igmp_parse_query_node) =
{
.function = igmp_parse_query,
@@ -343,7 +340,6 @@ VLIB_REGISTER_NODE (igmp_parse_query_node) =
[IGMP_PARSE_QUERY_NEXT_DROP] = "error-drop",
}
};
-/* *INDENT-ON* */
static uword
igmp_parse_report (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -436,7 +432,6 @@ igmp_parse_report (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (igmp_parse_report_node) =
{
.function = igmp_parse_report,
@@ -454,7 +449,6 @@ VLIB_REGISTER_NODE (igmp_parse_report_node) =
[IGMP_PARSE_REPORT_NEXT_DROP] = "error-drop",
}
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_input_init (vlib_main_t * vm)
@@ -466,12 +460,10 @@ igmp_input_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (igmp_input_init) =
{
.runs_after = VLIB_INITS("igmp_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/igmp/igmp_pkt.c b/src/plugins/igmp/igmp_pkt.c
index c2ce5c71255..7fadeb638a3 100644
--- a/src/plugins/igmp/igmp_pkt.c
+++ b/src/plugins/igmp/igmp_pkt.c
@@ -329,7 +329,6 @@ igmp_pkt_report_v3_add_report (igmp_pkt_build_report_t * br,
if (NULL == igmp_group)
return;
- /* *INDENT-OFF* */
vec_foreach(s, srcs)
{
igmp_group = igmp_pkt_report_v3_append_src(br, igmp_group,
@@ -337,7 +336,6 @@ igmp_pkt_report_v3_add_report (igmp_pkt_build_report_t * br,
if (NULL == igmp_group)
return;
};
- /* *INDENT-ON* */
igmp_group->n_src_addresses = clib_host_to_net_u16 (br->n_srcs);
@@ -378,7 +376,6 @@ igmp_pkt_report_v3_add_group (igmp_pkt_build_report_t * br,
igmp_group = igmp_pkt_report_v3_append_group (br, group->key, type);
- /* *INDENT-OFF* */
FOR_EACH_SRC (src, group, IGMP_FILTER_MODE_INCLUDE,
({
igmp_group = igmp_pkt_report_v3_append_src(br, igmp_group,
@@ -387,7 +384,6 @@ igmp_pkt_report_v3_add_group (igmp_pkt_build_report_t * br,
if (NULL == igmp_group)
return;
}));
- /* *INDENT-ON* */
igmp_group->n_src_addresses = clib_host_to_net_u16 (br->n_srcs);
IGMP_DBG (" ..add-group: %U srcs:%d",
diff --git a/src/plugins/igmp/igmp_proxy.c b/src/plugins/igmp/igmp_proxy.c
index 2167740fc8a..bf5e3aafad8 100644
--- a/src/plugins/igmp/igmp_proxy.c
+++ b/src/plugins/igmp/igmp_proxy.c
@@ -34,7 +34,6 @@ igmp_proxy_device_mfib_path_add_del (igmp_group_t * group, u8 add)
mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
config->sw_if_index);
- /* *INDENT-OFF* */
mfib_prefix_t mpfx_group_addr = {
.fp_proto = FIB_PROTOCOL_IP4,
.fp_len = 32,
@@ -51,11 +50,11 @@ igmp_proxy_device_mfib_path_add_del (igmp_group_t * group, u8 add)
.frp_weight = 1,
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
};
- /* *INDENT-ON* */
if (add)
mfib_table_entry_path_update (mfib_index, &mpfx_group_addr,
- MFIB_SOURCE_IGMP, &via_itf_path);
+ MFIB_SOURCE_IGMP, MFIB_ENTRY_FLAG_NONE,
+ &via_itf_path);
else
mfib_table_entry_path_remove (mfib_index, &mpfx_group_addr,
MFIB_SOURCE_IGMP, &via_itf_path);
@@ -345,12 +344,10 @@ igmp_proxy_device_merge_group (igmp_proxy_device_t * proxy_device,
igmp_proxy_device_mfib_path_add_del (group, 0);
}
- /* *INDENT-OFF* */
FOR_EACH_SRC (src, group, group->router_filter_mode,
({
igmp_proxy_device_merge_src (&proxy_group, src, srcaddrs, block);
}));
- /* *INDENT-ON* */
return proxy_group;
}
@@ -369,7 +366,6 @@ igmp_proxy_device_merge_config (igmp_config_t * config, u8 block)
igmp_pkt_build_report_init (&br, proxy_device->upstream_if);
- /* *INDENT-OFF* */
FOR_EACH_GROUP(group, config,
({
proxy_group = igmp_proxy_device_merge_group (proxy_device, group, &srcaddrs, block);
@@ -382,7 +378,6 @@ igmp_proxy_device_merge_config (igmp_config_t * config, u8 block)
}
vec_free (srcaddrs);
}));
- /* *INDENT-ON* */
igmp_pkt_report_v3_send (&br);
diff --git a/src/plugins/igmp/igmp_query.c b/src/plugins/igmp/igmp_query.c
index c75b01a295b..c5bf8fca992 100644
--- a/src/plugins/igmp/igmp_query.c
+++ b/src/plugins/igmp/igmp_query.c
@@ -155,14 +155,12 @@ igmp_send_general_report_v3 (u32 obj, void *data)
igmp_pkt_build_report_init (&br, config->sw_if_index);
- /* *INDENT-OFF* */
FOR_EACH_GROUP (group, config,
({
igmp_pkt_report_v3_add_group
(&br, group,
igmp_filter_mode_to_report_type(group->router_filter_mode));
}));
- /* *INDENT-ON* */
igmp_pkt_report_v3_send (&br);
}
diff --git a/src/plugins/igmp/igmp_ssm_range.c b/src/plugins/igmp/igmp_ssm_range.c
index c74d312b508..a71741cd5f8 100644
--- a/src/plugins/igmp/igmp_ssm_range.c
+++ b/src/plugins/igmp/igmp_ssm_range.c
@@ -127,13 +127,11 @@ igmp_ssm_range_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_show_timers_command, static) = {
.path = "show igmp ssm-ranges",
.short_help = "show igmp ssm-ranges",
.function = igmp_ssm_range_show,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_ssm_range_init (vlib_main_t * vm)
@@ -145,12 +143,10 @@ igmp_ssm_range_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (igmp_ssm_range_init) =
{
.runs_after = VLIB_INITS("igmp_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/igmp/igmp_timer.c b/src/plugins/igmp/igmp_timer.c
index 2d38dd07a99..8abef8e554e 100644
--- a/src/plugins/igmp/igmp_timer.c
+++ b/src/plugins/igmp/igmp_timer.c
@@ -181,7 +181,6 @@ igmp_timer_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (igmp_timer_process_node) =
{
.function = igmp_timer_process,
@@ -189,7 +188,6 @@ VLIB_REGISTER_NODE (igmp_timer_process_node) =
.name = "igmp-timer-process",
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
igmp_timer_id_t
igmp_timer_schedule (f64 when, u32 obj, igmp_timer_function_t fn, void *data)
diff --git a/src/plugins/ikev2/CMakeLists.txt b/src/plugins/ikev2/CMakeLists.txt
index 6f2e5a68153..568271ed7d9 100644
--- a/src/plugins/ikev2/CMakeLists.txt
+++ b/src/plugins/ikev2/CMakeLists.txt
@@ -11,8 +11,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-add_definitions (-DWITH_LIBSSL=1)
+if(NOT OPENSSL_FOUND)
+ message(WARNING "openssl headers not found - ikev2 plugin disabled")
+ return()
+endif()
+
include_directories(${OPENSSL_INCLUDE_DIR})
+add_compile_definitions(OPENSSL_SUPPRESS_DEPRECATED)
add_vpp_plugin(ikev2
SOURCES
@@ -35,5 +40,5 @@ add_vpp_plugin(ikev2
ikev2_priv.h
LINK_LIBRARIES
- ${OPENSSL_LIBRARIES}
+ ${OPENSSL_CRYPTO_LIBRARIES}
)
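
Two effects worth noting in this CMake change: the plugin now skips itself
with a warning instead of failing the build when OpenSSL headers are absent,
and it links only the crypto component (OPENSSL_CRYPTO_LIBRARIES) —
presumably because IKEv2 uses libcrypto primitives such as EVP and RAND and
has no need for libssl.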
diff --git a/src/plugins/ikev2/ikev2.api b/src/plugins/ikev2/ikev2.api
index ff9ed72e888..de276e7f3ea 100644
--- a/src/plugins/ikev2/ikev2.api
+++ b/src/plugins/ikev2/ikev2.api
@@ -72,7 +72,26 @@ define ikev2_sa_dump
{
u32 client_index;
u32 context;
+};
+/** \brief Dump all SAs
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ikev2_sa_v2_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Dump all SAs
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ikev2_sa_v3_dump
+{
+ u32 client_index;
+ u32 context;
option status = "in_progress";
};
@@ -87,6 +106,32 @@ define ikev2_sa_details
i32 retval;
vl_api_ikev2_sa_t sa;
+};
+
+/** \brief Details about IKE SA
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param sa - SA data
+*/
+define ikev2_sa_v2_details
+{
+ u32 context;
+ i32 retval;
+
+ vl_api_ikev2_sa_v2_t sa;
+};
+
+/** \brief Details about IKE SA
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param sa - SA data
+*/
+define ikev2_sa_v3_details
+{
+ u32 context;
+ i32 retval;
+
+ vl_api_ikev2_sa_v3_t sa;
option status = "in_progress";
};
@@ -102,7 +147,6 @@ define ikev2_child_sa_dump
u32 sa_index;
option vat_help = "sa_index <index>";
- option status = "in_progress";
};
/** \brief Child SA details
@@ -116,6 +160,34 @@ define ikev2_child_sa_details
i32 retval;
vl_api_ikev2_child_sa_t child_sa;
+};
+
+/** \brief Dump child SA of specific SA
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sa_index - index of specific sa
+*/
+define ikev2_child_sa_v2_dump
+{
+ u32 client_index;
+ u32 context;
+
+ u32 sa_index;
+ option vat_help = "sa_index <index>";
+ option status = "in_progress";
+};
+
+/** \brief Child SA details
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param child_sa - child SA data
+*/
+define ikev2_child_sa_v2_details
+{
+ u32 context;
+ i32 retval;
+
+ vl_api_ikev2_child_sa_v2_t child_sa;
option status = "in_progress";
};
diff --git a/src/plugins/ikev2/ikev2.c b/src/plugins/ikev2/ikev2.c
index fa653760b1d..9bea2c96d12 100644
--- a/src/plugins/ikev2/ikev2.c
+++ b/src/plugins/ikev2/ikev2.c
@@ -110,14 +110,14 @@ typedef enum
typedef u32 ikev2_non_esp_marker;
-static_always_inline u16
-ikev2_get_port (ikev2_sa_t * sa)
+static u16
+ikev2_get_port (ikev2_sa_t *sa)
{
return ikev2_natt_active (sa) ? IKEV2_PORT_NATT : IKEV2_PORT;
}
-static_always_inline int
-ikev2_insert_non_esp_marker (ike_header_t * ike, int len)
+static int
+ikev2_insert_non_esp_marker (ike_header_t *ike, int len)
{
memmove ((u8 *) ike + sizeof (ikev2_non_esp_marker), ike, len);
clib_memset (ike, 0, sizeof (ikev2_non_esp_marker));
@@ -211,6 +211,8 @@ ikev2_select_proposal (ikev2_sa_proposal_t * proposals,
rv->proposal_num = proposal->proposal_num;
rv->protocol_id = proposal->protocol_id;
RAND_bytes ((u8 *) & rv->spi, sizeof (rv->spi));
+ if (rv->protocol_id != IKEV2_PROTOCOL_IKE)
+ rv->spi &= 0xffffffff;
goto done;
}
else
@@ -405,8 +407,8 @@ ikev2_generate_sa_init_data (ikev2_sa_t * sa)
RAND_bytes ((u8 *) & sa->rspi, 8);
/* generate nonce */
- sa->r_nonce = vec_new (u8, IKEV2_NONCE_SIZE);
- RAND_bytes ((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE);
+ sa->r_nonce = vec_new (u8, vec_len (sa->i_nonce));
+ RAND_bytes ((u8 *) sa->r_nonce, vec_len (sa->i_nonce));
}
/* generate dh keys */
@@ -480,11 +482,10 @@ ikev2_complete_sa_data (ikev2_sa_t * sa, ikev2_sa_t * sai)
}
static void
-ikev2_calc_keys (ikev2_sa_t * sa)
+ikev2_calc_keys_internal (ikev2_sa_t *sa, u8 *skeyseed)
{
u8 *tmp;
/* calculate SKEYSEED = prf(Ni | Nr, g^ir) */
- u8 *skeyseed = 0;
u8 *s = 0;
u16 integ_key_len = 0, salt_len = 0;
ikev2_sa_transform_t *tr_encr, *tr_prf, *tr_integ;
@@ -502,7 +503,6 @@ ikev2_calc_keys (ikev2_sa_t * sa)
vec_append (s, sa->i_nonce);
vec_append (s, sa->r_nonce);
- skeyseed = ikev2_calc_prf (tr_prf, s, sa->dh_shared_key);
/* Calculate S = Ni | Nr | SPIi | SPIr */
u64 *spi;
@@ -520,7 +520,6 @@ ikev2_calc_keys (ikev2_sa_t * sa)
salt_len * 2;
keymat = ikev2_calc_prfplus (tr_prf, skeyseed, s, len);
- vec_free (skeyseed);
vec_free (s);
int pos = 0;
@@ -568,7 +567,42 @@ ikev2_calc_keys (ikev2_sa_t * sa)
}
static void
-ikev2_calc_child_keys (ikev2_sa_t * sa, ikev2_child_sa_t * child)
+ikev2_calc_keys_rekey (ikev2_sa_t *sa_new, ikev2_sa_t *sa_old)
+{
+ u8 *s = 0, *skeyseed = 0;
+ ikev2_sa_transform_t *tr_prf =
+ ikev2_sa_get_td_for_type (sa_old->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+
+ vec_append (s, sa_new->dh_shared_key);
+ vec_append (s, sa_new->i_nonce);
+ vec_append (s, sa_new->r_nonce);
+ skeyseed = ikev2_calc_prf (tr_prf, sa_old->sk_d, s);
+
+ ikev2_calc_keys_internal (sa_new, skeyseed);
+
+ vec_free (skeyseed);
+ vec_free (s);
+}
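
For reference, this implements the IKE SA rekey derivation from RFC 7296,
section 2.18:

  SKEYSEED = prf (SK_d (old), g^ir (new) | Ni | Nr)

after which the individual keys are derived exactly as for an initial
exchange — hence the shared tail in ikev2_calc_keys_internal().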
+
+static void
+ikev2_calc_keys (ikev2_sa_t *sa)
+{
+ u8 *s = 0, *skeyseed = 0;
+ ikev2_sa_transform_t *tr_prf =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+
+ vec_append (s, sa->i_nonce);
+ vec_append (s, sa->r_nonce);
+ skeyseed = ikev2_calc_prf (tr_prf, s, sa->dh_shared_key);
+
+ ikev2_calc_keys_internal (sa, skeyseed);
+
+ vec_free (skeyseed);
+ vec_free (s);
+}
+
+static void
+ikev2_calc_child_keys (ikev2_sa_t *sa, ikev2_child_sa_t *child, u8 kex)
{
u8 *s = 0;
u16 integ_key_len = 0;
@@ -587,6 +621,8 @@ ikev2_calc_child_keys (ikev2_sa_t * sa, ikev2_child_sa_t * child)
else
salt_len = sizeof (u32);
+ if (kex)
+ vec_append (s, sa->dh_shared_key);
vec_append (s, sa->i_nonce);
vec_append (s, sa->r_nonce);
/* calculate PRFplus */
@@ -638,8 +674,8 @@ ikev2_calc_child_keys (ikev2_sa_t * sa, ikev2_child_sa_t * child)
vec_free (keymat);
}
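
The new kex flag mirrors RFC 7296, section 2.17: without a fresh key exchange
the Child SA keying material is KEYMAT = prf+ (SK_d, Ni | Nr), and with one
(i.e. PFS) it becomes KEYMAT = prf+ (SK_d, g^ir (new) | Ni | Nr) — which is
exactly what the conditional vec_append of dh_shared_key above adds.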
-static_always_inline u8 *
-ikev2_compute_nat_sha1 (u64 ispi, u64 rspi, ip_address_t * ia, u16 port)
+static u8 *
+ikev2_compute_nat_sha1 (u64 ispi, u64 rspi, ip_address_t *ia, u16 port)
{
const u32 max_buf_size =
sizeof (ispi) + sizeof (rspi) + sizeof (ip6_address_t) + sizeof (u16);
@@ -662,7 +698,10 @@ ikev2_parse_ke_payload (const void *p, u32 rlen, ikev2_sa_t * sa,
u16 plen = clib_net_to_host_u16 (ke->length);
ASSERT (plen >= sizeof (*ke) && plen <= rlen);
if (sizeof (*ke) > rlen)
- return 0;
+ {
+ ikev2_elog_error ("KE: packet too small");
+ return 0;
+ }
sa->dh_group = clib_net_to_host_u16 (ke->dh_group);
vec_reset_length (ke_data[0]);
@@ -671,13 +710,20 @@ ikev2_parse_ke_payload (const void *p, u32 rlen, ikev2_sa_t * sa,
}
static int
-ikev2_parse_nonce_payload (const void *p, u32 rlen, u8 * nonce)
+ikev2_parse_nonce_payload (const void *p, u32 rlen, const u8 **nonce)
{
const ike_payload_header_t *ikep = p;
u16 plen = clib_net_to_host_u16 (ikep->length);
ASSERT (plen >= sizeof (*ikep) && plen <= rlen);
- clib_memcpy_fast (nonce, ikep->payload, plen - sizeof (*ikep));
- return 1;
+ int len = plen - sizeof (*ikep);
+ ASSERT (len >= 16 && len <= 256);
+ if (PREDICT_FALSE (len < 16 || len > 256))
+ {
+ ikev2_elog_error ("NONCE: bad size");
+ return 0;
+ }
+ *nonce = ikep->payload;
+ return len;
}
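
The 16..256 bounds come from RFC 7296, section 3.9, which requires nonces to
be between 16 and 256 octets. Returning the actual length (or 0 on failure)
also lets callers stop copying a fixed IKEV2_NONCE_SIZE and honour the peer's
real nonce size, as the updated call sites below do.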
static int
@@ -685,10 +731,16 @@ ikev2_check_payload_length (const ike_payload_header_t * ikep, int rlen,
u16 * plen)
{
if (sizeof (*ikep) > rlen)
- return 0;
+ {
+ ikev2_elog_error ("payload: packet too small");
+ return 0;
+ }
*plen = clib_net_to_host_u16 (ikep->length);
if (*plen < sizeof (*ikep) || *plen > rlen)
- return 0;
+ {
+ ikev2_elog_error ("payload: bad size");
+ return 0;
+ }
return 1;
}
@@ -696,7 +748,6 @@ static int
ikev2_process_sa_init_req (vlib_main_t *vm, ikev2_sa_t *sa, ike_header_t *ike,
udp_header_t *udp, u32 len, u32 sw_if_index)
{
- u8 nonce[IKEV2_NONCE_SIZE];
int p = 0;
u8 payload = ike->nextpayload;
ike_payload_header_t *ikep;
@@ -716,7 +767,10 @@ ikev2_process_sa_init_req (vlib_main_t *vm, ikev2_sa_t *sa, ike_header_t *ike,
vec_add (sa->last_sa_init_req_packet_data, ike, len);
if (len < sizeof (*ike))
- return 0;
+ {
+ ikev2_elog_error ("IKE_INIT request too small");
+ return 0;
+ }
len -= sizeof (*ike);
while (p < len && payload != IKEV2_PAYLOAD_NONE)
@@ -739,9 +793,13 @@ ikev2_process_sa_init_req (vlib_main_t *vm, ikev2_sa_t *sa, ike_header_t *ike,
}
else if (payload == IKEV2_PAYLOAD_NONCE)
{
+ const u8 *nonce;
+ int nonce_len;
vec_reset_length (sa->i_nonce);
- if (ikev2_parse_nonce_payload (ikep, current_length, nonce))
- vec_add (sa->i_nonce, nonce, plen - sizeof (*ikep));
+ if ((nonce_len = ikev2_parse_nonce_payload (ikep, current_length,
+ &nonce)) <= 0)
+ return 0;
+ vec_add (sa->i_nonce, nonce, nonce_len);
}
else if (payload == IKEV2_PAYLOAD_NOTIFY)
{
@@ -805,7 +863,6 @@ ikev2_process_sa_init_resp (vlib_main_t * vm,
ikev2_sa_t * sa, ike_header_t * ike,
udp_header_t * udp, u32 len)
{
- u8 nonce[IKEV2_NONCE_SIZE];
int p = 0;
u8 payload = ike->nextpayload;
ike_payload_header_t *ikep;
@@ -824,7 +881,10 @@ ikev2_process_sa_init_resp (vlib_main_t * vm,
vec_add (sa->last_sa_init_res_packet_data, ike, len);
if (sizeof (*ike) > len)
- return;
+ {
+ ikev2_elog_error ("IKE_INIT response too small");
+ return;
+ }
len -= sizeof (*ike);
while (p < len && payload != IKEV2_PAYLOAD_NONE)
@@ -853,9 +913,13 @@ ikev2_process_sa_init_resp (vlib_main_t * vm,
}
else if (payload == IKEV2_PAYLOAD_NONCE)
{
+ const u8 *nonce;
+ int nonce_len;
vec_reset_length (sa->r_nonce);
- if (ikev2_parse_nonce_payload (ikep, current_length, nonce))
- vec_add (sa->r_nonce, nonce, plen - sizeof (*ikep));
+ if ((nonce_len = ikev2_parse_nonce_payload (ikep, current_length,
+ &nonce)) <= 0)
+ return;
+ vec_add (sa->r_nonce, nonce, nonce_len);
}
else if (payload == IKEV2_PAYLOAD_NOTIFY)
{
@@ -1021,8 +1085,8 @@ ikev2_decrypt_sk_payload (ikev2_sa_t * sa, ike_header_t * ike,
return plaintext;
}
-static_always_inline int
-ikev2_is_id_equal (ikev2_id_t * i1, ikev2_id_t * i2)
+static int
+ikev2_is_id_equal (const ikev2_id_t *i1, const ikev2_id_t *i2)
{
if (i1->type != i2->type)
return 0;
@@ -1046,7 +1110,6 @@ ikev2_initial_contact_cleanup_internal (ikev2_main_per_thread_data_t * ptd,
ikev2_child_sa_t *c;
/* find old IKE SAs with the same authenticated identity */
- /* *INDENT-OFF* */
pool_foreach (tmp, ptd->sas) {
if (!ikev2_is_id_equal (&tmp->i_id, &sa->i_id)
|| !ikev2_is_id_equal(&tmp->r_id, &sa->r_id))
@@ -1055,7 +1118,6 @@ ikev2_initial_contact_cleanup_internal (ikev2_main_per_thread_data_t * ptd,
if (sa->rspi != tmp->rspi)
vec_add1(delete, tmp - ptd->sas);
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (delete); i++)
{
@@ -1332,6 +1394,159 @@ ikev2_process_informational_req (vlib_main_t * vm,
}
static int
+ikev2_process_create_child_sa_rekey (ikev2_sa_t *sa, ikev2_sa_t *sar,
+ ikev2_rekey_t *rekey,
+ ikev2_sa_proposal_t *proposal,
+ ikev2_ts_t *tsi, ikev2_ts_t *tsr,
+ const u8 *nonce, int nonce_len)
+{
+ ikev2_sa_transform_t *tr;
+
+ rekey->i_proposal = proposal;
+ rekey->r_proposal = ikev2_select_proposal (proposal, IKEV2_PROTOCOL_ESP);
+
+ if (sar->dh_group)
+ {
+ tr =
+ ikev2_sa_get_td_for_type (rekey->r_proposal, IKEV2_TRANSFORM_TYPE_DH);
+
+ if (!tr || tr->dh_type != sar->dh_group)
+ {
+ rekey->notify_type = IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD;
+ ikev2_sa_free_proposal_vector (&rekey->r_proposal);
+ return 0;
+ }
+
+ vec_free (sa->dh_shared_key);
+ vec_free (sa->dh_private_key);
+ vec_free (sa->i_dh_data);
+ vec_free (sa->r_dh_data);
+
+ sa->dh_group = sar->dh_group;
+ sa->i_dh_data = sar->i_dh_data;
+ sar->i_dh_data = 0;
+
+ ikev2_generate_dh (sa, tr);
+ rekey->kex = 1;
+ }
+
+ vec_reset_length (sa->i_nonce);
+ vec_add (sa->i_nonce, nonce, nonce_len);
+
+ vec_validate (sa->r_nonce, nonce_len - 1);
+ RAND_bytes ((u8 *) sa->r_nonce, nonce_len);
+
+ rekey->tsi = tsi;
+ rekey->tsr = tsr;
+
+ return 1;
+}
+
+static void
+ikev2_complete_sa_rekey (ikev2_sa_t *sa_new, ikev2_sa_t *sa_old,
+ ikev2_sa_rekey_t *sa_rekey)
+{
+ sa_new->del = 0;
+ sa_new->rekey = 0;
+ sa_new->new_child = 0;
+ sa_new->sa_rekey = 0;
+ sa_new->last_sa_init_req_packet_data = 0;
+ sa_new->last_sa_init_res_packet_data = 0;
+ sa_new->last_msg_id = ~0;
+ sa_new->last_res_packet_data = 0;
+ sa_new->last_init_msg_id = 0;
+ clib_memset (&sa_new->stats, 0, sizeof (sa_new->stats));
+
+ sa_new->ispi = sa_rekey->ispi;
+ sa_new->rspi = sa_rekey->rspi;
+ sa_new->i_nonce = sa_rekey->i_nonce;
+ sa_new->r_nonce = sa_rekey->r_nonce;
+ sa_new->dh_group = sa_rekey->dh_group;
+ sa_new->dh_shared_key = sa_rekey->dh_shared_key;
+ sa_new->dh_private_key = sa_rekey->dh_private_key;
+ sa_new->i_dh_data = sa_rekey->i_dh_data;
+ sa_new->r_dh_data = sa_rekey->r_dh_data;
+ sa_new->i_proposals = sa_rekey->i_proposals;
+ sa_new->r_proposals = sa_rekey->r_proposals;
+
+ sa_new->sk_d = 0;
+ sa_new->sk_ai = 0;
+ sa_new->sk_ar = 0;
+ sa_new->sk_ei = 0;
+ sa_new->sk_er = 0;
+ sa_new->sk_pi = 0;
+ sa_new->sk_pr = 0;
+ ikev2_calc_keys_rekey (sa_new, sa_old);
+
+ sa_new->i_auth.data = vec_dup (sa_old->i_auth.data);
+ sa_new->i_auth.key = sa_old->i_auth.key;
+ if (sa_new->i_auth.key)
+ EVP_PKEY_up_ref (sa_new->i_auth.key);
+
+ sa_new->r_auth.data = vec_dup (sa_old->r_auth.data);
+ sa_new->r_auth.key = sa_old->r_auth.key;
+ if (sa_new->r_auth.key)
+ EVP_PKEY_up_ref (sa_new->r_auth.key);
+
+ sa_new->i_id.data = vec_dup (sa_old->i_id.data);
+ sa_new->r_id.data = vec_dup (sa_old->r_id.data);
+
+ sa_old->is_tun_itf_set = 0;
+ sa_old->tun_itf = ~0;
+ sa_old->old_id_expiration = 0;
+ sa_old->current_remote_id_mask = 0;
+ sa_old->old_remote_id = 0;
+ sa_old->old_remote_id_present = 0;
+ sa_old->childs = 0;
+ sa_old->sw_if_index = ~0;
+}
+
+static void
+ikev2_process_sa_rekey (ikev2_sa_t *sa_new, ikev2_sa_t *sa_old,
+ ikev2_sa_rekey_t *sa_rekey)
+{
+ ikev2_sa_transform_t *tr;
+
+ if (ikev2_generate_sa_init_data (sa_new) != IKEV2_GENERATE_SA_INIT_OK)
+ {
+ sa_rekey->notify_type = IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD;
+ return;
+ }
+
+ sa_new->r_proposals =
+ ikev2_select_proposal (sa_new->i_proposals, IKEV2_PROTOCOL_IKE);
+
+ tr = ikev2_sa_get_td_for_type (sa_new->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
+ if (!tr || tr->dh_type != sa_new->dh_group)
+ {
+ sa_rekey->notify_type = IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD;
+ return;
+ }
+
+ sa_rekey->notify_type = 0;
+ sa_rekey->ispi = sa_new->i_proposals[0].spi;
+ sa_rekey->rspi = sa_new->r_proposals[0].spi;
+ sa_rekey->i_nonce = sa_new->i_nonce;
+ sa_rekey->r_nonce = sa_new->r_nonce;
+ sa_rekey->dh_group = sa_new->dh_group;
+ sa_rekey->dh_shared_key = sa_new->dh_shared_key;
+ sa_rekey->dh_private_key = sa_new->dh_private_key;
+ sa_rekey->i_dh_data = sa_new->i_dh_data;
+ sa_rekey->r_dh_data = sa_new->r_dh_data;
+ sa_rekey->i_proposals = sa_new->i_proposals;
+ sa_rekey->r_proposals = sa_new->r_proposals;
+
+ sa_new->i_nonce = 0;
+ sa_new->r_nonce = 0;
+ sa_new->dh_shared_key = 0;
+ sa_new->dh_private_key = 0;
+ sa_new->i_dh_data = 0;
+ sa_new->r_dh_data = 0;
+ sa_new->i_proposals = 0;
+ sa_new->r_proposals = 0;
+}
+
+static int
ikev2_process_create_child_sa_req (vlib_main_t * vm,
ikev2_sa_t * sa, ike_header_t * ike,
u32 len)
@@ -1339,8 +1554,6 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
int p = 0;
u8 payload = ike->nextpayload;
u8 *plaintext = 0;
- u8 rekeying = 0;
- u8 nonce[IKEV2_NONCE_SIZE];
ikev2_rekey_t *rekey;
ike_payload_header_t *ikep;
ikev2_notify_t *n = 0;
@@ -1350,6 +1563,11 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
ikev2_child_sa_t *child_sa;
u32 dlen = 0, src;
u16 plen;
+ const u8 *nonce = 0;
+ int nonce_len = 0;
+ ikev2_sa_t sar;
+
+ clib_memset (&sar, 0, sizeof (sar));
if (sa->is_initiator)
src = ip_addr_v4 (&sa->raddr).as_u32;
@@ -1379,13 +1597,23 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
{
proposal = ikev2_parse_sa_payload (ikep, current_length);
}
+ else if (payload == IKEV2_PAYLOAD_KE)
+ {
+ if (!ikev2_parse_ke_payload (ikep, current_length, &sar,
+ &sar.i_dh_data))
+ goto cleanup_and_exit;
+ }
else if (payload == IKEV2_PAYLOAD_NOTIFY)
{
- n = ikev2_parse_notify_payload (ikep, current_length);
- if (n->msg_type == IKEV2_NOTIFY_MSG_REKEY_SA)
+ ikev2_notify_t *n0;
+ n0 = ikev2_parse_notify_payload (ikep, current_length);
+ if (n0->msg_type == IKEV2_NOTIFY_MSG_REKEY_SA)
{
- rekeying = 1;
+ vec_free (n);
+ n = n0;
}
+ else
+ vec_free (n0);
}
else if (payload == IKEV2_PAYLOAD_DELETE)
{
@@ -1397,7 +1625,9 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
}
else if (payload == IKEV2_PAYLOAD_NONCE)
{
- ikev2_parse_nonce_payload (ikep, current_length, nonce);
+ nonce_len = ikev2_parse_nonce_payload (ikep, current_length, &nonce);
+ if (nonce_len <= 0)
+ goto cleanup_and_exit;
}
else if (payload == IKEV2_PAYLOAD_TSI)
{
@@ -1421,7 +1651,9 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
p += plen;
}
- if (!proposal || proposal->protocol_id != IKEV2_PROTOCOL_ESP)
+ if (!proposal || !nonce ||
+ (proposal->protocol_id != IKEV2_PROTOCOL_ESP &&
+ proposal->protocol_id != IKEV2_PROTOCOL_IKE))
goto cleanup_and_exit;
if (sa->is_initiator)
@@ -1429,6 +1661,7 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
rekey = sa->rekey;
if (vec_len (rekey) == 0)
goto cleanup_and_exit;
+ rekey->notify_type = 0;
rekey->protocol_id = proposal->protocol_id;
rekey->i_proposal =
ikev2_select_proposal (proposal, IKEV2_PROTOCOL_ESP);
@@ -1438,7 +1671,7 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
rekey->tsr = tsr;
/* update Nr */
vec_reset_length (sa->r_nonce);
- vec_add (sa->r_nonce, nonce, IKEV2_NONCE_SIZE);
+ vec_add (sa->r_nonce, nonce, nonce_len);
child_sa = ikev2_sa_get_child (sa, rekey->ispi, IKEV2_PROTOCOL_ESP, 1);
if (child_sa)
{
@@ -1447,7 +1680,7 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
}
else
{
- if (rekeying)
+ if (n)
{
child_sa = ikev2_sa_get_child (sa, n->spi, n->protocol_id, 1);
if (!child_sa)
@@ -1457,36 +1690,52 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
goto cleanup_and_exit;
}
vec_add2 (sa->rekey, rekey, 1);
+ rekey->notify_type = 0;
+ rekey->kex = 0;
rekey->protocol_id = n->protocol_id;
rekey->spi = n->spi;
- rekey->i_proposal = proposal;
- rekey->r_proposal =
- ikev2_select_proposal (proposal, IKEV2_PROTOCOL_ESP);
- /* update Ni */
- vec_reset_length (sa->i_nonce);
- vec_add (sa->i_nonce, nonce, IKEV2_NONCE_SIZE);
- /* generate new Nr */
- vec_validate (sa->r_nonce, IKEV2_NONCE_SIZE - 1);
- RAND_bytes ((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE);
+ if (sa->old_remote_id_present)
+ {
+ rekey->notify_type = IKEV2_NOTIFY_MSG_TEMPORARY_FAILURE;
+ vec_free (proposal);
+ vec_free (tsr);
+ vec_free (tsi);
+ }
+ else if (!ikev2_process_create_child_sa_rekey (
+ sa, &sar, rekey, proposal, tsi, tsr, nonce, nonce_len))
+ {
+ vec_free (proposal);
+ vec_free (tsr);
+ vec_free (tsi);
+ }
+ }
+ else if (proposal[0].protocol_id == IKEV2_PROTOCOL_IKE)
+ {
+ ikev2_sa_rekey_t *sa_rekey;
+ if (tsi || tsr)
+ goto cleanup_and_exit;
+ sar.i_proposals = proposal;
+ vec_add (sar.i_nonce, nonce, nonce_len);
+ vec_add2 (sa->sa_rekey, sa_rekey, 1);
+ ikev2_process_sa_rekey (&sar, sa, sa_rekey);
}
else
{
/* create new child SA */
vec_add2 (sa->new_child, rekey, 1);
- rekey->i_proposal = proposal;
- rekey->r_proposal =
- ikev2_select_proposal (proposal, IKEV2_PROTOCOL_ESP);
- /* update Ni */
- vec_reset_length (sa->i_nonce);
- vec_add (sa->i_nonce, nonce, IKEV2_NONCE_SIZE);
- /* generate new Nr */
- vec_validate (sa->r_nonce, IKEV2_NONCE_SIZE - 1);
- RAND_bytes ((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE);
+ rekey->notify_type = 0;
+ rekey->kex = 0;
+ if (!ikev2_process_create_child_sa_rekey (
+ sa, &sar, rekey, proposal, tsi, tsr, nonce, nonce_len))
+ {
+ vec_free (proposal);
+ vec_free (tsr);
+ vec_free (tsi);
+ }
}
- rekey->tsi = tsi;
- rekey->tsr = tsr;
}
vec_free (n);
+ ikev2_sa_free_all_vec (&sar);
return 1;
cleanup_and_exit:
@@ -1494,6 +1743,7 @@ cleanup_and_exit:
vec_free (proposal);
vec_free (tsr);
vec_free (tsi);
+ ikev2_sa_free_all_vec (&sar);
return 0;
}
@@ -1541,6 +1791,25 @@ ikev2_sa_generate_authmsg (ikev2_sa_t * sa, int is_responder)
}
static int
+ikev2_match_profile (const ikev2_profile_t *p, const ikev2_id_t *id_loc,
+ const ikev2_id_t *id_rem, int is_initiator)
+{
+  /* on the initiator, IDi is always present and must match;
+   * on the responder, however, IDr (which is our local id) is optional */
+ if ((is_initiator || id_loc->type != 0) &&
+ !ikev2_is_id_equal (&p->loc_id, id_loc))
+ return 0;
+
+  /* on the initiator, we might not have configured a specific remote id;
+   * on the responder, however, the remote id should always be configured */
+ if ((!is_initiator || p->rem_id.type != 0) &&
+ !ikev2_is_id_equal (&p->rem_id, id_rem))
+ return 0;
+
+ return 1;
+}
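
A concrete example of the asymmetry: on the responder, a peer that omits the
optional IDr payload leaves id_loc->type at 0, so the local-id comparison is
skipped and only the remote id must match; an initiator, by contrast, always
has its own IDi checked against the profile.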
+
+static int
ikev2_ts_cmp (ikev2_ts_t * ts1, ikev2_ts_t * ts2)
{
if (ts1->ts_type == ts2->ts_type && ts1->protocol_id == ts2->protocol_id &&
@@ -1560,7 +1829,6 @@ ikev2_sa_match_ts (ikev2_sa_t * sa)
ikev2_ts_t *ts, *p_tsi, *p_tsr, *tsi = 0, *tsr = 0;
ikev2_id_t *id_rem, *id_loc;
- /* *INDENT-OFF* */
pool_foreach (p, km->profiles) {
if (sa->is_initiator)
@@ -1578,9 +1846,7 @@ ikev2_sa_match_ts (ikev2_sa_t * sa)
id_loc = &sa->r_id;
}
- /* check id */
- if (!ikev2_is_id_equal (&p->rem_id, id_rem)
- || !ikev2_is_id_equal (&p->loc_id, id_loc))
+ if (!ikev2_match_profile (p, id_loc, id_rem, sa->is_initiator))
continue;
sa->profile_index = p - km->profiles;
@@ -1605,7 +1871,6 @@ ikev2_sa_match_ts (ikev2_sa_t * sa)
break;
}
- /* *INDENT-ON* */
if (tsi && tsr)
{
@@ -1623,7 +1888,7 @@ ikev2_sa_match_ts (ikev2_sa_t * sa)
}
static ikev2_profile_t *
-ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
+ikev2_select_profile (vlib_main_t *vm, ikev2_main_t *km, ikev2_sa_t *sa,
ikev2_sa_transform_t *tr_prf, u8 *key_pad)
{
ikev2_profile_t *ret = 0, *p;
@@ -1648,9 +1913,7 @@ ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
pool_foreach (p, km->profiles)
{
- /* check id */
- if (!ikev2_is_id_equal (&p->rem_id, id_rem) ||
- !ikev2_is_id_equal (&p->loc_id, id_loc))
+ if (!ikev2_match_profile (p, id_loc, id_rem, sa->is_initiator))
continue;
if (sa_auth->method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC)
@@ -1665,6 +1928,7 @@ ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
if (!clib_memcmp (auth, sa_auth->data, vec_len (sa_auth->data)))
{
ikev2_set_state (sa, IKEV2_STATE_AUTHENTICATED);
+ sa->auth_timestamp = vlib_time_now (vm);
vec_free (auth);
ret = p;
break;
@@ -1683,6 +1947,7 @@ ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
if (ikev2_verify_sign (p->auth.key, sa_auth->data, authmsg) == 1)
{
ikev2_set_state (sa, IKEV2_STATE_AUTHENTICATED);
+ sa->auth_timestamp = vlib_time_now (vm);
ret = p;
break;
}
@@ -1698,7 +1963,7 @@ ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
}
static void
-ikev2_sa_auth (ikev2_sa_t *sa)
+ikev2_sa_auth (ikev2_sa_t *sa, vlib_main_t *vm)
{
ikev2_main_t *km = &ikev2_main;
ikev2_profile_t *sel_p = 0;
@@ -1719,7 +1984,7 @@ ikev2_sa_auth (ikev2_sa_t *sa)
}
key_pad = format (0, "%s", IKEV2_KEY_PAD);
- sel_p = ikev2_select_profile (km, sa, tr_prf, key_pad);
+ sel_p = ikev2_select_profile (vm, km, sa, tr_prf, key_pad);
if (sel_p)
{
@@ -1864,8 +2129,8 @@ ikev2_add_tunnel_from_main (ikev2_add_ipsec_tunnel_args_t * a)
.t_mode = TUNNEL_MODE_P2P,
.t_table_id = 0,
.t_hop_limit = 255,
- .t_src = a->local_ip,
- .t_dst = a->remote_ip,
+ .t_src = a->remote_ip,
+ .t_dst = a->local_ip,
};
tunnel_t tun_out = {
.t_flags = TUNNEL_FLAG_NONE,
@@ -1874,8 +2139,8 @@ ikev2_add_tunnel_from_main (ikev2_add_ipsec_tunnel_args_t * a)
.t_mode = TUNNEL_MODE_P2P,
.t_table_id = 0,
.t_hop_limit = 255,
- .t_src = a->remote_ip,
- .t_dst = a->local_ip,
+ .t_src = a->local_ip,
+ .t_dst = a->remote_ip,
};
if (~0 == a->sw_if_index)
@@ -1924,7 +2189,7 @@ ikev2_add_tunnel_from_main (ikev2_add_ipsec_tunnel_args_t * a)
rv = ipsec_sa_add_and_lock (a->local_sa_id, a->local_spi, IPSEC_PROTOCOL_ESP,
a->encr_type, &a->loc_ckey, a->integ_type,
&a->loc_ikey, a->flags, a->salt_local,
- a->src_port, a->dst_port, &tun_out, NULL);
+ a->src_port, a->dst_port, 0, &tun_out, NULL);
if (rv)
goto err0;
@@ -1932,7 +2197,7 @@ ikev2_add_tunnel_from_main (ikev2_add_ipsec_tunnel_args_t * a)
a->remote_sa_id, a->remote_spi, IPSEC_PROTOCOL_ESP, a->encr_type,
&a->rem_ckey, a->integ_type, &a->rem_ikey,
(a->flags | IPSEC_SA_FLAG_IS_INBOUND), a->salt_remote,
- a->ipsec_over_udp_port, a->ipsec_over_udp_port, &tun_in, NULL);
+ a->ipsec_over_udp_port, a->ipsec_over_udp_port, 0, &tun_in, NULL);
if (rv)
goto err1;
@@ -1951,10 +2216,9 @@ err0:
}
static int
-ikev2_create_tunnel_interface (vlib_main_t * vm,
- ikev2_sa_t * sa,
- ikev2_child_sa_t * child, u32 sa_index,
- u32 child_index, u8 is_rekey)
+ikev2_create_tunnel_interface (vlib_main_t *vm, ikev2_sa_t *sa,
+ ikev2_child_sa_t *child, u32 sa_index,
+ u32 child_index, u8 is_rekey, u8 kex)
{
u32 thread_index = vlib_get_thread_index ();
ikev2_main_t *km = &ikev2_main;
@@ -1968,6 +2232,8 @@ ikev2_create_tunnel_interface (vlib_main_t * vm,
clib_memset (&a, 0, sizeof (a));
+ child->timestamp = vlib_time_now (vm);
+
if (!child->r_proposals)
{
ikev2_set_state (sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN);
@@ -2097,7 +2363,7 @@ ikev2_create_tunnel_interface (vlib_main_t * vm,
}
a.integ_type = integ_type;
- ikev2_calc_child_keys (sa, child);
+ ikev2_calc_child_keys (sa, child, kex);
if (sa->is_initiator)
{
@@ -2202,7 +2468,7 @@ typedef struct
u32 sw_if_index;
} ikev2_del_ipsec_tunnel_args_t;
-static_always_inline u32
+static u32
ikev2_flip_alternate_sa_bit (u32 id)
{
u32 mask = 0x800;
@@ -2220,14 +2486,12 @@ ikev2_del_tunnel_from_main (ikev2_del_ipsec_tunnel_args_t * a)
if (~0 == a->sw_if_index)
{
- /* *INDENT-OFF* */
ipip_tunnel_key_t key = {
.src = a->local_ip,
.dst = a->remote_ip,
.transport = IPIP_TRANSPORT_IP4,
.fib_index = 0,
};
- /* *INDENT-ON* */
ipip = ipip_tunnel_db_find (&key);
@@ -2284,6 +2548,47 @@ ikev2_delete_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa,
return 0;
}
+static void
+ikev2_add_invalid_ke_payload (ikev2_sa_t *sa, ikev2_payload_chain_t *chain)
+{
+ u8 *data = vec_new (u8, 2);
+ ikev2_sa_transform_t *tr_dh =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
+ ASSERT (tr_dh && tr_dh->dh_type);
+ data[0] = (tr_dh->dh_type >> 8) & 0xff;
+ data[1] = (tr_dh->dh_type) & 0xff;
+ ikev2_payload_add_notify (chain, IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD, data);
+ vec_free (data);
+}
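
Per RFC 7296, section 1.3, an INVALID_KE_PAYLOAD notification carries the
responder's preferred Diffie-Hellman group as a two-octet big-endian number —
e.g. group 14 (2048-bit MODP) would be encoded as { 0x00, 0x0e } — which is
what the two byte-stores above build.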
+
+static void
+ikev2_add_notify_payload (ikev2_sa_t *sa, ikev2_payload_chain_t *chain,
+ u16 notify_type)
+{
+ if (notify_type == IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD)
+ ikev2_add_invalid_ke_payload (sa, chain);
+ else
+ ikev2_payload_add_notify (chain, notify_type, 0);
+}
+
+static void
+ikev2_add_create_child_resp (ikev2_sa_t *sa, ikev2_rekey_t *rekey,
+ ikev2_payload_chain_t *chain)
+{
+ if (rekey->notify_type)
+ {
+ ikev2_add_notify_payload (sa, chain, rekey->notify_type);
+ return;
+ }
+
+ ikev2_payload_add_sa (chain, rekey->r_proposal, 0);
+ ikev2_payload_add_nonce (chain, sa->r_nonce);
+ if (rekey->kex)
+ ikev2_payload_add_ke (chain, sa->dh_group, sa->r_dh_data);
+ ikev2_payload_add_ts (chain, rekey->tsi, IKEV2_PAYLOAD_TSI);
+ ikev2_payload_add_ts (chain, rekey->tsr, IKEV2_PAYLOAD_TSR);
+}
+
static u32
ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
void *user, udp_header_t *udp, ikev2_stats_t *stats)
@@ -2314,20 +2619,7 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
}
else if (sa->dh_group == IKEV2_TRANSFORM_DH_TYPE_NONE)
{
- u8 *data = vec_new (u8, 2);
- ikev2_sa_transform_t *tr_dh;
- tr_dh =
- ikev2_sa_get_td_for_type (sa->r_proposals,
- IKEV2_TRANSFORM_TYPE_DH);
- ASSERT (tr_dh && tr_dh->dh_type);
-
- data[0] = (tr_dh->dh_type >> 8) & 0xff;
- data[1] = (tr_dh->dh_type) & 0xff;
-
- ikev2_payload_add_notify (chain,
- IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD,
- data);
- vec_free (data);
+ ikev2_add_invalid_ke_payload (sa, chain);
ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE);
}
else if (sa->state == IKEV2_STATE_NOTIFY_AND_DELETE)
@@ -2345,7 +2637,7 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
ASSERT (udp);
ike->rspi = clib_host_to_net_u64 (sa->rspi);
- ikev2_payload_add_sa (chain, sa->r_proposals);
+ ikev2_payload_add_sa (chain, sa->r_proposals, 0);
ikev2_payload_add_ke (chain, sa->dh_group, sa->r_dh_data);
ikev2_payload_add_nonce (chain, sa->r_nonce);
@@ -2372,9 +2664,8 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
if (sa->state == IKEV2_STATE_AUTHENTICATED)
{
ikev2_payload_add_id (chain, &sa->r_id, IKEV2_PAYLOAD_IDR);
- ikev2_payload_add_id (chain, &sa->i_id, IKEV2_PAYLOAD_IDI);
ikev2_payload_add_auth (chain, &sa->r_auth);
- ikev2_payload_add_sa (chain, sa->childs[0].r_proposals);
+ ikev2_payload_add_sa (chain, sa->childs[0].r_proposals, 0);
ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI);
ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR);
}
@@ -2414,9 +2705,12 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
else if (sa->state == IKEV2_STATE_SA_INIT)
{
ikev2_payload_add_id (chain, &sa->i_id, IKEV2_PAYLOAD_IDI);
- ikev2_payload_add_id (chain, &sa->r_id, IKEV2_PAYLOAD_IDR);
+ /* IDr is optional when sending INIT from the initiator */
+ ASSERT (sa->r_id.type != 0 || sa->is_initiator);
+ if (sa->r_id.type != 0)
+ ikev2_payload_add_id (chain, &sa->r_id, IKEV2_PAYLOAD_IDR);
ikev2_payload_add_auth (chain, &sa->i_auth);
- ikev2_payload_add_sa (chain, sa->childs[0].i_proposals);
+ ikev2_payload_add_sa (chain, sa->childs[0].i_proposals, 0);
ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI);
ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR);
ikev2_payload_add_notify (chain, IKEV2_NOTIFY_MSG_INITIAL_CONTACT,
@@ -2493,7 +2787,7 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
notify.spi = sa->childs[0].i_proposals->spi;
*(u32 *) data = clib_host_to_net_u32 (notify.spi);
- ikev2_payload_add_sa (chain, proposals);
+ ikev2_payload_add_sa (chain, proposals, 0);
ikev2_payload_add_nonce (chain, sa->i_nonce);
ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI);
ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR);
@@ -2504,22 +2798,27 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
}
else if (vec_len (sa->rekey) > 0)
{
- ikev2_payload_add_sa (chain, sa->rekey[0].r_proposal);
- ikev2_payload_add_nonce (chain, sa->r_nonce);
- ikev2_payload_add_ts (chain, sa->rekey[0].tsi, IKEV2_PAYLOAD_TSI);
- ikev2_payload_add_ts (chain, sa->rekey[0].tsr, IKEV2_PAYLOAD_TSR);
+ ikev2_add_create_child_resp (sa, &sa->rekey[0], chain);
vec_del1 (sa->rekey, 0);
}
else if (vec_len (sa->new_child) > 0)
{
- ikev2_payload_add_sa (chain, sa->new_child[0].r_proposal);
- ikev2_payload_add_nonce (chain, sa->r_nonce);
- ikev2_payload_add_ts (chain, sa->new_child[0].tsi,
- IKEV2_PAYLOAD_TSI);
- ikev2_payload_add_ts (chain, sa->new_child[0].tsr,
- IKEV2_PAYLOAD_TSR);
+ ikev2_add_create_child_resp (sa, &sa->new_child[0], chain);
vec_del1 (sa->new_child, 0);
}
+ else if (vec_len (sa->sa_rekey) > 0)
+ {
+ if (sa->sa_rekey[0].notify_type)
+ ikev2_add_notify_payload (sa, chain, sa->sa_rekey[0].notify_type);
+ else
+ {
+ ikev2_payload_add_sa (chain, sa->sa_rekey[0].r_proposals, 1);
+ ikev2_payload_add_nonce (chain, sa->sa_rekey[0].r_nonce);
+ ikev2_payload_add_ke (chain, sa->sa_rekey[0].dh_group,
+ sa->sa_rekey[0].r_dh_data);
+ }
+ vec_del1 (sa->sa_rekey, 0);
+ }
else if (sa->unsupported_cp)
{
u8 *data = vec_new (u8, 1);
@@ -2712,13 +3011,11 @@ ikev2_retransmit_sa_init (ike_header_t * ike, ip_address_t iaddr,
u32 res;
ikev2_main_per_thread_data_t *ptd = ikev2_get_per_thread_data ();
- /* *INDENT-OFF* */
pool_foreach (sa, ptd->sas) {
res = ikev2_retransmit_sa_init_one (sa, ike, iaddr, raddr, rlen);
if (res)
return res;
}
- /* *INDENT-ON* */
/* req is not retransmit */
return 0;
@@ -2796,8 +3093,8 @@ ikev2_del_sa_init (u64 ispi)
sizeof (ispi));
}
-static_always_inline void
-ikev2_rewrite_v6_addrs (ikev2_sa_t * sa, ip6_header_t * ih)
+static void
+ikev2_rewrite_v6_addrs (ikev2_sa_t *sa, ip6_header_t *ih)
{
if (sa->is_initiator)
{
@@ -2811,8 +3108,8 @@ ikev2_rewrite_v6_addrs (ikev2_sa_t * sa, ip6_header_t * ih)
}
}
-static_always_inline void
-ikev2_rewrite_v4_addrs (ikev2_sa_t * sa, ip4_header_t * ih)
+static void
+ikev2_rewrite_v4_addrs (ikev2_sa_t *sa, ip4_header_t *ih)
{
if (sa->is_initiator)
{
@@ -2826,7 +3123,7 @@ ikev2_rewrite_v4_addrs (ikev2_sa_t * sa, ip4_header_t * ih)
}
}
-static_always_inline void
+static void
ikev2_set_ip_address (ikev2_sa_t *sa, const void *iaddr, const void *raddr,
const ip_address_family_t af)
{
@@ -2881,7 +3178,7 @@ ikev2_update_stats (vlib_main_t *vm, u32 node_index, ikev2_stats_t *s)
s->n_sa_auth_req);
}
-static_always_inline uword
+static uword
ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, u8 is_ip4, u8 natt)
{
@@ -3131,18 +3428,19 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
sa0->dst_port = clib_net_to_host_u16 (udp0->src_port);
res = ikev2_process_auth_req (vm, sa0, ike0, rlen);
if (res)
- ikev2_sa_auth (sa0);
+ ikev2_sa_auth (sa0, vm);
else
vlib_node_increment_counter (vm, node->node_index,
IKEV2_ERROR_MALFORMED_PACKET, 1);
if (sa0->state == IKEV2_STATE_AUTHENTICATED)
{
ikev2_initial_contact_cleanup (ptd, sa0);
+ p = hash_get (ptd->sa_by_rspi,
+ clib_net_to_host_u64 (ike0->rspi));
ikev2_sa_match_ts (sa0);
if (sa0->state != IKEV2_STATE_TS_UNACCEPTABLE)
- ikev2_create_tunnel_interface (vm, sa0,
- &sa0->childs[0],
- p[0], 0, 0);
+ ikev2_create_tunnel_interface (vm, sa0, &sa0->childs[0],
+ p[0], 0, 0, 0);
}
if (sa0->is_initiator)
@@ -3267,11 +3565,12 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
goto dispatch0;
}
- if (sa0->rekey)
+ if (vec_len (sa0->rekey) > 0)
{
- if (sa0->rekey[0].protocol_id != IKEV2_PROTOCOL_IKE)
+ if (!sa0->rekey[0].notify_type &&
+ sa0->rekey[0].protocol_id != IKEV2_PROTOCOL_IKE)
{
- if (sa0->childs)
+ if (vec_len (sa0->childs) > 0)
ikev2_sa_free_all_child_sa (&sa0->childs);
ikev2_child_sa_t *child;
vec_add2 (sa0->childs, child, 1);
@@ -3281,7 +3580,8 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
child->tsi = sa0->rekey[0].tsi;
child->tsr = sa0->rekey[0].tsr;
ikev2_create_tunnel_interface (vm, sa0, child, p[0],
- child - sa0->childs, 1);
+ child - sa0->childs, 1,
+ sa0->rekey[0].kex);
}
if (ike_hdr_is_response (ike0))
{
@@ -3300,7 +3600,7 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
1);
}
}
- else if (sa0->new_child)
+ else if (vec_len (sa0->new_child) > 0)
{
ikev2_child_sa_t *c;
vec_add2 (sa0->childs, c, 1);
@@ -3310,7 +3610,8 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
c->tsi = sa0->new_child[0].tsi;
c->tsr = sa0->new_child[0].tsr;
ikev2_create_tunnel_interface (vm, sa0, c, p[0],
- c - sa0->childs, 0);
+ c - sa0->childs, 0,
+ sa0->new_child[0].kex);
if (ike_hdr_is_request (ike0))
{
ike0->flags = IKEV2_HDR_FLAG_RESPONSE;
@@ -3321,6 +3622,38 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
vm, node->node_index, IKEV2_ERROR_NO_BUFF_SPACE, 1);
}
}
+ else if (vec_len (sa0->sa_rekey) > 0)
+ {
+ if (!sa0->sa_rekey[0].notify_type)
+ {
+ ikev2_sa_t *sar, *tmp = 0;
+ pool_get (ptd->sas, tmp);
+ sa0 = pool_elt_at_index (ptd->sas, p[0]);
+ /* swap old/new SAs to keep index and inherit IPsec SA */
+ clib_memcpy_fast (tmp, sa0, sizeof (*tmp));
+ sar = sa0;
+ sa0 = tmp;
+ hash_set (ptd->sa_by_rspi, sa0->rspi, sa0 - ptd->sas);
+ p = hash_get (ptd->sa_by_rspi, sa0->rspi);
+ ikev2_complete_sa_rekey (sar, sa0, &sa0->sa_rekey[0]);
+ hash_set (ptd->sa_by_rspi, sar->rspi, sar - ptd->sas);
+ }
+ if (ike_hdr_is_response (ike0))
+ {
+ vec_free (sa0->sa_rekey);
+ }
+ else
+ {
+ stats->n_rekey_req++;
+ sa0->stats.n_rekey_req++;
+ ike0->flags = IKEV2_HDR_FLAG_RESPONSE;
+ slen =
+ ikev2_generate_message (b0, sa0, ike0, 0, udp0, stats);
+ if (~0 == slen)
+ vlib_node_increment_counter (
+ vm, node->node_index, IKEV2_ERROR_NO_BUFF_SPACE, 1);
+ }
+ }
}
}
else
@@ -3429,7 +3762,6 @@ ikev2_ip6 (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return ikev2_node_internal (vm, node, frame, 0 /* is_ip4 */, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ikev2_node_ip4,static) = {
.function = ikev2_ip4,
.name = "ikev2-ip4",
@@ -3480,7 +3812,6 @@ VLIB_REGISTER_NODE (ikev2_node_ip6,static) = {
[IKEV2_NEXT_IP6_ERROR_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
// set ikev2 proposals when vpp is used as initiator
static clib_error_t *
@@ -3721,21 +4052,23 @@ ikev2_set_local_key (vlib_main_t * vm, u8 * file)
return 0;
}
-static_always_inline vnet_api_error_t
-ikev2_register_udp_port (ikev2_profile_t * p, u16 port)
+static vnet_api_error_t
+ikev2_register_udp_port (ikev2_profile_t *p, u16 port)
{
- ipsec_register_udp_port (port);
+ ipsec_register_udp_port (port, 0 /* is_ip4 */);
+ ipsec_register_udp_port (port, 1 /* is_ip4 */);
p->ipsec_over_udp_port = port;
return 0;
}
-static_always_inline void
-ikev2_unregister_udp_port (ikev2_profile_t * p)
+static void
+ikev2_unregister_udp_port (ikev2_profile_t *p)
{
if (p->ipsec_over_udp_port == IPSEC_UDP_PORT_NONE)
return;
- ipsec_unregister_udp_port (p->ipsec_over_udp_port);
+ ipsec_unregister_udp_port (p->ipsec_over_udp_port, 0 /* is_ip4 */);
+ ipsec_unregister_udp_port (p->ipsec_over_udp_port, 1 /* is_ip4 */);
p->ipsec_over_udp_port = IPSEC_UDP_PORT_NONE;
}
@@ -3820,12 +4153,10 @@ ikev2_cleanup_profile_sessions (ikev2_main_t * km, ikev2_profile_t * p)
u32 *sai;
u32 *del_sai = 0;
- /* *INDENT-OFF* */
pool_foreach (sa, km->sais) {
if (pi == sa->profile_index)
vec_add1 (del_sai, sa - km->sais);
}
- /* *INDENT-ON* */
vec_foreach (sai, del_sai)
{
@@ -3838,12 +4169,10 @@ ikev2_cleanup_profile_sessions (ikev2_main_t * km, ikev2_profile_t * p)
vec_foreach (tkm, km->per_thread_data)
{
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
if (sa->profile_index != ~0 && pi == sa->profile_index)
vec_add1 (del_sai, sa - tkm->sas);
}
- /* *INDENT-ON* */
vec_foreach (sai, del_sai)
{
@@ -3878,12 +4207,51 @@ ikev2_profile_free (ikev2_profile_t * p)
vec_free (p->rem_id.data);
}
+static void
+ikev2_bind (vlib_main_t *vm, ikev2_main_t *km)
+{
+ if (0 == km->bind_refcount)
+ {
+ udp_register_dst_port (vm, IKEV2_PORT, ikev2_node_ip4.index, 1);
+ udp_register_dst_port (vm, IKEV2_PORT, ikev2_node_ip6.index, 0);
+ udp_register_dst_port (vm, IKEV2_PORT_NATT, ikev2_node_ip4.index, 1);
+ udp_register_dst_port (vm, IKEV2_PORT_NATT, ikev2_node_ip6.index, 0);
+
+ vlib_punt_register (km->punt_hdl,
+ ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
+ "ikev2-ip4-natt");
+ }
+
+ km->bind_refcount++;
+}
+
+static void
+ikev2_unbind (vlib_main_t *vm, ikev2_main_t *km)
+{
+ km->bind_refcount--;
+ if (0 == km->bind_refcount)
+ {
+ vlib_punt_unregister (km->punt_hdl,
+ ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
+ "ikev2-ip4-natt");
+
+ udp_unregister_dst_port (vm, IKEV2_PORT_NATT, 0);
+ udp_unregister_dst_port (vm, IKEV2_PORT_NATT, 1);
+ udp_unregister_dst_port (vm, IKEV2_PORT, 0);
+ udp_unregister_dst_port (vm, IKEV2_PORT, 1);
+ }
+}
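
ikev2_bind()/ikev2_unbind() form a reference-counted pair: adding the first
profile registers the IKE and NAT-T UDP ports (plus the NAT-T punt), and
deleting the last profile tears them down again — see the bind/unbind calls
added to ikev2_add_del_profile() below.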
+
+static void ikev2_lazy_init (ikev2_main_t *km);
+
clib_error_t *
ikev2_add_del_profile (vlib_main_t * vm, u8 * name, int is_add)
{
ikev2_main_t *km = &ikev2_main;
ikev2_profile_t *p;
+ ikev2_lazy_init (km);
+
if (is_add)
{
if (ikev2_profile_index_by_name (name))
@@ -3897,6 +4265,8 @@ ikev2_add_del_profile (vlib_main_t * vm, u8 * name, int is_add)
p->tun_itf = ~0;
uword index = p - km->profiles;
mhash_set_mem (&km->profile_index_by_name, name, &index, 0);
+
+ ikev2_bind (vm, km);
}
else
{
@@ -3904,6 +4274,8 @@ ikev2_add_del_profile (vlib_main_t * vm, u8 * name, int is_add)
if (!p)
      return clib_error_return (0, "policy %v does not exist", name);
+ ikev2_unbind (vm, km);
+
ikev2_unregister_udp_port (p);
ikev2_cleanup_profile_sessions (km, p);
@@ -3995,8 +4367,8 @@ ikev2_set_profile_id (vlib_main_t * vm, u8 * name, u8 id_type, u8 * data,
return 0;
}
-static_always_inline void
-ikev2_set_ts_type (ikev2_ts_t * ts, const ip_address_t * addr)
+static void
+ikev2_set_ts_type (ikev2_ts_t *ts, const ip_address_t *addr)
{
if (ip_addr_version (addr) == AF_IP4)
ts->ts_type = TS_IPV4_ADDR_RANGE;
@@ -4004,9 +4376,9 @@ ikev2_set_ts_type (ikev2_ts_t * ts, const ip_address_t * addr)
ts->ts_type = TS_IPV6_ADDR_RANGE;
}
-static_always_inline void
-ikev2_set_ts_addrs (ikev2_ts_t * ts, const ip_address_t * start,
- const ip_address_t * end)
+static void
+ikev2_set_ts_addrs (ikev2_ts_t *ts, const ip_address_t *start,
+ const ip_address_t *end)
{
ip_address_copy (&ts->start_addr, start);
ip_address_copy (&ts->end_addr, end);
@@ -4103,15 +4475,15 @@ ikev2_set_profile_ike_transforms (vlib_main_t * vm, u8 * name,
u32 crypto_key_size)
{
ikev2_profile_t *p;
- clib_error_t *r;
p = ikev2_profile_index_by_name (name);
-
if (!p)
- {
- r = clib_error_return (0, "unknown profile %v", name);
- return r;
- }
+ return clib_error_return (0, "unknown profile %v", name);
+
+ if ((IKEV2_TRANSFORM_INTEG_TYPE_NONE != integ_alg) +
+ (IKEV2_TRANSFORM_ENCR_TYPE_AES_GCM_16 == crypto_alg) !=
+ 1)
+ return clib_error_return (0, "invalid cipher + integrity algorithm");
p->ike_ts.crypto_alg = crypto_alg;
p->ike_ts.integ_alg = integ_alg;
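The new validation sums two booleans to demand that exactly one of them holds: either a separate integrity transform is configured, or the cipher is AES-GCM-16, an AEAD that authenticates by itself and so must not be paired with one. A sketch of the same exactly-one test:

```c
#include <stdbool.h>

/* Adding the two booleans turns "exactly one" into one comparison:
 * 0 means neither integrity source, 2 means both, 1 is the only
 * consistent configuration. */
static bool
ike_transforms_consistent (bool has_integ_alg, bool cipher_is_aead)
{
  return ((int) has_integ_alg + (int) cipher_is_aead) == 1;
}
```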
@@ -4264,13 +4636,20 @@ ikev2_resolve_responder_hostname (vlib_main_t *vm, ikev2_responder_t *r)
dns_cache_entry_t *ep = 0;
dns_pending_request_t _t0, *t0 = &_t0;
dns_resolve_name_t _rn, *rn = &_rn;
+ u8 *name;
int rv;
- if (!km->dns_resolve_name)
+ if (!km->dns_resolve_name_ptr)
return clib_error_return (0, "cannot load symbols from dns plugin");
t0->request_type = DNS_API_PENDING_NAME_TO_IP;
- rv = km->dns_resolve_name (r->hostname, &ep, t0, rn);
+  /* VPP quirk: IKEv2 internally keeps names as non-NUL-terminated
+   * vectors, whereas the DNS resolver expects a NUL-terminated
+   * C-string */
+ name = vec_dup (r->hostname);
+ vec_terminate_c_string (name);
+ rv = ((__typeof__ (dns_resolve_name) *) km->dns_resolve_name_ptr) (name, &ep,
+ t0, rn);
+ vec_free (name);
if (rv < 0)
return clib_error_return (0, "dns lookup failure");
@@ -4339,7 +4718,7 @@ ikev2_initiate_sa_init (vlib_main_t * vm, u8 * name)
proposals[0].protocol_id = IKEV2_PROTOCOL_IKE;
/* Add and then cleanup proposal data */
- ikev2_payload_add_sa (chain, proposals);
+ ikev2_payload_add_sa (chain, proposals, 0);
ikev2_sa_free_proposal_vector (&proposals);
sa.is_initiator = 1;
@@ -4373,6 +4752,7 @@ ikev2_initiate_sa_init (vlib_main_t * vm, u8 * name)
sa.childs[0].i_proposals[0].protocol_id = IKEV2_PROTOCOL_ESP;
RAND_bytes ((u8 *) & sa.childs[0].i_proposals[0].spi,
sizeof (sa.childs[0].i_proposals[0].spi));
+ sa.childs[0].i_proposals[0].spi &= 0xffffffff;
/* Add NAT detection notification messages (mandatory) */
u8 *nat_detection_sha1 = ikev2_compute_nat_sha1 (
@@ -4524,7 +4904,6 @@ ikev2_initiate_delete_child_sa (vlib_main_t * vm, u32 ispi)
ikev2_sa_t *sa;
if (fchild)
break;
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
fchild = ikev2_sa_get_child(sa, ispi, IKEV2_PROTOCOL_ESP, 1);
if (fchild)
@@ -4533,7 +4912,6 @@ ikev2_initiate_delete_child_sa (vlib_main_t * vm, u32 ispi)
break;
}
}
- /* *INDENT-ON* */
}
if (!fchild || !fsa)
@@ -4564,7 +4942,6 @@ ikev2_initiate_delete_ike_sa (vlib_main_t * vm, u64 ispi)
ikev2_sa_t *sa;
if (fsa)
break;
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
if (sa->ispi == ispi)
{
@@ -4573,7 +4950,6 @@ ikev2_initiate_delete_ike_sa (vlib_main_t * vm, u64 ispi)
break;
}
}
- /* *INDENT-ON* */
}
if (!fsa)
@@ -4615,10 +4991,12 @@ ikev2_rekey_child_sa_internal (vlib_main_t * vm, ikev2_sa_t * sa,
ikev2_rekey_t *rekey;
vec_reset_length (sa->rekey);
vec_add2 (sa->rekey, rekey, 1);
+ rekey->kex = 0;
ikev2_sa_proposal_t *proposals = vec_dup (csa->i_proposals);
/* need new ispi */
RAND_bytes ((u8 *) & proposals[0].spi, sizeof (proposals[0].spi));
+ proposals[0].spi &= 0xffffffff;
rekey->spi = proposals[0].spi;
rekey->ispi = csa->i_proposals->spi;
len = ikev2_generate_message (b0, sa, ike0, proposals, 0, 0);
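Because the proposal's `spi` field was widened to `u64` (to carry 8-byte IKE SPIs during rekey), `RAND_bytes` now fills all eight bytes; the `&= 0xffffffff` keeps ESP SPIs within their 32-bit wire size. A simplified sketch of the masking; real code draws its randomness from `RAND_bytes`, not `rand`:

```c
#include <stdint.h>
#include <stdlib.h>

/* ESP SPIs stay 32-bit on the wire even though the proposal field is
 * now u64; the mask discards the random upper half. */
static uint64_t
new_esp_spi (void)
{
  uint64_t spi = ((uint64_t) rand () << 32) | (uint32_t) rand ();
  return spi & 0xffffffffULL;
}
```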
@@ -4647,7 +5025,6 @@ ikev2_initiate_rekey_child_sa (vlib_main_t * vm, u32 ispi)
ikev2_sa_t *sa;
if (fchild)
break;
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
fchild = ikev2_sa_get_child(sa, ispi, IKEV2_PROTOCOL_ESP, 1);
if (fchild)
@@ -4656,7 +5033,6 @@ ikev2_initiate_rekey_child_sa (vlib_main_t * vm, u32 ispi)
break;
}
}
- /* *INDENT-ON* */
}
if (!fchild || !fsa)
@@ -4689,12 +5065,10 @@ ikev2_sa_del (ikev2_profile_t * p, u32 sw_if_index)
vec_foreach (tkm, km->per_thread_data)
{
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
if (ikev2_sa_sw_if_match (sa, sw_if_index))
vec_add1 (sa_vec, sa);
}
- /* *INDENT-ON* */
vec_foreach (sap, sa_vec)
{
@@ -4704,12 +5078,10 @@ ikev2_sa_del (ikev2_profile_t * p, u32 sw_if_index)
}
vec_free (sa_vec);
- /* *INDENT-OFF* */
pool_foreach (sa, km->sais) {
if (ikev2_sa_sw_if_match (sa, sw_if_index))
vec_add1 (ispi_vec, sa->ispi);
}
- /* *INDENT-ON* */
vec_foreach (ispi, ispi_vec)
{
@@ -4728,12 +5100,10 @@ ikev2_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
if (is_add)
return 0;
- /* *INDENT-OFF* */
pool_foreach (p, km->profiles) {
if (p->responder.sw_if_index == sw_if_index)
ikev2_sa_del (p, sw_if_index);
}
- /* *INDENT-ON* */
return 0;
}
@@ -4744,67 +5114,24 @@ clib_error_t *
ikev2_init (vlib_main_t * vm)
{
ikev2_main_t *km = &ikev2_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- int thread_id;
clib_memset (km, 0, sizeof (ikev2_main_t));
+
+ km->log_level = IKEV2_LOG_ERROR;
+ km->log_class = vlib_log_register_class ("ikev2", 0);
+
km->vnet_main = vnet_get_main ();
km->vlib_main = vm;
km->liveness_period = IKEV2_LIVENESS_PERIOD_CHECK;
km->liveness_max_retries = IKEV2_LIVENESS_RETRIES;
- ikev2_crypto_init (km);
-
- mhash_init_vec_string (&km->profile_index_by_name, sizeof (uword));
-
- vec_validate_aligned (km->per_thread_data, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
- for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++)
- {
- ikev2_main_per_thread_data_t *ptd =
- vec_elt_at_index (km->per_thread_data, thread_id);
- ptd->sa_by_rspi = hash_create (0, sizeof (uword));
-
-#if OPENSSL_VERSION_NUMBER >= 0x10100000L
- ptd->evp_ctx = EVP_CIPHER_CTX_new ();
- ptd->hmac_ctx = HMAC_CTX_new ();
-#else
- EVP_CIPHER_CTX_init (&ptd->_evp_ctx);
- ptd->evp_ctx = &ptd->_evp_ctx;
- HMAC_CTX_init (&(ptd->_hmac_ctx));
- ptd->hmac_ctx = &ptd->_hmac_ctx;
-#endif
- }
-
- km->sa_by_ispi = hash_create (0, sizeof (uword));
- km->sw_if_indices = hash_create (0, 0);
-
- udp_register_dst_port (vm, IKEV2_PORT, ikev2_node_ip4.index, 1);
- udp_register_dst_port (vm, IKEV2_PORT, ikev2_node_ip6.index, 0);
- udp_register_dst_port (vm, IKEV2_PORT_NATT, ikev2_node_ip4.index, 1);
- udp_register_dst_port (vm, IKEV2_PORT_NATT, ikev2_node_ip6.index, 0);
-
- vlib_punt_hdl_t punt_hdl = vlib_punt_client_register ("ikev2-ip4-natt");
- vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
- "ikev2-ip4-natt");
- ikev2_cli_reference ();
-
- km->dns_resolve_name =
- vlib_get_plugin_symbol ("dns_plugin.so", "dns_resolve_name");
- if (!km->dns_resolve_name)
- ikev2_log_error ("cannot load symbols from dns plugin");
-
- km->log_level = IKEV2_LOG_ERROR;
- km->log_class = vlib_log_register_class ("ikev2", 0);
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ikev2_init) = {
- .runs_after = VLIB_INITS ("ipsec_init", "ipsec_punt_init", "dns_init"),
+ .runs_after = VLIB_INITS ("ipsec_init", "ipsec_punt_init"),
};
-/* *INDENT-ON* */
static u8
ikev2_mngr_process_child_sa (ikev2_sa_t * sa, ikev2_child_sa_t * csa,
@@ -4875,14 +5202,12 @@ ikev2_mngr_process_child_sa (ikev2_sa_t * sa, ikev2_child_sa_t * csa,
ip_addr_bytes (&sa->iaddr));
}
- /* *INDENT-OFF* */
ipip_tunnel_key_t key = {
.src = local_ip,
.dst = remote_ip,
.transport = IPIP_TRANSPORT_IP4,
.fib_index = 0,
};
- /* *INDENT-ON* */
ipip = ipip_tunnel_db_find (&key);
@@ -4963,7 +5288,6 @@ ikev2_mngr_process_ipsec_sa (ipsec_sa_t * ipsec_sa)
ikev2_sa_t *sa;
if (fchild)
break;
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
fchild = ikev2_sa_get_child(sa, ipsec_sa->spi, IKEV2_PROTOCOL_ESP, 1);
if (fchild)
@@ -4972,7 +5296,6 @@ ikev2_mngr_process_ipsec_sa (ipsec_sa_t * ipsec_sa)
break;
}
}
- /* *INDENT-ON* */
}
vlib_get_combined_counter (&ipsec_sa_counters,
ipsec_sa->stat_index, &counts);
@@ -5070,7 +5393,6 @@ ikev2_process_pending_sa_init (vlib_main_t *vm, ikev2_main_t *km)
u64 ispi;
ikev2_sa_t *sa;
- /* *INDENT-OFF* */
hash_foreach (ispi, sai, km->sa_by_ispi,
({
sa = pool_elt_at_index (km->sais, sai);
@@ -5079,7 +5401,6 @@ ikev2_process_pending_sa_init (vlib_main_t *vm, ikev2_main_t *km)
ikev2_process_pending_sa_init_one (vm, km, sa);
}));
- /* *INDENT-ON* */
}
static void
@@ -5137,8 +5458,8 @@ ikev2_disable_dpd (void)
km->dpd_disabled = 1;
}
-static_always_inline int
-ikev2_mngr_process_responder_sas (ikev2_sa_t * sa)
+static int
+ikev2_mngr_process_responder_sas (ikev2_sa_t *sa)
{
ikev2_main_t *km = &ikev2_main;
vlib_main_t *vm = km->vlib_main;
@@ -5169,6 +5490,9 @@ ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
ikev2_child_sa_t *c;
u32 *sai;
+ /* lazy init will wake it up */
+ vlib_process_wait_for_event (vm);
+
while (1)
{
vlib_process_wait_for_event_or_clock (vm, 2);
@@ -5181,34 +5505,38 @@ ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
ikev2_sa_t *sa;
u32 *to_be_deleted = 0;
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
ikev2_child_sa_t *c;
u8 del_old_ids = 0;
- if (sa->state != IKEV2_STATE_AUTHENTICATED)
- continue;
+ if (sa->state == IKEV2_STATE_SA_INIT)
+ {
+ if (vec_len (sa->childs) > 0)
+ vec_add1 (to_be_deleted, sa - tkm->sas);
+ }
+ else if (sa->state != IKEV2_STATE_AUTHENTICATED)
+ continue;
- if (sa->old_remote_id_present && 0 > sa->old_id_expiration)
- {
- sa->old_remote_id_present = 0;
- del_old_ids = 1;
- }
- else
- sa->old_id_expiration -= 1;
+ if (sa->old_remote_id_present && 0 > sa->old_id_expiration)
+ {
+ sa->old_remote_id_present = 0;
+ del_old_ids = 1;
+ }
+ else
+ sa->old_id_expiration -= 1;
- vec_foreach (c, sa->childs)
- ikev2_mngr_process_child_sa(sa, c, del_old_ids);
+ vec_foreach (c, sa->childs)
+ ikev2_mngr_process_child_sa (sa, c, del_old_ids);
- if (!km->dpd_disabled && ikev2_mngr_process_responder_sas (sa))
- vec_add1 (to_be_deleted, sa - tkm->sas);
- }
- /* *INDENT-ON* */
+ if (!km->dpd_disabled && ikev2_mngr_process_responder_sas (sa))
+ vec_add1 (to_be_deleted, sa - tkm->sas);
+ }
vec_foreach (sai, to_be_deleted)
{
sa = pool_elt_at_index (tkm->sas, sai[0]);
- u8 reinitiate = (sa->is_initiator && sa->profile_index != ~0);
+ const u32 profile_index = sa->profile_index;
+ const int reinitiate = (sa->is_initiator && profile_index != ~0);
vec_foreach (c, sa->childs)
{
ikev2_delete_tunnel_interface (km->vnet_main, sa, c);
@@ -5220,7 +5548,7 @@ ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
if (reinitiate)
{
- p = pool_elt_at_index (km->profiles, sa->profile_index);
+ p = pool_elt_at_index (km->profiles, profile_index);
if (p)
{
clib_error_t *e = ikev2_initiate_sa_init (vm, p->name);
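Note the reordering above: `profile_index` is copied out of the SA before the delete path runs, because the SA's child teardown may invalidate it by the time reinitiation is decided. A generic sketch of the save-before-free pattern; `session_t` and `delete_session` are illustrative stand-ins:

```c
typedef struct
{
  int is_initiator;
  unsigned profile_index;
} session_t;

static void delete_session (session_t *s) { /* frees/recycles *s */ }

static void
expire_session (session_t *s)
{
  /* Copy out what the post-delete logic needs *before* deleting. */
  const unsigned profile_index = s->profile_index;
  const int reinitiate = s->is_initiator && profile_index != ~0u;

  delete_session (s); /* s must not be dereferenced past this line */

  if (reinitiate)
    {
      /* look the profile up by the saved index, never via s */
    }
}
```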
@@ -5237,19 +5565,16 @@ ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
/* process ipsec sas */
ipsec_sa_t *sa;
- /* *INDENT-OFF* */
pool_foreach (sa, ipsec_sa_pool)
{
ikev2_mngr_process_ipsec_sa (sa);
}
- /* *INDENT-ON* */
ikev2_process_pending_sa_init (vm, km);
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ikev2_mngr_process_node, static) = {
.function = ikev2_mngr_process_fn,
.type = VLIB_NODE_TYPE_PROCESS,
@@ -5257,11 +5582,60 @@ VLIB_REGISTER_NODE (ikev2_mngr_process_node, static) = {
"ikev2-manager-process",
};
+static void
+ikev2_lazy_init (ikev2_main_t *km)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ int thread_id;
+
+ if (km->lazy_init_done)
+ return;
+
+ ikev2_crypto_init (km);
+
+ mhash_init_vec_string (&km->profile_index_by_name, sizeof (uword));
+
+ vec_validate_aligned (km->per_thread_data, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+ for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++)
+ {
+ ikev2_main_per_thread_data_t *ptd =
+ vec_elt_at_index (km->per_thread_data, thread_id);
+
+ ptd->sa_by_rspi = hash_create (0, sizeof (uword));
+
+#if OPENSSL_VERSION_NUMBER >= 0x10100000L
+ ptd->evp_ctx = EVP_CIPHER_CTX_new ();
+ ptd->hmac_ctx = HMAC_CTX_new ();
+#else
+ EVP_CIPHER_CTX_init (&ptd->_evp_ctx);
+ ptd->evp_ctx = &ptd->_evp_ctx;
+ HMAC_CTX_init (&(ptd->_hmac_ctx));
+ ptd->hmac_ctx = &ptd->_hmac_ctx;
+#endif
+ }
+
+ km->sa_by_ispi = hash_create (0, sizeof (uword));
+ km->sw_if_indices = hash_create (0, 0);
+
+ km->punt_hdl = vlib_punt_client_register ("ikev2");
+
+ km->dns_resolve_name_ptr =
+ vlib_get_plugin_symbol ("dns_plugin.so", "dns_resolve_name");
+ if (!km->dns_resolve_name_ptr)
+ ikev2_log_error ("cannot load symbols from dns plugin");
+
+ /* wake up ikev2 process */
+ vlib_process_signal_event (vlib_get_first_main (),
+ ikev2_mngr_process_node.index, 0, 0);
+
+ km->lazy_init_done = 1;
+}
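`ikev2_lazy_init` moves all heavy setup out of `ikev2_init`, so VPP instances that never configure IKEv2 pay nothing; the first profile or config call triggers it and then wakes the manager process. Stripped to its core, the guard looks like this, assuming single-threaded configuration as on VPP's main thread (a concurrent variant would need `pthread_once` or an atomic flag):

```c
static int lazy_init_done;

static void
heavy_setup (void)
{
  /* hash tables, per-thread crypto contexts, plugin symbols, ... */
}

static void
module_lazy_init (void)
{
  if (lazy_init_done)
    return;
  heavy_setup ();
  lazy_init_done = 1;
}
```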
+
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Internet Key Exchange (IKEv2) Protocol",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ikev2/ikev2.h b/src/plugins/ikev2/ikev2.h
index 308ffe52ba4..9ed0ecc494c 100644
--- a/src/plugins/ikev2/ikev2.h
+++ b/src/plugins/ikev2/ikev2.h
@@ -32,7 +32,6 @@
typedef u8 v8;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u64 ispi;
u64 rspi;
@@ -42,14 +41,12 @@ typedef CLIB_PACKED (struct {
u8 flags;
u32 msgid; u32 length; u8 payload[0];
}) ike_header_t;
-/* *INDENT-ON* */
#define ike_hdr_is_response(_h) ((_h)->flags & IKEV2_HDR_FLAG_RESPONSE)
#define ike_hdr_is_request(_h) (!ike_hdr_is_response(_h))
#define ike_hdr_is_initiator(_h) ((_h)->flags & IKEV2_HDR_FLAG_INITIATOR)
#define ike_hdr_is_responder(_h) (!(ike_hdr_is_initiator(_h)))
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -58,17 +55,13 @@ typedef CLIB_PACKED (struct {
u8 reserved[2];
u8 payload[0];
}) ike_ke_payload_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
u16 length; u8 payload[0];
}) ike_payload_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -77,9 +70,7 @@ typedef CLIB_PACKED (struct {
u8 reserved[3];
u8 payload[0];
}) ike_auth_payload_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -87,7 +78,6 @@ typedef CLIB_PACKED (struct {
u8 id_type;
u8 reserved[3]; u8 payload[0];
}) ike_id_payload_header_t;
-/* *INDENT-ON* */
#define IKE_VERSION_2 0x20
@@ -451,7 +441,6 @@ uword unformat_ikev2_transform_dh_type (unformat_input_t * input,
va_list * args);
uword unformat_ikev2_transform_esn_type (unformat_input_t * input,
va_list * args);
-void ikev2_cli_reference (void);
clib_error_t *ikev2_set_liveness_params (u32 period, u32 max_retries);
diff --git a/src/plugins/ikev2/ikev2_api.c b/src/plugins/ikev2/ikev2_api.c
index d104e54579a..a3e71668126 100644
--- a/src/plugins/ikev2/ikev2_api.c
+++ b/src/plugins/ikev2/ikev2_api.c
@@ -188,12 +188,10 @@ vl_api_ikev2_profile_dump_t_handler (vl_api_ikev2_profile_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (profile, im->profiles)
{
send_profile (profile, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -207,6 +205,32 @@ ikev2_copy_stats (vl_api_ikev2_sa_stats_t *dst, const ikev2_stats_t *src)
dst->n_sa_auth_req = src->n_sa_auth_req;
}
+static vl_api_ikev2_state_t
+ikev2_state_encode (ikev2_state_t state)
+{
+ switch (state)
+ {
+ case IKEV2_STATE_UNKNOWN:
+ return UNKNOWN;
+ case IKEV2_STATE_SA_INIT:
+ return SA_INIT;
+ case IKEV2_STATE_DELETED:
+ return DELETED;
+ case IKEV2_STATE_AUTH_FAILED:
+ return AUTH_FAILED;
+ case IKEV2_STATE_AUTHENTICATED:
+ return AUTHENTICATED;
+ case IKEV2_STATE_NOTIFY_AND_DELETE:
+ return NOTIFY_AND_DELETE;
+ case IKEV2_STATE_TS_UNACCEPTABLE:
+ return TS_UNACCEPTABLE;
+ case IKEV2_STATE_NO_PROPOSAL_CHOSEN:
+ return NO_PROPOSAL_CHOSEN;
+ }
+
+ return UNKNOWN;
+}
+
static void
send_sa (ikev2_sa_t * sa, vl_api_ikev2_sa_dump_t * mp, u32 api_sa_index)
{
@@ -214,7 +238,6 @@ send_sa (ikev2_sa_t * sa, vl_api_ikev2_sa_dump_t * mp, u32 api_sa_index)
int rv = 0;
ikev2_sa_transform_t *tr;
- /* *INDENT-OFF* */
REPLY_MACRO2_ZERO (VL_API_IKEV2_SA_DETAILS,
{
vl_api_ikev2_sa_t *rsa = &rmp->sa;
@@ -270,7 +293,6 @@ send_sa (ikev2_sa_t * sa, vl_api_ikev2_sa_dump_t * mp, u32 api_sa_index)
vl_api_ikev2_sa_t_endian(rsa);
});
- /* *INDENT-ON* */
}
static void
@@ -282,17 +304,199 @@ vl_api_ikev2_sa_dump_t_handler (vl_api_ikev2_sa_dump_t * mp)
vec_foreach (tkm, km->per_thread_data)
{
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas)
{
u32 api_sa_index = ikev2_encode_sa_index (sa - tkm->sas,
tkm - km->per_thread_data);
send_sa (sa, mp, api_sa_index);
}
- /* *INDENT-ON* */
}
}
+static void
+send_sa_v2 (ikev2_sa_t *sa, vl_api_ikev2_sa_v2_dump_t *mp, u32 api_sa_index)
+{
+ ikev2_main_t *km = &ikev2_main;
+ vl_api_ikev2_sa_v2_details_t *rmp = 0;
+ int rv = 0;
+ ikev2_sa_transform_t *tr;
+ ikev2_profile_t *p;
+ p = pool_elt_at_index (km->profiles, sa->profile_index);
+
+ REPLY_MACRO2_ZERO (VL_API_IKEV2_SA_V2_DETAILS, {
+ vl_api_ikev2_sa_v2_t *rsa = &rmp->sa;
+ vl_api_ikev2_keys_t *k = &rsa->keys;
+
+ int size_data = sizeof (rsa->profile_name) - 1;
+ if (vec_len (p->name) < size_data)
+ size_data = vec_len (p->name);
+ clib_memcpy (rsa->profile_name, p->name, size_data);
+
+ rsa->state = ikev2_state_encode (sa->state);
+
+ rsa->sa_index = api_sa_index;
+ ip_address_encode2 (&sa->iaddr, &rsa->iaddr);
+ ip_address_encode2 (&sa->raddr, &rsa->raddr);
+ rsa->ispi = sa->ispi;
+ rsa->rspi = sa->rspi;
+ cp_id (&rsa->i_id, &sa->i_id);
+ cp_id (&rsa->r_id, &sa->r_id);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ if (tr)
+ cp_sa_transform (&rsa->encryption, tr);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+ if (tr)
+ cp_sa_transform (&rsa->prf, tr);
+
+ tr =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+ if (tr)
+ cp_sa_transform (&rsa->integrity, tr);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
+ if (tr)
+ cp_sa_transform (&rsa->dh, tr);
+
+ k->sk_d_len = vec_len (sa->sk_d);
+ clib_memcpy (&k->sk_d, sa->sk_d, k->sk_d_len);
+
+ k->sk_ai_len = vec_len (sa->sk_ai);
+ clib_memcpy (&k->sk_ai, sa->sk_ai, k->sk_ai_len);
+
+ k->sk_ar_len = vec_len (sa->sk_ar);
+ clib_memcpy (&k->sk_ar, sa->sk_ar, k->sk_ar_len);
+
+ k->sk_ei_len = vec_len (sa->sk_ei);
+ clib_memcpy (&k->sk_ei, sa->sk_ei, k->sk_ei_len);
+
+ k->sk_er_len = vec_len (sa->sk_er);
+ clib_memcpy (&k->sk_er, sa->sk_er, k->sk_er_len);
+
+ k->sk_pi_len = vec_len (sa->sk_pi);
+ clib_memcpy (&k->sk_pi, sa->sk_pi, k->sk_pi_len);
+
+ k->sk_pr_len = vec_len (sa->sk_pr);
+ clib_memcpy (&k->sk_pr, sa->sk_pr, k->sk_pr_len);
+
+ ikev2_copy_stats (&rsa->stats, &sa->stats);
+
+ vl_api_ikev2_sa_v2_t_endian (rsa);
+ });
+}
+
+static void
+vl_api_ikev2_sa_v2_dump_t_handler (vl_api_ikev2_sa_v2_dump_t *mp)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *sa;
+
+ vec_foreach (tkm, km->per_thread_data)
+ {
+ pool_foreach (sa, tkm->sas)
+ {
+ u32 api_sa_index =
+ ikev2_encode_sa_index (sa - tkm->sas, tkm - km->per_thread_data);
+ send_sa_v2 (sa, mp, api_sa_index);
+ }
+ }
+}
+
+static void
+send_sa_v3 (ikev2_sa_t *sa, vl_api_ikev2_sa_v3_dump_t *mp, u32 api_sa_index)
+{
+ ikev2_main_t *km = &ikev2_main;
+ vl_api_ikev2_sa_v3_details_t *rmp = 0;
+ int rv = 0;
+ ikev2_sa_transform_t *tr;
+ ikev2_profile_t *p;
+ p = pool_elt_at_index (km->profiles, sa->profile_index);
+ vlib_main_t *vm = vlib_get_main ();
+
+ REPLY_MACRO2_ZERO (VL_API_IKEV2_SA_V3_DETAILS, {
+ vl_api_ikev2_sa_v3_t *rsa = &rmp->sa;
+ vl_api_ikev2_keys_t *k = &rsa->keys;
+
+ int size_data = sizeof (rsa->profile_name) - 1;
+ if (vec_len (p->name) < size_data)
+ size_data = vec_len (p->name);
+ clib_memcpy (rsa->profile_name, p->name, size_data);
+
+ rsa->state = ikev2_state_encode (sa->state);
+
+ rsa->uptime = vlib_time_now (vm) - sa->auth_timestamp;
+
+ rsa->sa_index = api_sa_index;
+ ip_address_encode2 (&sa->iaddr, &rsa->iaddr);
+ ip_address_encode2 (&sa->raddr, &rsa->raddr);
+ rsa->ispi = sa->ispi;
+ rsa->rspi = sa->rspi;
+ cp_id (&rsa->i_id, &sa->i_id);
+ cp_id (&rsa->r_id, &sa->r_id);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ if (tr)
+ cp_sa_transform (&rsa->encryption, tr);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+ if (tr)
+ cp_sa_transform (&rsa->prf, tr);
+
+ tr =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+ if (tr)
+ cp_sa_transform (&rsa->integrity, tr);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
+ if (tr)
+ cp_sa_transform (&rsa->dh, tr);
+
+ k->sk_d_len = vec_len (sa->sk_d);
+ clib_memcpy (&k->sk_d, sa->sk_d, k->sk_d_len);
+
+ k->sk_ai_len = vec_len (sa->sk_ai);
+ clib_memcpy (&k->sk_ai, sa->sk_ai, k->sk_ai_len);
+
+ k->sk_ar_len = vec_len (sa->sk_ar);
+ clib_memcpy (&k->sk_ar, sa->sk_ar, k->sk_ar_len);
+
+ k->sk_ei_len = vec_len (sa->sk_ei);
+ clib_memcpy (&k->sk_ei, sa->sk_ei, k->sk_ei_len);
+
+ k->sk_er_len = vec_len (sa->sk_er);
+ clib_memcpy (&k->sk_er, sa->sk_er, k->sk_er_len);
+
+ k->sk_pi_len = vec_len (sa->sk_pi);
+ clib_memcpy (&k->sk_pi, sa->sk_pi, k->sk_pi_len);
+
+ k->sk_pr_len = vec_len (sa->sk_pr);
+ clib_memcpy (&k->sk_pr, sa->sk_pr, k->sk_pr_len);
+
+ ikev2_copy_stats (&rsa->stats, &sa->stats);
+
+ vl_api_ikev2_sa_v3_t_endian (rsa);
+ });
+}
+
+static void
+vl_api_ikev2_sa_v3_dump_t_handler (vl_api_ikev2_sa_v3_dump_t *mp)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *sa;
+
+ vec_foreach (tkm, km->per_thread_data)
+ {
+ pool_foreach (sa, tkm->sas)
+ {
+ u32 api_sa_index =
+ ikev2_encode_sa_index (sa - tkm->sas, tkm - km->per_thread_data);
+ send_sa_v3 (sa, mp, api_sa_index);
+ }
+ }
+}
static void
send_child_sa (ikev2_child_sa_t * child,
@@ -303,7 +507,6 @@ send_child_sa (ikev2_child_sa_t * child,
int rv = 0;
ikev2_sa_transform_t *tr;
- /* *INDENT-OFF* */
REPLY_MACRO2_ZERO (VL_API_IKEV2_CHILD_SA_DETAILS,
{
vl_api_ikev2_keys_t *k = &rmp->child_sa.keys;
@@ -348,7 +551,6 @@ send_child_sa (ikev2_child_sa_t * child,
vl_api_ikev2_child_sa_t_endian (&rmp->child_sa);
});
- /* *INDENT-ON* */
}
static void
@@ -380,6 +582,85 @@ vl_api_ikev2_child_sa_dump_t_handler (vl_api_ikev2_child_sa_dump_t * mp)
}
static void
+send_child_sa_v2 (ikev2_child_sa_t *child, vl_api_ikev2_child_sa_v2_dump_t *mp,
+ u32 child_sa_index, u32 sa_index)
+{
+ vl_api_ikev2_child_sa_v2_details_t *rmp = 0;
+ int rv = 0;
+ ikev2_sa_transform_t *tr;
+ vlib_main_t *vm = vlib_get_main ();
+
+ REPLY_MACRO2_ZERO (VL_API_IKEV2_CHILD_SA_V2_DETAILS, {
+ vl_api_ikev2_keys_t *k = &rmp->child_sa.keys;
+ rmp->child_sa.child_sa_index = child_sa_index;
+ rmp->child_sa.uptime = vlib_time_now (vm) - child->timestamp;
+ rmp->child_sa.sa_index = sa_index;
+ rmp->child_sa.i_spi = child->i_proposals ? child->i_proposals[0].spi : 0;
+ rmp->child_sa.r_spi = child->r_proposals ? child->r_proposals[0].spi : 0;
+
+ tr =
+ ikev2_sa_get_td_for_type (child->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ if (tr)
+ cp_sa_transform (&rmp->child_sa.encryption, tr);
+
+ tr = ikev2_sa_get_td_for_type (child->r_proposals,
+ IKEV2_TRANSFORM_TYPE_INTEG);
+ if (tr)
+ cp_sa_transform (&rmp->child_sa.integrity, tr);
+
+ tr =
+ ikev2_sa_get_td_for_type (child->r_proposals, IKEV2_TRANSFORM_TYPE_ESN);
+ if (tr)
+ cp_sa_transform (&rmp->child_sa.esn, tr);
+
+ k->sk_ei_len = vec_len (child->sk_ei);
+ clib_memcpy (&k->sk_ei, child->sk_ei, k->sk_ei_len);
+
+ k->sk_er_len = vec_len (child->sk_er);
+ clib_memcpy (&k->sk_er, child->sk_er, k->sk_er_len);
+
+ if (vec_len (child->sk_ai))
+ {
+ k->sk_ai_len = vec_len (child->sk_ai);
+ clib_memcpy (&k->sk_ai, child->sk_ai, k->sk_ai_len);
+
+ k->sk_ar_len = vec_len (child->sk_ar);
+ clib_memcpy (&k->sk_ar, child->sk_ar, k->sk_ar_len);
+ }
+
+ vl_api_ikev2_child_sa_v2_t_endian (&rmp->child_sa);
+ });
+}
+
+static void
+vl_api_ikev2_child_sa_v2_dump_t_handler (vl_api_ikev2_child_sa_v2_dump_t *mp)
+{
+ ikev2_main_t *im = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *sa;
+ ikev2_child_sa_t *child;
+ u32 sai = ~0, ti = ~0;
+
+ ikev2_decode_sa_index (clib_net_to_host_u32 (mp->sa_index), &sai, &ti);
+
+ if (vec_len (im->per_thread_data) <= ti)
+ return;
+
+ tkm = vec_elt_at_index (im->per_thread_data, ti);
+
+ if (pool_len (tkm->sas) <= sai || pool_is_free_index (tkm->sas, sai))
+ return;
+
+ sa = pool_elt_at_index (tkm->sas, sai);
+
+ vec_foreach (child, sa->childs)
+ {
+ u32 child_sa_index = child - sa->childs;
+ send_child_sa_v2 (child, mp, child_sa_index, sai);
+ }
+}
+
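Both dump handlers funnel a per-thread pool index through one API-visible `sa_index`, packed and unpacked by `ikev2_encode_sa_index`/`ikev2_decode_sa_index`. A sketch of one plausible layout; the 16/16 bit split below is an assumption for illustration, not the plugin's actual encoding:

```c
#include <stdint.h>

static uint32_t
encode_sa_index (uint32_t sa_index, uint32_t thread_index)
{
  return (thread_index << 16) | (sa_index & 0xffff);
}

static void
decode_sa_index (uint32_t api_index, uint32_t *sa_index,
                 uint32_t *thread_index)
{
  *sa_index = api_index & 0xffff;
  *thread_index = api_index >> 16;
}
```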
+static void
vl_api_ikev2_traffic_selector_dump_t_handler
(vl_api_ikev2_traffic_selector_dump_t * mp)
{
@@ -414,7 +695,6 @@ static void
vl_api_ikev2_traffic_selector_details_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2_ZERO (VL_API_IKEV2_TRAFFIC_SELECTOR_DETAILS,
{
rmp->ts.sa_index = api_sa_index;
@@ -422,7 +702,6 @@ static void
cp_ts (&rmp->ts, ts, mp->is_initiator);
vl_api_ikev2_ts_t_endian (&rmp->ts);
});
- /* *INDENT-ON* */
}
}
@@ -451,13 +730,11 @@ vl_api_ikev2_nonce_get_t_handler (vl_api_ikev2_nonce_get_t * mp)
int data_len = vec_len (nonce);
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO3_ZERO (VL_API_IKEV2_NONCE_GET_REPLY, data_len,
{
rmp->data_len = clib_host_to_net_u32 (data_len);
clib_memcpy (rmp->nonce, nonce, data_len);
});
- /* *INDENT-ON* */
}
static void
@@ -490,8 +767,6 @@ static void
{
vl_api_ikev2_profile_set_liveness_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
clib_error_t *error;
error = ikev2_set_liveness_params (clib_net_to_host_u32 (mp->period),
clib_net_to_host_u32 (mp->max_retries));
@@ -501,10 +776,6 @@ static void
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_LIVENESS_REPLY);
}
@@ -513,8 +784,6 @@ vl_api_ikev2_profile_add_del_t_handler (vl_api_ikev2_profile_add_del_t * mp)
{
vl_api_ikev2_profile_add_del_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
u8 *tmp = format (0, "%s", mp->name);
@@ -526,10 +795,6 @@ vl_api_ikev2_profile_add_del_t_handler (vl_api_ikev2_profile_add_del_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_ADD_DEL_REPLY);
}
@@ -539,8 +804,6 @@ static void
{
vl_api_ikev2_profile_set_auth_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
int data_len = ntohl (mp->data_len);
@@ -562,10 +825,6 @@ static void
}
else
rv = VNET_API_ERROR_INVALID_VALUE;
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_AUTH_REPLY);
}
@@ -574,8 +833,6 @@ vl_api_ikev2_profile_set_id_t_handler (vl_api_ikev2_profile_set_id_t * mp)
{
vl_api_ikev2_profile_set_id_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
u8 *tmp = format (0, "%s", mp->name);
@@ -596,9 +853,6 @@ vl_api_ikev2_profile_set_id_t_handler (vl_api_ikev2_profile_set_id_t * mp)
}
else
rv = VNET_API_ERROR_INVALID_VALUE;
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_ID_REPLY);
}
@@ -609,8 +863,6 @@ static void
{
vl_api_ikev2_profile_set_udp_encap_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
u8 *tmp = format (0, "%s", mp->name);
@@ -622,10 +874,6 @@ static void
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_UDP_ENCAP_REPLY);
}
@@ -634,8 +882,6 @@ vl_api_ikev2_profile_set_ts_t_handler (vl_api_ikev2_profile_set_ts_t * mp)
{
vl_api_ikev2_profile_set_ts_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
u8 *tmp = format (0, "%s", mp->name);
@@ -654,10 +900,6 @@ vl_api_ikev2_profile_set_ts_t_handler (vl_api_ikev2_profile_set_ts_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_TS_REPLY);
}
@@ -666,8 +908,6 @@ vl_api_ikev2_set_local_key_t_handler (vl_api_ikev2_set_local_key_t * mp)
{
vl_api_ikev2_set_local_key_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -678,10 +918,6 @@ vl_api_ikev2_set_local_key_t_handler (vl_api_ikev2_set_local_key_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_LOCAL_KEY_REPLY);
}
@@ -691,8 +927,6 @@ vl_api_ikev2_set_responder_hostname_t_handler (
{
vl_api_ikev2_set_responder_hostname_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -710,10 +944,6 @@ vl_api_ikev2_set_responder_hostname_t_handler (
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_RESPONDER_HOSTNAME_REPLY);
}
@@ -722,8 +952,6 @@ vl_api_ikev2_set_responder_t_handler (vl_api_ikev2_set_responder_t * mp)
{
vl_api_ikev2_set_responder_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -740,10 +968,6 @@ vl_api_ikev2_set_responder_t_handler (vl_api_ikev2_set_responder_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_RESPONDER_REPLY);
}
@@ -753,8 +977,6 @@ vl_api_ikev2_set_ike_transforms_t_handler (vl_api_ikev2_set_ike_transforms_t *
{
vl_api_ikev2_set_ike_transforms_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -772,10 +994,6 @@ vl_api_ikev2_set_ike_transforms_t_handler (vl_api_ikev2_set_ike_transforms_t *
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_IKE_TRANSFORMS_REPLY);
}
@@ -785,8 +1003,6 @@ vl_api_ikev2_set_esp_transforms_t_handler (vl_api_ikev2_set_esp_transforms_t *
{
vl_api_ikev2_set_esp_transforms_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -803,10 +1019,6 @@ vl_api_ikev2_set_esp_transforms_t_handler (vl_api_ikev2_set_esp_transforms_t *
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_ESP_TRANSFORMS_REPLY);
}
@@ -815,8 +1027,6 @@ vl_api_ikev2_set_sa_lifetime_t_handler (vl_api_ikev2_set_sa_lifetime_t * mp)
{
vl_api_ikev2_set_sa_lifetime_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -836,10 +1046,6 @@ vl_api_ikev2_set_sa_lifetime_t_handler (vl_api_ikev2_set_sa_lifetime_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_SA_LIFETIME_REPLY);
}
@@ -849,8 +1055,6 @@ static void
{
vl_api_ikev2_profile_set_ipsec_udp_port_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
u8 *tmp = format (0, "%s", mp->name);
@@ -860,10 +1064,6 @@ static void
clib_net_to_host_u16 (mp->port),
mp->is_set);
vec_free (tmp);
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_IPSEC_UDP_PORT_REPLY);
}
@@ -876,7 +1076,6 @@ static void
VALIDATE_SW_IF_INDEX (mp);
-#if WITH_LIBSSL > 0
u8 *tmp = format (0, "%s", mp->name);
clib_error_t *error;
@@ -890,10 +1089,6 @@ static void
rv = VNET_API_ERROR_UNSPECIFIED;
}
vec_free (tmp);
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_IKEV2_SET_TUNNEL_INTERFACE_REPLY);
}
@@ -903,8 +1098,6 @@ vl_api_ikev2_initiate_sa_init_t_handler (vl_api_ikev2_initiate_sa_init_t * mp)
{
vl_api_ikev2_initiate_sa_init_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -918,10 +1111,6 @@ vl_api_ikev2_initiate_sa_init_t_handler (vl_api_ikev2_initiate_sa_init_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_INITIATE_SA_INIT_REPLY);
}
@@ -931,8 +1120,6 @@ vl_api_ikev2_initiate_del_ike_sa_t_handler (vl_api_ikev2_initiate_del_ike_sa_t
{
vl_api_ikev2_initiate_del_ike_sa_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -943,10 +1130,6 @@ vl_api_ikev2_initiate_del_ike_sa_t_handler (vl_api_ikev2_initiate_del_ike_sa_t
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_INITIATE_DEL_IKE_SA_REPLY);
}
@@ -956,8 +1139,6 @@ static void
{
vl_api_ikev2_initiate_del_child_sa_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -968,10 +1149,6 @@ static void
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_INITIATE_DEL_CHILD_SA_REPLY);
}
@@ -981,8 +1158,6 @@ static void
{
vl_api_ikev2_profile_disable_natt_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
clib_error_t *error;
u8 *tmp = format (0, "%s", mp->name);
@@ -994,10 +1169,6 @@ static void
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_DISABLE_NATT_REPLY);
}
@@ -1007,8 +1178,6 @@ static void
{
vl_api_ikev2_initiate_rekey_child_sa_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -1019,10 +1188,6 @@ static void
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_INITIATE_REKEY_CHILD_SA_REPLY);
}
diff --git a/src/plugins/ikev2/ikev2_cli.c b/src/plugins/ikev2/ikev2_cli.c
index 3523ce079b6..975774c48d5 100644
--- a/src/plugins/ikev2/ikev2_cli.c
+++ b/src/plugins/ikev2/ikev2_cli.c
@@ -74,12 +74,16 @@ format_ikev2_child_sa (u8 * s, va_list * va)
ikev2_ts_t *ts;
ikev2_sa_transform_t *tr;
u8 *c = 0;
+ vlib_main_t *vm = vlib_get_main ();
u32 indent = format_get_indent (s);
indent += 1;
s = format (s, "child sa %u:", index);
+ s = format (s, "\n uptime: %f (s)\n ",
+ vlib_time_now (vm) - child->timestamp);
+
tr = ikev2_sa_get_td_for_type (child->r_proposals,
IKEV2_TRANSFORM_TYPE_ENCR);
c = format (c, "%U ", format_ikev2_sa_transform, tr);
@@ -121,6 +125,12 @@ format_ikev2_child_sa (u8 * s, va_list * va)
return s;
}
+static char *stateNames[] = {
+#define _(v, f, s) s,
+ foreach_ikev2_state
+#undef _
+};
+
static u8 *
format_ikev2_sa (u8 * s, va_list * va)
{
@@ -129,6 +139,12 @@ format_ikev2_sa (u8 * s, va_list * va)
ikev2_sa_transform_t *tr;
ikev2_child_sa_t *child;
u32 indent = 1;
+ vlib_main_t *vm = vlib_get_main ();
+
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_profile_t *p;
+
+ p = pool_elt_at_index (km->profiles, sa->profile_index);
s = format (s, "iip %U ispi %lx rip %U rspi %lx",
format_ip_address, &sa->iaddr, sa->ispi,
@@ -150,6 +166,16 @@ format_ikev2_sa (u8 * s, va_list * va)
tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
s = format (s, "%U", format_ikev2_sa_transform, tr);
+ s = format (s, "\n profile: %v", p->name);
+
+ if (sa->state <= IKEV2_STATE_NO_PROPOSAL_CHOSEN)
+ {
+ s = format (s, "\n state: %s", stateNames[sa->state]);
+ }
+
+ s =
+ format (s, "\n uptime: %f (s)\n", vlib_time_now (vm) - sa->auth_timestamp);
+
s = format (s, "\n%U", format_white_space, indent);
s = format (s, "nonce i:%U\n%Ur:%U\n",
@@ -232,7 +258,6 @@ show_ikev2_sa_command_fn (vlib_main_t * vm,
vec_foreach (tkm, km->per_thread_data)
{
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
if (show_one)
{
@@ -245,7 +270,6 @@ show_ikev2_sa_command_fn (vlib_main_t * vm,
else
s = format (s, "%U\n", format_ikev2_sa, sa, details);
}
- /* *INDENT-ON* */
}
vlib_cli_output (vm, "%v", s);
@@ -253,13 +277,11 @@ show_ikev2_sa_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ikev2_sa_command, static) = {
.path = "show ikev2 sa",
.short_help = "show ikev2 sa [rspi <rspi>] [details]",
.function = show_ikev2_sa_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ikev2_disable_dpd_command_fn (vlib_main_t * vm,
@@ -270,13 +292,11 @@ ikev2_disable_dpd_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ikev2_cli_disable_dpd_command, static) = {
.path = "ikev2 dpd disable",
.short_help = "ikev2 dpd disable",
.function = ikev2_disable_dpd_command_fn,
};
-/* *INDENT-ON* */
static uword
unformat_ikev2_token (unformat_input_t * input, va_list * va)
@@ -553,7 +573,6 @@ done:
return r;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ikev2_profile_add_del_command, static) = {
.path = "ikev2 profile",
.short_help =
@@ -574,7 +593,6 @@ VLIB_CLI_COMMAND (ikev2_profile_add_del_command, static) = {
"ikev2 profile set <id> disable natt\n",
.function = ikev2_profile_add_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ikev2_profile_command_fn (vlib_main_t * vm,
@@ -584,7 +602,6 @@ show_ikev2_profile_command_fn (vlib_main_t * vm,
ikev2_main_t *km = &ikev2_main;
ikev2_profile_t *p;
- /* *INDENT-OFF* */
pool_foreach (p, km->profiles) {
vlib_cli_output(vm, "profile %v", p->name);
@@ -651,18 +668,15 @@ show_ikev2_profile_command_fn (vlib_main_t * vm,
vlib_cli_output(vm, " lifetime %d jitter %d handover %d maxdata %d",
p->lifetime, p->lifetime_jitter, p->handover, p->lifetime_maxdata);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ikev2_profile_command, static) = {
.path = "show ikev2 profile",
.short_help = "show ikev2 profile",
.function = show_ikev2_profile_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_ikev2_liveness_period_fn (vlib_main_t * vm,
@@ -695,13 +709,11 @@ done:
return r;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ikev2_liveness_command, static) = {
.path = "ikev2 set liveness",
.short_help = "ikev2 set liveness <period> <max-retires>",
.function = set_ikev2_liveness_period_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_ikev2_local_key_command_fn (vlib_main_t * vm,
@@ -735,14 +747,12 @@ done:
return r;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ikev2_local_key_command, static) = {
.path = "set ikev2 local key",
.short_help =
"set ikev2 local key <file>",
.function = set_ikev2_local_key_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -793,7 +803,6 @@ done:
return r;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ikev2_initiate_command, static) = {
.path = "ikev2 initiate",
.short_help =
@@ -803,12 +812,6 @@ VLIB_CLI_COMMAND (ikev2_initiate_command, static) = {
"ikev2 initiate rekey-child-sa <child sa ispi>\n",
.function = ikev2_initiate_command_fn,
};
-/* *INDENT-ON* */
-
-void
-ikev2_cli_reference (void)
-{
-}
static clib_error_t *
ikev2_set_log_level_command_fn (vlib_main_t * vm,
@@ -838,13 +841,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ikev2_set_log_level_command, static) = {
.path = "ikev2 set logging level",
.function = ikev2_set_log_level_command_fn,
.short_help = "ikev2 set logging level <0-5>",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ikev2/ikev2_crypto.c b/src/plugins/ikev2/ikev2_crypto.c
index a9ab1bc8067..3d4ad0a28ed 100644
--- a/src/plugins/ikev2/ikev2_crypto.c
+++ b/src/plugins/ikev2/ikev2_crypto.c
@@ -488,7 +488,7 @@ BN_bn2binpad (const BIGNUM * a, unsigned char *to, int tolen)
{
vec_insert (to, pad, 0);
clib_memset (to, 0, pad);
- _vec_len (to) -= pad;
+ vec_dec_len (to, pad);
}
return tolen;
}
@@ -553,7 +553,7 @@ ikev2_generate_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t)
{
vec_insert (sa->dh_shared_key, pad, 0);
clib_memset (sa->dh_shared_key, 0, pad);
- _vec_len (sa->dh_shared_key) -= pad;
+ vec_dec_len (sa->dh_shared_key, pad);
}
BN_clear_free (ex);
}
@@ -679,7 +679,7 @@ ikev2_complete_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t)
{
vec_insert (sa->dh_shared_key, pad, 0);
clib_memset (sa->dh_shared_key, 0, pad);
- _vec_len (sa->dh_shared_key) -= pad;
+ vec_dec_len (sa->dh_shared_key, pad);
}
BN_clear_free (ex);
DH_free (dh);
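All three hunks above implement the same fixed-width big-endian encoding: when the bignum's byte string comes out shorter than the negotiated field width, zero bytes are prepended (the `vec_insert` + `clib_memset` + `vec_dec_len` sequence on a VPP vector). The effect in plain C, assuming the caller guarantees `len <= width`:

```c
#include <string.h>

static void
pad_left (unsigned char *dst, size_t width,
          const unsigned char *src, size_t len)
{
  size_t pad = width - len; /* caller guarantees len <= width */
  memset (dst, 0, pad);     /* leading zero bytes */
  memcpy (dst + pad, src, len);
}
```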
diff --git a/src/plugins/ikev2/ikev2_payload.c b/src/plugins/ikev2/ikev2_payload.c
index 294864d8c43..5801a1b3e87 100644
--- a/src/plugins/ikev2/ikev2_payload.c
+++ b/src/plugins/ikev2/ikev2_payload.c
@@ -24,7 +24,6 @@
#include <plugins/ikev2/ikev2.h>
#include <plugins/ikev2/ikev2_priv.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -34,9 +33,7 @@ typedef CLIB_PACKED (struct {
u16 msg_type;
u8 payload[0];
}) ike_notify_payload_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_address_t start_addr;
ip4_address_t end_addr;
@@ -55,9 +52,7 @@ typedef CLIB_PACKED (struct {
u16 end_port;
u8 addr_pair[0];
}) ikev2_ts_payload_entry_t;
-/* *INDENT-OFF* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -66,9 +61,7 @@ typedef CLIB_PACKED (struct {
u8 reserved[3];
ikev2_ts_payload_entry_t ts[0];
}) ike_ts_payload_header_t;
-/* *INDENT-OFF* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 last_or_more;
u8 reserved;
@@ -78,9 +71,7 @@ typedef CLIB_PACKED (struct {
u8 spi_size;
u8 num_transforms; u32 spi[0];
}) ike_sa_proposal_data_t;
-/* *INDENT-OFF* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 last_or_more;
u8 reserved;
@@ -90,9 +81,7 @@ typedef CLIB_PACKED (struct {
u16 transform_id;
u8 attributes[0];
}) ike_sa_transform_data_t;
-/* *INDENT-OFF* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -102,7 +91,6 @@ typedef CLIB_PACKED (struct {
u16 num_of_spi;
u32 spi[0];
}) ike_delete_payload_header_t;
-/* *INDENT-OFF* */
static ike_payload_header_t *
ikev2_payload_add_hdr (ikev2_payload_chain_t * c, u8 payload_type, int len)
@@ -167,8 +155,8 @@ ikev2_payload_add_notify_2 (ikev2_payload_chain_t * c, u16 msg_type,
}
void
-ikev2_payload_add_sa (ikev2_payload_chain_t * c,
- ikev2_sa_proposal_t * proposals)
+ikev2_payload_add_sa (ikev2_payload_chain_t *c, ikev2_sa_proposal_t *proposals,
+ u8 force_spi)
{
ike_payload_header_t *ph;
ike_sa_proposal_data_t *prop;
@@ -184,7 +172,13 @@ ikev2_payload_add_sa (ikev2_payload_chain_t * c,
vec_foreach (p, proposals)
{
- int spi_size = (p->protocol_id == IKEV2_PROTOCOL_ESP) ? 4 : 0;
+ int spi_size = 0;
+
+ if (p->protocol_id == IKEV2_PROTOCOL_ESP)
+ spi_size = 4;
+ else if (force_spi && p->protocol_id == IKEV2_PROTOCOL_IKE)
+ spi_size = 8;
+
pr_data = vec_new (u8, sizeof (ike_sa_proposal_data_t) + spi_size);
prop = (ike_sa_proposal_data_t *) pr_data;
prop->last_or_more = proposals - p + 1 < vec_len (proposals) ? 2 : 0;
@@ -193,8 +187,13 @@ ikev2_payload_add_sa (ikev2_payload_chain_t * c,
prop->spi_size = spi_size;
prop->num_transforms = vec_len (p->transforms);
- if (spi_size)
+ if (spi_size == 4)
prop->spi[0] = clib_host_to_net_u32 (p->spi);
+ else if (spi_size == 8)
+ {
+ u64 s = clib_host_to_net_u64 (p->spi);
+ clib_memcpy_fast (prop->spi, &s, sizeof (s));
+ }
vec_foreach (t, p->transforms)
{
@@ -384,8 +383,9 @@ ikev2_parse_sa_payload (ike_payload_header_t * ikep, u32 rlen)
sap = (ike_sa_proposal_data_t *) & ikep->payload[proposal_ptr];
int i, transform_ptr;
- /* IKE proposal should not have SPI */
- if (sap->protocol_id == IKEV2_PROTOCOL_IKE && sap->spi_size != 0)
+  /* IKE proposal should have an 8-byte SPI or none */
+ if (sap->protocol_id == IKEV2_PROTOCOL_IKE && sap->spi_size != 0 &&
+ sap->spi_size != 8)
goto data_corrupted;
/* IKE proposal should not have SPI */
@@ -404,6 +404,12 @@ ikev2_parse_sa_payload (ike_payload_header_t * ikep, u32 rlen)
{
proposal->spi = clib_net_to_host_u32 (sap->spi[0]);
}
+ else if (sap->spi_size == 8)
+ {
+ u64 s;
+ clib_memcpy_fast (&s, &sap->spi[0], sizeof (s));
+ proposal->spi = clib_net_to_host_u64 (s);
+ }
for (i = 0; i < sap->num_transforms; i++)
{
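The 8-byte SPI is moved with `clib_memcpy_fast` rather than a direct 64-bit store: the wire-format `spi[]` member is declared `u32 spi[0]`, so writing a `u64` through a cast pointer would be misaligned type-punning. A standalone sketch of both directions; `htobe64`/`be64toh` from glibc's `<endian.h>` stand in for `clib_host_to_net_u64`/`clib_net_to_host_u64`:

```c
#include <endian.h>
#include <stdint.h>
#include <string.h>

static void
put_spi64 (uint8_t *wire, uint64_t spi_host)
{
  uint64_t s = htobe64 (spi_host); /* host -> network byte order */
  memcpy (wire, &s, sizeof (s));   /* alignment-safe store */
}

static uint64_t
get_spi64 (const uint8_t *wire)
{
  uint64_t s;
  memcpy (&s, wire, sizeof (s));   /* alignment-safe load */
  return be64toh (s);
}
```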
diff --git a/src/plugins/ikev2/ikev2_priv.h b/src/plugins/ikev2/ikev2_priv.h
index 4c56b980f1c..0639809e9b1 100644
--- a/src/plugins/ikev2/ikev2_priv.h
+++ b/src/plugins/ikev2/ikev2_priv.h
@@ -184,16 +184,21 @@ do { \
#define ikev2_log_debug(...) \
vlib_log(VLIB_LOG_LEVEL_DEBUG, ikev2_main.log_class, __VA_ARGS__)
+#define foreach_ikev2_state \
+ _ (0, UNKNOWN, "UNKNOWN") \
+ _ (1, SA_INIT, "SA_INIT") \
+ _ (2, DELETED, "DELETED") \
+ _ (3, AUTH_FAILED, "AUTH_FAILED") \
+ _ (4, AUTHENTICATED, "AUTHENTICATED") \
+ _ (5, NOTIFY_AND_DELETE, "NOTIFY_AND_DELETE") \
+ _ (6, TS_UNACCEPTABLE, "TS_UNACCEPTABLE") \
+ _ (7, NO_PROPOSAL_CHOSEN, "NO_PROPOSAL_CHOSEN")
+
typedef enum
{
- IKEV2_STATE_UNKNOWN,
- IKEV2_STATE_SA_INIT,
- IKEV2_STATE_DELETED,
- IKEV2_STATE_AUTH_FAILED,
- IKEV2_STATE_AUTHENTICATED,
- IKEV2_STATE_NOTIFY_AND_DELETE,
- IKEV2_STATE_TS_UNACCEPTABLE,
- IKEV2_STATE_NO_PROPOSAL_CHOSEN,
+#define _(v, f, s) IKEV2_STATE_##f = v,
+ foreach_ikev2_state
+#undef _
} ikev2_state_t;
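Replacing the literal enum with the `foreach_ikev2_state` X-macro lets the CLI generate its `stateNames[]` table from the same list, so enum values and display strings can never drift apart. The idiom in miniature:

```c
/* One list, expanded twice with different definitions of _(). */
#define foreach_color \
  _ (0, RED, "RED")   \
  _ (1, GREEN, "GREEN")

typedef enum
{
#define _(v, f, s) COLOR_##f = v,
  foreach_color
#undef _
} color_t;

static const char *color_names[] = {
#define _(v, f, s) s,
  foreach_color
#undef _
};
```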
typedef struct
@@ -238,7 +243,7 @@ typedef struct
{
u8 proposal_num;
ikev2_protocol_id_t protocol_id:8;
- u32 spi;
+ u64 spi;
ikev2_sa_transform_t *transforms;
} ikev2_sa_proposal_t;
@@ -302,6 +307,8 @@ typedef struct
f64 time_to_expiration;
u8 is_expired;
i8 rekey_retries;
+
+ f64 timestamp;
} ikev2_child_sa_t;
typedef struct
@@ -312,6 +319,8 @@ typedef struct
typedef struct
{
+ u16 notify_type;
+ u8 kex;
u8 protocol_id;
u32 spi;
u32 ispi;
@@ -323,6 +332,22 @@ typedef struct
typedef struct
{
+ u16 notify_type;
+ u16 dh_group;
+ u64 ispi;
+ u64 rspi;
+ u8 *i_nonce;
+ u8 *r_nonce;
+ u8 *dh_shared_key;
+ u8 *dh_private_key;
+ u8 *i_dh_data;
+ u8 *r_dh_data;
+ ikev2_sa_proposal_t *i_proposals;
+ ikev2_sa_proposal_t *r_proposals;
+} ikev2_sa_rekey_t;
+
+typedef struct
+{
u16 msg_type;
u8 protocol_id;
u32 spi;
@@ -425,6 +450,9 @@ typedef struct
ikev2_rekey_t *new_child;
+ /* pending sa rekeyings */
+ ikev2_sa_rekey_t *sa_rekey;
+
/* packet data */
u8 *last_sa_init_req_packet_data;
u8 *last_sa_init_res_packet_data;
@@ -462,6 +490,8 @@ typedef struct
u8 keys_generated;
ikev2_stats_t stats;
+
+ f64 auth_timestamp;
} ikev2_sa_t;
@@ -530,7 +560,17 @@ typedef struct
u8 dpd_disabled;
/* pointer to name resolver function in dns plugin */
- int (*dns_resolve_name) ();
+ void *dns_resolve_name_ptr;
+
+ /* flag indicating whether lazy init is done or not */
+ int lazy_init_done;
+
+ /* refcount for IKEv2 udp ports and IPsec NATT punt registration */
+ int bind_refcount;
+
+ /* punt handle for IPsec NATT IPSEC_PUNT_IP4_SPI_UDP_0 reason */
+ vlib_punt_hdl_t punt_hdl;
+
} ikev2_main_t;
extern ikev2_main_t ikev2_main;
@@ -584,8 +624,8 @@ void ikev2_payload_add_notify (ikev2_payload_chain_t * c, u16 msg_type,
u8 * data);
void ikev2_payload_add_notify_2 (ikev2_payload_chain_t * c, u16 msg_type,
u8 * data, ikev2_notify_t * notify);
-void ikev2_payload_add_sa (ikev2_payload_chain_t * c,
- ikev2_sa_proposal_t * proposals);
+void ikev2_payload_add_sa (ikev2_payload_chain_t *c,
+ ikev2_sa_proposal_t *proposals, u8 force_spi);
void ikev2_payload_add_ke (ikev2_payload_chain_t * c, u16 dh_group,
u8 * dh_data);
void ikev2_payload_add_nonce (ikev2_payload_chain_t * c, u8 * nonce);
diff --git a/src/plugins/ikev2/ikev2_test.c b/src/plugins/ikev2/ikev2_test.c
index b63778ed103..5682d7058f6 100644
--- a/src/plugins/ikev2/ikev2_test.c
+++ b/src/plugins/ikev2/ikev2_test.c
@@ -32,7 +32,7 @@
#include <vnet/format_fns.h>
#include <ikev2/ikev2.api_enum.h>
#include <ikev2/ikev2.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
#define vl_endianfun /* define message structures */
#include <plugins/ikev2/ikev2.api.h>
@@ -396,8 +396,78 @@ vl_api_ikev2_sa_details_t_handler (vl_api_ikev2_sa_details_t * mp)
ip_address_decode2 (&sa->iaddr, &iaddr);
ip_address_decode2 (&sa->raddr, &raddr);
- fformat (vam->ofp, "profile index %d sa index: %d\n",
- mp->sa.profile_index, mp->sa.sa_index);
+ fformat (vam->ofp, "profile index %u sa index: %d\n", mp->sa.profile_index,
+ mp->sa.sa_index);
+ fformat (vam->ofp, " iip %U ispi %lx rip %U rspi %lx\n", format_ip_address,
+ &iaddr, sa->ispi, format_ip_address, &raddr, sa->rspi);
+ fformat (vam->ofp, " %U ", format_ikev2_sa_transform, &sa->encryption);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &sa->prf);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &sa->integrity);
+ fformat (vam->ofp, "%U \n", format_ikev2_sa_transform, &sa->dh);
+
+ fformat (vam->ofp, " SK_d %U\n", format_hex_bytes, k->sk_d, k->sk_d_len);
+
+ fformat (vam->ofp, " SK_a i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ai, k->sk_ai_len, format_hex_bytes, k->sk_ar, k->sk_ar_len);
+
+ fformat (vam->ofp, " SK_e i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ei, k->sk_ei_len, format_hex_bytes, k->sk_er, k->sk_er_len);
+
+ fformat (vam->ofp, " SK_p i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_pi, k->sk_pi_len, format_hex_bytes, k->sk_pr, k->sk_pr_len);
+
+ fformat (vam->ofp, " identifier (i) %U\n", format_ikev2_id_type_and_data,
+ &sa->i_id);
+ fformat (vam->ofp, " identifier (r) %U\n", format_ikev2_id_type_and_data,
+ &sa->r_id);
+
+ vam->result_ready = 1;
+}
+
+static int
+api_ikev2_sa_v2_dump (vat_main_t *vam)
+{
+ ikev2_test_main_t *im = &ikev2_test_main;
+ vl_api_ikev2_sa_v2_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ /* Construct the API message */
+ M (IKEV2_SA_V2_DUMP, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ if (!im->ping_id)
+ im->ping_id = vl_msg_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC));
+ mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping));
+ mp_ping->_vl_msg_id = htons (im->ping_id);
+ mp_ping->client_index = vam->my_client_index;
+ vam->result_ready = 0;
+
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ikev2_sa_v2_details_t_handler (vl_api_ikev2_sa_v2_details_t *mp)
+{
+ vat_main_t *vam = ikev2_test_main.vat_main;
+ vl_api_ikev2_sa_v2_t *sa = &mp->sa;
+ ip_address_t iaddr;
+ ip_address_t raddr;
+ vl_api_ikev2_keys_t *k = &sa->keys;
+ vl_api_ikev2_sa_v2_t_endian (sa);
+
+ ip_address_decode2 (&sa->iaddr, &iaddr);
+ ip_address_decode2 (&sa->raddr, &raddr);
+
+ fformat (vam->ofp, "profile name %s sa index: %d\n", mp->sa.profile_name,
+ mp->sa.sa_index);
fformat (vam->ofp, " iip %U ispi %lx rip %U rspi %lx\n", format_ip_address,
&iaddr, sa->ispi, format_ip_address, &raddr, sa->rspi);
fformat (vam->ofp, " %U ", format_ikev2_sa_transform, &sa->encryption);
@@ -427,6 +497,76 @@ vl_api_ikev2_sa_details_t_handler (vl_api_ikev2_sa_details_t * mp)
}
static int
+api_ikev2_sa_v3_dump (vat_main_t *vam)
+{
+ ikev2_test_main_t *im = &ikev2_test_main;
+ vl_api_ikev2_sa_v3_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ /* Construct the API message */
+ M (IKEV2_SA_V3_DUMP, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ if (!im->ping_id)
+ im->ping_id = vl_msg_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC));
+ mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping));
+ mp_ping->_vl_msg_id = htons (im->ping_id);
+ mp_ping->client_index = vam->my_client_index;
+ vam->result_ready = 0;
+
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ikev2_sa_v3_details_t_handler (vl_api_ikev2_sa_v3_details_t *mp)
+{
+ vat_main_t *vam = ikev2_test_main.vat_main;
+ vl_api_ikev2_sa_v3_t *sa = &mp->sa;
+ ip_address_t iaddr;
+ ip_address_t raddr;
+ vl_api_ikev2_keys_t *k = &sa->keys;
+ vl_api_ikev2_sa_v3_t_endian (sa);
+
+ ip_address_decode2 (&sa->iaddr, &iaddr);
+ ip_address_decode2 (&sa->raddr, &raddr);
+
+ fformat (vam->ofp, "profile name %s sa index: %d\n", mp->sa.profile_name,
+ mp->sa.sa_index);
+ fformat (vam->ofp, " iip %U ispi %lx rip %U rspi %lx\n", format_ip_address,
+ &iaddr, sa->ispi, format_ip_address, &raddr, sa->rspi);
+ fformat (vam->ofp, " %U ", format_ikev2_sa_transform, &sa->encryption);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &sa->prf);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &sa->integrity);
+ fformat (vam->ofp, "%U \n", format_ikev2_sa_transform, &sa->dh);
+
+ fformat (vam->ofp, " SK_d %U\n", format_hex_bytes, k->sk_d, k->sk_d_len);
+
+ fformat (vam->ofp, " SK_a i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ai, k->sk_ai_len, format_hex_bytes, k->sk_ar, k->sk_ar_len);
+
+ fformat (vam->ofp, " SK_e i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ei, k->sk_ei_len, format_hex_bytes, k->sk_er, k->sk_er_len);
+
+ fformat (vam->ofp, " SK_p i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_pi, k->sk_pi_len, format_hex_bytes, k->sk_pr, k->sk_pr_len);
+
+ fformat (vam->ofp, " identifier (i) %U\n", format_ikev2_id_type_and_data,
+ &sa->i_id);
+ fformat (vam->ofp, " identifier (r) %U\n", format_ikev2_id_type_and_data,
+ &sa->r_id);
+
+ vam->result_ready = 1;
+}
+
+static int
api_ikev2_child_sa_dump (vat_main_t * vam)
{
unformat_input_t *i = vam->input;
@@ -488,6 +628,83 @@ vl_api_ikev2_child_sa_details_t_handler (vl_api_ikev2_child_sa_details_t * mp)
fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &child_sa->integrity);
fformat (vam->ofp, "%U \n", format_ikev2_sa_transform, &child_sa->esn);
+ fformat (vam->ofp, " spi(i) %lx spi(r) %lx\n", child_sa->i_spi,
+ child_sa->r_spi);
+
+ fformat (vam->ofp, " SK_e i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ei, k->sk_ei_len, format_hex_bytes, k->sk_er, k->sk_er_len);
+ if (k->sk_ai_len)
+ {
+ fformat (vam->ofp, " SK_a i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ai, k->sk_ai_len, format_hex_bytes, k->sk_ar,
+ k->sk_ar_len);
+ }
+ vam->result_ready = 1;
+}
+
+static int
+api_ikev2_child_sa_v2_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ ikev2_test_main_t *im = &ikev2_test_main;
+  vl_api_ikev2_child_sa_v2_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+ u32 sa_index = ~0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sa_index %d", &sa_index))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sa_index == ~0)
+ return -99;
+
+ /* Construct the API message */
+  M (IKEV2_CHILD_SA_V2_DUMP, mp);
+
+ mp->sa_index = clib_net_to_host_u32 (sa_index);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ if (!im->ping_id)
+ im->ping_id = vl_msg_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC));
+ mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping));
+ mp_ping->_vl_msg_id = htons (im->ping_id);
+ mp_ping->client_index = vam->my_client_index;
+ vam->result_ready = 0;
+
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ikev2_child_sa_v2_details_t_handler (
+  vl_api_ikev2_child_sa_v2_details_t *mp)
+{
+ vat_main_t *vam = ikev2_test_main.vat_main;
+  vl_api_ikev2_child_sa_v2_t *child_sa = &mp->child_sa;
+ vl_api_ikev2_keys_t *k = &child_sa->keys;
+  vl_api_ikev2_child_sa_v2_t_endian (child_sa);
+
+ fformat (vam->ofp, " child sa %u:\n", child_sa->child_sa_index);
+
+ fformat (vam->ofp, " %U ", format_ikev2_sa_transform,
+ &child_sa->encryption);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &child_sa->integrity);
+ fformat (vam->ofp, "%U \n", format_ikev2_sa_transform, &child_sa->esn);
+
fformat (vam->ofp, " spi(i) %lx spi(r) %lx\n",
child_sa->i_spi, child_sa->r_spi);
diff --git a/src/plugins/ikev2/ikev2_types.api b/src/plugins/ikev2/ikev2_types.api
index b279026c2b9..2492611703d 100644
--- a/src/plugins/ikev2/ikev2_types.api
+++ b/src/plugins/ikev2/ikev2_types.api
@@ -128,6 +128,19 @@ typedef ikev2_child_sa
vl_api_ikev2_sa_transform_t esn;
};
+typedef ikev2_child_sa_v2
+{
+ u32 sa_index;
+ u32 child_sa_index;
+ u32 i_spi;
+ u32 r_spi;
+ vl_api_ikev2_keys_t keys;
+ vl_api_ikev2_sa_transform_t encryption;
+ vl_api_ikev2_sa_transform_t integrity;
+ vl_api_ikev2_sa_transform_t esn;
+ f64 uptime;
+};
+
typedef ikev2_sa_stats
{
u16 n_keepalives;
@@ -138,6 +151,18 @@ typedef ikev2_sa_stats
u16 n_init_sa_retransmit;
};
+enum ikev2_state
+{
+ UNKNOWN,
+ SA_INIT,
+ DELETED,
+ AUTH_FAILED,
+ AUTHENTICATED,
+ NOTIFY_AND_DELETE,
+ TS_UNACCEPTABLE,
+ NO_PROPOSAL_CHOSEN,
+};
+
typedef ikev2_sa
{
u32 sa_index;
@@ -161,3 +186,54 @@ typedef ikev2_sa
vl_api_ikev2_sa_stats_t stats;
};
+
+typedef ikev2_sa_v2
+{
+ u32 sa_index;
+ string profile_name[64];
+ vl_api_ikev2_state_t state;
+
+ u64 ispi;
+ u64 rspi;
+ vl_api_address_t iaddr;
+ vl_api_address_t raddr;
+
+ vl_api_ikev2_keys_t keys;
+
+ /* ID */
+ vl_api_ikev2_id_t i_id;
+ vl_api_ikev2_id_t r_id;
+
+ vl_api_ikev2_sa_transform_t encryption;
+ vl_api_ikev2_sa_transform_t integrity;
+ vl_api_ikev2_sa_transform_t prf;
+ vl_api_ikev2_sa_transform_t dh;
+
+ vl_api_ikev2_sa_stats_t stats;
+};
+
+typedef ikev2_sa_v3
+{
+ u32 sa_index;
+ string profile_name[64];
+ vl_api_ikev2_state_t state;
+
+ u64 ispi;
+ u64 rspi;
+ vl_api_address_t iaddr;
+ vl_api_address_t raddr;
+
+ vl_api_ikev2_keys_t keys;
+
+ /* ID */
+ vl_api_ikev2_id_t i_id;
+ vl_api_ikev2_id_t r_id;
+
+ vl_api_ikev2_sa_transform_t encryption;
+ vl_api_ikev2_sa_transform_t integrity;
+ vl_api_ikev2_sa_transform_t prf;
+ vl_api_ikev2_sa_transform_t dh;
+
+ vl_api_ikev2_sa_stats_t stats;
+ f64 uptime;
+};
\ No newline at end of file
diff --git a/src/plugins/ila/ila.c b/src/plugins/ila/ila.c
index 366abc9831e..02acd84880c 100644
--- a/src/plugins/ila/ila.c
+++ b/src/plugins/ila/ila.c
@@ -365,7 +365,7 @@ ila_ila2sir (vlib_main_t * vm,
{
ila_ila2sir_trace_t *tr =
vlib_add_trace (vm, node, p0, sizeof (*tr));
- tr->ila_index = ie0 ? (ie0 - ilm->entries) : ~0;
+ tr->ila_index = ie0 - ilm->entries;
tr->initial_dst = ip60->dst_address;
tr->adj_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
}
@@ -385,7 +385,6 @@ ila_ila2sir (vlib_main_t * vm,
return frame->n_vectors;
}
-/** *INDENT-OFF* */
VLIB_REGISTER_NODE (ila_ila2sir_node, static) =
{
.function = ila_ila2sir,
@@ -400,7 +399,6 @@ VLIB_REGISTER_NODE (ila_ila2sir_node, static) =
[ILA_ILA2SIR_NEXT_DROP] = "error-drop"
},
};
-/** *INDENT-ON* */
typedef enum
{
@@ -585,7 +583,6 @@ ila_sir2ila (vlib_main_t * vm,
return frame->n_vectors;
}
-/** *INDENT-OFF* */
VLIB_REGISTER_NODE (ila_sir2ila_node, static) =
{
.function = ila_sir2ila,.name = "sir-to-ila",
@@ -599,16 +596,13 @@ VLIB_REGISTER_NODE (ila_sir2ila_node, static) =
[ILA_SIR2ILA_NEXT_DROP] = "error-drop"
},
};
-/** *INDENT-ON* */
-/** *INDENT-OFF* */
VNET_FEATURE_INIT (ila_sir2ila, static) =
{
.arc_name = "ip6-unicast",
.node_name = "sir-to-ila",
.runs_before = VNET_FEATURES ("ip6-lookup"),
};
-/** *INDENT-ON* */
static void
ila_entry_stack (ila_entry_t *ie)
@@ -826,12 +820,10 @@ ila_interface (u32 sw_if_index, u8 disable)
return 0;
}
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Identifier Locator Addressing (ILA) for IPv6",
};
-/* *INDENT-ON* */
u8 *format_ila_dpo (u8 * s, va_list * va)
{
@@ -939,7 +931,7 @@ ila_init (vlib_main_t * vm)
ilm->lookup_table_nbuckets, ilm->lookup_table_size);
ila_dpo_type = dpo_register_new_type(&ila_vft, ila_nodes);
- ila_fib_node_type = fib_node_register_new_type(&ila_fib_node_vft);
+ ila_fib_node_type = fib_node_register_new_type ("ila", &ila_fib_node_vft);
ila_fib_src = fib_source_allocate("ila",
FIB_SOURCE_PRIORITY_HI,
FIB_SOURCE_BH_SIMPLE);
diff --git a/src/plugins/ioam/analyse/ioam_summary_export.c b/src/plugins/ioam/analyse/ioam_summary_export.c
index 032272f5ec7..6856bcc2200 100644
--- a/src/plugins/ioam/analyse/ioam_summary_export.c
+++ b/src/plugins/ioam/analyse/ioam_summary_export.c
@@ -20,11 +20,9 @@
#include <ioam/analyse/ip6/ip6_ioam_analyse.h>
u8 *
-ioam_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address, u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ioam_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port, ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index)
{
ip4_header_t *ip;
udp_header_t *udp;
@@ -39,7 +37,7 @@ ioam_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
u32 field_index = 0;
flow_report_stream_t *stream;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
/* Determine field count */
#define _(field,mask,item,length) \
@@ -74,8 +72,8 @@ ioam_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
ip->ip_version_and_header_length = 0x45;
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
- ip->dst_address.as_u32 = collector_address->as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (collector_port);
udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_ipfix);
udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
@@ -264,8 +262,9 @@ ioam_analyse_add_ipfix_record (flow_report_t * fr,
}
vlib_frame_t *
-ioam_send_flows (flow_report_main_t * frm, flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+ioam_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
vlib_buffer_t *b0 = NULL;
u32 next_offset = 0;
@@ -276,17 +275,16 @@ ioam_send_flows (flow_report_main_t * frm, flow_report_t * fr,
ipfix_set_header_t *s = NULL;
ip4_header_t *ip;
udp_header_t *udp;
- u32 records_this_buffer;
u16 new_l0, old_l0;
ip_csum_t sum0;
- vlib_main_t *vm = frm->vlib_main;
+ vlib_main_t *vm = vlib_get_main ();
ip6_address_t temp;
ioam_analyser_data_t *record = NULL;
flow_report_stream_t *stream;
ioam_analyser_data_t *aggregated_data;
u16 data_len;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
clib_memset (&temp, 0, sizeof (ip6_address_t));
@@ -330,16 +328,14 @@ ioam_send_flows (flow_report_main_t * frm, flow_report_t * fr,
h->sequence_number = stream->sequence_number++;
h->sequence_number = clib_host_to_net_u32 (h->sequence_number);
next_offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
- records_this_buffer = 0;
}
next_offset = ioam_analyse_add_ipfix_record (fr, record,
b0, next_offset,
&temp, &temp, 0, 0);
- records_this_buffer++;
/* Flush data if packet len is about to reach path mtu */
- if (next_offset > (frm->path_mtu - 250))
+ if (next_offset > (exp->path_mtu - 250))
flush = 1;
}
@@ -366,7 +362,7 @@ ioam_send_flows (flow_report_main_t * frm, flow_report_t * fr,
udp->length =
clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
- if (frm->udp_checksum)
+ if (exp->udp_checksum)
{
/* RFC 7011 section 10.3.2. */
udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
@@ -399,7 +395,7 @@ ioam_flow_create (u8 del)
vnet_flow_report_add_del_args_t args;
int rv;
u32 domain_id = 0;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
u16 template_id;
clib_memset (&args, 0, sizeof (args));
@@ -408,7 +404,7 @@ ioam_flow_create (u8 del)
del ? (args.is_add = 0) : (args.is_add = 1);
args.domain_id = domain_id;
- rv = vnet_flow_report_add_del (frm, &args, &template_id);
+ rv = vnet_flow_report_add_del (exp, &args, &template_id);
switch (rv)
{
@@ -430,12 +426,10 @@ ioam_flow_report_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ioam_flow_report_init) =
{
.runs_after = VLIB_INITS("flow_report_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/analyse/ioam_summary_export.h b/src/plugins/ioam/analyse/ioam_summary_export.h
index 99890ad43d9..7d1c5d00829 100644
--- a/src/plugins/ioam/analyse/ioam_summary_export.h
+++ b/src/plugins/ioam/analyse/ioam_summary_export.h
@@ -65,11 +65,9 @@ typedef struct
clib_error_t *ioam_flow_create (u8 del);
-u8 *ioam_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address, u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index);
+u8 *ioam_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port, ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index);
u16 ioam_analyse_add_ipfix_record (flow_report_t * fr,
ioam_analyser_data_t * record,
diff --git a/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c b/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c
index 99ba3295d44..9db0485da61 100644
--- a/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c
+++ b/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c
@@ -94,13 +94,11 @@ set_ioam_analyse_command_fn (vlib_main_t * vm, unformat_input_t * input,
return (ioam_analyse_enable_disable (vm, is_add, is_export, remote_listen));
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ioam_analyse_command, static) = {
.path = "set ioam analyse",
.short_help = "set ioam analyse [export-ipfix-collector] [disable] [listen-ipfix]",
.function = set_ioam_analyse_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ioam_analyse_cmd_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -130,13 +128,11 @@ show_ioam_analyse_cmd_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_ioam_ipfix_cmd, static) = {
.path = "show ioam analyse ",
.short_help = "show ioam analyser information",
.function = show_ioam_analyse_cmd_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ioam_analyse_init (vlib_main_t * vm)
diff --git a/src/plugins/ioam/analyse/ip6/node.c b/src/plugins/ioam/analyse/ip6/node.c
index ef35d0a9134..67895aa6486 100644
--- a/src/plugins/ioam/analyse/ip6/node.c
+++ b/src/plugins/ioam/analyse/ip6/node.c
@@ -466,7 +466,6 @@ ip6_ioam_analyse_unregister_handlers ()
ip6_ioam_analyse_unregister_hbh_handler (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE);
}
-/* *INDENT-OFF* */
/*
* Node for IP6 analyse - packets
@@ -507,7 +506,6 @@ VLIB_REGISTER_NODE (analyse_node_remote) =
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/encap/ip6_ioam_e2e.c b/src/plugins/ioam/encap/ip6_ioam_e2e.c
index a3dd048c659..0a811da6105 100644
--- a/src/plugins/ioam/encap/ip6_ioam_e2e.c
+++ b/src/plugins/ioam/encap/ip6_ioam_e2e.c
@@ -205,9 +205,7 @@ ioam_e2e_init (vlib_main_t * vm)
* Init function for the E2E lib.
* ip6_hop_by_hop_ioam_e2e_init gets called during init.
*/
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ioam_e2e_init) =
{
.runs_after = VLIB_INITS("ip6_hop_by_hop_ioam_init"),
};
-/* *INDENT-ON* */
diff --git a/src/plugins/ioam/encap/ip6_ioam_e2e.h b/src/plugins/ioam/encap/ip6_ioam_e2e.h
index fb83403da8f..f958e17669b 100644
--- a/src/plugins/ioam/encap/ip6_ioam_e2e.h
+++ b/src/plugins/ioam/encap/ip6_ioam_e2e.h
@@ -19,12 +19,10 @@
#include <ioam/lib-e2e/e2e_util.h>
#include "ip6_ioam_seqno.h"
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
ip6_hop_by_hop_option_t hdr;
ioam_e2e_packet_t e2e_hdr;
}) ioam_e2e_option_t;
-/* *INDENT-ON* */
typedef struct ioam_e2e_data_t_ {
u32 flow_ctx;
diff --git a/src/plugins/ioam/encap/ip6_ioam_pot.c b/src/plugins/ioam/encap/ip6_ioam_pot.c
index 99c21b571a3..54d748455d2 100644
--- a/src/plugins/ioam/encap/ip6_ioam_pot.c
+++ b/src/plugins/ioam/encap/ip6_ioam_pot.c
@@ -255,9 +255,7 @@ ip6_hop_by_hop_ioam_pot_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_pot_init) =
{
.runs_after = VLIB_INITS("ip6_hop_by_hop_ioam_init"),
};
-/* *INDENT-OFF* */
diff --git a/src/plugins/ioam/encap/ip6_ioam_pot.h b/src/plugins/ioam/encap/ip6_ioam_pot.h
index 01ce4ac590f..ef6f4c7344c 100644
--- a/src/plugins/ioam/encap/ip6_ioam_pot.h
+++ b/src/plugins/ioam/encap/ip6_ioam_pot.h
@@ -18,7 +18,6 @@
#include <vnet/ip/ip6_hop_by_hop_packet.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_hop_by_hop_option_t hdr;
u8 pot_type;
@@ -27,7 +26,6 @@ typedef CLIB_PACKED (struct {
u64 random;
u64 cumulative;
}) ioam_pot_option_t;
-/* *INDENT-ON* */
#endif /* PLUGINS_IOAM_PLUGIN_IOAM_ENCAP_IP6_IOAM_POT_H_ */
diff --git a/src/plugins/ioam/encap/ip6_ioam_trace.c b/src/plugins/ioam/encap/ip6_ioam_trace.c
index ea496610433..b244af56a6b 100644
--- a/src/plugins/ioam/encap/ip6_ioam_trace.c
+++ b/src/plugins/ioam/encap/ip6_ioam_trace.c
@@ -399,20 +399,16 @@ ip6_show_ioam_trace_cmd_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_ioam_trace_cmd, static) = {
.path = "show ioam trace",
.short_help = "iOAM trace statistics",
.function = ip6_show_ioam_trace_cmd_fn,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Inbound Operations, Administration, and Maintenance (OAM)",
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_hop_by_hop_ioam_trace_init (vlib_main_t * vm)
@@ -443,13 +439,11 @@ ip6_hop_by_hop_ioam_trace_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_trace_init) =
{
.runs_after = VLIB_INITS ("ip_main_init", "ip6_lookup_init",
"ip6_hop_by_hop_ioam_init"),
};
-/* *INDENT-ON* */
int
ip6_trace_profile_cleanup (void)
diff --git a/src/plugins/ioam/encap/ip6_ioam_trace.h b/src/plugins/ioam/encap/ip6_ioam_trace.h
index 4eda6110d24..25693dfc6cd 100644
--- a/src/plugins/ioam/encap/ip6_ioam_trace.h
+++ b/src/plugins/ioam/encap/ip6_ioam_trace.h
@@ -21,12 +21,10 @@
#include <vnet/ip/ip6_hop_by_hop_packet.h>
#include <ioam/lib-trace/trace_util.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
ip6_hop_by_hop_option_t hdr;
ioam_trace_hdr_t trace_hdr;
}) ioam_trace_option_t;
-/* *INDENT-ON* */
always_inline void
ip6_hbh_ioam_trace_set_bit (ioam_trace_option_t * trace, u8 trace_bit)
diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h
index f242ad7a788..1b764f66b15 100644
--- a/src/plugins/ioam/export-common/ioam_export.h
+++ b/src/plugins/ioam/export-common/ioam_export.h
@@ -287,7 +287,7 @@ ioam_export_header_create (ioam_export_main_t * em,
(DEFAULT_EXPORT_RECORDS *
DEFAULT_EXPORT_SIZE));
ip->checksum = ip4_header_checksum (ip);
- _vec_len (rewrite) = sizeof (ip4_ipfix_data_packet_t);
+ vec_set_len (rewrite, sizeof (ip4_ipfix_data_packet_t));
em->record_header = rewrite;
return (1);
}
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
index f802a049365..1606f72224f 100644
--- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
@@ -158,14 +158,12 @@ set_vxlan_gpe_ioam_export_ipfix_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_vxlan_gpe_ioam_ipfix_command, static) =
{
.path = "set vxlan-gpe-ioam export ipfix",
.short_help = "set vxlan-gpe-ioam export ipfix collector <ip4-address> src <ip4-address>",
.function = set_vxlan_gpe_ioam_export_ipfix_command_fn,
};
-/* *INDENT-ON* */
#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api.c>
static clib_error_t *
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
index 7d66bd45849..5de10ba47f3 100644
--- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
@@ -33,14 +33,12 @@ vxlan_gpe_ioam_export_process (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_ioam_export_process_node, static) =
{
.function = vxlan_gpe_ioam_export_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "vxlan-gpe-ioam-export-process",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
index f8d90332d5c..839fd80b443 100644
--- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
@@ -144,7 +144,6 @@ vxlan_gpe_export_node_fn (vlib_main_t * vm,
/*
* Node for VXLAN-GPE export
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_export_node) =
{
.function = vxlan_gpe_export_node_fn,
@@ -159,7 +158,6 @@ VLIB_REGISTER_NODE (vxlan_export_node) =
.next_nodes =
{[EXPORT_NEXT_VXLAN_GPE_INPUT] = "vxlan-gpe-pop-ioam-v4"},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/export/ioam_export.c b/src/plugins/ioam/export/ioam_export.c
index 21695af3368..f38281182c8 100644
--- a/src/plugins/ioam/export/ioam_export.c
+++ b/src/plugins/ioam/export/ioam_export.c
@@ -137,13 +137,11 @@ set_ioam_export_ipfix_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ipfix_command, static) =
{
.path = "set ioam export ipfix",.short_help =
"set ioam export ipfix collector <ip4-address> src <ip4-address>",.
function = set_ioam_export_ipfix_command_fn,};
-/* *INDENT-ON* */
#include <ioam/export/ioam_export.api.c>
static clib_error_t *
diff --git a/src/plugins/ioam/ioam_plugin_doc.md b/src/plugins/ioam/ioam_plugin_doc.md
deleted file mode 100644
index 343abcf73d8..00000000000
--- a/src/plugins/ioam/ioam_plugin_doc.md
+++ /dev/null
@@ -1,464 +0,0 @@
-## VPP Inband OAM (iOAM) {#ioam_plugin_doc}
-
-In-band OAM (iOAM) is an implementation study to record operational
-information in the packet while the packet traverses a path between
-two points in the network.
-
-Overview of iOAM can be found in [iOAM-Devnet] page.
-The following IETF drafts detail the motivation and mechanism for
-recording operational information:
- - [iOAM-ietf-requirements] - Describes motivation and usecases for iOAM
- - [iOAM-ietf-data] - Describes data records that can be collected using iOAM
- - [iOAM-ietf-transport] - Lists out the transport protocols
- and mechanism to carry iOAM data records
- - [iOAM-ietf-proof-of-transit] - Describes the idea of Proof of Transit (POT)
- and mechanisms to operationalize the idea
-
-## Terminology
-In-band OAM is expected to be deployed in a specific domain rather
-than on the overall Internet. The part of the network which employs in-band OAM
-is referred to as **"in-band OAM-domain"**.
-
-In-band OAM data is added to a packet on entering the in-band OAM-domain
-and is removed from the packet when exiting the domain.
-Within the in-band OAM-domain, network nodes that the packet traverses
-may update the in-band OAM data records.
-
-- The node which adds in-band OAM data to the packet is called the
-**"in-band OAM encapsulating node"**.
-
-- The node which removes the in-band OAM data is referred to as the
-**"in-band OAM decapsulating node"**.
-
-- Nodes within the domain which are aware of in-band OAM data and read
-and/or write or process the in-band OAM data are called
-**"in-band OAM transit nodes"**.
-
-## Features supported in the current release
-VPP can function as in-band OAM encapsulating, transit and decapsulating node.
-In this version of VPP in-band OAM data is transported as options in an
-IPv6 hop-by-hop extension header. Hence in-band OAM can be enabled
-for IPv6 traffic.
-
-The following iOAM features are supported:
-
-- **In-band OAM Tracing** : In-band OAM supports multiple data records to be
-recorded in the packet as the packet traverses the network.
-These data records offer insights into the operational behavior of the network.
-The following information can be collected in the tracing
-data from the nodes a packet traverses:
- - Node ID
- - Ingress interface ID
- - Egress interface ID
- - Timestamp
- - Pre-configured application data
-
-- **In-band OAM Proof of Transit (POT)**: Proof of transit iOAM data is
-added to every packet for verifying that a packet traverses a specific
-set of nodes.
-In-band OAM data is updated at every node that is enabled with iOAM
-proof of transit and is used to verify whether a packet traversed
-all the specified nodes. When the verifier receives each packet,
-it can validate whether the packet traversed the specified nodes.
-
-
-## Configuration
-Configuring iOAM involves:
-- Selecting the packets for which iOAM data must be inserted, updated or removed
- - Selection of packets for iOAM data insertion on iOAM encapsulating node.
- Selection of packets is done by 5-tuple based classification
- - Selection of packets for updating iOAM data is implicitly done on the
- presence of iOAM options in the packet
- - Selection of packets for removing the iOAM data is done on 5-tuple
- based classification
-- The kind of data to be collected
- - Tracing data
- - Proof of transit
-- Additional details for processing iOAM data to be collected
- - For trace data - trace type, number of nodes to be recorded in the trace,
- time stamp precision, etc.
- - For POT data - configuration of POT profile required to process the POT data
-
-The CLI for configuring iOAM is explained here followed by detailed steps
-and examples to deploy iOAM on VPP as an encapsulating, transit or
-decapsulating iOAM node in the subsequent sub-sections.
-
-VPP iOAM configuration for enabling trace and POT is as follows:
-
- set ioam rewrite trace-type <0x1f|0x7|0x9|0x11|0x19>
- trace-elts <number of trace elements> trace-tsp <0|1|2|3>
- node-id <node ID in hex> app-data <application data in hex> [pot]
-
-A description of each of the options of the CLI follows:
-- trace-type : An entry in the "Node data List" array of the trace option
-can have different formats, following the needs of the a deployment.
-For example: Some deployments might only be interested
-in recording the node identifiers, whereas others might be interested
-in recording node identifier and timestamp.
-The following types are currently supported:
- - 0x1f : Node data to include hop limit (8 bits), node ID (24 bits),
- ingress and egress interface IDs (16 bits each), timestamp (32 bits),
- application data (32 bits)
- - 0x7 : Node data to include hop limit (8 bits), node ID (24 bits),
- ingress and egress interface IDs (16 bits each)
- - 0x9 : Node data to include hop limit (8 bits), node ID (24 bits),
- timestamp (32 bits)
- - 0x11: Node data to include hop limit (8 bits), node ID (24 bits),
- application data (32 bits)
- - 0x19: Node data to include hop limit (8 bits), node ID (24 bits),
- timestamp (32 bits), application data (32 bits)
-- trace-elts : Defines the length of the node data array in the trace option.
-- trace-tsp : Defines the timestamp precision to use with the enumerated value
- for precision as follows:
- - 0 : 32bits timestamp in seconds
- - 1 : 32bits timestamp in milliseconds
- - 2 : 32bits timestamp in microseconds
- - 3 : 32bits timestamp in nanoseconds
-- node-id : Unique identifier for the node, included in the node ID
- field of the node data in trace option.
-- app-data : The value configured here is included as is in
-application data field of node data in trace option.
-- pot : Enables POT option to be included in the iOAM options.
-
-### Trace configuration
-
-#### On in-band OAM encapsulating node
- - **Configure classifier and apply ACL** to select packets for
- iOAM data insertion
- - Example to enable iOAM data insertion for all the packets
- towards IPv6 address db06::06:
-
- vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
-
- vpp# classify session acl-hit-next node ip6-add-hop-by-hop
- table-index 0 match l3 ip6 dst db06::06
-
- vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
-
- - **Enable tracing** : Specify node ID, maximum number of nodes for which
- trace data should be recorded, type of data to be included for recording,
- optionally application data to be included
- - Example to enable tracing with a maximum of 4 nodes recorded
- and the data to be recorded to include - hop limit, node id,
- ingress and egress interface IDs, timestamp (millisecond precision),
- application data (0x1234):
-
-
- vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
- node-id 0x1 app-data 0x1234
-
-
-
-#### On in-band OAM transit node
-- The transit node requires trace type, timestamp precision, node ID and
-optionally application data to be configured,
-to update its node data in the trace option.
-
-Example:
-
- vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
- node-id 0x2 app-data 0x1234
-
-#### On the In-band OAM decapsulating node
-- The decapsulating node similar to encapsulating node requires
-**classification** of the packets to remove iOAM data from.
- - Example to decapsulate iOAM data for packets towards
- db06::06, configure classifier and enable it as an ACL as follows:
-
-
- vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
-
- vpp# classify session acl-hit-next node ip6-lookup table-index 0
- match l3 ip6 dst db06::06 opaque-index 100
-
- vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
-
-
-- Decapsulating node requires trace type, timestamp precision,
-node ID and optionally application data to be configured,
-to update its node data in the trace option before it is decapsulated.
-
-Example:
-
- vpp# set ioam rewrite trace-type 0x1f trace-elts 4
- trace-tsp 1 node-id 0x3 app-data 0x1234
-
-
-### Proof of Transit configuration
-
-For details on proof-of-transit,
-see the IETF draft [iOAM-ietf-proof-of-transit].
-To enable Proof of Transit all the nodes that participate
-and hence are verified for transit need a proof of transit profile.
-A script to generate a proof of transit profile as per the mechanism
-described in [iOAM-ietf-proof-of-transit] will be available at [iOAM-Devnet].
-
-The Proof of transit mechanism implemented here is based on
-Shamir's Secret Sharing algorithm.
-The overall algorithm uses two polynomials
-POLY-1 and POLY-2. The degree of polynomials depends on number of nodes
-to be verified for transit.
-POLY-1 is secret and constant. Each node gets a point on POLY-1
-at setup-time and keeps it secret.
-POLY-2 is public, random and per packet.
-Each node is assigned a point on POLY-1 and POLY-2 with the same x index.
-Each node derives its point on POLY-2 each time a packet arrives at it.
-A node then contributes its points on POLY-1 and POLY-2 to construct
-POLY-3 (POLY-3 = POLY-1 + POLY-2) using lagrange extrapolation and
-forwards it towards the verifier by updating POT data in the packet.
-The verifier constructs POLY-3 from the accumulated value from all the nodes
-and its own points on POLY-1 and POLY-2 and verifies whether
-POLY-3 = POLY-1 + POLY-2. Only the verifier knows POLY-1.
-The solution leverages finite field arithmetic in a field of size "prime number"
-for reasons explained in description of Shamir's secret sharing algorithm.
-
-Here is an explanation of POT profile list and profile configuration CLI to
-realize the above mechanism.
-It is best to use the script provided at [iOAM-Devnet] to generate
-this configuration.
-- **Create POT profile** : set pot profile name <string> id [0-1]
-[validator-key 0xu64] prime-number 0xu64 secret_share 0xu64
-lpc 0xu64 polynomial2 0xu64 bits-in-random [0-64]
- - name : Profile list name.
- - id : Profile id, it can be 0 or 1.
- A maximum of two profiles can be configured per profile list.
- - validator-key : Secret key configured only on the
- verifier/decapsulating node used to compare and verify proof of transit.
- - prime-number : Prime number for finite field arithmetic as required by the
- proof of transit mechanism.
- - secret_share : Unique point for each node on the secret polynomial POLY-1.
- - lpc : Lagrange Polynomial Constant(LPC) calculated per node based on
- its point (x value used for evaluating the points on the polynomial)
- on the polynomial used in lagrange extrapolation
- for reconstructing polynomial (POLY-3).
- - polynomial2 : Is the pre-evaluated value of the point on
- 2nd polynomial(POLY-2). This is unique for each node.
- It is pre-evaluated for all the coefficients of POLY-2 except
- for the constant part of the polynomial that changes per packet
- and is received as part of the POT data in the packet.
- - bits-in-random : To control the size of the random number to be
- generated. This number has to match the other numbers generated and used
- in the profile as per the algorithm.
-
-- **Set a configured profile as active/in-use** :
-set pot profile-active name <string> ID [0-1]
- - name : Name of the profile list to be used for computing
- POT data per packet.
- - ID : Identifier of the profile within the list to be used.
-
-#### On In-band OAM encapsulating node
- - Configure the classifier and apply ACL to select packets for iOAM data insertion.
- - Example to enable iOAM data insertion for all the packet towards
- IPv6 address db06::06 -
-
-
- vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
-
- vpp# classify session acl-hit-next node
- ip6-add-hop-by-hop table-index 0 match l3 ip6 dst db06::06
-
- vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
-
-
- - Configure the proof of transit profile list with profiles.
-Each profile list referred to by a name can contain 2 profiles,
-only one is in use for updating proof of transit data at any time.
- - Example profile list example with a profile generated from the
- script to verify transit through 3 nodes is:
-
-
- vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685
- secret_share 0x6c22eff0f45ec56d lpc 0x7fff0000fa884682
- polynomial2 0xffb543d4a9c bits-in-random 63
-
- - Enable one of the profiles from the configured profile list as active
- so that is will be used for calculating proof of transit
-
-Example enable profile ID 0 from profile list example configured above:
-
-
- vpp# set pot profile-active name example ID 0
-
-
- - Enable POT option to be inserted
-
-
- vpp# set ioam rewrite pot
-
-
-#### On in-band OAM transit node
- - Configure the proof of transit profile list with profiles for transit node.
-Example:
-
-
- vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685
- secret_share 0x564cdbdec4eb625d lpc 0x1
- polynomial2 0x23f3a227186a bits-in-random 63
-
-#### On in-band OAM decapsulating node / verifier
-- The decapsulating node, similar to the encapsulating node requires
-classification of the packets to remove iOAM data from.
- - Example to decapsulate iOAM data for packets towards db06::06
- configure classifier and enable it as an ACL as follows:
-
-
- vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
-
- vpp# classify session acl-hit-next node ip6-lookup table-index 0
- match l3 ip6 dst db06::06 opaque-index 100
-
- vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
-
-- To update and verify the proof of transit, POT profile list should be configured.
- - Example POT profile list configured as follows:
-
- vpp# set pot profile name example id 0 validate-key 0x7fff0000fa88465d
- prime-number 0x7fff0000fa884685 secret_share 0x7a08fbfc5b93116d lpc 0x3
- polynomial2 0x3ff738597ce bits-in-random 63
-
-## Operational data
-
-Following CLIs are available to check iOAM operation:
-- To check iOAM configuration that are effective use "show ioam summary"
-
-Example:
-
- vpp# show ioam summary
- REWRITE FLOW CONFIGS - Not configured
- HOP BY HOP OPTIONS - TRACE CONFIG -
- Trace Type : 0x1f (31)
- Trace timestamp precision : 1 (Milliseconds)
- Num of trace nodes : 4
- Node-id : 0x2 (2)
- App Data : 0x1234 (4660)
- POT OPTION - 1 (Enabled)
- Try 'show ioam pot and show pot profile' for more information
-
-- To find statistics about packets for which iOAM options were
-added (encapsulating node) and removed (decapsulating node) execute
-*show errors*
-
-Example on encapsulating node:
-
-
- vpp# show error
- Count Node Reason
- 1208804706 ip6-inacl input ACL hits
- 1208804706 ip6-add-hop-by-hop Pkts w/ added ip6 hop-by-hop options
-
-Example on decapsulating node:
-
- vpp# show error
- Count Node Reason
- 69508569 ip6-inacl input ACL hits
- 69508569 ip6-pop-hop-by-hop Pkts w/ removed ip6 hop-by-hop options
-
-- To check the POT profiles use "show pot profile"
-
-Example:
-
- vpp# show pot profile
- Profile list in use : example
- POT Profile at index: 0
- ID : 0
- Validator : False (0)
- Secret share : 0x564cdbdec4eb625d (6218586935324795485)
- Prime number : 0x7fff0000fa884685 (9223090566081300101)
- 2nd polynomial(eval) : 0x23f3a227186a (39529304496234)
- LPC : 0x1 (1)
- Bit mask : 0x7fffffffffffffff (9223372036854775807)
- Profile index in use: 0
- Pkts passed : 0x36 (54)
-
-- To get statistics of POT for packets use "show ioam pot"
-
-Example at encapsulating or transit node:
-
- vpp# show ioam pot
- Pkts with ip6 hop-by-hop POT options - 54
- Pkts with ip6 hop-by-hop POT options but no profile set - 0
- Pkts with POT in Policy - 0
- Pkts with POT out of Policy - 0
-
-
-Example at decapsulating/verification node:
-
-
- vpp# show ioam pot
- Pkts with ip6 hop-by-hop POT options - 54
- Pkts with ip6 hop-by-hop POT options but no profile set - 0
- Pkts with POT in Policy - 54
- Pkts with POT out of Policy - 0
-
-- Tracing - enable trace of IPv6 packets to view the data inserted and
-collected.
-
-Example when the nodes are receiving data over a DPDK interface:
-Enable tracing using "trace add dpdk-input 20" and
-execute "show trace" to view the iOAM data collected:
-
-
- vpp# trace add dpdk-input 20
-
- vpp# show trace
-
- ------------------- Start of thread 0 vpp_main -------------------
-
- Packet 1
-
- 00:00:19:294697: dpdk-input
- GigabitEthernetb/0/0 rx queue 0
- buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
- PKT MBUF: port 0, nb_segs 1, pkt_len 214
- buf_len 2176, data_len 214, ol_flags 0x0, data_off 128, phys_addr 0xe9a35a00
- packet_type 0x0
- IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
- tos 0x00, flow label 0x0, hop limit 63, payload length 160
- 00:00:19:294737: ethernet-input
- IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
- 00:00:19:294753: ip6-input
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
- tos 0x00, flow label 0x0, hop limit 63, payload length 160
- 00:00:19:294757: ip6-lookup
- fib 0 adj-idx 15 : indirect via db05::2 flow hash: 0x00000000
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
- tos 0x00, flow label 0x0, hop limit 63, payload length 160
- 00:00:19:294802: ip6-hop-by-hop
- IP6_HOP_BY_HOP: next index 5 len 96 traced 96 Trace Type 0x1f , 1 elts left
- [0] ttl 0x0 node ID 0x0 ingress 0x0 egress 0x0 ts 0x0
- app 0x0
- [1] ttl 0x3e node ID 0x3 ingress 0x1 egress 0x2 ts 0xb68c2213
- app 0x1234
- [2] ttl 0x3f node ID 0x2 ingress 0x1 egress 0x2 ts 0xb68c2204
- app 0x1234
- [3] ttl 0x40 node ID 0x1 ingress 0x5 egress 0x6 ts 0xb68c2200
- app 0x1234
- POT opt present
- random = 0x577a916946071950, Cumulative = 0x10b46e78a35a392d, Index = 0x0
- 00:00:19:294810: ip6-rewrite
- tx_sw_if_index 1 adj-idx 14 : GigabitEthernetb/0/0
- IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72 flow hash: 0x00000000
- IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
- tos 0x00, flow label 0x0, hop limit 62, payload length 160
- 00:00:19:294814: GigabitEthernetb/0/0-output
- GigabitEthernetb/0/0
- IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
- tos 0x00, flow label 0x0, hop limit 62, payload length 160
- 00:00:19:294820: GigabitEthernetb/0/0-tx
- GigabitEthernetb/0/0 tx queue 0
- buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
- IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
-
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
-
- tos 0x00, flow label 0x0, hop limit 62, payload length 160
-
-
-[iOAM-Devnet]: <https://github.com/ciscodevnet/iOAM>
-[iOAM-ietf-requirements]:<https://tools.ietf.org/html/draft-brockners-inband-oam-requirements-01>
-[iOAM-ietf-transport]:<https://tools.ietf.org/html/draft-brockners-inband-oam-transport-01>
-[iOAM-ietf-data]:<https://tools.ietf.org/html/draft-brockners-inband-oam-data-01>
-[iOAM-ietf-proof-of-transit]:<https://tools.ietf.org/html/draft-brockners-proof-of-transit-01>
diff --git a/src/plugins/ioam/ioam_plugin_doc.rst b/src/plugins/ioam/ioam_plugin_doc.rst
new file mode 100644
index 00000000000..0f84d5f7a36
--- /dev/null
+++ b/src/plugins/ioam/ioam_plugin_doc.rst
@@ -0,0 +1,490 @@
+Inband OAM (iOAM)
+=================
+
+In-band OAM (iOAM) is an implementation study to record operational
+information in the packet while the packet traverses a path between two
+points in the network.
+
+An overview of iOAM can be found on the
+`iOAM-Devnet <https://github.com/ciscodevnet/iOAM>`__ page. The
+following IETF drafts detail the motivation and mechanism for recording
+operational information:
+
+- `iOAM-ietf-requirements <https://tools.ietf.org/html/draft-brockners-inband-oam-requirements-01>`__
+  describes the motivation and use cases for iOAM
+- `iOAM-ietf-data <https://tools.ietf.org/html/draft-brockners-inband-oam-data-01>`__
+  describes the data records that can be collected using iOAM
+- `iOAM-ietf-transport <https://tools.ietf.org/html/draft-brockners-inband-oam-transport-01>`__
+  lists the transport protocols and mechanisms to carry iOAM data
+  records
+- `iOAM-ietf-proof-of-transit <https://tools.ietf.org/html/draft-brockners-proof-of-transit-01>`__
+  describes the idea of Proof of Transit (POT) and mechanisms to
+  operationalize the idea
+
+Terminology
+-----------
+
+In-band OAM is expected to be deployed in a specific domain rather than
+on the overall Internet. The part of the network which employs in-band
+OAM is referred to as **“in-band OAM-domain”**.
+
+In-band OAM data is added to a packet on entering the in-band OAM-domain
+and is removed from the packet when exiting the domain. Within the
+in-band OAM-domain, network nodes that the packet traverses may update
+the in-band OAM data records.
+
+- The node which adds in-band OAM data to the packet is called the
+ **“in-band OAM encapsulating node”**.
+
+- The node which removes the in-band OAM data is referred to as the
+ **“in-band OAM decapsulating node”**.
+
+- Nodes within the domain which are aware of in-band OAM data and read
+ and/or write or process the in-band OAM data are called **“in-band
+ OAM transit nodes”**.
+
+Features supported in the current release
+-----------------------------------------
+
+VPP can function as in-band OAM encapsulating, transit and decapsulating
+node. In this version of VPP in-band OAM data is transported as options
+in an IPv6 hop-by-hop extension header. Hence in-band OAM can be enabled
+for IPv6 traffic.
+
+The following iOAM features are supported:
+
+- **In-band OAM Tracing** : In-band OAM supports multiple data records
+ to be recorded in the packet as the packet traverses the network.
+ These data records offer insights into the operational behavior of
+ the network. The following information can be collected in the
+ tracing data from the nodes a packet traverses:
+
+ - Node ID
+ - Ingress interface ID
+ - Egress interface ID
+ - Timestamp
+ - Pre-configured application data
+
+- **In-band OAM Proof of Transit (POT)**: Proof of transit iOAM data is
+ added to every packet for verifying that a packet traverses a
+ specific set of nodes. In-band OAM data is updated at every node that
+ is enabled with iOAM proof of transit and is used to verify whether a
+ packet traversed all the specified nodes. When the verifier receives
+ each packet, it can validate whether the packet traversed the
+ specified nodes.
+
+Configuration
+-------------
+
+Configuring iOAM involves:
+
+- Selecting the packets for which iOAM data must be inserted, updated
+  or removed
+
+  - Selection of packets for iOAM data insertion on the iOAM
+    encapsulating node. Selection of packets is done by 5-tuple based
+    classification
+  - Selection of packets for updating iOAM data is implicitly done on
+    the presence of iOAM options in the packet
+  - Selection of packets for removing the iOAM data is done on 5-tuple
+    based classification
+
+- The kind of data to be collected
+
+  - Tracing data
+  - Proof of transit
+
+- Additional details for processing iOAM data to be collected
+
+  - For trace data - trace type, number of nodes to be recorded in the
+    trace, time stamp precision, etc.
+  - For POT data - configuration of the POT profile required to
+    process the POT data
+
+The CLI for configuring iOAM is explained here followed by detailed
+steps and examples to deploy iOAM on VPP as an encapsulating, transit or
+decapsulating iOAM node in the subsequent sub-sections.
+
+VPP iOAM configuration for enabling trace and POT is as follows:
+
+::
+
+ set ioam rewrite trace-type <0x1f|0x7|0x9|0x11|0x19>
+ trace-elts <number of trace elements> trace-tsp <0|1|2|3>
+ node-id <node ID in hex> app-data <application data in hex> [pot]
+
+A description of each of the options of the CLI follows:
+
+- trace-type : An entry in the “Node data List” array of the trace
+  option can have different formats, following the needs of a
+  deployment. For example, some deployments might only be interested
+  in recording the node identifiers, whereas others might be
+  interested in recording node identifier and timestamp. The
+  following types are currently supported (see the layout sketch
+  after this list):
+
+  - 0x1f : Node data to include hop limit (8 bits), node ID (24 bits),
+    ingress and egress interface IDs (16 bits each), timestamp (32
+    bits), application data (32 bits)
+  - 0x7 : Node data to include hop limit (8 bits), node ID (24 bits),
+    ingress and egress interface IDs (16 bits each)
+  - 0x9 : Node data to include hop limit (8 bits), node ID (24 bits),
+    timestamp (32 bits)
+  - 0x11 : Node data to include hop limit (8 bits), node ID (24 bits),
+    application data (32 bits)
+  - 0x19 : Node data to include hop limit (8 bits), node ID (24 bits),
+    timestamp (32 bits), application data (32 bits)
+
+- trace-elts : Defines the length of the node data array in the trace
+  option.
+- trace-tsp : Defines the timestamp precision to use, with the
+  enumerated value for precision as follows:
+
+  - 0 : 32-bit timestamp in seconds
+  - 1 : 32-bit timestamp in milliseconds
+  - 2 : 32-bit timestamp in microseconds
+  - 3 : 32-bit timestamp in nanoseconds
+
+- node-id : Unique identifier for the node, included in the node ID
+  field of the node data in the trace option.
+- app-data : The value configured here is included as is in the
+  application data field of the node data in the trace option.
+- pot : Enables the POT option to be included in the iOAM options.
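+
+For orientation, a single node data entry of trace-type 0x1f can be
+pictured with the packed C layout below. This is an illustrative
+sketch only, not the plugin's actual definitions (those live under
+ioam/lib-trace), and the field names are invented for the sketch:
+
+::
+
+   #include <stdint.h>
+
+   /* Illustrative sketch of one trace-type 0x1f node data entry */
+   typedef struct __attribute__ ((packed))
+   {
+     uint32_t ttl_node_id; /* hop limit (8 bits) | node ID (24 bits) */
+     uint16_t ingress_if;  /* ingress interface ID */
+     uint16_t egress_if;   /* egress interface ID */
+     uint32_t timestamp;   /* precision selected by trace-tsp */
+     uint32_t app_data;    /* value configured with app-data */
+   } trace_0x1f_entry_t;   /* 16 bytes recorded per hop */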
+
+Trace configuration
+~~~~~~~~~~~~~~~~~~~
+
+On in-band OAM encapsulating node
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- **Configure classifier and apply ACL** to select packets for iOAM
+ data insertion
+
+ - Example to enable iOAM data insertion for all the packets towards
+ IPv6 address db06::06:
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-add-hop-by-hop
+ table-index 0 match l3 ip6 dst db06::06
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+- **Enable tracing** : Specify node ID, maximum number of nodes for
+ which trace data should be recorded, type of data to be included for
+ recording, optionally application data to be included
+
+ - Example to enable tracing with a maximum of 4 nodes recorded and
+ the data to be recorded to include - hop limit, node id, ingress
+ and egress interface IDs, timestamp (millisecond precision),
+ application data (0x1234):
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
+ node-id 0x1 app-data 0x1234
+
+On in-band OAM transit node
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- The transit node requires trace type, timestamp precision, node ID
+ and optionally application data to be configured, to update its node
+ data in the trace option.
+
+Example:
+
+::
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
+ node-id 0x2 app-data 0x1234
+
+On the In-band OAM decapsulating node
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- The decapsulating node, similar to the encapsulating node, requires
+  **classification** of the packets to remove iOAM data from.
+
+ - Example to decapsulate iOAM data for packets towards db06::06,
+ configure classifier and enable it as an ACL as follows:
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-lookup table-index 0
+ match l3 ip6 dst db06::06 opaque-index 100
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+- Decapsulating node requires trace type, timestamp precision, node ID
+ and optionally application data to be configured, to update its node
+ data in the trace option before it is decapsulated.
+
+Example:
+
+::
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4
+ trace-tsp 1 node-id 0x3 app-data 0x1234
+
+Proof of Transit configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For details on proof-of-transit, see the IETF draft
+`iOAM-ietf-proof-of-transit <https://tools.ietf.org/html/draft-brockners-proof-of-transit-01>`__.
+To enable Proof of Transit, all the nodes that participate, and hence
+are verified for transit, need a proof of transit profile. A script to
+generate a proof of transit profile as per the mechanism described in
+`iOAM-ietf-proof-of-transit <https://tools.ietf.org/html/draft-brockners-proof-of-transit-01>`__
+will be available at
+`iOAM-Devnet <https://github.com/ciscodevnet/iOAM>`__.
+
+The Proof of transit mechanism implemented here is based on Shamir’s
+Secret Sharing algorithm. The overall algorithm uses two polynomials
+POLY-1 and POLY-2. The degree of polynomials depends on number of nodes
+to be verified for transit. POLY-1 is secret and constant. Each node
+gets a point on POLY-1 at setup-time and keeps it secret. POLY-2 is
+public, random and per packet. Each node is assigned a point on POLY-1
+and POLY-2 with the same x index. Each node derives its point on POLY-2
+each time a packet arrives at it. A node then contributes its points on
+POLY-1 and POLY-2 to construct POLY-3 (POLY-3 = POLY-1 + POLY-2) using
+lagrange extrapolation and forwards it towards the verifier by updating
+POT data in the packet. The verifier constructs POLY-3 from the
+accumulated value from all the nodes and its own points on POLY-1 and
+POLY-2 and verifies whether POLY-3 = POLY-1 + POLY-2. Only the verifier
+knows POLY-1. The solution leverages finite field arithmetic in a field
+of size “prime number” for reasons explained in description of Shamir’s
+secret sharing algorithm.
+
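+Under these assumptions, the per-node update and the final check can
+be sketched in C as below. This is a simplified illustration only:
+the names are invented for the sketch, all values are assumed to be
+already reduced modulo the configured prime (which is below 2^63),
+and the plugin's actual implementation lives under ioam/lib-pot
+(it uses the double-precision modular helpers in math64.h rather
+than 128-bit integers).
+
+::
+
+   typedef unsigned long long u64;
+
+   /* (a * b) mod m via a 128-bit intermediate (gcc/clang) */
+   static u64
+   mulmod (u64 a, u64 b, u64 m)
+   {
+     return (u64) (((unsigned __int128) a * b) % m);
+   }
+
+   /* Each node folds lpc * (its point on POLY-1 + its point on
+      POLY-2) into the cumulative value carried in the POT option.
+      polynomial2 is the pre-evaluated part of the node's POLY-2
+      point; random is the per-packet constant term of POLY-2. */
+   static u64
+   pot_update_cumulative (u64 cumulative, u64 secret_share,
+                          u64 polynomial2, u64 random, u64 lpc,
+                          u64 prime)
+   {
+     u64 y2 = (polynomial2 + random) % prime; /* point on POLY-2 */
+     u64 y3 = (secret_share + y2) % prime;    /* point on POLY-3 */
+     return (cumulative + mulmod (lpc, y3, prime)) % prime;
+   }
+
+   /* After folding in its own share via pot_update_cumulative (),
+      the verifier accepts when the accumulated POLY-3 constant term
+      equals POLY-1(0) + POLY-2(0) mod prime, both of which only the
+      verifier can compute. */
+   static int
+   pot_validate (u64 cumulative, u64 poly1_at_0, u64 poly2_at_0,
+                 u64 prime)
+   {
+     return cumulative == (poly1_at_0 + poly2_at_0) % prime;
+   }
+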
+Here is an explanation of the POT profile list and the profile
+configuration CLI used to realize the above mechanism. It is best to
+use the script provided at
+`iOAM-Devnet <https://github.com/ciscodevnet/iOAM>`__ to generate this
+configuration.
+
+- **Create POT profile** :
+
+  ::
+
+     set pot profile name <string> id [0-1]
+     [validator-key 0xu64] prime-number 0xu64 secret_share 0xu64
+     lpc 0xu64 polynomial2 0xu64 bits-in-random [0-64]
+
+  - name : Profile list name.
+  - id : Profile id, it can be 0 or 1. A maximum of two profiles can
+    be configured per profile list.
+  - validator-key : Secret key configured only on the
+    verifier/decapsulating node, used to compare and verify proof of
+    transit.
+  - prime-number : Prime number for finite field arithmetic as
+    required by the proof of transit mechanism.
+  - secret_share : Unique point for each node on the secret polynomial
+    POLY-1.
+  - lpc : Lagrange Polynomial Constant (LPC), calculated per node
+    based on its point (the x value used for evaluating the points on
+    the polynomial) in the lagrange extrapolation used for
+    reconstructing the polynomial (POLY-3).
+  - polynomial2 : The pre-evaluated value of the node's point on the
+    2nd polynomial (POLY-2). This is unique for each node. It is
+    pre-evaluated for all the coefficients of POLY-2 except for the
+    constant part of the polynomial, which changes per packet and is
+    received as part of the POT data in the packet.
+  - bits-in-random : Controls the size of the random number to be
+    generated. This number has to match the other numbers generated
+    and used in the profile as per the algorithm.
+
+- **Set a configured profile as active/in-use** :
+  set pot profile-active name <string> ID [0-1]
+
+ - name : Name of the profile list to be used for computing POT data
+ per packet.
+ - ID : Identifier of the profile within the list to be used.
+
+.. _on-in-band-oam-encapsulating-node-1:
+
+On In-band OAM encapsulating node
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Configure the classifier and apply ACL to select packets for iOAM
+ data insertion.
+
+ - Example to enable iOAM data insertion for all the packets towards
+ IPv6 address db06::06:
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-add-hop-by-hop
+ table-index 0 match l3 ip6 dst db06::06
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+- Configure the proof of transit profile list with profiles. Each
+ profile list referred to by a name can contain 2 profiles, only one
+ is in use for updating proof of transit data at any time.
+
+ - Example: a profile list named “example” with a profile generated
+ from the script to verify transit through 3 nodes:
+
+ vpp# set pot profile name example id 0 prime-number
+ 0x7fff0000fa884685 secret_share 0x6c22eff0f45ec56d lpc
+ 0x7fff0000fa884682 polynomial2 0xffb543d4a9c bits-in-random 63
+
+- Enable one of the profiles from the configured profile list as active
+  so that it will be used for calculating proof of transit
+
+Example: enable profile ID 0 from the profile list “example”
+configured above:
+
+::
+
+ vpp# set pot profile-active name example ID 0
+
+- Enable POT option to be inserted
+
+ vpp# set ioam rewrite pot
+
+.. _on-in-band-oam-transit-node-1:
+
+On in-band OAM transit node
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Configure the proof of transit profile list with profiles for the
+  transit node. Example:
+
+ vpp# set pot profile name example id 0 prime-number
+ 0x7fff0000fa884685 secret_share 0x564cdbdec4eb625d lpc 0x1
+ polynomial2 0x23f3a227186a bits-in-random 63
+
+On in-band OAM decapsulating node / verifier
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- The decapsulating node, similar to the encapsulating node, requires
+  classification of the packets to remove iOAM data from.
+
+ - Example to decapsulate iOAM data for packets towards db06::06
+ configure classifier and enable it as an ACL as follows:
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-lookup table-index 0
+ match l3 ip6 dst db06::06 opaque-index 100
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+- To update and verify the proof of transit, a POT profile list must
+  be configured.
+
+ - Example POT profile list configured as follows:
+
+ vpp# set pot profile name example id 0 validate-key
+ 0x7fff0000fa88465d prime-number 0x7fff0000fa884685 secret_share
+ 0x7a08fbfc5b93116d lpc 0x3 polynomial2 0x3ff738597ce bits-in-random
+ 63
+
+Operational data
+----------------
+
+The following CLIs are available to check iOAM operation:
+
+- To check the iOAM configuration that is in effect, use “show ioam
+  summary”
+
+Example:
+
+::
+
+ vpp# show ioam summary
+ REWRITE FLOW CONFIGS - Not configured
+ HOP BY HOP OPTIONS - TRACE CONFIG -
+ Trace Type : 0x1f (31)
+ Trace timestamp precision : 1 (Milliseconds)
+ Num of trace nodes : 4
+ Node-id : 0x2 (2)
+ App Data : 0x1234 (4660)
+ POT OPTION - 1 (Enabled)
+ Try 'show ioam pot and show pot profile' for more information
+
+- To find statistics about packets for which iOAM options were added
+ (encapsulating node) and removed (decapsulating node) execute *show
+ errors*
+
+Example on encapsulating node:
+
+::
+
+ vpp# show error
+ Count Node Reason
+ 1208804706 ip6-inacl input ACL hits
+ 1208804706 ip6-add-hop-by-hop Pkts w/ added ip6 hop-by-hop options
+
+Example on decapsulating node:
+
+::
+
+ vpp# show error
+ Count Node Reason
+ 69508569 ip6-inacl input ACL hits
+ 69508569 ip6-pop-hop-by-hop Pkts w/ removed ip6 hop-by-hop options
+
+- To check the POT profiles use “show pot profile”
+
+Example:
+
+::
+
+ vpp# show pot profile
+ Profile list in use : example
+ POT Profile at index: 0
+ ID : 0
+ Validator : False (0)
+ Secret share : 0x564cdbdec4eb625d (6218586935324795485)
+ Prime number : 0x7fff0000fa884685 (9223090566081300101)
+ 2nd polynomial(eval) : 0x23f3a227186a (39529304496234)
+ LPC : 0x1 (1)
+ Bit mask : 0x7fffffffffffffff (9223372036854775807)
+ Profile index in use: 0
+ Pkts passed : 0x36 (54)
+
+- To get statistics of POT for packets use “show ioam pot”
+
+Example at encapsulating or transit node:
+
+::
+
+ vpp# show ioam pot
+ Pkts with ip6 hop-by-hop POT options - 54
+ Pkts with ip6 hop-by-hop POT options but no profile set - 0
+ Pkts with POT in Policy - 0
+ Pkts with POT out of Policy - 0
+
+Example at decapsulating/verification node:
+
+::
+
+ vpp# show ioam pot
+ Pkts with ip6 hop-by-hop POT options - 54
+ Pkts with ip6 hop-by-hop POT options but no profile set - 0
+ Pkts with POT in Policy - 54
+ Pkts with POT out of Policy - 0
+
+- Tracing - enable trace of IPv6 packets to view the data inserted and
+ collected.
+
+Example when the nodes are receiving data over a DPDK interface: Enable
+tracing using “trace add dpdk-input 20” and execute “show trace” to view
+the iOAM data collected:
+
+::
+
+ vpp# trace add dpdk-input 20
+
+ vpp# show trace
+
+ ------------------- Start of thread 0 vpp_main -------------------
+
+ Packet 1
+
+ 00:00:19:294697: dpdk-input
+ GigabitEthernetb/0/0 rx queue 0
+ buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
+ PKT MBUF: port 0, nb_segs 1, pkt_len 214
+ buf_len 2176, data_len 214, ol_flags 0x0, data_off 128, phys_addr 0xe9a35a00
+ packet_type 0x0
+ IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294737: ethernet-input
+ IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
+ 00:00:19:294753: ip6-input
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294757: ip6-lookup
+ fib 0 adj-idx 15 : indirect via db05::2 flow hash: 0x00000000
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294802: ip6-hop-by-hop
+ IP6_HOP_BY_HOP: next index 5 len 96 traced 96 Trace Type 0x1f , 1 elts left
+ [0] ttl 0x0 node ID 0x0 ingress 0x0 egress 0x0 ts 0x0
+ app 0x0
+ [1] ttl 0x3e node ID 0x3 ingress 0x1 egress 0x2 ts 0xb68c2213
+ app 0x1234
+ [2] ttl 0x3f node ID 0x2 ingress 0x1 egress 0x2 ts 0xb68c2204
+ app 0x1234
+ [3] ttl 0x40 node ID 0x1 ingress 0x5 egress 0x6 ts 0xb68c2200
+ app 0x1234
+ POT opt present
+ random = 0x577a916946071950, Cumulative = 0x10b46e78a35a392d, Index = 0x0
+ 00:00:19:294810: ip6-rewrite
+ tx_sw_if_index 1 adj-idx 14 : GigabitEthernetb/0/0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72 flow hash: 0x00000000
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
+ 00:00:19:294814: GigabitEthernetb/0/0-output
+ GigabitEthernetb/0/0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
+ 00:00:19:294820: GigabitEthernetb/0/0-tx
+ GigabitEthernetb/0/0 tx queue 0
+ buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
diff --git a/src/plugins/ioam/ip6/ioam_cache.h b/src/plugins/ioam/ip6/ioam_cache.h
index b85172e8ac1..8c1b6291707 100644
--- a/src/plugins/ioam/ip6/ioam_cache.h
+++ b/src/plugins/ioam/ip6/ioam_cache.h
@@ -608,17 +608,20 @@ ioam_cache_ts_table_destroy (vlib_main_t * vm)
int i;
/* free pool and hash table */
- for (i = 0; i < no_of_threads; i++)
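+ /* the per-thread pool vector may never have been allocated, so
+ guard the whole teardown */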
+ if (cm->ioam_ts_pool)
{
- pool_foreach (entry, cm->ioam_ts_pool[i])
- {
- ioam_cache_ts_entry_free (i, entry, cm->error_node_index);
- }
- pool_free (cm->ioam_ts_pool[i]);
- cm->ioam_ts_pool = 0;
- tw_timer_wheel_free_16t_2w_512sl (&cm->timer_wheels[i]);
+ for (i = 0; i < no_of_threads; i++)
+ {
+ pool_foreach (entry, cm->ioam_ts_pool[i])
+ {
+ ioam_cache_ts_entry_free (i, entry, cm->error_node_index);
+ }
+ pool_free (cm->ioam_ts_pool[i]);
+ cm->ioam_ts_pool[i] = 0;
+ tw_timer_wheel_free_16t_2w_512sl (&cm->timer_wheels[i]);
+ }
+ vec_free (cm->ioam_ts_pool);
}
- vec_free (cm->ioam_ts_pool);
return (0);
}
diff --git a/src/plugins/ioam/ip6/ioam_cache_node.c b/src/plugins/ioam/ip6/ioam_cache_node.c
index 6a5465b86aa..9859ee6fbf0 100644
--- a/src/plugins/ioam/ip6/ioam_cache_node.c
+++ b/src/plugins/ioam/ip6/ioam_cache_node.c
@@ -179,7 +179,6 @@ ip6_ioam_cache_node_fn (vlib_main_t * vm,
/*
* Node for IP6 iOAM header cache
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ioam_cache_node) =
{
.function = ip6_ioam_cache_node_fn,
@@ -196,7 +195,6 @@ VLIB_REGISTER_NODE (ioam_cache_node) =
[IOAM_CACHE_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop"
},
};
-/* *INDENT-ON* */
typedef struct
{
@@ -386,7 +384,6 @@ VLIB_NODE_FN (ip6_add_from_cache_hbh_node) (vlib_main_t * vm,
processed);
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_add_from_cache_hbh_node) =
{
.name = "ip6-add-from-cache-hop-by-hop",
@@ -404,7 +401,6 @@ VLIB_REGISTER_NODE (ip6_add_from_cache_hbh_node) =
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
index d2c7f20a778..61476ebd85c 100644
--- a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
+++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
@@ -235,7 +235,6 @@ ip6_ioam_cache_ts_node_fn (vlib_main_t * vm,
/*
* Node for IP6 iOAM header cache
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ioam_cache_ts_node) =
{
.function = ip6_ioam_cache_ts_node_fn,
@@ -253,7 +252,6 @@ VLIB_REGISTER_NODE (ioam_cache_ts_node) =
[IOAM_CACHE_TS_ERROR_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
typedef struct
{
@@ -623,7 +621,6 @@ VLIB_NODE_FN (ip6_reset_ts_hbh_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_reset_ts_hbh_node) =
{
.name = "ip6-add-syn-hop-by-hop",
@@ -642,7 +639,6 @@ VLIB_REGISTER_NODE (ip6_reset_ts_hbh_node) =
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vlib_node_registration_t ioam_cache_ts_timer_tick_node;
@@ -738,7 +734,6 @@ ioam_cache_ts_timer_tick_node_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ioam_cache_ts_timer_tick_node) = {
.function = ioam_cache_ts_timer_tick_node_fn,
.name = "ioam-cache-ts-timer-tick",
@@ -757,7 +752,6 @@ VLIB_REGISTER_NODE (ioam_cache_ts_timer_tick_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/ipfixcollector/node.c b/src/plugins/ioam/ipfixcollector/node.c
index 098029d6ad6..73d7b57fab5 100644
--- a/src/plugins/ioam/ipfixcollector/node.c
+++ b/src/plugins/ioam/ipfixcollector/node.c
@@ -271,7 +271,6 @@ ipfix_collector_node_fn (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipfix_collector_node) = {
.function = ipfix_collector_node_fn,
.name = "ipfix-collector",
@@ -289,7 +288,6 @@ VLIB_REGISTER_NODE (ipfix_collector_node) = {
[IPFIX_COLLECTOR_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/lib-e2e/e2e_util.h b/src/plugins/ioam/lib-e2e/e2e_util.h
index f8a4ebd4797..a72b4030b2c 100644
--- a/src/plugins/ioam/lib-e2e/e2e_util.h
+++ b/src/plugins/ioam/lib-e2e/e2e_util.h
@@ -18,13 +18,11 @@
#include <ioam/lib-e2e/ioam_seqno_lib.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
u8 e2e_type;
u8 reserved;
u32 e2e_data;
}) ioam_e2e_packet_t;
-/* *INDENT-ON* */
#endif /* PLUGINS_IOAM_PLUGIN_IOAM_LIB_E2E_E2E_UTIL_H_ */
diff --git a/src/plugins/ioam/lib-pot/math64.h b/src/plugins/ioam/lib-pot/math64.h
index 4c608a37de4..2084c25fa58 100644
--- a/src/plugins/ioam/lib-pot/math64.h
+++ b/src/plugins/ioam/lib-pot/math64.h
@@ -51,23 +51,23 @@ static inline void mul64by64(u64 a, u64 b, u64 * hi, u64 * lo)
static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
{
- u64 q1, q2, q;
- u64 p1, p0;
- double dq;
+ u64 q1, q2;
+ u64 p1, p0;
+ double dq;
- /* calculate quotient first pass 53 bits */
- dq = (TWO64 * (double)x + (double)y) * di;
+ /* calculate quotient first pass 53 bits */
+ dq = (TWO64 * (double) x + (double) y) * di;
- if (dq >= TWO64)
- q1 = 0xfffffffffffff800L;
- else
- q1 = dq;
+ if (dq >= TWO64)
+ q1 = 0xfffffffffffff800L;
+ else
+ q1 = dq;
- /* q1 * m to compare the product to the dividend. */
- mul64by64(q1, m, &p1, &p0);
+ /* q1 * m to compare the product to the dividend. */
+ mul64by64 (q1, m, &p1, &p0);
- /* Adjust quotient. is it > actual result: */
- if (x < p1 || (x == p1 && y < p0))
+ /* Adjust quotient. is it > actual result: */
+ if (x < p1 || (x == p1 && y < p0))
{
/* q1 > quotient. calculate abs remainder */
x = p1 - (x + (p0 < y));
@@ -77,7 +77,6 @@ static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
q2 = (u64) ((TWO64 * (double)x + (double)y) * di);
mul64by64(q2, m, &p1, &p0);
- q = q1 - q2;
if (x < p1 || (x == p1 && y <= p0))
{
y = p0 - y;
@@ -86,7 +85,6 @@ static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
{
y = p0 - y;
y += m;
- q--;
}
}
else
@@ -97,12 +95,10 @@ static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
q2 = (u64) ((TWO64 * (double)x + (double)y) * di);
mul64by64(q2, m, &p1, &p0);
- q = q1 + q2;
if (x < p1 || (x == p1 && y < p0))
{
y = y - p0;
y += m;
- q--;
}
else
{
@@ -110,7 +106,6 @@ static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
if (y >= m)
{
y -= m;
- q++;
}
}
}
diff --git a/src/plugins/ioam/lib-trace/trace_util.c b/src/plugins/ioam/lib-trace/trace_util.c
index d935543cf23..31fbb2b7446 100644
--- a/src/plugins/ioam/lib-trace/trace_util.c
+++ b/src/plugins/ioam/lib-trace/trace_util.c
@@ -98,14 +98,12 @@ clear_trace_profiles (void)
clear_trace_profile_command_fn (0, 0, 0);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(clear_trace_profile_command) =
{
.path = "clear ioam-trace profile",
.short_help = "clear ioam-trace profile [<index>|all]",
.function = clear_trace_profile_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_trace_profile_command_fn (vlib_main_t * vm,
@@ -137,7 +135,6 @@ set_trace_profile_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_trace_profile_command, static) =
{
.path = "set ioam-trace profile",
@@ -146,7 +143,6 @@ VLIB_CLI_COMMAND (set_trace_profile_command, static) =
node-id <node id in hex> app-data <app_data in hex>",
.function = set_trace_profile_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_trace_profile_command_fn (vlib_main_t * vm,
@@ -189,14 +185,12 @@ show_trace_profile_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_trace_profile_command, static) =
{
.path = "show ioam-trace profile",
.short_help = "show ioam-trace profile",
.function = show_trace_profile_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/lib-trace/trace_util.h b/src/plugins/ioam/lib-trace/trace_util.h
index 61f18d9173a..869ea717cf8 100644
--- a/src/plugins/ioam/lib-trace/trace_util.h
+++ b/src/plugins/ioam/lib-trace/trace_util.h
@@ -75,14 +75,12 @@ int trace_profile_create (trace_profile * profile, u8 trace_type, u8 num_elts,
void clear_trace_profiles (void);
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
u8 ioam_trace_type;
u8 data_list_elts_left;
u32 elts[0]; /* Variable type. So keep it generic */
}) ioam_trace_hdr_t;
-/* *INDENT-ON* */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
index 87e57d3605e..801faa98066 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
@@ -193,7 +193,6 @@ vxlan_gpe_decap_ioam_v4 (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_decap_ioam_v4_node) = {
.function = vxlan_gpe_decap_ioam_v4,
.name = "vxlan-gpe-decap-ioam-v4",
@@ -211,7 +210,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_decap_ioam_v4_node) = {
[VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
index 1d15654464d..de375df4f7c 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
@@ -164,7 +164,6 @@ vxlan_gpe_encap_ioam_v4 (vlib_main_t * vm,
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_encap_ioam_v4_node) = {
.function = vxlan_gpe_encap_ioam_v4,
.name = "vxlan-gpe-encap-ioam-v4",
@@ -182,7 +181,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_encap_ioam_v4_node) = {
[VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
index 84900eb7e01..2fa0aa29450 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
@@ -321,7 +321,6 @@ vxlan_gpe_pop_ioam_v4 (vlib_main_t * vm,
return vxlan_gpe_pop_ioam (vm, node, from_frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_pop_ioam_v4_node) = {
.function = vxlan_gpe_pop_ioam_v4,
.name = "vxlan-gpe-pop-ioam-v4",
@@ -340,7 +339,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_pop_ioam_v4_node) = {
#undef _
},
};
-/* *INDENT-ON* */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
index 215f14b74bc..e3c82725e26 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
@@ -51,14 +51,12 @@ typedef enum
} vxlan_gpe_transit_ioam_next_t;
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (vxlan_gpe_transit_ioam, static) =
{
.arc_name = "ip4-output",
.node_name = "vxlan-gpe-transit-ioam",
.runs_before = VNET_FEATURES ("interface-output"),
};
-/* *INDENT-ON* */
static uword
vxlan_gpe_transit_ioam (vlib_main_t * vm,
@@ -156,7 +154,6 @@ vxlan_gpe_transit_ioam (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_transit_ioam_node) = {
.function = vxlan_gpe_transit_ioam,
.name = "vxlan-gpe-transit-ioam",
@@ -175,7 +172,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_transit_ioam_node) = {
},
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
index 9f6d181f0b1..d61832d975a 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
@@ -27,6 +27,7 @@
#include <vlibmemory/api.h>
#include <vnet/format_fns.h>
#include <vnet/ip/ip_types_api.h>
+#include <vnet/udp/udp_local.h>
/* define message IDs */
#include <ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api_enum.h>
@@ -92,7 +93,7 @@ static void vl_api_vxlan_gpe_ioam_vni_enable_t_handler
clib_memcpy (&key4.remote, &mp->remote.un.ip4, sizeof (key4.remote));
vni = clib_net_to_host_u32 (mp->vni);
key4.vni = clib_host_to_net_u32 (vni << 8);
- key4.pad = 0;
+ key4.port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
}
@@ -141,7 +142,7 @@ static void vl_api_vxlan_gpe_ioam_vni_disable_t_handler
clib_memcpy (&key4.remote, &mp->remote, sizeof (key4.remote));
vni = clib_net_to_host_u32 (mp->vni);
key4.vni = clib_host_to_net_u32 (vni << 8);
- key4.pad = 0;
+ key4.port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
}
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
index 108b0c0765b..327afc3fb61 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
@@ -19,6 +19,7 @@
#include <vnet/dpo/load_balance.h>
#include <vnet/fib/ip4_fib.h>
#include <vnet/fib/fib_entry.h>
+#include <vnet/udp/udp_local.h>
vxlan_gpe_ioam_main_t vxlan_gpe_ioam_main;
@@ -478,7 +479,7 @@ vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t *
key4.local = local.ip4.as_u32;
key4.remote = remote.ip4.as_u32;
key4.vni = clib_host_to_net_u32 (vni << 8);
- key4.pad = 0;
+ key4.port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
}
else
@@ -488,6 +489,7 @@ vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t *
key6.remote.as_u64[0] = remote.ip6.as_u64[0];
key6.remote.as_u64[1] = remote.ip6.as_u64[1];
key6.vni = clib_host_to_net_u32 (vni << 8);
+ key6.port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN6_GPE);
p = hash_get_mem (gm->vxlan6_gpe_tunnel_by_key, &key6);
}
@@ -508,13 +510,11 @@ vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t *
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_rewrite_cmd, static) = {
.path = "set vxlan-gpe-ioam",
.short_help = "set vxlan-gpe-ioam vxlan <src-ip> <dst_ip> <vnid> [disable]",
.function = vxlan_gpe_set_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
@@ -590,13 +590,11 @@ vxlan_gpe_set_ioam_flags_command_fn (vlib_main_t * vm,
return rv;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_flags_cmd, static) =
{
.path = "set vxlan-gpe-ioam rewrite",
.short_help = "set vxlan-gpe-ioam [trace] [pot] [ppc <encap|decap>]",
.function = vxlan_gpe_set_ioam_flags_command_fn,};
-/* *INDENT-ON* */
int vxlan_gpe_ioam_disable_for_dest
@@ -681,13 +679,11 @@ static clib_error_t *vxlan_gpe_set_ioam_transit_rewrite_command_fn
return rv;
}
- /* *INDENT-OFF* */
VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_transit_rewrite_cmd, static) = {
.path = "set vxlan-gpe-ioam-transit",
.short_help = "set vxlan-gpe-ioam-transit dst-ip <dst_ip> [outer-fib-index <outer_fib_index>] [disable]",
.function = vxlan_gpe_set_ioam_transit_rewrite_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *clear_vxlan_gpe_ioam_rewrite_command_fn
(vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -695,14 +691,12 @@ clib_error_t *clear_vxlan_gpe_ioam_rewrite_command_fn
return (vxlan_gpe_ioam_disable (0, 0, 0));
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vxlan_gpe_clear_ioam_flags_cmd, static) =
{
.path = "clear vxlan-gpe-ioam rewrite",
.short_help = "clear vxlan-gpe-ioam rewrite",
.function = clear_vxlan_gpe_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
/**
@@ -749,7 +743,8 @@ void
vxlan_gpe_ioam_interface_init (void)
{
vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
- hm->fib_entry_type = fib_node_register_new_type (&vxlan_gpe_ioam_vft);
+ hm->fib_entry_type =
+ fib_node_register_new_type ("vxlan-gpe", &vxlan_gpe_ioam_vft);
return;
}
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
index 1a37059396c..9c783c747d0 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
@@ -39,14 +39,12 @@ typedef union
} time_u64_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
vxlan_gpe_ioam_option_t hdr;
u8 ioam_trace_type;
u8 data_list_elts_left;
u32 elts[0]; /* Variable type. So keep it generic */
}) vxlan_gpe_ioam_trace_option_t;
-/* *INDENT-ON* */
#define foreach_vxlan_gpe_ioam_trace_stats \
@@ -422,13 +420,11 @@ vxlan_gpe_show_ioam_trace_cmd_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vxlan_gpe_show_ioam_trace_cmd, static) = {
.path = "show ioam vxlan-gpe trace",
.short_help = "iOAM trace statistics",
.function = vxlan_gpe_show_ioam_trace_cmd_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -459,13 +455,11 @@ vxlan_gpe_ioam_trace_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (vxlan_gpe_ioam_trace_init) =
{
.runs_after = VLIB_INITS("ip_main_init", "ip6_lookup_init",
"vxlan_gpe_init"),
};
-/* *INDENT-ON* */
int
diff --git a/src/plugins/ioam/udp-ping/udp_ping_export.c b/src/plugins/ioam/udp-ping/udp_ping_export.c
index 3e835989a6f..78d62233a7d 100644
--- a/src/plugins/ioam/udp-ping/udp_ping_export.c
+++ b/src/plugins/ioam/udp-ping/udp_ping_export.c
@@ -23,20 +23,18 @@
#define UDP_PING_EXPORT_RECORD_SIZE 400
static u8 *
-udp_ping_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address, u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+udp_ping_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port, ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index)
{
- return ioam_template_rewrite (frm, fr, collector_address,
- src_address, collector_port, elts, n_elts,
+ return ioam_template_rewrite (exp, fr, collector_port, elts, n_elts,
stream_index);
}
static vlib_frame_t *
-udp_ping_send_flows (flow_report_main_t * frm, flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+udp_ping_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
vlib_buffer_t *b0 = NULL;
u32 next_offset = 0;
@@ -47,17 +45,16 @@ udp_ping_send_flows (flow_report_main_t * frm, flow_report_t * fr,
ipfix_set_header_t *s = NULL;
ip4_header_t *ip;
udp_header_t *udp;
- u32 records_this_buffer;
u16 new_l0, old_l0;
ip_csum_t sum0;
- vlib_main_t *vm = frm->vlib_main;
+ vlib_main_t *vm = vlib_get_main ();
flow_report_stream_t *stream;
udp_ping_flow_data *stats;
ip46_udp_ping_flow *ip46_flow;
u16 src_port, dst_port;
u16 data_len;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
data_len = vec_len (udp_ping_main.ip46_flow);
for (i = 0; i < data_len; i++)
@@ -101,7 +98,6 @@ udp_ping_send_flows (flow_report_main_t * frm, flow_report_t * fr,
h->sequence_number =
clib_host_to_net_u32 (h->sequence_number);
next_offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
- records_this_buffer = 0;
}
next_offset = ioam_analyse_add_ipfix_record (fr,
@@ -117,10 +113,8 @@ udp_ping_send_flows (flow_report_main_t * frm, flow_report_t * fr,
//memcpy (b0->data + next_offset, &pak_sent, sizeof(u32));
//next_offset += sizeof(u32);
- records_this_buffer++;
-
/* Flush data if packet len is about to reach path mtu */
- if (next_offset > (frm->path_mtu - UDP_PING_EXPORT_RECORD_SIZE))
+ if (next_offset > (exp->path_mtu - UDP_PING_EXPORT_RECORD_SIZE))
{
b0->current_length = next_offset;
b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -230,7 +224,7 @@ udp_ping_flow_create (u8 del)
vnet_flow_report_add_del_args_t args;
int rv;
u32 domain_id = 0;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
u16 template_id;
clib_memset (&args, 0, sizeof (args));
@@ -240,7 +234,7 @@ udp_ping_flow_create (u8 del)
args.domain_id = domain_id;
args.src_port = UDP_DST_PORT_ipfix;
- rv = vnet_flow_report_add_del (frm, &args, &template_id);
+ rv = vnet_flow_report_add_del (exp, &args, &template_id);
switch (rv)
{
@@ -281,13 +275,11 @@ set_udp_ping_export_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_udp_ping_export_command, static) = {
.path = "set udp-ping export-ipfix",
.short_help = "set udp-ping export-ipfix [disable]",
.function = set_udp_ping_export_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
udp_ping_flow_report_init (vlib_main_t * vm)
@@ -295,12 +287,10 @@ udp_ping_flow_report_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (udp_ping_flow_report_init) =
{
.runs_after = VLIB_INITS ("flow_report_init"),
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/ioam/udp-ping/udp_ping_node.c b/src/plugins/ioam/udp-ping/udp_ping_node.c
index 6bfa8f67ef5..fbc3b13971c 100644
--- a/src/plugins/ioam/udp-ping/udp_ping_node.c
+++ b/src/plugins/ioam/udp-ping/udp_ping_node.c
@@ -93,14 +93,12 @@ format_udp_ping_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp_ping_node, static) =
{
.function = udp_ping_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "udp-ping-process",
};
-/* *INDENT-ON* */
void
udp_ping_calculate_timer_interval (void)
@@ -301,7 +299,6 @@ set_udp_ping_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_udp_ping_command, static) =
{
.path = "set udp-ping",
@@ -312,7 +309,6 @@ VLIB_CLI_COMMAND (set_udp_ping_command, static) =
[disable]",
.function = set_udp_ping_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_udp_ping_summary_cmd_fn (vlib_main_t * vm,
@@ -374,14 +370,12 @@ show_udp_ping_summary_cmd_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_udp_ping_cmd, static) =
{
.path = "show udp-ping summary",
.short_help = "Summary of udp-ping",
.function = show_udp_ping_summary_cmd_fn,
};
-/* *INDENT-ON* */
/**
* @brief UDP-Ping Process node.
@@ -790,7 +784,6 @@ udp_ping_local_node_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
/*
* Node for udp-ping-local
*/
@@ -814,7 +807,6 @@ VLIB_REGISTER_NODE (udp_ping_local, static) =
[UDP_PING_NEXT_IP6_DROP] = "ip6-drop",
},
};
-/* *INDENT-ON* */
static clib_error_t *
udp_ping_init (vlib_main_t * vm)
@@ -828,12 +820,10 @@ udp_ping_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (udp_ping_init) =
{
.runs_after = VLIB_INITS("ip_main_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ip_session_redirect/CMakeLists.txt b/src/plugins/ip_session_redirect/CMakeLists.txt
new file mode 100644
index 00000000000..09b93d72759
--- /dev/null
+++ b/src/plugins/ip_session_redirect/CMakeLists.txt
@@ -0,0 +1,27 @@
+# Copyright (c) 2021-2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(ip_session_redirect
+ SOURCES
+ api.c
+ redirect.c
+
+ API_FILES
+ ip_session_redirect.api
+
+ API_TEST_SOURCES
+ test_api.c
+
+ INSTALL_HEADERS
+ ip_session_redirect.h
+)
diff --git a/src/plugins/ip_session_redirect/FEATURE.yaml b/src/plugins/ip_session_redirect/FEATURE.yaml
new file mode 100644
index 00000000000..d5cca4673d6
--- /dev/null
+++ b/src/plugins/ip_session_redirect/FEATURE.yaml
@@ -0,0 +1,9 @@
+---
+name: IP session redirect
+maintainer: Benoît Ganne <bganne@cisco.com>
+features:
+ - use the classifier ACL infrastructure to redirect sessions via arbitrary
+ fib paths
+description: "IP session redirect plugin"
+state: experimental
+properties: [CLI, STATS, MULTITHREAD, API]
diff --git a/src/plugins/ip_session_redirect/api.c b/src/plugins/ip_session_redirect/api.c
new file mode 100644
index 00000000000..1d17d55b5b4
--- /dev/null
+++ b/src/plugins/ip_session_redirect/api.c
@@ -0,0 +1,124 @@
+/* Copyright (c) 2021-2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_api.h>
+#include <vnet/ip/ip_format_fns.h>
+#include <vlibmemory/api.h>
+#include <vlibapi/api.h>
+
+#define REPLY_MSG_ID_BASE vl_api_ip_session_redirect_msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+#include "ip_session_redirect.api_enum.h"
+#include "ip_session_redirect.api_types.h"
+
+#include "ip_session_redirect.h"
+
+static u16 vl_api_ip_session_redirect_msg_id_base;
+
+static int
+vl_api_ip_session_redirect_add (u32 table_index, u32 opaque_index,
+ vl_api_fib_path_nh_proto_t proto, int is_punt,
+ u8 *match, int match_len,
+ vl_api_fib_path_t *paths, int n_paths)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ fib_route_path_t *paths_ = 0;
+ dpo_proto_t proto_;
+ u8 *match_ = 0;
+ int rv = 0;
+
+ if (n_paths <= 0)
+ {
+ rv = VNET_API_ERROR_NO_PATHS_IN_ROUTE;
+ goto err0;
+ }
+
+ for (int i = 0; i < n_paths; i++)
+ {
+ fib_route_path_t path;
+ if ((rv = fib_api_path_decode (&paths[i], &path)))
+ goto err1;
+ vec_add1 (paths_, path);
+ }
+
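+ /* no protocol specified: infer it from the first path's next-hop proto */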
+ if (~0 == proto)
+ proto_ = paths_[0].frp_proto;
+ else
+ fib_api_path_nh_proto_to_dpo (ntohl (proto), &proto_);
+
+ vec_add (match_, match, match_len);
+ rv = ip_session_redirect_add (vm, ntohl (table_index), ntohl (opaque_index),
+ proto_, is_punt, match_, paths_);
+ vec_free (match_);
+
+err1:
+ vec_free (paths_);
+err0:
+ return rv;
+}
+
+static void
+vl_api_ip_session_redirect_add_t_handler (vl_api_ip_session_redirect_add_t *mp)
+{
+ vl_api_ip_session_redirect_add_reply_t *rmp;
+ int rv = vl_api_ip_session_redirect_add (
+ mp->table_index, mp->opaque_index, ~0 /* proto */, mp->is_punt, mp->match,
+ mp->match_len, mp->paths, mp->n_paths);
+ REPLY_MACRO (VL_API_IP_SESSION_REDIRECT_ADD_REPLY);
+}
+
+static void
+vl_api_ip_session_redirect_add_v2_t_handler (
+ vl_api_ip_session_redirect_add_v2_t *mp)
+{
+ vl_api_ip_session_redirect_add_v2_reply_t *rmp;
+ int rv = vl_api_ip_session_redirect_add (
+ mp->table_index, mp->opaque_index, mp->proto, mp->is_punt, mp->match,
+ mp->match_len, mp->paths, mp->n_paths);
+ REPLY_MACRO (VL_API_IP_SESSION_REDIRECT_ADD_V2_REPLY);
+}
+
+static void
+vl_api_ip_session_redirect_del_t_handler (vl_api_ip_session_redirect_del_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_ip_session_redirect_del_reply_t *rmp;
+ u8 *match = 0;
+ int rv;
+
+ vec_add (match, mp->match, mp->match_len);
+ rv = ip_session_redirect_del (vm, ntohl (mp->table_index), match);
+ vec_free (match);
+
+ REPLY_MACRO (VL_API_IP_SESSION_REDIRECT_DEL_REPLY);
+}
+
+#include "ip_session_redirect.api.c"
+static clib_error_t *
+ip_session_redirect_plugin_api_hookup (vlib_main_t *vm)
+{
+ vl_api_ip_session_redirect_msg_id_base = setup_message_id_table ();
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (ip_session_redirect_plugin_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ip_session_redirect/ip_session_redirect.api b/src/plugins/ip_session_redirect/ip_session_redirect.api
new file mode 100644
index 00000000000..2bf2373dbd2
--- /dev/null
+++ b/src/plugins/ip_session_redirect/ip_session_redirect.api
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2021-2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "0.3.0";
+import "vnet/interface_types.api";
+import "vnet/fib/fib_types.api";
+
+/** \brief Add or update a session redirection
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table_index - classifier table index
+ @param opaque_index - classifier session opaque index
+ @param match_len - classifier session match length in bytes (max is 80 bytes)
+ @param match - classifier session match
+ @param is_punt - true = punted traffic, false = forwarded traffic
+ @param n_paths - number of paths
+ @param paths - the paths of the redirect
+*/
+
+autoreply define ip_session_redirect_add
+{
+ option deprecated;
+ u32 client_index;
+ u32 context;
+
+ u32 table_index;
+ u8 match_len;
+ u8 match[80];
+ u32 opaque_index [default=0xffffffff];
+ bool is_punt;
+ u8 n_paths;
+ vl_api_fib_path_t paths[n_paths];
+
+ option vat_help = "table <index> match <match> via <path>";
+ option status="in_progress";
+};
+
+/** \brief Add or update a session redirection - version 2
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table_index - classifier table index
+ @param opaque_index - classifier session opaque index
+ @param proto - protocol of forwarded packets (default autodetect from path nh)
+ @param is_punt - true = punted traffic, false = forwarded traffic
+ @param match_len - classifier session match length in bytes (max is 80 bytes)
+ @param match - classifier session match
+ @param n_paths - number of paths
+ @param paths - the paths of the redirect
+*/
+
+autoreply define ip_session_redirect_add_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 table_index;
+ u32 opaque_index [default=0xffffffff];
+ vl_api_fib_path_nh_proto_t proto [default=0xffffffff];
+ bool is_punt;
+ u8 match_len;
+ u8 match[80];
+ u8 n_paths;
+ vl_api_fib_path_t paths[n_paths];
+
+ option vat_help = "table <index> match <match> via <path>";
+ option status="in_progress";
+};
+
+/** \brief Delete a session redirection
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table_index - classifier table index
+ @param match_len - classifier session match length in bytes (max is 80 bytes)
+ @param match - classifier session match
+*/
+
+autoreply define ip_session_redirect_del
+{
+ u32 client_index;
+ u32 context;
+
+ u32 table_index;
+ u8 match_len;
+ u8 match[match_len];
+
+ option vat_help = "session-index <index> table <index> match <match>";
+ option status="in_progress";
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ip_session_redirect/ip_session_redirect.h b/src/plugins/ip_session_redirect/ip_session_redirect.h
new file mode 100644
index 00000000000..45f64eebba1
--- /dev/null
+++ b/src/plugins/ip_session_redirect/ip_session_redirect.h
@@ -0,0 +1,33 @@
+/* Copyright (c) 2021-2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#ifndef IP_SESSION_REDIRECT_H_
+#define IP_SESSION_REDIRECT_H_
+
+#include <vnet/fib/fib_node.h>
+
+int ip_session_redirect_add (vlib_main_t *vm, u32 table_index,
+ u32 opaque_index, dpo_proto_t proto, int is_punt,
+ const u8 *match, const fib_route_path_t *rpaths);
+int ip_session_redirect_del (vlib_main_t *vm, u32 table_index,
+ const u8 *match);
+
+#endif /* IP_SESSION_REDIRECT_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ip_session_redirect/ip_session_redirect_doc.rst b/src/plugins/ip_session_redirect/ip_session_redirect_doc.rst
new file mode 100644
index 00000000000..aad87166f8f
--- /dev/null
+++ b/src/plugins/ip_session_redirect/ip_session_redirect_doc.rst
@@ -0,0 +1,42 @@
+IP session redirect
+===================
+
+This plugin allows steering packets via different paths based on the
+classifier.
+It leverages the VPP classifier ACL infrastructure (classifier, in_out_acl,
+etc.), extending its capabilities to redirect traffic without having to
+resort to additional VRFs.
+It also allows steering punted packets using the same mechanism.
+
+Maturity level
+--------------
+
+Under development: it should work, but has not been thoroughly tested.
+
+Features
+--------
+
+- steer regular and/or punted traffic using the classifier
+- API
+
+Quickstart
+----------
+
+1. configure punting
+
+::
+
+ ~# vppctl set punt ipv4 udp all
+
+2. create the classifier table and use it for punt ACL
+
+::
+
+ ~# vppctl classify table miss-next drop mask l3 ip4 src l4 udp src_port buckets 100000
+ ~# vppctl set interface input acl intfc local0 ip4-punt-table 0
+
+3. add a session to steer punted packets
+
+::
+
+ ~# vppctl ip session redirect table 0 match l3 ip4 src 10.10.10.10 l4 src_port 1234 via 10.10.0.10 pg1
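+
+4. optionally, verify the configuration (this plugin provides a show CLI)
+
+::
+
+ ~# vppctl show ip session redirect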
diff --git a/src/plugins/ip_session_redirect/punt_redirect.vpp b/src/plugins/ip_session_redirect/punt_redirect.vpp
new file mode 100644
index 00000000000..e3594cd71d9
--- /dev/null
+++ b/src/plugins/ip_session_redirect/punt_redirect.vpp
@@ -0,0 +1,48 @@
+create packet-generator interface pg0
+set int ip addr pg0 10.10.10.1/24
+
+create packet-generator interface pg1
+set int ip addr pg1 10.10.0.1/24
+set ip neighbor pg1 10.10.0.10 4.5.6
+
+set punt ipv4 udp all
+
+classify table miss-next drop mask l3 ip4 src l4 udp src_port buckets 100000
+set interface input acl intfc local0 ip4-punt-table 0
+ip session redirect punt table 0 match l3 ip4 src 10.10.10.10 l4 src_port 1234 via 10.10.0.10 pg1
+
+set int st pg0 up
+set int st pg1 up
+
+comment { punt because of no udp listener for 53667, redirected }
+packet-generator new { \
+ name ok \
+ limit 1 \
+ node ethernet-input \
+ source pg0 \
+ size 100-100 \
+ data { \
+ IP4: 5.6.7 -> 2.3.4 \
+ UDP: 10.10.10.10 -> 10.10.10.1 \
+ UDP: 1234 -> 53667 \
+ incrementing 1 \
+ } \
+}
+
+comment { punt because of no udp listener for 53668, dropped }
+packet-generator new { \
+ name nok \
+ limit 1 \
+ node ethernet-input \
+ source pg0 \
+ size 100-100 \
+ data { \
+ IP4: 5.6.7 -> 2.3.4 \
+ UDP: 10.10.10.10 -> 10.10.10.1 \
+ UDP: 1235 -> 53668 \
+ incrementing 1 \
+ } \
+}
+
+trace add pg-input 10
+pa en
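+
+comment { inspect the outcome with "show trace" and "show ip session redirect" }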
diff --git a/src/plugins/ip_session_redirect/redirect.c b/src/plugins/ip_session_redirect/redirect.c
new file mode 100644
index 00000000000..ea18182e309
--- /dev/null
+++ b/src/plugins/ip_session_redirect/redirect.c
@@ -0,0 +1,463 @@
+/* Copyright (c) 2021-2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/in_out_acl.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include "ip_session_redirect.h"
+
+typedef struct
+{
+ u8 *match_and_table_index;
+ dpo_id_t dpo; /* forwarding dpo */
+ fib_node_t node; /* linkage into the FIB graph */
+ fib_node_index_t pl;
+ u32 sibling;
+ u32 parent_node_index;
+ u32 opaque_index;
+ u32 table_index;
+ fib_forward_chain_type_t payload_type;
+ u8 is_punt : 1;
+ u8 is_ip6 : 1;
+} ip_session_redirect_t;
+
+typedef struct
+{
+ ip_session_redirect_t *pool;
+ u32 *session_by_match_and_table_index;
+ fib_node_type_t fib_node_type;
+} ip_session_redirect_main_t;
+
+static ip_session_redirect_main_t ip_session_redirect_main;
+
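+/* resolve the redirect's path list into a forwarding DPO and update the
+ * classifier session (hit-next index and metadata) so that matched packets
+ * follow it */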
+static int
+ip_session_redirect_stack (ip_session_redirect_t *ipr)
+{
+ dpo_id_t dpo = DPO_INVALID;
+
+ fib_path_list_contribute_forwarding (ipr->pl, ipr->payload_type,
+ fib_path_list_is_popular (ipr->pl) ?
+ FIB_PATH_LIST_FWD_FLAG_NONE :
+ FIB_PATH_LIST_FWD_FLAG_COLLAPSE,
+ &dpo);
+ dpo_stack_from_node (ipr->parent_node_index, &ipr->dpo, &dpo);
+ dpo_reset (&dpo);
+
+ /* update session with new next_index */
+ return vnet_classify_add_del_session (
+ &vnet_classify_main, ipr->table_index, ipr->match_and_table_index,
+ ipr->dpo.dpoi_next_node /* hit_next_index */, ipr->opaque_index,
+ 0 /* advance */, CLASSIFY_ACTION_SET_METADATA,
+ ipr->dpo.dpoi_index /* metadata */, 1 /* is_add */);
+}
+
+static ip_session_redirect_t *
+ip_session_redirect_find (ip_session_redirect_main_t *im, u32 table_index,
+ const u8 *match)
+{
+ /* we are adding the table index at the end of the match string so we
+ * can disambiguate identical matches in different tables in
+ * im->session_by_match_and_table_index */
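+ /* e.g. a 16-byte match in table 3 gives a 20-byte key: the 16 match
+ * bytes followed by the 4-byte table index */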
+ u8 *match_and_table_index = vec_dup (match);
+ vec_add (match_and_table_index, (void *) &table_index, 4);
+ uword *p =
+ hash_get_mem (im->session_by_match_and_table_index, match_and_table_index);
+ vec_free (match_and_table_index);
+ if (!p)
+ return 0;
+ return pool_elt_at_index (im->pool, p[0]);
+}
+
+__clib_export int
+ip_session_redirect_add (vlib_main_t *vm, u32 table_index, u32 opaque_index,
+ dpo_proto_t proto, int is_punt, const u8 *match,
+ const fib_route_path_t *rpaths)
+{
+ ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ fib_forward_chain_type_t payload_type;
+ ip_session_redirect_t *ipr;
+ const char *pname;
+
+ payload_type = fib_forw_chain_type_from_dpo_proto (proto);
+ switch (payload_type)
+ {
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ pname = is_punt ? "ip4-punt-acl" : "ip4-inacl";
+ break;
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ pname = is_punt ? "ip6-punt-acl" : "ip6-inacl";
+ break;
+ default:
+ return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
+ }
+
+ ipr = ip_session_redirect_find (im, table_index, match);
+ if (ipr)
+ {
+ /* update to an existing session */
+ fib_path_list_child_remove (ipr->pl, ipr->sibling);
+ dpo_reset (&ipr->dpo);
+ }
+ else
+ {
+ /* allocate a new entry */
+ pool_get (im->pool, ipr);
+ fib_node_init (&ipr->node, im->fib_node_type);
+ ipr->match_and_table_index = vec_dup ((u8 *) match);
+ /* we are adding the table index at the end of the match string so we
+ * can disambiguate identical matches in different tables in
+ * im->session_by_match_and_table_index */
+ vec_add (ipr->match_and_table_index, (void *) &table_index, 4);
+ ipr->table_index = table_index;
+ hash_set_mem (im->session_by_match_and_table_index,
+ ipr->match_and_table_index, ipr - im->pool);
+ }
+
+ ipr->payload_type = payload_type;
+ ipr->pl = fib_path_list_create (
+ FIB_PATH_LIST_FLAG_SHARED | FIB_PATH_LIST_FLAG_NO_URPF, rpaths);
+ ipr->sibling =
+ fib_path_list_child_add (ipr->pl, im->fib_node_type, ipr - im->pool);
+ ipr->parent_node_index = vlib_get_node_by_name (vm, (u8 *) pname)->index;
+ ipr->opaque_index = opaque_index;
+ ipr->is_punt = is_punt;
+ ipr->is_ip6 = payload_type == FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
+
+ return ip_session_redirect_stack (ipr);
+}
+
+__clib_export int
+ip_session_redirect_del (vlib_main_t *vm, u32 table_index, const u8 *match)
+{
+ ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ ip_session_redirect_t *ipr;
+ int rv;
+
+ ipr = ip_session_redirect_find (im, table_index, match);
+ if (!ipr)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ rv = vnet_classify_add_del_session (
+ cm, ipr->table_index, ipr->match_and_table_index, 0 /* hit_next_index */,
+ 0 /* opaque_index */, 0 /* advance */, 0 /* action */, 0 /* metadata */,
+ 0 /* is_add */);
+ if (rv)
+ return rv;
+
+ hash_unset_mem (im->session_by_match_and_table_index,
+ ipr->match_and_table_index);
+ vec_free (ipr->match_and_table_index);
+ fib_path_list_child_remove (ipr->pl, ipr->sibling);
+ dpo_reset (&ipr->dpo);
+ pool_put (im->pool, ipr);
+ return 0;
+}
+
+static int
+ip_session_redirect_show_yield (vlib_main_t *vm, f64 *start)
+{
+ /* do ~10us of work, then yield for ~20us, so that dumping huge data
+ * structures does not block the main thread */
+ f64 now = vlib_time_now (vm);
+ if (now - *start > 11e-6)
+ {
+ vlib_process_suspend (vm, 21e-6);
+ *start = vlib_time_now (vm);
+ return 1;
+ }
+
+ return 0;
+}
+
+static u8 *
+format_ip_session_redirect (u8 *s, va_list *args)
+{
+ const ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ const ip_session_redirect_t *ipr =
+ va_arg (*args, const ip_session_redirect_t *);
+ index_t ipri = ipr - im->pool;
+ const char *type = ipr->is_punt ? "[punt]" : "[acl]";
+ const char *ip = ipr->is_ip6 ? "[ip6]" : "[ip4]";
+ s =
+ format (s, "[%u] %s %s table %d key %U opaque_index 0x%x\n", ipri, type,
+ ip, ipr->table_index, format_hex_bytes, ipr->match_and_table_index,
+ vec_len (ipr->match_and_table_index) - 4, ipr->opaque_index);
+ s = format (s, " via:\n");
+ s = format (s, " %U", format_fib_path_list, ipr->pl, 2);
+ s = format (s, " forwarding\n");
+ s = format (s, " %U", format_dpo_id, &ipr->dpo, 0);
+ return s;
+}
+
+static clib_error_t *
+ip_session_redirect_show_cmd (vlib_main_t *vm, unformat_input_t *main_input,
+ vlib_cli_command_t *cmd)
+{
+ ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ ip_session_redirect_t *ipr;
+ clib_error_t *error = 0;
+ u32 table_index = ~0;
+ int is_punt = -1;
+ int is_ip6 = -1;
+ u8 *match = 0;
+ int max = 50;
+ u8 *s = 0;
+
+ if (unformat_is_eof (main_input))
+ unformat_init (line_input, 0,
+ 0); /* support straight "sh ip session redirect" */
+ else if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "all"))
+ ;
+ else if (unformat (line_input, "punt"))
+ is_punt = 1;
+ else if (unformat (line_input, "acl"))
+ is_punt = 0;
+ else if (unformat (line_input, "ip4"))
+ is_ip6 = 0;
+ else if (unformat (line_input, "ip6"))
+ is_ip6 = 1;
+ else if (unformat (line_input, "table %u", &table_index))
+ ;
+ else if (unformat (line_input, "match %U", unformat_classify_match, cm,
+ &match, table_index))
+ ;
+ else if (unformat (line_input, "max %d", &max))
+ ;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto out;
+ }
+ }
+
+ if (match)
+ {
+ ipr = ip_session_redirect_find (im, table_index, match);
+ if (!ipr)
+ vlib_cli_output (vm, "none");
+ else
+ vlib_cli_output (vm, "%U", format_ip_session_redirect, ipr);
+ }
+ else
+ {
+ f64 start = vlib_time_now (vm);
+ ip_session_redirect_t *iprs = im->pool;
+ int n = 0;
+ pool_foreach (ipr, iprs)
+ {
+ if (n >= max)
+ {
+ n = -1; /* signal overflow */
+ break;
+ }
+ if ((~0 == table_index || ipr->table_index == table_index) &&
+ (-1 == is_punt || ipr->is_punt == is_punt) &&
+ (-1 == is_ip6 || ipr->is_ip6 == is_ip6))
+ {
+ s = format (s, "%U\n", format_ip_session_redirect, ipr);
+ n++;
+ }
+ if (ip_session_redirect_show_yield (vm, &start))
+ {
+ /* we must reload the pool as it might have moved */
+ u32 ii = ipr - iprs;
+ iprs = im->pool;
+ ipr = iprs + ii;
+ }
+ }
+ vec_add1 (s, 0);
+ vlib_cli_output (vm, "%s", s); /* don't use the buffer as a format string */
+ vec_free (s);
+ if (-1 == n)
+ {
+ vlib_cli_output (
+ vm,
+ "\nPlease note: only the first %d entries displayed. "
+ "To display more, specify max.",
+ max);
+ }
+ }
+
+out:
+ vec_free (match);
+ unformat_free (line_input);
+ return error;
+}
+
+VLIB_CLI_COMMAND (ip_session_redirect_show_command, static) = {
+ .path = "show ip session redirect",
+ .function = ip_session_redirect_show_cmd,
+ .short_help = "show ip session redirect [all|[table <table-index>] "
+ "[punt|acl] [ip4|ip6] [match]]",
+};
+
+static clib_error_t *
+ip_session_redirect_cmd (vlib_main_t *vm, unformat_input_t *main_input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ dpo_proto_t proto = DPO_PROTO_IP4;
+ fib_route_path_t *rpaths = 0, rpath;
+ clib_error_t *error = 0;
+ u32 opaque_index = ~0;
+ u32 table_index = ~0;
+ int is_punt = 0;
+ int is_add = 1;
+ u8 *match = 0;
+ int rv;
+
+ if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "punt"))
+ is_punt = 1;
+ else if (unformat (line_input, "table %u", &table_index))
+ ;
+ else if (unformat (line_input, "opaque-index %u", &opaque_index))
+ ;
+ else if (unformat (line_input, "match %U", unformat_classify_match, cm,
+ &match, table_index))
+ ;
+ else if (unformat (line_input, "via %U", unformat_fib_route_path, &rpath,
+ &proto))
+ vec_add1 (rpaths, rpath);
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto out;
+ }
+ }
+
+ if (~0 == table_index || 0 == match)
+ {
+ error = clib_error_create ("missing table index or match");
+ goto out;
+ }
+
+ if (is_add)
+ {
+ if (0 == rpaths)
+ {
+ error = clib_error_create ("missing path");
+ goto out;
+ }
+ rv = ip_session_redirect_add (vm, table_index, opaque_index, proto,
+ is_punt, match, rpaths);
+ }
+ else
+ {
+ rv = ip_session_redirect_del (vm, table_index, match);
+ }
+
+ if (rv)
+ error = clib_error_create ("failed with error %d", rv);
+
+out:
+ vec_free (rpaths);
+ vec_free (match);
+ unformat_free (line_input);
+ return error;
+}
+
+VLIB_CLI_COMMAND (ip_session_redirect_command, static) = {
+ .path = "ip session redirect",
+ .function = ip_session_redirect_cmd,
+ .short_help = "ip session redirect [add] [punt] table <index> match <match> "
+ "via <path> | del table <index> match <match>"
+};
+
+static fib_node_t *
+ip_session_redirect_get_node (fib_node_index_t index)
+{
+ ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ ip_session_redirect_t *ipr = pool_elt_at_index (im->pool, index);
+ return &ipr->node;
+}
+
+static ip_session_redirect_t *
+ip_session_redirect_get_from_node (fib_node_t *node)
+{
+ return (
+ ip_session_redirect_t *) (((char *) node) -
+ STRUCT_OFFSET_OF (ip_session_redirect_t, node));
+}
+
+static void
+ip_session_redirect_last_lock_gone (fib_node_t *node)
+{
+ /* the lifetime of the entry is managed by the table. */
+ ASSERT (0);
+}
+
+/* A back walk has reached this entry */
+static fib_node_back_walk_rc_t
+ip_session_redirect_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ int rv;
+ ip_session_redirect_t *ipr = ip_session_redirect_get_from_node (node);
+ rv = ip_session_redirect_stack (ipr);
+ ASSERT (0 == rv);
+ if (rv)
+ clib_warning ("ip_session_redirect_stack() error %d", rv);
+ return FIB_NODE_BACK_WALK_CONTINUE;
+}
+
+static const fib_node_vft_t ip_session_redirect_vft = {
+ .fnv_get = ip_session_redirect_get_node,
+ .fnv_last_lock = ip_session_redirect_last_lock_gone,
+ .fnv_back_walk = ip_session_redirect_back_walk_notify,
+};
+
+static clib_error_t *
+ip_session_redirect_init (vlib_main_t *vm)
+{
+ ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ im->session_by_match_and_table_index =
+ hash_create_vec (0, sizeof (u8), sizeof (u32));
+ im->fib_node_type = fib_node_register_new_type ("ip-session-redirect",
+ &ip_session_redirect_vft);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip_session_redirect_init);
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "IP session redirect",
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ip_session_redirect/test_api.c b/src/plugins/ip_session_redirect/test_api.c
new file mode 100644
index 00000000000..e4026a673ff
--- /dev/null
+++ b/src/plugins/ip_session_redirect/test_api.c
@@ -0,0 +1,195 @@
+/* Copyright (c) 2021-2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_api.h>
+#include <vnet/ip/ip_format_fns.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#define __plugin_msg_base ip_session_redirect_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+/* declare message IDs */
+#include "ip_session_redirect.api_enum.h"
+#include "ip_session_redirect.api_types.h"
+#include "ip_session_redirect.h"
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} ip_session_redirect_test_main_t;
+
+ip_session_redirect_test_main_t ip_session_redirect_test_main;
+
+static int
+api_ip_session_redirect_add_parse (vat_main_t *vam, u32 *table_index,
+ u32 *opaque_index, dpo_proto_t *proto,
+ int *is_punt, u8 **match,
+ fib_route_path_t **paths)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ fib_route_path_t path;
+
+ *table_index = ~0;
+ *opaque_index = ~0;
+ *proto = DPO_PROTO_IP4;
+ *is_punt = 0;
+ *match = 0;
+ *paths = 0;
+
+ while (unformat_check_input (vam->input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (vam->input, "punt"))
+ *is_punt = 1;
+ else if (unformat (vam->input, "table %u", table_index))
+ ;
+ else if (unformat (vam->input, "opaque-index %u", opaque_index))
+ ;
+ else if (unformat (vam->input, "match %U", unformat_classify_match, cm,
+ match, *table_index))
+ ;
+ else if (unformat (vam->input, "via %U", unformat_fib_route_path, &path,
+ proto))
+ vec_add1 (*paths, path);
+ else
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error,
+ vam->input);
+ return -99;
+ }
+ }
+
+ return 0;
+}
+
+static int
+api_ip_session_redirect_add (vat_main_t *vam)
+{
+ vl_api_ip_session_redirect_add_t *mp;
+ fib_route_path_t *paths;
+ dpo_proto_t proto;
+ u32 opaque_index;
+ u32 table_index;
+ int is_punt;
+ int ret, i;
+ u8 *match;
+
+ ret = api_ip_session_redirect_add_parse (vam, &table_index, &opaque_index,
+ &proto, &is_punt, &match, &paths);
+ if (ret)
+ goto err;
+
+ M2 (IP_SESSION_REDIRECT_ADD, mp, vec_len (paths) * sizeof (mp->paths[0]));
+
+ mp->table_index = htonl (table_index);
+ mp->opaque_index = htonl (opaque_index);
+ mp->is_punt = is_punt;
+ memcpy_s (mp->match, sizeof (mp->match), match, vec_len (match));
+ mp->n_paths = vec_len (paths);
+ vec_foreach_index (i, paths)
+ fib_api_path_encode (&paths[i], &mp->paths[i]);
+
+ S (mp);
+ W (ret);
+
+err:
+ vec_free (match);
+ vec_free (paths);
+ return ret;
+}
+
+static int
+api_ip_session_redirect_add_v2 (vat_main_t *vam)
+{
+ vl_api_ip_session_redirect_add_v2_t *mp;
+ fib_route_path_t *paths;
+ dpo_proto_t proto;
+ u32 opaque_index;
+ u32 table_index;
+ int is_punt;
+ int ret, i;
+ u8 *match;
+
+ ret = api_ip_session_redirect_add_parse (vam, &table_index, &opaque_index,
+ &proto, &is_punt, &match, &paths);
+ if (ret)
+ goto err;
+
+ M2 (IP_SESSION_REDIRECT_ADD_V2, mp, vec_len (paths) * sizeof (mp->paths[0]));
+
+ mp->table_index = htonl (table_index);
+ mp->opaque_index = htonl (opaque_index);
+ mp->proto = fib_api_path_dpo_proto_to_nh (proto);
+ mp->is_punt = is_punt;
+ memcpy_s (mp->match, sizeof (mp->match), match, vec_len (match));
+ mp->n_paths = vec_len (paths);
+ vec_foreach_index (i, paths)
+ fib_api_path_encode (&paths[i], &mp->paths[i]);
+
+ S (mp);
+ W (ret);
+
+err:
+ vec_free (match);
+ vec_free (paths);
+ return ret;
+}
+
+static int
+api_ip_session_redirect_del (vat_main_t *vam)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ vl_api_ip_session_redirect_del_t *mp;
+ u32 table_index = ~0;
+ u8 *match = 0;
+ int ret;
+
+ while (unformat_check_input (vam->input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (vam->input, "table %u", &table_index))
+ ;
+ else if (unformat (vam->input, "match %U", unformat_classify_match, cm,
+ &match, table_index))
+ ;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error,
+ vam->input);
+ return -99;
+ }
+ }
+
+ M2 (IP_SESSION_REDIRECT_DEL, mp, vec_len (match));
+
+ mp->table_index = htonl (table_index);
+ mp->match_len = vec_len (match); /* u8 field: no byte swap */
+ clib_memcpy (mp->match, match, vec_len (match));
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
+#include "ip_session_redirect.api_test.c"
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/l2e/l2e.c b/src/plugins/l2e/l2e.c
deleted file mode 100644
index 4c6eac50446..00000000000
--- a/src/plugins/l2e/l2e.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * l2e.c : Extract L3 packets from the L2 input and feed
- * them into the L3 path.
- *
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/l2e/l2e.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/feat_bitmap.h>
-#include <vnet/ip/ip.h>
-
-l2_emulation_main_t l2_emulation_main;
-
-/**
- * A zero'd out struct we can use in the vec_validate
- */
-static const l2_emulation_t ezero = { };
-
-__clib_export void
-l2_emulation_enable (u32 sw_if_index)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- vec_validate_init_empty (em->l2_emulations, sw_if_index, ezero);
-
- l2_emulation_t *l23e = &em->l2_emulations[sw_if_index];
-
- l23e->enabled = 1;
-
- /*
- * L3 enable the interface - using IP unnumbered from the control
- * plane may not be possible since there may be no BVI interface
- * to which to unnumber
- */
- ip4_sw_interface_enable_disable (sw_if_index, 1);
- ip6_sw_interface_enable_disable (sw_if_index, 1);
-
- l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_L2_EMULATION, 1);
-}
-
-
-__clib_export void
-l2_emulation_disable (u32 sw_if_index)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- if (vec_len (em->l2_emulations) >= sw_if_index)
- {
- l2_emulation_t *l23e = &em->l2_emulations[sw_if_index];
- clib_memset (l23e, 0, sizeof (*l23e));
-
- l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_L2_EMULATION, 0);
- ip4_sw_interface_enable_disable (sw_if_index, 0);
- ip6_sw_interface_enable_disable (sw_if_index, 0);
- }
-}
-
-static clib_error_t *
-l2_emulation_interface_add_del (vnet_main_t * vnm,
- u32 sw_if_index, u32 is_add)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- if (is_add)
- {
- vec_validate_init_empty (em->l2_emulations, sw_if_index, ezero);
- }
-
- return (NULL);
-}
-
-VNET_SW_INTERFACE_ADD_DEL_FUNCTION (l2_emulation_interface_add_del);
-
-static clib_error_t *
-l2_emulation_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vnet_main_t *vnm = vnet_get_main ();
- u32 sw_if_index = ~0;
- u8 enable = 1;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "%U", unformat_vnet_sw_interface,
- vnm, &sw_if_index))
- ;
- else if (unformat (input, "enable"))
- enable = 1;
- else if (unformat (input, "disable"))
- enable = 0;
- else
- break;
- }
-
- if (~0 == sw_if_index)
- return clib_error_return (0, "interface must be specified");
-
- if (enable)
- l2_emulation_enable (sw_if_index);
- else
- l2_emulation_disable (sw_if_index);
-
- return (NULL);
-}
-
-/*?
- * Configure l2 emulation.
- * When the interface is in L2 mode, configure the extraction of L3
- * packets out of the L2 path and into the L3 path.
- *
- * @cliexpar
- * @cliexstart{set interface l2 input l2-emulation <interface-name> [disable]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (l2_emulation_cli_node, static) = {
- .path = "set interface l2 l2-emulation",
- .short_help =
- "set interface l2 l2-emulation <interface-name> [disable|enable]\n",
- .function = l2_emulation_cli,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-l2_emulation_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- vnet_main_t *vnm = vnet_get_main ();
- l2_emulation_t *l23e;
- u32 sw_if_index;
-
- vec_foreach_index (sw_if_index, em->l2_emulations)
- {
- l23e = &em->l2_emulations[sw_if_index];
- if (l23e->enabled)
- {
- vlib_cli_output (vm, "%U\n",
- format_vnet_sw_if_index_name, vnm, sw_if_index);
- }
- }
- return (NULL);
-}
-
-/*?
- * Show l2 emulation.
- * When the interface is in L2 mode, configure the extraction of L3
- * packets out of the L2 path and into the L3 path.
- *
- * @cliexpar
- * @cliexstart{show interface l2 l2-emulation}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (l2_emulation_show_node, static) = {
- .path = "show interface l2 l2-emulation",
- .short_help = "show interface l2 l2-emulation\n",
- .function = l2_emulation_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-l2_emulation_init (vlib_main_t * vm)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- vlib_node_t *node;
-
- node = vlib_get_node_by_name (vm, (u8 *) "l2-emulation");
- em->l2_emulation_node_index = node->index;
-
- /* Initialize the feature next-node indexes */
- feat_bitmap_init_next_nodes (vm,
- em->l2_emulation_node_index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- em->l2_input_feat_next);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (l2_emulation_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/l2e/l2e.h b/src/plugins/l2e/l2e.h
deleted file mode 100644
index e548d333f9d..00000000000
--- a/src/plugins/l2e/l2e.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_vnet_l2_emulation_h
-#define included_vnet_l2_emulation_h
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-
-/**
- * Per-interface L2 configuration
- */
-typedef struct l2_emulation_t_
-{
- /**
- * Enabled or Disabled.
- * this is required since one L3 protocol can be enabled, but others not
- */
- u8 enabled;
-} l2_emulation_t;
-
-/**
- * per-packet trace data
- */
-typedef struct l2_emulation_trace_t_
-{
- /* per-pkt trace data */
- u8 extracted;
-} l2_emulation_trace_t;
-
-/**
- * Grouping of global data for the L2 emulation feature
- */
-typedef struct l2_emulation_main_t_
-{
- u16 msg_id_base;
-
- u32 l2_emulation_node_index;
-
- /**
- * Per-interface vector of emulation configs
- */
- l2_emulation_t *l2_emulations;
-
- /**
- * Next nodes for L2 output features
- */
- u32 l2_input_feat_next[32];
-} l2_emulation_main_t;
-
-/**
- * L2 Emulation is a feature that is applied to L2 ports to 'extract'
- * IP packets from the L2 path and inject them into the L3 path (i.e.
- * into the appropriate ip[4|6]_input node).
- * L3 routes in the table_id for that interface should then be configured
- * as DVR routes, therefore the forwarded packet has the L2 header
- * preserved and together the L3 routed system behaves like an L2 bridge.
- */
-extern void l2_emulation_enable (u32 sw_if_index);
-extern void l2_emulation_disable (u32 sw_if_index);
-
-extern l2_emulation_main_t l2_emulation_main;
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
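
Note: the header above was the entire control-plane surface of the removed
plugin. A minimal usage sketch, assuming a valid sw_if_index (the value
here is hypothetical):

  u32 sw_if_index = 1;                 /* hypothetical interface index */
  l2_emulation_enable (sw_if_index);   /* extract unicast IP4/IP6 into the L3 path */
  /* ... */
  l2_emulation_disable (sw_if_index);  /* restore pure L2 forwarding */
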
diff --git a/src/plugins/l2e/l2e_api.c b/src/plugins/l2e/l2e_api.c
deleted file mode 100644
index fe2fb7ee06e..00000000000
--- a/src/plugins/l2e/l2e_api.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- *------------------------------------------------------------------
- * l2e_api.c - layer 2 emulation api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vnet/plugin/plugin.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vpp/app/version.h>
-
-#include <l2e/l2e.h>
-
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-
-/* define message IDs */
-#include <l2e/l2e.api_enum.h>
-#include <l2e/l2e.api_types.h>
-
-#include <vlibapi/api_helper_macros.h>
-
-#define L2E_MSG_BASE l2em->msg_id_base
-
-static void
-vl_api_l2_emulation_t_handler (vl_api_l2_emulation_t * mp)
-{
- l2_emulation_main_t *l2em = &l2_emulation_main;
- vl_api_l2_emulation_reply_t *rmp;
- int rv = 0;
-
- VALIDATE_SW_IF_INDEX (mp);
-
- u32 sw_if_index = ntohl (mp->sw_if_index);
-
- if (mp->enable)
- l2_emulation_enable (sw_if_index);
- else
- l2_emulation_disable (sw_if_index);
-
- BAD_SW_IF_INDEX_LABEL;
-
- REPLY_MACRO (VL_API_L2_EMULATION_REPLY + L2E_MSG_BASE);
-}
-
-#include <l2e/l2e.api.c>
-static clib_error_t *
-l2e_init (vlib_main_t * vm)
-{
- l2_emulation_main_t *l2em = &l2_emulation_main;
-
- /* Ask for a correctly-sized block of API message decode slots */
- l2em->msg_id_base = setup_message_id_table ();
-
- return (NULL);
-}
-
-VLIB_API_INIT_FUNCTION (l2e_init);
-
-/* *INDENT-OFF* */
-VLIB_PLUGIN_REGISTER () = {
- .version = VPP_BUILD_VER,
- .description = "Layer 2 (L2) Emulation",
-};
-/* *INDENT-ON* */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/l2e/l2e_node.c b/src/plugins/l2e/l2e_node.c
deleted file mode 100644
index 71c9b4bc6af..00000000000
--- a/src/plugins/l2e/l2e_node.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * l2e_node.c : l2 emulation node
- *
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <plugins/l2e/l2e.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/feat_bitmap.h>
-
-#define foreach_l2_emulation \
- _(IP4, "Extract IPv4") \
- _(IP6, "Extract IPv6")
-
-typedef enum
-{
-#define _(sym,str) L2_EMULATION_ERROR_##sym,
- foreach_l2_emulation
-#undef _
- L2_EMULATION_N_ERROR,
-} l2_emulation_error_t;
-
-static char *l2_emulation_error_strings[] = {
-#define _(sym,string) string,
- foreach_l2_emulation
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) L2_EMULATION_NEXT_##sym,
- foreach_l2_emulation
-#undef _
- L2_EMULATION_N_NEXT,
-} l2_emulation_next_t;
-
-/* packet trace format function */
-static u8 *
-format_l2_emulation_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- l2_emulation_trace_t *t = va_arg (*args, l2_emulation_trace_t *);
-
- s = format (s, "l2-emulation: %s", (t->extracted ? "yes" : "no"));
-
- return s;
-}
-
-VLIB_NODE_FN (l2_emulation_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- u32 n_left_from, *from, *to_next;
- l2_emulation_next_t next_index;
- u32 ip4_hits = 0;
- u32 ip6_hits = 0;
-
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- vlib_buffer_t *b0, *b1;
- u32 sw_if_index0, sw_if_index1;
- u16 ether_type0, ether_type1;
- u32 next0 = ~0, next1 = ~0;
- u8 l2_len0, l2_len1;
- u32 bi0, bi1;
- u8 *h0, *h1;
-
- bi0 = to_next[0] = from[0];
- bi1 = to_next[1] = from[1];
-
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
- l2_len0 = vnet_buffer (b0)->l2.l2_len;
- l2_len1 = vnet_buffer (b1)->l2.l2_len;
-
- h0 = vlib_buffer_get_current (b0);
- h1 = vlib_buffer_get_current (b1);
-
- ether_type0 = clib_net_to_host_u16 (*(u16 *) (h0 + l2_len0 - 2));
- ether_type1 = clib_net_to_host_u16 (*(u16 *) (h1 + l2_len1 - 2));
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
-
- /*
- * only extract unicast
- */
- if (PREDICT_TRUE (!(h0[0] & 0x1)))
- {
- switch (ether_type0)
- {
- case ETHERNET_TYPE_IP4:
- ASSERT (em->l2_emulations[sw_if_index0].enabled);
- ++ip4_hits;
- next0 = L2_EMULATION_NEXT_IP4;
- vlib_buffer_advance (b0, l2_len0);
- break;
- case ETHERNET_TYPE_IP6:
- ASSERT (em->l2_emulations[sw_if_index0].enabled);
- ++ip6_hits;
- next0 = L2_EMULATION_NEXT_IP6;
- vlib_buffer_advance (b0, l2_len0);
- default:
- break;
- }
- }
- if (PREDICT_TRUE (!(h1[0] & 0x1)))
- {
- switch (ether_type1)
- {
- case ETHERNET_TYPE_IP4:
- ASSERT (em->l2_emulations[sw_if_index1].enabled);
- ++ip4_hits;
- next1 = L2_EMULATION_NEXT_IP4;
- vlib_buffer_advance (b1, l2_len1);
- break;
- case ETHERNET_TYPE_IP6:
- ASSERT (em->l2_emulations[sw_if_index1].enabled);
- ++ip6_hits;
- next1 = L2_EMULATION_NEXT_IP6;
- vlib_buffer_advance (b1, l2_len1);
- default:
- break;
- }
- }
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- l2_emulation_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->extracted = (next0 != ~0);
- }
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b1->flags & VLIB_BUFFER_IS_TRACED)))
- {
- l2_emulation_trace_t *t =
- vlib_add_trace (vm, node, b1, sizeof (*t));
- t->extracted = (next1 != ~0);
- }
-
- /* Determine the next node and remove ourself from bitmap */
- if (PREDICT_TRUE (next0 == ~0))
- next0 = vnet_l2_feature_next (b0, em->l2_input_feat_next,
- L2INPUT_FEAT_L2_EMULATION);
-
- /* Determine the next node and remove ourself from bitmap */
- if (PREDICT_TRUE (next1 == ~0))
- next1 = vnet_l2_feature_next (b1, em->l2_input_feat_next,
- L2INPUT_FEAT_L2_EMULATION);
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *b0;
- u32 sw_if_index0;
- u16 ether_type0;
- u32 next0 = ~0;
- u8 l2_len0;
- u32 bi0;
- u8 *h0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- l2_len0 = vnet_buffer (b0)->l2.l2_len;
-
- h0 = vlib_buffer_get_current (b0);
- ether_type0 = clib_net_to_host_u16 (*(u16 *) (h0 + l2_len0 - 2));
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- /*
- * only extract unicast
- */
- if (PREDICT_TRUE (!(h0[0] & 0x1)))
- {
- switch (ether_type0)
- {
- case ETHERNET_TYPE_IP4:
- ASSERT (em->l2_emulations[sw_if_index0].enabled);
- ++ip4_hits;
- next0 = L2_EMULATION_NEXT_IP4;
- vlib_buffer_advance (b0, l2_len0);
- break;
- case ETHERNET_TYPE_IP6:
- ASSERT (em->l2_emulations[sw_if_index0].enabled);
- ++ip6_hits;
- next0 = L2_EMULATION_NEXT_IP6;
- vlib_buffer_advance (b0, l2_len0);
- default:
- break;
- }
- }
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- l2_emulation_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->extracted = (next0 != ~0);
- }
-
- /* Determine the next node and remove ourself from bitmap */
- if (PREDICT_TRUE (next0 == ~0))
- next0 = vnet_l2_feature_next (b0, em->l2_input_feat_next,
- L2INPUT_FEAT_L2_EMULATION);
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_node_increment_counter (vm, node->node_index,
- L2_EMULATION_ERROR_IP4, ip4_hits);
- vlib_node_increment_counter (vm, node->node_index,
- L2_EMULATION_ERROR_IP6, ip6_hits);
-
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (l2_emulation_node) = {
- .name = "l2-emulation",
- .vector_size = sizeof (u32),
- .format_trace = format_l2_emulation_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(l2_emulation_error_strings),
- .error_strings = l2_emulation_error_strings,
-
- .n_next_nodes = L2_EMULATION_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [L2_EMULATION_NEXT_IP4] = "ip4-input",
- [L2_EMULATION_NEXT_IP6] = "ip6-input",
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
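
Note: two details carry the deleted extraction logic. Bit 0 of the first
destination-MAC byte is the group bit, so !(h0[0] & 0x1) selects unicast
only; and with l2_len bytes of L2 header (VLAN tags included), the 2-byte
ethertype is the final field, hence the read at offset l2_len - 2:

  u8 *h0 = vlib_buffer_get_current (b0);   /* start of the ethernet header */
  u8 l2_len0 = vnet_buffer (b0)->l2.l2_len;
  u16 ethertype = clib_net_to_host_u16 (*(u16 *) (h0 + l2_len0 - 2));
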
diff --git a/src/plugins/l2tp/decap.c b/src/plugins/l2tp/decap.c
index 8c41bdd2357..e6ad8b0926c 100644
--- a/src/plugins/l2tp/decap.c
+++ b/src/plugins/l2tp/decap.c
@@ -249,7 +249,6 @@ VLIB_NODE_FN (l2t_decap_node) (vlib_main_t * vm,
* while l2tp-decap-local drops it.
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2t_decap_node) = {
.name = "l2tp-decap",
.vector_size = sizeof (u32),
@@ -267,11 +266,9 @@ VLIB_REGISTER_NODE (l2t_decap_node) = {
[L2T_DECAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
extern vlib_node_function_t l2t_decap_node_fn;
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2t_decap_local_node) = {
.function = l2t_decap_node_fn,
.name = "l2tp-decap-local",
@@ -290,7 +287,6 @@ VLIB_REGISTER_NODE (l2t_decap_local_node) = {
[L2T_DECAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
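
Note: this is the first of many hunks that simply delete the
/* *INDENT-OFF* */ ... /* *INDENT-ON* */ guard pairs. They protected
macro-heavy initializers from GNU indent; with the tree now formatted by
clang-format, the equivalent escape hatch (used by the lb hunks further
down) is:

  /* clang-format off */
  VLIB_REGISTER_NODE (example_node) = { .name = "example" };  /* hypothetical node */
  /* clang-format on */
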
diff --git a/src/plugins/l2tp/encap.c b/src/plugins/l2tp/encap.c
index fbb5fc6ea46..3115b96f088 100644
--- a/src/plugins/l2tp/encap.c
+++ b/src/plugins/l2tp/encap.c
@@ -192,7 +192,6 @@ VLIB_NODE_FN (l2t_encap_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2t_encap_node) = {
.name = "l2tp-encap",
.vector_size = sizeof (u32),
@@ -211,7 +210,6 @@ VLIB_REGISTER_NODE (l2t_encap_node) = {
[L2T_ENCAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
diff --git a/src/plugins/l2tp/l2tp.c b/src/plugins/l2tp/l2tp.c
index 08fa6d1e60b..907468b5900 100644
--- a/src/plugins/l2tp/l2tp.c
+++ b/src/plugins/l2tp/l2tp.c
@@ -53,12 +53,10 @@ format_l2t_session (u8 * s, va_list * args)
vlib_counter_t v;
s = format (s, "[%d] %U (our) %U (client) %U (sw_if_index %d)\n",
- session - lm->sessions,
- format_ip6_address, &session->our_address,
- format_ip6_address, &session->client_address,
- format_vnet_sw_interface_name, lm->vnet_main,
- vnet_get_sw_interface (lm->vnet_main, session->sw_if_index),
- session->sw_if_index);
+ session - lm->sessions, format_ip6_address,
+ &session->our_address, format_ip6_address,
+ &session->client_address, format_vnet_sw_if_index_name,
+ lm->vnet_main, session->sw_if_index, session->sw_if_index);
s = format (s, " local cookies %016llx %016llx remote cookie %016llx\n",
clib_net_to_host_u64 (session->local_cookie[0]),
@@ -129,24 +127,20 @@ show_l2tp_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "L2tp session lookup on %s", keystr);
- /* *INDENT-OFF* */
pool_foreach (session, lm->sessions)
{
vlib_cli_output (vm, "%U", format_l2t_session, session);
}
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_detail_command, static) = {
.path = "show l2tpv3",
.short_help = "show l2tpv3 [verbose]",
.function = show_l2tp_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_counters_command_fn (vlib_main_t * vm,
@@ -159,7 +153,6 @@ test_counters_command_fn (vlib_main_t * vm,
u32 nincr = 0;
u32 thread_index = vm->thread_index;
- /* *INDENT-OFF* */
pool_foreach (session, lm->sessions)
{
session_index = session - lm->sessions;
@@ -177,19 +170,16 @@ test_counters_command_fn (vlib_main_t * vm,
nincr++;
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "Incremented %d active counters\n", nincr);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_counters_command, static) = {
.path = "test lt2p counters",
.short_help = "increment all active counters",
.function = test_counters_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_counters_command_fn (vlib_main_t * vm,
@@ -201,7 +191,6 @@ clear_counters_command_fn (vlib_main_t * vm,
u32 counter_index;
u32 nincr = 0;
- /* *INDENT-OFF* */
pool_foreach (session, lm->sessions)
{
session_index = session - lm->sessions;
@@ -212,19 +201,16 @@ clear_counters_command_fn (vlib_main_t * vm,
vlib_zero_combined_counter (&lm->counter_main, counter_index+1);
nincr++;
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "Cleared %d active counters\n", nincr);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_counters_command, static) = {
.path = "clear l2tp counters",
.short_help = "clear all active counters",
.function = clear_counters_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_l2tpv3_name (u8 * s, va_list * args)
@@ -254,13 +240,11 @@ l2tpv3_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
return 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (l2tpv3_device_class,static) = {
.name = "L2TPv3",
.format_device_name = format_l2tpv3_name,
.name_renumber = l2tpv3_name_renumber,
};
-/* *INDENT-ON* */
static u8 *
format_l2tp_header_with_length (u8 * s, va_list * args)
@@ -270,14 +254,12 @@ format_l2tp_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (l2tpv3_hw_class) = {
.name = "L2TPV3",
.format_header = format_l2tp_header_with_length,
.build_rewrite = default_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
int
create_l2tpv3_ipv6_tunnel (l2t_main_t * lm,
@@ -377,7 +359,7 @@ create_l2tpv3_ipv6_tunnel (l2t_main_t * lm,
{
hw_if_index = lm->free_l2tpv3_tunnel_hw_if_indices
[vec_len (lm->free_l2tpv3_tunnel_hw_if_indices) - 1];
- _vec_len (lm->free_l2tpv3_tunnel_hw_if_indices) -= 1;
+ vec_dec_len (lm->free_l2tpv3_tunnel_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, hw_if_index);
hi->dev_instance = s - lm->sessions;
@@ -517,7 +499,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_l2tpv3_tunnel_command, static) =
{
.path = "create l2tpv3 tunnel",
@@ -525,7 +506,6 @@ VLIB_CLI_COMMAND (create_l2tpv3_tunnel_command, static) =
"create l2tpv3 tunnel client <ip6> our <ip6> local-cookie <hex> remote-cookie <hex> local-session <dec> remote-session <dec>",
.function = create_l2tpv3_tunnel_command_fn,
};
-/* *INDENT-ON* */
int
l2tpv3_set_tunnel_cookies (l2t_main_t * lm,
@@ -600,7 +580,6 @@ set_l2tp_tunnel_cookie_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_l2tp_tunnel_cookie_command, static) =
{
.path = "set l2tpv3 tunnel cookie",
@@ -608,7 +587,6 @@ VLIB_CLI_COMMAND (set_l2tp_tunnel_cookie_command, static) =
"set l2tpv3 tunnel cookie <intfc> local <hex> remote <hex>",
.function = set_l2tp_tunnel_cookie_command_fn,
};
-/* *INDENT-ON* */
int
l2tpv3_interface_enable_disable (vnet_main_t * vnm,
@@ -665,14 +643,12 @@ set_ip6_l2tpv3 (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_l2tpv3, static) =
{
.path = "set interface ip6 l2tpv3",
.function = set_ip6_l2tpv3,
.short_help = "set interface ip6 l2tpv3 <intfc> [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
l2tp_config (vlib_main_t * vm, unformat_input_t * input)
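
Note: the patch also retires direct writes through the _vec_len lvalue in
favor of the explicit length accessors from vppinfra, as in the hunks
above. Side by side:

  _vec_len (v) -= 1;     /* old: poke the vector header directly */
  vec_dec_len (v, 1);    /* new: decrement length by n */

  _vec_len (v) = 0;      /* old */
  vec_set_len (v, 0);    /* new: truncate to an exact length */
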
diff --git a/src/plugins/l2tp/l2tp_api.c b/src/plugins/l2tp/l2tp_api.c
index ba9d2681b35..9c5ad700e0c 100644
--- a/src/plugins/l2tp/l2tp_api.c
+++ b/src/plugins/l2tp/l2tp_api.c
@@ -89,12 +89,10 @@ vl_api_sw_if_l2tpv3_tunnel_dump_t_handler (vl_api_sw_if_l2tpv3_tunnel_dump_t *
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (session, lm->sessions)
{
send_sw_if_l2tpv3_tunnel_details (am, reg, session, lm, mp->context);
}
- /* *INDENT-ON* */
}
static void vl_api_l2tpv3_create_tunnel_t_handler
@@ -146,12 +144,10 @@ static void vl_api_l2tpv3_create_tunnel_t_handler
encap_fib_index, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_L2TPV3_CREATE_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void vl_api_l2tpv3_set_tunnel_cookies_t_handler
@@ -234,12 +230,10 @@ VLIB_API_INIT_FUNCTION (l2tp_api_hookup);
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Layer 2 Tunneling Protocol v3 (L2TP)",
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/l2tp/l2tp_test.c b/src/plugins/l2tp/l2tp_test.c
index 87abf5d0a2a..33691313cbe 100644
--- a/src/plugins/l2tp/l2tp_test.c
+++ b/src/plugins/l2tp/l2tp_test.c
@@ -26,7 +26,7 @@
/* define message IDs */
#include <l2tp/l2tp.api_enum.h>
#include <l2tp/l2tp.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -41,13 +41,11 @@ l2tp_test_main_t l2tp_test_main;
#define __plugin_msg_base l2tp_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static void vl_api_l2tpv3_create_tunnel_reply_t_handler
(vl_api_l2tpv3_create_tunnel_reply_t * mp)
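
Note: FINISH is the closing boilerplate of a VAT custom-print function; it
NUL-terminates the format vector, prints it via vlib_cli_output, frees it,
and returns the handle. A sketch with a hypothetical message "foo":

  static void *
  vl_api_foo_t_print (vl_api_foo_t *mp, void *handle)
  {
    u8 *s = format (0, "SCRIPT: foo ");
    /* ... append decoded fields of mp to s ... */
    FINISH;
  }
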
diff --git a/src/plugins/l2tp/packet.h b/src/plugins/l2tp/packet.h
index 66dfea2194c..d7d78f85e53 100644
--- a/src/plugins/l2tp/packet.h
+++ b/src/plugins/l2tp/packet.h
@@ -24,14 +24,12 @@
* tunnels. It is not present in IOS XR l2tpv3 tunnels.
* The Linux implementation is almost certainly wrong.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
u32 session_id;
u64 cookie;
u32 l2_specific_sublayer; /* set to 0 (if present) */
}) l2tpv3_header_t;
-/* *INDENT-ON* */
#endif /* __included_l2tp_packet_h__ */
diff --git a/src/plugins/l3xc/FEATURE.yaml b/src/plugins/l3xc/FEATURE.yaml
index 5086fee3c57..0ee8c0c2168 100644
--- a/src/plugins/l3xc/FEATURE.yaml
+++ b/src/plugins/l3xc/FEATURE.yaml
@@ -4,7 +4,7 @@ maintainer: Neale Ranns <nranns@cisco.com>
features:
- cross connect all ingress traffic on an L3 interface to an output FIB path.
- the path can describe any output (with the exception of MPLS labels)
- - The same functions can be acheived by using a dedicated VRF for the table
+ - The same functions can be achieved by using a dedicated VRF for the table
and adding a default route with the same path. However, the L3XC is more
efficient in memory and CPU
diff --git a/src/plugins/l3xc/l3xc.c b/src/plugins/l3xc/l3xc.c
index ac0e4cf0d2c..427c38e9ab5 100644
--- a/src/plugins/l3xc/l3xc.c
+++ b/src/plugins/l3xc/l3xc.c
@@ -67,11 +67,11 @@ l3xc_stack (l3xc_t * l3xc)
*/
dpo_id_t via_dpo = DPO_INVALID;
- fib_path_list_contribute_forwarding (l3xc->l3xc_pl,
- (FIB_PROTOCOL_IP4 == l3xc->l3xc_proto ?
- FIB_FORW_CHAIN_TYPE_UNICAST_IP4 :
- FIB_FORW_CHAIN_TYPE_UNICAST_IP6),
- FIB_PATH_LIST_FWD_FLAG_NONE, &via_dpo);
+ fib_path_list_contribute_forwarding (
+ l3xc->l3xc_pl,
+ (FIB_PROTOCOL_IP4 == l3xc->l3xc_proto ? FIB_FORW_CHAIN_TYPE_UNICAST_IP4 :
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP6),
+ FIB_PATH_LIST_FWD_FLAG_COLLAPSE, &via_dpo);
dpo_stack_from_node ((FIB_PROTOCOL_IP4 == l3xc->l3xc_proto ?
l3xc_ip4_node.index :
@@ -264,7 +264,6 @@ out:
return (NULL);
}
-/* *INDENT-OFF* */
/**
* Create an L3XC policy.
*/
@@ -274,7 +273,6 @@ VLIB_CLI_COMMAND (l3xc_cmd_node, static) = {
.short_help = "l3xc [add|del] <INTERFACE> via ...",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static u8 *
format_l3xc (u8 * s, va_list * args)
@@ -305,13 +303,11 @@ l3xc_walk (l3xc_walk_cb_t cb, void *ctx)
{
u32 l3xci;
- /* *INDENT-OFF* */
pool_foreach_index (l3xci, l3xc_pool)
{
if (!cb(l3xci, ctx))
break;
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -320,24 +316,20 @@ l3xc_show_cmd (vlib_main_t * vm,
{
l3xc_t *l3xc;
- /* *INDENT-OFF* */
pool_foreach (l3xc, l3xc_pool)
{
vlib_cli_output(vm, "%U", format_l3xc, l3xc);
}
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l3xc_show_cmd_node, static) = {
.path = "show l3xc",
.function = l3xc_show_cmd,
.short_help = "show l3xc",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static fib_node_t *
l3xc_get_node (fib_node_index_t index)
@@ -381,7 +373,7 @@ static const fib_node_vft_t l3xc_vft = {
static clib_error_t *
l3xc_init (vlib_main_t * vm)
{
- l3xc_fib_node_type = fib_node_register_new_type (&l3xc_vft);
+ l3xc_fib_node_type = fib_node_register_new_type ("l3xc", &l3xc_vft);
return (NULL);
}
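
Note: fib_node_register_new_type () now takes a printable type name as its
first argument (presumably surfaced in FIB introspection output), so every
registration site in the patch changes the same way:

  /* old */
  l3xc_fib_node_type = fib_node_register_new_type (&l3xc_vft);
  /* new */
  l3xc_fib_node_type = fib_node_register_new_type ("l3xc", &l3xc_vft);
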
diff --git a/src/plugins/l3xc/l3xc_api.c b/src/plugins/l3xc/l3xc_api.c
index 847acaac331..f09100b3546 100644
--- a/src/plugins/l3xc/l3xc_api.c
+++ b/src/plugins/l3xc/l3xc_api.c
@@ -37,6 +37,7 @@
*/
static u32 l3xc_base_msg_id;
+#define REPLY_MSG_ID_BASE (l3xc_base_msg_id)
#include <vlibapi/api_helper_macros.h>
static void
@@ -96,12 +97,7 @@ done:
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_L3XC_UPDATE_REPLY + l3xc_base_msg_id,
- ({
- rmp->stats_index = 0;
- }))
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_L3XC_UPDATE_REPLY, ({ rmp->stats_index = 0; }))
}
static void
@@ -116,7 +112,7 @@ vl_api_l3xc_del_t_handler (vl_api_l3xc_del_t * mp)
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (VL_API_L3XC_DEL_REPLY + l3xc_base_msg_id);
+ REPLY_MACRO (VL_API_L3XC_DEL_REPLY);
}
typedef struct l3xc_dump_walk_ctx_t_
@@ -213,12 +209,10 @@ l3xc_api_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (l3xc_api_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "L3 Cross-Connect (L3XC)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
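
Note: defining REPLY_MSG_ID_BASE before including api_helper_macros.h lets
REPLY_MACRO and REPLY_MACRO2 add the plugin's message-id base themselves,
which is what allows the "+ l3xc_base_msg_id" arithmetic above to go away:

  #define REPLY_MSG_ID_BASE (l3xc_base_msg_id)
  #include <vlibapi/api_helper_macros.h>
  /* ... */
  REPLY_MACRO (VL_API_L3XC_DEL_REPLY);   /* base applied inside the macro */
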
diff --git a/src/plugins/l3xc/l3xc_node.c b/src/plugins/l3xc/l3xc_node.c
index 62db8c328b0..0f79bebeff9 100644
--- a/src/plugins/l3xc/l3xc_node.c
+++ b/src/plugins/l3xc/l3xc_node.c
@@ -199,7 +199,6 @@ static char *l3xc_error_strings[] = {
#undef l3xc_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l3xc_ip4_node) =
{
.function = l3xc_input_ip4,
@@ -245,7 +244,6 @@ VNET_FEATURE_INIT (l3xc_ip6_feat, static) =
.node_name = "l3xc-input-ip6",
.runs_after = VNET_FEATURES ("acl-plugin-in-ip6-fa"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lacp/cli.c b/src/plugins/lacp/cli.c
index fee9a5a2269..7cf97e09dc4 100644
--- a/src/plugins/lacp/cli.c
+++ b/src/plugins/lacp/cli.c
@@ -28,7 +28,6 @@ lacp_dump_ifs (lacp_interface_details_t ** out_lacpifs)
lacp_interface_details_t *r_lacpifs = NULL;
lacp_interface_details_t *lacpif = NULL;
- /* *INDENT-OFF* */
pool_foreach (mif, bm->neighbors) {
if (mif->lacp_enabled == 0)
continue;
@@ -61,7 +60,6 @@ lacp_dump_ifs (lacp_interface_details_t ** out_lacpifs)
lacpif->ptx_state = mif->ptx_state;
lacpif->mux_state = mif->mux_state;
}
- /* *INDENT-ON* */
*out_lacpifs = r_lacpifs;
@@ -309,14 +307,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_lacp_command, static) = {
.path = "show lacp",
.short_help = "show lacp [<interface>] [details]",
.function = show_lacp_fn,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
debug_lacp_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -384,13 +380,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (debug_lacp_command, static) = {
.path = "debug lacp",
.short_help = "debug lacp <interface> <on | off>",
.function = debug_lacp_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
lacp_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/lacp/input.c b/src/plugins/lacp/input.c
index ebca2ad9185..5ccd1037fdb 100644
--- a/src/plugins/lacp/input.c
+++ b/src/plugins/lacp/input.c
@@ -16,7 +16,7 @@
#define _GNU_SOURCE
#include <vnet/bonding/node.h>
#include <lacp/node.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/stats/stats.h>
static int
lacp_packet_scan (vlib_main_t * vm, member_if_t * mif)
@@ -155,7 +155,7 @@ lacp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
{
mif->last_marker_pdu_recd_time = vlib_time_now (vm);
if (mif->last_marker_pkt)
- _vec_len (mif->last_marker_pkt) = 0;
+ vec_set_len (mif->last_marker_pkt, 0);
vec_validate (mif->last_marker_pkt,
vlib_buffer_length_in_chain (vm, b0) - 1);
nbytes = vlib_buffer_contents (vm, bi0, mif->last_marker_pkt);
@@ -176,7 +176,7 @@ lacp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
* and reuse it.
*/
if (mif->last_rx_pkt)
- _vec_len (mif->last_rx_pkt) = 0;
+ vec_set_len (mif->last_rx_pkt, 0);
/*
* Make sure the per-neighbor rx buffer is big enough to hold
@@ -213,19 +213,19 @@ lacp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
/* Actually scan the packet */
e = lacp_packet_scan (vm, mif);
bif = bond_get_bond_if_by_dev_instance (mif->bif_dev_instance);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].actor_state,
- mif->actor.state);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].partner_state,
- mif->partner.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].actor_state,
+ mif->actor.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].partner_state,
+ mif->partner.state);
mif->last_packet_signature_valid = 1;
mif->last_packet_signature = last_packet_signature;
}
mif->pdu_received++;
if (mif->last_rx_pkt)
- _vec_len (mif->last_rx_pkt) = 0;
+ vec_set_len (mif->last_rx_pkt, 0);
return e;
}
@@ -239,12 +239,10 @@ lacp_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (lacp_init) =
{
.runs_after = VLIB_INITS("lacp_periodic_init"),
};
-/* *INDENT-ON* */
/*
* packet trace format function, very similar to
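
Note: the stats segment helpers moved from vpp/stats to vlib/stats, taking
a rename with them; the mechanical migration applied in the LACP hunks is:

  /* old */
  #include <vpp/stats/stat_segment.h>
  stat_segment_set_state_counter (entry_index, value);

  /* new */
  #include <vlib/stats/stats.h>
  vlib_stats_set_gauge (entry_index, value);
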
diff --git a/src/plugins/lacp/lacp.c b/src/plugins/lacp/lacp.c
index 44a32aa23a9..ba66f7b245d 100644
--- a/src/plugins/lacp/lacp.c
+++ b/src/plugins/lacp/lacp.c
@@ -21,7 +21,7 @@
#include <vppinfra/hash.h>
#include <vnet/bonding/node.h>
#include <lacp/node.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/stats/stats.h>
lacp_main_t lacp_main;
@@ -142,7 +142,6 @@ lacp_periodic (vlib_main_t * vm)
bond_if_t *bif;
u8 actor_state, partner_state;
- /* *INDENT-OFF* */
pool_foreach (mif, bm->neighbors)
{
if (mif->port_enabled == 0)
@@ -173,19 +172,18 @@ lacp_periodic (vlib_main_t * vm)
if (actor_state != mif->actor.state)
{
bif = bond_get_bond_if_by_dev_instance (mif->bif_dev_instance);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].actor_state,
- mif->actor.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].actor_state,
+ mif->actor.state);
}
if (partner_state != mif->partner.state)
{
bif = bond_get_bond_if_by_dev_instance (mif->bif_dev_instance);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].partner_state,
- mif->partner.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].partner_state,
+ mif->partner.state);
}
}
- /* *INDENT-ON* */
}
static void
@@ -216,12 +214,10 @@ lacp_interface_enable_disable (vlib_main_t * vm, bond_if_t * bif,
ASSERT (lm->lacp_int >= 1);
if (lm->lacp_int == 0)
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "lacp-int-en-dis: BUG lacp_int == 0",
};
- /* *INDENT-ON* */
ELOG_DATA (&vlib_global_main.elog_main, e);
}
else
@@ -380,12 +376,12 @@ lacp_init_state_machines (vlib_main_t * vm, member_if_t * mif)
lacp_init_mux_machine (vm, mif);
lacp_init_ptx_machine (vm, mif);
lacp_init_rx_machine (vm, mif);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].actor_state,
- mif->actor.state);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].partner_state,
- mif->partner.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].actor_state,
+ mif->actor.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].partner_state,
+ mif->partner.state);
}
VLIB_INIT_FUNCTION (lacp_periodic_init);
@@ -453,12 +449,10 @@ lacp_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lacp_hw_interface_up_down);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Link Aggregation Control Protocol (LACP)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lacp/lacp_api.c b/src/plugins/lacp/lacp_api.c
index fce2492be8c..cdf05aa370c 100644
--- a/src/plugins/lacp/lacp_api.c
+++ b/src/plugins/lacp/lacp_api.c
@@ -31,14 +31,11 @@
#include <lacp/lacp.api_enum.h>
#include <lacp/lacp.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-
-/* Macro to finish up custom dump fns */
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
#define REPLY_MSG_ID_BASE lm->msg_id_base
#include <vlibapi/api_helper_macros.h>
@@ -125,7 +122,8 @@ lacp_plugin_api_hookup (vlib_main_t * vm)
lm->msg_id_base = setup_message_id_table ();
/* Mark these APIs as mp safe */
- am->is_mp_safe[lm->msg_id_base + VL_API_SW_INTERFACE_LACP_DUMP] = 1;
+ vl_api_set_msg_thread_safe (
+ am, lm->msg_id_base + VL_API_SW_INTERFACE_LACP_DUMP, 1);
return 0;
}
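
Note: marking a message as MP-safe now goes through an accessor instead of
poking api_main_t internals directly:

  /* old */
  am->is_mp_safe[lm->msg_id_base + VL_API_SW_INTERFACE_LACP_DUMP] = 1;
  /* new */
  vl_api_set_msg_thread_safe (
    am, lm->msg_id_base + VL_API_SW_INTERFACE_LACP_DUMP, 1);
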
diff --git a/src/plugins/lacp/lacp_doc.md b/src/plugins/lacp/lacp_doc.md
deleted file mode 100644
index 7df82b5689a..00000000000
--- a/src/plugins/lacp/lacp_doc.md
+++ /dev/null
@@ -1,104 +0,0 @@
-# VPP Link Aggregation Control Protocol (LACP) implementation {#lacp_plugin_doc}
-
-This document is to describe the usage of VPP LACP implementation.
-
-## LACP
-
-The Link Aggregation Control Protocol (LACP) is an 802.3ad standard which
-provides a protocol for exchanging information between Partner Systems on a
-link to allow their protocol instances to reach agreement on the Link Aggregation
-Group to which the link belongs and enable transmission and reception for the
-higher layer. Multiple links may be bundled to the same Aggregation Group to form
-a high bandwidth transmission medium and create a fault-tolerant link.
-
-
-### Configuration
-
-1. Create the bond interface
-create bond mode lacp [hw-addr <mac-address>] [load-balance { l2 | l23 | l34 } [numa-only]]
-
-2. Enslave the physical interface to the bond
-bond add <bond-interface-name> <slave-interface> [passive] [long-timeout]
-
-3. Delete the bond interface
-delete bond {<interface> | sw_if_index <sw_idx>}
-
-4. Detach the slave interface from the bond
-bond del <slave-interface>
-
-### Configuration example
-
-```
-create bond mode lacp
-set interface state BondEthernet0 up
-bond add BondEthernet0 TenGigabitEthernet7/0/0
-bond add BondEthernet0 TenGigabitEthernet7/0/1
-bond add BondEthernet0 TenGigabitEthernet5/0/0
-bond add BondEthernet0 TenGigabitEthernet5/0/1
-```
-
-```
-bond del TenGigabitEthernet5/0/1
-```
-
-```
-delete bond BondEthernet0
-```
-
-### Operational data
-
-```
-show lacp [<interface>] [details]
-```
-
-Example:
-
-```
-DBGvpp# show lacp
- actor state partner state
-interface name sw_if_index bond interface exp/def/dis/col/syn/agg/tim/act exp/def/dis/col/syn/agg/tim/act
-GigabitEthernet2/0/1 1 BondEthernet0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
- LAG ID: [(ffff,e4-c7-22-f3-26-71,0000,00ff,0001), (ffff,fc-99-47-4a-0c-8b,0009,00ff,0001)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet4/0/0 2 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
- LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0001), (8000,00-2a-6a-e5-50-c1,0140,8000,011d)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet4/0/1 3 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
- LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0002), (8000,00-2a-6a-e5-50-c1,0140,8000,011e)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet8/0/1 7 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
- LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0003), (8000,00-2a-6a-e5-50-01,007a,8000,0114)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet8/0/0 6 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
- LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0004), (8000,00-2a-6a-e5-50-01,007a,8000,0115)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet6/0/1 5 BondEthernet2 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
- LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0001), (ffff,90-e2-ba-29-f5-31,000f,00ff,0002)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet6/0/0 4 BondEthernet2 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
- LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0002), (ffff,90-e2-ba-29-f5-31,000f,00ff,0001)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-DBGvpp#
-```
-
-```
-show bond [details]
-```
-
-Example:
-
-```
-DBGvpp# show bond
-sh bond
-interface name sw_if_index mode load balance active slaves slaves
-BondEthernet0 10 lacp l2 1 1
-BondEthernet1 11 lacp l34 4 4
-BondEthernet2 12 lacp l23 2 2
-DBGvpp#
-```
-
-### Debugging
-
-```
-debug lacp [<interface>] <on | off>
-```
diff --git a/src/plugins/lacp/lacp_doc.rst b/src/plugins/lacp/lacp_doc.rst
new file mode 100644
index 00000000000..04b51ba22f8
--- /dev/null
+++ b/src/plugins/lacp/lacp_doc.rst
@@ -0,0 +1,109 @@
+LACP Protocol
+=============
+
+This document is to describe the usage of VPP Link Aggregation Control
+Protocol (LACP) implementation.
+
+LACP
+----
+
+The Link Aggregation Control Protocol (LACP) is an 802.3ad standard
+which provides a protocol for exchanging information between Partner
+Systems on a link to allow their protocol instances to reach agreement
+on the Link Aggregation Group to which the link belongs and enable
+transmission and reception for the higher layer. Multiple links may be
+bundled to the same Aggregation Group to form a high bandwidth
+transmission medium and create a fault-tolerant link.
+
+Configuration
+~~~~~~~~~~~~~
+
+1. Create the bond interface:
+   create bond mode lacp [hw-addr <mac-address>]
+   [load-balance { l2 \| l23 \| l34 } [numa-only]]
+
+2. Enslave the physical interface to the bond:
+   bond add <bond-interface-name> <slave-interface> [passive]
+   [long-timeout]
+
+3. Delete the bond interface:
+   delete bond {<interface> \| sw_if_index <sw_idx>}
+
+4. Detach the slave interface from the bond:
+   bond del <slave-interface>
+
+Configuration example
+~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ create bond mode lacp
+ set interface state BondEthernet0 up
+ bond add BondEthernet0 TenGigabitEthernet7/0/0
+ bond add BondEthernet0 TenGigabitEthernet7/0/1
+ bond add BondEthernet0 TenGigabitEthernet5/0/0
+ bond add BondEthernet0 TenGigabitEthernet5/0/1
+
+::
+
+ bond del TenGigabitEthernet5/0/1
+
+::
+
+ delete bond BondEthernet0
+
+Operational data
+~~~~~~~~~~~~~~~~
+
+::
+
+ show lacp [<interface>] [details]
+
+Example:
+
+::
+
+ DBGvpp# show lacp
+ actor state partner state
+ interface name sw_if_index bond interface exp/def/dis/col/syn/agg/tim/act exp/def/dis/col/syn/agg/tim/act
+ GigabitEthernet2/0/1 1 BondEthernet0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
+ LAG ID: [(ffff,e4-c7-22-f3-26-71,0000,00ff,0001), (ffff,fc-99-47-4a-0c-8b,0009,00ff,0001)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet4/0/0 2 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
+ LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0001), (8000,00-2a-6a-e5-50-c1,0140,8000,011d)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet4/0/1 3 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
+ LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0002), (8000,00-2a-6a-e5-50-c1,0140,8000,011e)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet8/0/1 7 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
+ LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0003), (8000,00-2a-6a-e5-50-01,007a,8000,0114)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet8/0/0 6 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
+ LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0004), (8000,00-2a-6a-e5-50-01,007a,8000,0115)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet6/0/1 5 BondEthernet2 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
+ LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0001), (ffff,90-e2-ba-29-f5-31,000f,00ff,0002)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet6/0/0 4 BondEthernet2 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
+ LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0002), (ffff,90-e2-ba-29-f5-31,000f,00ff,0001)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ DBGvpp#
+
+::
+
+ show bond [details]
+
+Example:
+
+::
+
+ DBGvpp# show bond
+ sh bond
+ interface name sw_if_index mode load balance active slaves slaves
+ BondEthernet0 10 lacp l2 1 1
+ BondEthernet1 11 lacp l34 4 4
+ BondEthernet2 12 lacp l23 2 2
+ DBGvpp#
+
+Debugging
+~~~~~~~~~
+
+::
+
+ debug lacp [<interface>] <on | off>
diff --git a/src/plugins/lacp/lacp_test.c b/src/plugins/lacp/lacp_test.c
index 3a5e6351693..e5078520fd3 100644
--- a/src/plugins/lacp/lacp_test.c
+++ b/src/plugins/lacp/lacp_test.c
@@ -31,7 +31,7 @@
#include <vnet/format_fns.h>
#include <lacp/lacp.api_enum.h>
#include <lacp/lacp.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
diff --git a/src/plugins/lacp/mux_machine.c b/src/plugins/lacp/mux_machine.c
index 974dbd9dac9..ee43894cd53 100644
--- a/src/plugins/lacp/mux_machine.c
+++ b/src/plugins/lacp/mux_machine.c
@@ -208,13 +208,11 @@ lacp_mux_debug_func (member_if_t * mif, int event, int state,
lacp_fsm_state_t * transition)
{
vlib_worker_thread_t *w = vlib_worker_threads + os_get_thread_index ();
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "%s",
.format_args = "T4",
};
- /* *INDENT-ON* */
struct
{
u32 event;
diff --git a/src/plugins/lacp/node.c b/src/plugins/lacp/node.c
index 4f78880c741..4426dafab5b 100644
--- a/src/plugins/lacp/node.c
+++ b/src/plugins/lacp/node.c
@@ -33,7 +33,7 @@ lacp_state_struct lacp_state_array[] = {
The interior node is neither pipelined nor dual-looped, because
it would be very unusual to see more than one LACP packet in
- a given input frame. So, it's a very simple / straighforward
+ a given input frame. So, it's a very simple / straightforward
example.
*/
@@ -112,7 +112,6 @@ lacp_node_fn (vlib_main_t * vm,
/*
* lacp input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lacp_input_node, static) = {
.function = lacp_node_fn,
.name = "lacp-input",
@@ -129,19 +128,16 @@ VLIB_REGISTER_NODE (lacp_input_node, static) = {
[LACP_INPUT_NEXT_NORMAL] = "error-drop",
},
};
-/* *INDENT-ON* */
static void
lacp_elog_start_event (void)
{
lacp_main_t *lm = &lacp_main;
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "Starting LACP process, interface count = %d",
.format_args = "i4",
};
- /* *INDENT-ON* */
struct
{
u32 count;
@@ -155,13 +151,11 @@ static void
lacp_elog_stop_event (void)
{
lacp_main_t *lm = &lacp_main;
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "Stopping LACP process, interface count = %d",
.format_args = "i4",
};
- /* *INDENT-ON* */
struct
{
u32 count;
diff --git a/src/plugins/lacp/ptx_machine.c b/src/plugins/lacp/ptx_machine.c
index bb9d033c13a..92a99c920e9 100644
--- a/src/plugins/lacp/ptx_machine.c
+++ b/src/plugins/lacp/ptx_machine.c
@@ -195,13 +195,11 @@ lacp_ptx_debug_func (member_if_t * mif, int event, int state,
lacp_fsm_state_t * transition)
{
vlib_worker_thread_t *w = vlib_worker_threads + os_get_thread_index ();
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "%s",
.format_args = "T4",
};
- /* *INDENT-ON* */
struct
{
u32 event;
diff --git a/src/plugins/lacp/rx_machine.c b/src/plugins/lacp/rx_machine.c
index 2fadbe636cf..9c161b02078 100644
--- a/src/plugins/lacp/rx_machine.c
+++ b/src/plugins/lacp/rx_machine.c
@@ -343,7 +343,6 @@ lacp_port_is_moved (vlib_main_t * vm, member_if_t * mif)
member_if_t *mif2;
lacp_pdu_t *lacpdu = (lacp_pdu_t *) mif->last_rx_pkt;
- /* *INDENT-OFF* */
pool_foreach (mif2, bm->neighbors) {
{
if ((mif != mif2) && (mif2->rx_state == LACP_RX_STATE_PORT_DISABLED) &&
@@ -353,7 +352,6 @@ lacp_port_is_moved (vlib_main_t * vm, member_if_t * mif)
return 1;
}
}
- /* *INDENT-ON* */
return 0;
}
@@ -400,13 +398,11 @@ lacp_rx_debug_func (member_if_t * mif, int event, int state,
lacp_fsm_state_t * transition)
{
vlib_worker_thread_t *w = vlib_worker_threads + os_get_thread_index ();
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "%s",
.format_args = "T4",
};
- /* *INDENT-ON* */
struct
{
u32 event;
diff --git a/src/plugins/lacp/tx_machine.c b/src/plugins/lacp/tx_machine.c
index 1eb3bc1c4b7..c36f44c07ce 100644
--- a/src/plugins/lacp/tx_machine.c
+++ b/src/plugins/lacp/tx_machine.c
@@ -84,13 +84,11 @@ lacp_tx_debug_func (member_if_t * mif, int event, int state,
lacp_fsm_state_t * transition)
{
vlib_worker_thread_t *w = vlib_worker_threads + os_get_thread_index ();
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "%s",
.format_args = "T4",
};
- /* *INDENT-ON* */
struct
{
u32 event;
diff --git a/src/plugins/lb/api.c b/src/plugins/lb/api.c
index e44f815cb9c..ea2e482135b 100644
--- a/src/plugins/lb/api.c
+++ b/src/plugins/lb/api.c
@@ -30,17 +30,15 @@
#include <lb/lb.api_enum.h>
#include <lb/lb.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define REPLY_MSG_ID_BASE lbm->msg_id_base
#include <vlibapi/api_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static void
vl_api_lb_conf_t_handler
@@ -72,7 +70,7 @@ vl_api_lb_add_del_vip_t_handler
lb_main_t *lbm = &lb_main;
vl_api_lb_conf_reply_t * rmp;
int rv = 0;
- lb_vip_add_args_t args;
+ lb_vip_add_args_t args = {};
/* if port == 0, it means all-port VIP */
if (mp->port == 0)
@@ -130,6 +128,80 @@ vl_api_lb_add_del_vip_t_handler
}
static void
+vl_api_lb_add_del_vip_v2_t_handler (vl_api_lb_add_del_vip_v2_t *mp)
+{
+ lb_main_t *lbm = &lb_main;
+ vl_api_lb_conf_reply_t *rmp;
+ int rv = 0;
+ lb_vip_add_args_t args = {};
+
+ /* if port == 0, it means all-port VIP */
+ if (mp->port == 0)
+ {
+ mp->protocol = ~0;
+ }
+
+ ip_address_decode (&mp->pfx.address, &(args.prefix));
+
+ if (mp->is_del)
+ {
+ u32 vip_index;
+ if (!(rv = lb_vip_find_index (&(args.prefix), mp->pfx.len, mp->protocol,
+ ntohs (mp->port), &vip_index)))
+ rv = lb_vip_del (vip_index);
+ }
+ else
+ {
+ u32 vip_index;
+ lb_vip_type_t type = 0;
+
+ if (ip46_prefix_is_ip4 (&(args.prefix), mp->pfx.len))
+ {
+ if (mp->encap == LB_API_ENCAP_TYPE_GRE4)
+ type = LB_VIP_TYPE_IP4_GRE4;
+ else if (mp->encap == LB_API_ENCAP_TYPE_GRE6)
+ type = LB_VIP_TYPE_IP4_GRE6;
+ else if (mp->encap == LB_API_ENCAP_TYPE_L3DSR)
+ type = LB_VIP_TYPE_IP4_L3DSR;
+ else if (mp->encap == LB_API_ENCAP_TYPE_NAT4)
+ type = LB_VIP_TYPE_IP4_NAT4;
+ }
+ else
+ {
+ if (mp->encap == LB_API_ENCAP_TYPE_GRE4)
+ type = LB_VIP_TYPE_IP6_GRE4;
+ else if (mp->encap == LB_API_ENCAP_TYPE_GRE6)
+ type = LB_VIP_TYPE_IP6_GRE6;
+ else if (mp->encap == LB_API_ENCAP_TYPE_NAT6)
+ type = LB_VIP_TYPE_IP6_NAT6;
+ }
+
+ args.plen = mp->pfx.len;
+ args.protocol = mp->protocol;
+ args.port = ntohs (mp->port);
+ args.type = type;
+ args.new_length = ntohl (mp->new_flows_table_length);
+
+ if (mp->src_ip_sticky)
+ args.src_ip_sticky = 1;
+
+ if (mp->encap == LB_API_ENCAP_TYPE_L3DSR)
+ {
+ args.encap_args.dscp = (u8) (mp->dscp & 0x3F);
+ }
+ else if ((mp->encap == LB_API_ENCAP_TYPE_NAT4) ||
+ (mp->encap == LB_API_ENCAP_TYPE_NAT6))
+ {
+ args.encap_args.srv_type = mp->type;
+ args.encap_args.target_port = ntohs (mp->target_port);
+ }
+
+ rv = lb_vip_add (args, &vip_index);
+ }
+ REPLY_MACRO (VL_API_LB_ADD_DEL_VIP_V2_REPLY);
+}
+
+static void
vl_api_lb_add_del_as_t_handler
(vl_api_lb_add_del_as_t * mp)
{
@@ -211,7 +283,6 @@ static void send_lb_as_details
lb_main_t *lbm = &lb_main;
int msg_size = 0;
u32 *as_index;
- u32 asindex = 0;
/* construct as list under this vip */
lb_as_t *as;
@@ -235,7 +306,6 @@ static void send_lb_as_details
rmp->in_use_since = htonl(as->last_used);
vl_api_send_msg (reg, (u8 *) rmp);
- asindex++;
}
}
@@ -260,7 +330,6 @@ vl_api_lb_as_dump_t_handler
dump_all = (prefix.ip6.as_u64[0] == 0) && (prefix.ip6.as_u64[1] == 0);
- /* *INDENT-OFF* */
pool_foreach (vip, lbm->vips)
{
if ( dump_all
@@ -272,7 +341,6 @@ vl_api_lb_as_dump_t_handler
send_lb_as_details(reg, mp->context, vip);
}
}
- /* *INDENT-ON* */
}
static void
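
Note: both VIP handlers now zero-initialize the argument struct, so newly
added optional members (src_ip_sticky here) default to off instead of
inheriting stack garbage; the v2 handler then only has to set the flag
when requested:

  lb_vip_add_args_t args = {};   /* all optional fields start zeroed */
  /* ... decode prefix, port, encap ... */
  if (mp->src_ip_sticky)
    args.src_ip_sticky = 1;
  rv = lb_vip_add (args, &vip_index);
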
diff --git a/src/plugins/lb/cli.c b/src/plugins/lb/cli.c
index 7b5dc5c8549..afa73ef616c 100644
--- a/src/plugins/lb/cli.c
+++ b/src/plugins/lb/cli.c
@@ -32,6 +32,7 @@ lb_vip_command_fn (vlib_main_t * vm,
clib_error_t *error = 0;
args.new_length = 1024;
+ args.src_ip_sticky = 0;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -49,6 +50,8 @@ lb_vip_command_fn (vlib_main_t * vm,
;
else if (unformat(line_input, "del"))
del = 1;
+ else if (unformat (line_input, "src_ip_sticky"))
+ args.src_ip_sticky = 1;
else if (unformat(line_input, "protocol tcp"))
{
args.protocol = (u8)IP_PROTOCOL_TCP;
@@ -177,6 +180,7 @@ done:
return error;
}
+/* clang-format off */
VLIB_CLI_COMMAND (lb_vip_command, static) =
{
.path = "lb vip",
@@ -185,9 +189,10 @@ VLIB_CLI_COMMAND (lb_vip_command, static) =
"[encap (gre6|gre4|l3dsr|nat4|nat6)] "
"[dscp <n>] "
"[type (nodeport|clusterip) target_port <n>] "
- "[new_len <n>] [del]",
+ "[new_len <n>] [src_ip_sticky] [del]",
.function = lb_vip_command_fn,
};
+/* clang-format on */
static clib_error_t *
lb_as_command_fn (vlib_main_t * vm,
@@ -442,24 +447,22 @@ lb_set_interface_nat_command_fn (vlib_main_t * vm,
{
if (lb_nat4_interface_add_del (*sw_if_index, is_del))
{
- error = clib_error_return(
- 0, "%s %U failed", is_del ? "del" : "add",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, *sw_if_index));
- goto done;
- }
- }
+ error = clib_error_return (
+ 0, "%s %U failed", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnm, *sw_if_index);
+ goto done;
+ }
+ }
else
{
if (lb_nat6_interface_add_del (*sw_if_index, is_del))
{
- error = clib_error_return(
- 0, "%s %U failed", is_del ? "del" : "add",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, *sw_if_index));
- goto done;
- }
- }
+ error = clib_error_return (
+ 0, "%s %U failed", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnm, *sw_if_index);
+ goto done;
+ }
+ }
}
done:
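
Note: with the keyword wired into the unformat loop above, a sticky NAT4
VIP can be created from the CLI; an illustrative invocation (addresses and
ports are made up):

  lb vip 90.1.2.1/32 protocol tcp port 80 encap nat4 type clusterip target_port 8080 src_ip_sticky
  lb as 90.1.2.1/32 protocol tcp port 80 10.0.0.1 10.0.0.2
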
diff --git a/src/plugins/lb/lb.api b/src/plugins/lb/lb.api
index 4bf30e76b59..96f047ddbc2 100644
--- a/src/plugins/lb/lb.api
+++ b/src/plugins/lb/lb.api
@@ -1,4 +1,4 @@
-option version = "1.0.0";
+option version = "1.1.0";
import "plugins/lb/lb_types.api";
import "vnet/interface_types.api";
@@ -54,6 +54,39 @@ autoreply define lb_add_del_vip {
option vat_help = "<prefix> [protocol (tcp|udp) port <n>] [encap (gre6|gre4|l3dsr|nat4|nat6)] [dscp <n>] [type (nodeport|clusterip) target_port <n>] [new_len <n>] [del]";
};
+/** \brief Add a virtual address (or prefix)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param pfx - ip prefix and length
+ @param protocol - tcp or udp.
+ @param port - destination port. (0) means 'all-port VIP'
+ @param encap - Encap is ip4 GRE(0) or ip6 GRE(1) or L3DSR(2) or NAT4(3) or NAT6(4).
+ @param dscp - DSCP bit corresponding to VIP (applicable in L3DSR mode only).
+ @param type - service type (applicable in NAT4/NAT6 mode only).
+ @param target_port - Pod's port corresponding to specific service (applicable in NAT4/NAT6 mode only).
+ @param node_port - Node's port (applicable in NAT4/NAT6 mode only).
+ @param new_flows_table_length - Size of the new connections flow table used
+ for this VIP (must be power of 2).
+ @param src_ip_sticky - source ip based sticky session.
+ @param is_del - The VIP should be removed.
+*/
+autoreply define lb_add_del_vip_v2 {
+ u32 client_index;
+ u32 context;
+ vl_api_address_with_prefix_t pfx;
+ u8 protocol [default=255];
+ u16 port;
+ vl_api_lb_encap_type_t encap;
+ u8 dscp;
+ vl_api_lb_srv_type_t type; /* LB_API_SRV_TYPE_CLUSTERIP */
+ u16 target_port;
+ u16 node_port;
+ u32 new_flows_table_length [default=1024];
+ bool src_ip_sticky;
+ bool is_del;
+ option vat_help = "<prefix> [protocol (tcp|udp) port <n>] [encap (gre6|gre4|l3dsr|nat4|nat6)] [dscp <n>] [type (nodeport|clusterip) target_port <n>] [new_len <n>] [src_ip_sticky] [del]";
+};
+
/** \brief Add an application server for a given VIP
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
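
Note: the vat_help string above doubles as the VAT syntax for the new
message; an illustrative call (values are made up):

  lb_add_del_vip_v2 90.1.2.1/32 protocol tcp port 80 encap nat4 type clusterip target_port 8080 src_ip_sticky
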
diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c
index 6fc7f0f92b2..7ae1884ff31 100644
--- a/src/plugins/lb/lb.c
+++ b/src/plugins/lb/lb.c
@@ -198,15 +198,18 @@ u8 *format_lb_vip_detailed (u8 * s, va_list * args)
lb_vip_t *vip = va_arg (*args, lb_vip_t *);
u32 indent = format_get_indent (s);
- s = format(s, "%U %U [%lu] %U%s\n"
+ /* clang-format off */
+ s = format(s, "%U %U [%lu] %U%s%s\n"
"%U new_size:%u\n",
format_white_space, indent,
format_lb_vip_type, vip->type,
vip - lbm->vips,
format_ip46_prefix, &vip->prefix, (u32) vip->plen, IP46_TYPE_ANY,
+ lb_vip_is_src_ip_sticky (vip) ? " src_ip_sticky" : "",
(vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
format_white_space, indent,
vip->new_flow_table_mask + 1);
+ /* clang-format on */
if (vip->port != 0)
{
@@ -370,9 +373,9 @@ void lb_garbage_collection()
}
vec_foreach(i, to_be_removed_vips) {
- vip = &lbm->vips[*i];
- pool_put(lbm->vips, vip);
- pool_free(vip->as_indexes);
+ vip = &lbm->vips[*i];
+ pool_free (vip->as_indexes);
+ pool_put (lbm->vips, vip);
}
vec_free(to_be_removed_vips);
@@ -411,7 +414,7 @@ out:
}
//First, let's sort the ASs
- vec_alloc(sort_arr, pool_elts(vip->as_indexes));
+ vec_validate (sort_arr, pool_elts (vip->as_indexes) - 1);
i = 0;
pool_foreach (as_index, vip->as_indexes) {
@@ -422,7 +425,7 @@ out:
sort_arr[i].as_index = as - lbm->ass;
i++;
}
- _vec_len(sort_arr) = i;
+ vec_set_len (sort_arr, i);
vec_sort_with_function(sort_arr, lb_pseudorand_compare);
@@ -1147,6 +1150,10 @@ int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
}
vip->flags = LB_VIP_FLAGS_USED;
+ if (args.src_ip_sticky)
+ {
+ vip->flags |= LB_VIP_FLAGS_SRC_IP_STICKY;
+ }
vip->as_indexes = 0;
//Validate counters
@@ -1249,12 +1256,10 @@ int lb_vip_del(u32 vip_index)
return rv;
}
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Load Balancer (LB)",
};
-/* *INDENT-ON* */
u8 *format_lb_dpo (u8 * s, va_list * va)
{
@@ -1412,7 +1417,7 @@ lb_init (vlib_main_t * vm)
lb_dpo_nat4_port_nodes);
lbm->dpo_nat6_port_type = dpo_register_new_type(&lb_vft,
lb_dpo_nat6_port_nodes);
- lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft);
+ lbm->fib_node_type = fib_node_register_new_type ("lb", &lb_fib_node_vft);
//Init AS reference counters
vlib_refcount_init(&lbm->as_refcount);
diff --git a/src/plugins/lb/lb.h b/src/plugins/lb/lb.h
index ebbb1f6f8f0..46da40970c9 100644
--- a/src/plugins/lb/lb.h
+++ b/src/plugins/lb/lb.h
@@ -22,7 +22,7 @@
* The load-balancer receives traffic destined to VIP (Virtual IP)
* addresses from one or multiple(ECMP) routers.
* The load-balancer tunnels the traffic toward many application servers
- * ensuring session stickyness (i.e. that a single sessions is tunneled
+ * ensuring session stickiness (i.e. that a single sessions is tunneled
* towards a single application server).
*
*/
@@ -324,6 +324,7 @@ typedef struct {
*/
u8 flags;
#define LB_VIP_FLAGS_USED 0x1
+#define LB_VIP_FLAGS_SRC_IP_STICKY 0x2
/**
* Pool of AS indexes used for this VIP.
@@ -346,11 +347,14 @@ typedef struct {
|| (vip)->type == LB_VIP_TYPE_IP4_L3DSR \
|| (vip)->type == LB_VIP_TYPE_IP4_NAT4 )
+#define lb_vip_is_src_ip_sticky(vip) \
+ (((vip)->flags & LB_VIP_FLAGS_SRC_IP_STICKY) != 0)
+
+/* clang-format off */
#define lb_vip_is_gre4(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE4 \
|| (vip)->type == LB_VIP_TYPE_IP4_GRE4) \
&& ((vip)->port == 0))
-
#define lb_vip_is_gre6(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE6 \
|| (vip)->type == LB_VIP_TYPE_IP4_GRE6) \
&& ((vip)->port == 0))
@@ -362,27 +366,28 @@ typedef struct {
#define lb_vip_is_gre6_port(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE6 \
|| (vip)->type == LB_VIP_TYPE_IP4_GRE6) \
&& ((vip)->port != 0))
+/* clang-format on */
always_inline bool
lb_vip_is_l3dsr(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port ==0);
+ return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port == 0);
}
always_inline bool
lb_vip_is_l3dsr_port(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port !=0);
+ return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port != 0);
}
always_inline bool
lb_vip_is_nat4_port(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP4_NAT4 && vip->port !=0);
+ return (vip->type == LB_VIP_TYPE_IP4_NAT4 && vip->port != 0);
}
always_inline bool
lb_vip_is_nat6_port(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP6_NAT6 && vip->port !=0);
+ return (vip->type == LB_VIP_TYPE_IP6_NAT6 && vip->port != 0);
}
format_function_t format_lb_vip;
@@ -575,6 +580,7 @@ typedef struct {
u8 plen;
u8 protocol;
u16 port;
+ u8 src_ip_sticky;
lb_vip_type_t type;
u32 new_length;
lb_vip_encap_args_t encap_args;
diff --git a/src/plugins/lb/lb_plugin_doc.md b/src/plugins/lb/lb_plugin_doc.md
deleted file mode 100644
index 5f6538974e9..00000000000
--- a/src/plugins/lb/lb_plugin_doc.md
+++ /dev/null
@@ -1,192 +0,0 @@
-# Load Balancer plugin for VPP {#lb_plugin_doc}
-
-## Version
-
-The load balancer plugin is currently in *beta* version.
-Both CLIs and APIs are subject to *heavy* changes,
-which also means feedback is really welcome regarding features, apis, etc...
-
-## Overview
-
-This plugin provides load balancing for VPP in a way that is largely inspired
-from Google's MagLev: http://research.google.com/pubs/pub44824.html
-
-The load balancer is configured with a set of Virtual IPs (VIP, which can be
-prefixes), and for each VIP, with a set of Application Server addresses (ASs).
-
-There are four encap types to steer traffic to different ASs:
-1). IPv4+GRE ad IPv6+GRE encap types:
-Traffic received for a given VIP (or VIP prefix) is tunneled using GRE towards
-the different ASs in a way that (tries to) ensure that a given session will
-always be tunneled to the same AS.
-
-2). IPv4+L3DSR encap types:
-L3DSR is used to overcome Layer 2 limitations of Direct Server Return Load Balancing.
-It maps VIP to DSCP bits, and reuse TOS bits to transfer DSCP bits
-to server, and then server will get VIP from DSCP-to-VIP mapping.
-
-Both VIPs or ASs can be IPv4 or IPv6, but for a given VIP, all ASs must be using
-the same encap. type (i.e. IPv4+GRE or IPv6+GRE or IPv4+L3DSR).
-Meaning that for a given VIP, all AS addresses must be of the same family.
-
-3). IPv4/IPv6 + NAT4/NAT6 encap types:
-This type provides kube-proxy data plane on user space,
-which is used to replace linux kernel's kube-proxy based on iptables.
-
-Currently, load balancer plugin supports three service types:
-a) Cluster IP plus Port: support any protocols, including TCP, UDP.
-b) Node IP plus Node Port: currently only support UDP.
-c) External Load Balancer.
-
-For Cluster IP plus Port case:
-kube-proxy is configured with a set of Virtual IPs (VIP, which can be
-prefixes), and for each VIP, with a set of AS addresses (ASs).
-
-For a specific session received for a given VIP (or VIP prefix),
-first packet selects a AS according to internal load balancing algorithm,
-then does DNAT operation and sent to chosen AS.
-At the same time, will create a session entry to store AS chosen result.
-Following packets for that session will look up session table first,
-which ensures that a given session will always be routed to the same AS.
-
-For returned packet from AS, it will do SNAT operation and sent out.
-
-Please refer to below for details:
-https://schd.ws/hosted_files/ossna2017/1e/VPP_K8S_GTPU_OSSNA.pdf
-
-
-## Performance
-
-The load balancer has been tested up to 1 millions flows and still forwards more
-than 3Mpps per core in such circumstances.
-Although 3Mpps seems already good, it is likely that performance will be improved
-in next versions.
-
-## Configuration
-
-### Global LB parameters
-
-The load balancer needs to be configured with some parameters:
-
- lb conf [ip4-src-address <addr>] [ip6-src-address <addr>]
- [buckets <n>] [timeout <s>]
-
-ip4-src-address: the source address used to send encap. packets using IPv4 for GRE4 mode.
- or Node IP4 address for NAT4 mode.
-
-ip6-src-address: the source address used to send encap. packets using IPv6 for GRE6 mode.
- or Node IP6 address for NAT6 mode.
-
-buckets: the *per-thread* established-connections-table number of buckets.
-
-timeout: the number of seconds a connection will remain in the
- established-connections-table while no packet for this flow
- is received.
-
-### Configure the VIPs
-
- lb vip <prefix> [encap (gre6|gre4|l3dsr|nat4|nat6)] \
- [dscp <n>] [port <n> target_port <n> node_port <n>] [new_len <n>] [del]
-
-new_len is the size of the new-connection-table. It should be 1 or 2 orders of
-magnitude bigger than the number of ASs for the VIP in order to ensure a good
-load balancing.
-Encap l3dsr and dscp is used to map VIP to dscp bit and rewrite DSCP bit in packets.
-So the selected server could get VIP from DSCP bit in this packet and perform DSR.
-Encap nat4/nat6 and port/target_port/node_port is used to do kube-proxy data plane.
-
-Examples:
-
- lb vip 2002::/16 encap gre6 new_len 1024
- lb vip 2003::/16 encap gre4 new_len 2048
- lb vip 80.0.0.0/8 encap gre6 new_len 16
- lb vip 90.0.0.0/8 encap gre4 new_len 1024
- lb vip 100.0.0.0/8 encap l3dsr dscp 2 new_len 32
- lb vip 90.1.2.1/32 encap nat4 port 3306 target_port 3307 node_port 30964 new_len 1024
- lb vip 2004::/16 encap nat6 port 6306 target_port 6307 node_port 30966 new_len 1024
-
-### Configure the ASs (for each VIP)
-
- lb as <vip-prefix> [<address> [<address> [...]]] [del]
-
-You can add (or delete) as many ASs at a time (for a single VIP).
-Note that the AS address family must correspond to the VIP encap. IP family.
-
-Examples:
-
- lb as 2002::/16 2001::2 2001::3 2001::4
- lb as 2003::/16 10.0.0.1 10.0.0.2
- lb as 80.0.0.0/8 2001::2
- lb as 90.0.0.0/8 10.0.0.1
-
-### Configure SNAT
-
- lb set interface nat4 in <intfc> [del]
-
-Set SNAT feature in a specific interface.
-(applicable in NAT4 mode only)
-
- lb set interface nat6 in <intfc> [del]
-
-Set SNAT feature in a specific interface.
-(applicable in NAT6 mode only)
-
-## Monitoring
-
-The plugin provides quite a bunch of counters and information.
-These are still subject to quite significant changes.
-
- show lb
- show lb vip
- show lb vip verbose
-
- show node counters
-
-
-## Design notes
-
-### Multi-Threading
-
-MagLev is a distributed system which pseudo-randomly generates a
-new-connections-table based on AS names such that each server configured with
-the same set of ASs ends up with the same table. Connection stickyness is then
-ensured with an established-connections-table. Using ECMP, it is assumed (but
-not relied on) that servers will mostly receive traffic for different flows.
-
-This implementation pushes the parallelism a little bit further by using
-one established-connections table per thread. This is equivalent to assuming
-that RSS will make a job similar to ECMP, and is pretty useful as threads don't
-need to get a lock in order to write in the table.
-
-### Hash Table
-
-A load balancer requires an efficient read and write hash table. The hash table
-used by ip6-forward is very read-efficient, but not so much for writing. In
-addition, it is not a big deal if writing into the hash table fails (again,
-MagLev uses a flow table but does not heaviliy relies on it).
-
-The plugin therefore uses a very specific (and stupid) hash table.
- - Fixed (and power of 2) number of buckets (configured at runtime)
- - Fixed (and power of 2) elements per buckets (configured at compilation time)
-
-### Reference counting
-
-When an AS is removed, there is two possible ways to react.
- - Keep using the AS for established connections
- - Change AS for established connections (likely to cause error for TCP)
-
-In the first case, although an AS is removed from the configuration, its
-associated state needs to stay around as long as it is used by at least one
-thread.
-
-In order to avoid locks, a specific reference counter is used. The design is quite
-similar to clib counters but:
- - It is possible to decrease the value
- - Summing will not zero the per-thread counters
- - Only the thread can reallocate its own counters vector (to avoid concurrency issues)
-
-This reference counter is lock free, but reading a count of 0 does not mean
-the value can be freed unless it is ensured by *other* means that no other thread
-is concurrently referencing the object. In the case of this plugin, it is assumed
-that no concurrent event will take place after a few seconds.
-
diff --git a/src/plugins/lb/lb_plugin_doc.rst b/src/plugins/lb/lb_plugin_doc.rst
new file mode 100644
index 00000000000..603453e7848
--- /dev/null
+++ b/src/plugins/lb/lb_plugin_doc.rst
@@ -0,0 +1,223 @@
+Load Balancer plugin
+====================
+
+Version
+-------
+
+The load balancer plugin is currently in *beta*. Both CLIs and APIs are
+subject to *heavy* changes, which also means feedback is very welcome
+regarding features, APIs, etc.
+
+Overview
+--------
+
+This plugin provides load balancing for VPP in a way that is largely
+inspired by Google’s MagLev:
+http://research.google.com/pubs/pub44824.html
+
+The load balancer is configured with a set of Virtual IPs (VIP, which
+can be prefixes), and for each VIP, with a set of Application Server
+addresses (ASs).
+
+There are four encap types to steer traffic to different ASs:
+
+1) IPv4+GRE and IPv6+GRE encap types: Traffic received for a given VIP
+(or VIP prefix) is tunneled using GRE towards the different ASs in a
+way that (tries to) ensure that a given session will always be tunneled
+to the same AS.
+
+2) IPv4+L3DSR encap types: L3DSR is used to overcome Layer 2
+limitations of Direct Server Return load balancing. It maps the VIP to
+DSCP bits and reuses the TOS bits to carry the DSCP bits to the server,
+which then derives the VIP from a DSCP-to-VIP mapping.
+
+Both VIPs and ASs can be IPv4 or IPv6, but for a given VIP, all ASs
+must use the same encap type (i.e. IPv4+GRE, IPv6+GRE or IPv4+L3DSR),
+meaning that for a given VIP, all AS addresses must be of the same
+family.
+
+3) IPv4/IPv6 + NAT4/NAT6 encap types: This type provides a kube-proxy
+data plane in user space, which replaces the Linux kernel’s
+iptables-based kube-proxy.
+
+Currently, the load balancer plugin supports three service types:
+
+a) Cluster IP plus Port: supports any protocol, including TCP and UDP.
+
+b) Node IP plus Node Port: currently supports only UDP.
+
+c) External Load Balancer.
+
+For Cluster IP plus Port case: kube-proxy is configured with a set of
+Virtual IPs (VIP, which can be prefixes), and for each VIP, with a set
+of AS addresses (ASs).
+
+For a specific session received for a given VIP (or VIP prefix), the
+first packet selects an AS according to the internal load balancing
+algorithm, undergoes a DNAT operation, and is sent to the chosen AS. At
+the same time, a session entry is created to record the chosen AS.
+Subsequent packets for that session first look up the session table,
+which ensures that a given session will always be routed to the same AS.
+
+Packets returned from the AS undergo a SNAT operation and are sent out.
+
+Please refer to the following for details:
+https://schd.ws/hosted_files/ossna2017/1e/VPP_K8S_GTPU_OSSNA.pdf
+
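+As an illustration of the flow described above, in C-style pseudocode
+(helper names here are hypothetical, not the plugin’s actual symbols):
+
+::
+
+   /* First packet of a session picks an AS and records the choice;
+    * subsequent packets hit the session table and reuse it. */
+   s = session_table_lookup (key);
+   if (s == NULL)
+     {
+       as = select_as (vip, flow_hash);   /* internal LB algorithm */
+       s = session_table_add (key, as);   /* remember the chosen AS */
+     }
+   dnat_and_send (pkt, s->as);            /* rewrite, forward to AS */
+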
+Performance
+-----------
+
+The load balancer has been tested with up to 1 million flows and still
+forwards more than 3 Mpps per core in such circumstances. Although
+3 Mpps is already good, it is likely that performance will improve in
+future versions.
+
+Configuration
+-------------
+
+Global LB parameters
+~~~~~~~~~~~~~~~~~~~~
+
+The load balancer needs to be configured with some parameters:
+
+::
+
+ lb conf [ip4-src-address <addr>] [ip6-src-address <addr>]
+ [buckets <n>] [timeout <s>]
+
+ip4-src-address: the source address used to send encapsulated packets
+over IPv4 in GRE4 mode, or the node IP4 address in NAT4 mode.
+
+ip6-src-address: the source address used to send encapsulated packets
+over IPv6 in GRE6 mode, or the node IP6 address in NAT6 mode.
+
+buckets: the *per-thread* established-connections-table number of
+buckets.
+
+timeout: the number of seconds a connection will remain in the
+established-connections-table while no packet for this flow is received.
+
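+For example (illustrative values, not defaults):
+
+::
+
+   lb conf ip4-src-address 10.0.0.2 ip6-src-address 2004::1 buckets 1024 timeout 40
+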
+Configure the VIPs
+~~~~~~~~~~~~~~~~~~
+
+::
+
+ lb vip <prefix> [encap (gre6|gre4|l3dsr|nat4|nat6)] \
+ [dscp <n>] [port <n> target_port <n> node_port <n>] [new_len <n>] [del]
+
+new_len is the size of the new-connection-table. It should be one or
+two orders of magnitude bigger than the number of ASs for the VIP in
+order to ensure good load balancing. Encap l3dsr together with dscp is
+used to map the VIP to DSCP bits and rewrite the DSCP bits in packets,
+so the selected server can derive the VIP from the DSCP bits and
+perform DSR. Encap nat4/nat6 together with port/target_port/node_port
+is used to implement the kube-proxy data plane.
+
+Examples:
+
+::
+
+ lb vip 2002::/16 encap gre6 new_len 1024
+ lb vip 2003::/16 encap gre4 new_len 2048
+ lb vip 80.0.0.0/8 encap gre6 new_len 16
+ lb vip 90.0.0.0/8 encap gre4 new_len 1024
+ lb vip 100.0.0.0/8 encap l3dsr dscp 2 new_len 32
+ lb vip 90.1.2.1/32 encap nat4 port 3306 target_port 3307 node_port 30964 new_len 1024
+ lb vip 2004::/16 encap nat6 port 6306 target_port 6307 node_port 30966 new_len 1024
+
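+This change also adds a per-VIP src_ip_sticky flag that hashes on the
+source IP only, so that all sessions from a given client stick to the
+same AS (see lb_test.c and node.c below). Assuming the CLI mirrors the
+test client’s keyword, a sticky VIP would look like:
+
+::
+
+   lb vip 90.0.0.0/8 encap gre4 src_ip_sticky new_len 1024
+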
+Configure the ASs (for each VIP)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ lb as <vip-prefix> [<address> [<address> [...]]] [del]
+
+You can add (or delete) any number of ASs at a time (for a single VIP).
+Note that the AS address family must correspond to the VIP encap IP
+family.
+
+Examples:
+
+::
+
+ lb as 2002::/16 2001::2 2001::3 2001::4
+ lb as 2003::/16 10.0.0.1 10.0.0.2
+ lb as 80.0.0.0/8 2001::2
+ lb as 90.0.0.0/8 10.0.0.1
+
+Configure SNAT
+~~~~~~~~~~~~~~
+
+::
+
+ lb set interface nat4 in <intfc> [del]
+
+Sets the SNAT feature on a specific interface (applicable in NAT4 mode only).
+
+::
+
+ lb set interface nat6 in <intfc> [del]
+
+Sets the SNAT feature on a specific interface (applicable in NAT6 mode only).
+
+Monitoring
+----------
+
+The plugin provides a number of counters and other information. These
+are still subject to significant changes.
+
+::
+
+ show lb
+ show lb vip
+ show lb vip verbose
+
+ show node counters
+
+Design notes
+------------
+
+Multi-Threading
+~~~~~~~~~~~~~~~
+
+MagLev is a distributed system which pseudo-randomly generates a
+new-connections-table based on AS names such that each server configured
+with the same set of ASs ends up with the same table. Connection
+stickiness is then ensured with an established-connections-table. Using
+ECMP, it is assumed (but not relied on) that servers will mostly receive
+traffic for different flows.
+
+This implementation pushes the parallelism a little bit further by using
+one established-connections table per thread. This is equivalent to
+assuming that RSS will do a job similar to ECMP’s, and is quite useful
+as threads don’t need to take a lock in order to write to the table.
+
+Hash Table
+~~~~~~~~~~
+
+A load balancer requires an efficient read and write hash table. The
+hash table used by ip6-forward is very read-efficient, but not so much
+for writing. In addition, it is not a big deal if writing into the hash
+table fails (again, MagLev uses a flow table but does not rely heavily
+on it).
+
+The plugin therefore uses a very specific (and stupid) hash table:
+
+- Fixed (and power of 2) number of buckets (configured at runtime)
+- Fixed (and power of 2) number of elements per bucket (configured at
+  compilation time)
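+
+Because both dimensions are powers of two, bucket selection reduces to
+a bitwise AND, as in this simplified sketch (buckets_mask is the field
+set up in lbhash.h; the surrounding code is abridged):
+
+::
+
+   /* buckets is a power of 2, so buckets_mask == buckets - 1 and
+    * the modulo becomes a cheap mask operation */
+   lb_hash_bucket_t *bucket = &h->buckets[hash & h->buckets_mask];
+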
+
+Reference counting
+~~~~~~~~~~~~~~~~~~
+
+When an AS is removed, there are two possible ways to react:
+
+- Keep using the AS for established connections
+- Change the AS for established connections (likely to cause errors for
+  TCP)
+
+In the first case, although an AS is removed from the configuration, its
+associated state needs to stay around as long as it is used by at least
+one thread.
+
+In order to avoid locks, a specific reference counter is used. The
+design is quite similar to clib counters but:
+
+- It is possible to decrease the value
+- Summing will not zero the per-thread counters
+- Only the thread can reallocate its own counters vector (to avoid
+  concurrency issues)
+
+This reference counter is lock free, but reading a count of 0 does not
+mean the value can be freed unless it is ensured by *other* means that
+no other thread is concurrently referencing the object. In the case of
+this plugin, it is assumed that no concurrent event will take place
+after a few seconds.
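+
+A minimal self-contained sketch of these rules (a toy illustration, not
+the plugin’s actual vlib_refcount implementation):
+
+::
+
+   typedef struct { i32 *counters; } thread_refcount_t;
+
+   /* Only the owning thread writes its own vector: no lock is needed,
+    * and v may be negative, so decreasing is possible. */
+   static inline void
+   refcount_add (thread_refcount_t *t, u32 index, i32 v)
+   {
+     t->counters[index] += v;
+   }
+
+   /* Summing reads every thread's counter but never zeroes them. */
+   static inline i64
+   refcount_sum (thread_refcount_t *threads, u32 n_threads, u32 index)
+   {
+     i64 sum = 0;
+     for (u32 i = 0; i < n_threads; i++)
+       sum += threads[i].counters[index];
+     return sum; /* 0 alone does not prove no thread holds a reference */
+   }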
diff --git a/src/plugins/lb/lb_test.c b/src/plugins/lb/lb_test.c
index 80fc38e2746..f64bdd220b5 100644
--- a/src/plugins/lb/lb_test.c
+++ b/src/plugins/lb/lb_test.c
@@ -207,6 +207,105 @@ static int api_lb_add_del_vip (vat_main_t * vam)
return ret;
}
+static int
+api_lb_add_del_vip_v2 (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_lb_add_del_vip_v2_t *mp;
+ int ret;
+ ip46_address_t ip_prefix;
+ u8 prefix_length = 0;
+ u8 protocol = 0;
+ u32 port = 0;
+ u32 encap = 0;
+ u32 dscp = ~0;
+ u32 srv_type = LB_SRV_TYPE_CLUSTERIP;
+ u32 target_port = 0;
+ u32 new_length = 1024;
+ u8 src_ip_sticky = 0;
+ int is_del = 0;
+
+ if (!unformat (line_input, "%U", unformat_ip46_prefix, &ip_prefix,
+ &prefix_length, IP46_TYPE_ANY, &prefix_length))
+ {
+ errmsg ("lb_add_del_vip: invalid vip prefix\n");
+ return -99;
+ }
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "new_len %d", &new_length))
+ ;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else if (unformat (line_input, "src_ip_sticky"))
+ src_ip_sticky = 1;
+ else if (unformat (line_input, "protocol tcp"))
+ {
+ protocol = IP_PROTOCOL_TCP;
+ }
+ else if (unformat (line_input, "protocol udp"))
+ {
+ protocol = IP_PROTOCOL_UDP;
+ }
+ else if (unformat (line_input, "port %d", &port))
+ ;
+ else if (unformat (line_input, "encap gre4"))
+ encap = LB_ENCAP_TYPE_GRE4;
+ else if (unformat (line_input, "encap gre6"))
+ encap = LB_ENCAP_TYPE_GRE6;
+ else if (unformat (line_input, "encap l3dsr"))
+ encap = LB_ENCAP_TYPE_L3DSR;
+ else if (unformat (line_input, "encap nat4"))
+ encap = LB_ENCAP_TYPE_NAT4;
+ else if (unformat (line_input, "encap nat6"))
+ encap = LB_ENCAP_TYPE_NAT6;
+ else if (unformat (line_input, "dscp %d", &dscp))
+ ;
+ else if (unformat (line_input, "type clusterip"))
+ srv_type = LB_SRV_TYPE_CLUSTERIP;
+ else if (unformat (line_input, "type nodeport"))
+ srv_type = LB_SRV_TYPE_NODEPORT;
+ else if (unformat (line_input, "target_port %d", &target_port))
+ ;
+ else
+ {
+ errmsg ("invalid arguments\n");
+ return -99;
+ }
+ }
+
+ if ((encap != LB_ENCAP_TYPE_L3DSR) && (dscp != ~0))
+ {
+ errmsg ("lb_vip_add error: should not configure dscp for none L3DSR.");
+ return -99;
+ }
+
+ if ((encap == LB_ENCAP_TYPE_L3DSR) && (dscp >= 64))
+ {
+ errmsg ("lb_vip_add error: dscp for L3DSR should be less than 64.");
+ return -99;
+ }
+
+  M (LB_ADD_DEL_VIP_V2, mp);
+ ip_address_encode (&ip_prefix, IP46_TYPE_ANY, &mp->pfx.address);
+ mp->pfx.len = prefix_length;
+ mp->protocol = (u8) protocol;
+ mp->port = htons ((u16) port);
+ mp->encap = (u8) encap;
+ mp->dscp = (u8) dscp;
+ mp->type = (u8) srv_type;
+ mp->target_port = htons ((u16) target_port);
+ mp->node_port = htons ((u16) target_port);
+ mp->new_flows_table_length = htonl (new_length);
+ mp->is_del = is_del;
+ mp->src_ip_sticky = src_ip_sticky;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
static int api_lb_add_del_as (vat_main_t * vam)
{
diff --git a/src/plugins/lb/lb_types.api b/src/plugins/lb/lb_types.api
index 3378a5fec4f..a6e1980b6be 100644
--- a/src/plugins/lb/lb_types.api
+++ b/src/plugins/lb/lb_types.api
@@ -28,9 +28,9 @@ enum lb_encap_type
LB_API_ENCAP_TYPE_GRE4 = 0,
LB_API_ENCAP_TYPE_GRE6 = 1,
LB_API_ENCAP_TYPE_L3DSR = 2,
- LB_API_ENCAP_TYPE_NAT4 = 3 ,
- LB_API_ENCAP_TYPE_NAT6 =4,
- LB_API_ENCAP_N_TYPES =5,
+ LB_API_ENCAP_TYPE_NAT4 = 3,
+ LB_API_ENCAP_TYPE_NAT6 = 4,
+ LB_API_ENCAP_N_TYPES = 5,
};
/* Lookup types */
@@ -38,8 +38,8 @@ enum lb_lkp_type_t
{
LB_API_LKP_SAME_IP_PORT = 0,
LB_API_LKP_DIFF_IP_PORT = 1,
- LB_API_LKP_ALL_PORT_IP =2,
- LB_API_LKP_N_TYPES =3,
+ LB_API_LKP_ALL_PORT_IP = 2,
+ LB_API_LKP_N_TYPES = 3,
};
enum lb_vip_type
diff --git a/src/plugins/lb/lbhash.h b/src/plugins/lb/lbhash.h
index f822d79ded8..8253e9d52f0 100644
--- a/src/plugins/lb/lbhash.h
+++ b/src/plugins/lb/lbhash.h
@@ -88,8 +88,7 @@ lb_hash_t *lb_hash_alloc(u32 buckets, u32 timeout)
sizeof(lb_hash_bucket_t) * (buckets + 1);
u8 *mem = 0;
lb_hash_t *h;
- vec_alloc_aligned(mem, size, CLIB_CACHE_LINE_BYTES);
- clib_memset(mem, 0, size);
+ vec_validate_aligned (mem, size - 1, CLIB_CACHE_LINE_BYTES);
h = (lb_hash_t *)mem;
h->buckets_mask = (buckets - 1);
h->timeout = timeout;
diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c
index b5e9da71376..a37fe11a9b4 100644
--- a/src/plugins/lb/node.c
+++ b/src/plugins/lb/node.c
@@ -174,26 +174,22 @@ lb_node_get_other_ports6 (ip6_header_t *ip60)
}
static_always_inline void
-lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
- u32 *hash, u32 *vip_idx, u8 per_port_vip)
+lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4, u32 *hash,
+ u32 *vip_idx, u8 per_port_vip)
{
vip_port_key_t key;
clib_bihash_kv_8_8_t kv, value;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ lb_vip_t *vip0;
+ u64 ports;
/* For vip case, retrieve vip index for ip lookup */
*vip_idx = vnet_buffer (p)->ip.adj_index[VLIB_TX];
- if (per_port_vip)
- {
- /* For per-port-vip case, ip lookup stores placeholder index */
- key.vip_prefix_index = *vip_idx;
- }
-
+ /* Extract the L4 port number from the packet */
if (is_input_v4)
{
- ip4_header_t *ip40;
- u64 ports;
-
ip40 = vlib_buffer_get_current (p);
if (PREDICT_TRUE(
ip40->protocol == IP_PROTOCOL_TCP
@@ -202,20 +198,10 @@ lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
| ((u64) ((udp_header_t *) (ip40 + 1))->dst_port);
else
ports = lb_node_get_other_ports4 (ip40);
-
- *hash = lb_hash_hash (*((u64 *) &ip40->address_pair), ports, 0, 0, 0);
-
- if (per_port_vip)
- {
- key.protocol = ip40->protocol;
- key.port = (u16)(ports & 0xFFFF);
- }
}
else
{
- ip6_header_t *ip60;
ip60 = vlib_buffer_get_current (p);
- u64 ports;
if (PREDICT_TRUE(
ip60->protocol == IP_PROTOCOL_TCP
@@ -224,33 +210,68 @@ lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
| ((u64) ((udp_header_t *) (ip60 + 1))->dst_port);
else
ports = lb_node_get_other_ports6 (ip60);
-
- *hash = lb_hash_hash (ip60->src_address.as_u64[0],
- ip60->src_address.as_u64[1],
- ip60->dst_address.as_u64[0],
- ip60->dst_address.as_u64[1], ports);
-
- if (per_port_vip)
- {
- key.protocol = ip60->protocol;
- key.port = (u16)(ports & 0xFFFF);
- }
}
- /* For per-port-vip case, retrieve vip index for vip_port_filter table */
if (per_port_vip)
{
+ /* For per-port-vip case, ip lookup stores placeholder index */
+ key.vip_prefix_index = *vip_idx;
+ key.port = (u16) (ports & 0xFFFF);
+ key.rsv = 0;
+ if (is_input_v4)
+ {
+ key.protocol = ip40->protocol;
+ }
+ else
+ {
+ key.protocol = ip60->protocol;
+ }
+
+ /* For per-port-vip case, retrieve vip index for vip_port_filter table */
kv.key = key.as_u64;
- if (clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) < 0)
- {
- /* return default vip */
- *vip_idx = 0;
- return;
- }
- *vip_idx = value.value;
+ if (clib_bihash_search_8_8 (&lbm->vip_index_per_port, &kv, &value) < 0)
+ {
+ /* Set default vip */
+ *vip_idx = 0;
+ }
+ else
+ {
+ *vip_idx = value.value;
+ }
+ }
+
+ vip0 = pool_elt_at_index (lbm->vips, *vip_idx);
+
+ if (is_input_v4)
+ {
+ if (lb_vip_is_src_ip_sticky (vip0))
+ {
+ *hash = lb_hash_hash (*((u64 *) &ip40->address_pair), 0, 0, 0, 0);
+ }
+ else
+ {
+ *hash =
+ lb_hash_hash (*((u64 *) &ip40->address_pair), ports, 0, 0, 0);
+ }
+ }
+ else
+ {
+ if (lb_vip_is_src_ip_sticky (vip0))
+ {
+ *hash = lb_hash_hash (
+ ip60->src_address.as_u64[0], ip60->src_address.as_u64[1],
+ ip60->dst_address.as_u64[0], ip60->dst_address.as_u64[1], 0);
+ }
+ else
+ {
+ *hash = lb_hash_hash (
+ ip60->src_address.as_u64[0], ip60->src_address.as_u64[1],
+ ip60->dst_address.as_u64[0], ip60->dst_address.as_u64[1], ports);
+ }
}
}
+/* clang-format off */
static_always_inline uword
lb_node_fn (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -565,6 +586,7 @@ lb_node_fn (vlib_main_t * vm,
return frame->n_vectors;
}
+/* clang-format on */
u8 *
format_nodeport_lb_trace (u8 * s, va_list * args)
diff --git a/src/plugins/linux-cp/CMakeLists.txt b/src/plugins/linux-cp/CMakeLists.txt
index 080f73ecce4..c891689b4b4 100644
--- a/src/plugins/linux-cp/CMakeLists.txt
+++ b/src/plugins/linux-cp/CMakeLists.txt
@@ -12,12 +12,18 @@
# limitations under the License.
vpp_find_path(LIBNL3_INCLUDE_DIR NAMES libnl3/netlink/route/link/vlan.h)
+vpp_find_path(LIBMNL_INCLUDE_DIR NAMES libmnl/libmnl.h)
if (NOT LIBNL3_INCLUDE_DIR)
message(WARNING "-- libnl3 headers not found - linux-cp plugin disabled")
return()
endif()
+if (NOT LIBMNL_INCLUDE_DIR)
+ message(WARNING "-- libmnl headers not found - linux-cp plugin disabled")
+ return()
+endif()
+
vpp_plugin_find_library(linux-cp LIBNL3_LIB libnl-3.so)
vpp_plugin_find_library(linux-cp LIBNL3_ROUTE_LIB libnl-route-3.so.200)
@@ -27,6 +33,8 @@ include_directories(${LIBMNL_INCLUDE_DIR})
add_vpp_library(lcp
SOURCES
lcp_interface.c
+ lcp_interface_sync.c
+ lcp_mpls_sync.c
lcp_adj.c
lcp.c
@@ -59,3 +67,12 @@ add_vpp_plugin(linux_cp_unittest
LINK_LIBRARIES
lcp
)
+
+add_vpp_plugin(linux_nl
+ SOURCES
+ lcp_router.c
+ lcp_nl.c
+
+ LINK_LIBRARIES
+ lcp
+)
diff --git a/src/plugins/linux-cp/FEATURE.yaml b/src/plugins/linux-cp/FEATURE.yaml
index 088b0606f58..425858591f2 100644
--- a/src/plugins/linux-cp/FEATURE.yaml
+++ b/src/plugins/linux-cp/FEATURE.yaml
@@ -3,10 +3,10 @@ name: Linux Control Plane (integration)
maintainer: Neale Ranns <neale@grahpiant.com>
description: |-
- This plugin provides the beginnings of an integration with the
- Linux network stack.
- The plugin provides the capability to 'mirror' VPP interfaces in
- the Linux kernel. This means that for any interface in VPP the user
+ These plugins provide an integration with the Linux network stack.
+
+ The "linux_cp" plugin provides the capability to 'mirror' VPP interfaces
+ in the Linux kernel. This means that for any interface in VPP the user
can create a corresponding TAP or TUN device in the Linux kernel
and have VPP plumb them together.
The plumbing mechanics is different in each direction.
@@ -17,8 +17,10 @@ description: |-
In the TX direction, packets received by VPP an the mirror Tap/Tun
are cross-connected to the VPP interfaces. For IP packets, IP output
features are applied.
- This is the beginnings of integration, because there needs to be
- an external agent that will configure (and synchronize) the IP
+ If MPLS is enabled on a VPP interface, state is synced to Linux, and
+ in the TX direction a special feature is enabled to pass MPLS packets
+ through untouched.
+ The "linux_nl" plugin listens to netlink messages and synchronizes the IP
configuration of the paired interfaces.
state: experimental
diff --git a/src/plugins/linux-cp/lcp.api b/src/plugins/linux-cp/lcp.api
index 319dd3e6483..e7eaa5a3669 100644
--- a/src/plugins/linux-cp/lcp.api
+++ b/src/plugins/linux-cp/lcp.api
@@ -21,19 +21,20 @@ option version = "1.0.0";
import "vnet/interface_types.api";
-/** \brief Set the default Linux Control Plane namespace
+/** \brief Set the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param namespace - the new default namespace; namespace[0] == 0 iff none
+ @param netns - the new default netns; netns[0] == 0 if none
*/
autoreply define lcp_default_ns_set
{
u32 client_index;
u32 context;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
-/** \brief get the default Linux Control Plane namespace
+/** \brief get the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
@@ -43,15 +44,16 @@ define lcp_default_ns_get
u32 context;
};
-/** \brief get the default Linux Control Plane namespace
+/** \brief get the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param namespace - the default namespace; namespace[0] == 0 iff none
+ @param netns - the default netns; netns[0] == 0 if none
*/
define lcp_default_ns_get_reply
{
u32 context;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
enum lcp_itf_host_type : u8
@@ -67,32 +69,59 @@ enum lcp_itf_host_type : u8
@param sw_if_index - index of VPP PHY SW interface
@param host_if_name - host tap interface name
@param host_if_type - the type of host interface to create (tun, tap)
- @param namespace - optional tap namespace; namespace[0] == 0 iff none
+ @param netns - optional tap netns; netns[0] == 0 if none
*/
autoreply autoendian define lcp_itf_pair_add_del
{
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_interface_index_t sw_if_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
};
autoendian define lcp_itf_pair_add_del_v2
{
+ option in_progress;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_interface_index_t sw_if_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
};
define lcp_itf_pair_add_del_v2_reply
{
+ option in_progress;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t host_sw_if_index;
+};
+autoendian define lcp_itf_pair_add_del_v3
+{
+ option in_progress;
+
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_interface_index_t sw_if_index;
+ string host_if_name[16]; /* IFNAMSIZ */
+ vl_api_lcp_itf_host_type_t host_if_type;
+ string netns[32]; /* LCP_NS_LEN */
+};
+define lcp_itf_pair_add_del_v3_reply
+{
+ option in_progress;
+
u32 context;
i32 retval;
+ u32 vif_index;
vl_api_interface_index_t host_sw_if_index;
};
@@ -101,13 +130,26 @@ define lcp_itf_pair_add_del_v2_reply
@param context - sender context, to match reply w/ request
@param sw_if_index - interface to use as filter (~0 == "all")
*/
-define lcp_itf_pair_get
+autoendian define lcp_itf_pair_get
{
u32 client_index;
u32 context;
u32 cursor;
};
-define lcp_itf_pair_get_reply
+autoendian define lcp_itf_pair_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+autoendian define lcp_itf_pair_get_v2
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+ vl_api_interface_index_t sw_if_index;
+};
+autoendian define lcp_itf_pair_get_v2_reply
{
u32 context;
i32 retval;
@@ -121,7 +163,7 @@ define lcp_itf_pair_get_reply
@param vif_index - tap linux index
@param host_if_name - host interface name
@param host_if_type - host interface type (tun, tap)
- @param namespace - host interface namespace
+ @param netns - host interface netns
*/
autoendian define lcp_itf_pair_details
{
@@ -131,7 +173,8 @@ autoendian define lcp_itf_pair_details
u32 vif_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
service {
@@ -139,6 +182,11 @@ service {
stream lcp_itf_pair_details;
};
+service {
+ rpc lcp_itf_pair_get_v2 returns lcp_itf_pair_get_v2_reply
+ stream lcp_itf_pair_details;
+};
+
/** \brief Replace end/begin
*/
autoreply define lcp_itf_pair_replace_begin
diff --git a/src/plugins/linux-cp/lcp.c b/src/plugins/linux-cp/lcp.c
index f4c491c9cb3..34e8550a13f 100644
--- a/src/plugins/linux-cp/lcp.c
+++ b/src/plugins/linux-cp/lcp.c
@@ -20,6 +20,7 @@
#include <net/if.h>
#include <plugins/linux-cp/lcp.h>
+#include <plugins/linux-cp/lcp_interface.h>
lcp_main_t lcp_main;
@@ -28,8 +29,9 @@ lcp_get_default_ns (void)
{
lcp_main_t *lcpm = &lcp_main;
- if (lcpm->default_namespace[0] == 0)
- return 0;
+ if (!lcpm->default_namespace || lcpm->default_namespace[0] == 0)
+ return NULL;
+
return lcpm->default_namespace;
}
@@ -59,16 +61,15 @@ lcp_set_default_ns (u8 *ns)
if (!p || *p == 0)
{
- clib_memset (lcpm->default_namespace, 0,
- sizeof (lcpm->default_namespace));
+ lcpm->default_namespace = NULL;
if (lcpm->default_ns_fd > 0)
close (lcpm->default_ns_fd);
lcpm->default_ns_fd = 0;
return 0;
}
- clib_strncpy ((char *) lcpm->default_namespace, p, LCP_NS_LEN - 1);
-
+ vec_validate_init_c_string (lcpm->default_namespace, p,
+ clib_strnlen (p, LCP_NS_LEN));
s = format (0, "/var/run/netns/%s%c", (char *) lcpm->default_namespace, 0);
lcpm->default_ns_fd = open ((char *) s, O_RDONLY);
vec_free (s);
@@ -76,6 +77,112 @@ lcp_set_default_ns (u8 *ns)
return 0;
}
+void
+lcp_set_sync (u8 is_auto)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->lcp_sync = (is_auto != 0);
+
+ // If we set to 'on', do a one-off sync of LCP interfaces
+ if (is_auto)
+ lcp_itf_pair_sync_state_all ();
+}
+
+int
+lcp_sync (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->lcp_sync;
+}
+
+void
+lcp_set_auto_subint (u8 is_auto)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->lcp_auto_subint = (is_auto != 0);
+}
+
+int
+lcp_auto_subint (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->lcp_auto_subint;
+}
+
+void
+lcp_set_del_static_on_link_down (u8 is_del)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->del_static_on_link_down = (is_del != 0);
+}
+
+u8
+lcp_get_del_static_on_link_down (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->del_static_on_link_down;
+}
+
+void
+lcp_set_del_dynamic_on_link_down (u8 is_del)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->del_dynamic_on_link_down = (is_del != 0);
+}
+
+u8
+lcp_get_del_dynamic_on_link_down (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->del_dynamic_on_link_down;
+}
+
+void
+lcp_set_netlink_processing_active (u8 is_processing)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->netlink_processing_active = (is_processing != 0);
+}
+
+u8
+lcp_get_netlink_processing_active (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->netlink_processing_active;
+}
+
+void
+lcp_set_default_num_queues (u16 num_queues, u8 is_tx)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ if (is_tx)
+ lcpm->num_tx_queues = num_queues;
+ else
+ lcpm->num_rx_queues = num_queues;
+}
+
+u16
+lcp_get_default_num_queues (u8 is_tx)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ if (is_tx)
+ return lcpm->num_tx_queues;
+
+ return lcpm->num_rx_queues ?: vlib_num_workers ();
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/linux-cp/lcp.h b/src/plugins/linux-cp/lcp.h
index 7fdad3798bc..e89b149f67d 100644
--- a/src/plugins/linux-cp/lcp.h
+++ b/src/plugins/linux-cp/lcp.h
@@ -22,11 +22,17 @@
typedef struct lcp_main_s
{
u16 msg_id_base; /* API message ID base */
- u8 default_namespace[LCP_NS_LEN]; /* default namespace if set */
+ u8 *default_namespace; /* default namespace if set */
int default_ns_fd;
- u8 auto_intf;
- /* Set when Unit testing */
- u8 test_mode;
+ u8 lcp_auto_subint; /* Automatically create/delete LCP sub-interfaces */
+ u8 lcp_sync; /* Automatically sync VPP changes to LCP */
+ u8 del_static_on_link_down; /* Delete static routes when link goes down */
+ u8 del_dynamic_on_link_down; /* Delete dynamic routes when link goes down */
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u8 test_mode; /* Set when Unit testing */
+ u8 netlink_processing_active; /* Set while a batch of Netlink messages are
+ being processed */
} lcp_main_t;
extern lcp_main_t lcp_main;
@@ -38,6 +44,31 @@ int lcp_set_default_ns (u8 *ns);
u8 *lcp_get_default_ns (void); /* Returns NULL or shared string */
int lcp_get_default_ns_fd (void);
+/**
+ * Get/Set whether to delete static routes when the link goes down.
+ */
+void lcp_set_del_static_on_link_down (u8 is_del);
+u8 lcp_get_del_static_on_link_down (void);
+
+/**
+ * Get/Set whether to delete dynamic routes when the link goes down.
+ */
+void lcp_set_del_dynamic_on_link_down (u8 is_del);
+u8 lcp_get_del_dynamic_on_link_down (void);
+
+/**
+ * Get/Set when we're processing a batch of netlink messages.
+ * This is used to avoid looping messages between lcp-sync and netlink.
+ */
+void lcp_set_netlink_processing_active (u8 is_processing);
+u8 lcp_get_netlink_processing_active (void);
+
+/**
+ * Get/Set the default queue number for LCP host taps.
+ */
+void lcp_set_default_num_queues (u16 num_queues, u8 is_tx);
+u16 lcp_get_default_num_queues (u8 is_tx);
+
#endif
/*
diff --git a/src/plugins/linux-cp/lcp.rst b/src/plugins/linux-cp/lcp.rst
index 6d81901cf7b..6f82a29bfbb 100644
--- a/src/plugins/linux-cp/lcp.rst
+++ b/src/plugins/linux-cp/lcp.rst
@@ -8,9 +8,9 @@ Linux Control Plane Integration
Overview
________
-This plugin allows VPP to integrate with the Linux. The
+This plugin allows VPP to integrate with the Linux kernel. The
general model is that Linux is the network stack, i.e. it has the
-control plane protocols, like ARP, IPv6 ND/MLD, Ping, etc, and VPP
+control plane protocols, like ARP, IPv6 ND/MLD, ping, etc, and VPP
provides a SW based ASIC for forwarding.
Interfaces
@@ -20,16 +20,17 @@ VPP owns the interfaces in the system; physical (.e.g PCI), quasi
physical (e.g. vhost), or virtual (e.g. tunnel). However,
for the Linux networking stack to function it needs a representation
of these interfaces; it needs a mirror image in the kernel. For this
-mirror we use a Tap interface, if the VPP interface is multi-point, a
-Tun if it's point-to-point. A physical and its mirror form an
+mirror we use a TAP interface, if the VPP interface is multi-point, a
+TUN if it's point-to-point. A physical and its mirror form an
interface 'pair'.
-The host interface has two identities; the sw_if_index of the Tap and
-the virtual interface index in the kernel. It may be in a Linux namespace.
+The host interface has two identities; the sw_if_index of the TAP and
+the virtual interface index in the kernel. It may be in a Linux network
+namespace.
The creation of the interface pairs is required from the control
plane. It can be statically configured in the VPP startup
-configuration file. The intent here was to make the pair creation
+configuration file. The intent here is to make the pair creation
explicit, rather than have VPP guess which of the interfaces it owns
require a mirror.
@@ -41,27 +42,23 @@ interfaces. Any configuration that is made on these Linux interfaces,
also needs to be applied on the corresponding physical interface in
VPP.
-This is functionality is not provided in this plugin, but it can be
-achieved in various ways, for example by listening to the netlink
-messages and applying the config. As a result all e.g. routes
-programmed in Linux, will also be present in VPP's FIB.
+This functionality is provided by the "linux_nl" plugin.
-Linux will own the [ARP/ND] nieghbor tables (which will be copied via
+Linux will own the [ARP/ND] neighbor tables (which will be copied via
netlink to VPP also). This means that Linux will send packets with the
peer's MAC address in the rewrite to VPP. The receiving TAP interface
must therefore be in promiscuous mode.
-
Forwarding
__________
The basic principle is to x-connect traffic from a Linux host interface
-(received on the Tap/Tun) to its paired the physical, and vice-versa.
+(received on the tap/tun) to its paired physical interface, and vice versa.
Host to Physical
^^^^^^^^^^^^^^^^
-All packets sent by the host, and received by VPP on a Tap/Tun should
+All packets sent by the host, and received by VPP on a tap/tun should
be sent to its paired physical interface. However, they should be sent
with the same consequences as if they had originated from VPP,
i.e. they should be subject to all output features on the physical
@@ -73,17 +70,18 @@ adjacency that VPP would have used to send this packet; this adjacency
is stored in the buffer's meta data so that it is available to all
output features. Then the packet is sent through the physical
interface's IP output feature arc.
+
All ARP packets are x-connected from the tap to the physical.
Physical to Host
^^^^^^^^^^^^^^^^
All ARP packets received on the physical are sent to the paired
-Tap. This allows the Linux network stack to build the nieghbour table.
+tap. This allows the Linux network stack to build the neighbor table.
IP packets that are punted are sent to the host. They are sent on the
tap that is paired with the physical on which they were originally
-received. The packet is sent on the Tap/Tun 'exactly' as it was
+received. The packet is sent on the tap/tun 'exactly' as it was
received (i.e. with the L2 rewrite) but post any translations that
input features may have made.
@@ -92,5 +90,4 @@ Recommendations
^^^^^^^^^^^^^^^
When using this plugin disable the ARP, ND, IGMP plugins; this is the
-task for Linux.
-Disable ping plugin, since Linux will now respond.
+task for Linux. Disable the ping plugin, since Linux will now respond.
diff --git a/src/plugins/linux-cp/lcp_adj.c b/src/plugins/linux-cp/lcp_adj.c
index bfbc2fec913..b10c70616b5 100644
--- a/src/plugins/linux-cp/lcp_adj.c
+++ b/src/plugins/linux-cp/lcp_adj.c
@@ -185,8 +185,8 @@ lcp_adj_show_cmd (vlib_main_t *vm, unformat_input_t *input,
if (unformat (input, "verbose"))
verbose = 1;
- vlib_cli_output (vm, "Linux-CP Adjs:\n%U", BV (format_bihash), &lcp_adj_tbl,
- verbose);
+ vlib_cli_output (vm, "linux-cp adjacencies:\n%U", BV (format_bihash),
+ &lcp_adj_tbl, verbose);
return 0;
}
@@ -210,7 +210,7 @@ lcp_adj_init (vlib_main_t *vm)
{
adj_type = adj_delegate_register_new_type (&lcp_adj_vft);
- BV (clib_bihash_init) (&lcp_adj_tbl, "linux-cp ADJ table", 1024, 1 << 24);
+ BV (clib_bihash_init) (&lcp_adj_tbl, "linux-cp adjacencies", 1024, 1 << 24);
BV (clib_bihash_set_kvp_format_fn) (&lcp_adj_tbl, format_lcp_adj_kvp);
return (NULL);
diff --git a/src/plugins/linux-cp/lcp_api.c b/src/plugins/linux-cp/lcp_api.c
index c9aa01566c6..74421230e9d 100644
--- a/src/plugins/linux-cp/lcp_api.c
+++ b/src/plugins/linux-cp/lcp_api.c
@@ -41,27 +41,11 @@ api_encode_host_type (lip_host_type_t type)
return LCP_API_ITF_HOST_TAP;
}
-void
-lcp_set_auto_intf (u8 is_auto)
-{
- lcp_main_t *lcpm = &lcp_main;
-
- lcpm->auto_intf = (is_auto != 0);
-}
-
-int
-lcp_auto_intf (void)
-{
- lcp_main_t *lcpm = &lcp_main;
-
- return lcpm->auto_intf;
-}
-
static int
vl_api_lcp_itf_pair_add (u32 phy_sw_if_index, lip_host_type_t lip_host_type,
u8 *mp_host_if_name, size_t sizeof_host_if_name,
u8 *mp_namespace, size_t sizeof_mp_namespace,
- u32 *host_sw_if_index_p)
+ u32 *host_sw_if_index_p, u32 *vif_index_p)
{
u8 *host_if_name, *netns;
int host_len, netns_len, rv;
@@ -80,6 +64,13 @@ vl_api_lcp_itf_pair_add (u32 phy_sw_if_index, lip_host_type_t lip_host_type,
rv = lcp_itf_pair_create (phy_sw_if_index, host_if_name, lip_host_type,
netns, host_sw_if_index_p);
+ if (!rv && (vif_index_p != NULL))
+ {
+ lcp_itf_pair_t *pair =
+ lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index));
+ *vif_index_p = pair->lip_vif_index;
+ }
+
vec_free (host_if_name);
vec_free (netns);
@@ -94,20 +85,15 @@ vl_api_lcp_itf_pair_add_del_t_handler (vl_api_lcp_itf_pair_add_del_t *mp)
lip_host_type_t lip_host_type;
int rv;
- if (!vnet_sw_if_index_is_api_valid (mp->sw_if_index))
- {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
- }
+ VALIDATE_SW_IF_INDEX_END (mp);
phy_sw_if_index = mp->sw_if_index;
lip_host_type = api_decode_host_type (mp->host_if_type);
if (mp->is_add)
{
- rv =
- vl_api_lcp_itf_pair_add (phy_sw_if_index, lip_host_type,
- mp->host_if_name, sizeof (mp->host_if_name),
- mp->namespace, sizeof (mp->namespace), NULL);
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns), NULL, NULL);
}
else
{
@@ -115,7 +101,7 @@ vl_api_lcp_itf_pair_add_del_t_handler (vl_api_lcp_itf_pair_add_del_t *mp)
}
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (VL_API_LCP_ITF_PAIR_ADD_DEL_REPLY);
+ REPLY_MACRO_END (VL_API_LCP_ITF_PAIR_ADD_DEL_REPLY);
}
static void
@@ -126,20 +112,45 @@ vl_api_lcp_itf_pair_add_del_v2_t_handler (vl_api_lcp_itf_pair_add_del_v2_t *mp)
lip_host_type_t lip_host_type;
int rv;
- if (!vnet_sw_if_index_is_api_valid (mp->sw_if_index))
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ phy_sw_if_index = mp->sw_if_index;
+ lip_host_type = api_decode_host_type (mp->host_if_type);
+ if (mp->is_add)
{
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns),
+ &host_sw_if_index, NULL);
}
+ else
+ {
+ rv = lcp_itf_pair_delete (phy_sw_if_index);
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_ADD_DEL_V2_REPLY,
+ { rmp->host_sw_if_index = host_sw_if_index; });
+}
+
+static void
+vl_api_lcp_itf_pair_add_del_v3_t_handler (vl_api_lcp_itf_pair_add_del_v3_t *mp)
+{
+ u32 phy_sw_if_index, host_sw_if_index = ~0, vif_index = ~0;
+ vl_api_lcp_itf_pair_add_del_v3_reply_t *rmp;
+ lip_host_type_t lip_host_type;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX_END (mp);
phy_sw_if_index = mp->sw_if_index;
lip_host_type = api_decode_host_type (mp->host_if_type);
if (mp->is_add)
{
- rv = vl_api_lcp_itf_pair_add (phy_sw_if_index, lip_host_type,
- mp->host_if_name,
- sizeof (mp->host_if_name), mp->namespace,
- sizeof (mp->namespace), &host_sw_if_index);
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns),
+ &host_sw_if_index, &vif_index);
}
else
{
@@ -147,8 +158,10 @@ vl_api_lcp_itf_pair_add_del_v2_t_handler (vl_api_lcp_itf_pair_add_del_v2_t *mp)
}
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO2 (VL_API_LCP_ITF_PAIR_ADD_DEL_V2_REPLY,
- { rmp->host_sw_if_index = ntohl (host_sw_if_index); });
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_ADD_DEL_V3_REPLY, ({
+ rmp->host_sw_if_index = host_sw_if_index;
+ rmp->vif_index = vif_index;
+ }));
}
static void
@@ -158,7 +171,7 @@ send_lcp_itf_pair_details (index_t lipi, vl_api_registration_t *rp,
vl_api_lcp_itf_pair_details_t *rmp;
lcp_itf_pair_t *lcp_pair = lcp_itf_pair_get (lipi);
- REPLY_MACRO_DETAILS4 (
+ REPLY_MACRO_DETAILS4_END (
VL_API_LCP_ITF_PAIR_DETAILS, rp, context, ({
rmp->phy_sw_if_index = lcp_pair->lip_phy_sw_if_index;
rmp->host_sw_if_index = lcp_pair->lip_host_sw_if_index;
@@ -167,9 +180,11 @@ send_lcp_itf_pair_details (index_t lipi, vl_api_registration_t *rp,
memcpy_s (rmp->host_if_name, sizeof (rmp->host_if_name),
lcp_pair->lip_host_name, vec_len (lcp_pair->lip_host_name));
+ rmp->host_if_name[vec_len (lcp_pair->lip_host_name)] = 0;
- clib_strncpy ((char *) rmp->namespace, (char *) lcp_pair->lip_namespace,
- vec_len (lcp_pair->lip_namespace));
+ memcpy_s (rmp->netns, sizeof (rmp->netns), lcp_pair->lip_namespace,
+ vec_len (lcp_pair->lip_namespace));
+ rmp->netns[vec_len (lcp_pair->lip_namespace)] = 0;
}));
}
@@ -179,19 +194,51 @@ vl_api_lcp_itf_pair_get_t_handler (vl_api_lcp_itf_pair_get_t *mp)
vl_api_lcp_itf_pair_get_reply_t *rmp;
i32 rv = 0;
- REPLY_AND_DETAILS_MACRO (
+ REPLY_AND_DETAILS_MACRO_END (
VL_API_LCP_ITF_PAIR_GET_REPLY, lcp_itf_pair_pool,
({ send_lcp_itf_pair_details (cursor, rp, mp->context); }));
}
static void
+vl_api_lcp_itf_pair_get_v2_t_handler (vl_api_lcp_itf_pair_get_v2_t *mp)
+{
+ vl_api_lcp_itf_pair_get_v2_reply_t *rmp;
+ i32 rv = 0;
+
+ if (mp->sw_if_index == ~0)
+ {
+ REPLY_AND_DETAILS_MACRO_END (
+ VL_API_LCP_ITF_PAIR_GET_REPLY, lcp_itf_pair_pool,
+ ({ send_lcp_itf_pair_details (cursor, rp, mp->context); }));
+ }
+ else
+ {
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ u32 pair_index = lcp_itf_pair_find_by_phy (mp->sw_if_index);
+ if (pair_index == INDEX_INVALID)
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto bad_sw_if_index;
+ }
+ send_lcp_itf_pair_details (
+ pair_index, vl_api_client_index_to_registration (mp->client_index),
+ mp->context);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_GET_V2_REPLY,
+ ({ rmp->cursor = ~0; }));
+ }
+}
+
+static void
vl_api_lcp_default_ns_set_t_handler (vl_api_lcp_default_ns_set_t *mp)
{
vl_api_lcp_default_ns_set_reply_t *rmp;
int rv;
- mp->namespace[LCP_NS_LEN - 1] = 0;
- rv = lcp_set_default_ns (mp->namespace);
+ mp->netns[LCP_NS_LEN - 1] = 0;
+ rv = lcp_set_default_ns (mp->netns);
REPLY_MACRO (VL_API_LCP_DEFAULT_NS_SET_REPLY);
}
@@ -199,25 +246,14 @@ vl_api_lcp_default_ns_set_t_handler (vl_api_lcp_default_ns_set_t *mp)
static void
vl_api_lcp_default_ns_get_t_handler (vl_api_lcp_default_ns_get_t *mp)
{
- lcp_main_t *lcpm = &lcp_main;
vl_api_lcp_default_ns_get_reply_t *rmp;
- vl_api_registration_t *reg;
- char *ns;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id = (VL_API_LCP_DEFAULT_NS_GET_REPLY + lcpm->msg_id_base);
- rmp->context = mp->context;
-
- ns = (char *) lcp_get_default_ns ();
- if (ns)
- clib_strncpy ((char *) rmp->namespace, ns, LCP_NS_LEN - 1);
- vl_api_send_msg (reg, (u8 *) rmp);
+ REPLY_MACRO_DETAILS2 (VL_API_LCP_DEFAULT_NS_GET_REPLY, ({
+ char *ns = (char *) lcp_get_default_ns ();
+ if (ns)
+ clib_strncpy ((char *) rmp->netns, ns,
+ LCP_NS_LEN - 1);
+ }));
}
static void
@@ -250,7 +286,7 @@ vl_api_lcp_itf_pair_replace_end_t_handler (
#include <linux-cp/lcp.api.c>
static clib_error_t *
-lcp_plugin_api_hookup (vlib_main_t *vm)
+lcp_api_init (vlib_main_t *vm)
{
/* Ask for a correctly-sized block of API message decode slots */
lcp_msg_id_base = setup_message_id_table ();
@@ -258,7 +294,7 @@ lcp_plugin_api_hookup (vlib_main_t *vm)
return (NULL);
}
-VLIB_INIT_FUNCTION (lcp_plugin_api_hookup);
+VLIB_INIT_FUNCTION (lcp_api_init);
#include <vpp/app/version.h>
VLIB_PLUGIN_REGISTER () = {
diff --git a/src/plugins/linux-cp/lcp_cli.c b/src/plugins/linux-cp/lcp_cli.c
index cb874b1c023..0dcf600b301 100644
--- a/src/plugins/linux-cp/lcp_cli.c
+++ b/src/plugins/linux-cp/lcp_cli.c
@@ -34,81 +34,178 @@ lcp_itf_pair_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
vnet_main_t *vnm = vnet_get_main ();
- u32 sw_if_index;
- u8 *host_if_name;
- lip_host_type_t host_if_type;
- u8 *ns;
- int r;
+ u32 sw_if_index = ~0;
+ u8 *host_if_name = NULL;
+ lip_host_type_t host_if_type = LCP_ITF_HOST_TAP;
+ u8 *ns = NULL;
+ clib_error_t *error = NULL;
+
+ if (unformat_user (input, unformat_line_input, line_input))
+ {
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "host-if %s", &host_if_name))
+ ;
+ else if (unformat (line_input, "netns %s", &ns))
+ ;
+ else if (unformat (line_input, "tun"))
+ host_if_type = LCP_ITF_HOST_TUN;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+ unformat_free (line_input);
+ }
+
+ if (error)
+ ;
+ else if (sw_if_index == ~0)
+ error = clib_error_return (0, "interface name or sw_if_index required");
+ else if (!host_if_name)
+ error = clib_error_return (0, "host interface name required");
+ else if (vec_len (ns) >= LCP_NS_LEN)
+ error = clib_error_return (
+ 0, "Namespace name should be fewer than %d characters", LCP_NS_LEN);
+ else
+ {
+ int r;
+
+ r = lcp_itf_pair_create (sw_if_index, host_if_name, host_if_type, ns,
+ NULL);
+ if (r)
+ error = clib_error_return (0, "linux-cp pair creation failed (%d)", r);
+ }
+
+ vec_free (host_if_name);
+ vec_free (ns);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = {
+ .path = "lcp create",
+ .short_help = "lcp create <sw_if_index>|<if-name> host-if <host-if-name> "
+ "netns <namespace> [tun]",
+ .function = lcp_itf_pair_create_command_fn,
+};
+
+static clib_error_t *
+lcp_sync_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
- sw_if_index = ~0;
- host_if_name = ns = NULL;
- host_if_type = LCP_ITF_HOST_TAP;
-
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "%d", &sw_if_index))
- ;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- ;
- else if (unformat (line_input, "host-if %s", &host_if_name))
- ;
- else if (unformat (line_input, "netns %s", &ns))
- ;
- else if (unformat (line_input, "tun"))
- host_if_type = LCP_ITF_HOST_TUN;
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_sync (1);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_sync (0);
else
- {
- unformat_free (line_input);
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
}
unformat_free (line_input);
+ return 0;
+}
- if (!host_if_name)
- {
- vec_free (ns);
- return clib_error_return (0, "host interface name required");
- }
+VLIB_CLI_COMMAND (lcp_sync_command, static) = {
+ .path = "lcp lcp-sync",
+ .short_help = "lcp lcp-sync [on|enable|off|disable]",
+ .function = lcp_sync_command_fn,
+};
- if (sw_if_index == ~0)
- {
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (0, "interface name or sw_if_index required");
- }
+static clib_error_t *
+lcp_auto_subint_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
- if (vec_len (ns) >= LCP_NS_LEN)
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (
- 0, "Namespace name should be fewer than %d characters", LCP_NS_LEN);
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_auto_subint (1);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_auto_subint (0);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
}
- r = lcp_itf_pair_create (sw_if_index, host_if_name, host_if_type, ns, NULL);
+ unformat_free (line_input);
+ return 0;
+}
- vec_free (host_if_name);
- vec_free (ns);
+VLIB_CLI_COMMAND (lcp_auto_subint_command, static) = {
+ .path = "lcp lcp-auto-subint",
+ .short_help = "lcp lcp-auto-subint [on|enable|off|disable]",
+ .function = lcp_auto_subint_command_fn,
+};
- if (r)
- return clib_error_return (0, "linux-cp pair creation failed (%d)", r);
+static clib_error_t *
+lcp_param_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del-static-on-link-down"))
+ {
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_del_static_on_link_down (1 /* is_del */);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_del_static_on_link_down (0 /* is_del */);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ else if (unformat (line_input, "del-dynamic-on-link-down"))
+ {
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_del_dynamic_on_link_down (1 /* is_del */);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_del_dynamic_on_link_down (0 /* is_del */);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
return 0;
}
-VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = {
- .path = "lcp create",
- .short_help = "lcp create <sw_if_index>|<if-name> host-if <host-if-name> "
- "netns <namespace> [tun]",
- .function = lcp_itf_pair_create_command_fn,
+VLIB_CLI_COMMAND (lcp_param_command, static) = {
+ .path = "lcp param",
+ .short_help = "lcp param [del-static-on-link-down (on|enable|off|disable)] "
+ "[del-dynamic-on-link-down (on|enable|off|disable)]",
+ .function = lcp_param_command_fn,
};
static clib_error_t *
@@ -118,6 +215,7 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
unformat_input_t _line_input, *line_input = &_line_input;
u8 *ns;
int r;
+ clib_error_t *error = NULL;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -130,10 +228,15 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "clear netns"))
;
+ else
+ {
+ vec_free (ns);
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
}
- unformat_free (line_input);
-
vlib_cli_output (vm, "lcp set default netns '%s'\n", (char *) ns);
r = lcp_set_default_ns (ns);
@@ -141,7 +244,10 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
if (r)
return clib_error_return (0, "linux-cp set default netns failed (%d)", r);
- return 0;
+done:
+ unformat_free (line_input);
+
+ return error;
}
VLIB_CLI_COMMAND (lcp_default_netns_command, static) = {
@@ -156,36 +262,42 @@ lcp_itf_pair_delete_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
vnet_main_t *vnm = vnet_get_main ();
unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index;
- int r;
+ u32 sw_if_index = ~0;
+ clib_error_t *error = NULL;
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- sw_if_index = ~0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ if (unformat_user (input, unformat_line_input, line_input))
{
- if (unformat (line_input, "%d", &sw_if_index))
- ;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+ unformat_free (line_input);
}
- unformat_free (line_input);
-
- if (sw_if_index == ~0)
- return clib_error_return (0, "interface name or sw_if_index required");
+ if (error)
+ ;
+ else if (sw_if_index == ~0)
+ error = clib_error_return (0, "interface name or sw_if_index required");
+ else
+ {
+ int r;
- r = lcp_itf_pair_delete (sw_if_index);
+ r = lcp_itf_pair_delete (sw_if_index);
+ if (r)
+ error = clib_error_return (0, "linux-cp pair deletion failed (%d)", r);
+ }
- if (r)
- return clib_error_return (0, "linux-cp pair deletion failed (%d)", r);
- return 0;
+ return error;
}
VLIB_CLI_COMMAND (lcp_itf_pair_delete_command, static) = {
diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c
index da409619746..e1f4a6a1d69 100644
--- a/src/plugins/linux-cp/lcp_interface.c
+++ b/src/plugins/linux-cp/lcp_interface.c
@@ -21,6 +21,7 @@
#include <linux-cp/lcp_interface.h>
#include <netlink/route/link/vlan.h>
+#include <linux/if_ether.h>
#include <vnet/plugin/plugin.h>
@@ -38,12 +39,12 @@
#include <vlibapi/api_helper_macros.h>
#include <vnet/ipsec/ipsec_punt.h>
-static vlib_log_class_t lcp_itf_pair_logger;
+vlib_log_class_t lcp_itf_pair_logger;
/**
* Pool of LIP objects
*/
-lcp_itf_pair_t *lcp_itf_pair_pool;
+lcp_itf_pair_t *lcp_itf_pair_pool = NULL;
u32
lcp_itf_num_pairs (void)
@@ -72,12 +73,6 @@ lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft)
vec_add1 (lcp_itf_vfts, *lcp_itf_vft);
}
-#define LCP_ITF_PAIR_DBG(...) \
- vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
-
-#define LCP_ITF_PAIR_INFO(...) \
- vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
-
u8 *
format_lcp_itf_pair (u8 *s, va_list *args)
{
@@ -136,6 +131,13 @@ lcp_itf_pair_show (u32 phy_sw_if_index)
ns = lcp_get_default_ns ();
vlib_cli_output (vm, "lcp default netns '%s'\n",
ns ? (char *) ns : "<unset>");
+ vlib_cli_output (vm, "lcp lcp-auto-subint %s\n",
+ lcp_auto_subint () ? "on" : "off");
+ vlib_cli_output (vm, "lcp lcp-sync %s\n", lcp_sync () ? "on" : "off");
+ vlib_cli_output (vm, "lcp del-static-on-link-down %s\n",
+ lcp_get_del_static_on_link_down () ? "on" : "off");
+ vlib_cli_output (vm, "lcp del-dynamic-on-link-down %s\n",
+ lcp_get_del_dynamic_on_link_down () ? "on" : "off");
if (phy_sw_if_index == ~0)
{
@@ -152,6 +154,11 @@ lcp_itf_pair_show (u32 phy_sw_if_index)
lcp_itf_pair_t *
lcp_itf_pair_get (u32 index)
{
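+  /* A sketch of why these guards are needed: pool_elt_at_index() asserts
+   * on an out-of-range index (in debug images), so return NULL for an
+   * unallocated pool or an invalid index instead.
+   */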
+ if (!lcp_itf_pair_pool)
+ return NULL;
+ if (index == INDEX_INVALID)
+ return NULL;
+
return pool_elt_at_index (lcp_itf_pair_pool, index);
}
@@ -168,18 +175,6 @@ lcp_itf_pair_find_by_vif (u32 vif_index)
return INDEX_INVALID;
}
-int
-lcp_itf_pair_add_sub (u32 vif, u8 *host_if_name, u32 sub_sw_if_index,
- u32 phy_sw_if_index, u8 *ns)
-{
- lcp_itf_pair_t *lip;
-
- lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index));
-
- return lcp_itf_pair_add (lip->lip_host_sw_if_index, sub_sw_if_index,
- host_if_name, vif, lip->lip_host_type, ns);
-}
-
const char *lcp_itf_l3_feat_names[N_LCP_ITF_HOST][N_AF] = {
[LCP_ITF_HOST_TAP] = {
[AF_IP4] = "linux-cp-xc-ip4",
@@ -235,17 +230,23 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
index_t lipi;
lcp_itf_pair_t *lip;
+ if (host_sw_if_index == ~0)
+ {
+ LCP_ITF_PAIR_ERR ("pair_add: Cannot add LIP - invalid host");
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+
lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
- LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%v",
+ if (lipi != INDEX_INVALID)
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%s",
format_vnet_sw_if_index_name, vnet_get_main (),
host_sw_if_index, format_vnet_sw_if_index_name,
vnet_get_main (), phy_sw_if_index, host_name, host_index,
ns);
- if (lipi != INDEX_INVALID)
- return VNET_API_ERROR_VALUE_EXIST;
-
/*
* Create a new pair.
*/
@@ -266,9 +267,6 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
lip->lip_vif_index = host_index;
lip->lip_namespace = vec_dup (ns);
- if (lip->lip_host_sw_if_index == ~0)
- return 0;
-
/*
* First use of this host interface.
* Enable the x-connect feature on the host to send
@@ -314,10 +312,13 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
}
else
{
- vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 1, NULL,
- 0);
- vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 1, NULL,
- 0);
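+	  /* The pair being added is already in the vif DB, so a count of
+	   * one means this is the first pair: enable the punt features
+	   * only once.
+	   */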
+ if (hash_elts (lip_db_by_vif) == 1)
+ {
+ vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 1,
+ NULL, 0);
+ vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 1,
+ NULL, 0);
+ }
}
/* invoke registered callbacks for pair addition */
@@ -336,7 +337,7 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
}
static clib_error_t *
-lcp_netlink_add_link_vlan (int parent, u32 vlan, const char *name)
+lcp_netlink_add_link_vlan (int parent, u32 vlan, u16 proto, const char *name)
{
struct rtnl_link *link;
struct nl_sock *sk;
@@ -344,17 +345,25 @@ lcp_netlink_add_link_vlan (int parent, u32 vlan, const char *name)
sk = nl_socket_alloc ();
if ((err = nl_connect (sk, NETLINK_ROUTE)) < 0)
- return clib_error_return (NULL, "Unable to connect socket: %d", err);
+ {
+ LCP_ITF_PAIR_ERR ("netlink_add_link_vlan: connect error: %s",
+ nl_geterror (err));
+ return clib_error_return (NULL, "Unable to connect socket: %d", err);
+ }
link = rtnl_link_vlan_alloc ();
rtnl_link_set_link (link, parent);
rtnl_link_set_name (link, name);
-
rtnl_link_vlan_set_id (link, vlan);
+ rtnl_link_vlan_set_protocol (link, htons (proto));
if ((err = rtnl_link_add (sk, link, NLM_F_CREATE)) < 0)
- return clib_error_return (NULL, "Unable to add link %s: %d", name, err);
+ {
+ LCP_ITF_PAIR_ERR ("netlink_add_link_vlan: link add error: %s",
+ nl_geterror (err));
+ return clib_error_return (NULL, "Unable to add link %s: %d", name, err);
+ }
rtnl_link_put (link);
nl_close (sk);
@@ -400,10 +409,11 @@ lcp_itf_pair_del (u32 phy_sw_if_index)
lip = lcp_itf_pair_get (lipi);
- LCP_ITF_PAIR_INFO ("pair delete: {%U, %U, %s}", format_vnet_sw_if_index_name,
- vnet_get_main (), lip->lip_phy_sw_if_index,
- format_vnet_sw_if_index_name, vnet_get_main (),
- lip->lip_host_sw_if_index, lip->lip_host_name);
+ LCP_ITF_PAIR_NOTICE (
+ "pair_del: host:%U phy:%U host_if:%v vif:%d ns:%v",
+ format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_host_sw_if_index,
+ format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_phy_sw_if_index,
+ lip->lip_host_name, lip->lip_vif_index, lip->lip_namespace);
/* invoke registered callbacks for pair deletion */
vec_foreach (vft, lcp_itf_vfts)
@@ -432,12 +442,14 @@ lcp_itf_pair_del (u32 phy_sw_if_index)
}
else
{
- vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 0, NULL,
- 0);
- vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 0, NULL,
- 0);
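+	  /* The pair being deleted is still in the vif DB, so a count of
+	   * one means this is the last pair: disable the punt features
+	   * again.
+	   */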
+ if (hash_elts (lip_db_by_vif) == 1)
+ {
+ vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 0,
+ NULL, 0);
+ vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 0,
+ NULL, 0);
+ }
}
-
lip_db_by_phy[phy_sw_if_index] = INDEX_INVALID;
lip_db_by_host[lip->lip_host_sw_if_index] = INDEX_INVALID;
hash_unset (lip_db_by_vif, lip->lip_vif_index);
@@ -454,24 +466,45 @@ lcp_itf_pair_delete_by_index (index_t lipi)
{
u32 host_sw_if_index;
lcp_itf_pair_t *lip;
- u8 *host_name;
+ u8 *host_name, *ns;
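+  /* lcp_itf_pair_del() below frees the pair, so duplicate the host name
+   * and namespace first; both are still needed to remove the host device.
+   */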
lip = lcp_itf_pair_get (lipi);
host_name = vec_dup (lip->lip_host_name);
host_sw_if_index = lip->lip_host_sw_if_index;
+ ns = vec_dup (lip->lip_namespace);
lcp_itf_pair_del (lip->lip_phy_sw_if_index);
if (vnet_sw_interface_is_sub (vnet_get_main (), host_sw_if_index))
{
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ if (ns)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open ((u8 *) ns);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
lcp_netlink_del_link ((const char *) host_name);
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+
vnet_delete_sub_interface (host_sw_if_index);
}
else
tap_delete_if (vlib_get_main (), host_sw_if_index);
vec_free (host_name);
+ vec_free (ns);
}
int
@@ -489,6 +522,23 @@ lcp_itf_pair_delete (u32 phy_sw_if_index)
return 0;
}
+/**
+ * lcp_itf_interface_add_del
+ *
+ * Registered to receive interface add and delete notifications
+ */
+static clib_error_t *
+lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ if (!is_add)
+ /* remove any interface pair we have for this interface */
+ lcp_itf_pair_delete (sw_if_index);
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del);
+
void
lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx)
{
@@ -501,58 +551,17 @@ lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx)
};
}
-typedef struct lcp_itf_pair_names_t_
-{
- u8 *lipn_host_name;
- u8 *lipn_phy_name;
- u8 *lipn_namespace;
- u32 lipn_phy_sw_if_index;
-} lcp_itf_pair_names_t;
-
-static lcp_itf_pair_names_t *lipn_names;
-
static clib_error_t *
lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
{
- u8 *host, *phy;
- u8 *ns;
u8 *default_ns;
+ u32 tmp;
- host = phy = ns = default_ns = NULL;
+ default_ns = NULL;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- vec_reset_length (host);
-
- if (unformat (input, "pair %s %s %s", &phy, &host, &ns))
- {
- lcp_itf_pair_names_t *lipn;
-
- if (vec_len (ns) > LCP_NS_LEN)
- {
- return clib_error_return (0,
- "linux-cp IF namespace must"
- " be less than %d characters",
- LCP_NS_LEN);
- }
-
- vec_add2 (lipn_names, lipn, 1);
-
- lipn->lipn_host_name = vec_dup (host);
- lipn->lipn_phy_name = vec_dup (phy);
- lipn->lipn_namespace = vec_dup (ns);
- }
- else if (unformat (input, "pair %v %v", &phy, &host))
- {
- lcp_itf_pair_names_t *lipn;
-
- vec_add2 (lipn_names, lipn, 1);
-
- lipn->lipn_host_name = vec_dup (host);
- lipn->lipn_phy_name = vec_dup (phy);
- lipn->lipn_namespace = 0;
- }
- else if (unformat (input, "default netns %v", &default_ns))
+ if (unformat (input, "default netns %v", &default_ns))
{
vec_add1 (default_ns, 0);
if (lcp_set_default_ns (default_ns) < 0)
@@ -563,14 +572,22 @@ lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
LCP_NS_LEN);
}
}
- else if (unformat (input, "interface-auto-create"))
- lcp_set_auto_intf (1 /* is_auto */);
+ else if (unformat (input, "lcp-auto-subint"))
+ lcp_set_auto_subint (1 /* is_auto */);
+ else if (unformat (input, "lcp-sync"))
+ lcp_set_sync (1 /* is_auto */);
+ else if (unformat (input, "del-static-on-link-down"))
+ lcp_set_del_static_on_link_down (1 /* is_del */);
+ else if (unformat (input, "del-dynamic-on-link-down"))
+ lcp_set_del_dynamic_on_link_down (1 /* is_del */);
+ else if (unformat (input, "num-rx-queues %d", &tmp))
+ lcp_set_default_num_queues (tmp, 0 /* is_tx */);
+ else if (unformat (input, "num-tx-queues %d", &tmp))
+ lcp_set_default_num_queues (tmp, 1 /* is_tx */);
else
return clib_error_return (0, "interfaces not found");
}
- vec_free (host);
- vec_free (phy);
vec_free (default_ns);
return NULL;
@@ -615,22 +632,81 @@ lcp_validate_if_name (u8 *name)
return 1;
}
-static void
-lcp_itf_set_vif_link_state (u32 vif_index, u8 up, u8 *ns)
+void
+lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state)
{
int curr_ns_fd, vif_ns_fd;
+ if (!lip)
+ return;
+
curr_ns_fd = vif_ns_fd = -1;
- if (ns)
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ /* Set the same link state on the netlink interface
+ */
+ vnet_netlink_set_link_state (lip->lip_vif_index, state);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+
+ return;
+}
+
+void
+lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ ip_lookup_main_t *lm4 = &im4->lookup_main;
+ ip_lookup_main_t *lm6 = &im6->lookup_main;
+ ip_interface_address_t *ia = 0;
+ int vif_ns_fd = -1;
+ int curr_ns_fd = -1;
+
+ if (!lip)
+ return;
+
+ if (lip->lip_namespace)
{
curr_ns_fd = clib_netns_open (NULL /* self */);
- vif_ns_fd = clib_netns_open (ns);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
if (vif_ns_fd != -1)
clib_setns (vif_ns_fd);
}
- vnet_netlink_set_link_state (vif_index, up);
+ /* Sync any IP4 addressing info into LCP */
+ foreach_ip_interface_address (
+ lm4, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
+ ip4_address_t *r4 = ip_interface_address_get_address (lm4, ia);
+ LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip4 %U/%d",
+ format_lcp_itf_pair, lip, format_ip4_address, r4,
+ ia->address_length);
+ vnet_netlink_add_ip4_addr (lip->lip_vif_index, r4, ia->address_length);
+ }));
+
+ /* Sync any IP6 addressing info into LCP */
+ foreach_ip_interface_address (
+ lm6, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
+ ip6_address_t *r6 = ip_interface_address_get_address (lm6, ia);
+ LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip6 %U/%d",
+ format_lcp_itf_pair, lip, format_ip6_address, r6,
+ ia->address_length);
+ vnet_netlink_add_ip6_addr (lip->lip_vif_index, r6, ia->address_length);
+ }));
if (vif_ns_fd != -1)
close (vif_ns_fd);
@@ -642,6 +718,64 @@ lcp_itf_set_vif_link_state (u32 vif_index, u8 up, u8 *ns)
}
}
+typedef struct
+{
+ u32 vlan;
+ bool dot1ad;
+
+ u32 matched_sw_if_index;
+} lcp_itf_match_t;
+
+static walk_rc_t
+lcp_itf_pair_find_walk (vnet_main_t *vnm, u32 sw_if_index, void *arg)
+{
+ lcp_itf_match_t *match = arg;
+ const vnet_sw_interface_t *sw;
+
+ sw = vnet_get_sw_interface (vnm, sw_if_index);
+ if (sw && (sw->sub.eth.inner_vlan_id == 0) &&
+ (sw->sub.eth.outer_vlan_id == match->vlan) &&
+ (sw->sub.eth.flags.dot1ad == match->dot1ad))
+ {
+ LCP_ITF_PAIR_DBG ("find_walk: found match outer %d dot1ad %d "
+ "inner-dot1q %d: interface %U",
+ sw->sub.eth.outer_vlan_id, sw->sub.eth.flags.dot1ad,
+ sw->sub.eth.inner_vlan_id,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw->sw_if_index);
+ match->matched_sw_if_index = sw->sw_if_index;
+ return WALK_STOP;
+ }
+
+ return WALK_CONTINUE;
+}
+
+/* Return the LIP index of the sub-interface on the phy that has the given
+ * outer VLAN ID and protocol (dot1q or dot1ad).
+ */
+static index_t
+lcp_itf_pair_find_by_outer_vlan (u32 sup_if_index, u16 vlan, bool dot1ad)
+{
+ lcp_itf_match_t match;
+ const vnet_hw_interface_t *hw;
+
+ match.vlan = vlan;
+ match.dot1ad = dot1ad;
+ match.matched_sw_if_index = INDEX_INVALID;
+ hw = vnet_get_sup_hw_interface (vnet_get_main (), sup_if_index);
+
+ vnet_hw_interface_walk_sw (vnet_get_main (), hw->hw_if_index,
+ lcp_itf_pair_find_walk, &match);
+
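+  /* INDEX_INVALID (~0) from a failed walk compares larger than any
+   * vector length, so this check also covers the no-match case.
+   */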
+ if (match.matched_sw_if_index >= vec_len (lip_db_by_phy))
+ return INDEX_INVALID;
+
+ return lip_db_by_phy[match.matched_sw_if_index];
+}
+
+static clib_error_t *lcp_itf_pair_link_up_down (vnet_main_t *vnm,
+ u32 hw_if_index, u32 flags);
+
int
lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
lip_host_type_t host_if_type, u8 *ns,
@@ -649,24 +783,53 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
{
vlib_main_t *vm;
vnet_main_t *vnm;
- u32 vif_index = 0, host_sw_if_index;
+ u32 vif_index = 0, host_sw_if_index = ~0;
const vnet_sw_interface_t *sw;
const vnet_hw_interface_t *hw;
+ const lcp_itf_pair_t *lip;
+ index_t lipi;
+
+ lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+
+ if (lipi != INDEX_INVALID)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: already created");
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
if (!vnet_sw_if_index_is_api_valid (phy_sw_if_index))
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: invalid phy index %u", phy_sw_if_index);
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
if (!lcp_validate_if_name (host_if_name))
- return VNET_API_ERROR_INVALID_ARGUMENT;
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: invalid host-if-name '%s'",
+ host_if_name);
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
vnm = vnet_get_main ();
sw = vnet_get_sw_interface (vnm, phy_sw_if_index);
hw = vnet_get_sup_hw_interface (vnm, phy_sw_if_index);
+ if (!sw || !hw)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: invalid interface");
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+
+ if (hw->hw_class_index != ethernet_hw_interface_class.index &&
+ host_if_type == LCP_ITF_HOST_TAP)
+ {
+ LCP_ITF_PAIR_ERR (
+ "pair_create: don't create TAP for non-eth interface; use tun");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
/*
* Use interface-specific netns if supplied.
- * Otherwise, use default netns if defined.
- * Otherwise ignore a netns and use the OS default.
+ * Otherwise, use the default netns if defined; failing that, the OS default.
*/
if (ns == 0 || ns[0] == 0)
ns = lcp_get_default_ns ();
@@ -674,16 +837,50 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
/* sub interfaces do not need a tap created */
if (vnet_sw_interface_is_sub (vnm, phy_sw_if_index))
{
- const lcp_itf_pair_t *lip;
+ index_t parent_if_index;
int orig_ns_fd, ns_fd;
clib_error_t *err;
- u16 vlan;
+ u16 outer_vlan, inner_vlan;
+ u16 outer_proto, inner_proto;
+ u16 vlan, proto;
+ u32 parent_vif_index;
- /*
- * Find the parent tap by finding the pair from the parent phy
- */
- lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw->sup_sw_if_index));
- vlan = sw->sub.eth.outer_vlan_id;
+ err = vnet_sw_interface_supports_addressing (vnm, phy_sw_if_index);
+ if (err)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: can't create LCP for a "
+ "sub-interface without exact-match set");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ outer_vlan = sw->sub.eth.outer_vlan_id;
+ inner_vlan = sw->sub.eth.inner_vlan_id;
+ outer_proto = inner_proto = ETH_P_8021Q;
+ if (1 == sw->sub.eth.flags.dot1ad)
+ outer_proto = ETH_P_8021AD;
+
+ LCP_ITF_PAIR_INFO ("pair_create: subif: dot1%s outer %d inner %d on %U",
+ sw->sub.eth.flags.dot1ad ? "ad" : "q", outer_vlan,
+ inner_vlan, format_vnet_sw_if_index_name, vnm,
+ hw->sw_if_index);
+
+ parent_if_index = lcp_itf_pair_find_by_phy (sw->sup_sw_if_index);
+ if (INDEX_INVALID == parent_if_index)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: can't find LCP for %U",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw->sup_sw_if_index);
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+ lip = lcp_itf_pair_get (parent_if_index);
+ if (!lip)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: can't create LCP for a "
+ "sub-interface without an LCP on the parent");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+ LCP_ITF_PAIR_DBG ("pair_create: parent %U", format_lcp_itf_pair, lip);
+ parent_vif_index = lip->lip_vif_index;
/*
* see if the requested host interface has already been created
@@ -708,11 +905,56 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
/*
* no existing host interface, create it now
*/
- err = lcp_netlink_add_link_vlan (lip->lip_vif_index, vlan,
- (const char *) host_if_name);
- if (!err && -1 != ns_fd)
- err = vnet_netlink_set_link_netns (vif_index, ns_fd, NULL);
+ /*
+ * Find the parent tap:
+ * - if this is an outer VLAN, use the pair from the parent phy
+ * - if this is an inner VLAN, find the pair from the outer sub-int,
+ * which must exist.
+ */
+ if (inner_vlan)
+ {
+ index_t linux_parent_if_index;
+ const lcp_itf_pair_t *llip;
+
+ vlan = inner_vlan;
+ proto = inner_proto;
+ linux_parent_if_index = lcp_itf_pair_find_by_outer_vlan (
+ hw->sw_if_index, sw->sub.eth.outer_vlan_id,
+ sw->sub.eth.flags.dot1ad);
+ if (INDEX_INVALID == linux_parent_if_index ||
+ !(llip = lcp_itf_pair_get (linux_parent_if_index)))
+ {
+ LCP_ITF_PAIR_ERR (
+ "pair_create: can't find LCP for outer vlan %d "
+ "proto %s on %U",
+ outer_vlan,
+ outer_proto == ETH_P_8021AD ? "dot1ad" : "dot1q",
+ format_vnet_sw_if_index_name, vnm, hw->sw_if_index);
+ err = clib_error_return (0, "parent pair not found");
+ goto socket_close;
+ }
+
+ LCP_ITF_PAIR_DBG ("pair_create: linux parent %U",
+ format_lcp_itf_pair, llip);
+ parent_vif_index = llip->lip_vif_index;
+ }
+ else
+ {
+ vlan = outer_vlan;
+ proto = outer_proto;
+ }
+
+ err = lcp_netlink_add_link_vlan (parent_vif_index, vlan, proto,
+ (const char *) host_if_name);
+ if (err != 0)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: cannot create link "
+ "outer(proto:0x%04x,vlan:%u).inner(proto:0x%"
+ "04x,vlan:%u) name:'%s'",
+ outer_proto, outer_vlan, inner_proto,
+ inner_vlan, host_if_name);
+ }
if (!err)
vif_index = if_nametoindex ((char *) host_if_name);
@@ -721,13 +963,20 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
/*
* create a sub-interface on the tap
*/
- if (!err && vnet_create_sub_interface (lip->lip_host_sw_if_index,
- sw->sub.id, sw->sub.eth.raw_flags,
- sw->sub.eth.inner_vlan_id, vlan,
- &host_sw_if_index))
- LCP_ITF_PAIR_INFO ("failed create vlan: %d on %U", vlan,
- format_vnet_sw_if_index_name, vnet_get_main (),
- lip->lip_host_sw_if_index);
+ if (!err &&
+ vnet_create_sub_interface (lip->lip_host_sw_if_index, sw->sub.id,
+ sw->sub.eth.raw_flags, inner_vlan,
+ outer_vlan, &host_sw_if_index))
+ {
+ LCP_ITF_PAIR_ERR (
+ "pair_create: failed to create tap subint: %d.%d on %U",
+ outer_vlan, inner_vlan, format_vnet_sw_if_index_name, vnm,
+ lip->lip_host_sw_if_index);
+ err = clib_error_return (
+ 0, "failed to create tap subint: %d.%d. on %U", outer_vlan,
+ inner_vlan, format_vnet_sw_if_index_name, vnm,
+ lip->lip_host_sw_if_index);
+ }
socket_close:
if (orig_ns_fd != -1)
@@ -744,15 +993,21 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
else
{
tap_create_if_args_t args = {
- .num_rx_queues = clib_max (1, vlib_num_workers ()),
+ .num_rx_queues =
+ clib_max (1, lcp_get_default_num_queues (0 /* is_tx */)),
+ .num_tx_queues =
+ clib_max (1, lcp_get_default_num_queues (1 /* is_tx */)),
.id = hw->hw_if_index,
.sw_if_index = ~0,
.rx_ring_sz = 256,
.tx_ring_sz = 256,
.host_if_name = host_if_name,
.host_namespace = 0,
+ .rv = 0,
+ .error = NULL,
};
ethernet_interface_t *ei;
+ u32 host_sw_mtu_size;
if (host_if_type == LCP_ITF_HOST_TUN)
args.tap_flags |= TAP_FLAG_TUN;
@@ -762,38 +1017,45 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
mac_address_copy (&args.host_mac_addr, &ei->address.mac);
}
- if (sw->mtu[VNET_MTU_L3])
+  /*
+   * The TAP driver does copy the host MTU forward from the VPP
+   * interface's L3 MTU, but it should also ensure that the VPP TAP
+   * interface has an MTU greater than or equal to the host's. Since
+   * users can resize interfaces at runtime (set interface mtu packet
+   * ...), ensure that the TAP MTU is large enough, taking the VPP
+   * interface's L3 MTU if it is set, and otherwise a sensible default.
+   */
+ host_sw_mtu_size = sw->mtu[VNET_MTU_L3];
+ if (host_sw_mtu_size)
{
args.host_mtu_set = 1;
- args.host_mtu_size = sw->mtu[VNET_MTU_L3];
+ args.host_mtu_size = host_sw_mtu_size;
}
+ else
+ host_sw_mtu_size = ETHERNET_MAX_PACKET_BYTES;
if (ns && ns[0] != 0)
args.host_namespace = ns;
vm = vlib_get_main ();
tap_create_if (vm, &args);
-
if (args.rv < 0)
{
+ LCP_ITF_PAIR_ERR ("pair_create: could not create tap, retval:%d",
+ args.rv);
+ clib_error_free (args.error);
return args.rv;
}
+ vnet_sw_interface_set_mtu (vnm, args.sw_if_index, host_sw_mtu_size);
+
/*
* get the hw and ethernet of the tap
*/
hw = vnet_get_sup_hw_interface (vnm, args.sw_if_index);
-
- /*
- * Set the interface down on the host side.
- * This controls whether the host can RX/TX.
- */
virtio_main_t *mm = &virtio_main;
virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
- lcp_itf_set_vif_link_state (vif->ifindex, 0 /* down */,
- args.host_namespace);
-
/*
* Leave the TAP permanently up on the VPP side.
   * This TAP will be shared by many sub-interfaces.
@@ -819,14 +1081,35 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
return -1;
}
- vnet_sw_interface_admin_up (vnm, host_sw_if_index);
- lcp_itf_pair_add (host_sw_if_index, phy_sw_if_index, host_if_name, vif_index,
- host_if_type, ns);
-
LCP_ITF_PAIR_INFO ("pair create: {%U, %U, %s}", format_vnet_sw_if_index_name,
vnet_get_main (), phy_sw_if_index,
format_vnet_sw_if_index_name, vnet_get_main (),
host_sw_if_index, host_if_name);
+ lcp_itf_pair_add (host_sw_if_index, phy_sw_if_index, host_if_name, vif_index,
+ host_if_type, ns);
+
+ /*
+ * Copy the link state from VPP into the host side.
+ * The TAP is shared by many interfaces, always keep it up.
+ * This controls whether the host can RX/TX.
+ */
+ sw = vnet_get_sw_interface (vnm, phy_sw_if_index);
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_vif (vif_index));
+ LCP_ITF_PAIR_INFO ("pair create: %U sw-flags %u hw-flags %u",
+ format_lcp_itf_pair, lip, sw->flags, hw->flags);
+ vnet_sw_interface_admin_up (vnm, host_sw_if_index);
+ lcp_itf_set_link_state (lip, sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ /*
+ * Reflect current link state and link speed of the hardware interface on the
+ * TAP interface.
+ */
+ if (host_if_type == LCP_ITF_HOST_TAP &&
+ !vnet_sw_interface_is_sub (vnm, phy_sw_if_index))
+ {
+ hw = vnet_get_sup_hw_interface (vnm, phy_sw_if_index);
+ lcp_itf_pair_link_up_down (vnm, hw->hw_if_index, hw->flags);
+ }
if (host_sw_if_indexp)
*host_sw_if_indexp = host_sw_if_index;
@@ -890,70 +1173,6 @@ lcp_itf_pair_replace_end (void)
return (0);
}
-static uword
-lcp_itf_pair_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
- vlib_frame_t *f)
-{
- uword *event_data = 0;
- uword *lipn_index;
-
- while (1)
- {
- vlib_process_wait_for_event (vm);
-
- vlib_process_get_events (vm, &event_data);
-
- vec_foreach (lipn_index, event_data)
- {
- lcp_itf_pair_names_t *lipn;
-
- lipn = &lipn_names[*lipn_index];
- lcp_itf_pair_create (lipn->lipn_phy_sw_if_index,
- lipn->lipn_host_name, LCP_ITF_HOST_TAP,
- lipn->lipn_namespace, NULL);
- }
-
- vec_reset_length (event_data);
- }
-
- return 0;
-}
-
-VLIB_REGISTER_NODE (lcp_itf_pair_process_node, static) = {
- .function = lcp_itf_pair_process,
- .name = "linux-cp-itf-process",
- .type = VLIB_NODE_TYPE_PROCESS,
-};
-
-static clib_error_t *
-lcp_itf_phy_add (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
-{
- lcp_itf_pair_names_t *lipn;
- vlib_main_t *vm = vlib_get_main ();
- vnet_hw_interface_t *hw;
-
- if (!is_create || vnet_sw_interface_is_sub (vnm, sw_if_index))
- return NULL;
-
- hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
-
- vec_foreach (lipn, lipn_names)
- {
- if (!vec_cmp (hw->name, lipn->lipn_phy_name))
- {
- lipn->lipn_phy_sw_if_index = sw_if_index;
-
- vlib_process_signal_event (vm, lcp_itf_pair_process_node.index, 0,
- lipn - lipn_names);
- break;
- }
- }
-
- return NULL;
-}
-
-VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_phy_add);
-
static clib_error_t *
lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
{
@@ -980,7 +1199,8 @@ lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
tap_set_carrier (si->hw_if_index,
(flags & VNET_HW_INTERFACE_FLAG_LINK_UP));
- if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
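+  /* A link_speed of ~0 means the driver did not report a speed; skip
+   * the TAP speed update in that case.
+   */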
+ if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP &&
+ hi->link_speed != UINT32_MAX)
{
tap_set_speed (si->hw_if_index, hi->link_speed / 1000);
}
@@ -992,13 +1212,15 @@ lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_itf_pair_link_up_down);
static clib_error_t *
-lcp_itf_pair_init (vlib_main_t *vm)
+lcp_interface_init (vlib_main_t *vm)
{
vlib_punt_hdl_t punt_hdl = vlib_punt_client_register ("linux-cp");
/* punt IKE */
vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
"linux-cp-punt");
+ vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP6_SPI_UDP_0],
+ "linux-cp-punt");
/* punt all unknown ports */
udp_punt_unknown (vm, 0, 1);
@@ -1011,7 +1233,7 @@ lcp_itf_pair_init (vlib_main_t *vm)
return NULL;
}
-VLIB_INIT_FUNCTION (lcp_itf_pair_init) = {
+VLIB_INIT_FUNCTION (lcp_interface_init) = {
.runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"),
};
diff --git a/src/plugins/linux-cp/lcp_interface.h b/src/plugins/linux-cp/lcp_interface.h
index bed30248845..cfcd3925a15 100644
--- a/src/plugins/linux-cp/lcp_interface.h
+++ b/src/plugins/linux-cp/lcp_interface.h
@@ -21,6 +21,22 @@
#include <plugins/linux-cp/lcp.h>
+extern vlib_log_class_t lcp_itf_pair_logger;
+
+#define LCP_ITF_PAIR_DBG(...) \
+ vlib_log_debug (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_INFO(...) \
+ vlib_log_info (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_NOTICE(...) \
+ vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_WARN(...) \
+ vlib_log_warn (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_ERR(...) vlib_log_err (lcp_itf_pair_logger, __VA_ARGS__);
+
#define foreach_lcp_itf_pair_flag _ (STALE, 0, "stale")
typedef enum lip_flag_t_
@@ -88,8 +104,6 @@ extern index_t lcp_itf_pair_find_by_vif (u32 vif_index);
extern int lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index,
u8 *host_name, u32 host_index,
lip_host_type_t host_type, u8 *ns);
-extern int lcp_itf_pair_add_sub (u32 vif, u8 *host_name, u32 sub_sw_if_index,
- u32 phy_sw_if_index, u8 *ns);
extern int lcp_itf_pair_del (u32 phy_sw_if_index);
/**
@@ -144,12 +158,6 @@ lcp_itf_pair_find_by_host (u32 host_sw_if_index)
return (lip_db_by_host[host_sw_if_index]);
}
-/**
- * manage interface auto creation
- */
-void lcp_set_auto_intf (u8 is_auto);
-int lcp_auto_intf (void);
-
typedef void (*lcp_itf_pair_add_cb_t) (lcp_itf_pair_t *);
typedef void (*lcp_itf_pair_del_cb_t) (lcp_itf_pair_t *);
@@ -160,6 +168,36 @@ typedef struct lcp_itf_pair_vft
} lcp_itf_pair_vft_t;
void lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft);
+
+/**
+ * sub-interface auto creation/deletion for LCP
+ */
+void lcp_set_auto_subint (u8 is_auto);
+int lcp_auto_subint (void);
+
+/**
+ * sync state changes from VPP into LCP
+ */
+void lcp_set_sync (u8 is_auto);
+int lcp_sync (void);
+
+/* Set TAP and Linux host link state */
+void lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state);
+
+/* Set any VPP L3 addresses on Linux host device */
+void lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip);
+
+/* Sync all state from VPP to a specific Linux device, all sub-interfaces
+ * of a hardware interface, or all interfaces in the system.
+ *
+ * Note: in some circumstances, this syncer will (have to) make changes to
+ * the VPP interface, for example if its MTU is greater than its parent.
+ * See the function for rationale.
+ */
+void lcp_itf_pair_sync_state (lcp_itf_pair_t *lip);
+void lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi);
+void lcp_itf_pair_sync_state_all ();
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/linux-cp/lcp_interface_sync.c b/src/plugins/linux-cp/lcp_interface_sync.c
new file mode 100644
index 00000000000..ca7638e1799
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_interface_sync.c
@@ -0,0 +1,445 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright 2021 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/devices/netlink.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/linux/netns.h>
+#include <plugins/linux-cp/lcp_interface.h>
+
+/* Helper function to copy forward all sw interface link state flags,
+ * MTU, and IP addresses into their counterpart LIP interface.
+ *
+ * This is called upon MTU changes and state changes.
+ */
+void
+lcp_itf_pair_sync_state (lcp_itf_pair_t *lip)
+{
+ vnet_sw_interface_t *sw;
+ vnet_sw_interface_t *sup_sw;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ u32 mtu;
+ u32 netlink_mtu;
+
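+  /* Do nothing when sync is disabled, or when the change originated from
+   * a netlink message that is currently being processed; syncing then
+   * would echo the update straight back into the kernel.
+   */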
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return;
+
+ sw =
+ vnet_get_sw_interface_or_null (vnet_get_main (), lip->lip_phy_sw_if_index);
+ if (!sw)
+ return;
+ sup_sw =
+ vnet_get_sw_interface_or_null (vnet_get_main (), sw->sup_sw_if_index);
+ if (!sup_sw)
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ LCP_ITF_PAIR_INFO ("sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u",
+ format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+ sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+
+ /* Linux will not allow children to be admin-up if their parent is
+ * admin-down. If child is up but parent is not, force it down.
+ */
+ int state = sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+ if (state && !(sup_sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ LCP_ITF_PAIR_WARN (
+ "sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u: "
+ "forcing state to sup-flags to satisfy netlink",
+ format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+ sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+ state = 0;
+ }
+ lcp_itf_set_link_state (lip, state);
+
+ /* Linux will clamp MTU of children when the parent is lower. VPP is fine
+ * with differing MTUs. VPP assumes that if a subint has MTU of 0, that it
+ * inherits from its parent. Linux likes to be more explicit, so we
+ * reconcile any differences.
+ */
+ mtu = sw->mtu[VNET_MTU_L3];
+ if (mtu == 0)
+ mtu = sup_sw->mtu[VNET_MTU_L3];
+
+ if (sup_sw->mtu[VNET_MTU_L3] < sw->mtu[VNET_MTU_L3])
+ {
+ LCP_ITF_PAIR_WARN ("sync_state: %U flags %u mtu %u sup-mtu %u: "
+ "clamping to sup-mtu to satisfy netlink",
+ format_lcp_itf_pair, lip, sw->flags,
+ sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+ mtu = sup_sw->mtu[VNET_MTU_L3];
+ }
+
+ /* Set MTU on all of {sw, tap, netlink}. Only send a netlink message if we
+ * really do want to change the MTU.
+ */
+ vnet_sw_interface_set_mtu (vnet_get_main (), lip->lip_phy_sw_if_index, mtu);
+ vnet_sw_interface_set_mtu (vnet_get_main (), lip->lip_host_sw_if_index, mtu);
+ if (NULL == vnet_netlink_get_link_mtu (lip->lip_vif_index, &netlink_mtu))
+ {
+ if (netlink_mtu != mtu)
+ vnet_netlink_set_link_mtu (lip->lip_vif_index, mtu);
+ }
+
+ /* Linux will remove IPv6 addresses on children when the parent state
+ * goes down, so we ensure all IPv4/IPv6 addresses are synced.
+ */
+ lcp_itf_set_interface_addr (lip);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+
+ return;
+}
+
+static walk_rc_t
+lcp_itf_pair_walk_sync_state_all_cb (index_t lipi, void *ctx)
+{
+ lcp_itf_pair_t *lip;
+ lip = lcp_itf_pair_get (lipi);
+ if (!lip)
+ return WALK_CONTINUE;
+
+ lcp_itf_pair_sync_state (lip);
+ return WALK_CONTINUE;
+}
+
+static walk_rc_t
+lcp_itf_pair_walk_sync_state_hw_cb (vnet_main_t *vnm, u32 sw_if_index,
+ void *arg)
+{
+ lcp_itf_pair_t *lip;
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ {
+ return WALK_CONTINUE;
+ }
+
+ lcp_itf_pair_sync_state (lip);
+ return WALK_CONTINUE;
+}
+
+void
+lcp_itf_pair_sync_state_all ()
+{
+ lcp_itf_pair_walk (lcp_itf_pair_walk_sync_state_all_cb, 0);
+}
+
+void
+lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi)
+{
+ if (!hi)
+ return;
+ LCP_ITF_PAIR_DBG ("sync_state_hw: hi %U", format_vnet_sw_if_index_name,
+ vnet_get_main (), hi->hw_if_index);
+
+ vnet_hw_interface_walk_sw (vnet_get_main (), hi->hw_if_index,
+ lcp_itf_pair_walk_sync_state_hw_cb, NULL);
+}
+
+static clib_error_t *
+lcp_itf_admin_state_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
+{
+ lcp_itf_pair_t *lip;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return 0;
+
+ LCP_ITF_PAIR_DBG ("admin_state_change: sw %U %u",
+ format_vnet_sw_if_index_name, vnm, sw_if_index, flags);
+
+ // Sync interface state changes into host
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ return NULL;
+ LCP_ITF_PAIR_INFO ("admin_state_change: %U flags %u", format_lcp_itf_pair,
+ lip, flags);
+
+ if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+ {
+ lcp_itf_pair_sync_state (lip);
+ return NULL;
+ }
+
+  // When Linux changes link on a parent interface, all of its children also
+  // change. If a parent interface changes MTU, all of its children are
+  // clamped at that MTU by Linux. Neither holds true in VPP, so we are
+  // forced to undo the change by walking the sub-interfaces of a phy and
+  // syncing their state back into Linux.
+ si = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (!si)
+ return NULL;
+
+ hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index);
+ if (!hi)
+ return NULL;
+ LCP_ITF_PAIR_DBG ("admin_state_change: si %U hi %U, syncing children",
+ format_vnet_sw_if_index_name, vnm, si->sw_if_index,
+ format_vnet_sw_if_index_name, vnm, hi->sw_if_index);
+
+ lcp_itf_pair_sync_state_hw (hi);
+
+ return NULL;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (lcp_itf_admin_state_change);
+
+static clib_error_t *
+lcp_itf_mtu_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
+{
+ vnet_sw_interface_t *si;
+ vnet_hw_interface_t *hi;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return NULL;
+
+ LCP_ITF_PAIR_DBG ("mtu_change: sw %U %u", format_vnet_sw_if_index_name, vnm,
+ sw_if_index, flags);
+
+ if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+ {
+ lcp_itf_pair_t *lip;
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (lip)
+ lcp_itf_pair_sync_state (lip);
+ return NULL;
+ }
+
+  // When Linux changes link on a parent interface, all of its children also
+  // change. If a parent interface changes MTU, all of its children are
+  // clamped at that MTU by Linux. Neither holds true in VPP, so we are
+  // forced to undo the change by walking the sub-interfaces of a phy and
+  // syncing their state back into Linux.
+ si = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (!si)
+ return NULL;
+
+ hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index);
+ if (!hi)
+ return NULL;
+ LCP_ITF_PAIR_DBG ("mtu_change: si %U hi %U, syncing children",
+ format_vnet_sw_if_index_name, vnm, si->sw_if_index,
+ format_vnet_sw_if_index_name, vnm, hi->sw_if_index);
+
+ lcp_itf_pair_sync_state_hw (hi);
+
+ return NULL;
+}
+
+VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION (lcp_itf_mtu_change);
+
+static void
+lcp_itf_ip4_add_del_interface_addr (ip4_main_t *im, uword opaque,
+ u32 sw_if_index, ip4_address_t *address,
+ u32 address_length, u32 if_address_index,
+ u32 is_del)
+{
+ const lcp_itf_pair_t *lip;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return;
+
+ LCP_ITF_PAIR_DBG ("ip4_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index, format_ip4_address, address, address_length);
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ LCP_ITF_PAIR_DBG ("ip4_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add",
+ format_lcp_itf_pair, lip, format_ip4_address, address,
+ address_length);
+
+ if (is_del)
+ vnet_netlink_del_ip4_addr (lip->lip_vif_index, address, address_length);
+ else
+ vnet_netlink_add_ip4_addr (lip->lip_vif_index, address, address_length);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+ return;
+}
+
+static void
+lcp_itf_ip6_add_del_interface_addr (ip6_main_t *im, uword opaque,
+ u32 sw_if_index, ip6_address_t *address,
+ u32 address_length, u32 if_address_index,
+ u32 is_del)
+{
+ const lcp_itf_pair_t *lip;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return;
+
+ LCP_ITF_PAIR_DBG ("ip6_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index, format_ip6_address, address, address_length);
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+  LCP_ITF_PAIR_DBG ("ip6_addr_%s: %U ip6 %U/%u", is_del ? "del" : "add",
+ format_lcp_itf_pair, lip, format_ip6_address, address,
+ address_length);
+ if (is_del)
+ vnet_netlink_del_ip6_addr (lip->lip_vif_index, address, address_length);
+ else
+ vnet_netlink_add_ip6_addr (lip->lip_vif_index, address, address_length);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+}
+
+static clib_error_t *
+lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
+{
+ const vnet_sw_interface_t *sw;
+ uword is_sub;
+
+ if (!lcp_auto_subint ())
+ return NULL;
+
+ sw = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (!sw)
+ return NULL;
+
+ is_sub = vnet_sw_interface_is_sub (vnm, sw_if_index);
+ if (!is_sub)
+ return NULL;
+
+ LCP_ITF_PAIR_DBG ("interface_%s: sw %U parent %U", is_create ? "add" : "del",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw->sw_if_index, format_vnet_sw_if_index_name,
+ vnet_get_main (), sw->sup_sw_if_index);
+
+ if (is_create)
+ {
+ const lcp_itf_pair_t *sup_lip;
+ u8 *name = 0;
+
+ // If the parent has a LIP auto-create a LIP for this interface
+ sup_lip =
+ lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw->sup_sw_if_index));
+ if (!sup_lip)
+ return NULL;
+
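+      // Name the host interface "<parent-host-name>.<sub-id>"; the trailing
+      // %c with 0 NUL-terminates the vector so it can be used as a C string.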
+ name = format (name, "%s.%d%c", sup_lip->lip_host_name, sw->sub.id, 0);
+
+ LCP_ITF_PAIR_INFO (
+ "interface_%s: %U has parent %U, auto-creating LCP with host-if %s",
+ is_create ? "add" : "del", format_vnet_sw_if_index_name,
+ vnet_get_main (), sw->sw_if_index, format_lcp_itf_pair, sup_lip, name);
+
+ lcp_itf_pair_create (sw->sw_if_index, name, LCP_ITF_HOST_TAP,
+ sup_lip->lip_namespace, NULL);
+
+ vec_free (name);
+ }
+ else
+ {
+ lcp_itf_pair_delete (sw_if_index);
+ }
+
+ return NULL;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del);
+
+static clib_error_t *
+lcp_itf_sync_init (vlib_main_t *vm)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+
+ ip4_add_del_interface_address_callback_t cb4;
+ ip6_add_del_interface_address_callback_t cb6;
+
+ cb4.function = lcp_itf_ip4_add_del_interface_addr;
+ cb4.function_opaque = 0;
+ vec_add1 (im4->add_del_interface_address_callbacks, cb4);
+
+ cb6.function = lcp_itf_ip6_add_del_interface_addr;
+ cb6.function_opaque = 0;
+ vec_add1 (im6->add_del_interface_address_callbacks, cb6);
+
+ return NULL;
+}
+
+VLIB_INIT_FUNCTION (lcp_itf_sync_init) = {
+ .runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_mpls_sync.c b/src/plugins/linux-cp/lcp_mpls_sync.c
new file mode 100644
index 00000000000..c08fcb4d1d9
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_mpls_sync.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+
+#include <linux-cp/lcp_interface.h>
+
+#include <vnet/plugin/plugin.h>
+#include <vnet/mpls/mpls.h>
+#include <vppinfra/linux/netns.h>
+
+#include <fcntl.h>
+
+vlib_log_class_t lcp_mpls_sync_logger;
+
+#define LCP_MPLS_SYNC_DBG(...) \
+ vlib_log_debug (lcp_mpls_sync_logger, __VA_ARGS__);
+
+void
+lcp_mpls_sync_pair_add_cb (lcp_itf_pair_t *lip)
+{
+ u8 phy_is_enabled = mpls_sw_interface_is_enabled (lip->lip_phy_sw_if_index);
+ LCP_MPLS_SYNC_DBG ("pair_add_cb: mpls enabled %u, parent %U", phy_is_enabled,
+ format_lcp_itf_pair, lip);
+ if (phy_is_enabled)
+ mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index,
+ 1);
+}
+
+void
+lcp_mpls_sync_state_cb (struct mpls_main_t *mm, uword opaque, u32 sw_if_index,
+ u32 is_enable)
+{
+ lcp_itf_pair_t *lip;
+ index_t lipi;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ int ctl_fd = -1;
+ u8 *ctl_path = NULL;
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: called for sw_if_index %u", sw_if_index);
+
+ // If device is LCP PHY, sync state to host tap.
+ lipi = lcp_itf_pair_find_by_phy (sw_if_index);
+ if (INDEX_INVALID != lipi)
+ {
+ lip = lcp_itf_pair_get (lipi);
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls enabled %u parent %U", is_enable,
+ format_lcp_itf_pair, lip);
+ mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index,
+ is_enable);
+ return;
+ }
+
+ // If device is LCP host, toggle MPLS XC feature.
+ lipi = lcp_itf_pair_find_by_host (sw_if_index);
+ if (INDEX_INVALID == lipi)
+ return;
+ lip = lcp_itf_pair_get (lipi);
+
+ vnet_feature_enable_disable ("mpls-input", "linux-cp-xc-mpls", sw_if_index,
+ is_enable, NULL, 0);
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls xc state %u parent %U", is_enable,
+ format_lcp_itf_pair, lip);
+
+ // If syncing is enabled, sync Linux state as well.
+ // This can happen regardless of lcp_get_netlink_processing_active(),
+ // provided it does not generate Netlink messages.
+ if (!lcp_sync ())
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ ctl_path = format (NULL, "/proc/sys/net/mpls/conf/%s/input%c",
+ lip->lip_host_name, NULL);
+ if (NULL == ctl_path)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to format sysctl");
+ goto SYNC_CLEANUP;
+ }
+
+ ctl_fd = open ((char *) ctl_path, O_WRONLY);
+ if (ctl_fd < 0)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to open %s for writing",
+ ctl_path);
+ goto SYNC_CLEANUP;
+ }
+
+ if (fdformat (ctl_fd, "%u", is_enable) < 1)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to write to %s", ctl_path);
+ goto SYNC_CLEANUP;
+ }
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: set mpls input for %s",
+ lip->lip_host_name);
+
+SYNC_CLEANUP:
+ if (ctl_fd > -1)
+ close (ctl_fd);
+
+ if (NULL != ctl_path)
+ vec_free (ctl_path);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+}
+
+static clib_error_t *
+lcp_mpls_sync_init (vlib_main_t *vm)
+{
+ lcp_itf_pair_vft_t mpls_sync_itf_pair_vft = {
+ .pair_add_fn = lcp_mpls_sync_pair_add_cb,
+ };
+ lcp_itf_pair_register_vft (&mpls_sync_itf_pair_vft);
+
+ mpls_interface_state_change_add_callback (lcp_mpls_sync_state_cb, 0);
+
+ lcp_mpls_sync_logger = vlib_log_register_class ("linux-cp", "mpls-sync");
+
+ return NULL;
+}
+
+VLIB_INIT_FUNCTION (lcp_mpls_sync_init) = {
+ .runs_after = VLIB_INITS ("lcp_interface_init", "mpls_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_nl.c b/src/plugins/linux-cp/lcp_nl.c
new file mode 100644
index 00000000000..85b6447007a
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_nl.c
@@ -0,0 +1,1043 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <fcntl.h>
+
+#include <linux-cp/lcp_nl.h>
+
+#include <netlink/route/rule.h>
+#include <netlink/msg.h>
+#include <netlink/netlink.h>
+#include <netlink/socket.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/addr.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/error.h>
+#include <vppinfra/linux/netns.h>
+
+#include <vnet/fib/fib_table.h>
+
+#include <libmnl/libmnl.h>
+
+#include <plugins/linux-cp/lcp_interface.h>
+
+typedef enum nl_status_t_
+{
+ NL_STATUS_NOTIF_PROC,
+ NL_STATUS_SYNC,
+} nl_status_t;
+
+typedef enum nl_sock_type_t_
+{
+ NL_SOCK_TYPE_LINK,
+ NL_SOCK_TYPE_ADDR,
+ NL_SOCK_TYPE_NEIGH,
+ NL_SOCK_TYPE_ROUTE,
+} nl_sock_type_t;
+
+#define NL_SOCK_TYPES_N (NL_SOCK_TYPE_ROUTE + 1)
+
+/* Socket type, message type, type name, function subname */
+#define foreach_sock_type \
+ _ (NL_SOCK_TYPE_LINK, RTM_GETLINK, "link", link) \
+ _ (NL_SOCK_TYPE_ADDR, RTM_GETADDR, "address", link_addr) \
+ _ (NL_SOCK_TYPE_NEIGH, RTM_GETNEIGH, "neighbor", neigh) \
+ _ (NL_SOCK_TYPE_ROUTE, RTM_GETROUTE, "route", route)
+
+typedef enum nl_event_type_t_
+{
+ NL_EVENT_READ,
+ NL_EVENT_ERR,
+} nl_event_type_t;
+
+typedef struct nl_main
+{
+
+ nl_status_t nl_status;
+
+ struct nl_sock *sk_route;
+ struct nl_sock *sk_route_sync[NL_SOCK_TYPES_N];
+ vlib_log_class_t nl_logger;
+ nl_vft_t *nl_vfts;
+ struct nl_cache *nl_caches[LCP_NL_N_OBJS];
+ nl_msg_info_t *nl_msg_queue;
+ uword clib_file_index;
+
+ u32 rx_buf_size;
+ u32 tx_buf_size;
+ u32 batch_size;
+ u32 batch_delay_ms;
+
+ u32 sync_batch_limit;
+ u32 sync_batch_delay_ms;
+ u32 sync_attempt_delay_ms;
+
+} nl_main_t;
+
+#define NL_RX_BUF_SIZE_DEF (1 << 27) /* 128 MB */
+#define NL_TX_BUF_SIZE_DEF (1 << 18) /* 256 kB */
+#define NL_BATCH_SIZE_DEF (1 << 11) /* 2048 */
+#define NL_BATCH_DELAY_MS_DEF 50 /* 50 ms, max 20 batch/s */
+
+#define NL_SYNC_BATCH_LIMIT_DEF (1 << 10) /* 1024 */
+#define NL_SYNC_BATCH_DELAY_MS_DEF 20 /* 20ms, max 50 batch/s */
+#define NL_SYNC_ATTEMPT_DELAY_MS_DEF 2000 /* 2s */
+
+static nl_main_t nl_main = {
+ .rx_buf_size = NL_RX_BUF_SIZE_DEF,
+ .tx_buf_size = NL_TX_BUF_SIZE_DEF,
+ .batch_size = NL_BATCH_SIZE_DEF,
+ .batch_delay_ms = NL_BATCH_DELAY_MS_DEF,
+ .sync_batch_limit = NL_SYNC_BATCH_LIMIT_DEF,
+ .sync_batch_delay_ms = NL_SYNC_BATCH_DELAY_MS_DEF,
+ .sync_attempt_delay_ms = NL_SYNC_ATTEMPT_DELAY_MS_DEF,
+};
+
+/* #define foreach_nl_nft_proto \ */
+/* _(IP4, "ip", AF_INT) \ */
+/* _(IP6, "ip6", NFPROTO_IPV6) */
+
+/* typedef enum nl_nft_proto_t_ */
+/* { */
+/* #define _(a,b,c) NL_NFT_PROTO_##a = c, */
+/* foreach_nl_nft_proto */
+/* #undef _ */
+/* } nl_nft_proto_t; */
+
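+/* Invoke the given callback of every registered VFT; callbacks not
+ * marked mp-safe are run under the worker thread barrier.
+ */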
+#define FOREACH_VFT(__func, __arg) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (__arg); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+#define FOREACH_VFT_NO_ARG(__func) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+#define FOREACH_VFT_CTX(__func, __arg, __ctx) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (__arg, __ctx); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+void
+nl_register_vft (const nl_vft_t *nv)
+{
+ nl_main_t *nm = &nl_main;
+
+ vec_add1 (nm->nl_vfts, *nv);
+}
+
+#define NL_DBG(...) vlib_log_debug (nl_main.nl_logger, __VA_ARGS__);
+#define NL_INFO(...) vlib_log_notice (nl_main.nl_logger, __VA_ARGS__);
+#define NL_ERROR(...) vlib_log_err (nl_main.nl_logger, __VA_ARGS__);
+
+static void lcp_nl_open_socket (void);
+static void lcp_nl_close_socket (void);
+static void lcp_nl_open_sync_socket (nl_sock_type_t sock_type);
+static void lcp_nl_close_sync_socket (nl_sock_type_t sock_type);
+
+static void
+nl_route_del (struct rtnl_route *rr, void *arg)
+{
+ FOREACH_VFT (nvl_rt_route_del, rr);
+}
+
+static void
+nl_route_add (struct rtnl_route *rr, void *arg)
+{
+ int is_replace = 0;
+
+ if (arg)
+ {
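+      /* NLM_F_REPLACE on a new-route message indicates an existing route
+       * is being replaced rather than added; pass that through to the
+       * registered handlers.
+       */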
+ nl_msg_info_t *msg_info = (nl_msg_info_t *) arg;
+ struct nlmsghdr *nlh = nlmsg_hdr (msg_info->msg);
+
+ is_replace = (nlh->nlmsg_flags & NLM_F_REPLACE);
+ }
+
+ FOREACH_VFT_CTX (nvl_rt_route_add, rr, is_replace);
+}
+
+static void
+nl_route_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_route_sync_begin);
+}
+
+static void
+nl_route_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_route_sync_end);
+}
+
+static void
+nl_neigh_del (struct rtnl_neigh *rn, void *arg)
+{
+ FOREACH_VFT (nvl_rt_neigh_del, rn);
+}
+
+static void
+nl_neigh_add (struct rtnl_neigh *rn, void *arg)
+{
+ FOREACH_VFT (nvl_rt_neigh_add, rn);
+}
+
+static void
+nl_neigh_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_neigh_sync_begin);
+}
+
+static void
+nl_neigh_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_neigh_sync_end);
+}
+
+static void
+nl_link_addr_del (struct rtnl_addr *rla, void *arg)
+{
+ FOREACH_VFT (nvl_rt_addr_del, rla);
+}
+
+static void
+nl_link_addr_add (struct rtnl_addr *rla, void *arg)
+{
+ FOREACH_VFT (nvl_rt_addr_add, rla);
+}
+
+static void
+nl_link_addr_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_addr_sync_begin);
+}
+
+static void
+nl_link_addr_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_addr_sync_end);
+}
+
+static void
+nl_link_del (struct rtnl_link *rl, void *arg)
+{
+ FOREACH_VFT_CTX (nvl_rt_link_del, rl, arg);
+}
+
+static void
+nl_link_add (struct rtnl_link *rl, void *arg)
+{
+ FOREACH_VFT_CTX (nvl_rt_link_add, rl, arg);
+}
+
+static void
+nl_link_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_link_sync_begin);
+}
+
+static void
+nl_link_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_link_sync_end);
+}
+
+static void
+nl_route_dispatch (struct nl_object *obj, void *arg)
+{
+ /* nothing can be done without interface mappings */
+ if (!lcp_itf_num_pairs ())
+ return;
+
+ switch (nl_object_get_msgtype (obj))
+ {
+ case RTM_NEWROUTE:
+ nl_route_add ((struct rtnl_route *) obj, arg);
+ break;
+ case RTM_DELROUTE:
+ nl_route_del ((struct rtnl_route *) obj, arg);
+ break;
+ case RTM_NEWNEIGH:
+ nl_neigh_add ((struct rtnl_neigh *) obj, arg);
+ break;
+ case RTM_DELNEIGH:
+ nl_neigh_del ((struct rtnl_neigh *) obj, arg);
+ break;
+ case RTM_NEWADDR:
+ nl_link_addr_add ((struct rtnl_addr *) obj, arg);
+ break;
+ case RTM_DELADDR:
+ nl_link_addr_del ((struct rtnl_addr *) obj, arg);
+ break;
+ case RTM_NEWLINK:
+ nl_link_add ((struct rtnl_link *) obj, arg);
+ break;
+ case RTM_DELLINK:
+ nl_link_del ((struct rtnl_link *) obj, arg);
+ break;
+ default:
+ NL_INFO ("unhandled: %s", nl_object_get_type (obj));
+ break;
+ }
+}
+
+static int
+nl_route_process_msgs (void)
+{
+ nl_main_t *nm = &nl_main;
+ nl_msg_info_t *msg_info;
+ int err, n_msgs = 0;
+
+ lcp_set_netlink_processing_active (1);
+
+ /* process a batch of messages. break if we hit our limit */
+ vec_foreach (msg_info, nm->nl_msg_queue)
+ {
+ if ((err = nl_msg_parse (msg_info->msg, nl_route_dispatch, msg_info)) <
+ 0)
+ NL_ERROR ("Unable to parse object: %s", nl_geterror (err));
+ nlmsg_free (msg_info->msg);
+ if (++n_msgs >= nm->batch_size)
+ break;
+ }
+
+ /* remove the messages we processed from the head of the queue */
+ if (n_msgs)
+ vec_delete (nm->nl_msg_queue, n_msgs, 0);
+
+ NL_DBG ("Processed %u messages", n_msgs);
+
+ lcp_set_netlink_processing_active (0);
+
+ return n_msgs;
+}
+
+static int
+lcp_nl_route_discard_msgs (void)
+{
+ nl_main_t *nm = &nl_main;
+ nl_msg_info_t *msg_info;
+ int n_msgs;
+
+ n_msgs = vec_len (nm->nl_msg_queue);
+ if (n_msgs == 0)
+ return 0;
+
+ vec_foreach (msg_info, nm->nl_msg_queue)
+ {
+ nlmsg_free (msg_info->msg);
+ }
+
+ vec_reset_length (nm->nl_msg_queue);
+
+ NL_INFO ("Discarded %u messages", n_msgs);
+
+ return n_msgs;
+}
+
+static int
+lcp_nl_route_send_dump_req (nl_sock_type_t sock_type, int msg_type)
+{
+ nl_main_t *nm = &nl_main;
+ struct nl_sock *sk_route = nm->sk_route_sync[sock_type];
+ int err;
+ struct rtgenmsg rt_hdr = {
+ .rtgen_family = AF_UNSPEC,
+ };
+
+ err =
+ nl_send_simple (sk_route, msg_type, NLM_F_DUMP, &rt_hdr, sizeof (rt_hdr));
+
+ if (err < 0)
+ {
+ NL_ERROR ("Unable to send a dump request: %s", nl_geterror (err));
+ }
+ else
+ NL_INFO ("Dump request sent via socket %d of type %d",
+ nl_socket_get_fd (sk_route), sock_type);
+
+ return err;
+}
+
+static int
+lcp_nl_route_dump_cb (struct nl_msg *msg, void *arg)
+{
+ int err;
+
+ if ((err = nl_msg_parse (msg, nl_route_dispatch, NULL)) < 0)
+ NL_ERROR ("Unable to parse object: %s", nl_geterror (err));
+
+ return NL_OK;
+}
+
+static int
+lcp_nl_recv_dump_replies (nl_sock_type_t sock_type, int msg_limit,
+ int *is_done_rcvd)
+{
+ nl_main_t *nm = &nl_main;
+ struct nl_sock *sk_route = nm->sk_route_sync[sock_type];
+ struct sockaddr_nl nla;
+ uint8_t *buf = NULL;
+ int n_bytes;
+ struct nlmsghdr *hdr;
+ struct nl_msg *msg = NULL;
+ int err = 0;
+ int done = 0;
+ int n_msgs = 0;
+
+ lcp_set_netlink_processing_active (1);
+
+continue_reading:
+ n_bytes = nl_recv (sk_route, &nla, &buf, /* creds */ NULL);
+ if (n_bytes <= 0)
+ {
+ lcp_set_netlink_processing_active (0);
+ return n_bytes;
+ }
+
+ hdr = (struct nlmsghdr *) buf;
+ while (nlmsg_ok (hdr, n_bytes))
+ {
+ nlmsg_free (msg);
+ msg = nlmsg_convert (hdr);
+ if (!msg)
+ {
+ err = -NLE_NOMEM;
+ goto out;
+ }
+
+ n_msgs++;
+
+ nlmsg_set_proto (msg, NETLINK_ROUTE);
+ nlmsg_set_src (msg, &nla);
+
+ /* Message that terminates a multipart message. Finish parsing and signal
+ * the caller that all dump replies have been received
+ */
+ if (hdr->nlmsg_type == NLMSG_DONE)
+ {
+ done = 1;
+ goto out;
+ }
+ /* Message to be ignored. Continue parsing */
+ else if (hdr->nlmsg_type == NLMSG_NOOP)
+ ;
+ /* Message that indicates data was lost. Finish parsing and return an
+ * error
+ */
+ else if (hdr->nlmsg_type == NLMSG_OVERRUN)
+ {
+ err = -NLE_MSG_OVERFLOW;
+ goto out;
+ }
+ /* Message that indicates an error. Finish parsing, extract the error
+ * code, and return it */
+ else if (hdr->nlmsg_type == NLMSG_ERROR)
+ {
+ struct nlmsgerr *e = nlmsg_data (hdr);
+
+ if (hdr->nlmsg_len < nlmsg_size (sizeof (*e)))
+ {
+ err = -NLE_MSG_TRUNC;
+ goto out;
+ }
+ else if (e->error)
+ {
+ err = -nl_syserr2nlerr (e->error);
+ goto out;
+ }
+ /* Message is an acknowledgement (err_code = 0). Continue parsing */
+ else
+ ;
+ }
+ /* Message that contains the requested data. Pass it for processing and
+ * continue parsing
+ */
+ else
+ {
+ lcp_nl_route_dump_cb (msg, NULL);
+ }
+
+ hdr = nlmsg_next (hdr, &n_bytes);
+ }
+
+ nlmsg_free (msg);
+ free (buf);
+ msg = NULL;
+ buf = NULL;
+
+ if (!done && n_msgs < msg_limit)
+ goto continue_reading;
+
+out:
+ lcp_set_netlink_processing_active (0);
+
+ nlmsg_free (msg);
+ free (buf);
+
+ if (err)
+ return err;
+
+ *is_done_rcvd = done;
+
+ return n_msgs;
+}
+
+#define DAY_F64 (1.0 * (24 * 60 * 60))
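+/* i.e. 86400 seconds - used below as an effectively infinite wait between
+ * events */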
+
+static uword
+nl_route_process (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ nl_main_t *nm = &nl_main;
+ uword event_type;
+ uword *event_data = 0;
+ f64 wait_time = DAY_F64;
+ int n_msgs;
+ int is_done;
+
+ while (1)
+ {
+ if (nm->nl_status == NL_STATUS_NOTIF_PROC)
+ {
+ /* If we process a batch of messages and stop because we reached the
+ * batch size limit, we want to wake up after the batch delay and
+ * process more. Otherwise we just want to wait for a read event.
+ */
+ vlib_process_wait_for_event_or_clock (vm, wait_time);
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ switch (event_type)
+ {
+ /* Process batch of queued messages on timeout or read event
+ * signal
+ */
+ case ~0:
+ case NL_EVENT_READ:
+ nl_route_process_msgs ();
+ wait_time = (vec_len (nm->nl_msg_queue) != 0) ?
+ nm->batch_delay_ms * 1e-3 :
+ DAY_F64;
+ break;
+
+ /* Initiate synchronization if there was an error polling or
+ * reading the notification socket
+ */
+ case NL_EVENT_ERR:
+ nm->nl_status = NL_STATUS_SYNC;
+ break;
+
+ default:
+ NL_ERROR ("Unknown event type: %u", (u32) event_type);
+ }
+ }
+ else if (nm->nl_status == NL_STATUS_SYNC)
+ {
+ /* Stop processing notifications - close the notification socket and
+ * discard all messages that are currently in the queue
+ */
+ lcp_nl_close_socket ();
+ lcp_nl_route_discard_msgs ();
+
+	  /* Wait some time before the next synchronization attempt. Waiting
+	   * out the notification storm reduces the number of failed attempts
+	   * that stall the main thread
+	   */
+ NL_INFO ("Wait before next synchronization attempt for %ums",
+ nm->sync_attempt_delay_ms);
+ vlib_process_suspend (vm, nm->sync_attempt_delay_ms * 1e-3);
+
+      /* Open a netlink synchronization socket for every data type of
+       * interest: link, address, neighbor, and route. This allows dump
+       * requests for all data types to be outstanding simultaneously;
+       * sending a dump request on a socket while the previous one is still
+       * in progress fails with EBUSY
+       */
+#define _(stype, mtype, tname, fn) lcp_nl_open_sync_socket (stype);
+ foreach_sock_type
+#undef _
+
+	  /* Start reading notifications and enqueueing them for further
+	   * processing. The notifications capture whatever changes between
+	   * the snapshot the kernel takes for the dump request and the actual
+	   * state in the meantime. Once all the dump replies are processed,
+	   * the queued notifications are processed
+	   */
+ lcp_nl_open_socket ();
+
+	  /* Request the current entry set from the kernel for every data
+	   * type of interest, i.e. ask the kernel for a snapshot of the
+	   * current routing state, which it will then reply with
+	   */
+#define _(stype, mtype, tname, fn) lcp_nl_route_send_dump_req (stype, mtype);
+ foreach_sock_type
+#undef _
+
+ /* Process all the dump replies */
+#define _(stype, mtype, tname, fn) \
+ nl_##fn##_sync_begin (); \
+ is_done = 0; \
+ do \
+ { \
+ n_msgs = \
+ lcp_nl_recv_dump_replies (stype, nm->sync_batch_limit, &is_done); \
+ if (n_msgs < 0) \
+ { \
+ NL_ERROR ("Error receiving dump replies of type " tname \
+ ": %s (%d)", \
+ nl_geterror (n_msgs), n_msgs); \
+ break; \
+ } \
+ else if (n_msgs == 0) \
+ { \
+ NL_ERROR ("EOF while receiving dump replies of type " tname); \
+ break; \
+ } \
+ else \
+ NL_INFO ("Processed %u dump replies of type " tname, n_msgs); \
+ \
+      /* Suspend the processing loop and wait until an event signal is \
+       * received or the timeout expires. During synchronization, only an \
+       * error event is expected because read events are suppressed. This \
+       * avoids stalling the main thread while still detecting errors on \
+       * the notification socket that would leave synchronization \
+       * incomplete \
+       */ \
+ vlib_process_wait_for_event_or_clock (vm, \
+ nm->sync_batch_delay_ms * 1e-3); \
+ event_type = vlib_process_get_events (vm, &event_data); \
+ vec_reset_length (event_data); \
+ \
+      /* If an error event is received, stop synchronization and retry \
+       * later \
+       */ \
+ if (event_type == NL_EVENT_ERR) \
+ goto sync_later; \
+ } \
+ while (!is_done); \
+ nl_##fn##_sync_end ();
+
+ foreach_sock_type
+#undef _
+
+ /* Start processing notifications */
+ nm->nl_status = NL_STATUS_NOTIF_PROC;
+
+	  /* Trigger message processing if notifications were received during
+	   * synchronization
+	   */
+ wait_time = (vec_len (nm->nl_msg_queue) != 0) ? 1e-3 : DAY_F64;
+
+ sync_later:
+ /* Close netlink synchronization sockets */
+#define _(stype, mtype, tname, fn) lcp_nl_close_sync_socket (stype);
+ foreach_sock_type
+#undef _
+ }
+ else
+ NL_ERROR ("Unknown status: %d", nm->nl_status);
+ }
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (nl_route_process_node, static) = {
+ .function = nl_route_process,
+ .name = "linux-cp-netlink-process",
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 17,
+};
+
+static int
+nl_route_cb (struct nl_msg *msg, void *arg)
+{
+ nl_main_t *nm = &nl_main;
+ nl_msg_info_t *msg_info = 0;
+
+ /* delay processing - increment ref count and queue for later */
+ vec_add2 (nm->nl_msg_queue, msg_info, 1);
+
+ /* store a timestamp for the message */
+ msg_info->ts = vlib_time_now (vlib_get_main ());
+ msg_info->msg = msg;
+ nlmsg_get (msg);
+
+ return 0;
+}
+
+int
+lcp_nl_drain_messages (void)
+{
+ int err;
+ nl_main_t *nm = &nl_main;
+
+ /* Read until there's an error */
+ while ((err = nl_recvmsgs_default (nm->sk_route)) > -1)
+ ;
+
+  /* If there was an error other than EAGAIN, signal the process node */
+ if (err != -NLE_AGAIN)
+ vlib_process_signal_event (vlib_get_main (), nl_route_process_node.index,
+ NL_EVENT_ERR, 0);
+ else
+ {
+      /* If netlink notification processing is active, signal the process
+       * node that notifications were read
+       */
+ if (nm->nl_status == NL_STATUS_NOTIF_PROC)
+ vlib_process_signal_event (
+ vlib_get_main (), nl_route_process_node.index, NL_EVENT_READ, 0);
+ }
+
+ return err;
+}
+
+void
+lcp_nl_pair_add_cb (lcp_itf_pair_t *pair)
+{
+ lcp_nl_drain_messages ();
+}
+
+static clib_error_t *
+nl_route_read_cb (clib_file_t *f)
+{
+ int err;
+ err = lcp_nl_drain_messages ();
+ if (err < 0 && err != -NLE_AGAIN)
+ NL_ERROR ("Error reading netlink socket (fd %d): %s (%d)",
+ f->file_descriptor, nl_geterror (err), err);
+
+ return 0;
+}
+
+static clib_error_t *
+nl_route_error_cb (clib_file_t *f)
+{
+ NL_ERROR ("Error polling netlink socket (fd %d)", f->file_descriptor);
+
+ /* notify process node */
+ vlib_process_signal_event (vlib_get_main (), nl_route_process_node.index,
+ NL_EVENT_ERR, 0);
+
+ return clib_error_return (0, "Error polling netlink socket %d",
+ f->file_descriptor);
+}
+
+struct nl_cache *
+lcp_nl_get_cache (lcp_nl_obj_t t)
+{
+ nl_main_t *nm = &nl_main;
+
+ return nm->nl_caches[t];
+}
+
+/* Set the RX buffer size to be used on the netlink socket */
+void
+lcp_nl_set_buffer_size (u32 buf_size)
+{
+ nl_main_t *nm = &nl_main;
+
+ nm->rx_buf_size = buf_size;
+
+ if (nm->sk_route)
+ nl_socket_set_buffer_size (nm->sk_route, nm->rx_buf_size, nm->tx_buf_size);
+}
+
+/* Set the batch size - the maximum number of netlink messages to process at
+ * one time */
+void
+lcp_nl_set_batch_size (u32 batch_size)
+{
+ nl_main_t *nm = &nl_main;
+
+ nm->batch_size = batch_size;
+}
+
+/* Set the batch delay - how long to wait in ms between processing batches */
+void
+lcp_nl_set_batch_delay (u32 batch_delay_ms)
+{
+ nl_main_t *nm = &nl_main;
+
+ nm->batch_delay_ms = batch_delay_ms;
+}
+
+static clib_error_t *
+lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ u32 buf_size, batch_size, batch_delay_ms;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "nl-rx-buffer-size %u", &buf_size))
+ lcp_nl_set_buffer_size (buf_size);
+ else if (unformat (input, "nl-batch-size %u", &batch_size))
+ lcp_nl_set_batch_size (batch_size);
+ else if (unformat (input, "nl-batch-delay-ms %u", &batch_delay_ms))
+ lcp_nl_set_batch_delay (batch_delay_ms);
+ else
+ return clib_error_return (0, "invalid netlink option: %U",
+ format_unformat_error, input);
+ }
+
+ return NULL;
+}
+
+VLIB_CONFIG_FUNCTION (lcp_itf_pair_config, "linux-nl");
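+
+/* Example startup.conf stanza for the parser above; the option names come
+ * from the unformat strings, the values are purely illustrative:
+ *
+ *   linux-nl {
+ *     nl-rx-buffer-size 212992
+ *     nl-batch-size 2048
+ *     nl-batch-delay-ms 50
+ *   }
+ */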
+
+static void
+lcp_nl_close_socket (void)
+{
+ nl_main_t *nm = &nl_main;
+
+ /* delete existing fd from epoll fd set */
+ if (nm->clib_file_index != ~0)
+ {
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *f = clib_file_get (fm, nm->clib_file_index);
+
+ if (f)
+ {
+ NL_INFO ("Stopping poll of fd %u", f->file_descriptor);
+ fm->file_update (f, UNIX_FILE_UPDATE_DELETE);
+ }
+ else
+ /* stored index was not a valid file, reset stored index to ~0 */
+ nm->clib_file_index = ~0;
+ }
+
+ /* If we already created a socket, close/free it */
+ if (nm->sk_route)
+ {
+ NL_INFO ("Closing netlink socket %d", nl_socket_get_fd (nm->sk_route));
+ nl_socket_free (nm->sk_route);
+ nm->sk_route = NULL;
+ }
+}
+
+static void
+lcp_nl_open_socket (void)
+{
+ nl_main_t *nm = &nl_main;
+ int dest_ns_fd, curr_ns_fd;
+
+  /* Allocate a new socket for both routes and acls.
+   * Notifications do not use sequence numbers, so disable sequence number
+   * checking.
+   * Define a callback function to be invoked for each notification
+   * received
+   */
+ nm->sk_route = nl_socket_alloc ();
+ nl_socket_disable_seq_check (nm->sk_route);
+
+ dest_ns_fd = lcp_get_default_ns_fd ();
+ if (dest_ns_fd)
+ {
+ curr_ns_fd = open ("/proc/self/ns/net", O_RDONLY);
+ setns (dest_ns_fd, CLONE_NEWNET);
+ }
+
+ nl_connect (nm->sk_route, NETLINK_ROUTE);
+
+ if (dest_ns_fd && curr_ns_fd >= 0)
+ {
+ setns (curr_ns_fd, CLONE_NEWNET);
+ close (curr_ns_fd);
+ }
+
+ /* Subscribe to all the 'routing' notifications on the route socket */
+ nl_socket_add_memberships (nm->sk_route, RTNLGRP_LINK, RTNLGRP_IPV6_IFADDR,
+ RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV4_ROUTE,
+ RTNLGRP_IPV6_ROUTE, RTNLGRP_NEIGH, RTNLGRP_NOTIFY,
+#ifdef RTNLGRP_MPLS_ROUTE /* not defined on CentOS/RHEL 7 */
+ RTNLGRP_MPLS_ROUTE,
+#endif
+ RTNLGRP_IPV4_RULE, RTNLGRP_IPV6_RULE, 0);
+
+ /* Set socket in nonblocking mode and increase buffer sizes */
+ nl_socket_set_nonblocking (nm->sk_route);
+ nl_socket_set_buffer_size (nm->sk_route, nm->rx_buf_size, nm->tx_buf_size);
+
+ if (nm->clib_file_index == ~0)
+ {
+ clib_file_t rt_file = {
+ .read_function = nl_route_read_cb,
+ .error_function = nl_route_error_cb,
+ .file_descriptor = nl_socket_get_fd (nm->sk_route),
+ .description = format (0, "linux-cp netlink route socket"),
+ };
+
+ nm->clib_file_index = clib_file_add (&file_main, &rt_file);
+ NL_INFO ("Added file %u", nm->clib_file_index);
+ }
+ else
+ /* clib file already created and socket was closed due to error */
+ {
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *f = clib_file_get (fm, nm->clib_file_index);
+
+ f->file_descriptor = nl_socket_get_fd (nm->sk_route);
+ fm->file_update (f, UNIX_FILE_UPDATE_ADD);
+ NL_INFO ("Starting poll of %d", f->file_descriptor);
+ }
+
+ nl_socket_modify_cb (nm->sk_route, NL_CB_VALID, NL_CB_CUSTOM, nl_route_cb,
+ NULL);
+ NL_INFO ("Opened netlink socket %d", nl_socket_get_fd (nm->sk_route));
+}
+
+static void
+lcp_nl_open_sync_socket (nl_sock_type_t sock_type)
+{
+ nl_main_t *nm = &nl_main;
+ int dest_ns_fd, curr_ns_fd;
+ struct nl_sock *sk_route;
+
+  /* Allocate a new blocking socket for routes that will be used for dump
+   * requests. Buffer sizes are left at their defaults because replies to
+   * dump requests are flow-controlled and the kernel will not overflow the
+   * socket by sending these
+   */
+
+ nm->sk_route_sync[sock_type] = sk_route = nl_socket_alloc ();
+
+ dest_ns_fd = lcp_get_default_ns_fd ();
+ if (dest_ns_fd > 0)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ if (clib_setns (dest_ns_fd) == -1)
+ NL_ERROR ("Cannot set destination ns");
+ }
+
+ nl_connect (sk_route, NETLINK_ROUTE);
+
+ if (dest_ns_fd > 0)
+ {
+ if (curr_ns_fd == -1)
+ {
+ NL_ERROR ("No previous ns to set");
+ }
+ else
+ {
+ if (clib_setns (curr_ns_fd) == -1)
+ NL_ERROR ("Cannot set previous ns");
+ close (curr_ns_fd);
+ }
+ }
+
+ NL_INFO ("Opened netlink synchronization socket %d of type %d",
+ nl_socket_get_fd (sk_route), sock_type);
+}
+
+static void
+lcp_nl_close_sync_socket (nl_sock_type_t sock_type)
+{
+ nl_main_t *nm = &nl_main;
+ struct nl_sock *sk_route = nm->sk_route_sync[sock_type];
+
+ if (sk_route)
+ {
+ NL_INFO ("Closing netlink synchronization socket %d of type %d",
+ nl_socket_get_fd (sk_route), sock_type);
+ nl_socket_free (sk_route);
+ nm->sk_route_sync[sock_type] = NULL;
+ }
+}
+
+#include <vnet/plugin/plugin.h>
+clib_error_t *
+lcp_nl_init (vlib_main_t *vm)
+{
+ nl_main_t *nm = &nl_main;
+ lcp_itf_pair_vft_t nl_itf_pair_vft = {
+ .pair_add_fn = lcp_nl_pair_add_cb,
+ };
+
+ nm->nl_status = NL_STATUS_NOTIF_PROC;
+ nm->clib_file_index = ~0;
+ nm->nl_logger = vlib_log_register_class ("nl", "nl");
+
+ lcp_nl_open_socket ();
+ lcp_itf_pair_register_vft (&nl_itf_pair_vft);
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lcp_nl_init) = {
+ .runs_after = VLIB_INITS ("lcp_interface_init", "tuntap_init",
+ "ip_neighbor_init"),
+};
+
+#include <vpp/app/version.h>
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "linux Control Plane - Netlink listener",
+ .default_disabled = 1,
+};
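+
+/* The plugin registers with default_disabled = 1, so it must be enabled
+ * explicitly in startup.conf; a sketch, assuming the built object is named
+ * linux_nl_plugin.so:
+ *
+ *   plugins {
+ *     plugin linux_nl_plugin.so { enable }
+ *   }
+ */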
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_nl.h b/src/plugins/linux-cp/lcp_nl.h
new file mode 100644
index 00000000000..41757e9b983
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_nl.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/addr.h>
+
+typedef void (*nl_rt_link_cb_t) (struct rtnl_link *rl, void *ctx);
+typedef void (*nl_rt_link_sync_cb_t) (void);
+typedef void (*nl_rt_addr_cb_t) (struct rtnl_addr *ra);
+typedef void (*nl_rt_addr_sync_cb_t) (void);
+typedef void (*nl_rt_neigh_cb_t) (struct rtnl_neigh *rr);
+typedef void (*nl_rt_neigh_sync_cb_t) (void);
+typedef void (*nl_rt_route_add_cb_t) (struct rtnl_route *rn, int is_replace);
+typedef void (*nl_rt_route_del_cb_t) (struct rtnl_route *rn);
+typedef void (*nl_rt_route_sync_cb_t) (void);
+
+#define NL_RT_COMMON uword is_mp_safe
+
+typedef struct nl_rt_link_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_link_cb_t cb;
+} nl_rt_link_t;
+
+typedef struct nl_rt_link_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_link_sync_cb_t cb;
+} nl_rt_link_sync_t;
+
+typedef struct nl_rt_addr_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_addr_cb_t cb;
+} nl_rt_addr_t;
+
+typedef struct nl_rt_addr_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_addr_sync_cb_t cb;
+} nl_rt_addr_sync_t;
+
+typedef struct nl_rt_neigh_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_neigh_cb_t cb;
+} nl_rt_neigh_t;
+
+typedef struct nl_rt_neigh_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_neigh_sync_cb_t cb;
+} nl_rt_neigh_sync_t;
+
+typedef struct nl_rt_route_add_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_add_cb_t cb;
+} nl_rt_route_add_t;
+
+typedef struct nl_rt_route_del_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_del_cb_t cb;
+} nl_rt_route_del_t;
+
+typedef struct nl_rt_route_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_sync_cb_t cb;
+} nl_rt_route_sync_t;
+
+#undef NL_RT_COMMON
+
+typedef struct nl_vft_t_
+{
+ nl_rt_link_t nvl_rt_link_add;
+ nl_rt_link_t nvl_rt_link_del;
+ nl_rt_link_sync_t nvl_rt_link_sync_begin;
+ nl_rt_link_sync_t nvl_rt_link_sync_end;
+ nl_rt_addr_t nvl_rt_addr_add;
+ nl_rt_addr_t nvl_rt_addr_del;
+ nl_rt_addr_sync_t nvl_rt_addr_sync_begin;
+ nl_rt_addr_sync_t nvl_rt_addr_sync_end;
+ nl_rt_neigh_t nvl_rt_neigh_add;
+ nl_rt_neigh_t nvl_rt_neigh_del;
+ nl_rt_neigh_sync_t nvl_rt_neigh_sync_begin;
+ nl_rt_neigh_sync_t nvl_rt_neigh_sync_end;
+ nl_rt_route_add_t nvl_rt_route_add;
+ nl_rt_route_del_t nvl_rt_route_del;
+ nl_rt_route_sync_t nvl_rt_route_sync_begin;
+ nl_rt_route_sync_t nvl_rt_route_sync_end;
+} nl_vft_t;
+
+extern void nl_register_vft (const nl_vft_t *nv);
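+
+/* A consumer fills in the callbacks it cares about and registers the
+ * table. A minimal sketch, with a hypothetical link-add handler:
+ *
+ *   static void
+ *   my_link_add (struct rtnl_link *rl, void *ctx)
+ *   {
+ *     // react to RTM_NEWLINK
+ *   }
+ *
+ *   const nl_vft_t my_vft = {
+ *     .nvl_rt_link_add = { .is_mp_safe = 0, .cb = my_link_add },
+ *   };
+ *
+ *   nl_register_vft (&my_vft);
+ */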
+
+typedef enum lcp_nl_obj_t_
+{
+ LCP_NL_LINK,
+ LCP_NL_ADDR,
+ LCP_NL_NEIGH,
+ LCP_NL_ROUTE,
+} lcp_nl_obj_t;
+
+/* struct type to hold context on the netlink message being processed.
+ *
+ * At creation of a pair, a tap/tun is created and configured to match its
+ * corresponding hardware interface (MAC address, link state, MTU). Netlink
+ * messages are sent announcing the creation and subsequent configuration.
+ * We do not need to (and should not) act on those messages since applying
+ * those same configurations again is unnecessary and can be disruptive. So
+ * a timestamp for a message is stored and can be compared against the time
+ * the interface came under linux-cp management in order to figure out
+ * whether we should apply any configuration.
+ */
+typedef struct nl_msg_info
+{
+ struct nl_msg *msg;
+ f64 ts;
+} nl_msg_info_t;
+
+#define LCP_NL_N_OBJS (LCP_NL_ROUTE + 1)
+
+extern struct nl_cache *lcp_nl_get_cache (lcp_nl_obj_t t);
+extern int lcp_nl_drain_messages (void);
+extern void lcp_nl_set_buffer_size (u32 buf_size);
+extern void lcp_nl_set_batch_size (u32 batch_size);
+extern void lcp_nl_set_batch_delay (u32 batch_delay_ms);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_node.c b/src/plugins/linux-cp/lcp_node.c
index b2ffc5fb63f..241cc5e4bff 100644
--- a/src/plugins/linux-cp/lcp_node.c
+++ b/src/plugins/linux-cp/lcp_node.c
@@ -31,6 +31,7 @@
#include <vnet/ip/ip4.h>
#include <vnet/ip/ip6.h>
#include <vnet/l2/l2_input.h>
+#include <vnet/mpls/mpls.h>
#define foreach_lip_punt \
_ (IO, "punt to host") \
@@ -438,14 +439,112 @@ VNET_FEATURE_INIT (lcp_xc_ip6_mcast_node, static) = {
typedef enum
{
+ LCP_XC_MPLS_NEXT_DROP,
+ LCP_XC_MPLS_NEXT_IO,
+ LCP_XC_MPLS_N_NEXT,
+} lcp_xc_mpls_next_t;
+
+static_always_inline uword
+lcp_xc_mpls_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u32 n_left_from, *from, *to_next, n_left_to_next;
+ lcp_xc_next_t next_index;
+
+ next_index = 0;
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ const ethernet_header_t *eth;
+ const lcp_itf_pair_t *lip;
+ u32 next0, bi0, lipi, ai;
+ vlib_buffer_t *b0;
+ // const ip_adjacency_t *adj;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ lipi =
+ lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+ lip = lcp_itf_pair_get (lipi);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index;
+ vlib_buffer_advance (b0, -lip->lip_rewrite_len);
+ eth = vlib_buffer_get_current (b0);
+
+ ai = ADJ_INDEX_INVALID;
+ next0 = LCP_XC_MPLS_NEXT_DROP;
+ if (!ethernet_address_cast (eth->dst_address))
+ ai = lcp_adj_lkup ((u8 *) eth, lip->lip_rewrite_len,
+ vnet_buffer (b0)->sw_if_index[VLIB_TX]);
+ if (ai != ADJ_INDEX_INVALID)
+ {
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
+ next0 = LCP_XC_MPLS_NEXT_IO;
+ }
+
+ if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ lcp_xc_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->phy_sw_if_index = lip->lip_phy_sw_if_index;
+ t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (lcp_xc_mpls)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return (lcp_xc_mpls_inline (vm, node, frame));
+}
+
+VLIB_REGISTER_NODE (
+ lcp_xc_mpls) = { .name = "linux-cp-xc-mpls",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lcp_xc_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_next_nodes = LCP_XC_MPLS_N_NEXT,
+ .next_nodes = {
+ [LCP_XC_MPLS_NEXT_DROP] = "error-drop",
+ [LCP_XC_MPLS_NEXT_IO] = "interface-output",
+ } };
+
+VNET_FEATURE_INIT (lcp_xc_mpls_node, static) = {
+ .arc_name = "mpls-input",
+ .node_name = "linux-cp-xc-mpls",
+};
+
+typedef enum
+{
LCP_XC_L3_NEXT_XC,
+ LCP_XC_L3_NEXT_LOOKUP,
LCP_XC_L3_N_NEXT,
} lcp_xc_l3_next_t;
/**
* X-connect all packets from the HOST to the PHY on L3 interfaces
*
- * There's only one adjacency that can be used on thises links.
+ * There's only one adjacency that can be used on these links.
*/
static_always_inline u32
lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
@@ -453,6 +552,7 @@ lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
u32 n_left_from, *from, *to_next, n_left_to_next;
lcp_xc_next_t next_index;
+ vnet_main_t *vnm = vnet_get_main ();
next_index = 0;
n_left_from = frame->n_vectors;
@@ -488,10 +588,24 @@ lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
lip = lcp_itf_pair_get (lipi);
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index;
- next0 = LCP_XC_L3_NEXT_XC;
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
- lip->lip_phy_adjs.adj_index[af];
+ /* P2P tunnels can use generic adjacency */
+ if (PREDICT_TRUE (
+ vnet_sw_interface_is_p2p (vnm, lip->lip_phy_sw_if_index)))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ lip->lip_phy_sw_if_index;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ lip->lip_phy_adjs.adj_index[af];
+ next0 = LCP_XC_L3_NEXT_XC;
+ }
+ /* P2MP tunnels require a fib lookup to find the right adjacency */
+ else
+ {
+ /* lookup should use FIB table associated with phy interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ lip->lip_phy_sw_if_index;
+ next0 = LCP_XC_L3_NEXT_LOOKUP;
+ }
if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
{
@@ -534,6 +648,7 @@ VLIB_REGISTER_NODE (lcp_xc_l3_ip4_node) = {
.n_next_nodes = LCP_XC_L3_N_NEXT,
.next_nodes = {
[LCP_XC_L3_NEXT_XC] = "ip4-midchain",
+ [LCP_XC_L3_NEXT_LOOKUP] = "ip4-lookup",
},
};
@@ -556,6 +671,7 @@ VLIB_REGISTER_NODE (lcp_xc_l3_ip6_node) = {
.n_next_nodes = LCP_XC_L3_N_NEXT,
.next_nodes = {
[LCP_XC_L3_NEXT_XC] = "ip6-midchain",
+ [LCP_XC_L3_NEXT_LOOKUP] = "ip6-lookup",
},
};
@@ -671,10 +787,14 @@ VLIB_NODE_FN (lcp_arp_phy_node)
c0 = vlib_buffer_copy (vm, b0);
vlib_buffer_advance (b0, len0);
- /* Send to the host */
- vnet_buffer (c0)->sw_if_index[VLIB_TX] =
- lip0->lip_host_sw_if_index;
- reply_copies[n_copies++] = vlib_get_buffer_index (vm, c0);
+ if (c0)
+ {
+ /* Send to the host */
+ vnet_buffer (c0)->sw_if_index[VLIB_TX] =
+ lip0->lip_host_sw_if_index;
+ reply_copies[n_copies++] =
+ vlib_get_buffer_index (vm, c0);
+ }
}
}
if (arp1->opcode == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
@@ -699,10 +819,14 @@ VLIB_NODE_FN (lcp_arp_phy_node)
c1 = vlib_buffer_copy (vm, b1);
vlib_buffer_advance (b1, len1);
- /* Send to the host */
- vnet_buffer (c1)->sw_if_index[VLIB_TX] =
- lip1->lip_host_sw_if_index;
- reply_copies[n_copies++] = vlib_get_buffer_index (vm, c1);
+ if (c1)
+ {
+ /* Send to the host */
+ vnet_buffer (c1)->sw_if_index[VLIB_TX] =
+ lip1->lip_host_sw_if_index;
+ reply_copies[n_copies++] =
+ vlib_get_buffer_index (vm, c1);
+ }
}
}
@@ -771,10 +895,14 @@ VLIB_NODE_FN (lcp_arp_phy_node)
c0 = vlib_buffer_copy (vm, b0);
vlib_buffer_advance (b0, len0);
- /* Send to the host */
- vnet_buffer (c0)->sw_if_index[VLIB_TX] =
- lip0->lip_host_sw_if_index;
- reply_copies[n_copies++] = vlib_get_buffer_index (vm, c0);
+ if (c0)
+ {
+ /* Send to the host */
+ vnet_buffer (c0)->sw_if_index[VLIB_TX] =
+ lip0->lip_host_sw_if_index;
+ reply_copies[n_copies++] =
+ vlib_get_buffer_index (vm, c0);
+ }
}
}
diff --git a/src/plugins/linux-cp/lcp_router.c b/src/plugins/linux-cp/lcp_router.c
new file mode 100644
index 00000000000..0efd53e64ef
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_router.c
@@ -0,0 +1,1578 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sys/socket.h>
+#include <linux/if.h>
+#include <linux/mpls.h>
+
+//#include <vlib/vlib.h>
+#include <vlib/unix/plugin.h>
+#include <linux-cp/lcp_nl.h>
+#include <linux-cp/lcp_interface.h>
+
+#include <netlink/msg.h>
+#include <netlink/netlink.h>
+#include <netlink/socket.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/nexthop.h>
+#include <netlink/route/addr.h>
+#include <netlink/route/link/vlan.h>
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/ip/ip6_ll_table.h>
+#include <vnet/ip-neighbor/ip_neighbor.h>
+#include <vnet/ip/ip6_link.h>
+
+typedef struct lcp_router_table_t_
+{
+ uint32_t nlt_id;
+ fib_protocol_t nlt_proto;
+ u32 nlt_fib_index;
+ u32 nlt_mfib_index;
+ u32 nlt_refs;
+} lcp_router_table_t;
+
+static uword *lcp_router_table_db[FIB_PROTOCOL_MAX];
+static lcp_router_table_t *lcp_router_table_pool;
+static vlib_log_class_t lcp_router_logger;
+
+const static fib_prefix_t pfx_all1s = {
+ .fp_addr = {
+ .ip4 = {
+ .as_u32 = 0xffffffff,
+ }
+ },
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+};
+
+static fib_source_t lcp_rt_fib_src;
+static fib_source_t lcp_rt_fib_src_dynamic;
+
+#define LCP_ROUTER_DBG(...) vlib_log_debug (lcp_router_logger, __VA_ARGS__);
+
+#define LCP_ROUTER_INFO(...) vlib_log_notice (lcp_router_logger, __VA_ARGS__);
+
+#define LCP_ROUTER_ERROR(...) vlib_log_err (lcp_router_logger, __VA_ARGS__);
+
+static const mfib_prefix_t ip4_specials[] = {
+ /* ALL prefixes are in network order */
+ {
+ /* (*,224.0.0.0)/24 - all local subnet */
+ .fp_grp_addr = {
+ .ip4.data_u32 = 0x000000e0,
+ },
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ },
+};
+
+static const mfib_prefix_t ip6_specials[] = {
+ /* ALL prefixes are in network order */
+ {
+ /* (*,ff00::)/8 - all local subnet */
+ .fp_grp_addr = {
+ .ip6.as_u64[0] = 0x00000000000000ff,
+ },
+ .fp_len = 8,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ },
+};
+
+/* VIF to PHY DB of managed interfaces */
+static uword *lcp_routing_itf_db;
+
+static u32
+lcp_router_intf_h2p (u32 host)
+{
+ lcp_itf_pair_t *lip;
+ index_t lipi;
+ uword *p;
+
+ /*
+   * first check the linux-side-created interfaces (i.e. vlans, tunnels,
+   * etc.)
+ */
+ p = hash_get (lcp_routing_itf_db, host);
+
+ if (p)
+ return p[0];
+
+ /*
+ * then check the paired phys
+ */
+ lipi = lcp_itf_pair_find_by_vif (host);
+
+ if (INDEX_INVALID == lipi)
+ return (~0);
+
+ lip = lcp_itf_pair_get (lipi);
+
+ return lip->lip_phy_sw_if_index;
+}
+
+/*
+ * Check timestamps on netlink message and interface pair to decide whether
+ * the message should be applied. See the declaration of nl_msg_info_t for
+ * an explanation on why this is necessary.
+ * If timestamps are good (message ts is newer than intf pair ts), return 0.
+ * Else, return -1.
+ */
+static int
+lcp_router_lip_ts_check (nl_msg_info_t *msg_info, lcp_itf_pair_t *lip)
+{
+ if (!msg_info)
+ return 0;
+
+ if (msg_info->ts > lip->lip_create_ts)
+ return 0;
+
+ LCP_ROUTER_INFO ("Early message received for %U",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ return -1;
+}
+
+static void
+lcp_router_link_del (struct rtnl_link *rl, void *ctx)
+{
+ index_t lipi;
+
+ if (!lcp_auto_subint ())
+ return;
+
+ lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl));
+
+ if (INDEX_INVALID != lipi)
+ {
+ lcp_itf_pair_t *lip;
+
+ lip = lcp_itf_pair_get (lipi);
+
+ if (lcp_router_lip_ts_check ((nl_msg_info_t *) ctx, lip))
+ return;
+
+ LCP_ROUTER_INFO ("delete link: %s - %U", rtnl_link_get_type (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ lcp_itf_pair_delete (lip->lip_phy_sw_if_index);
+
+ if (rtnl_link_is_vlan (rl))
+ {
+ LCP_ROUTER_INFO ("delete vlan: %s -> %U", rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ vnet_delete_sub_interface (lip->lip_phy_sw_if_index);
+ vnet_delete_sub_interface (lip->lip_host_sw_if_index);
+ }
+ }
+ else
+ LCP_ROUTER_INFO ("ignore link del: %s - %s", rtnl_link_get_type (rl),
+ rtnl_link_get_name (rl));
+}
+
+static void
+lcp_router_ip4_mroutes_add_del (u32 sw_if_index, u8 is_add)
+{
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
+ };
+ u32 mfib_index;
+ int ii;
+
+ mfib_index =
+ mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+
+ for (ii = 0; ii < ARRAY_LEN (ip4_specials); ii++)
+ {
+ if (is_add)
+ {
+ mfib_table_entry_path_update (mfib_index, &ip4_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ else
+ {
+ mfib_table_entry_path_remove (mfib_index, &ip4_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW, &path);
+ }
+ }
+}
+
+static void
+lcp_router_ip6_mroutes_add_del (u32 sw_if_index, u8 is_add)
+{
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP6,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
+ };
+ u32 mfib_index;
+ int ii;
+
+ mfib_index =
+ mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+ for (ii = 0; ii < ARRAY_LEN (ip6_specials); ii++)
+ {
+ if (is_add)
+ {
+ mfib_table_entry_path_update (mfib_index, &ip6_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ else
+ {
+ mfib_table_entry_path_remove (mfib_index, &ip6_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW, &path);
+ }
+ }
+}
+
+static void
+lcp_router_link_mtu (struct rtnl_link *rl, u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 mtu;
+ vnet_sw_interface_t *sw;
+ vnet_hw_interface_t *hw;
+
+ mtu = rtnl_link_get_mtu (rl);
+ if (!mtu)
+ return;
+
+ sw = vnet_get_sw_interface (vnm, sw_if_index);
+ hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+  /* If this is a HW interface, try to change the MTU on the hw link */
+ if ((sw->sw_if_index == sw->sup_sw_if_index) &&
+ (hw->hw_class_index == ethernet_hw_interface_class.index))
+ vnet_hw_interface_set_mtu (vnm, hw->hw_if_index, mtu);
+ else
+ vnet_sw_interface_set_mtu (vnm, sw->sw_if_index, mtu);
+}
+
+static walk_rc_t
+lcp_router_link_addr_adj_upd_cb (vnet_main_t *vnm, u32 sw_if_index, void *arg)
+{
+ lcp_itf_pair_t *lip;
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ {
+ return WALK_CONTINUE;
+ }
+
+ vnet_update_adjacency_for_sw_interface (vnm, lip->lip_phy_sw_if_index,
+ lip->lip_phy_adjs.adj_index[AF_IP4]);
+ vnet_update_adjacency_for_sw_interface (vnm, lip->lip_phy_sw_if_index,
+ lip->lip_phy_adjs.adj_index[AF_IP6]);
+
+ return WALK_CONTINUE;
+}
+
+static void
+lcp_router_link_addr (struct rtnl_link *rl, lcp_itf_pair_t *lip)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ struct nl_addr *mac_addr;
+ vnet_sw_interface_t *sw;
+ vnet_hw_interface_t *hw;
+ void *mac_addr_bytes;
+
+ mac_addr = rtnl_link_get_addr (rl);
+ if (!mac_addr || (nl_addr_get_family (mac_addr) != AF_LLC))
+ return;
+
+ sw = vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index);
+
+ /* can only change address on hw interface */
+ if (sw->sw_if_index != sw->sup_sw_if_index)
+ return;
+
+ hw = vnet_get_sup_hw_interface (vnm, lip->lip_phy_sw_if_index);
+ if (!vec_len (hw->hw_address))
+ return;
+
+ mac_addr_bytes = nl_addr_get_binary_addr (mac_addr);
+ if (clib_memcmp (mac_addr_bytes, hw->hw_address, nl_addr_get_len (mac_addr)))
+ vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index,
+ mac_addr_bytes);
+
+ /* mcast adjacencies need to be updated */
+ vnet_hw_interface_walk_sw (vnm, hw->hw_if_index,
+ lcp_router_link_addr_adj_upd_cb, NULL);
+}
+
+static void lcp_router_table_flush (lcp_router_table_t *nlt,
+ u32 *sw_if_index_to_bool,
+ fib_source_t source);
+
+static void
+lcp_router_link_add (struct rtnl_link *rl, void *ctx)
+{
+ index_t lipi;
+ int up;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl));
+ up = IFF_UP & rtnl_link_get_flags (rl);
+
+ if (INDEX_INVALID != lipi)
+ {
+ lcp_itf_pair_t *lip;
+ u32 sw_if_flags;
+ u32 sw_if_up;
+
+ lip = lcp_itf_pair_get (lipi);
+ if (!vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index))
+ return;
+
+ if (lcp_router_lip_ts_check ((nl_msg_info_t *) ctx, lip))
+ return;
+
+ sw_if_flags =
+ vnet_sw_interface_get_flags (vnm, lip->lip_phy_sw_if_index);
+ sw_if_up = (sw_if_flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ if (!sw_if_up && up)
+ {
+ vnet_sw_interface_admin_up (vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ }
+ else if (sw_if_up && !up)
+ {
+ vnet_sw_interface_admin_down (vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+
+ /* When an interface is brought down administratively, the kernel
+ * removes routes which resolve through that interface. For IPv4
+ * routes, the kernel will not send any explicit RTM_DELROUTE
+ * messages about removing them. In order to synchronize with the
+ * kernel, affected IPv4 routes need to be manually removed from the
+ * FIB. The behavior is different for IPv6 routes. Explicit
+ * RTM_DELROUTE messages are sent about IPv6 routes being removed.
+ */
+ u32 fib_index;
+ lcp_router_table_t *nlt;
+
+ fib_index = fib_table_get_index_for_sw_if_index (
+ FIB_PROTOCOL_IP4, lip->lip_phy_sw_if_index);
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ if (fib_index == nlt->nlt_fib_index &&
+ FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+ {
+ u32 *sw_if_index_to_bool = NULL;
+
+ vec_validate_init_empty (sw_if_index_to_bool,
+ lip->lip_phy_sw_if_index, false);
+ sw_if_index_to_bool[lip->lip_phy_sw_if_index] = true;
+
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src);
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src_dynamic);
+
+ vec_free (sw_if_index_to_bool);
+ break;
+ }
+ }
+ }
+
+ LCP_ROUTER_DBG ("link: %s (%d) -> %U/%U %s", rtnl_link_get_name (rl),
+ rtnl_link_get_ifindex (rl), format_vnet_sw_if_index_name,
+ vnm, lip->lip_phy_sw_if_index,
+ format_vnet_sw_if_index_name, vnm,
+ lip->lip_host_sw_if_index, (up ? "up" : "down"));
+
+ lcp_router_link_mtu (rl, lip->lip_phy_sw_if_index);
+ lcp_router_link_addr (rl, lip);
+ }
+ else if (lcp_auto_subint () && rtnl_link_is_vlan (rl))
+ {
+ /* Find the pair based on the parent VIF */
+ lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_link (rl));
+
+ if (INDEX_INVALID != lipi)
+ {
+ u32 sub_phy_sw_if_index, sub_host_sw_if_index;
+ const lcp_itf_pair_t *lip;
+ int vlan;
+ u8 *ns = 0; /* FIXME */
+
+ lip = lcp_itf_pair_get (lipi);
+
+ vlan = rtnl_link_vlan_get_id (rl);
+
+ /* create the vlan interface on the parent phy */
+ if (vnet_create_sub_interface (lip->lip_phy_sw_if_index, vlan, 18, 0,
+ vlan, &sub_phy_sw_if_index))
+ {
+ LCP_ROUTER_INFO ("failed create phy vlan: %s on %U",
+ rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ return;
+ }
+
+ /* pool could grow during the previous operation */
+ lip = lcp_itf_pair_get (lipi);
+
+ /* create the vlan interface on the parent host */
+ if (vnet_create_sub_interface (lip->lip_host_sw_if_index, vlan, 18,
+ 0, vlan, &sub_host_sw_if_index))
+ {
+ LCP_ROUTER_INFO ("failed create vlan: %s on %U",
+ rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_host_sw_if_index);
+ return;
+ }
+
+ char *if_name;
+ u8 *if_namev = 0;
+
+ LCP_ROUTER_INFO (
+ "create vlan: %s -> (%U, %U) : (%U, %U)", rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index, format_vnet_sw_if_index_name,
+ vnet_get_main (), sub_phy_sw_if_index,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_host_sw_if_index, format_vnet_sw_if_index_name,
+ vnet_get_main (), sub_host_sw_if_index);
+
+ if ((if_name = rtnl_link_get_name (rl)) != NULL)
+ vec_validate_init_c_string (if_namev, if_name,
+ strnlen (if_name, IFNAMSIZ));
+ lcp_itf_pair_add (sub_host_sw_if_index, sub_phy_sw_if_index,
+ if_namev, rtnl_link_get_ifindex (rl),
+ lip->lip_host_type, ns);
+ if (up)
+ vnet_sw_interface_admin_up (vnet_get_main (), sub_phy_sw_if_index);
+ vnet_sw_interface_admin_up (vnet_get_main (), sub_host_sw_if_index);
+
+ vec_free (if_namev);
+ }
+ else
+ {
+ LCP_ROUTER_INFO ("ignore parent-link add: %s - %s",
+ rtnl_link_get_type (rl), rtnl_link_get_name (rl));
+ }
+ }
+ else
+ LCP_ROUTER_INFO ("ignore link add: %s - %s", rtnl_link_get_type (rl),
+ rtnl_link_get_name (rl));
+}
+
+static void
+lcp_router_link_sync_begin (void)
+{
+ LCP_ROUTER_INFO ("Begin synchronization of interface configurations");
+}
+
+static void
+lcp_router_link_sync_end (void)
+{
+ LCP_ROUTER_INFO ("End synchronization of interface configurations");
+}
+
+static clib_error_t *
+lcp_router_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+ vnet_hw_interface_t *hi;
+ index_t lipi;
+
+ hi = vnet_get_hw_interface_or_null (vnm, hw_if_index);
+ if (!hi)
+ return 0;
+
+ lipi = lcp_itf_pair_find_by_phy (hi->sw_if_index);
+ if (lipi == INDEX_INVALID)
+ return 0;
+
+ /* When the link goes down on an interface, the kernel processes routes which
+ * resolve through that interface depending on how they were created:
+ * - Legacy Route API: the kernel retains the routes and marks them as
+ * "linkdown";
+ * - Nexthop API: the kernel removes the next-hop objects and the routes
+ * which reference them.
+ *
+ * For IPv4 routes created with Nexthop API, the kernel will not send any
+ * explicit RTM_DELROUTE messages about removing them. In order to
+ * synchronize with the kernel, affected routes need to be manually removed
+ * from the FIB.
+ *
+ * The behavior is different for IPv6 routes created with Nexthop API. The
+ * kernel will send explicit RTM_DELROUTE messages about IPv6 routes being
+ * removed.
+ */
+ if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP) &&
+ (lcp_get_del_static_on_link_down () ||
+ lcp_get_del_dynamic_on_link_down ()))
+ {
+ u32 fib_index;
+ u32 **fib_index_to_sw_if_index_to_bool = NULL;
+ u32 id, sw_if_index;
+ lcp_router_table_t *nlt;
+
+ fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ hi->sw_if_index);
+
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool, fib_index,
+ NULL);
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool[fib_index],
+ hi->sw_if_index, false);
+ fib_index_to_sw_if_index_to_bool[fib_index][hi->sw_if_index] = true;
+
+ /* clang-format off */
+ hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
+ ({
+ fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ sw_if_index);
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool, fib_index,
+ NULL);
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool[fib_index],
+ sw_if_index, false);
+ fib_index_to_sw_if_index_to_bool[fib_index][sw_if_index] = true;
+ }));
+ /* clang-format on */
+
+ vec_foreach_index (fib_index, fib_index_to_sw_if_index_to_bool)
+ {
+ u32 *sw_if_index_to_bool;
+
+ sw_if_index_to_bool = fib_index_to_sw_if_index_to_bool[fib_index];
+ if (NULL == sw_if_index_to_bool)
+ continue;
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ if (fib_index == nlt->nlt_fib_index &&
+ FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+ {
+ if (lcp_get_del_static_on_link_down ())
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src);
+ if (lcp_get_del_dynamic_on_link_down ())
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src_dynamic);
+ break;
+ }
+ }
+
+ vec_free (sw_if_index_to_bool);
+ }
+
+ vec_free (fib_index_to_sw_if_index_to_bool);
+ }
+
+ return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_router_link_up_down);
+
+static fib_protocol_t
+lcp_router_proto_k2f (uint32_t k)
+{
+ switch (k)
+ {
+ case AF_INET6:
+ return FIB_PROTOCOL_IP6;
+ case AF_INET:
+ return FIB_PROTOCOL_IP4;
+ case AF_MPLS:
+ return FIB_PROTOCOL_MPLS;
+ default:
+ ASSERT (0);
+ return FIB_PROTOCOL_NONE;
+ }
+}
+
+static void
+lcp_router_mk_addr (const struct nl_addr *rna, ip_address_t *ia)
+{
+ fib_protocol_t fproto;
+
+ ip_address_reset (ia);
+ fproto = lcp_router_proto_k2f (nl_addr_get_family (rna));
+ ASSERT (FIB_PROTOCOL_MPLS != fproto);
+
+ ip_address_set (ia, nl_addr_get_binary_addr (rna),
+ FIB_PROTOCOL_IP4 == fproto ? AF_IP4 : AF_IP6);
+}
+
+static fib_protocol_t
+lcp_router_mk_addr46 (const struct nl_addr *rna, ip46_address_t *ia)
+{
+ fib_protocol_t fproto;
+
+ fproto = lcp_router_proto_k2f (nl_addr_get_family (rna));
+ ASSERT (FIB_PROTOCOL_MPLS != fproto);
+
+ ip46_address_reset (ia);
+ if (FIB_PROTOCOL_IP4 == fproto)
+ memcpy (&ia->ip4, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
+ else
+ memcpy (&ia->ip6, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
+
+ return (fproto);
+}
+
+static void
+lcp_router_link_addr_add_del (struct rtnl_addr *rla, int is_del)
+{
+ u32 sw_if_index;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_addr_get_ifindex (rla));
+
+ if (~0 != sw_if_index)
+ {
+ ip_address_t nh;
+
+ lcp_router_mk_addr (rtnl_addr_get_local (rla), &nh);
+
+ if (AF_IP4 == ip_addr_version (&nh))
+ {
+ ip4_add_del_interface_address (
+ vlib_get_main (), sw_if_index, &ip_addr_v4 (&nh),
+ rtnl_addr_get_prefixlen (rla), is_del);
+ lcp_router_ip4_mroutes_add_del (sw_if_index, !is_del);
+ }
+ else if (AF_IP6 == ip_addr_version (&nh))
+ {
+ if (ip6_address_is_link_local_unicast (&ip_addr_v6 (&nh)))
+ if (is_del)
+ ip6_link_disable (sw_if_index);
+ else
+ {
+ ip6_link_enable (sw_if_index, NULL);
+ ip6_link_set_local_address (sw_if_index, &ip_addr_v6 (&nh));
+ }
+ else
+ ip6_add_del_interface_address (
+ vlib_get_main (), sw_if_index, &ip_addr_v6 (&nh),
+ rtnl_addr_get_prefixlen (rla), is_del);
+ lcp_router_ip6_mroutes_add_del (sw_if_index, !is_del);
+ }
+
+ LCP_ROUTER_DBG ("link-addr: %U %U/%d", format_vnet_sw_if_index_name,
+ vnet_get_main (), sw_if_index, format_ip_address, &nh,
+ rtnl_addr_get_prefixlen (rla));
+ }
+}
+
+static void
+lcp_router_link_addr_del (struct rtnl_addr *la)
+{
+ lcp_router_link_addr_add_del (la, 1);
+}
+
+static void
+lcp_router_link_addr_add (struct rtnl_addr *la)
+{
+ lcp_router_link_addr_add_del (la, 0);
+}
+
+static walk_rc_t
+lcp_router_address_mark (index_t index, void *ctx)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ lcp_itf_pair_t *lip = lcp_itf_pair_get (index);
+ if (!lip)
+ return WALK_CONTINUE;
+
+ ip_interface_address_mark_one_interface (
+ vnm, vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index), 0);
+
+ return WALK_CONTINUE;
+}
+
+static void
+lcp_router_link_addr_sync_begin (void)
+{
+ lcp_itf_pair_walk (lcp_router_address_mark, 0);
+
+ LCP_ROUTER_INFO ("Begin synchronization of interface addresses");
+}
+
+static void
+lcp_router_link_addr_sync_end (void)
+{
+ ip_interface_address_sweep ();
+
+ LCP_ROUTER_INFO ("End synchronization of interface addresses");
+}
+
+static void
+lcp_router_mk_mac_addr (const struct nl_addr *rna, mac_address_t *mac)
+{
+ mac_address_from_bytes (mac, nl_addr_get_binary_addr (rna));
+}
+
+static void
+lcp_router_neigh_del (struct rtnl_neigh *rn)
+{
+ u32 sw_if_index;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_neigh_get_ifindex (rn));
+
+ if (~0 != sw_if_index)
+ {
+ ip_address_t nh;
+ int rv;
+ struct nl_addr *rna;
+
+ if ((rna = rtnl_neigh_get_dst (rn)) == NULL)
+ return;
+ lcp_router_mk_addr (rna, &nh);
+
+ if (ip46_address_is_multicast (&ip_addr_46 (&nh)))
+ {
+ LCP_ROUTER_DBG ("ignore neighbor del: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ return;
+ }
+
+ rv = ip_neighbor_del (&nh, sw_if_index);
+
+ if (rv)
+ {
+ LCP_ROUTER_ERROR (
+ "Failed to delete neighbor: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index);
+ }
+ else
+ {
+ LCP_ROUTER_DBG ("neighbor del: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ }
+ }
+ else
+ LCP_ROUTER_INFO ("ignore neighbour del on: %d",
+ rtnl_neigh_get_ifindex (rn));
+}
+
+#ifndef NUD_VALID
+#define NUD_VALID \
+ (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE | \
+ NUD_DELAY)
+#endif
+
+static void
+lcp_router_neigh_add (struct rtnl_neigh *rn)
+{
+ u32 sw_if_index;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_neigh_get_ifindex (rn));
+
+ if (~0 != sw_if_index)
+ {
+ struct nl_addr *ll;
+ ip_address_t nh;
+ int state;
+ struct nl_addr *rna;
+
+ if ((rna = rtnl_neigh_get_dst (rn)) == NULL)
+ return;
+ lcp_router_mk_addr (rna, &nh);
+
+ if (ip46_address_is_multicast (&ip_addr_46 (&nh)))
+ {
+ LCP_ROUTER_DBG ("ignore neighbor add: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ return;
+ }
+
+ ll = rtnl_neigh_get_lladdr (rn);
+ state = rtnl_neigh_get_state (rn);
+
+ if (ll && (state & NUD_VALID))
+ {
+ mac_address_t mac;
+ ip_neighbor_flags_t flags;
+ int rv;
+
+ lcp_router_mk_mac_addr (ll, &mac);
+
+ if (state & (NUD_NOARP | NUD_PERMANENT))
+ flags = IP_NEIGHBOR_FLAG_STATIC;
+ else
+ flags = IP_NEIGHBOR_FLAG_DYNAMIC;
+
+ rv = ip_neighbor_add (&nh, &mac, sw_if_index, flags, NULL);
+
+ if (rv)
+ {
+ LCP_ROUTER_ERROR (
+ "Failed to create neighbor: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index);
+ }
+ else
+ {
+ LCP_ROUTER_DBG ("neighbor add: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ }
+ }
+ else
+ /* It's a delete */
+ lcp_router_neigh_del (rn);
+ }
+ else
+ LCP_ROUTER_INFO ("ignore neighbour add on: %d",
+ rtnl_neigh_get_ifindex (rn));
+}
+
+static walk_rc_t
+lcp_router_neighbor_mark (index_t index, void *ctx)
+{
+ lcp_itf_pair_t *lip = lcp_itf_pair_get (index);
+ if (!lip)
+ return WALK_CONTINUE;
+
+ ip_neighbor_walk (AF_IP4, lip->lip_phy_sw_if_index, ip_neighbor_mark_one, 0);
+ ip_neighbor_walk (AF_IP6, lip->lip_phy_sw_if_index, ip_neighbor_mark_one, 0);
+
+ return WALK_CONTINUE;
+}
+
+static void
+lcp_router_neigh_sync_begin (void)
+{
+ lcp_itf_pair_walk (lcp_router_neighbor_mark, 0);
+
+ LCP_ROUTER_INFO ("Begin synchronization of neighbors");
+}
+
+static void
+lcp_router_neigh_sync_end (void)
+{
+ ip_neighbor_sweep (AF_IP4);
+ ip_neighbor_sweep (AF_IP6);
+
+ LCP_ROUTER_INFO ("End synchronization of neighbors");
+}
+
+static lcp_router_table_t *
+lcp_router_table_find (uint32_t id, fib_protocol_t fproto)
+{
+ uword *p;
+
+ p = hash_get (lcp_router_table_db[fproto], id);
+
+ if (p)
+ return pool_elt_at_index (lcp_router_table_pool, p[0]);
+
+ return (NULL);
+}
+
+static uint32_t
+lcp_router_table_k2f (uint32_t k)
+{
+  // the kernel's 'local' (255) and 'main' (254) tables both map to VPP's
+  // default table (0)
+ if (k == 255 || k == 254)
+ return 0;
+ return k;
+}
+
+static lcp_router_table_t *
+lcp_router_table_add_or_lock (uint32_t id, fib_protocol_t fproto)
+{
+ lcp_router_table_t *nlt;
+
+ id = lcp_router_table_k2f (id);
+ nlt = lcp_router_table_find (id, fproto);
+
+ if (NULL == nlt)
+ {
+ pool_get_zero (lcp_router_table_pool, nlt);
+
+ nlt->nlt_id = id;
+ nlt->nlt_proto = fproto;
+
+ nlt->nlt_fib_index = fib_table_find_or_create_and_lock (
+ nlt->nlt_proto, nlt->nlt_id, lcp_rt_fib_src);
+ nlt->nlt_mfib_index = mfib_table_find_or_create_and_lock (
+ nlt->nlt_proto, nlt->nlt_id, MFIB_SOURCE_PLUGIN_LOW);
+
+ hash_set (lcp_router_table_db[fproto], nlt->nlt_id,
+ nlt - lcp_router_table_pool);
+
+ if (FIB_PROTOCOL_IP4 == fproto)
+ {
+ /* Set the all 1s address in this table to punt */
+ fib_table_entry_special_add (nlt->nlt_fib_index, &pfx_all1s,
+ lcp_rt_fib_src, FIB_ENTRY_FLAG_LOCAL);
+
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ int ii;
+
+ for (ii = 0; ii < ARRAY_LEN (ip4_specials); ii++)
+ {
+ mfib_table_entry_path_update (
+ nlt->nlt_mfib_index, &ip4_specials[ii], MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ }
+ else if (FIB_PROTOCOL_IP6 == fproto)
+ {
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP6,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ int ii;
+
+ for (ii = 0; ii < ARRAY_LEN (ip6_specials); ii++)
+ {
+ mfib_table_entry_path_update (
+ nlt->nlt_mfib_index, &ip6_specials[ii], MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ }
+ }
+
+ nlt->nlt_refs++;
+
+ return (nlt);
+}
+
+static void
+lcp_router_table_unlock (lcp_router_table_t *nlt)
+{
+ nlt->nlt_refs--;
+
+ if (0 == nlt->nlt_refs)
+ {
+ if (FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+ {
+ /* Set the all 1s address in this table to punt */
+ fib_table_entry_special_remove (nlt->nlt_fib_index, &pfx_all1s,
+ lcp_rt_fib_src);
+ }
+
+ fib_table_unlock (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
+
+ hash_unset (lcp_router_table_db[nlt->nlt_proto], nlt->nlt_id);
+ pool_put (lcp_router_table_pool, nlt);
+ }
+}
+
+static void
+lcp_router_route_mk_prefix (struct rtnl_route *r, fib_prefix_t *p)
+{
+ const struct nl_addr *addr = rtnl_route_get_dst (r);
+ u32 *baddr = nl_addr_get_binary_addr (addr);
+ u32 blen = nl_addr_get_len (addr);
+ ip46_address_t *paddr = &p->fp_addr;
+ u32 entry;
+
+ ip46_address_reset (paddr);
+ p->fp_proto = lcp_router_proto_k2f (nl_addr_get_family (addr));
+
+ switch (p->fp_proto)
+ {
+ case FIB_PROTOCOL_MPLS:
+ entry = ntohl (*baddr);
+ p->fp_label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+ p->fp_len = 21;
+ p->fp_eos = MPLS_NON_EOS;
+ return;
+ case FIB_PROTOCOL_IP4:
+ memcpy (&paddr->ip4, baddr, blen);
+ break;
+ case FIB_PROTOCOL_IP6:
+ memcpy (&paddr->ip6, baddr, blen);
+ break;
+ }
+
+ p->fp_len = nl_addr_get_prefixlen (addr);
+}
+
+static void
+lcp_router_route_mk_mprefix (struct rtnl_route *r, mfib_prefix_t *p)
+{
+ const struct nl_addr *addr;
+
+ addr = rtnl_route_get_dst (r);
+
+ p->fp_len = nl_addr_get_prefixlen (addr);
+ p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_grp_addr);
+
+ addr = rtnl_route_get_src (r);
+ if (addr)
+ p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_src_addr);
+}
+
+static int
+lcp_router_mpls_nladdr_to_path (fib_route_path_t *path, struct nl_addr *addr)
+{
+ if (!addr)
+ return 0;
+
+ struct mpls_label *stack = nl_addr_get_binary_addr (addr);
+ u32 entry, label;
+ u8 exp, ttl;
+ int label_count = 0;
+
+ while (1)
+ {
+ entry = ntohl (stack[label_count++].entry);
+ label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+ exp = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+ ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+
+ fib_mpls_label_t fml = {
+ .fml_value = label,
+ .fml_exp = exp,
+ .fml_ttl = ttl,
+ };
+ vec_add1 (path->frp_label_stack, fml);
+
+ if (entry & MPLS_LS_S_MASK)
+ break;
+ }
+ return label_count;
+}
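+
+/* Worked example of the decode above, assuming the standard linux/mpls.h
+ * layout (label:20 | TC:3 | S:1 | TTL:8): the network-order entry
+ * 0x00064340 yields label 100, exp 1, ttl 64 with the bottom-of-stack bit
+ * set, so the loop terminates after one label.
+ */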
+
+typedef struct lcp_router_route_path_parse_t_
+{
+ fib_route_path_t *paths;
+ fib_protocol_t route_proto;
+ bool is_mcast;
+ fib_route_path_flags_t type_flags;
+ u8 preference;
+} lcp_router_route_path_parse_t;
+
+static void
+lcp_router_route_path_parse (struct rtnl_nexthop *rnh, void *arg)
+{
+ lcp_router_route_path_parse_t *ctx = arg;
+ fib_route_path_t *path;
+ u32 sw_if_index;
+ int label_count = 0;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_route_nh_get_ifindex (rnh));
+
+ if (~0 != sw_if_index)
+ {
+ fib_protocol_t fproto;
+ struct nl_addr *addr;
+
+ vec_add2 (ctx->paths, path, 1);
+
+ path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
+ path->frp_sw_if_index = sw_if_index;
+ path->frp_preference = ctx->preference;
+
+ /*
+ * FIB Path Weight of 0 is meaningless and replaced with 1 further along.
+ * See fib_path_create. fib_path_cmp_w_route_path would fail to match
+ * such a fib_route_path_t with any fib_path_t, because a fib_path_t's
+ * fp_weight can never be 0.
+ */
+ path->frp_weight = clib_max (1, rtnl_route_nh_get_weight (rnh));
+
+ addr = rtnl_route_nh_get_gateway (rnh);
+ if (!addr)
+ addr = rtnl_route_nh_get_via (rnh);
+
+ if (addr)
+ fproto = lcp_router_mk_addr46 (addr, &path->frp_addr);
+ else
+ fproto = ctx->route_proto;
+
+ path->frp_proto = fib_proto_to_dpo (fproto);
+
+ if (ctx->route_proto == FIB_PROTOCOL_MPLS)
+ {
+ addr = rtnl_route_nh_get_newdst (rnh);
+ label_count = lcp_router_mpls_nladdr_to_path (path, addr);
+ if (label_count)
+ {
+ LCP_ROUTER_DBG (" is label swap to %u",
+ path->frp_label_stack[0].fml_value);
+ }
+ else
+ {
+ fib_mpls_label_t fml = {
+ .fml_value = MPLS_LABEL_POP,
+ };
+ vec_add1 (path->frp_label_stack, fml);
+ LCP_ROUTER_DBG (" is label pop");
+ }
+ }
+
+#ifdef NL_CAPABILITY_VERSION_3_6_0
+ addr = rtnl_route_nh_get_encap_mpls_dst (rnh);
+ label_count = lcp_router_mpls_nladdr_to_path (path, addr);
+ if (label_count)
+ LCP_ROUTER_DBG (" has encap mpls, %d labels", label_count);
+#endif
+
+ if (ctx->is_mcast)
+ path->frp_mitf_flags = MFIB_ITF_FLAG_FORWARD;
+
+ LCP_ROUTER_DBG (" path:[%U]", format_fib_route_path, path);
+ }
+}
+
+/*
+ * blackhole, unreachable, prohibit will not have a next hop in an
+ * RTM_NEWROUTE. Add a path for them.
+ */
+static void
+lcp_router_route_path_add_special (struct rtnl_route *rr,
+ lcp_router_route_path_parse_t *ctx)
+{
+ fib_route_path_t *path;
+
+ if (rtnl_route_get_type (rr) < RTN_BLACKHOLE)
+ return;
+
+ /* if it already has a path, it does not need us to add one */
+ if (vec_len (ctx->paths) > 0)
+ return;
+
+ vec_add2 (ctx->paths, path, 1);
+
+ path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
+ path->frp_sw_if_index = ~0;
+ path->frp_proto = fib_proto_to_dpo (ctx->route_proto);
+ path->frp_preference = ctx->preference;
+
+ LCP_ROUTER_DBG (" path:[%U]", format_fib_route_path, path);
+}
+
+/*
+ * Map of supported route types. Some types are omitted:
+ * RTN_LOCAL - interface address addition creates these automatically
+ * RTN_BROADCAST - same as RTN_LOCAL
+ * RTN_UNSPEC, RTN_ANYCAST, RTN_THROW, RTN_NAT, RTN_XRESOLVE -
+ *   There is currently no VPP equivalent for these.
+ */
+static const u8 lcp_router_route_type_valid[__RTN_MAX] = {
+ [RTN_UNICAST] = 1, [RTN_MULTICAST] = 1, [RTN_BLACKHOLE] = 1,
+ [RTN_UNREACHABLE] = 1, [RTN_PROHIBIT] = 1,
+};
+
+/* Map of fib entry flags by route type */
+static const fib_entry_flag_t lcp_router_route_type_feflags[__RTN_MAX] = {
+ [RTN_LOCAL] = FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_CONNECTED,
+ [RTN_BROADCAST] = FIB_ENTRY_FLAG_DROP | FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
+ [RTN_BLACKHOLE] = FIB_ENTRY_FLAG_DROP,
+};
+
+/* Map of fib route path flags by route type */
+static const fib_route_path_flags_t
+ lcp_router_route_type_frpflags[__RTN_MAX] = {
+ [RTN_UNREACHABLE] = FIB_ROUTE_PATH_ICMP_UNREACH,
+ [RTN_PROHIBIT] = FIB_ROUTE_PATH_ICMP_PROHIBIT,
+ [RTN_BLACKHOLE] = FIB_ROUTE_PATH_DROP,
+ };
+
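+/*
+ * Pick the FIB source for a route based on its netlink protocol value.
+ * Linux assigns RTPROT_KERNEL, RTPROT_BOOT and RTPROT_STATIC values at or
+ * below RTPROT_STATIC (4); routing daemons use larger values (e.g.
+ * RTPROT_BGP, RTPROT_OSPF), so anything above RTPROT_STATIC is treated as
+ * a dynamically learned route.
+ */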
+static inline fib_source_t
+lcp_router_proto_fib_source (u8 rt_proto)
+{
+ return (rt_proto <= RTPROT_STATIC) ? lcp_rt_fib_src : lcp_rt_fib_src_dynamic;
+}
+
+static fib_entry_flag_t
+lcp_router_route_mk_entry_flags (uint8_t rtype, int table_id, uint8_t rproto)
+{
+ fib_entry_flag_t fef = FIB_ENTRY_FLAG_NONE;
+
+ fef |= lcp_router_route_type_feflags[rtype];
+ if ((rproto == RTPROT_KERNEL) || PREDICT_FALSE (255 == table_id))
+    /* the kernel proto marks interface prefixes; 255 is Linux's 'local' table */
+ fef |= FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED;
+
+ return (fef);
+}
+
+static void
+lcp_router_route_del (struct rtnl_route *rr)
+{
+ fib_entry_flag_t entry_flags;
+ uint32_t table_id;
+ fib_prefix_t pfx;
+ lcp_router_table_t *nlt;
+ uint8_t rtype, rproto;
+
+ rtype = rtnl_route_get_type (rr);
+ table_id = rtnl_route_get_table (rr);
+ rproto = rtnl_route_get_protocol (rr);
+
+ /* skip unsupported route types and local table */
+ if (!lcp_router_route_type_valid[rtype] || (table_id == 255))
+ return;
+
+ lcp_router_route_mk_prefix (rr, &pfx);
+ entry_flags = lcp_router_route_mk_entry_flags (rtype, table_id, rproto);
+ nlt = lcp_router_table_find (lcp_router_table_k2f (table_id), pfx.fp_proto);
+
+ LCP_ROUTER_DBG ("route del: %d:%U %U", rtnl_route_get_table (rr),
+ format_fib_prefix, &pfx, format_fib_entry_flags,
+ entry_flags);
+
+ if (NULL == nlt)
+ return;
+
+ lcp_router_route_path_parse_t np = {
+ .route_proto = pfx.fp_proto,
+ .type_flags = lcp_router_route_type_frpflags[rtype],
+ };
+
+ rtnl_route_foreach_nexthop (rr, lcp_router_route_path_parse, &np);
+ lcp_router_route_path_add_special (rr, &np);
+
+ if (0 != vec_len (np.paths))
+ {
+ fib_source_t fib_src;
+
+ fib_src = lcp_router_proto_fib_source (rproto);
+
+ switch (pfx.fp_proto)
+ {
+ case FIB_PROTOCOL_IP6:
+ fib_table_entry_delete (nlt->nlt_fib_index, &pfx, fib_src);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
+ np.paths);
+ /* delete the EOS route in addition to NEOS - fallthrough */
+ pfx.fp_eos = MPLS_EOS;
+ default:
+ fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
+ np.paths);
+ }
+ }
+
+ vec_free (np.paths);
+
+ lcp_router_table_unlock (nlt);
+}
+
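+/*
+ * Deep-copy a vector of fib_route_path_t, including each path's MPLS
+ * label-stack vector, so that one copy can be handed to (and consumed by)
+ * the FIB while the caller frees the other without a double-free.
+ */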
+static fib_route_path_t *
+lcp_router_fib_route_path_dup (fib_route_path_t *old)
+{
+ int idx;
+ fib_route_path_t *p;
+
+ fib_route_path_t *new = vec_dup (old);
+ if (!new)
+ return NULL;
+
+ for (idx = 0; idx < vec_len (new); idx++)
+ {
+ p = &new[idx];
+ if (p->frp_label_stack)
+ p->frp_label_stack = vec_dup (p->frp_label_stack);
+ }
+
+ return new;
+}
+
+static void
+lcp_router_route_add (struct rtnl_route *rr, int is_replace)
+{
+ fib_entry_flag_t entry_flags;
+ uint32_t table_id;
+ fib_prefix_t pfx;
+ lcp_router_table_t *nlt;
+ uint8_t rtype, rproto;
+
+ rtype = rtnl_route_get_type (rr);
+ table_id = rtnl_route_get_table (rr);
+ rproto = rtnl_route_get_protocol (rr);
+
+ /* skip unsupported route types and local table */
+ if (!lcp_router_route_type_valid[rtype] || (table_id == 255))
+ return;
+
+ lcp_router_route_mk_prefix (rr, &pfx);
+ entry_flags = lcp_router_route_mk_entry_flags (rtype, table_id, rproto);
+
+ nlt = lcp_router_table_add_or_lock (table_id, pfx.fp_proto);
+ /* Skip any kernel routes and IPv6 LL or multicast routes */
+ if (rproto == RTPROT_KERNEL ||
+ (FIB_PROTOCOL_IP6 == pfx.fp_proto &&
+ (ip6_address_is_multicast (&pfx.fp_addr.ip6) ||
+ ip6_address_is_link_local_unicast (&pfx.fp_addr.ip6))))
+ {
+ LCP_ROUTER_DBG ("route skip: %d:%U %U", rtnl_route_get_table (rr),
+ format_fib_prefix, &pfx, format_fib_entry_flags,
+ entry_flags);
+ return;
+ }
+ LCP_ROUTER_DBG ("route %s: %d:%U %U", is_replace ? "replace" : "add",
+ rtnl_route_get_table (rr), format_fib_prefix, &pfx,
+ format_fib_entry_flags, entry_flags);
+
+ lcp_router_route_path_parse_t np = {
+ .route_proto = pfx.fp_proto,
+ .is_mcast = (rtype == RTN_MULTICAST),
+ .type_flags = lcp_router_route_type_frpflags[rtype],
+ .preference = (u8) rtnl_route_get_priority (rr),
+ };
+
+ rtnl_route_foreach_nexthop (rr, lcp_router_route_path_parse, &np);
+ lcp_router_route_path_add_special (rr, &np);
+
+ if (0 != vec_len (np.paths))
+ {
+ if (rtype == RTN_MULTICAST)
+ {
+	  /* it's not clear how Linux expresses the RPF parameters,
+	   * so we allow from all interfaces and hope for the best */
+ mfib_prefix_t mpfx = {};
+
+ lcp_router_route_mk_mprefix (rr, &mpfx);
+
+ mfib_table_entry_update (nlt->nlt_mfib_index, &mpfx,
+ MFIB_SOURCE_PLUGIN_LOW, MFIB_RPF_ID_NONE,
+ MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
+
+ mfib_table_entry_paths_update (nlt->nlt_mfib_index, &mpfx,
+ MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, np.paths);
+ }
+ else
+ {
+ fib_source_t fib_src;
+ const fib_route_path_t *rpath;
+
+ vec_foreach (rpath, np.paths)
+ {
+ if (fib_route_path_is_attached (rpath))
+ {
+ entry_flags |= FIB_ENTRY_FLAG_ATTACHED;
+ break;
+ }
+ }
+
+ fib_src = lcp_router_proto_fib_source (rproto);
+
+ if (pfx.fp_proto == FIB_PROTOCOL_MPLS)
+ {
+ /* in order to avoid double-frees, we duplicate the paths. */
+ fib_route_path_t *pathdup =
+ lcp_router_fib_route_path_dup (np.paths);
+ if (is_replace)
+ fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, pathdup);
+ else
+ fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, pathdup);
+ vec_free (pathdup);
+
+ /* install EOS route in addition to NEOS */
+ pfx.fp_eos = MPLS_EOS;
+ pfx.fp_payload_proto = np.paths[0].frp_proto;
+ }
+
+ if (is_replace)
+ fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, np.paths);
+ else
+ fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, np.paths);
+ }
+ }
+ else
+ {
+ LCP_ROUTER_DBG ("no paths for route: %d:%U %U",
+ rtnl_route_get_table (rr), format_fib_prefix, &pfx,
+ format_fib_entry_flags, entry_flags);
+ }
+ vec_free (np.paths);
+}
+
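+/*
+ * Route resynchronization uses mark-and-sweep: at the start of a netlink
+ * dump all routes owned by the lcp sources are marked; routes re-announced
+ * during the dump are refreshed, and the sweep at the end removes any
+ * still-marked entries, i.e. routes that no longer exist in the kernel.
+ */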
+static void
+lcp_router_route_sync_begin (void)
+{
+ lcp_router_table_t *nlt;
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ fib_table_mark (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
+ fib_table_mark (nlt->nlt_fib_index, nlt->nlt_proto,
+ lcp_rt_fib_src_dynamic);
+
+ LCP_ROUTER_INFO ("Begin synchronization of %U routes in table %u",
+ format_fib_protocol, nlt->nlt_proto,
+ nlt->nlt_fib_index);
+ }
+}
+
+static void
+lcp_router_route_sync_end (void)
+{
+ lcp_router_table_t *nlt;
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ fib_table_sweep (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
+ fib_table_sweep (nlt->nlt_fib_index, nlt->nlt_proto,
+ lcp_rt_fib_src_dynamic);
+
+ LCP_ROUTER_INFO ("End synchronization of %U routes in table %u",
+ format_fib_protocol, nlt->nlt_proto,
+ nlt->nlt_fib_index);
+ }
+}
+
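+/*
+ * Context for flushing a table: lrtf_sw_if_index_to_bool is a vector
+ * indexed by sw_if_index; a non-zero entry marks an interface whose
+ * routes (for the given FIB source) are to be deleted.
+ */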
+typedef struct lcp_router_table_flush_ctx_t_
+{
+ fib_node_index_t *lrtf_entries;
+ u32 *lrtf_sw_if_index_to_bool;
+ fib_source_t lrtf_source;
+} lcp_router_table_flush_ctx_t;
+
+static fib_table_walk_rc_t
+lcp_router_table_flush_cb (fib_node_index_t fib_entry_index, void *arg)
+{
+ lcp_router_table_flush_ctx_t *ctx = arg;
+ u32 sw_if_index;
+
+ sw_if_index = fib_entry_get_resolving_interface_for_source (
+ fib_entry_index, ctx->lrtf_source);
+
+ if (sw_if_index < vec_len (ctx->lrtf_sw_if_index_to_bool) &&
+ ctx->lrtf_sw_if_index_to_bool[sw_if_index])
+ {
+ vec_add1 (ctx->lrtf_entries, fib_entry_index);
+ }
+ return (FIB_TABLE_WALK_CONTINUE);
+}
+
+static void
+lcp_router_table_flush (lcp_router_table_t *nlt, u32 *sw_if_index_to_bool,
+ fib_source_t source)
+{
+ fib_node_index_t *fib_entry_index;
+ lcp_router_table_flush_ctx_t ctx = {
+ .lrtf_entries = NULL,
+ .lrtf_sw_if_index_to_bool = sw_if_index_to_bool,
+ .lrtf_source = source,
+ };
+
+ LCP_ROUTER_DBG (
+ "Flush table: proto %U, fib-index %u, max sw_if_index %u, source %U",
+ format_fib_protocol, nlt->nlt_proto, nlt->nlt_fib_index,
+ vec_len (sw_if_index_to_bool) - 1, format_fib_source, source);
+
+ fib_table_walk (nlt->nlt_fib_index, nlt->nlt_proto,
+ lcp_router_table_flush_cb, &ctx);
+
+ LCP_ROUTER_DBG ("Flush table: entries number to delete %u",
+ vec_len (ctx.lrtf_entries));
+
+ vec_foreach (fib_entry_index, ctx.lrtf_entries)
+ {
+ fib_table_entry_delete_index (*fib_entry_index, source);
+ lcp_router_table_unlock (nlt);
+ }
+
+ vec_free (ctx.lrtf_entries);
+}
+
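+/*
+ * Netlink dispatch table. Note that only the route add/del callbacks are
+ * flagged MP-safe; presumably the dispatcher consults is_mp_safe to decide
+ * whether a worker-thread barrier must be held around a handler.
+ */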
+const nl_vft_t lcp_router_vft = {
+ .nvl_rt_link_add = { .is_mp_safe = 0, .cb = lcp_router_link_add },
+ .nvl_rt_link_del = { .is_mp_safe = 0, .cb = lcp_router_link_del },
+ .nvl_rt_link_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_link_sync_begin },
+ .nvl_rt_link_sync_end = { .is_mp_safe = 0, .cb = lcp_router_link_sync_end },
+ .nvl_rt_addr_add = { .is_mp_safe = 0, .cb = lcp_router_link_addr_add },
+ .nvl_rt_addr_del = { .is_mp_safe = 0, .cb = lcp_router_link_addr_del },
+ .nvl_rt_addr_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_link_addr_sync_begin },
+ .nvl_rt_addr_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_link_addr_sync_end },
+ .nvl_rt_neigh_add = { .is_mp_safe = 0, .cb = lcp_router_neigh_add },
+ .nvl_rt_neigh_del = { .is_mp_safe = 0, .cb = lcp_router_neigh_del },
+ .nvl_rt_neigh_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_neigh_sync_begin },
+ .nvl_rt_neigh_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_neigh_sync_end },
+ .nvl_rt_route_add = { .is_mp_safe = 1, .cb = lcp_router_route_add },
+ .nvl_rt_route_del = { .is_mp_safe = 1, .cb = lcp_router_route_del },
+ .nvl_rt_route_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_route_sync_begin },
+ .nvl_rt_route_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_route_sync_end },
+};
+
+static clib_error_t *
+lcp_router_init (vlib_main_t *vm)
+{
+ lcp_router_logger = vlib_log_register_class ("linux-cp", "router");
+
+ nl_register_vft (&lcp_router_vft);
+
+ /*
+   * Allocate two FIB route sources. The lower-priority source is used for
+   * dynamic routes. If a dynamic routing daemon (e.g. FRR) removes one of
+   * its routes, the lower-priority source ensures it cannot remove static
+   * routes that were added with the higher-priority source.
+ */
+ lcp_rt_fib_src =
+ fib_source_allocate ("lcp-rt", FIB_SOURCE_PRIORITY_HI, FIB_SOURCE_BH_API);
+
+ lcp_rt_fib_src_dynamic = fib_source_allocate (
+ "lcp-rt-dynamic", FIB_SOURCE_PRIORITY_HI + 1, FIB_SOURCE_BH_API);
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lcp_router_init) = {
+ .runs_before = VLIB_INITS ("lcp_nl_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lisp/CMakeLists.txt b/src/plugins/lisp/CMakeLists.txt
index bf0d60aab0f..743a17e69c8 100644
--- a/src/plugins/lisp/CMakeLists.txt
+++ b/src/plugins/lisp/CMakeLists.txt
@@ -11,6 +11,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+if(NOT OPENSSL_FOUND)
+ message(WARNING "OpenSSL not found - lisp plugin disabled")
+ return()
+endif()
+
##############################################################################
# LISP control plane: lisp-cp
##############################################################################
diff --git a/src/plugins/lisp/lisp-cp/control.c b/src/plugins/lisp/lisp-cp/control.c
index 7293e03eea0..692048ebd21 100644
--- a/src/plugins/lisp/lisp-cp/control.c
+++ b/src/plugins/lisp/lisp-cp/control.c
@@ -581,7 +581,6 @@ vnet_lisp_adjacencies_get_by_vni (u32 vni)
fwd_entry_t *fwd;
lisp_adjacency_t *adjs = 0, adj;
- /* *INDENT-OFF* */
pool_foreach (fwd, lcm->fwd_entry_pool)
{
if (gid_address_vni (&fwd->reid) != vni)
@@ -591,7 +590,6 @@ vnet_lisp_adjacencies_get_by_vni (u32 vni)
gid_address_copy (&adj.leid, &fwd->leid);
vec_add1 (adjs, adj);
}
- /* *INDENT-ON* */
return adjs;
}
@@ -804,7 +802,6 @@ vnet_lisp_map_cache_add_del (vnet_lisp_add_del_mapping_args_t * a,
/* Remove remote (if present) from the vectors of lcl-to-rmts
* TODO: Address this in a more efficient way.
*/
- /* *INDENT-OFF* */
pool_foreach (rmts, lcm->lcl_to_rmt_adjacencies)
{
vec_foreach_index (rmts_itr, rmts[0])
@@ -817,7 +814,6 @@ vnet_lisp_map_cache_add_del (vnet_lisp_add_del_mapping_args_t * a,
}
}
}
- /* *INDENT-ON* */
}
/* remove mapping from dictionary */
@@ -1389,12 +1385,10 @@ vnet_lisp_clear_all_remote_adjacencies (void)
vnet_lisp_add_del_mapping_args_t _dm_args, *dm_args = &_dm_args;
vnet_lisp_add_del_locator_set_args_t _ls, *ls = &_ls;
- /* *INDENT-OFF* */
pool_foreach_index (mi, lcm->mapping_pool)
{
vec_add1 (map_indices, mi);
}
- /* *INDENT-ON* */
vec_foreach (map_indexp, map_indices)
{
@@ -2167,7 +2161,6 @@ lisp_cp_enable_l2_l3_ifaces (lisp_cp_main_t * lcm, u8 with_default_route)
{
u32 vni, dp_table;
- /* *INDENT-OFF* */
hash_foreach(vni, dp_table, lcm->table_id_by_vni, ({
dp_add_del_iface(lcm, vni, /* is_l2 */ 0, /* is_add */1,
with_default_route);
@@ -2176,7 +2169,6 @@ lisp_cp_enable_l2_l3_ifaces (lisp_cp_main_t * lcm, u8 with_default_route)
dp_add_del_iface(lcm, vni, /* is_l2 */ 1, 1,
with_default_route);
}));
- /* *INDENT-ON* */
}
static void
@@ -2188,12 +2180,10 @@ lisp_cp_disable_l2_l3_ifaces (lisp_cp_main_t * lcm)
hash_free (lcm->fwd_entry_by_mapping_index);
pool_free (lcm->fwd_entry_pool);
/* Clear state tracking rmt-lcl fwd entries */
- /* *INDENT-OFF* */
pool_foreach (rmts, lcm->lcl_to_rmt_adjacencies)
{
vec_free(rmts[0]);
}
- /* *INDENT-ON* */
hash_free (lcm->lcl_to_rmt_adjs_by_lcl_idx);
pool_free (lcm->lcl_to_rmt_adjacencies);
}
@@ -2465,7 +2455,6 @@ build_itr_rloc_list (lisp_cp_main_t * lcm, locator_set_t * loc_set)
/* Add ipv4 locators first TODO sort them */
- /* *INDENT-OFF* */
foreach_ip_interface_address (&lcm->im4->lookup_main, ia,
loc->sw_if_index, 1 /* unnumbered */,
({
@@ -2486,7 +2475,6 @@ build_itr_rloc_list (lisp_cp_main_t * lcm, locator_set_t * loc_set)
ip_prefix_normalize (ippref);
vec_add1 (rlocs, gid[0]);
}));
- /* *INDENT-ON* */
}
return rlocs;
@@ -2638,7 +2626,6 @@ add_locators (lisp_cp_main_t * lcm, mapping_t * m, u32 locator_set_index,
new = loc[0];
if (loc->local)
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (&lcm->im4->lookup_main, ia,
loc->sw_if_index, 1 /* unnumbered */,
({
@@ -2655,7 +2642,6 @@ add_locators (lisp_cp_main_t * lcm, mapping_t * m, u32 locator_set_index,
ia);
ip_address_set (new_ip, addr, AF_IP6);
}));
- /* *INDENT-ON* */
if (probed_loc && ip_address_cmp (probed_loc, new_ip) == 0)
new.probed = 1;
@@ -2669,7 +2655,6 @@ build_map_register_record_list (lisp_cp_main_t * lcm)
{
mapping_t *recs = 0, rec, *m;
- /* *INDENT-OFF* */
pool_foreach (m, lcm->mapping_pool)
{
/* for now build only local mappings */
@@ -2680,7 +2665,6 @@ build_map_register_record_list (lisp_cp_main_t * lcm)
add_locators (lcm, &rec, m->locator_set_index, NULL);
vec_add1 (recs, rec);
}
- /* *INDENT-ON* */
return recs;
}
@@ -2834,7 +2818,6 @@ lisp_cp_output (vlib_main_t * vm, vlib_node_runtime_t * node,
}
/* placeholder node used only for statistics */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_output_node) = {
.function = lisp_cp_output,
.name = "lisp-cp-output",
@@ -2851,7 +2834,6 @@ VLIB_REGISTER_NODE (lisp_cp_output_node) = {
[LISP_CP_INPUT_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
static int
send_rloc_probe (lisp_cp_main_t * lcm, gid_address_t * deid,
@@ -2899,7 +2881,6 @@ send_rloc_probes (lisp_cp_main_t * lcm)
locator_pair_t *lp;
u32 si, rloc_probes_sent = 0;
- /* *INDENT-OFF* */
pool_foreach (e, lcm->fwd_entry_pool)
{
if (vec_len (e->locator_pairs) == 0)
@@ -2929,7 +2910,6 @@ send_rloc_probes (lisp_cp_main_t * lcm)
rloc_probes_sent++;
}
}
- /* *INDENT-ON* */
vlib_node_increment_counter (vlib_get_main (), lisp_cp_output_node.index,
LISP_CP_OUTPUT_ERROR_RLOC_PROBES_SENT,
@@ -3038,7 +3018,6 @@ _send_encapsulated_map_request (lisp_cp_main_t * lcm,
/* if there is already a pending request remember it */
- /* *INDENT-OFF* */
pool_foreach (pmr, lcm->pending_map_requests_pool)
{
if (!gid_address_cmp (&pmr->src, seid)
@@ -3048,7 +3027,6 @@ _send_encapsulated_map_request (lisp_cp_main_t * lcm,
break;
}
}
- /* *INDENT-ON* */
if (!is_resend && duplicate_pmr)
{
@@ -3597,7 +3575,6 @@ lisp_cp_lookup_nsh (vlib_main_t * vm,
return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_LCAF));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_lookup_ip4_node) = {
.function = lisp_cp_lookup_ip4,
.name = "lisp-cp-lookup-ip4",
@@ -3615,9 +3592,7 @@ VLIB_REGISTER_NODE (lisp_cp_lookup_ip4_node) = {
[LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_lookup_ip6_node) = {
.function = lisp_cp_lookup_ip6,
.name = "lisp-cp-lookup-ip6",
@@ -3635,9 +3610,7 @@ VLIB_REGISTER_NODE (lisp_cp_lookup_ip6_node) = {
[LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_lookup_l2_node) = {
.function = lisp_cp_lookup_l2,
.name = "lisp-cp-lookup-l2",
@@ -3655,9 +3628,7 @@ VLIB_REGISTER_NODE (lisp_cp_lookup_l2_node) = {
[LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_lookup_nsh_node) = {
.function = lisp_cp_lookup_nsh,
.name = "lisp-cp-lookup-nsh",
@@ -3675,7 +3646,6 @@ VLIB_REGISTER_NODE (lisp_cp_lookup_nsh_node) = {
[LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
/* lisp_cp_input statistics */
#define foreach_lisp_cp_input_error \
@@ -3890,11 +3860,9 @@ process_map_reply (map_records_arg_t * a)
/* remove pending map request entry */
- /* *INDENT-OFF* */
clib_fifo_foreach (noncep, pmr->nonces, ({
hash_unset(lcm->pending_map_requests_by_nonce, noncep[0]);
}));
- /* *INDENT-ON* */
clib_fifo_free (pmr->nonces);
pool_put (lcm->pending_map_requests_pool, pmr);
@@ -4063,12 +4031,10 @@ map_record_args_get ()
map_records_arg_t *rec;
/* Cleanup first */
- /* *INDENT-OFF* */
pool_foreach (rec, lcm->map_records_args_pool[vlib_get_thread_index()]) {
if (rec->is_free)
map_records_arg_free (rec);
}
- /* *INDENT-ON* */
pool_get (lcm->map_records_args_pool[vlib_get_thread_index ()], rec);
return rec;
@@ -4276,6 +4242,11 @@ process_map_request (vlib_main_t * vm, vlib_node_runtime_t * node,
rloc_probe_recv++;
clib_memset (&m, 0, sizeof (m));
u32 mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &dst);
+ if (GID_LOOKUP_MISS == mi)
+ {
+ clib_warning ("Cannot find mapping index by gid!");
+ continue;
+ }
// TODO: select best locator; for now use the first one
dst_loc = &gid_address_ip (&itr_rlocs[0]);
@@ -4443,7 +4414,6 @@ lisp_cp_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_input_node) = {
.function = lisp_cp_input,
.name = "lisp-cp-input",
@@ -4460,7 +4430,6 @@ VLIB_REGISTER_NODE (lisp_cp_input_node) = {
[LISP_CP_INPUT_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
clib_error_t *
lisp_cp_init (vlib_main_t * vm)
@@ -4548,13 +4517,11 @@ vnet_lisp_get_stats (void)
lisp_stats_key_t *key;
u32 index;
- /* *INDENT-OFF* */
hash_foreach_mem (key, index, lgm->lisp_stats_index_by_key,
{
if (lisp_stats_api_fill (lcm, lgm, &stat, key, index))
vec_add1 (stats, stat);
});
- /* *INDENT-ON* */
return stats;
}
@@ -4650,7 +4617,6 @@ remove_dead_pending_map_requests (lisp_cp_main_t * lcm)
pending_map_request_t *pmr;
u32 *to_be_removed = 0, *pmr_index;
- /* *INDENT-OFF* */
pool_foreach (pmr, lcm->pending_map_requests_pool)
{
if (pmr->to_be_removed)
@@ -4662,7 +4628,6 @@ remove_dead_pending_map_requests (lisp_cp_main_t * lcm)
vec_add1 (to_be_removed, pmr - lcm->pending_map_requests_pool);
}
}
- /* *INDENT-ON* */
vec_foreach (pmr_index, to_be_removed)
pool_put_index (lcm->pending_map_requests_pool, pmr_index[0]);
@@ -4748,7 +4713,6 @@ update_map_register (lisp_cp_main_t * lcm, f64 dt)
if (!lcm->is_enabled || !lcm->map_registering)
return;
- /* *INDENT-OFF* */
pool_foreach (pmr, lcm->pending_map_registers_pool)
{
if (!update_pending_map_register (pmr, dt, &del_all))
@@ -4758,7 +4722,6 @@ update_map_register (lisp_cp_main_t * lcm, f64 dt)
vec_add1 (to_be_removed, pmr - lcm->pending_map_registers_pool);
}
}
- /* *INDENT-ON* */
if (del_all)
{
@@ -4808,13 +4771,11 @@ send_map_resolver_service (vlib_main_t * vm,
/* currently no signals are expected - just wait for clock */
(void) vlib_process_get_events (vm, 0);
- /* *INDENT-OFF* */
pool_foreach (pmr, lcm->pending_map_requests_pool)
{
if (!pmr->to_be_removed)
update_pending_request (pmr, period);
}
- /* *INDENT-ON* */
remove_dead_pending_map_requests (lcm);
@@ -4830,7 +4791,7 @@ send_map_resolver_service (vlib_main_t * vm,
{
process_expired_mapping (lcm, mi[0]);
}
- _vec_len (expired) = 0;
+ vec_set_len (expired, 0);
}
}
diff --git a/src/plugins/lisp/lisp-cp/control.h b/src/plugins/lisp/lisp-cp/control.h
index 524f5028465..e65ceafd431 100644
--- a/src/plugins/lisp/lisp-cp/control.h
+++ b/src/plugins/lisp/lisp-cp/control.h
@@ -299,7 +299,7 @@ extern vlib_node_registration_t lisp_cp_input_node;
extern vlib_node_registration_t lisp_cp_lookup_ip4_node;
extern vlib_node_registration_t lisp_cp_lookup_ip6_node;
-clib_error_t *lisp_cp_init ();
+clib_error_t *lisp_cp_init (vlib_main_t *);
always_inline lisp_cp_main_t *
vnet_lisp_cp_get_main ()
diff --git a/src/plugins/lisp/lisp-cp/gid_dictionary.c b/src/plugins/lisp/lisp-cp/gid_dictionary.c
index a0cc9c09e3c..995678ceadc 100644
--- a/src/plugins/lisp/lisp-cp/gid_dictionary.c
+++ b/src/plugins/lisp/lisp-cp/gid_dictionary.c
@@ -488,13 +488,11 @@ ip4_compute_prefix_lengths_in_search_order (gid_ip4_table_t * db)
vec_reset_length (db->ip4_prefix_lengths_in_search_order);
/* Note: bitmap reversed so this is in fact a longest prefix match */
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, db->ip4_non_empty_dst_address_length_bitmap)
{
int dst_address_length = 32 - i;
vec_add1 (db->ip4_prefix_lengths_in_search_order, dst_address_length);
}
- /* *INDENT-ON* */
}
@@ -671,13 +669,11 @@ ip6_compute_prefix_lengths_in_search_order (gid_ip6_table_t * db)
vec_reset_length (db->ip6_prefix_lengths_in_search_order);
/* Note: bitmap reversed so this is in fact a longest prefix match */
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, db->ip6_non_empty_dst_address_length_bitmap)
{
int dst_address_length = 128 - i;
vec_add1 (db->ip6_prefix_lengths_in_search_order, dst_address_length);
}
- /* *INDENT-ON* */
}
static u32
diff --git a/src/plugins/lisp/lisp-cp/lisp_api.c b/src/plugins/lisp/lisp-cp/lisp_api.c
index d8f889a24fa..37267635d85 100644
--- a/src/plugins/lisp/lisp-cp/lisp_api.c
+++ b/src/plugins/lisp/lisp-cp/lisp_api.c
@@ -109,12 +109,10 @@ vl_api_lisp_add_del_locator_set_t_handler (vl_api_lisp_add_del_locator_set_t *
vec_free (locator_name);
vec_free (a->locators);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_LISP_ADD_DEL_LOCATOR_SET_REPLY,
({
rmp->ls_index = clib_host_to_net_u32 (ls_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -287,12 +285,10 @@ static void
int rv = 0;
vl_api_show_lisp_map_request_mode_reply_t *rmp;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_LISP_MAP_REQUEST_MODE_REPLY,
({
rmp->is_src_dst = vnet_lisp_get_map_request_mode ();
}));
- /* *INDENT-ON* */
}
static void
@@ -362,13 +358,11 @@ vl_api_show_lisp_use_petr_t_handler (vl_api_show_lisp_use_petr_t * mp)
}
}
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_LISP_USE_PETR_REPLY,
{
rmp->is_petr_enable = status;
ip_address_encode2 (&gid_address_ip (&addr), &rmp->ip_address);
});
- /* *INDENT-ON* */
}
static void
@@ -589,7 +583,6 @@ vl_api_lisp_locator_set_dump_t_handler (vl_api_lisp_locator_set_dump_t * mp)
return;
filter = mp->filter;
- /* *INDENT-OFF* */
pool_foreach (lsit, lcm->locator_set_pool)
{
if (filter && !((1 == filter && lsit->local) ||
@@ -600,7 +593,6 @@ vl_api_lisp_locator_set_dump_t_handler (vl_api_lisp_locator_set_dump_t * mp)
send_lisp_locator_set_details (lcm, lsit, reg, mp->context,
lsit - lcm->locator_set_pool);
}
- /* *INDENT-ON* */
}
static void
@@ -703,13 +695,11 @@ vl_api_lisp_eid_table_dump_t_handler (vl_api_lisp_eid_table_dump_t * mp)
}
else
{
- /* *INDENT-OFF* */
pool_foreach (mapit, lcm->mapping_pool)
{
send_lisp_eid_table_details(mapit, reg, mp->context,
mp->filter);
}
- /* *INDENT-ON* */
}
}
@@ -820,12 +810,10 @@ vl_api_lisp_eid_table_map_dump_t_handler (vl_api_lisp_eid_table_map_dump_t *
vni_table = lcm->table_id_by_vni;
}
- /* *INDENT-OFF* */
hash_foreach_pair (p, vni_table,
({
send_eid_table_map_pair (p, reg, mp->context);
}));
- /* *INDENT-ON* */
}
static void
@@ -868,12 +856,10 @@ static void
vl_api_show_lisp_rloc_probe_state_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_LISP_RLOC_PROBE_STATE_REPLY,
{
rmp->is_enabled = vnet_lisp_rloc_probe_state_get ();
});
- /* *INDENT-ON* */
}
static void
@@ -883,12 +869,10 @@ static void
vl_api_show_lisp_map_register_state_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_LISP_MAP_REGISTER_STATE_REPLY,
{
rmp->is_enabled = vnet_lisp_map_register_state_get ();
});
- /* *INDENT-ON* */
}
static void
@@ -903,13 +887,11 @@ vl_api_lisp_adjacencies_get_t_handler (vl_api_lisp_adjacencies_get_t * mp)
adjs = vnet_lisp_adjacencies_get_by_vni (vni);
size = vec_len (adjs) * sizeof (vl_api_lisp_adjacency_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_LISP_ADJACENCIES_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (vec_len (adjs));
lisp_adjacency_copy (rmp->adjacencies, adjs);
});
- /* *INDENT-ON* */
vec_free (adjs);
}
@@ -927,7 +909,6 @@ vl_api_lisp_eid_table_vni_dump_t_handler (vl_api_lisp_eid_table_vni_dump_t *
if (!reg)
return;
- /* *INDENT-OFF* */
hash_foreach_pair (p, lcm->table_id_by_vni,
({
hash_set (vnis, p->key, 0);
@@ -942,7 +923,6 @@ vl_api_lisp_eid_table_vni_dump_t_handler (vl_api_lisp_eid_table_vni_dump_t *
({
send_eid_table_vni (p->key, reg, mp->context);
}));
- /* *INDENT-ON* */
hash_free (vnis);
}
@@ -953,13 +933,11 @@ vl_api_show_lisp_status_t_handler (vl_api_show_lisp_status_t * mp)
vl_api_show_lisp_status_reply_t *rmp = NULL;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_LISP_STATUS_REPLY,
({
rmp->is_gpe_enabled = vnet_lisp_gpe_enable_disable_status ();
rmp->is_lisp_enabled = vnet_lisp_enable_disable_status ();
}));
- /* *INDENT-ON* */
}
static void
@@ -983,13 +961,11 @@ static void
tmp_str = format (0, "%s", loc_set->name);
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_LISP_GET_MAP_REQUEST_ITR_RLOCS_REPLY,
({
strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
ARRAY_LEN(rmp->locator_set_name) - 1);
}));
- /* *INDENT-ON* */
vec_free (tmp_str);
}
@@ -1027,14 +1003,12 @@ vl_api_show_lisp_pitr_t_handler (vl_api_show_lisp_pitr_t * mp)
}
vec_add1 (tmp_str, 0);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_LISP_PITR_REPLY,
({
rmp->is_enabled = lcm->flags & LISP_FLAG_PITR_MODE;
strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
ARRAY_LEN(rmp->locator_set_name) - 1);
}));
- /* *INDENT-ON* */
}
/*
diff --git a/src/plugins/lisp/lisp-cp/lisp_cli.c b/src/plugins/lisp/lisp-cp/lisp_cli.c
index 569d695b033..6c15898216d 100644
--- a/src/plugins/lisp/lisp-cp/lisp_cli.c
+++ b/src/plugins/lisp/lisp-cp/lisp_cli.c
@@ -64,13 +64,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_adjacencies_command) = {
.path = "show lisp adjacencies",
.short_help = "show lisp adjacencies",
.function = lisp_show_adjacencies_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_map_server_command_fn (vlib_main_t * vm,
@@ -120,13 +118,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_map_server_command) = {
.path = "lisp map-server",
.short_help = "lisp map-server add|del <ip>",
.function = lisp_add_del_map_server_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -227,14 +223,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_local_eid_command) = {
.path = "lisp eid-table",
.short_help = "lisp eid-table add/del [vni <vni>] eid <eid> "
"locator-set <locator-set> [key <secret-key> key-id sha1|sha256 ]",
.function = lisp_add_del_local_eid_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_eid_table_map_command_fn (vlib_main_t * vm,
@@ -274,13 +268,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_eid_table_map_command) = {
.path = "lisp eid-table map",
.short_help = "lisp eid-table map [del] vni <vni> vrf <vrf> | bd <bdi>",
.function = lisp_eid_table_map_command_fn,
};
-/* *INDENT-ON* */
/**
* Handler for add/del remote mapping CLI.
@@ -418,7 +410,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_remote_mapping_command) = {
.path = "lisp remote-mapping",
.short_help = "lisp remote-mapping add|del [del-all] vni <vni> "
@@ -427,7 +418,6 @@ VLIB_CLI_COMMAND (lisp_add_del_remote_mapping_command) = {
"w <weight> [rloc <dst-locator> ... ]",
.function = lisp_add_del_remote_mapping_command_fn,
};
-/* *INDENT-ON* */
/**
* Handler for add/del adjacency CLI.
@@ -525,14 +515,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_adjacency_command) = {
.path = "lisp adjacency",
.short_help = "lisp adjacency add|del vni <vni> reid <remote-eid> "
"leid <local-eid>",
.function = lisp_add_del_adjacency_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -575,13 +563,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_map_request_mode_command) = {
.path = "lisp map-request mode",
.short_help = "lisp map-request mode dst-only|src-dst",
.function = lisp_map_request_mode_command_fn,
};
-/* *INDENT-ON* */
static u8 *
@@ -609,13 +595,11 @@ lisp_show_map_request_mode_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_map_request_mode_command) = {
.path = "show lisp map-request mode",
.short_help = "show lisp map-request mode",
.function = lisp_show_map_request_mode_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_map_resolvers_command_fn (vlib_main_t * vm,
@@ -632,13 +616,11 @@ lisp_show_map_resolvers_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_map_resolvers_command) = {
.path = "show lisp map-resolvers",
.short_help = "show lisp map-resolvers",
.function = lisp_show_map_resolvers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -691,13 +673,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_pitr_set_locator_set_command) = {
.path = "lisp pitr",
.short_help = "lisp pitr [disable] ls <locator-set-name>",
.function = lisp_pitr_set_locator_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_pitr_command_fn (vlib_main_t * vm,
@@ -744,13 +724,11 @@ lisp_show_pitr_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_pitr_command) = {
.path = "show lisp pitr",
.short_help = "Show pitr",
.function = lisp_show_pitr_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_eid_entry (u8 * s, va_list * args)
@@ -840,7 +818,6 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm,
if (print_all)
{
- /* *INDENT-OFF* */
pool_foreach (mapit, lcm->mapping_pool)
{
if (mapit->pitr_set)
@@ -856,7 +833,6 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_eid_entry, lcm->vnet_main,
lcm, mapit, ls);
}
- /* *INDENT-ON* */
}
else
{
@@ -884,13 +860,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_show_eid_table_command) = {
.path = "show lisp eid-table",
.short_help = "show lisp eid-table [local|remote|eid <eid>]",
.function = lisp_show_eid_table_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -906,13 +880,11 @@ lisp_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_enable_command) = {
.path = "lisp enable",
.short_help = "lisp enable",
.function = lisp_enable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_disable_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -927,13 +899,11 @@ lisp_disable_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_disable_command) = {
.path = "lisp disable",
.short_help = "lisp disable",
.function = lisp_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_enable_disable_command_fn (vlib_main_t * vm,
@@ -980,13 +950,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_map_register_enable_disable_command) = {
.path = "lisp map-register",
.short_help = "lisp map-register [enable|disable]",
.function = lisp_map_register_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_rloc_probe_enable_disable_command_fn (vlib_main_t * vm,
@@ -1033,13 +1001,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_rloc_probe_enable_disable_command) = {
.path = "lisp rloc-probe",
.short_help = "lisp rloc-probe [enable|disable]",
.function = lisp_rloc_probe_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_lisp_status (u8 * s, va_list * args)
@@ -1060,13 +1026,11 @@ lisp_show_status_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_status_command) = {
.path = "show lisp status",
.short_help = "show lisp status",
.function = lisp_show_status_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_eid_table_map_command_fn (vlib_main_t * vm,
@@ -1112,12 +1076,10 @@ lisp_show_eid_table_map_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%=10s%=10s", "VNI", is_l2 ? "BD" : "VRF");
- /* *INDENT-OFF* */
hash_foreach_pair (p, vni_table,
({
vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
}));
- /* *INDENT-ON* */
done:
unformat_free (line_input);
@@ -1125,13 +1087,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_eid_table_map_command) = {
.path = "show lisp eid-table map",
.short_help = "show lisp eid-table map l2|l3",
.function = lisp_show_eid_table_map_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1201,14 +1161,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_add_del_locator_set_command) = {
.path = "lisp locator-set",
.short_help = "lisp locator-set add/del <name> [iface <iface-name> "
"p <priority> w <weight>]",
.function = lisp_add_del_locator_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_locator_in_set_command_fn (vlib_main_t * vm,
@@ -1277,14 +1235,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_add_del_locator_in_set_command) = {
.path = "lisp locator",
.short_help = "lisp locator add/del locator-set <name> iface <iface-name> "
"p <priority> w <weight>",
.function = lisp_add_del_locator_in_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
@@ -1299,7 +1255,6 @@ lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%s%=16s%=16s%=16s", "Locator-set", "Locator",
"Priority", "Weight");
- /* *INDENT-OFF* */
pool_foreach (lsit, lcm->locator_set_pool)
{
u8 * msg = 0;
@@ -1331,17 +1286,14 @@ lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%v", msg);
vec_free (msg);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_show_locator_sets_command) = {
.path = "show lisp locator-set",
.short_help = "Shows locator-sets",
.function = lisp_cp_show_locator_sets_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1396,13 +1348,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_map_resolver_command) = {
.path = "lisp map-resolver",
.short_help = "lisp map-resolver add/del <ip_address>",
.function = lisp_add_del_map_resolver_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1451,13 +1401,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_map_request_command) = {
.path = "lisp map-request itr-rlocs",
.short_help = "lisp map-request itr-rlocs add/del <locator_set_name>",
.function = lisp_add_del_mreq_itr_rlocs_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
@@ -1481,13 +1429,11 @@ lisp_show_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_map_request_command) = {
.path = "show lisp map-request itr-rlocs",
.short_help = "Shows map-request itr-rlocs",
.function = lisp_show_mreq_itr_rlocs_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_use_petr_set_locator_set_command_fn (vlib_main_t * vm,
@@ -1534,7 +1480,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_use_petr_set_locator_set_command) = {
.path = "lisp use-petr",
.short_help = "lisp use-petr [disable] <petr-ip>",
@@ -1586,13 +1531,11 @@ lisp_show_petr_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_petr_command) = {
.path = "show lisp petr",
.short_help = "Show petr",
.function = lisp_show_petr_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lisp/lisp-cp/lisp_cp_test.c b/src/plugins/lisp/lisp-cp/lisp_cp_test.c
index c0284d301a7..a6c3ca92ce0 100644
--- a/src/plugins/lisp/lisp-cp/lisp_cp_test.c
+++ b/src/plugins/lisp/lisp-cp/lisp_cp_test.c
@@ -26,7 +26,7 @@
/* define message IDs */
#include <lisp/lisp-cp/lisp.api_enum.h>
#include <lisp/lisp-cp/lisp.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -41,13 +41,11 @@ lisp_test_main_t lisp_test_main;
#define __plugin_msg_base lisp_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
typedef struct
{
@@ -118,7 +116,6 @@ format_lisp_eid_vat (u8 * s, va_list * args)
-/* *INDENT-OFF* */
/** Used for parsing LISP eids */
typedef struct lisp_eid_vat_t_
{
@@ -132,7 +129,6 @@ typedef struct lisp_eid_vat_t_
/**< type of eid */
u8 type;
} __clib_packed lisp_eid_vat_t;
-/* *INDENT-ON* */
static uword
unformat_lisp_eid_vat (unformat_input_t * input, va_list * args)
diff --git a/src/plugins/lisp/lisp-cp/lisp_msg_serdes.c b/src/plugins/lisp/lisp-cp/lisp_msg_serdes.c
index 14d90982d4f..509462d8e23 100644
--- a/src/plugins/lisp/lisp-cp/lisp_msg_serdes.c
+++ b/src/plugins/lisp/lisp-cp/lisp_msg_serdes.c
@@ -264,9 +264,14 @@ lisp_msg_parse_addr (vlib_buffer_t * b, gid_address_t * eid)
u32 len;
clib_memset (eid, 0, sizeof (*eid));
len = gid_address_parse (vlib_buffer_get_current (b), eid);
- if (len != ~0)
- vlib_buffer_pull (b, len);
- return len;
+ if ((len != ~0) && vlib_buffer_pull (b, len))
+ {
+ return len;
+ }
+ else
+ {
+ return ~0;
+ }
}
u32
@@ -280,7 +285,10 @@ lisp_msg_parse_eid_rec (vlib_buffer_t * b, gid_address_t * eid)
return len;
gid_address_ippref_len (eid) = EID_REC_MLEN (h);
- vlib_buffer_pull (b, len + sizeof (eid_record_hdr_t));
+ if (!vlib_buffer_pull (b, len + sizeof (eid_record_hdr_t)))
+ {
+ return ~0;
+ }
return len + sizeof (eid_record_hdr_t);
}
diff --git a/src/plugins/lisp/lisp-cp/lisp_types.h b/src/plugins/lisp/lisp-cp/lisp_types.h
index 3f7d0302640..e92f8f80c70 100644
--- a/src/plugins/lisp/lisp-cp/lisp_types.h
+++ b/src/plugins/lisp/lisp-cp/lisp_types.h
@@ -198,7 +198,8 @@ u8 gid_address_len (gid_address_t * a);
void *gid_address_cast (gid_address_t * gid, gid_address_type_t type);
void gid_address_copy (gid_address_t * dst, gid_address_t * src);
u32 gid_address_parse (u8 * offset, gid_address_t * a);
-void gid_address_ip_set (gid_address_t * dst, void *src, u8 version);
+void gid_address_ip_set (gid_address_t *dst, void *src,
+ ip_address_family_t version);
#define gid_address_type(_a) (_a)->type
#define gid_address_ippref(_a) (_a)->ippref
@@ -238,7 +239,6 @@ void gid_address_ip_set (gid_address_t * dst, void *src, u8 version);
_(nsh) \
_(sd)
-/* *INDENT-OFF* */
#define _(_n) \
u16 _n ## _size_to_write (void * pref); \
u16 _n ## _write (u8 * p, void * pref); \
@@ -248,12 +248,11 @@ void _n ## _copy (void * dst , void * src);
foreach_gid_address_type_fcns
#undef _
-/* *INDENT-ON* */
always_inline u64
mac_to_u64 (u8 * m)
{
- return (*((u64 *) m) & 0xffffffffffff);
+ return (*(u32 *) m) | ((u64) (*(u16 *) (m + 4)) << 32);
}
typedef struct
diff --git a/src/plugins/lisp/lisp-cp/one_api.c b/src/plugins/lisp/lisp-cp/one_api.c
index 3969dd245ab..b5b523433f2 100644
--- a/src/plugins/lisp/lisp-cp/one_api.c
+++ b/src/plugins/lisp/lisp-cp/one_api.c
@@ -44,7 +44,6 @@ static u32 one_base_msg_id;
#define REPLY_DETAILS(t, body) \
do { \
vl_api_registration_t * reg; \
- rv = vl_msg_api_pd_handler (mp, rv); \
reg = vl_api_client_index_to_registration (mp->client_index); \
if (!reg) \
return; \
@@ -161,12 +160,10 @@ static void
int rv = 0;
u32 ttl = vnet_lisp_map_register_get_ttl ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_MAP_REGISTER_TTL_REPLY,
({
rmp->ttl = clib_host_to_net_u32 (ttl);
}));
- /* *INDENT-ON* */
}
static void
@@ -213,12 +210,10 @@ vl_api_one_add_del_locator_set_t_handler (vl_api_one_add_del_locator_set_t *
vec_free (locator_name);
vec_free (a->locators);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ONE_ADD_DEL_LOCATOR_SET_REPLY,
({
rmp->ls_index = clib_host_to_net_u32 (ls_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -467,12 +462,10 @@ static void
int rv = 0;
vl_api_show_one_map_request_mode_reply_t *rmp;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_ONE_MAP_REQUEST_MODE_REPLY,
({
rmp->mode = vnet_lisp_get_map_request_mode ();
}));
- /* *INDENT-ON* */
}
static void
@@ -560,7 +553,6 @@ vl_api_show_one_use_petr_t_handler (vl_api_show_one_use_petr_t * mp)
}
}
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_USE_PETR_REPLY,
{
rmp->status = status;
@@ -568,7 +560,6 @@ vl_api_show_one_use_petr_t_handler (vl_api_show_one_use_petr_t * mp)
ip_address_encode2 (ip, &rmp->ip_address);
});
- /* *INDENT-ON* */
}
static void
@@ -790,7 +781,6 @@ vl_api_one_locator_set_dump_t_handler (vl_api_one_locator_set_dump_t * mp)
return;
filter = mp->filter;
- /* *INDENT-OFF* */
pool_foreach (lsit, lcm->locator_set_pool)
{
if (filter && !((1 == filter && lsit->local) ||
@@ -801,7 +791,6 @@ vl_api_one_locator_set_dump_t_handler (vl_api_one_locator_set_dump_t * mp)
send_one_locator_set_details (lcm, lsit, reg, mp->context,
lsit - lcm->locator_set_pool);
}
- /* *INDENT-ON* */
}
static void
@@ -920,13 +909,11 @@ vl_api_one_eid_table_dump_t_handler (vl_api_one_eid_table_dump_t * mp)
}
else
{
- /* *INDENT-OFF* */
pool_foreach (mapit, lcm->mapping_pool)
{
send_one_eid_table_details(mapit, reg, mp->context,
mp->filter);
}
- /* *INDENT-ON* */
}
}
@@ -1035,12 +1022,10 @@ vl_api_one_eid_table_map_dump_t_handler (vl_api_one_eid_table_map_dump_t * mp)
vni_table = lcm->table_id_by_vni;
}
- /* *INDENT-OFF* */
hash_foreach_pair (p, vni_table,
({
send_eid_table_map_pair (p, reg, mp->context);
}));
- /* *INDENT-ON* */
}
static void
@@ -1113,12 +1098,10 @@ static void
vl_api_show_one_rloc_probe_state_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_RLOC_PROBE_STATE_REPLY,
{
rmp->is_enable = vnet_lisp_rloc_probe_state_get ();
});
- /* *INDENT-ON* */
}
static void
@@ -1128,12 +1111,10 @@ static void
vl_api_show_one_map_register_state_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_MAP_REGISTER_STATE_REPLY,
{
rmp->is_enable = vnet_lisp_map_register_state_get ();
});
- /* *INDENT-ON* */
}
static void
@@ -1148,13 +1129,11 @@ vl_api_one_adjacencies_get_t_handler (vl_api_one_adjacencies_get_t * mp)
adjs = vnet_lisp_adjacencies_get_by_vni (vni);
size = vec_len (adjs) * sizeof (vl_api_one_adjacency_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_ONE_ADJACENCIES_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (vec_len (adjs));
one_adjacency_copy (rmp->adjacencies, adjs);
});
- /* *INDENT-ON* */
vec_free (adjs);
}
@@ -1171,7 +1150,6 @@ vl_api_one_eid_table_vni_dump_t_handler (vl_api_one_eid_table_vni_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
hash_foreach_pair (p, lcm->table_id_by_vni,
({
hash_set (vnis, p->key, 0);
@@ -1186,7 +1164,6 @@ vl_api_one_eid_table_vni_dump_t_handler (vl_api_one_eid_table_vni_dump_t * mp)
({
send_eid_table_vni (p->key, reg, mp->context);
}));
- /* *INDENT-ON* */
hash_free (vnis);
}
@@ -1197,13 +1174,11 @@ vl_api_show_one_status_t_handler (vl_api_show_one_status_t * mp)
vl_api_show_one_status_reply_t *rmp = NULL;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_ONE_STATUS_REPLY,
({
rmp->gpe_status = vnet_lisp_gpe_enable_disable_status ();
rmp->feature_status = vnet_lisp_enable_disable_status ();
}));
- /* *INDENT-ON* */
}
static void
@@ -1227,13 +1202,11 @@ static void
tmp_str = format (0, "%s", loc_set->name);
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_ONE_GET_MAP_REQUEST_ITR_RLOCS_REPLY,
({
strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
ARRAY_LEN(rmp->locator_set_name) - 1);
}));
- /* *INDENT-ON* */
vec_free (tmp_str);
}
@@ -1270,14 +1243,12 @@ vl_api_show_one_nsh_mapping_t_handler (vl_api_show_one_nsh_mapping_t * mp)
}
vec_add1 (tmp_str, 0);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_ONE_NSH_MAPPING_REPLY,
({
rmp->is_set = is_set;
strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
ARRAY_LEN(rmp->locator_set_name) - 1);
}));
- /* *INDENT-ON* */
}
static void
@@ -1313,14 +1284,12 @@ vl_api_show_one_pitr_t_handler (vl_api_show_one_pitr_t * mp)
}
vec_add1 (tmp_str, 0);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_ONE_PITR_REPLY,
({
rmp->status = lcm->flags & LISP_FLAG_PITR_MODE;
strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
ARRAY_LEN(rmp->locator_set_name) - 1);
}));
- /* *INDENT-ON* */
}
static void
@@ -1330,12 +1299,10 @@ static void
vl_api_show_one_stats_enable_disable_reply_t *rmp = NULL;
vnet_api_error_t rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_STATS_ENABLE_DISABLE_REPLY,
({
rmp->is_enable = vnet_lisp_stats_enable_disable_state ();
}));
- /* *INDENT-ON* */
}
static void
@@ -1363,12 +1330,10 @@ vl_api_one_stats_dump_t_handler (vl_api_one_stats_dump_t * mp)
{
vl_api_one_stats_details_t *rmp;
lisp_api_stats_t *stats, *stat;
- u8 rv = 0;
stats = vnet_lisp_get_stats ();
vec_foreach (stat, stats)
{
- /* *INDENT-OFF* */
REPLY_DETAILS (VL_API_ONE_STATS_DETAILS,
({
fid_to_api_eid (&stat->deid, &rmp->deid);
@@ -1381,7 +1346,6 @@ vl_api_one_stats_dump_t_handler (vl_api_one_stats_dump_t * mp)
rmp->pkt_count = clib_host_to_net_u32 (stat->counters.packets);
rmp->bytes = clib_host_to_net_u32 (stat->counters.bytes);
}));
- /* *INDENT-ON* */
}
}
@@ -1441,7 +1405,6 @@ vl_api_one_ndp_bd_get_t_handler (vl_api_one_ndp_bd_get_t * mp)
u32 *bds = vnet_lisp_ndp_bds_get ();
u32 size = hash_elts (bds) * sizeof (u32);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_ONE_NDP_BD_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (hash_elts (bds));
@@ -1450,7 +1413,6 @@ vl_api_one_ndp_bd_get_t_handler (vl_api_one_ndp_bd_get_t * mp)
rmp->bridge_domains[i++] = clib_host_to_net_u32 (p->key);
}));
});
- /* *INDENT-ON* */
hash_free (bds);
}
@@ -1466,7 +1428,6 @@ vl_api_one_l2_arp_bd_get_t_handler (vl_api_one_l2_arp_bd_get_t * mp)
u32 *bds = vnet_lisp_l2_arp_bds_get ();
u32 size = hash_elts (bds) * sizeof (u32);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_ONE_L2_ARP_BD_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (hash_elts (bds));
@@ -1475,7 +1436,6 @@ vl_api_one_l2_arp_bd_get_t_handler (vl_api_one_l2_arp_bd_get_t * mp)
rmp->bridge_domains[i++] = clib_host_to_net_u32 (p->key);
}));
});
- /* *INDENT-ON* */
hash_free (bds);
}
@@ -1493,7 +1453,6 @@ vl_api_one_l2_arp_entries_get_t_handler (vl_api_one_l2_arp_entries_get_t * mp)
entries = vnet_lisp_l2_arp_entries_get_by_bd (bd);
u32 size = vec_len (entries) * sizeof (vl_api_one_l2_arp_entry_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_ONE_L2_ARP_ENTRIES_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (vec_len (entries));
@@ -1504,7 +1463,6 @@ vl_api_one_l2_arp_entries_get_t_handler (vl_api_one_l2_arp_entries_get_t * mp)
i++;
}
});
- /* *INDENT-ON* */
vec_free (entries);
}
@@ -1530,12 +1488,10 @@ static void
u32 value = vnet_lisp_map_register_fallback_threshold_get ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_MAP_REGISTER_FALLBACK_THRESHOLD_REPLY,
({
rmp->value = clib_host_to_net_u32 (value);
}));
- /* *INDENT-ON* */
}
static void
@@ -1558,12 +1514,10 @@ static void
int rv = 0;
u8 proto = (u8) vnet_lisp_get_transport_protocol ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ONE_GET_TRANSPORT_PROTOCOL_REPLY,
({
rmp->protocol = proto;
}));
- /* *INDENT-ON* */
}
static void
@@ -1579,7 +1533,6 @@ vl_api_one_ndp_entries_get_t_handler (vl_api_one_ndp_entries_get_t * mp)
entries = vnet_lisp_ndp_entries_get_by_bd (bd);
u32 size = vec_len (entries) * sizeof (vl_api_one_ndp_entry_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_ONE_NDP_ENTRIES_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (vec_len (entries));
@@ -1590,7 +1543,6 @@ vl_api_one_ndp_entries_get_t_handler (vl_api_one_ndp_entries_get_t * mp)
i++;
}
});
- /* *INDENT-ON* */
vec_free (entries);
}
@@ -1611,12 +1563,10 @@ vl_api_one_show_xtr_mode_t_handler (vl_api_one_show_xtr_mode_t * mp)
vl_api_one_show_xtr_mode_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ONE_SHOW_XTR_MODE_REPLY,
{
rmp->is_enable = vnet_lisp_get_xtr_mode ();
});
- /* *INDENT-ON* */
}
static void
@@ -1635,12 +1585,10 @@ vl_api_one_show_pitr_mode_t_handler (vl_api_one_show_pitr_mode_t * mp)
vl_api_one_show_pitr_mode_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ONE_SHOW_PITR_MODE_REPLY,
{
rmp->is_enable = vnet_lisp_get_pitr_mode ();
});
- /* *INDENT-ON* */
}
static void
@@ -1659,12 +1607,10 @@ vl_api_one_show_petr_mode_t_handler (vl_api_one_show_petr_mode_t * mp)
vl_api_one_show_petr_mode_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ONE_SHOW_PETR_MODE_REPLY,
{
rmp->is_enable = vnet_lisp_get_petr_mode ();
});
- /* *INDENT-ON* */
}
/*
@@ -1689,12 +1635,10 @@ VLIB_API_INIT_FUNCTION (one_api_hookup);
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Locator ID Separation Protocol (LISP)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lisp/lisp-cp/one_cli.c b/src/plugins/lisp/lisp-cp/one_cli.c
index b85fea16ea7..8658d5fafd7 100644
--- a/src/plugins/lisp/lisp-cp/one_cli.c
+++ b/src/plugins/lisp/lisp-cp/one_cli.c
@@ -62,13 +62,11 @@ lisp_show_adjacencies_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_adjacencies_command) = {
.path = "show one adjacencies",
.short_help = "show one adjacencies",
.function = lisp_show_adjacencies_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_map_server_command_fn (vlib_main_t * vm,
@@ -116,13 +114,11 @@ lisp_add_del_map_server_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_map_server_command) = {
.path = "one map-server",
.short_help = "one map-server add|del <ip>",
.function = lisp_add_del_map_server_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -224,14 +220,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_local_eid_command) = {
.path = "one eid-table",
.short_help = "one eid-table add/del [vni <vni>] eid <eid> "
"locator-set <locator-set> [key <secret-key> key-id sha1|sha256 ]",
.function = lisp_add_del_local_eid_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_eid_table_map_command_fn (vlib_main_t * vm,
@@ -271,13 +265,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_eid_table_map_command) = {
.path = "one eid-table map",
.short_help = "one eid-table map [del] vni <vni> vrf <vrf> | bd <bdi>",
.function = lisp_eid_table_map_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_ndp_entry_command_fn (vlib_main_t * vm,
@@ -335,13 +327,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_ndp_entry_command) = {
.path = "one ndp",
.short_help = "one ndp [del] bd <bd> mac <mac> ip <ipv6>",
.function = lisp_add_del_ndp_entry_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_l2_arp_entry_command_fn (vlib_main_t * vm,
@@ -399,13 +389,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_l2_arp_entry_command) = {
.path = "one l2 arp",
.short_help = "one l2 arp [del] bd <bd> mac <mac> ip <ipv4>",
.function = lisp_add_del_l2_arp_entry_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_l2_arp_entries_command_fn (vlib_main_t * vm,
@@ -416,7 +404,6 @@ lisp_show_l2_arp_entries_command_fn (vlib_main_t * vm,
lisp_api_l2_arp_entry_t *entries, *e;
hash_pair_t *p;
- /* *INDENT-OFF* */
hash_foreach_pair (p, ht,
({
entries = vnet_lisp_l2_arp_entries_get_by_bd (p->key);
@@ -429,19 +416,16 @@ lisp_show_l2_arp_entries_command_fn (vlib_main_t * vm,
}
vec_free (entries);
}));
- /* *INDENT-ON* */
hash_free (ht);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_l2_arp_entries_command) = {
.path = "show one l2 arp entries",
.short_help = "Show ONE L2 ARP entries",
.function = lisp_show_l2_arp_entries_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_ndp_entries_command_fn (vlib_main_t * vm,
@@ -452,7 +436,6 @@ lisp_show_ndp_entries_command_fn (vlib_main_t * vm,
lisp_api_ndp_entry_t *entries, *e;
hash_pair_t *p;
- /* *INDENT-OFF* */
hash_foreach_pair (p, ht,
({
entries = vnet_lisp_ndp_entries_get_by_bd (p->key);
@@ -465,19 +448,16 @@ lisp_show_ndp_entries_command_fn (vlib_main_t * vm,
}
vec_free (entries);
}));
- /* *INDENT-ON* */
hash_free (ht);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_ndp_entries_command) = {
.path = "show one ndp entries",
.short_help = "Show ONE NDP entries",
.function = lisp_show_ndp_entries_command_fn,
};
-/* *INDENT-ON* */
/**
* Handler for add/del remote mapping CLI.
@@ -613,7 +593,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_remote_mapping_command) = {
.path = "one remote-mapping",
.short_help =
@@ -623,7 +602,6 @@ VLIB_CLI_COMMAND (one_add_del_remote_mapping_command) = {
"[rloc <dst-locator> ... ]",
.function = lisp_add_del_remote_mapping_command_fn,
};
-/* *INDENT-ON* */
/**
* Handler for add/del adjacency CLI.
@@ -721,14 +699,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_adjacency_command) = {
.path = "one adjacency",
.short_help = "one adjacency add|del vni <vni> reid <remote-eid> "
"leid <local-eid>",
.function = lisp_add_del_adjacency_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -770,13 +746,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_request_mode_command) = {
.path = "one map-request mode",
.short_help = "one map-request mode dst-only|src-dst",
.function = lisp_map_request_mode_command_fn,
};
-/* *INDENT-ON* */
static u8 *
@@ -804,13 +778,11 @@ lisp_show_map_request_mode_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_map_request_mode_command) = {
.path = "show one map-request mode",
.short_help = "show one map-request mode",
.function = lisp_show_map_request_mode_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_map_resolvers_command_fn (vlib_main_t * vm,
@@ -827,13 +799,11 @@ lisp_show_map_resolvers_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_map_resolvers_command) = {
.path = "show one map-resolvers",
.short_help = "show one map-resolvers",
.function = lisp_show_map_resolvers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_nsh_set_locator_set_command_fn (vlib_main_t * vm,
@@ -884,13 +854,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_nsh_set_locator_set_command) = {
.path = "one nsh-mapping",
.short_help = "one nsh-mapping [del] ls <locator-set-name>",
.function = lisp_nsh_set_locator_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_fallback_threshold_show_command_fn (vlib_main_t * vm,
@@ -904,14 +872,12 @@ lisp_map_register_fallback_threshold_show_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_register_fallback_threshold_show_command) = {
.path = "show one map-register fallback-threshold",
.short_help = "show one map-register fallback-threshold",
.function = lisp_map_register_fallback_threshold_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_fallback_threshold_command_fn (vlib_main_t * vm,
@@ -949,13 +915,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_register_fallback_threshold_command) = {
.path = "one map-register fallback-threshold",
.short_help = "one map-register fallback-threshold <count>",
.function = lisp_map_register_fallback_threshold_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_pitr_set_locator_set_command_fn (vlib_main_t * vm,
@@ -1006,13 +970,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_pitr_set_locator_set_command) = {
.path = "one pitr",
.short_help = "one pitr [disable] ls <locator-set-name>",
.function = lisp_pitr_set_locator_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_pitr_command_fn (vlib_main_t * vm,
@@ -1059,13 +1021,11 @@ lisp_show_pitr_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_pitr_command) = {
.path = "show one pitr",
.short_help = "Show pitr",
.function = lisp_show_pitr_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_eid_entry (u8 * s, va_list * args)
@@ -1156,7 +1116,6 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm,
if (print_all)
{
- /* *INDENT-OFF* */
pool_foreach (mapit, lcm->mapping_pool)
{
if (mapit->pitr_set || mapit->nsh_set)
@@ -1172,7 +1131,6 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_eid_entry, lcm->vnet_main,
lcm, mapit, ls);
}
- /* *INDENT-ON* */
}
else
{
@@ -1200,13 +1158,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_show_eid_table_command) = {
.path = "show one eid-table",
.short_help = "show one eid-table [local|remote|eid <eid>]",
.function = lisp_show_eid_table_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_enable_disable_pitr_mode_command_fn (vlib_main_t * vm,
@@ -1253,13 +1209,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_enable_disable_pitr_mode_command) = {
.path = "one pitr mode",
.short_help = "one pitr mode [enable|disable]",
.function = lisp_enable_disable_pitr_mode_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1307,13 +1261,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_enable_disable_petr_mode_command) = {
.path = "one petr mode",
.short_help = "one petr mode [enable|disable]",
.function = lisp_enable_disable_petr_mode_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_enable_disable_xtr_mode_command_fn (vlib_main_t * vm,
@@ -1360,13 +1312,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_enable_disable_xtr_mode_command) = {
.path = "one xtr mode",
.short_help = "one xtr mode [enable|disable]",
.function = lisp_enable_disable_xtr_mode_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
one_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -1381,13 +1331,11 @@ one_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_enable_command) = {
.path = "one enable",
.short_help = "one enable",
.function = one_enable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
one_disable_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -1402,13 +1350,11 @@ one_disable_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_disable_command) = {
.path = "one disable",
.short_help = "one disable",
.function = one_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_set_ttl_command_fn (vlib_main_t * vm,
@@ -1449,13 +1395,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_register_set_ttl_command) = {
.path = "one map-register ttl",
.short_help = "one map-register ttl",
.function = lisp_map_register_set_ttl_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_show_ttl_command_fn (vlib_main_t * vm,
@@ -1468,14 +1412,12 @@ lisp_map_register_show_ttl_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_register_show_ttl_command) = {
.path = "show one map-register ttl",
.short_help = "show one map-register ttl",
.function = lisp_map_register_show_ttl_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_enable_disable_command_fn (vlib_main_t * vm,
@@ -1522,13 +1464,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_register_enable_disable_command) = {
.path = "one map-register",
.short_help = "one map-register [enable|disable]",
.function = lisp_map_register_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_rloc_probe_enable_disable_command_fn (vlib_main_t * vm,
@@ -1575,13 +1515,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_rloc_probe_enable_disable_command) = {
.path = "one rloc-probe",
.short_help = "one rloc-probe [enable|disable]",
.function = lisp_rloc_probe_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_lisp_status (u8 * s, va_list * args)
@@ -1602,13 +1540,11 @@ lisp_show_status_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_status_command) = {
.path = "show one status",
.short_help = "show one status",
.function = lisp_show_status_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_eid_table_map_command_fn (vlib_main_t * vm,
@@ -1654,12 +1590,10 @@ lisp_show_eid_table_map_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%=10s%=10s", "VNI", is_l2 ? "BD" : "VRF");
- /* *INDENT-OFF* */
hash_foreach_pair (p, vni_table,
({
vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
}));
- /* *INDENT-ON* */
done:
unformat_free (line_input);
@@ -1667,13 +1601,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_eid_table_map_command) = {
.path = "show one eid-table map",
.short_help = "show one eid-table map l2|l3",
.function = lisp_show_eid_table_map_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1742,14 +1674,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_add_del_locator_set_command) = {
.path = "one locator-set",
.short_help = "one locator-set add/del <name> [iface <iface-name> "
"p <priority> w <weight>]",
.function = lisp_add_del_locator_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_locator_in_set_command_fn (vlib_main_t * vm,
@@ -1818,14 +1748,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_add_del_locator_in_set_command) = {
.path = "one locator",
.short_help = "one locator add/del locator-set <name> iface <iface-name> "
"p <priority> w <weight>",
.function = lisp_add_del_locator_in_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
@@ -1840,7 +1768,6 @@ lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%s%=16s%=16s%=16s", "Locator-set", "Locator",
"Priority", "Weight");
- /* *INDENT-OFF* */
pool_foreach (lsit, lcm->locator_set_pool)
{
u8 * msg = 0;
@@ -1872,17 +1799,14 @@ lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%v", msg);
vec_free (msg);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_show_locator_sets_command) = {
.path = "show one locator-set",
.short_help = "Shows locator-sets",
.function = lisp_cp_show_locator_sets_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1936,13 +1860,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_map_resolver_command) = {
.path = "one map-resolver",
.short_help = "one map-resolver add/del <ip_address>",
.function = lisp_add_del_map_resolver_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1991,13 +1913,11 @@ done:
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_map_request_command) = {
.path = "one map-request itr-rlocs",
.short_help = "one map-request itr-rlocs add/del <locator_set_name>",
.function = lisp_add_del_mreq_itr_rlocs_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
@@ -2021,13 +1941,11 @@ lisp_show_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_map_request_command) = {
.path = "show one map-request itr-rlocs",
.short_help = "Shows map-request itr-rlocs",
.function = lisp_show_mreq_itr_rlocs_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_use_petr_set_locator_set_command_fn (vlib_main_t * vm,
@@ -2073,7 +1991,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_use_petr_set_locator_set_command) = {
.path = "one use-petr",
.short_help = "one use-petr [disable] <petr-ip>",
@@ -2125,13 +2042,11 @@ lisp_show_petr_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_petr_command) = {
.path = "show one petr",
.short_help = "Show petr",
.function = lisp_show_petr_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_map_servers_command_fn (vlib_main_t * vm,
@@ -2148,13 +2063,11 @@ lisp_show_map_servers_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_map_servers_command) = {
.path = "show one map-servers",
.short_help = "show one map servers",
.function = lisp_show_map_servers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_map_register_state_command_fn (vlib_main_t * vm,
@@ -2170,13 +2083,11 @@ lisp_show_map_register_state_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_map_register_state_command) = {
.path = "show one map-register state",
.short_help = "show one map-register state",
.function = lisp_show_map_register_state_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_rloc_probe_state_command_fn (vlib_main_t * vm,
@@ -2192,13 +2103,11 @@ lisp_show_rloc_probe_state_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_rloc_probe_state_command) = {
.path = "show one rloc state",
.short_help = "show one RLOC state",
.function = lisp_show_rloc_probe_state_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_stats_command_fn (vlib_main_t * vm,
@@ -2210,13 +2119,11 @@ lisp_show_stats_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_stats_command) = {
.path = "show one statistics status",
.short_help = "show ONE statistics enable/disable status",
.function = lisp_show_stats_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_stats_details_command_fn (vlib_main_t * vm,
@@ -2244,13 +2151,11 @@ lisp_show_stats_details_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_stats_details_command) = {
.path = "show one statistics details",
.short_help = "show ONE statistics",
.function = lisp_show_stats_details_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_stats_enable_disable_command_fn (vlib_main_t * vm,
@@ -2282,13 +2187,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_stats_enable_disable_command) = {
.path = "one statistics",
.short_help = "enable/disable ONE statistics collecting",
.function = lisp_stats_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_stats_flush_command_fn (vlib_main_t * vm,
@@ -2299,13 +2202,11 @@ lisp_stats_flush_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_stats_flush_command) = {
.path = "one statistics flush",
.short_help = "Flush ONE statistics",
.function = lisp_stats_flush_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_one_modes_command_fn (vlib_main_t * vm,
@@ -2323,13 +2224,11 @@ lisp_show_one_modes_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_show_one_modes_modes_command) = {
.path = "show one modes",
.short_help = "show one modes",
.function = lisp_show_one_modes_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lisp/lisp-cp/one_test.c b/src/plugins/lisp/lisp-cp/one_test.c
index 6966122b2b5..475b52de15e 100644
--- a/src/plugins/lisp/lisp-cp/one_test.c
+++ b/src/plugins/lisp/lisp-cp/one_test.c
@@ -26,7 +26,7 @@
/* define message IDs */
#include <lisp/lisp-cp/one.api_enum.h>
#include <lisp/lisp-cp/one.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -41,13 +41,11 @@ one_test_main_t one_test_main;
#define __plugin_msg_base one_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
#define LISP_PING(_lm, mp_ping) \
if (!(_lm)->ping_id) \
@@ -673,7 +671,6 @@ vl_api_show_one_pitr_reply_t_handler (vl_api_show_one_pitr_reply_t * mp)
vam->result_ready = 1;
}
-/* *INDENT-OFF* */
/** Used for parsing LISP eids */
typedef CLIB_PACKED(struct{
union {
@@ -684,7 +681,6 @@ typedef CLIB_PACKED(struct{
u32 len; /**< prefix length if IP */
u8 type; /**< type of eid */
}) lisp_eid_vat_t;
-/* *INDENT-ON* */
static uword
unformat_lisp_eid_vat (unformat_input_t * input, va_list * args)
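The consolidated FINISH macro above is the usual tail of a VAT custom-dump function: it NUL-terminates the format vector, prints it through vlib_cli_output, frees it, and returns the handle. A minimal sketch of the calling shape (vl_api_foo_t and its field are hypothetical, for illustration only):

/* Sketch: shape of a VAT custom-dump fn that ends with FINISH.
 * vl_api_foo_t and sw_if_index are hypothetical placeholders. */
static void *
vl_api_foo_t_print (vl_api_foo_t *mp, void *handle)
{
  u8 *s = 0;

  s = format (s, "SCRIPT: foo sw_if_index %d ", ntohl (mp->sw_if_index));

  FINISH; /* NUL-terminate s, vlib_cli_output (handle, s), vec_free, return */
}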
diff --git a/src/plugins/lisp/lisp-cp/packets.c b/src/plugins/lisp/lisp-cp/packets.c
index 3f4292b4841..6c36a550ab4 100644
--- a/src/plugins/lisp/lisp-cp/packets.c
+++ b/src/plugins/lisp/lisp-cp/packets.c
@@ -217,7 +217,6 @@ pkt_push_ecm_hdr (vlib_buffer_t * b)
return h;
}
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lisp/lisp-cp/packets.h b/src/plugins/lisp/lisp-cp/packets.h
index 68cd949e75d..c10fdb28c79 100644
--- a/src/plugins/lisp/lisp-cp/packets.h
+++ b/src/plugins/lisp/lisp-cp/packets.h
@@ -27,7 +27,6 @@ void *pkt_push_udp_and_ip (vlib_main_t * vm, vlib_buffer_t * b, u16 sp,
void *pkt_push_ecm_hdr (vlib_buffer_t * b);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lisp/lisp-gpe/decap.c b/src/plugins/lisp/lisp-gpe/decap.c
index 59fd5646ee7..18e32675a32 100644
--- a/src/plugins/lisp/lisp-gpe/decap.c
+++ b/src/plugins/lisp/lisp-gpe/decap.c
@@ -456,7 +456,6 @@ static char *lisp_gpe_ip4_input_error_strings[] = {
#undef lisp_gpe_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_gpe_ip4_input_node) = {
.function = lisp_gpe_ip4_input,
.name = "lisp-gpe-ip4-input",
@@ -476,9 +475,7 @@ VLIB_REGISTER_NODE (lisp_gpe_ip4_input_node) = {
.format_trace = format_lisp_gpe_rx_trace,
// $$$$ .unformat_buffer = unformat_lisp_gpe_header,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_gpe_ip6_input_node) = {
.function = lisp_gpe_ip6_input,
.name = "lisp-gpe-ip6-input",
@@ -498,7 +495,6 @@ VLIB_REGISTER_NODE (lisp_gpe_ip6_input_node) = {
.format_trace = format_lisp_gpe_rx_trace,
// $$$$ .unformat_buffer = unformat_lisp_gpe_header,
};
-/* *INDENT-ON* */
/**
* Adds arc from lisp-gpe-input to nsh-input if nsh-input is available
@@ -556,7 +552,6 @@ static char *lisp_gpe_nsh_placeholder_error_strings[] = {
"lisp gpe placeholder nsh decap",
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_gpe_nsh_placeholder_input_node) = {
.function = lisp_gpe_nsh_placeholder_input,
.name = "lisp-gpe-nsh-placeholder-input",
@@ -571,7 +566,6 @@ VLIB_REGISTER_NODE (lisp_gpe_nsh_placeholder_input_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_placeholder_nsh_node_command_fn (vlib_main_t * vm,
@@ -586,12 +580,10 @@ lisp_add_placeholder_nsh_node_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_placeholder_nsh_node_command, static) = {
.path = "test one nsh add-placeholder-decap-node",
.function = lisp_add_placeholder_nsh_node_command_fn,
};
-/* *INDENT-ON* */
VLIB_INIT_FUNCTION (gpe_decap_init);
diff --git a/src/plugins/lisp/lisp-gpe/interface.c b/src/plugins/lisp/lisp-gpe/interface.c
index 1d2abaf3f90..ed2b08f9aaf 100644
--- a/src/plugins/lisp/lisp-gpe/interface.c
+++ b/src/plugins/lisp/lisp-gpe/interface.c
@@ -88,12 +88,10 @@ format_lisp_gpe_tx_trace (u8 * s, va_list * args)
*
* @return number of vectors in frame.
*/
-static uword
-lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (lisp_tunnel_output)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
u32 n_left_from, next_index, *from, *to_next;
- lisp_gpe_main_t *lgm = &lisp_gpe_main;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -112,7 +110,6 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
const ip_adjacency_t *adj0;
const dpo_id_t *dpo0;
vlib_buffer_t *b0;
- u8 is_v4_0;
bi0 = from[0];
to_next[0] = bi0;
@@ -122,11 +119,7 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
-
- /* Fixup the checksum and len fields in the LISP tunnel encap
- * that was applied at the midchain node */
- is_v4_0 = is_v4_packet (vlib_buffer_get_current (b0));
- ip_udp_fixup_one (lgm->vlib_main, b0, is_v4_0);
+ b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
/* Follow the DPO on which the midchain is stacked */
adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
@@ -151,6 +144,13 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
+VLIB_REGISTER_NODE (lisp_tunnel_output) = {
+ .name = "lisp-tunnel-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lisp_gpe_tx_trace,
+ .sibling_of = "tunnel-output",
+};
+
static u8 *
format_lisp_gpe_name (u8 * s, va_list * args)
{
@@ -158,14 +158,10 @@ format_lisp_gpe_name (u8 * s, va_list * args)
return format (s, "lisp_gpe%d", dev_instance);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (lisp_gpe_device_class) = {
.name = "LISP_GPE",
.format_device_name = format_lisp_gpe_name,
- .format_tx_trace = format_lisp_gpe_tx_trace,
- .tx_function = lisp_gpe_interface_tx,
};
-/* *INDENT-ON* */
u8 *
format_lisp_gpe_header_with_length (u8 * s, va_list * args)
@@ -190,14 +186,12 @@ format_lisp_gpe_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (lisp_gpe_hw_class) = {
.name = "LISP_GPE",
.format_header = format_lisp_gpe_header_with_length,
.build_rewrite = lisp_gpe_build_rewrite,
.update_adjacency = lisp_gpe_update_adjacency,
};
-/* *INDENT-ON* */
typedef struct
@@ -302,14 +296,12 @@ format_l2_lisp_gpe_name (u8 * s, va_list * args)
return format (s, "l2_lisp_gpe%d", dev_instance);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (l2_lisp_gpe_device_class,static) = {
.name = "L2_LISP_GPE",
.format_device_name = format_l2_lisp_gpe_name,
.format_tx_trace = format_l2_lisp_gpe_tx_trace,
.tx_function = l2_lisp_gpe_interface_tx,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -406,14 +398,12 @@ format_nsh_lisp_gpe_name (u8 * s, va_list * args)
return format (s, "nsh_lisp_gpe%d", dev_instance);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (nsh_lisp_gpe_device_class,static) = {
.name = "NSH_LISP_GPE",
.format_device_name = format_nsh_lisp_gpe_name,
.format_tx_trace = format_nsh_lisp_gpe_tx_trace,
.tx_function = nsh_lisp_gpe_interface_tx,
};
-/* *INDENT-ON* */
static vnet_hw_interface_t *
lisp_gpe_create_iface (lisp_gpe_main_t * lgm, u32 vni, u32 dp_table,
@@ -431,7 +421,7 @@ lisp_gpe_create_iface (lisp_gpe_main_t * lgm, u32 vni, u32 dp_table,
if (flen > 0)
{
hw_if_index = lgm->free_tunnel_hw_if_indices[flen - 1];
- _vec_len (lgm->free_tunnel_hw_if_indices) -= 1;
+ vec_dec_len (lgm->free_tunnel_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, hw_if_index);
@@ -507,13 +497,11 @@ lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id)
fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id,
FIB_SOURCE_LISP);
- vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
ip4_sw_interface_enable_disable (sw_if_index, 1);
fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id,
FIB_SOURCE_LISP);
- vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
ip6_sw_interface_enable_disable (sw_if_index, 1);
}
@@ -928,13 +916,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (add_del_lisp_gpe_iface_command, static) = {
.path = "gpe iface",
.short_help = "gpe iface add/del vni <vni> vrf <vrf>",
.function = lisp_gpe_add_del_iface_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
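The tx path above no longer fixes up the encap inline: buffers are flagged VNET_BUFFER_F_LOCALLY_ORIGINATED and emitted through the new lisp-tunnel-output node (a sibling of tunnel-output), with the checksum/length fixup staying in the midchain (see the lisp_gpe_adjacency.c hunk below). For packets to reach the node, the interface's L3 output is redirected to it; the lisp_gpe_sub_interface.c hunk later in this patch pairs the set/reset calls with the sub-interface lifetime:

/* From the lisp_gpe_sub_interface.c hunk below: on create/lock */
vnet_set_interface_l3_output_node (vlib_get_main (), l3s->sw_if_index,
                                   (u8 *) "lisp-tunnel-output");
/* ... and on unlock/delete */
vnet_reset_interface_l3_output_node (vlib_get_main (), l3s->sw_if_index);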
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe.c b/src/plugins/lisp/lisp-gpe/lisp_gpe.c
index 1ae3131323c..7474d0fb6a5 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe.c
@@ -169,7 +169,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_gpe_add_del_fwd_entry_command, static) = {
.path = "gpe entry",
.short_help = "gpe entry add/del vni <vni> vrf/bd <id> [leid <leid>]"
@@ -177,7 +176,6 @@ VLIB_CLI_COMMAND (lisp_gpe_add_del_fwd_entry_command, static) = {
"[negative action <action>]",
.function = lisp_gpe_add_del_fwd_entry_command_fn,
};
-/* *INDENT-ON* */
/** Check if LISP-GPE is enabled. */
u8
@@ -271,13 +269,11 @@ gpe_set_encap_mode_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (gpe_set_encap_mode_command, static) = {
.path = "gpe encap",
.short_help = "gpe encap [lisp|vxlan]",
.function = gpe_set_encap_mode_command_fn,
};
-/* *INDENT-ON* */
/** Format GPE encap mode. */
u8 *
@@ -307,13 +303,11 @@ gpe_show_encap_mode_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (gpe_show_encap_mode_command, static) = {
.path = "show gpe encap",
.short_help = "show GPE encapulation mode",
.function = gpe_show_encap_mode_command_fn,
};
-/* *INDENT-ON* */
/** CLI command to enable/disable LISP-GPE. */
static clib_error_t *
@@ -352,13 +346,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_disable_lisp_gpe_command, static) = {
.path = "gpe",
.short_help = "gpe [enable|disable]",
.function = lisp_gpe_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/** CLI command to show LISP-GPE interfaces. */
static clib_error_t *
@@ -371,31 +363,25 @@ lisp_show_iface_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%=10s%=12s", "vrf", "hw_if_index");
- /* *INDENT-OFF* */
hash_foreach_pair (p, lgm->l3_ifaces.hw_if_index_by_dp_table, ({
vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
}));
- /* *INDENT-ON* */
if (0 != lgm->l2_ifaces.hw_if_index_by_dp_table)
{
vlib_cli_output (vm, "%=10s%=12s", "bd_id", "hw_if_index");
- /* *INDENT-OFF* */
hash_foreach_pair (p, lgm->l2_ifaces.hw_if_index_by_dp_table, ({
vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
}));
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_iface_command) = {
.path = "show gpe interface",
.short_help = "show gpe interface",
.function = lisp_show_iface_command_fn,
};
-/* *INDENT-ON* */
/** CLI command to show GPE fwd native route path. */
static clib_error_t *
@@ -428,13 +414,11 @@ gpe_show_native_fwd_rpath_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (gpe_show_native_fwd_rpath_command) = {
.path = "show gpe native-forward",
.short_help = "show gpe native-forward",
.function = gpe_show_native_fwd_rpath_command_fn,
};
-/* *INDENT-ON* */
void
gpe_update_native_fwd_path (u8 ip_version)
@@ -578,14 +562,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (gpe_native_forward_command) = {
.path = "gpe native-forward",
.short_help = "gpe native-forward [del] via <nh-ip-addr> [iface] "
"[table <table>]",
.function = gpe_native_forward_command_fn,
};
-/* *INDENT-ON* */
/** Format LISP-GPE status. */
u8 *
@@ -709,13 +691,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_test_nsh_command, static) = {
.path = "test one nsh",
.short_help = "test gpe nsh pcap <path-to-pcap-file>",
.function = lisp_test_nsh_command_fn,
};
-/* *INDENT-ON* */
VLIB_INIT_FUNCTION (lisp_gpe_init);
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe.h b/src/plugins/lisp/lisp-gpe/lisp_gpe.h
index 10dc4fe7aa7..d7e877124a1 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe.h
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe.h
@@ -36,22 +36,18 @@
#include <vppinfra/bihash_template.h>
/** IP4-UDP-LISP encap header */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4; /* 20 bytes */
udp_header_t udp; /* 8 bytes */
lisp_gpe_header_t lisp; /* 8 bytes */
}) ip4_udp_lisp_gpe_header_t;
-/* *INDENT-ON* */
/** IP6-UDP-LISP encap header */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_header_t ip6; /* 40 bytes */
udp_header_t udp; /* 8 bytes */
lisp_gpe_header_t lisp; /* 8 bytes */
}) ip6_udp_lisp_gpe_header_t;
-/* *INDENT-ON* */
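Since both encap headers are CLIB_PACKED, their wire sizes are exactly the sums of the per-member byte counts in the comments (20+8+8 = 36 and 40+8+8 = 56). A compile-time check along these lines (a sketch, not part of this patch) would pin that down:

/* Sketch (not in this patch): compile-time size checks for the
 * packed encap headers declared above. */
STATIC_ASSERT_SIZEOF (ip4_udp_lisp_gpe_header_t, 36); /* 20 + 8 + 8 */
STATIC_ASSERT_SIZEOF (ip6_udp_lisp_gpe_header_t, 56); /* 40 + 8 + 8 */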
#define foreach_lisp_gpe_ip_input_next \
_(DROP, "error-drop") \
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_adjacency.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_adjacency.c
index 8d20412a1f2..562b3b5eafb 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_adjacency.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_adjacency.c
@@ -285,7 +285,8 @@ lisp_gpe_fixup (vlib_main_t * vm,
/* Fixup the checksum and len fields in the LISP tunnel encap
* that was applied at the midchain node */
- ip_udp_fixup_one (vm, b, is_v4_packet (vlib_buffer_get_current (b)));
+ ip_udp_fixup_one (vm, b, is_v4_packet (vlib_buffer_get_current (b)),
+ UDP_ENCAP_FIXUP_NONE);
}
/**
@@ -317,8 +318,6 @@ lisp_gpe_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
linkt = adj_get_link_type (ai);
af = ADJ_FLAG_MIDCHAIN_IP_STACK;
- if (VNET_LINK_ETHERNET == linkt)
- af |= ADJ_FLAG_MIDCHAIN_NO_COUNT;
adj_nbr_midchain_update_rewrite
(ai, lisp_gpe_fixup, NULL, af,
@@ -558,7 +557,6 @@ lisp_gpe_adjacency_show (vlib_main_t * vm,
}
else
{
- /* *INDENT-OFF* */
pool_foreach (ladj, lisp_adj_pool)
{
vlib_cli_output (vm, "[%d] %U\n",
@@ -566,19 +564,16 @@ lisp_gpe_adjacency_show (vlib_main_t * vm,
format_lisp_gpe_adjacency, ladj,
LISP_GPE_ADJ_FORMAT_FLAG_NONE);
}
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) =
{
.path = "show gpe adjacency",
.function = lisp_gpe_adjacency_show,
};
-/* *INDENT-ON* */
#define LISP_ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (256)
#define LISP_ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (1<<20)
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_api.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_api.c
index df90ef54403..f77a6f4059f 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_api.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_api.c
@@ -212,7 +212,6 @@ vl_api_gpe_fwd_entry_vnis_get_t_handler (vl_api_gpe_fwd_entry_vnis_get_t * mp)
u32 *vnis = vnet_lisp_gpe_get_fwd_entry_vnis ();
u32 size = hash_elts (vnis) * sizeof (u32);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_GPE_FWD_ENTRY_VNIS_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (hash_elts (vnis));
@@ -221,7 +220,6 @@ vl_api_gpe_fwd_entry_vnis_get_t_handler (vl_api_gpe_fwd_entry_vnis_get_t * mp)
rmp->vnis[i++] = clib_host_to_net_u32 (p->key);
}));
});
- /* *INDENT-ON* */
hash_free (vnis);
}
@@ -239,14 +237,12 @@ vl_api_gpe_fwd_entries_get_t_handler (vl_api_gpe_fwd_entries_get_t * mp)
e = vnet_lisp_gpe_fwd_entries_get_by_vni (mp->vni);
size = vec_len (e) * sizeof (vl_api_gpe_fwd_entry_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_GPE_FWD_ENTRIES_GET_REPLY, size,
{
rmp->count = vec_len (e);
gpe_fwd_entries_copy (rmp->entries, e);
gpe_fwd_entries_get_reply_t_host_to_net (rmp);
});
- /* *INDENT-ON* */
vec_free (e);
}
@@ -294,12 +290,10 @@ vl_api_gpe_add_del_fwd_entry_t_handler (vl_api_gpe_add_del_fwd_entry_t * mp)
rv = vnet_lisp_gpe_add_del_fwd_entry (a, 0);
vec_free (pairs);
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_GPE_ADD_DEL_FWD_ENTRY_REPLY,
{
rmp->fwd_entry_index = clib_host_to_net_u32 (a->fwd_entry_index);
});
- /* *INDENT-ON* */
}
static void
@@ -365,12 +359,10 @@ vl_api_gpe_get_encap_mode_t_handler (vl_api_gpe_get_encap_mode_t * mp)
vl_api_gpe_get_encap_mode_reply_t *rmp;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_GPE_GET_ENCAP_MODE_REPLY,
({
rmp->encap_mode = vnet_gpe_get_encap_mode ();
}));
- /* *INDENT-ON* */
}
static void
@@ -464,7 +456,6 @@ vl_api_gpe_native_fwd_rpaths_get_t_handler (vl_api_gpe_native_fwd_rpaths_get_t
size = vec_len (lgm->native_fwd_rpath[rpath_index])
* sizeof (vl_api_gpe_native_fwd_rpath_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_GPE_NATIVE_FWD_RPATHS_GET_REPLY, size,
{
rmp->count = vec_len (lgm->native_fwd_rpath[rpath_index]);
@@ -472,7 +463,6 @@ vl_api_gpe_native_fwd_rpaths_get_t_handler (vl_api_gpe_native_fwd_rpaths_get_t
lgm->native_fwd_rpath[rpath_index]);
gpe_native_fwd_rpaths_get_reply_t_host_to_net (rmp);
});
- /* *INDENT-ON* */
}
/*
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_fwd_entry.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_fwd_entry.c
index 5f196fb22e3..d0d86d58391 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_fwd_entry.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_fwd_entry.c
@@ -1356,7 +1356,6 @@ vnet_lisp_gpe_fwd_entry_flush (void)
lisp_gpe_main_t *lgm = &lisp_gpe_main;
lisp_gpe_fwd_entry_t *lfe;
- /* *INDENT-OFF* */
pool_foreach (lfe, lgm->lisp_fwd_entry_pool)
{
switch (fid_addr_type(&lfe->key->rmt))
@@ -1372,7 +1371,6 @@ vnet_lisp_gpe_fwd_entry_flush (void)
break;
}
}
- /* *INDENT-ON* */
}
static u8 *
@@ -1476,7 +1474,6 @@ lisp_gpe_fwd_entry_show (vlib_main_t * vm,
return (NULL);
}
- /* *INDENT-OFF* */
pool_foreach (lfe, lgm->lisp_fwd_entry_pool)
{
if ((vni == ~0) ||
@@ -1484,18 +1481,15 @@ lisp_gpe_fwd_entry_show (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_lisp_gpe_fwd_entry, lfe,
LISP_GPE_FWD_ENTRY_FORMAT_NONE);
}
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_gpe_fwd_entry_show_command, static) = {
.path = "show gpe entry",
.short_help = "show gpe entry vni <vni> vrf <vrf> [leid <leid>] reid <reid>",
.function = lisp_gpe_fwd_entry_show,
};
-/* *INDENT-ON* */
clib_error_t *
lisp_gpe_fwd_entry_init (vlib_main_t * vm)
@@ -1521,12 +1515,10 @@ vnet_lisp_gpe_get_fwd_entry_vnis (void)
lisp_gpe_fwd_entry_t *lfe;
u32 *vnis = 0;
- /* *INDENT-OFF* */
pool_foreach (lfe, lgm->lisp_fwd_entry_pool)
{
hash_set (vnis, lfe->key->vni, 0);
}
- /* *INDENT-ON* */
return vnis;
}
@@ -1538,7 +1530,6 @@ vnet_lisp_gpe_fwd_entries_get_by_vni (u32 vni)
lisp_gpe_fwd_entry_t *lfe;
lisp_api_gpe_fwd_entry_t *entries = 0, e;
- /* *INDENT-OFF* */
pool_foreach (lfe, lgm->lisp_fwd_entry_pool)
{
if (lfe->key->vni == vni)
@@ -1554,7 +1545,6 @@ vnet_lisp_gpe_fwd_entries_get_by_vni (u32 vni)
vec_add1 (entries, e);
}
}
- /* *INDENT-ON* */
return entries;
}
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_sub_interface.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_sub_interface.c
index 9c48c0064ca..4ba46c8240f 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_sub_interface.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_sub_interface.c
@@ -93,14 +93,12 @@ lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id)
FIB_SOURCE_LISP);
ASSERT (FIB_NODE_INDEX_INVALID != fib_index);
- vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id,
FIB_SOURCE_LISP);
ASSERT (FIB_NODE_INDEX_INVALID != fib_index);
- vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
}
@@ -170,6 +168,8 @@ lisp_gpe_sub_interface_find_or_create_and_lock (const ip_address_t * lrloc,
vnet_sw_interface_set_flags (vnet_get_main (),
l3s->sw_if_index,
VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ vnet_set_interface_l3_output_node (vlib_get_main (), l3s->sw_if_index,
+ (u8 *) "lisp-tunnel-output");
lisp_gpe_sub_interface_db_insert (l3s);
}
@@ -202,6 +202,7 @@ lisp_gpe_sub_interface_unlock (index_t l3si)
lisp_gpe_tenant_l3_iface_unlock (l3s->key->vni);
vnet_sw_interface_set_flags (vnet_get_main (), l3s->sw_if_index, 0);
+ vnet_reset_interface_l3_output_node (vlib_get_main (), l3s->sw_if_index);
vnet_delete_sub_interface (l3s->sw_if_index);
lisp_gpe_sub_interface_db_remove (l3s);
@@ -223,9 +224,7 @@ format_lisp_gpe_sub_interface (u8 * s, va_list * ap)
lisp_gpe_sub_interface_t *l3s = va_arg (*ap, lisp_gpe_sub_interface_t *);
vnet_main_t *vnm = vnet_get_main ();
- s = format (s, "%-16U",
- format_vnet_sw_interface_name,
- vnm, vnet_get_sw_interface (vnm, l3s->sw_if_index));
+ s = format (s, "%-16U", format_vnet_sw_if_index_name, vnm, l3s->sw_if_index);
s = format (s, "%=8d", l3s->key->vni);
s = format (s, "%=15d", l3s->sw_if_index);
s = format (s, "%U", format_ip_address, &l3s->key->local_rloc);
@@ -244,23 +243,19 @@ lisp_gpe_sub_interface_show (vlib_main_t * vm,
vlib_cli_output (vm, "%-16s%=8s%=15s%s", "Name", "VNI", "sw_if_index",
"local RLOC");
- /* *INDENT-OFF* */
pool_foreach (l3s, lisp_gpe_sub_interface_pool)
{
vlib_cli_output (vm, "%U", format_lisp_gpe_sub_interface, l3s);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_gpe_sub_interface_command) = {
.path = "show gpe sub-interface",
.short_help = "show gpe sub-interface",
.function = lisp_gpe_sub_interface_show,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_gpe_sub_interface_module_init (vlib_main_t * vm)
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_tenant.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_tenant.c
index b6173b273c0..450c611c5f7 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_tenant.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_tenant.c
@@ -262,13 +262,11 @@ lisp_gpe_tenant_flush (void)
{
lisp_gpe_tenant_t *lt;
- /* *INDENT-OFF* */
pool_foreach (lt, lisp_gpe_tenant_pool)
{
lisp_gpe_tenant_l2_iface_unlock(lt->lt_vni);
lisp_gpe_tenant_l3_iface_unlock(lt->lt_vni);
}
- /* *INDENT-ON* */
}
/**
@@ -305,23 +303,19 @@ lisp_gpe_tenant_show (vlib_main_t * vm,
{
lisp_gpe_tenant_t *lt;
- /* *INDENT-OFF* */
pool_foreach (lt, lisp_gpe_tenant_pool)
{
vlib_cli_output (vm, "%U", format_lisp_gpe_tenant, lt);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_gpe_tenant_command) = {
.path = "show gpe tenant",
.short_help = "show gpe tenant",
.function = lisp_gpe_tenant_show,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_test.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_test.c
index 54f7713162a..10167b14975 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_test.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_test.c
@@ -26,7 +26,7 @@
/* define message IDs */
#include <lisp/lisp-gpe/lisp_gpe.api_enum.h>
#include <lisp/lisp-gpe/lisp_gpe.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -41,13 +41,11 @@ lisp_gpe_test_main_t lisp_gpe_test_main;
#define __plugin_msg_base lisp_gpe_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
#define LISP_PING(_lm, mp_ping) \
if (!(_lm)->ping_id) \
@@ -258,7 +256,6 @@ end:
}
-/* *INDENT-OFF* */
/** Used for parsing LISP eids */
typedef CLIB_PACKED(struct{
union {
@@ -269,7 +266,6 @@ typedef CLIB_PACKED(struct{
u32 len; /**< prefix length if IP */
u8 type; /**< type of eid */
}) lisp_eid_vat_t;
-/* *INDENT-ON* */
static uword
unformat_lisp_eid_vat (unformat_input_t * input, va_list * args)
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_tunnel.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_tunnel.c
index 14ee095d2de..8dca55c4315 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_tunnel.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_tunnel.c
@@ -253,24 +253,20 @@ show_lisp_gpe_tunnel_command_fn (vlib_main_t * vm,
}
else
{
- /* *INDENT-OFF* */
pool_foreach (lgt, lisp_gpe_tunnel_pool)
{
vlib_cli_output (vm, "%U", format_lisp_gpe_tunnel, lgt);
}
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) =
{
.path = "show gpe tunnel",
.function = show_lisp_gpe_tunnel_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_gpe_tunnel_module_init (vlib_main_t * vm)
diff --git a/src/plugins/lisp/test/lisp_cp_test.c b/src/plugins/lisp/test/lisp_cp_test.c
index 228ff32d010..d1908566f8e 100644
--- a/src/plugins/lisp/test/lisp_cp_test.c
+++ b/src/plugins/lisp/test/lisp_cp_test.c
@@ -99,7 +99,6 @@ test_lisp_msg_push_ecm ()
/* clear ip checksum */
clib_memset ((u8 *) ih + 10, 0, 2);
- /* *INDENT-OFF* */
u8 expected_ip4_hdr[] = {
0x45, /* version; IHL */
0x00, /* services */
@@ -112,7 +111,6 @@ test_lisp_msg_push_ecm ()
0xd4, 0xc3, 0xb2, 0xa1, /* src IP */
0x63, 0x72, 0x81, 0x90, /* dst IP */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (ih, expected_ip4_hdr, sizeof (expected_ip4_hdr)));
@@ -120,14 +118,12 @@ test_lisp_msg_push_ecm ()
/* clear udp checksum */
clib_memset ((u8 *) uh + 6, 0, 2);
- /* *INDENT-OFF* */
u8 expected_udp_hdr[] = {
0x00, 0x15, /* src port */
0x00, 0x14, /* dst port */
0x03, 0x8c, /* length */
0x00, 0x00, /* checksum */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (uh, expected_udp_hdr, sizeof (expected_udp_hdr)));
@@ -149,7 +145,6 @@ test_lisp_msg_parse_mapping_record ()
b = clib_mem_alloc (buff_len);
clib_memset ((u8 *) b, 0, buff_len);
- /* *INDENT-OFF* */
u8 map_reply_records[] = {
/* 1. record */
0x01, 0x02, 0x03, 0x04, /* record TTL */
@@ -167,7 +162,6 @@ test_lisp_msg_parse_mapping_record ()
0x00, 0x01, /* Loc-AFI */
0xaa, 0xbb, 0xcc, 0xdd, /* Locator */
};
- /* *INDENT-ON* */
b->current_length = buff_len;
clib_memcpy (b->data, map_reply_records, sizeof (map_reply_records));
@@ -322,7 +316,6 @@ test_lisp_msg_put_mreq_with_lcaf ()
/* clear Nonce to simplify comparison */
clib_memset ((u8 *) h + 4, 0, 8);
- /* *INDENT-OFF* */
u8 expected_data[] =
{
0x10, 0x40, 0x00, 0x01, /* type; flags; IRC; REC count */
@@ -349,7 +342,6 @@ test_lisp_msg_put_mreq_with_lcaf ()
0x00, 0x01, /* EID-prefix-AFI */
0xf0, 0xde, 0xbc, 0x9a, /* EID-prefix */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_data, (u8 *) h, sizeof (expected_data)));
done:
@@ -377,7 +369,6 @@ test_lisp_msg_put_mreq ()
print_map_request (h);
- /* *INDENT-OFF* */
u8 expected_data[50] = {
0x10, 0x40, 0x01, 0x01, /* type; flags; IRC; REC count */
0x00, 0x00, 0x00, 0x00,
@@ -400,7 +391,6 @@ test_lisp_msg_put_mreq ()
0x00, 0x01, /* EID-prefix-AFI */
0xf0, 0xde, 0xbc, 0x9a, /* EID-prefix */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_data, (u8 *) h, sizeof (expected_data)));
@@ -415,7 +405,6 @@ build_test_map_records ()
{
mapping_t *records = 0;
- /* *INDENT-OFF* */
mapping_t r = {
.ttl = MAP_REGISTER_DEFAULT_TTL,
.eid = {
@@ -439,7 +428,6 @@ build_test_map_records ()
}
}
};
- /* *INDENT-ON* */
vec_add1 (r.locators, loc);
vec_add1 (records, r);
@@ -482,7 +470,6 @@ test_lisp_map_register ()
/* clear authentication data */
clib_memset ((u8 *) b->data + 16, 0, 20);
- /* *INDENT-OFF* */
u8 expected_data[] = {
0x30, 0x00, 0x01, 0x01, /* type; rsvd; want notify; REC count */
0x00, 0x00, 0x00, 0x00,
@@ -509,7 +496,6 @@ test_lisp_map_register ()
0x00, 0x04, 0x00, 0x01, /* flags, AFI = ipv4 */
0x66, 0x77, 0x88, 0x99, /* ipv4 locator address */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_data, b->data, sizeof (expected_data)));
done:
@@ -537,20 +523,17 @@ test_lisp_parse_map_reply ()
{
clib_error_t *error = 0;
- /* *INDENT-OFF* */
u8 map_reply_data[] =
{
0x00, 0x00, 0x00, 0x01, /* type; rsvd; mapping count */
0x00, 0x00, 0x00, 0x00,
};
- /* *INDENT-ON* */
vlib_buffer_t *b = create_buffer (map_reply_data, sizeof (map_reply_data));
map_records_arg_t *mrecs = parse_map_reply (b);
_assert (0 == mrecs);
clib_mem_free (b);
- /* *INDENT-OFF* */
u8 map_reply_data2[] =
{
0x00, 0x00, 0x00, 0x01, /* type; rsvd */
@@ -561,7 +544,6 @@ test_lisp_parse_map_reply ()
0x01, 0x02, 0x03, 0x04, /* record TTL */
0x01, /* locator count */
};
- /* *INDENT-ON* */
b = create_buffer (map_reply_data2, sizeof (map_reply_data2));
mrecs = parse_map_reply (b);
@@ -585,7 +567,6 @@ test_lisp_parse_lcaf ()
b = clib_mem_alloc (buff_len);
clib_memset ((u8 *) b, 0, buff_len);
- /* *INDENT-OFF* */
u8 map_reply_records[] =
{
/* 1. record */
@@ -644,7 +625,6 @@ test_lisp_parse_lcaf ()
0x00, 0x01, /* Loc-AFI */
0xaa, 0xbb, 0xcc, 0xdd, /* Locator */
};
- /* *INDENT-ON* */
b->current_length = buff_len;
memcpy (b->data, map_reply_records, sizeof (map_reply_records));
@@ -785,13 +765,11 @@ test_gid_parse_ip_pref ()
gid_address_t _gid_addr, *gid_addr = &_gid_addr;
gid_address_t _gid_addr_copy, *copy = &_gid_addr_copy;
- /* *INDENT-OFF* */
u8 data[] =
{
0x00, 0x01, /* AFI = IPv4 */
0x10, 0xbb, 0xcc, 0xdd, /* ipv4 address */
};
- /* *INDENT-ON* */
u32 len = gid_address_parse (data, gid_addr);
_assert (6 == len);
@@ -808,14 +786,12 @@ test_gid_parse_mac ()
gid_address_t _gid, *gid = &_gid;
gid_address_t _gid_copy, *gid_copy = &_gid_copy;
- /* *INDENT-OFF* */
u8 data[] =
{
0x40, 0x05, /* AFI = MAC address */
0x10, 0xbb, 0xcc, 0xdd, /* MAC */
0x77, 0x99,
};
- /* *INDENT-ON* */
u32 len = gid_address_parse (data, gid);
_assert (8 == len);
@@ -843,7 +819,6 @@ test_gid_write_nsh (void)
u16 len = gid_address_put (b, &g);
- /* *INDENT-OFF* */
u8 expected[] =
{
0x40, 0x03, 0x00, 0x00, /* AFI = LCAF*/
@@ -852,7 +827,6 @@ test_gid_write_nsh (void)
/* Service Path ID, Service index */
0x11, 0x22, 0x33, 0x42, /* SPI, SI */
};
- /* *INDENT-ON* */
_assert (sizeof (expected) == len);
_assert (0 == memcmp (expected, b, len));
@@ -871,7 +845,6 @@ test_gid_parse_nsh ()
clib_memset (gid_addr, 0, sizeof (gid_addr[0]));
clib_memset (copy, 0, sizeof (copy[0]));
- /* *INDENT-OFF* */
u8 data[] =
{
0x40, 0x03, 0x00, 0x00, /* AFI = LCAF*/
@@ -880,7 +853,6 @@ test_gid_parse_nsh ()
/* Service Path ID, Service index */
0x55, 0x99, 0x42, 0x09, /* SPI, SI */
};
- /* *INDENT-ON* */
u32 len = gid_address_parse (data, gid_addr);
_assert (sizeof (data) == len);
@@ -907,7 +879,6 @@ test_gid_parse_lcaf ()
clib_memset (gid_addr, 0, sizeof (gid_addr[0]));
clib_memset (gid_addr_copy, 0, sizeof (gid_addr_copy[0]));
- /* *INDENT-OFF* */
u8 data[] =
{
0x40, 0x03, /* AFI = LCAF*/
@@ -922,7 +893,6 @@ test_gid_parse_lcaf ()
0x00, 0x01, /* AFI = ipv4 */
0x10, 0xbb, 0xcc, 0xdd, /* ipv4 address */
};
- /* *INDENT-ON* */
u32 len = gid_address_parse (data, gid_addr);
_assert (18 == len);
@@ -951,7 +921,6 @@ test_gid_parse_lcaf_complex ()
clib_memset (gid_addr, 0, sizeof (gid_addr[0]));
clib_memset (gid_addr_copy, 0, sizeof (gid_addr_copy[0]));
- /* *INDENT-OFF* */
u8 data[] = {
0x40, 0x03, /* AFI = LCAF */
@@ -988,7 +957,6 @@ test_gid_parse_lcaf_complex ()
0x10, 0xbb, 0xcc, 0xdd,
0x10, 0xbb, 0xcc, 0xdd, /* ipv6 address */
};
- /* *INDENT-ON* */
u32 len = gid_address_parse (data, gid_addr);
_assert (54 == len);
@@ -1056,7 +1024,6 @@ test_write_mac_in_lcaf (void)
u16 len = gid_address_put (b, &g);
- /* *INDENT-OFF* */
u8 expected[] =
{
0x40, 0x03, /* AFI = LCAF */
@@ -1071,7 +1038,6 @@ test_write_mac_in_lcaf (void)
0x01, 0x02, 0x03, 0x04,
0x05, 0x06 /* MAC */
};
- /* *INDENT-ON* */
_assert (sizeof (expected) == len);
_assert (0 == memcmp (expected, b, len));
@@ -1096,14 +1062,12 @@ test_mac_address_write (void)
u16 len = gid_address_put (b, &g);
_assert (8 == len);
- /* *INDENT-OFF* */
u8 expected[] =
{
0x40, 0x05, /* AFI = MAC */
0x01, 0x02, 0x03, 0x04,
0x05, 0x06 /* MAC */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected, b, len));
done:
@@ -1118,7 +1082,6 @@ test_src_dst_with_vni_serdes (void)
u8 *b = clib_mem_alloc (500);
clib_memset (b, 0, 500);
- /* *INDENT-OFF* */
fid_address_t src =
{
.type = FID_ADDR_IP_PREF,
@@ -1161,7 +1124,6 @@ test_src_dst_with_vni_serdes (void)
.vni_mask = 0x9
};
- /* *INDENT-ON* */
u16 size_to_put = gid_address_size_to_put (&g);
_assert (36 == size_to_put);
@@ -1170,7 +1132,6 @@ test_src_dst_with_vni_serdes (void)
u16 write_len = gid_address_put (b, &g);
_assert (size_to_put == write_len);
- /* *INDENT-OFF* */
u8 expected_data[] =
{
0x40, 0x03, 0x00, 0x00, /* AFI = LCAF, reserved1, flags */
@@ -1187,7 +1148,6 @@ test_src_dst_with_vni_serdes (void)
0x00, 0x01, /* AFI = ip4 */
0x09, 0x08, 0x00, 0x00, /* destination */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_data, b, sizeof (expected_data)));
@@ -1205,7 +1165,6 @@ test_src_dst_deser_bad_afi (void)
{
clib_error_t *error = 0;
- /* *INDENT-OFF* */
u8 expected_data[] =
{
0x40, 0x03, 0x00, 0x00, /* AFI = LCAF, reserved1, flags */
@@ -1220,7 +1179,6 @@ test_src_dst_deser_bad_afi (void)
0x10, 0x21, 0x32, 0x43,
0x54, 0x65, /* destination */
};
- /* *INDENT-ON* */
gid_address_t p;
_assert (~0 == gid_address_parse (expected_data, &p));
@@ -1265,7 +1223,6 @@ test_src_dst_serdes (void)
u16 write_len = gid_address_put (b, &g);
_assert (size_to_put == write_len);
- /* *INDENT-OFF* */
u8 expected_data[] =
{
0x40, 0x03, 0x00, 0x00, /* AFI = LCAF, reserved1, flags */
@@ -1280,7 +1237,6 @@ test_src_dst_serdes (void)
0x10, 0x21, 0x32, 0x43,
0x54, 0x65, /* destination */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_data, b, sizeof (expected_data)));
@@ -1320,7 +1276,6 @@ test_gid_address_write (void)
u16 write_len = gid_address_put (b, &g);
_assert (18 == write_len);
- /* *INDENT-OFF* */
u8 expected_gid_data[] =
{
0x40, 0x03, /* AFI = LCAF */
@@ -1334,7 +1289,6 @@ test_gid_address_write (void)
0x00, 0x01, /* AFI = IPv4 */
0xdd, 0xcc, 0xbb, 0xaa, /* ipv4 addr */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_gid_data, b, sizeof (expected_gid_data)));
done:
@@ -1413,25 +1367,21 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_command, static) =
{
.path = "test lisp cp",
.short_help = "lisp cp internal unit tests",
.function = lisp_cp_test,
};
-/* *INDENT-ON* */
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Test Locator ID Separation Protocol (LISP)",
.default_disabled = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lldp/lldp.api b/src/plugins/lldp/lldp.api
index 6be060b0dd1..c5edee7cf39 100644
--- a/src/plugins/lldp/lldp.api
+++ b/src/plugins/lldp/lldp.api
@@ -56,3 +56,79 @@ autoreply define sw_interface_set_lldp
bool enable [default=true];
string port_desc[];
};
+
+/** \brief Dump lldp neighbors
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param cursor - cursor for the neighbor table walk, 0 to start
+*/
+autoendian define lldp_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+};
+
+autoendian define lldp_dump_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+enum port_id_subtype
+{
+ PORT_ID_SUBTYPE_RESERVED = 0x00,
+ PORT_ID_SUBTYPE_INTF_ALIAS = 0x01,
+ PORT_ID_SUBTYPE_PORT_COMP = 0x02,
+ PORT_ID_SUBTYPE_MAC_ADDR = 0x03,
+ PORT_ID_SUBTYPE_NET_ADDR = 0x04,
+ PORT_ID_SUBTYPE_INTF_NAME = 0x05,
+ PORT_ID_SUBTYPE_AGENT_CIRCUIT_ID = 0x06,
+ PORT_ID_SUBTYPE_LOCAL = 0x07,
+};
+
+enum chassis_id_subtype
+{
+ CHASSIS_ID_SUBTYPE_RESERVED = 0x00,
+ CHASSIS_ID_SUBTYPE_CHASSIS_COMP = 0x01,
+ CHASSIS_ID_SUBTYPE_INTF_ALIAS = 0x02,
+ CHASSIS_ID_SUBTYPE_PORT_COMP = 0x03,
+ CHASSIS_ID_SUBTYPE_MAC_ADDR = 0x04,
+ CHASSIS_ID_SUBTYPE_NET_ADDR = 0x05,
+ CHASSIS_ID_SUBTYPE_INTF_NAME = 0x06,
+ CHASSIS_ID_SUBTYPE_LOCAL = 0x07,
+};
+
+/** \brief Details about neighbor
+ @param context - returned sender context, to match reply w/ request
+ @param sw_if_index - interface where neighbor was discovered
+ @param last_heard - last heard time
+ @param last_sent - last sent time
+ @param chassis_id - chassis id value
+ @param chassis_id_len - length for chassis id
+ @param port_id - port id value
+ @param port_id_len - length for port id
+ @param ttl - time to live for the neighbor
+ @param port_id_subtype - subtype for port_id
+ @param chassis_id_subtype - subtype for chassis_id
+*/
+autoendian define lldp_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ f64 last_heard;
+ f64 last_sent;
+ u8 chassis_id[64];
+ u8 chassis_id_len;
+ u8 port_id[64];
+ u8 port_id_len;
+ u16 ttl;
+ vl_api_port_id_subtype_t port_id_subtype;
+ vl_api_chassis_id_subtype_t chassis_id_subtype;
+ option status="in_progress";
+};
+
+service {
+ rpc lldp_dump returns lldp_dump_reply
+ stream lldp_details;
+};
diff --git a/src/plugins/lldp/lldp_api.c b/src/plugins/lldp/lldp_api.c
index 69eab6949c4..bb5d1cbb5e8 100644
--- a/src/plugins/lldp/lldp_api.c
+++ b/src/plugins/lldp/lldp_api.c
@@ -23,6 +23,7 @@
#include <vnet/interface.h>
#include <vnet/api_errno.h>
#include <lldp/lldp.h>
+#include <lldp/lldp_node.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
@@ -42,7 +43,7 @@ static u32 lldp_base_msg_id;
#include <vlibapi/api_helper_macros.h>
static void
-vl_api_lldp_config_t_handler (vl_api_lldp_config_t * mp)
+vl_api_lldp_config_t_handler (vl_api_lldp_config_t *mp)
{
vl_api_lldp_config_reply_t *rmp;
int rv = 0;
@@ -50,8 +51,8 @@ vl_api_lldp_config_t_handler (vl_api_lldp_config_t * mp)
sys_name = vl_api_from_api_to_new_vec (mp, &mp->system_name);
- if (lldp_cfg_set (&sys_name, ntohl (mp->tx_hold), ntohl (mp->tx_interval))
- != lldp_ok)
+ if (lldp_cfg_set (&sys_name, ntohl (mp->tx_hold), ntohl (mp->tx_interval)) !=
+ lldp_ok)
{
vec_free (sys_name);
rv = VNET_API_ERROR_INVALID_VALUE;
@@ -61,7 +62,7 @@ vl_api_lldp_config_t_handler (vl_api_lldp_config_t * mp)
}
static void
-vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
+vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t *mp)
{
vl_api_sw_interface_set_lldp_reply_t *rmp;
int rv = 0;
@@ -81,7 +82,7 @@ vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
if (ip4.as_u32 != 0)
{
vec_validate (mgmt_ip4, sizeof (ip4_address_t) - 1);
- clib_memcpy (mgmt_ip4, &ip4, vec_len (mgmt_ip4));
+ clib_memcpy (mgmt_ip4, &ip4, sizeof (ip4));
}
ip6_address_decode (mp->mgmt_ip6, &ip6);
@@ -89,7 +90,7 @@ vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
if (!ip6_address_is_zero (&ip6))
{
vec_validate (mgmt_ip6, sizeof (ip6_address_t) - 1);
- clib_memcpy (mgmt_ip6, &ip6, vec_len (mgmt_ip6));
+ clib_memcpy (mgmt_ip6, &ip6, sizeof (ip6));
}
if (memcmp (mp->mgmt_oid, no_data, strlen ((char *) mp->mgmt_oid)) != 0)
@@ -100,7 +101,7 @@ vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
VALIDATE_SW_IF_INDEX (mp);
- if (lldp_cfg_intf_set (ntohl (mp->sw_if_index), (u8 **) & port_desc,
+ if (lldp_cfg_intf_set (ntohl (mp->sw_if_index), (u8 **) &port_desc,
&mgmt_ip4, &mgmt_ip6, &mgmt_oid,
mp->enable) != lldp_ok)
{
@@ -116,6 +117,41 @@ vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
REPLY_MACRO (VL_API_SW_INTERFACE_SET_LLDP_REPLY);
}
+static void
+send_lldp (u32 index, vl_api_registration_t *rp, u32 context)
+{
+ vl_api_lldp_details_t *rmp = 0;
+ vnet_main_t *vnm = &vnet_main;
+ lldp_main_t *lm = &lldp_main;
+ const lldp_intf_t *n = vec_elt_at_index (lm->intfs, index);
+ const vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, n->hw_if_index);
+
+ REPLY_MACRO_DETAILS4_END (
+ VL_API_LLDP_DETAILS, rp, context, ({
+ rmp->sw_if_index = hw->sw_if_index;
+ rmp->last_heard = n->last_heard;
+ rmp->last_sent = n->last_sent;
+ rmp->ttl = n->ttl;
+ rmp->port_id_subtype = (vl_api_port_id_subtype_t) n->port_id_subtype;
+ rmp->chassis_id_subtype =
+ (vl_api_chassis_id_subtype_t) n->chassis_id_subtype;
+ rmp->chassis_id_len = vec_len (n->chassis_id);
+ clib_memcpy (&rmp->chassis_id, n->chassis_id, rmp->chassis_id_len);
+ rmp->port_id_len = vec_len (n->port_id);
+ clib_memcpy (&rmp->port_id, n->port_id, rmp->port_id_len);
+ }));
+}
+
+static void
+vl_api_lldp_dump_t_handler (vl_api_lldp_dump_t *mp)
+{
+ int rv = 0;
+ lldp_main_t *lm = &lldp_main;
+ vl_api_lldp_dump_reply_t *rmp;
+
+ REPLY_AND_DETAILS_MACRO_END (VL_API_LLDP_DUMP_REPLY, lm->intfs,
+ ({ send_lldp (cursor, rp, mp->context); }));
+}
/*
* * lldp_api_hookup
@@ -127,7 +163,7 @@ vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
#include <lldp/lldp.api.c>
static clib_error_t *
-lldp_api_hookup (vlib_main_t * vm)
+lldp_api_hookup (vlib_main_t *vm)
{
/*
* Set up the (msg_name, crc, message-id) table
@@ -142,13 +178,10 @@ VLIB_API_INIT_FUNCTION (lldp_api_hookup);
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
- .version = VPP_BUILD_VER,
- .description = "Link Layer Discovery Protocol (LLDP)",
+ .version = VPP_BUILD_VER,
+ .description = "Link Layer Discovery Protocol (LLDP)",
};
-/* *INDENT-ON* */
-
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lldp/lldp_cli.c b/src/plugins/lldp/lldp_cli.c
index e77d699393c..1ed3efa4251 100644
--- a/src/plugins/lldp/lldp_cli.c
+++ b/src/plugins/lldp/lldp_cli.c
@@ -103,7 +103,7 @@ lldp_cfg_intf_set (u32 hw_if_index, u8 ** port_desc, u8 ** mgmt_ip4,
}
/* Add MAC address to an interface's filter */
- if (hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER)
+ if (hi->caps & VNET_HW_IF_CAP_MAC_FILTER)
{
error =
vnet_hw_interface_add_del_mac_address (lm->vnet_main,
@@ -130,7 +130,7 @@ lldp_cfg_intf_set (u32 hw_if_index, u8 ** port_desc, u8 ** mgmt_ip4,
lldp_intf_t *n = lldp_get_intf (lm, hi->sw_if_index);
lldp_delete_intf (lm, n);
/* Remove MAC address from the interface's filter */
- if ((n) && (hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER))
+ if ((n) && (hi->caps & VNET_HW_IF_CAP_MAC_FILTER))
{
error =
vnet_hw_interface_add_del_mac_address (lm->vnet_main,
@@ -175,13 +175,13 @@ lldp_intf_cmd (vlib_main_t * vm, unformat_input_t * input,
if (unformat (input, "mgmt-ip4 %U", unformat_ip4_address, &ip4_addr))
{
vec_validate (mgmt_ip4, sizeof (ip4_address_t) - 1);
- clib_memcpy (mgmt_ip4, &ip4_addr, vec_len (mgmt_ip4));
+ clib_memcpy (mgmt_ip4, &ip4_addr, sizeof (ip4_addr));
}
else
if (unformat (input, "mgmt-ip6 %U", unformat_ip6_address, &ip6_addr))
{
vec_validate (mgmt_ip6, sizeof (ip6_address_t) - 1);
- clib_memcpy (mgmt_ip6, &ip6_addr, vec_len (mgmt_ip6));
+ clib_memcpy (mgmt_ip6, &ip6_addr, sizeof (ip6_addr));
}
else if (unformat (input, "mgmt-oid %s", &mgmt_oid))
;
@@ -298,7 +298,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(set_interface_lldp_cmd, static) = {
.path = "set interface lldp",
.short_help = "set interface lldp <interface> | sw_if_index <idx>"
@@ -313,7 +312,6 @@ VLIB_CLI_COMMAND(set_lldp_cmd, static) = {
"[tx-interval <value>]",
.function = lldp_cfg_cmd,
};
-/* *INDENT-ON* */
static const char *
lldp_chassis_id_subtype_str (lldp_chassis_id_subtype_t t)
@@ -580,7 +578,6 @@ format_lldp_intfs_detail (u8 * s, vlib_main_t * vm, const lldp_main_t * lm)
s = format (s, "\nLLDP-enabled interface table:\n");
f64 now = vlib_time_now (vm);
- /* *INDENT-OFF* */
pool_foreach (
n, lm->intfs) {
hw = vnet_get_hw_interface(vnm, n->hw_if_index);
@@ -640,7 +637,6 @@ format_lldp_intfs_detail (u8 * s, vlib_main_t * vm, const lldp_main_t * lm)
now, format_time_ago, n->last_heard, now);
}
}
- /* *INDENT-ON* */
return s;
}
@@ -663,7 +659,6 @@ format_lldp_intfs (u8 * s, va_list * va)
"Peer chassis ID", "Remote port ID", "Last heard", "Last sent",
"Status");
- /* *INDENT-OFF* */
pool_foreach (
n, lm->intfs) {
const vnet_hw_interface_t *hw =
@@ -689,7 +684,6 @@ format_lldp_intfs (u8 * s, va_list * va)
format_time_ago, n->last_sent, now, "inactive");
}
}
- /* *INDENT-ON* */
return s;
}
@@ -710,13 +704,11 @@ show_lldp (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(show_lldp_command, static) = {
.path = "show lldp",
.short_help = "show lldp [detail]",
.function = show_lldp,
};
-/* *INDENT-ON* */
/*
* packet trace format function, very similar to
diff --git a/src/plugins/lldp/lldp_doc.md b/src/plugins/lldp/lldp_doc.md
deleted file mode 100644
index 717de898c4e..00000000000
--- a/src/plugins/lldp/lldp_doc.md
+++ /dev/null
@@ -1,86 +0,0 @@
-# VPP Link Layer Discovery Protocol (LLDP) implementation {#lldp_doc}
-
-This is a memo intended to contain documentation of the VPP LLDP implementation
-Everything that is not directly obvious should come here.
-
-
-## LLDP
-LLDP is a link layer protocol to advertise the capabilities and current status of the system.
-
-There are 2 nodes handling LLDP
-
-1.) input-node which processes incoming packets and updates the local database
-2.) process-node which is responsible for sending out LLDP packets from VPP side
-
-
-### Configuration
-
-LLDP has a global configuration and a per-interface enable setting.
-
-Global configuration is modified using the "set lldp" command
-
-set lldp [system-name <string>] [tx-hold <value>] [tx-interval <value>]
-
-system-name: the name of the VPP system sent to peers in the system-name TLV
-tx-hold: multiplier for tx-interval when setting time-to-live (TTL) value in the LLDP packets (TTL = tx-hold * tx-interval + 1, if TTL > 65535, then TTL = 65535)
-tx-interval: time interval between sending out LLDP packets
-
-Per interface setting is done using the "set interface lldp" command
-
-set interface lldp <interface> | if_index <idx> [port-desc <string>] [disable]
-
-interface: the name of the interface for which to enable/disable LLDP
-if_index: sw interface index can be used if interface name is not used.
-port-desc: port description
-disable: LLDP feature can be enabled or disabled per interface.
-
-### Configuration example
-
-Configure system-name as "VPP" and transmit interval to 10 seconds:
-
-set lldp system-name VPP tx-interval 10
-
-Enable LLDP on interface TenGigabitEthernet5/0/1 with port description
-
-set interface lldp TenGigabitEthernet5/0/1 port-desc vtf:eth0
-
-
-### Operational data
-
-The list of LLDP-enabled interfaces which are up can be shown using "show lldp" command
-
-Example:
-DBGvpp# show lldp
-Local interface Peer chassis ID Remote port ID Last heard Last sent Status
-GigabitEthernet2/0/1 never 27.0s ago inactive
-TenGigabitEthernet5/0/1 8c:60:4f:dd:ca:52 Eth1/3/3 20.1s ago 18.3s ago active
-
-All LLDP configuration data with all LLDP-enabled interfaces can be shown using "show lldp detail" command
-
-Example:
-DBGvpp# show lldp detail
-LLDP configuration:
-Configured system name: vpp
-Configured tx-hold: 4
-Configured tx-interval: 30
-
-LLDP-enabled interface table:
-
-Interface name: GigabitEthernet2/0/1
-Interface/peer state: inactive(timeout)
-Last known peer chassis ID:
-Last known peer port ID:
-Last packet sent: 12.4s ago
-Last packet received: never
-
-Interface name: GigabitEthernet2/0/2
-Interface/peer state: interface down
-Last packet sent: never
-
-Interface name: TenGigabitEthernet5/0/1
-Interface/peer state: active
-Peer chassis ID: 8c:60:4f:dd:ca:52(MAC address)
-Remote port ID: Eth1/3/3(Locally assigned)
-Last packet sent: 3.6s ago
-Last packet received: 5.5s ago
-
diff --git a/src/plugins/lldp/lldp_doc.rst b/src/plugins/lldp/lldp_doc.rst
new file mode 100644
index 00000000000..a6737985aab
--- /dev/null
+++ b/src/plugins/lldp/lldp_doc.rst
@@ -0,0 +1,84 @@
+LLDP Protocol
+=============
+
+This is a memo intended to contain documentation of the VPP LLDP (Link
+Layer Discovery Protocol) implementation. Everything that is not
+directly obvious should come here.
+
+LLDP
+----
+
+LLDP is a link layer protocol to advertise the capabilities and current
+status of the system.
+
+There are 2 nodes handling LLDP:
+
+1. input-node, which processes incoming packets and updates the local
+   database
+2. process-node, which is responsible for sending out LLDP packets from
+   the VPP side
+
+Configuration
+~~~~~~~~~~~~~
+
+LLDP has a global configuration and a per-interface enable setting.
+
+Global configuration is modified using the “set lldp” command:
+
+set lldp [system-name <string>] [tx-hold <value>] [tx-interval <value>]
+
+system-name: the name of the VPP system sent to peers in the
+system-name TLV
+
+tx-hold: multiplier for tx-interval when setting the time-to-live (TTL)
+value in the LLDP packets (TTL = tx-hold * tx-interval + 1; if TTL >
+65535, then TTL = 65535)
+
+tx-interval: time interval between sending out LLDP packets
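+
+For example, with tx-hold 4 and tx-interval 30 (the values shown in the
+“show lldp detail” output below), TTL = 4 * 30 + 1 = 121 seconds.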
+
+Per interface setting is done using the “set interface lldp” command:
+
+set interface lldp <interface> | if_index <idx> [port-desc <string>]
+[disable]
+
+interface: the name of the interface for which to enable/disable LLDP
+
+if_index: sw interface index can be used if the interface name is not
+used.
+
+port-desc: port description
+
+disable: LLDP feature can be enabled or disabled per interface.
+
+Configuration example
+~~~~~~~~~~~~~~~~~~~~~
+
+Configure system-name as “VPP” and transmit interval to 10 seconds:
+
+set lldp system-name VPP tx-interval 10
+
+Enable LLDP on interface TenGigabitEthernet5/0/1 with port description
+
+set interface lldp TenGigabitEthernet5/0/1 port-desc vtf:eth0
+
+Operational data
+~~~~~~~~~~~~~~~~
+
+The list of LLDP-enabled interfaces which are up can be shown using the
+“show lldp” command.
+
+Example:
+
+DBGvpp# show lldp
+Local interface          Peer chassis ID    Remote port ID  Last heard  Last sent  Status
+GigabitEthernet2/0/1                                        never       27.0s ago  inactive
+TenGigabitEthernet5/0/1  8c:60:4f:dd:ca:52  Eth1/3/3        20.1s ago   18.3s ago  active
+
+All LLDP configuration data with all LLDP-enabled interfaces can be
+shown using the “show lldp detail” command.
+
+Example:
+
+DBGvpp# show lldp detail
+LLDP configuration:
+Configured system name: vpp
+Configured tx-hold: 4
+Configured tx-interval: 30
+
+LLDP-enabled interface table:
+
+Interface name: GigabitEthernet2/0/1
+Interface/peer state: inactive(timeout)
+Last known peer chassis ID:
+Last known peer port ID:
+Last packet sent: 12.4s ago
+Last packet received: never
+
+Interface name: GigabitEthernet2/0/2
+Interface/peer state: interface down
+Last packet sent: never
+
+Interface name: TenGigabitEthernet5/0/1
+Interface/peer state: active
+Peer chassis ID: 8c:60:4f:dd:ca:52(MAC address)
+Remote port ID: Eth1/3/3(Locally assigned)
+Last packet sent: 3.6s ago
+Last packet received: 5.5s ago
diff --git a/src/plugins/lldp/lldp_input.c b/src/plugins/lldp/lldp_input.c
index 327ef10f9de..b8aa846e385 100644
--- a/src/plugins/lldp/lldp_input.c
+++ b/src/plugins/lldp/lldp_input.c
@@ -48,13 +48,13 @@ lldp_rpc_update_peer_cb (const lldp_intf_update_t * a)
if (n->chassis_id)
{
- _vec_len (n->chassis_id) = 0;
+ vec_set_len (n->chassis_id, 0);
}
vec_add (n->chassis_id, chassis_id, a->chassis_id_len);
n->chassis_id_subtype = a->chassis_id_subtype;
if (n->port_id)
{
- _vec_len (n->port_id) = 0;
+ vec_set_len (n->port_id, 0);
}
vec_add (n->port_id, portid, a->portid_len);
n->port_id_subtype = a->portid_subtype;
diff --git a/src/plugins/lldp/lldp_node.c b/src/plugins/lldp/lldp_node.c
index dbb54af91f0..dbbb5d46402 100644
--- a/src/plugins/lldp/lldp_node.c
+++ b/src/plugins/lldp/lldp_node.c
@@ -102,7 +102,6 @@ lldp_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
/*
* lldp input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(lldp_input_node, static) = {
.function = lldp_node_fn,
.name = "lldp-input",
@@ -120,7 +119,6 @@ VLIB_REGISTER_NODE(lldp_input_node, static) = {
[LLDP_INPUT_NEXT_NORMAL] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* lldp process node function
@@ -220,7 +218,7 @@ lldp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
#endif
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
@@ -230,13 +228,11 @@ lldp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
/*
* lldp process node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(lldp_process_node, static) = {
.function = lldp_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "lldp-process",
};
-/* *INDENT-ON* */
void
lldp_schedule_intf (lldp_main_t * lm, lldp_intf_t * n)
diff --git a/src/plugins/lldp/lldp_protocol.h b/src/plugins/lldp/lldp_protocol.h
index e641b26e20d..c4219162dfe 100644
--- a/src/plugins/lldp/lldp_protocol.h
+++ b/src/plugins/lldp/lldp_protocol.h
@@ -56,12 +56,10 @@ struct lldp_tlv_head
u8 byte2; /* contains the lower bits of length */
};
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
struct lldp_tlv_head head;
u8 v[0];
}) lldp_tlv_t;
-/* *INDENT-ON* */
lldp_tlv_code_t lldp_tlv_get_code (const lldp_tlv_t * tlv);
void lldp_tlv_set_code (lldp_tlv_t * tlv, lldp_tlv_code_t code);
@@ -89,13 +87,11 @@ typedef enum
#undef F
} lldp_chassis_id_subtype_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
struct lldp_tlv_head head;
u8 subtype;
u8 id[0];
}) lldp_chassis_id_tlv_t;
-/* *INDENT-ON* */
#define foreach_port_id_subtype(F) \
F (0, reserved, "Reserved") \
@@ -118,7 +114,6 @@ typedef enum
#undef F
} lldp_port_id_subtype_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
struct lldp_tlv_head head;
u8 subtype;
@@ -129,7 +124,6 @@ typedef CLIB_PACKED (struct {
struct lldp_tlv_head head;
u16 ttl;
}) lldp_ttl_tlv_t;
-/* *INDENT-ON* */
#endif /* __included_lldp_protocol_h__ */
diff --git a/src/plugins/lldp/lldp_test.c b/src/plugins/lldp/lldp_test.c
index 661487c7835..ba5ecb20260 100644
--- a/src/plugins/lldp/lldp_test.c
+++ b/src/plugins/lldp/lldp_test.c
@@ -38,13 +38,11 @@ lldp_test_main_t lldp_test_main;
#define __plugin_msg_base lldp_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static int
api_lldp_config (vat_main_t * vam)
@@ -144,4 +142,16 @@ api_sw_interface_set_lldp (vat_main_t * vam)
return ret;
}
+static int
+api_lldp_dump (vat_main_t *vam)
+{
+ return 0;
+}
+
+static void
+vl_api_lldp_dump_reply_t_handler (vl_api_lldp_dump_reply_t *mp)
+{
+ /* not yet implemented */
+}
+
#include <lldp/lldp.api_test.c>
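
For context, the FINISH macro above is used by VAT custom-dump
functions, which build a format string s and print it to the supplied
handle. A minimal sketch of a caller, assuming the conventional
vl_api_..._t_print signature and the tx_hold field from lldp.api
(illustrative only):

    static void *
    vl_api_lldp_config_t_print (vl_api_lldp_config_t *mp, void *handle)
    {
      u8 *s = 0;
      s = format (s, "SCRIPT: lldp_config ");
      s = format (s, "tx-hold %u ", clib_net_to_host_u32 (mp->tx_hold));
      FINISH;
    }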
diff --git a/src/plugins/mactime/CMakeLists.txt b/src/plugins/mactime/CMakeLists.txt
index debf033f71e..7111f415f25 100644
--- a/src/plugins/mactime/CMakeLists.txt
+++ b/src/plugins/mactime/CMakeLists.txt
@@ -38,6 +38,5 @@ if(VPP_BUILD_MACTIME_TOP)
svm
vppinfra
Threads::Threads
- rt m dl crypto
)
endif()
diff --git a/src/plugins/mactime/builtins.c b/src/plugins/mactime/builtins.c
index 321502454a2..c487d0375bf 100644
--- a/src/plugins/mactime/builtins.c
+++ b/src/plugins/mactime/builtins.c
@@ -1,5 +1,4 @@
#include <vnet/vnet.h>
-#include <builtinurl/builtinurl.h>
#include <http_static/http_static.h>
#include <mactime/mactime.h>
#include <vlib/unix/plugin.h>
@@ -15,9 +14,8 @@ mactime_ip_neighbor_copy (index_t ipni, void *ctx)
return (WALK_CONTINUE);
}
-static int
-handle_get_mactime (http_builtin_method_type_t reqtype,
- u8 * request, http_session_t * hs)
+static hss_url_handler_rc_t
+handle_get_mactime (hss_url_handler_args_t *args)
{
mactime_main_t *mm = &mactime_main;
mactime_device_t *dp;
@@ -147,21 +145,20 @@ handle_get_mactime (http_builtin_method_type_t reqtype,
vec_free (macstring);
vec_free (pool_indices);
- hs->data = s;
- hs->data_offset = 0;
- hs->cache_pool_index = ~0;
- hs->free_data = 1;
- return 0;
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ return HSS_URL_HANDLER_OK;
}
void
mactime_url_init (vlib_main_t * vm)
{
- void (*fp) (void *, char *, int);
+ hss_register_url_fn fp;
/* Look up the builtin URL registration handler */
fp = vlib_get_plugin_symbol ("http_static_plugin.so",
- "http_static_server_register_builtin_handler");
+ "hss_register_url_handler");
if (fp == 0)
{
@@ -169,7 +166,7 @@ mactime_url_init (vlib_main_t * vm)
return;
}
- (*fp) (handle_get_mactime, "mactime.json", HTTP_BUILTIN_METHOD_GET);
+ (*fp) (handle_get_mactime, "mactime.json", HTTP_REQ_GET);
}
/*
diff --git a/src/plugins/mactime/mactime.c b/src/plugins/mactime/mactime.c
index b8b1884119d..933e44ea5c1 100644
--- a/src/plugins/mactime/mactime.c
+++ b/src/plugins/mactime/mactime.c
@@ -28,7 +28,6 @@
#include <mactime/mactime.api_enum.h>
#include <mactime/mactime.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define REPLY_MSG_ID_BASE mm->msg_id_base
#include <vlibapi/api_helper_macros.h>
@@ -141,7 +140,6 @@ mactime_enable_disable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mactime_enable_disable_command, static) =
{
.path = "mactime enable-disable",
@@ -149,7 +147,6 @@ VLIB_CLI_COMMAND (mactime_enable_disable_command, static) =
"mactime enable-disable <interface-name> [disable]",
.function = mactime_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/** Enable / disable time-base src mac filtration on an interface
@@ -194,7 +191,6 @@ vl_api_mactime_dump_t_handler (vl_api_mactime_dump_t * mp)
goto send_reply;
}
- /* *INDENT-OFF* */
pool_foreach (dev, mm->devices)
{
message_size = sizeof(*ep) + vec_len(dev->device_name) +
@@ -231,15 +227,12 @@ vl_api_mactime_dump_t_handler (vl_api_mactime_dump_t * mp)
ep->device_name [ARRAY_LEN(ep->device_name) -1] = 0;
vl_api_send_msg (rp, (u8 *)ep);
}
- /* *INDENT-OFF* */
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_MACTIME_DUMP_REPLY,
({
rmp->table_epoch = clib_host_to_net_u32 (mm->device_table_epoch);
}));
- /* *INDENT-ON* */
}
/** Create a lookup table entry for the indicated mac address
@@ -429,12 +422,10 @@ mactime_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (mactime_init) =
{
.runs_after = VLIB_INITS("ip_neighbor_init"),
};
-/* *INDENT-ON* */
static clib_error_t *
mactime_config (vlib_main_t * vm, unformat_input_t * input)
@@ -462,30 +453,24 @@ mactime_config (vlib_main_t * vm, unformat_input_t * input)
VLIB_CONFIG_FUNCTION (mactime_config, "mactime");
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (mactime, static) =
{
.arc_name = "device-input",
.node_name = "mactime",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (mactime_tx, static) = {
.arc_name = "interface-output",
.node_name = "mactime-tx",
.runs_before = VNET_FEATURES ("interface-output-arc-end"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Time-based MAC Source Address Filter",
};
-/* *INDENT-ON* */
u8 *
format_bytes_with_width (u8 * s, va_list * va)
@@ -576,12 +561,10 @@ show_mactime_command_fn (vlib_main_t * vm,
if (verbose)
vlib_cli_output (vm, "Time now: %U", format_clib_timebase_time, now);
- /* *INDENT-OFF* */
pool_foreach (dp, mm->devices)
{
vec_add1 (pool_indices, dp - mm->devices);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%-15s %18s %14s %10s %11s %13s",
"Device Name", "Addresses", "Status",
@@ -637,7 +620,8 @@ show_mactime_command_fn (vlib_main_t * vm,
print:
vec_reset_length (macstring);
- macstring = format (0, "%U", format_mac_address, dp->mac_address);
+ macstring =
+ format (macstring, "%U", format_mac_address, dp->mac_address);
switch (current_status)
{
case 0:
@@ -692,14 +676,12 @@ show_mactime_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_mactime_command, static) =
{
.path = "show mactime",
.short_help = "show mactime [verbose]",
.function = show_mactime_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_mactime_command_fn (vlib_main_t * vm,
@@ -718,14 +700,12 @@ clear_mactime_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_mactime_command, static) =
{
.path = "clear mactime",
.short_help = "clear mactime counters",
.function = clear_mactime_command_fn,
};
-/* *INDENT-ON* */
diff --git a/src/plugins/mactime/mactime_test.c b/src/plugins/mactime/mactime_test.c
index 67655ccefad..5f31dca7012 100644
--- a/src/plugins/mactime/mactime_test.c
+++ b/src/plugins/mactime/mactime_test.c
@@ -281,12 +281,10 @@ api_mactime_dump (vat_main_t * vam)
W (ret);
fformat (vam->ofp, "%U", format_device, 0 /* header */ , 0 /* verbose */ );
- /* *INDENT-OFF* */
pool_foreach (dev, tm->devices)
{
fformat (vam->ofp, "%U", format_device, dev, verbose);
}
- /* *INDENT-ON* */
return ret;
}
diff --git a/src/plugins/mactime/mactime_top.c b/src/plugins/mactime/mactime_top.c
index 72d1964f32f..1517ec43e17 100644
--- a/src/plugins/mactime/mactime_top.c
+++ b/src/plugins/mactime/mactime_top.c
@@ -106,7 +106,6 @@ vl_api_mactime_details_t_handler (vl_api_mactime_details_t * mp)
}
}
-#define vl_print(handle, ...) fformat(handle, __VA_ARGS__)
#define vl_endianfun /* define message structures */
#include <mactime/mactime.api.h>
#undef vl_endianfun
@@ -143,14 +142,11 @@ connect_to_vpp (char *name)
if (mm->msg_id_base == (u16) ~ 0)
return -1;
-#define _(N,n) \
- vl_msg_api_set_handlers((VL_API_##N + mm->msg_id_base), \
- #n, \
- vl_api_##n##_t_handler, \
- vl_noop_handler, \
- vl_api_##n##_t_endian, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 1);
+#define _(N, n) \
+ vl_msg_api_set_handlers ((VL_API_##N + mm->msg_id_base), #n, \
+ vl_api_##n##_t_handler, vl_api_##n##_t_endian, \
+ vl_api_##n##_t_format, sizeof (vl_api_##n##_t), 1, \
+ vl_api_##n##_t_tojson, vl_api_##n##_t_fromjson);
foreach_mactime_api_msg;
#undef _
@@ -189,18 +185,16 @@ scrape_stats_segment (mt_main_t * mm)
mactime_device_t *dev;
stat_segment_access_t sa;
stat_client_main_t *sm = mm->stat_client_main;
- stat_segment_directory_entry_t *ep;
+ vlib_stats_entry_t *ep;
int need_update2 = 0;
static u32 *pool_indices;
int i, j;
vec_reset_length (pool_indices);
- /* *INDENT-OFF* */
pool_foreach (dev, mm->devices)
{
vec_add1 (pool_indices, dev->pool_index);
}
- /* *INDENT-ON* */
/* Nothing to do... */
if (vec_len (pool_indices) == 0)
@@ -443,13 +437,11 @@ print_device_table (mt_main_t * mm)
{
mactime_device_t *dev;
- fformat (stdout, "%U", format_device, 0 /* header */ , 0 /* verbose */ );
- /* *INDENT-OFF* */
+ fformat (stdout, "%U", format_device, NULL /* header */, 0 /* verbose */);
pool_foreach (dev, mm->devices)
{
fformat (stdout, "%U", format_device, dev, 0 /* verbose */);
}
- /* *INDENT-ON* */
}
int
diff --git a/src/plugins/mactime/node.c b/src/plugins/mactime/node.c
index 465cee380b1..fad487e666e 100644
--- a/src/plugins/mactime/node.c
+++ b/src/plugins/mactime/node.c
@@ -349,7 +349,6 @@ mactime_node_fn (vlib_main_t * vm,
return mactime_node_inline (vm, node, frame, 0 /* is_tx */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (mactime_node) =
{
.function = mactime_node_fn,
@@ -370,7 +369,6 @@ VLIB_REGISTER_NODE (mactime_node) =
[MACTIME_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
static uword
mactime_tx_node_fn (vlib_main_t * vm,
@@ -379,7 +377,6 @@ mactime_tx_node_fn (vlib_main_t * vm,
return mactime_node_inline (vm, node, frame, 1 /* is_tx */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (mactime_tx_node) =
{
.function = mactime_tx_node_fn,
@@ -400,7 +397,6 @@ VLIB_REGISTER_NODE (mactime_tx_node) =
[MACTIME_NEXT_ETHERNET_INPUT] = "ethernet-input", /* notused */
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/map/examples/gen-rules.py b/src/plugins/map/examples/gen-rules.py
index 7964aa9a359..3d98f65b95d 100755
--- a/src/plugins/map/examples/gen-rules.py
+++ b/src/plugins/map/examples/gen-rules.py
@@ -20,38 +20,64 @@ import sys
# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6
# map add rule index <0> psid <psid> ip6-dst <ip6-dst>
-def_ip4_pfx = '192.0.2.0/24'
-def_ip6_pfx = '2001:db8::/32'
-def_ip6_src = '2001:db8::1'
+def_ip4_pfx = "192.0.2.0/24"
+def_ip6_pfx = "2001:db8::/32"
+def_ip6_src = "2001:db8::1"
def_psid_offset = 6
def_psid_len = 6
def_ea_bits_len = 0
-parser = argparse.ArgumentParser(description='MAP VPP configuration generator')
-parser.add_argument('-t', action="store", dest="mapmode")
-parser.add_argument('-f', action="store", dest="format", default="vpp")
-parser.add_argument('--ip4-prefix', action="store", dest="ip4_pfx", default=def_ip4_pfx)
-parser.add_argument('--ip6-prefix', action="store", dest="ip6_pfx", default=def_ip6_pfx)
-parser.add_argument('--ip6-src', action="store", dest="ip6_src", default=def_ip6_src)
-parser.add_argument('--psid-len', action="store", dest="psid_len", default=def_psid_len)
-parser.add_argument('--psid-offset', action="store", dest="psid_offset", default=def_psid_offset)
-parser.add_argument('--ea-bits-len', action="store", dest="ea_bits_len", default=def_ea_bits_len)
+parser = argparse.ArgumentParser(description="MAP VPP configuration generator")
+parser.add_argument("-t", action="store", dest="mapmode")
+parser.add_argument("-f", action="store", dest="format", default="vpp")
+parser.add_argument("--ip4-prefix", action="store", dest="ip4_pfx", default=def_ip4_pfx)
+parser.add_argument("--ip6-prefix", action="store", dest="ip6_pfx", default=def_ip6_pfx)
+parser.add_argument("--ip6-src", action="store", dest="ip6_src", default=def_ip6_src)
+parser.add_argument("--psid-len", action="store", dest="psid_len", default=def_psid_len)
+parser.add_argument(
+ "--psid-offset", action="store", dest="psid_offset", default=def_psid_offset
+)
+parser.add_argument(
+ "--ea-bits-len", action="store", dest="ea_bits_len", default=def_ea_bits_len
+)
args = parser.parse_args()
+
#
# Print domain
#
def domain_print(i, ip4_pfx, ip6_pfx, ip6_src, eabits_len, psid_offset, psid_len):
- if format == 'vpp':
- print("map add domain ip4-pfx " + ip4_pfx + " ip6-pfx", ip6_pfx, "ip6-src " + ip6_src +
- " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len)
- if format == 'confd':
- print("vpp softwire softwire-instances softwire-instance", i, "br-ipv6 " + ip6_src +
- " ipv6-prefix " + ip6_pfx + " ipv4-prefix " + ip4_pfx +
- " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len)
- if format == 'xml':
+ if format == "vpp":
+ print(
+ "map add domain ip4-pfx " + ip4_pfx + " ip6-pfx",
+ ip6_pfx,
+ "ip6-src " + ip6_src + " ea-bits-len",
+ eabits_len,
+ "psid-offset",
+ psid_offset,
+ "psid-len",
+ psid_len,
+ )
+ if format == "confd":
+ print(
+ "vpp softwire softwire-instances softwire-instance",
+ i,
+ "br-ipv6 "
+ + ip6_src
+ + " ipv6-prefix "
+ + ip6_pfx
+ + " ipv4-prefix "
+ + ip4_pfx
+ + " ea-bits-len",
+ eabits_len,
+ "psid-offset",
+ psid_offset,
+ "psid-len",
+ psid_len,
+ )
+ if format == "xml":
print("<softwire-instance>")
- print("<id>", i, "</id>");
+ print("<id>", i, "</id>")
print(" <br-ipv6>" + ip6_src + "</br-ipv6>")
print(" <ipv6-prefix>" + ip6_pfx + "</ipv6-prefix>")
print(" <ipv4-prefix>" + ip4_pfx + "</ipv4-prefix>")
@@ -59,32 +85,54 @@ def domain_print(i, ip4_pfx, ip6_pfx, ip6_src, eabits_len, psid_offset, psid_len
print(" <psid-len>", psid_len, "</psid-len>")
print(" <psid-offset>", psid_offset, "</psid-offset>")
+
def domain_print_end():
- if format == 'xml':
+ if format == "xml":
print("</softwire-instance>")
+
def rule_print(i, psid, dst):
- if format == 'vpp':
+ if format == "vpp":
print("map add rule index", i, "psid", psid, "ip6-dst", dst)
- if format == 'confd':
+ if format == "confd":
print("binding", psid, "ipv6-addr", dst)
- if format == 'xml':
+ if format == "xml":
print(" <binding>")
print(" <psid>", psid, "</psid>")
print(" <ipv6-addr>", dst, "</ipv6-addr>")
print(" </binding>")
+
#
# Algorithmic mapping Shared IPv4 address
#
-def algo(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
- domain_print(0, ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len)
+def algo(
+ ip4_pfx_str,
+ ip6_pfx_str,
+ ip6_src_str,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ ip6_src_ecmp=False,
+):
+ domain_print(
+ 0, ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len
+ )
domain_print_end()
+
#
# 1:1 Full IPv4 address
#
-def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+def lw46(
+ ip4_pfx_str,
+ ip6_pfx_str,
+ ip6_src_str,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ ip6_src_ecmp=False,
+):
ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
ip6_src = ipaddress.ip_address(ip6_src_str)
ip6_dst = ipaddress.ip_network(ip6_pfx_str)
@@ -92,15 +140,26 @@ def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_l
mod = ip4_pfx.num_addresses / 1024
for i in range(ip4_pfx.num_addresses):
- domain_print(i, str(ip4_pfx[i]) + "/32", str(ip6_dst[i]) + "/128", str(ip6_src), 0, 0, 0)
+ domain_print(
+ i, str(ip4_pfx[i]) + "/32", str(ip6_dst[i]) + "/128", str(ip6_src), 0, 0, 0
+ )
domain_print_end()
if ip6_src_ecmp and not i % mod:
ip6_src = ip6_src + 1
+
#
# 1:1 Shared IPv4 address, shared BR (16) VPP CLI
#
-def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+def lw46_shared(
+ ip4_pfx_str,
+ ip6_pfx_str,
+ ip6_src_str,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ ip6_src_ecmp=False,
+):
ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
ip6_src = ipaddress.ip_address(ip6_src_str)
ip6_dst = ipaddress.ip_network(ip6_pfx_str)
@@ -109,7 +168,7 @@ def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset,
for i in range(ip4_pfx.num_addresses):
domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len)
for psid in range(0x1 << int(psid_len)):
- rule_print(i, psid, str(ip6_dst[(i * (0x1<<int(psid_len))) + psid]))
+ rule_print(i, psid, str(ip6_dst[(i * (0x1 << int(psid_len))) + psid]))
domain_print_end()
if ip6_src_ecmp and not i % mod:
ip6_src = ip6_src + 1
@@ -118,7 +177,15 @@ def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset,
#
# 1:1 Shared IPv4 address, shared BR
#
-def lw46_shared_b(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+def lw46_shared_b(
+ ip4_pfx_str,
+ ip6_pfx_str,
+ ip6_src_str,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ ip6_src_ecmp=False,
+):
ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
ip6_src = ipaddress.ip_address(ip6_src_str)
ip6_dst = list(ipaddress.ip_network(ip6_pfx_str).subnets(new_prefix=56))
@@ -127,15 +194,16 @@ def lw46_shared_b(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offse
for i in range(ip4_pfx.num_addresses):
domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len)
for psid in range(0x1 << psid_len):
- enduserprefix = list(ip6_dst.pop(0).subnets(new_prefix=64))[255-1]
- rule_print(i, psid, enduserprefix[(i * (0x1<<psid_len)) + psid])
+ enduserprefix = list(ip6_dst.pop(0).subnets(new_prefix=64))[255 - 1]
+ rule_print(i, psid, enduserprefix[(i * (0x1 << psid_len)) + psid])
domain_print_end()
if ip6_src_ecmp and not i % mod:
ip6_src = ip6_src + 1
def xml_header_print():
- print('''
+ print(
+ """
<?xml version="1.0" encoding="UTF-8"?>
<hello xmlns="urn:ietf:params:xml:ns:netconf:base:1.0">
<capabilities>
@@ -156,10 +224,13 @@ def xml_header_print():
<softwire>
<softwire-instances>
- ''')
+ """
+ )
+
def xml_footer_print():
- print('''
+ print(
+ """
</softwire-instances>
</softwire>
</vpp>
@@ -175,12 +246,20 @@ def xml_footer_print():
</rpc>
]]>]]>
- ''')
+ """
+ )
format = args.format
-if format == 'xml':
+if format == "xml":
xml_header_print()
-globals()[args.mapmode](args.ip4_pfx, args.ip6_pfx, args.ip6_src, args.ea_bits_len, args.psid_offset, args.psid_len)
-if format == 'xml':
+globals()[args.mapmode](
+ args.ip4_pfx,
+ args.ip6_pfx,
+ args.ip6_src,
+ args.ea_bits_len,
+ args.psid_offset,
+ args.psid_len,
+)
+if format == "xml":
xml_footer_print()
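
Usage note for the generator above: the -t argument selects one of the
generator functions by name via globals()[args.mapmode], so for example

    ./gen-rules.py -t lw46_shared -f vpp

emits "map add domain" / "map add rule" VPP CLI lines for the default
prefixes, while -f confd or -f xml selects the other output formats.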
diff --git a/src/plugins/map/examples/test_map.py b/src/plugins/map/examples/test_map.py
index 7a48964b3f2..f141ba3338c 100755
--- a/src/plugins/map/examples/test_map.py
+++ b/src/plugins/map/examples/test_map.py
@@ -1,128 +1,164 @@
#!/usr/bin/env python3
-import time,argparse,sys,cmd, unittest
+import time, argparse, sys, cmd, unittest
from ipaddress import *
-parser = argparse.ArgumentParser(description='VPP MAP test')
-parser.add_argument('-i', nargs='*', action="store", dest="inputdir")
+parser = argparse.ArgumentParser(description="VPP MAP test")
+parser.add_argument("-i", nargs="*", action="store", dest="inputdir")
args = parser.parse_args()
for dir in args.inputdir:
sys.path.append(dir)
from vpp_papi import *
+
#
# 1:1 Shared IPv4 address, shared BR (16) VPP CLI
#
-def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+def lw46_shared(
+ ip4_pfx_str,
+ ip6_pfx_str,
+ ip6_src_str,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ ip6_src_ecmp=False,
+):
ip4_pfx = ip_network(ip4_pfx_str)
ip6_src = ip_address(ip6_src_str)
ip6_dst = ip_network(ip6_pfx_str)
- ip6_nul = IPv6Address(u'0::0')
+ ip6_nul = IPv6Address("0::0")
mod = ip4_pfx.num_addresses / 1024
for i in range(ip4_pfx.num_addresses):
a = time.clock()
- t = map_add_domain(0, ip6_nul.packed, ip4_pfx[i].packed, ip6_src.packed, 0, 32, 128, ea_bits_len, psid_offset, psid_len, 0, 0)
- #print "Return from map_add_domain", t
+ t = map_add_domain(
+ 0,
+ ip6_nul.packed,
+ ip4_pfx[i].packed,
+ ip6_src.packed,
+ 0,
+ 32,
+ 128,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ 0,
+ 0,
+ )
+ # print "Return from map_add_domain", t
if t == None:
- print "map_add_domain failed"
+ print("map_add_domain failed")
continue
if t.retval != 0:
- print "map_add_domain failed", t
+ print(f"map_add_domain failed, {t}")
continue
for psid in range(0x1 << int(psid_len)):
- r = map_add_del_rule(0, t.index, 1, (ip6_dst[(i * (0x1<<int(psid_len))) + psid]).packed, psid)
- #print "Return from map_add_del_rule", r
+ r = map_add_del_rule(
+ 0,
+ t.index,
+ 1,
+ (ip6_dst[(i * (0x1 << int(psid_len))) + psid]).packed,
+ psid,
+ )
+ # print "Return from map_add_del_rule", r
if ip6_src_ecmp and not i % mod:
ip6_src = ip6_src + 1
- print "Running time:", time.clock() - a
+ print(f"Running time: {time.clock() - a}")
+
class TestMAP(unittest.TestCase):
- '''
+ """
def test_delete_all(self):
t = map_domain_dump(0)
self.assertNotEqual(t, None)
- print "Number of domains configured: ", len(t)
+ print(f"Number of domains configured: {len(t)}")
for d in t:
ts = map_del_domain(0, d.domainindex)
self.assertNotEqual(ts, None)
t = map_domain_dump(0)
self.assertNotEqual(t, None)
- print "Number of domains configured: ", len(t)
- self.assertEqual(len(t), 0)
+ print(f"Number of domains configured: {len(t)}")
+ self.assertEqual(len(t), 0)
- '''
+ """
def test_a_million_rules(self):
- ip4_pfx = u'192.0.2.0/24'
- ip6_pfx = u'2001:db8::/32'
- ip6_src = u'2001:db8::1'
+ ip4_pfx = "192.0.2.0/24"
+ ip6_pfx = "2001:db8::/32"
+ ip6_src = "2001:db8::1"
psid_offset = 6
psid_len = 6
ea_bits_len = 0
lw46_shared(ip4_pfx, ip6_pfx, ip6_src, ea_bits_len, psid_offset, psid_len)
+
#
# RX thread, that should sit on blocking vpe_api_read()
-#
+#
#
#
#
import threading
-class RXThread (threading.Thread):
+
+
+class RXThread(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
def run(self):
- print "Starting "
+ print("Starting ")
i = 0
while True:
msg = vpe_api_read()
if msg:
- #print msg
- id = unpack('>H', msg[0:2])
- size = unpack('>H', msg[2:4])
- print "Received", id, "of size", size
+ # print msg
+ id = unpack(">H", msg[0:2])
+ size = unpack(">H", msg[2:4])
+ print(f"Received {id} of size {size}")
i += 1
- #del msg
+ # del msg
continue
- #time.sleep(0.001)
+ # time.sleep(0.001)
return
+
# Create RX thread
rxthread = RXThread()
rxthread.setDaemon(True)
-
-print "Connect", connect_to_vpe("client124")
+
+print(f"Connect {connect_to_vpe('client124')}")
import timeit
+
rxthread.start()
-print "After thread started"
+print("After thread started")
-#pneum_kill_thread()
-print "After thread killed"
+# pneum_kill_thread()
+print("After thread killed")
-#t = show_version(0)
-#print "Result from show version", t
+# t = show_version(0)
+# print "Result from show version", t
-print timeit.timeit('t = show_version(0)', number=1000, setup="from __main__ import show_version")
+print(
+ f"{timeit.timeit('t = show_version(0)', number=1000, setup='from __main__ import show_version')}"
+)
time.sleep(10)
-#print timeit.timeit('control_ping(0)', number=10, setup="from __main__ import control_ping")
+# print timeit.timeit('control_ping(0)', number=10, setup="from __main__ import control_ping")
disconnect_from_vpe()
sys.exit()
-print t.program, t.version,t.builddate,t.builddirectory
+print(f"{t.program} {t.version}{t.builddate}{t.builddirectory}")
-'''
+"""
t = map_domain_dump(0)
if not t:
@@ -131,11 +167,9 @@ if not t:
for d in t:
print("IP6 prefix:",str(IPv6Address(d.ip6prefix)))
print( "IP4 prefix:",str(IPv4Address(d.ip4prefix)))
-'''
+"""
suite = unittest.TestLoader().loadTestsFromTestCase(TestMAP)
unittest.TextTestRunner(verbosity=2).run(suite)
disconnect_from_vpe()
-
-
diff --git a/src/plugins/map/gen-rules.py b/src/plugins/map/gen-rules.py
index e43b8e155be..e804763d0cf 100755
--- a/src/plugins/map/gen-rules.py
+++ b/src/plugins/map/gen-rules.py
@@ -21,87 +21,143 @@ import sys
# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6
# map add rule index <0> psid <psid> ip6-dst <ip6-dst>
-parser = argparse.ArgumentParser(description='MAP VPP configuration generator')
-parser.add_argument('-t', action="store", dest="mapmode")
+parser = argparse.ArgumentParser(description="MAP VPP configuration generator")
+parser.add_argument("-t", action="store", dest="mapmode")
args = parser.parse_args()
+
#
# 1:1 Shared IPv4 address, shared BR
#
def shared11br():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
- ip6_dst = ipaddress.ip_network('bbbb::/32')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/16")
+ ip6_dst = ipaddress.ip_network("bbbb::/32")
psid_len = 6
for i in range(ip4_pfx.num_addresses):
- print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1",
- "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ print(
+ "map add domain ip4-pfx "
+ + str(ip4_pfx[i])
+ + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1",
+ "ea-bits-len 0 psid-offset 6 psid-len",
+ psid_len,
+ )
for psid in range(0x1 << psid_len):
- print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+ print(
+ "map add rule index",
+ i,
+ "psid",
+ psid,
+ "ip6-dst",
+ ip6_dst[(i * (0x1 << psid_len)) + psid],
+ )
#
# 1:1 Shared IPv4 address
#
def shared11():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
- ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
- ip6_dst = ipaddress.ip_network('bbbb::/32')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/16")
+ ip6_src = ipaddress.ip_network("cccc:bbbb::/64")
+ ip6_dst = ipaddress.ip_network("bbbb::/32")
psid_len = 6
for i in range(ip4_pfx.num_addresses):
- print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i],
- "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ print(
+ "map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src",
+ ip6_src[i],
+ "ea-bits-len 0 psid-offset 6 psid-len",
+ psid_len,
+ )
for psid in range(0x1 << psid_len):
- print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+ print(
+ "map add rule index",
+ i,
+ "psid",
+ psid,
+ "ip6-dst",
+ ip6_dst[(i * (0x1 << psid_len)) + psid],
+ )
+
#
# 1:1 Shared IPv4 address small
#
def smallshared11():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/24')
- ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
- ip6_dst = ipaddress.ip_network('bbbb::/32')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/24")
+ ip6_src = ipaddress.ip_network("cccc:bbbb::/64")
+ ip6_dst = ipaddress.ip_network("bbbb::/32")
psid_len = 6
for i in range(ip4_pfx.num_addresses):
- print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i],
- "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ print(
+ "map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src",
+ ip6_src[i],
+ "ea-bits-len 0 psid-offset 6 psid-len",
+ psid_len,
+ )
for psid in range(0x1 << psid_len):
- print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+ print(
+ "map add rule index",
+ i,
+ "psid",
+ psid,
+ "ip6-dst",
+ ip6_dst[(i * (0x1 << psid_len)) + psid],
+ )
+
#
# 1:1 Full IPv4 address
#
def full11():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
- ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
- ip6_dst = ipaddress.ip_network('bbbb::/32')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/16")
+ ip6_src = ipaddress.ip_network("cccc:bbbb::/64")
+ ip6_dst = ipaddress.ip_network("bbbb::/32")
psid_len = 0
for i in range(ip4_pfx.num_addresses):
- print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-src", ip6_src[i],
- "ea-bits-len 0 psid-offset 0 psid-len 0")
+ print(
+ "map add domain ip4-pfx "
+ + str(ip4_pfx[i])
+ + "/32 ip6-pfx "
+ + str(ip6_dst[i])
+ + "/128 ip6-src",
+ ip6_src[i],
+ "ea-bits-len 0 psid-offset 0 psid-len 0",
+ )
+
+
def full11br():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
- ip6_dst = ipaddress.ip_network('bbbb::/32')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/16")
+ ip6_dst = ipaddress.ip_network("bbbb::/32")
psid_len = 0
for i in range(ip4_pfx.num_addresses):
- print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-shared-src cccc:bbbb::1",
- "ea-bits-len 0 psid-offset 0 psid-len 0")
+ print(
+ "map add domain ip4-pfx "
+ + str(ip4_pfx[i])
+ + "/32 ip6-pfx "
+ + str(ip6_dst[i])
+ + "/128 ip6-shared-src cccc:bbbb::1",
+ "ea-bits-len 0 psid-offset 0 psid-len 0",
+ )
+
#
# Algorithmic mapping Shared IPv4 address
#
def algo():
- print("map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8")
- print("map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0")
+ print(
+ "map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8"
+ )
+ print(
+ "map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0"
+ )
+
#
# IP4 forwarding
#
def ip4():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/16")
for i in range(ip4_pfx.num_addresses):
- print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2")
+ print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2")
globals()[args.mapmode]()
-
-
diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c
index 1ab5cc2dc4f..652808e6d37 100644
--- a/src/plugins/map/ip4_map.c
+++ b/src/plugins/map/ip4_map.c
@@ -155,6 +155,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
map_main_t *mm = &map_main;
vlib_combined_counter_main_t *cm = mm->domain_counters;
u32 thread_index = vm->thread_index;
+ u32 *buffer0 = 0;
while (n_left_from > 0)
{
@@ -170,7 +171,6 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
ip6_header_t *ip6h0;
u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
u32 map_domain_index0 = ~0;
- u32 *buffer0 = 0;
bool free_original_buffer0 = false;
u32 *frag_from0, frag_left0;
@@ -322,10 +322,10 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
+ vec_free (buffer0);
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_map_feature, static) =
{
.arc_name = "ip4-unicast",
@@ -354,7 +354,6 @@ VLIB_REGISTER_NODE(ip4_map_node) = {
[IP4_MAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c
index 8ae76f331f6..fe29af458a2 100644
--- a/src/plugins/map/ip4_map_t.c
+++ b/src/plugins/map/ip4_map_t.c
@@ -56,7 +56,6 @@ typedef enum
//This is used to pass information within the buffer data.
//Buffer structure being too small to contain big structures like this.
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_address_t daddr;
ip6_address_t saddr;
@@ -64,7 +63,6 @@ typedef CLIB_PACKED (struct {
//sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4)
u8 unused[28];
}) ip4_mapt_pseudo_header_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -684,7 +682,6 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_map_t_feature, static) = {
.arc_name = "ip4-unicast",
.node_name = "ip4-map-t",
@@ -710,9 +707,7 @@ VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = {
[IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = {
.function = ip4_map_t_icmp,
.name = "ip4-map-t-icmp",
@@ -731,9 +726,7 @@ VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = {
[IP4_MAPT_ICMP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = {
.function = ip4_map_t_tcp_udp,
.name = "ip4-map-t-tcp-udp",
@@ -752,9 +745,7 @@ VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = {
[IP4_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip4_map_t_node) = {
.function = ip4_map_t,
.name = "ip4-map-t",
@@ -774,7 +765,6 @@ VLIB_REGISTER_NODE(ip4_map_t_node) = {
[IP4_MAPT_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c
index 1193dda0a80..3d9b21dfcd9 100644
--- a/src/plugins/map/ip6_map.c
+++ b/src/plugins/map/ip6_map.c
@@ -803,7 +803,6 @@ ip6_map_icmp_relay (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip6_map_feature, static) =
{
.arc_name = "ip6-unicast",
@@ -836,9 +835,7 @@ VLIB_REGISTER_NODE(ip6_map_node) = {
[IP6_MAP_NEXT_ICMP] = "ip6-icmp-error",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = {
.function = ip6_map_post_ip4_reass,
.name = "ip6-map-post-ip4-reass",
@@ -854,9 +851,7 @@ VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = {
[IP6_MAP_POST_IP4_REASS_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
.function = ip6_map_icmp_relay,
.name = "ip6-map-icmp-relay",
@@ -871,7 +866,6 @@ VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
[IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
clib_error_t *
ip6_map_init (vlib_main_t * vm)
diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c
index 861c049b0f4..51853d619e6 100644
--- a/src/plugins/map/ip6_map_t.c
+++ b/src/plugins/map/ip6_map_t.c
@@ -529,7 +529,10 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
ip60 = vlib_buffer_get_current (p0);
d0 =
- ip6_map_get_domain (&ip60->dst_address,
+ /* Originally using the IPv6 dest for rule lookup, now source
+ * [dgeist] ip6_map_get_domain (&ip60->dst_address,
+ */
+ ip6_map_get_domain (&ip60->src_address,
&vnet_buffer (p0)->map_t.map_domain_index,
&error0);
if (!d0)
@@ -687,7 +690,6 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = {
.function = ip6_map_t_fragmented,
.name = "ip6-map-t-fragmented",
@@ -707,9 +709,7 @@ VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = {
[IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = {
.function = ip6_map_t_icmp,
.name = "ip6-map-t-icmp",
@@ -729,9 +729,7 @@ VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = {
[IP6_MAPT_ICMP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = {
.function = ip6_map_t_tcp_udp,
.name = "ip6-map-t-tcp-udp",
@@ -751,9 +749,7 @@ VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = {
[IP6_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip6_map_t_feature, static) = {
.arc_name = "ip6-unicast",
.node_name = "ip6-map-t",
@@ -781,7 +777,6 @@ VLIB_REGISTER_NODE(ip6_map_t_node) = {
[IP6_MAPT_NEXT_ICMP] = "ip6-icmp-error",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/map/lpm.c b/src/plugins/map/lpm.c
index c0e5bad1417..a2fc3337167 100644
--- a/src/plugins/map/lpm.c
+++ b/src/plugins/map/lpm.c
@@ -28,7 +28,13 @@ masked_address32 (uint32_t addr, uint8_t len)
static uint64_t
masked_address64 (uint64_t addr, uint8_t len)
{
- return len == 64 ? addr : addr & ~(~0ull >> len);
+ /* This was originally causing non-64-bit masks to not match due to LSB vs
+ * MSB masking (0s at the head of the value). Probably needs some corner
+ * case checking in case my masking logic was off [dgeist]
+ *
+ * return len == 64 ? addr : addr & ~(~0ull >> len);
+ */
+ return len == 64 ? addr : addr & ((1ull << (len)) - 1);
}
static void
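
To make the masking question above concrete, here is a tiny standalone
comparison of the two strategies (illustrative only; which one is right
depends on whether the prefix occupies the most- or least-significant
bits of the stored 64-bit word):

    #include <stdio.h>
    #include <stdint.h>

    int
    main (void)
    {
      uint64_t addr = 0x20010db8aabbccddULL;
      uint8_t len = 48;
      /* original: keep the top len bits (MSB masking) */
      uint64_t msb = addr & ~(~0ull >> len);
      /* replacement: keep the bottom len bits (LSB masking) */
      uint64_t lsb = addr & ((1ull << len) - 1);
      printf ("msb %016llx lsb %016llx\n", (unsigned long long) msb,
              (unsigned long long) lsb);
      return 0;
    }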
@@ -126,13 +132,25 @@ lpm_128_add (lpm_t *lpm, void *addr_v, u8 pfxlen, u32 value)
BVT(clib_bihash_kv) kv;
ip6_address_t *addr = addr_v;
- kv.key[0] = masked_address64(addr->as_u64[0], pfxlen > 64 ? 64 : pfxlen);
+ /* This is a quick hack. It works for pfxlen < 64 but needs validation for
+ * other cases [dgeist]
+ *
+ * kv.key[0] = masked_address64(addr->as_u64[0], pfxlen > 64 ? 64 : pfxlen);
+ */
+ kv.key[0] = masked_address64 (addr->as_u64[0], pfxlen > 64 ? 64 : 64);
kv.key[1] = masked_address64(addr->as_u64[1], pfxlen > 64 ? pfxlen - 64 : 0);
kv.key[2] = pfxlen;
kv.value = value;
BV(clib_bihash_add_del)(&lpm->bihash, &kv, 1);
lpm->prefix_length_refcount[pfxlen]++;
- lpm->prefix_lengths_bitmap = clib_bitmap_set (lpm->prefix_lengths_bitmap, 128 - pfxlen, 1);
+ /* Populating the lengths bitmap table with prefix of 48 instead of 80
+ * (128 - 48) [dgeist]
+ *
+ * lpm->prefix_lengths_bitmap = clib_bitmap_set (
+ * lpm->prefix_lengths_bitmap, 128 - pfxlen, 1);
+ */
+ lpm->prefix_lengths_bitmap = clib_bitmap_set (
+ lpm->prefix_lengths_bitmap, pfxlen > 64 ? 128 - pfxlen : pfxlen, 1);
}
static void
@@ -148,8 +166,8 @@ lpm_128_delete (lpm_t *lpm, void *addr_v, u8 pfxlen)
/* refcount accounting */
ASSERT (lpm->prefix_length_refcount[pfxlen] > 0);
if (--lpm->prefix_length_refcount[pfxlen] == 0) {
- lpm->prefix_lengths_bitmap = clib_bitmap_set (lpm->prefix_lengths_bitmap,
- 128 - pfxlen, 0);
+ lpm->prefix_lengths_bitmap =
+ clib_bitmap_set (lpm->prefix_lengths_bitmap, 128 - pfxlen, 0);
}
}
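
One thing worth flagging across the two bitmap hunks above: lpm_128_add
now sets the bit at (pfxlen > 64 ? 128 - pfxlen : pfxlen), while
lpm_128_delete still clears the bit at 128 - pfxlen, so add and delete
index the bitmap differently for prefixes of 64 bits or less. That may
be intentional for this experiment, but it looks asymmetric.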
diff --git a/src/plugins/map/map.c b/src/plugins/map/map.c
index 938793c8947..3cffadd39e8 100644
--- a/src/plugins/map/map.c
+++ b/src/plugins/map/map.c
@@ -176,6 +176,10 @@ map_create_domain (ip4_address_t * ip4_prefix,
mm->ip6_src_prefix_tbl->add (mm->ip6_src_prefix_tbl, &d->ip6_src,
d->ip6_src_len, *map_domain_index);
+ /* Let's build a table with the MAP rule ip6 prefixes as well [dgeist] */
+ mm->ip6_prefix_tbl->add (mm->ip6_prefix_tbl, &d->ip6_prefix,
+ d->ip6_prefix_len, *map_domain_index);
+
/* Validate packet/byte counters */
map_domain_counter_lock (mm);
int i;
@@ -218,6 +222,9 @@ map_delete_domain (u32 map_domain_index)
d->ip4_prefix_len);
mm->ip6_src_prefix_tbl->delete (mm->ip6_src_prefix_tbl, &d->ip6_src,
d->ip6_src_len);
+ /* Addition to remove the new table [dgeist] */
+ mm->ip6_prefix_tbl->delete (mm->ip6_prefix_tbl, &d->ip6_prefix,
+ d->ip6_prefix_len);
/* Release user-assigned MAP domain name. */
map_free_extras (map_domain_index);
@@ -979,10 +986,8 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input,
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
{
- /* *INDENT-OFF* */
pool_foreach (d, mm->domains)
{vlib_cli_output(vm, "%U", format_map_domain, d, counters);}
- /* *INDENT-ON* */
return 0;
}
@@ -1008,10 +1013,8 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (map_domain_index == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (d, mm->domains)
{vlib_cli_output(vm, "%U", format_map_domain, d, counters);}
- /* *INDENT-ON* */
}
else
{
@@ -1062,7 +1065,6 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
- /* *INDENT-OFF* */
pool_foreach (d, mm->domains) {
if (d->rules) {
rulecount+= 0x1 << d->psid_length;
@@ -1071,7 +1073,6 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input,
domains += sizeof(*d);
domaincount++;
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "MAP domains structure: %d\n", sizeof (map_domain_t));
vlib_cli_output (vm, "MAP domains: %d (%d bytes)\n", domaincount, domains);
@@ -1255,7 +1256,6 @@ done:
}
-/* *INDENT-OFF* */
/*?
* Set or copy the IP TOS/Traffic Class field
@@ -1312,7 +1312,7 @@ VLIB_CLI_COMMAND(map_pre_resolve_command, static) = {
/*?
* Enable or disable the MAP-E inbound security check
- * Specifiy if the inbound security check should be done on fragments
+ * Specify if the inbound security check should be done on fragments
*
* @cliexpar
* @cliexstart{map params security-check}
@@ -1333,9 +1333,8 @@ VLIB_CLI_COMMAND(map_security_check_command, static) = {
.function = map_security_check_command_fn,
};
-
/*?
- * Specifiy the IPv4 source address used for relayed ICMP error messages
+ * Specify the IPv4 source address used for relayed ICMP error messages
*
* @cliexpar
* @cliexstart{map params icmp source-address}
@@ -1470,7 +1469,6 @@ VLIB_PLUGIN_REGISTER() = {
.description = "Mapping of Address and Port (MAP)",
};
-/* *INDENT-ON* */
/*
* map_init
diff --git a/src/plugins/map/map.h b/src/plugins/map/map.h
index d874aa47b3a..86b8ec22df4 100644
--- a/src/plugins/map/map.h
+++ b/src/plugins/map/map.h
@@ -335,7 +335,11 @@ ip6_map_get_domain (ip6_address_t * addr, u32 * map_domain_index, u8 * error)
{
map_main_t *mm = &map_main;
u32 mdi =
- mm->ip6_src_prefix_tbl->lookup (mm->ip6_src_prefix_tbl, addr, 128);
+ /* This is the old src (ip6 destination) hash lookup [dgeist]
+ *
+ * mm->ip6_src_prefix_tbl->lookup (mm->ip6_src_prefix_tbl, addr, 128);
+ */
+ mm->ip6_prefix_tbl->lookup (mm->ip6_prefix_tbl, addr, 128);
if (mdi == ~0)
{
*error = MAP_ERROR_NO_DOMAIN;
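
Taken together with the map.c and ip6_map_t.c changes above, the effect
of this experiment is that ip6_map_get_domain () now resolves domains
from the new ip6_prefix_tbl (populated with each domain's ip6_prefix at
creation) rather than from ip6_src_prefix_tbl, and ip6-map-t feeds it
the packet's source address instead of the destination.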
diff --git a/src/plugins/map/map_api.c b/src/plugins/map/map_api.c
index f81216dddbd..1dbff4ca0d1 100644
--- a/src/plugins/map/map_api.c
+++ b/src/plugins/map/map_api.c
@@ -50,13 +50,11 @@ vl_api_map_add_domain_t_handler (vl_api_map_add_domain_t * mp)
mp->ip6_src.len, mp->ea_bits_len, mp->psid_offset,
mp->psid_length, &index, mp->mtu, flags, mp->tag);
- /* *INDENT-OFF* */
REPLY_MACRO2_END(VL_API_MAP_ADD_DOMAIN_REPLY,
({
rmp->index = index;
}));
- /* *INDENT-ON* */
}
static void
@@ -98,7 +96,6 @@ send_domain_details (u32 map_domain_index, vl_api_registration_t * rp,
vec_elt_at_index (mm->domain_extras, map_domain_index);
int tag_len = clib_min (ARRAY_LEN (rmp->tag), vec_len (de->tag) + 1);
- /* *INDENT-OFF* */
REPLY_MACRO_DETAILS4(VL_API_MAP_DOMAIN_DETAILS, rp, context,
({
rmp->domain_index = htonl (map_domain_index);
@@ -119,7 +116,6 @@ send_domain_details (u32 map_domain_index, vl_api_registration_t * rp,
memcpy (rmp->tag, de->tag, tag_len - 1);
rmp->tag[tag_len - 1] = '\0';
}));
- /* *INDENT-ON* */
}
static void
@@ -136,12 +132,10 @@ vl_api_map_domain_dump_t_handler (vl_api_map_domain_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach_index (i, mm->domains)
{
send_domain_details(i, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -152,12 +146,10 @@ vl_api_map_domains_get_t_handler (vl_api_map_domains_get_t * mp)
i32 rv = 0;
- /* *INDENT-OFF* */
REPLY_AND_DETAILS_MACRO (VL_API_MAP_DOMAINS_GET_REPLY, mm->domains,
({
send_domain_details (cursor, rp, mp->context);
}));
- /* *INDENT-ON* */
}
static void
diff --git a/src/plugins/map/map_doc.md b/src/plugins/map/map_doc.md
deleted file mode 100644
index f3e2a56706d..00000000000
--- a/src/plugins/map/map_doc.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# VPP MAP and Lw4o6 implementation {#map_doc}
-
-This is a memo intended to contain documentation of the VPP MAP and Lw4o6 implementations.
-Everything that is not directly obvious should come here.
-
-
-
-## MAP-E Virtual Reassembly
-
-The MAP-E implementation supports handling of IPv4 fragments as well as IPv4-in-IPv6 inner and outer fragments. This is called virtual reassembly because the fragments are not actually reassembled. Instead, some meta-data are kept about the first fragment and reused for subsequent fragments.
-
-Fragment caching and handling is not always necessary. It is performed when:
-* An IPv4 fragment is received and the destination IPv4 address is shared.
-* An IPv6 packet is received with an inner IPv4 fragment, the IPv4 source address is shared, and 'security-check fragments' is on.
-* An IPv6 fragment is received.
-
-There are 3 dedicated nodes:
-* ip4-map-reass
-* ip6-map-ip4-reass
-* ip6-map-ip6-reass
-
-ip4-map sends all fragments to ip4-map-reass.
-ip6-map sends all inner-fragments to ip6-map-ip4-reass.
-ip6-map sends all outer-fragments to ip6-map-ip6-reass.
-
-IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp. IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6). Therefore, each packet reassembly makes use of exactly one reassembly structure. When such a structure is allocated, it is timestamped with the current time. Finally, those structures are capable of storing a limited number of buffer indexes.
-
-An IPv4 (resp. IPv6) reassembly structure can cache up to MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp. MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached until the first fragment is received.
-
-#### Virtual Reassembly configuration
-
-IPv4 and IPv6 virtual reassembly support the following configuration:
- map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>]
-
-lifetime:
- The time in milliseconds a reassembly structure is considered valid. The longer, the more reliable is reassembly, but the more likely it is to exhaust the pool of reassembly structures. IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 seconds. Those values are not realistic for high-throughput cases.
-
-buffers:
- The upper limit of buffers that are allowed to be cached. It can be used to protect against fragmentation attacks which would aim to exhaust the global buffers pool.
-
-pool-size:
- The number of reassembly structures that can be allocated. As each structure can store a small fixed number of fragments, it also sets an upper-bound of 'pool-size * MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY' buffers that can be cached in total.
-
-ht-ratio:
- The amount of buckets in the hash-table is pool-size * ht-ratio.
-
-
-Any time pool-size and ht-ratio is modified, the hash-table is destroyed and created again, which means all current state is lost.
-
-
-##### Additional considerations
-
-Reassembly at high rate is expensive in terms of buffers. There is a trade-off between the lifetime and number of allocated buffers. Reducing the lifetime helps, but at the cost of loosing state for fragments that are wide appart.
-
-Let:
-R be the packet rate at which fragments are received.
-F be the number of fragments per packet.
-
-Assuming the first fragment is always received last. We should have:
-buffers > lifetime * R / F * (F - 1)
-pool-size > lifetime * R/F
-
-This is a worst case. Receiving the first fragment earlier helps reducing the number of required buffers. Also, an optimization is implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) which counts the number of transmitted bytes and remembers the total number of bytes which should be transmitted based on the last fragment, and therefore helps reducing 'pool-size'.
-
-But the formula shows that it is challenging to forward a significant amount of fragmented packets at high rates. For instance, with a lifetime of 1 second, 5Mpps packet rate would require buffering up to 2.5 millions fragments.
-
-If you want to do that, be prepared to configure a lot of fragments.
-
-
diff --git a/src/plugins/map/map_doc.rst b/src/plugins/map/map_doc.rst
new file mode 100644
index 00000000000..663e815d545
--- /dev/null
+++ b/src/plugins/map/map_doc.rst
@@ -0,0 +1,99 @@
+MAP and Lw4o6
+=============
+
+This is a memo intended to contain documentation of the VPP MAP and
+Lw4o6 implementations. Everything that is not directly obvious should
+come here.
+
+MAP-E Virtual Reassembly
+------------------------
+
+The MAP-E implementation supports handling of IPv4 fragments as well as
+IPv4-in-IPv6 inner and outer fragments. This is called virtual
+reassembly because the fragments are not actually reassembled. Instead,
+some meta-data are kept about the first fragment and reused for
+subsequent fragments.
+
+Fragment caching and handling is not always necessary. It is performed
+when:
+
+-  An IPv4 fragment is received and the destination IPv4 address is
+   shared.
+-  An IPv6 packet is received with an inner IPv4 fragment, the IPv4
+   source address is shared, and ‘security-check fragments’ is on.
+-  An IPv6 fragment is received.
+
+There are 3 dedicated nodes:
+
+-  ip4-map-reass
+-  ip6-map-ip4-reass
+-  ip6-map-ip6-reass
+
+ip4-map sends all fragments to ip4-map-reass. ip6-map sends all
+inner-fragments to ip6-map-ip4-reass. ip6-map sends all outer-fragments
+to ip6-map-ip6-reass.
+
+IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order
+to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based
+on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp.
+IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6).
+Therefore, each packet reassembly makes use of exactly one reassembly
+structure. When such a structure is allocated, it is timestamped with
+the current time. Finally, those structures are capable of storing a
+limited number of buffer indexes.
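+
+As an illustration, the two hash keys can be pictured as the following
+C structs (a sketch for exposition only; the names are hypothetical and
+not the plugin's actual definitions):
+
+::
+
+   /* Hypothetical sketch of the two reassembly hash keys. */
+   #include <stdint.h>
+
+   typedef struct
+   {
+     uint32_t src_address; /* IPv4-src */
+     uint32_t dst_address; /* IPv4-dst */
+     uint16_t fragment_id; /* Frag-ID */
+     uint8_t protocol;     /* Protocol */
+   } ip4_reass_key_sketch_t;
+
+   typedef struct
+   {
+     uint8_t src_address[16]; /* IPv6-src */
+     uint8_t dst_address[16]; /* IPv6-dst */
+     uint32_t fragment_id;    /* Frag-ID; protocol implied (IPv4-in-IPv6) */
+   } ip6_reass_key_sketch_t;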
+
+An IPv4 (resp. IPv6) reassembly structure can cache up to
+MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp.
+MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached
+until the first fragment is received.
+
+Virtual Reassembly configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+IPv4 and IPv6 virtual reassembly support the following configuration:
+
+::
+
+   map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>]
+   [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>]
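+
+For example, a plausible invocation (the values are illustrative only):
+
+::
+
+   map params reassembly ip4 lifetime 1000 pool-size 1024 buffers 2048 ht-ratio 8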
+
+lifetime: The time in milliseconds a reassembly structure is considered
+valid. The longer the lifetime, the more reliable the reassembly, but
+the more likely it is to exhaust the pool of reassembly structures. The
+IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a
+lifetime of 60 seconds. Those values are not realistic for
+high-throughput cases.
+
+buffers: The upper limit of buffers that are allowed to be cached. It
+can be used to protect against fragmentation attacks which aim to
+exhaust the global buffer pool.
+
+pool-size: The number of reassembly structures that can be allocated. As
+each structure can store a small fixed number of fragments, it also sets
+an upper-bound of ‘pool-size \*
+MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY’ buffers that can be cached
+in total.
+
+ht-ratio: The number of buckets in the hash-table is pool-size \*
+ht-ratio.
+
+Any time pool-size or ht-ratio is modified, the hash-table is destroyed
+and re-created, which means all current state is lost.
+
+Additional considerations
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Reassembly at high rate is expensive in terms of buffers. There is a
+trade-off between the lifetime and number of allocated buffers. Reducing
+the lifetime helps, but at the cost of losing state for fragments that
+are wide apart.
+
+Let:
+
+-  R be the packet rate at which fragments are received.
+-  F be the number of fragments per packet.
+
+Assuming the first fragment is always received last, we should have:
+
+::
+
+   buffers > lifetime * R / F * (F - 1)
+   pool-size > lifetime * R / F
+
+This is a worst case. Receiving the first fragment earlier helps reduce
+the number of required buffers. Also, an optimization is implemented
+(MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) which counts
+the number of transmitted bytes and, from the last fragment, knows the
+total number of bytes that should be transmitted, and therefore helps
+reduce ‘pool-size’.
+
+But the formulas show that it is challenging to forward a significant
+amount of fragmented packets at high rates. For instance, with a
+lifetime of 1 second, a 5 Mpps packet rate would require buffering up to
+2.5 million fragments.
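+
+A minimal C sketch (illustrative only, not part of the plugin) checks
+this arithmetic, assuming F = 2 fragments per packet:
+
+::
+
+   #include <stdio.h>
+
+   int
+   main (void)
+   {
+     double lifetime = 1.0; /* seconds */
+     double rate = 5e6;     /* packets per second */
+     double frags = 2.0;    /* fragments per packet */
+
+     /* worst case: the first fragment always arrives last */
+     printf ("buffers > %.0f\n", lifetime * rate / frags * (frags - 1.0));
+     printf ("pool-size > %.0f\n", lifetime * rate / frags);
+     return 0;
+   }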
+
+If you want to do that, be prepared to configure a lot of buffers.
diff --git a/src/plugins/marvell/README.md b/src/plugins/marvell/README.md
deleted file mode 100644
index 3f3c27e3618..00000000000
--- a/src/plugins/marvell/README.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# Marvell device plugin for VPP {#marvell_plugin_doc}
-
-##Overview
-This plugin provides native device support for Marvell PP2 network device, by use of Marvell Usermode SDK ([MUSDK][1]).
-Code is developed and tested on [MACCHIATObin][2] board.
-
-##Prerequisites
-Plugins depends on installed MUSDK and Marvell provided linux [kernel][3] with MUSDK provided kernel patches (see `patches/linux` in musdk repo and relevant documentation.
-Kernel version used: **4.14.22 armada-18.09.3**
-MUSDK version used: **armada-18.09.3**
-Following kernel modules from MUSDK must be loaded for plugin to work:
-* `musdk_cma.ko`
-* `mv_pp_uio.ko`
-
-##Musdk 18.09.3 compilation steps
-
-```
-./bootstrap
-./configure --prefix=/opt/vpp/external/aarch64/ CFLAGS="-Wno-error=unused-result -g -fPIC" --enable-shared=no
-sed -i -e 's/marvell,mv-pp-uio/generic-uio/' modules/pp2/mv_pp_uio.c
-sed -i -e 's/O_CREAT/O_CREAT, S_IRUSR | S_IWUSR/' src/lib/file_utils.c
-make
-sudo make install
-```
-
-## Usage
-### Interface Creation
-Interfaces are dynamically created with following CLI:
-```
-create interface marvell pp2 name eth0
-set interface state mv-ppio-0/0 up
-```
-
-Where `eth0` is linux interface name and `mv-ppio-X/Y` is VPP interface name where X is PP2 device ID and Y is PPIO ID
-Interface needs to be assigned to MUSDK in FDT configuration and linux interface state must be up.
-
-### Interface Deletion
-Interface can be deleted with following CLI:
-```
-delete interface marvell pp2 <interface name>
-```
-
-
-### Interface Statistics
-Interface statistics can be displayed with `sh hardware-interface mv-ppio0/0`
-command.
-
-### Interaction with DPDK plugin
-This plugin doesn't have any dependency on DPDK or DPDK plugin but it can
-work with DPDK plugin enabled or disabled. It is observed that performance is
-around 30% better when DPDK plugin is disabled, as DPDK plugin registers
-own buffer manager, which needs to deal with additional metadata in each packet.
-
-DPDK plugin can be disabled by adding following config to the startup.conf.
-
-```
-plugins {
- dpdk_plugin.so { disable }
-}
-```
-
-
-[1]: https://github.com/MarvellEmbeddedProcessors/musdk-marvell
-[2]: http://macchiatobin.net
-[3]: https://github.com/MarvellEmbeddedProcessors/linux-marvell
diff --git a/src/plugins/marvell/README.rst b/src/plugins/marvell/README.rst
new file mode 100644
index 00000000000..19cf1c49d0e
--- /dev/null
+++ b/src/plugins/marvell/README.rst
@@ -0,0 +1,85 @@
+Marvell device plugin
+=====================
+
+Overview
+--------
+
+This plugin provides native device support for the Marvell PP2 network
+device, using the Marvell Usermode SDK
+(`MUSDK <https://github.com/MarvellEmbeddedProcessors/musdk-marvell>`__).
+The code is developed and tested on the
+`MACCHIATObin <http://macchiatobin.net>`__ board.
+
+Prerequisites
+-------------
+
+The plugin depends on an installed MUSDK and the Marvell-provided linux
+`kernel <https://github.com/MarvellEmbeddedProcessors/linux-marvell>`__
+with the MUSDK-provided kernel patches (see ``patches/linux`` in the
+musdk repo and the relevant documentation).
+
+Kernel version used: **4.14.22 armada-18.09.3**
+
+MUSDK version used: **armada-18.09.3**
+
+The following kernel modules from MUSDK must be loaded for the plugin
+to work:
+
+-  ``musdk_cma.ko``
+-  ``mv_pp_uio.ko``
+
+Musdk 18.09.3 compilation steps
+-------------------------------
+
+::
+
+ ./bootstrap
+ ./configure --prefix=/opt/vpp/external/aarch64/ CFLAGS="-Wno-error=unused-result -g -fPIC" --enable-shared=no
+ sed -i -e 's/marvell,mv-pp-uio/generic-uio/' modules/pp2/mv_pp_uio.c
+ sed -i -e 's/O_CREAT/O_CREAT, S_IRUSR | S_IWUSR/' src/lib/file_utils.c
+ make
+ sudo make install
+
+Usage
+-----
+
+Interface Creation
+~~~~~~~~~~~~~~~~~~
+
+Interfaces are dynamically created with the following CLI:
+
+::
+
+ create interface marvell pp2 name eth0
+ set interface state mv-ppio-0/0 up
+
+Where ``eth0`` is the linux interface name and ``mv-ppio-X/Y`` is the
+VPP interface name, where X is the PP2 device ID and Y is the PPIO ID.
+The interface needs to be assigned to MUSDK in the FDT configuration,
+and the linux interface state must be up.
+
+Interface Deletion
+~~~~~~~~~~~~~~~~~~
+
+An interface can be deleted with the following CLI:
+
+::
+
+ delete interface marvell pp2 <interface name>
+
+Interface Statistics
+~~~~~~~~~~~~~~~~~~~~
+
+Interface statistics can be displayed with the
+``sh hardware-interface mv-ppio0/0`` command.
+
+Interaction with DPDK plugin
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This plugin doesn’t have any dependency on DPDK or the DPDK plugin, but
+it can work with the DPDK plugin enabled or disabled. Performance is
+observed to be around 30% better when the DPDK plugin is disabled, as
+the DPDK plugin registers its own buffer manager, which needs to deal
+with additional metadata in each packet.
+
+The DPDK plugin can be disabled by adding the following config to
+startup.conf.
+
+::
+
+ plugins {
+ dpdk_plugin.so { disable }
+ }
diff --git a/src/plugins/marvell/plugin.c b/src/plugins/marvell/plugin.c
index fe673092a5e..ed90776ba95 100644
--- a/src/plugins/marvell/plugin.c
+++ b/src/plugins/marvell/plugin.c
@@ -19,12 +19,10 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Marvell PP2 Device Driver",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/marvell/pp2/cli.c b/src/plugins/marvell/pp2/cli.c
index 28ef35b2b24..f4ecb1873c9 100644
--- a/src/plugins/marvell/pp2/cli.c
+++ b/src/plugins/marvell/pp2/cli.c
@@ -59,13 +59,11 @@ mrvl_pp2_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mrvl_pp2_create_command, static) = {
.path = "create interface marvell pp2",
.short_help = "create interface marvell pp2 [name <ifname>] [rx-queue-size slots] [tx-queue-size slots]",
.function = mrvl_pp2_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
mrvl_pp2_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -110,14 +108,12 @@ mrvl_pp2_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mrvl_pp2_delete_command, static) = {
.path = "delete interface marvell pp2",
.short_help = "delete interface marvell pp2 "
"{<interface> | sw_if_index <sw_idx>}",
.function = mrvl_pp2_delete_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
mrvl_pp2_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/marvell/pp2/format.c b/src/plugins/marvell/pp2/format.c
index 838f5169b05..877010ea561 100644
--- a/src/plugins/marvell/pp2/format.c
+++ b/src/plugins/marvell/pp2/format.c
@@ -22,7 +22,6 @@
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
-#include <vppinfra/linux/syscall.h>
#include <vnet/plugin/plugin.h>
#include <marvell/pp2/pp2.h>
diff --git a/src/plugins/marvell/pp2/input.c b/src/plugins/marvell/pp2/input.c
index 44f01355e39..2545f91becb 100644
--- a/src/plugins/marvell/pp2/input.c
+++ b/src/plugins/marvell/pp2/input.c
@@ -218,8 +218,8 @@ mrvl_pp2_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
n_rx_bytes += mrvl_pp2_next_from_desc (node, d, b0, &next0);
n_rx_bytes += mrvl_pp2_next_from_desc (node, d + 1, b1, &next1);
- vnet_feature_start_device_input_x2 (ppif->sw_if_index, &next0,
- &next1, b0, b1);
+ vnet_feature_start_device_input (ppif->sw_if_index, &next0, b0);
+ vnet_feature_start_device_input (ppif->sw_if_index, &next1, b1);
}
else
{
@@ -262,8 +262,7 @@ mrvl_pp2_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (PREDICT_TRUE (ppif->per_interface_next_index == ~0))
{
n_rx_bytes += mrvl_pp2_next_from_desc (node, d, b0, &next0);
- vnet_feature_start_device_input_x1 (ppif->sw_if_index, &next0,
- b0);
+ vnet_feature_start_device_input (ppif->sw_if_index, &next0, b0);
}
else
{
@@ -370,7 +369,6 @@ mrvl_pp2_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (mrvl_pp2_input_node) = {
.function = mrvl_pp2_input_fn,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -383,7 +381,6 @@ VLIB_REGISTER_NODE (mrvl_pp2_input_node) = {
.error_strings = mrvl_pp2_input_error_strings,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/marvell/pp2/pp2.c b/src/plugins/marvell/pp2/pp2.c
index dfe5c157018..030ab9b4496 100644
--- a/src/plugins/marvell/pp2/pp2.c
+++ b/src/plugins/marvell/pp2/pp2.c
@@ -22,7 +22,6 @@
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
-#include <vppinfra/linux/syscall.h>
#include <vnet/plugin/plugin.h>
#include <marvell/pp2/pp2.h>
#include <vnet/interface/rx_queue_funcs.h>
@@ -130,7 +129,6 @@ mrvl_pp2_delete_if (mrvl_pp2_if_t * ppif)
pp2_ppio_deinit (ppif->ppio);
}
- /* *INDENT-OFF* */
/* free buffers hanging in the tx ring */
vec_foreach (outq, ppif->outqs)
{
@@ -163,7 +161,6 @@ mrvl_pp2_delete_if (mrvl_pp2_if_t * ppif)
pp2_bpool_deinit (inq->bpool);
}
vec_free (ppif->inqs);
- /* *INDENT-ON* */
pool_put (ppm->interfaces, ppif);
@@ -178,6 +175,7 @@ mrvl_pp2_create_if (mrvl_pp2_create_if_args_t * args)
vlib_main_t *vm = vlib_get_main ();
vnet_main_t *vnm = vnet_get_main ();
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vnet_eth_interface_registration_t eir = {};
mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
struct pp2_bpool_params bpool_params = { 0 };
struct pp2_ppio_params ppio_params = { 0 };
@@ -282,16 +280,11 @@ mrvl_pp2_create_if (mrvl_pp2_create_if_args_t * args)
goto error;
}
- args->error = ethernet_register_interface (vnm, mrvl_pp2_device_class.index,
- ppif->dev_instance,
- mac_addr,
- &ppif->hw_if_index,
- mrvl_pp2_eth_flag_change);
- if (args->error)
- {
- args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
- goto error;
- }
+ eir.dev_class_index = mrvl_pp2_device_class.index;
+ eir.dev_instance = ppif->dev_instance;
+ eir.address = mac_addr;
+ eir.cb.flag_change = mrvl_pp2_eth_flag_change;
+ ppif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
sw = vnet_get_hw_sw_interface (vnm, ppif->hw_if_index);
ppif->sw_if_index = sw->sw_if_index;
@@ -380,7 +373,6 @@ static char *mrvl_pp2_tx_func_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (mrvl_pp2_device_class,) =
{
.name = "Marvell PPv2 interface",
@@ -393,7 +385,6 @@ VNET_DEVICE_CLASS (mrvl_pp2_device_class,) =
.clear_counters = mrvl_pp2_clear_interface_counters,
.rx_redirect_to_node = mrvl_pp2_set_interface_next_node,
};
-/* *INDENT-ON* */
static clib_error_t *
mrvl_pp2_init (vlib_main_t * vm)
diff --git a/src/plugins/marvell/pp2/pp2_api.c b/src/plugins/marvell/pp2/pp2_api.c
index a3672c442d4..c1f3a9e1d1d 100644
--- a/src/plugins/marvell/pp2/pp2_api.c
+++ b/src/plugins/marvell/pp2/pp2_api.c
@@ -28,6 +28,7 @@
#include <marvell/pp2/pp2.api_enum.h>
#include <marvell/pp2/pp2.api_types.h>
+#define REPLY_MSG_ID_BASE (pp2->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static void
@@ -48,12 +49,8 @@ vl_api_mrvl_pp2_create_t_handler (vl_api_mrvl_pp2_create_t * mp)
{
clib_error_free (args.error);
}
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_MRVL_PP2_CREATE_REPLY + pp2->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_MRVL_PP2_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -78,7 +75,7 @@ vl_api_mrvl_pp2_delete_t_handler (vl_api_mrvl_pp2_delete_t * mp)
mrvl_pp2_delete_if (dif);
reply:
- REPLY_MACRO (VL_API_MRVL_PP2_DELETE_REPLY + pp2->msg_id_base);
+ REPLY_MACRO (VL_API_MRVL_PP2_DELETE_REPLY);
}
#include <marvell/pp2/pp2.api.c>
diff --git a/src/plugins/mdata/mdata.c b/src/plugins/mdata/mdata.c
index 69622960c39..c45b1e38910 100644
--- a/src/plugins/mdata/mdata.c
+++ b/src/plugins/mdata/mdata.c
@@ -203,25 +203,23 @@ mdata_enable_disable_command_fn (vlib_main_t * vm,
/*?
* This command enables or disables buffer metadata change tracking
*
- *@cliexpar
+ * @cliexpar
* To enable buffer metadata change tracking:
- *@cliexstart{buffer metadata tracking on}
+ * @cliexstart{buffer metadata tracking on}
* Tracking enabled
- *@cliexend
+ * @cliexend
*
- *@cliexstart{buffer metadata tracking off}
+ * @cliexstart{buffer metadata tracking off}
* Tracking disabled
- *@cliexend
+ * @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mdata_enable_disable_command, static) =
{
.path = "buffer metadata tracking",
.short_help = "buffer metadata tracking [on][off]",
.function = mdata_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_mdata_enable_disable_t_handler
@@ -256,13 +254,11 @@ mdata_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (mdata_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Buffer metadata change tracker."
};
-/* *INDENT-ON* */
#define foreach_primary_metadata_field \
@@ -350,15 +346,12 @@ _(tcp.data_len) \
_(tcp.flags) \
_(snat.flags)
-#define foreach_opaque2_metadata_field \
-_(qos.bits) \
-_(qos.source) \
-_(loop_counter) \
-_(gbp.flags) \
-_(gbp.sclass) \
-_(gso_size) \
-_(gso_l4_hdr_sz) \
-_(pg_replay_timestamp)
+#define foreach_opaque2_metadata_field \
+ _ (qos.bits) \
+ _ (qos.source) \
+ _ (loop_counter) \
+ _ (gso_size) \
+ _ (gso_l4_hdr_sz)
static u8 *
format_buffer_metadata_changes (u8 * s, va_list * args)
@@ -472,23 +465,21 @@ show_metadata_command_fn (vlib_main_t * vm,
/*?
* This command displays buffer metadata change information
- *@cliexpar
+ * @cliexpar
* How to display buffer metadata change information
- *@cliexstart{show buffer metadata}
+ * @cliexstart{show buffer metadata}
* ethernet-input: current_data current_length flags error
* vnet_buffer_t: l2_hdr_offset l3_hdr_offset
* vnet_buffer2_t: no changes
- *@cliexend
+ * @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_metadata_command, static) =
{
.path = "show buffer metadata",
.short_help = "show buffer metadata",
.function = show_metadata_command_fn,
};
-/* *INDENT-OFF* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/mdata/mdata_doc.md b/src/plugins/mdata/mdata_doc.md
deleted file mode 100644
index cbbfb012183..00000000000
--- a/src/plugins/mdata/mdata_doc.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Buffer metadata change tracker {#mdata_doc}
-
-## Introduction
-
-The mdata plugin uses the vlib main loop "before" performance counter
-hook to snapshot buffer metadata before calling the node dispatch
-function. Similarly, the plugin uses the main loop "after" hook to
-compare a vector's worth of buffer metadata after the fact.
-
-The comparison function is a simple octet-by-octet A != B check. We
-accumulate changed octets per-node across the entire run, using a
-single spinlock-protected accumulator.
-
-The "show buffer metadata" command produces a report of all fields
-whose values are changed by nodes visited during a given run.
-
-Since many fields in the vnet_buffer_opaque_t are union members,
-it may appear that a certain node changes numerous fields. The entire
-point of the exercise is to warn developers that if a packet visits
-node N, data placed into opaque union field F *will* be affected.
-
-One should never assume much about buffer metadata immutability across
-arbitrary subgraphs. This tool generates accurate reports, to the
-extent that one exercises the required subgraph trajectories.
diff --git a/src/plugins/mdata/mdata_doc.rst b/src/plugins/mdata/mdata_doc.rst
new file mode 100644
index 00000000000..95746bd3d0e
--- /dev/null
+++ b/src/plugins/mdata/mdata_doc.rst
@@ -0,0 +1,26 @@
+Buffer metadata change tracker
+==============================
+
+Introduction
+------------
+
+The mdata plugin uses the vlib main loop “before” performance counter
+hook to snapshot buffer metadata before calling the node dispatch
+function. Similarly, the plugin uses the main loop “after” hook to
+compare a vector’s worth of buffer metadata after the fact.
+
+The comparison function is a simple octet-by-octet A != B check. We
+accumulate changed octets per-node across the entire run, using a single
+spinlock-protected accumulator.
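+
+As a rough sketch (illustrative only, with hypothetical names, not the
+plugin's actual code), the comparison amounts to:
+
+::
+
+   /* Sticky, per-octet change accumulation. */
+   static void
+   accumulate_changes (const unsigned char *before,
+                       const unsigned char *after,
+                       unsigned char *changed, int n_octets)
+   {
+     for (int i = 0; i < n_octets; i++)
+       changed[i] |= (before[i] != after[i]);
+   }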
+
+The “show buffer metadata” command produces a report of all fields whose
+values are changed by nodes visited during a given run.
+
+Since many fields in the vnet_buffer_opaque_t are union members, it may
+appear that a certain node changes numerous fields. The entire point of
+the exercise is to warn developers that if a packet visits node N, data
+placed into opaque union field F *will* be affected.
+
+One should never assume much about buffer metadata immutability across
+arbitrary subgraphs. This tool generates accurate reports, to the extent
+that one exercises the required subgraph trajectories.
diff --git a/src/plugins/memif/CMakeLists.txt b/src/plugins/memif/CMakeLists.txt
index b86d30adb97..4bbf6ba39db 100644
--- a/src/plugins/memif/CMakeLists.txt
+++ b/src/plugins/memif/CMakeLists.txt
@@ -33,3 +33,5 @@ add_vpp_plugin(memif
INSTALL_HEADERS
memif.h
)
+
+add_compile_definitions(MEMIF_CACHELINE_SIZE=${VPP_CACHE_LINE_SIZE})
diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c
index b313e9737b3..c2ed63747fa 100644
--- a/src/plugins/memif/cli.c
+++ b/src/plugins/memif/cli.c
@@ -33,7 +33,7 @@ memif_socket_filename_create_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- int r;
+ clib_error_t *err;
u32 socket_id;
u8 *socket_filename;
@@ -53,6 +53,7 @@ memif_socket_filename_create_command_fn (vlib_main_t * vm,
else
{
vec_free (socket_filename);
+ unformat_free (line_input);
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
@@ -72,37 +73,18 @@ memif_socket_filename_create_command_fn (vlib_main_t * vm,
return clib_error_return (0, "Invalid socket filename");
}
- r = memif_socket_filename_add_del (1, socket_id, socket_filename);
+ err = memif_socket_filename_add_del (1, socket_id, (char *) socket_filename);
vec_free (socket_filename);
- if (r < 0)
- {
- switch (r)
- {
- case VNET_API_ERROR_INVALID_ARGUMENT:
- return clib_error_return (0, "Invalid argument");
- case VNET_API_ERROR_SYSCALL_ERROR_1:
- return clib_error_return (0, "Syscall error 1");
- case VNET_API_ERROR_ENTRY_ALREADY_EXISTS:
- return clib_error_return (0, "Already exists");
- case VNET_API_ERROR_UNEXPECTED_INTF_STATE:
- return clib_error_return (0, "Interface still in use");
- default:
- return clib_error_return (0, "Unknown error");
- }
- }
-
- return 0;
+ return err;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (memif_socket_filename_create_command, static) = {
.path = "create memif socket",
.short_help = "create memif socket [id <id>] [filename <path>]",
.function = memif_socket_filename_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
memif_socket_filename_delete_command_fn (vlib_main_t * vm,
@@ -110,7 +92,6 @@ memif_socket_filename_delete_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- int r;
u32 socket_id;
/* Get a line of input. */
@@ -125,6 +106,7 @@ memif_socket_filename_delete_command_fn (vlib_main_t * vm,
;
else
{
+ unformat_free (line_input);
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
@@ -137,42 +119,21 @@ memif_socket_filename_delete_command_fn (vlib_main_t * vm,
return clib_error_return (0, "Invalid socket id");
}
- r = memif_socket_filename_add_del (0, socket_id, 0);
-
- if (r < 0)
- {
- switch (r)
- {
- case VNET_API_ERROR_INVALID_ARGUMENT:
- return clib_error_return (0, "Invalid argument");
- case VNET_API_ERROR_SYSCALL_ERROR_1:
- return clib_error_return (0, "Syscall error 1");
- case VNET_API_ERROR_ENTRY_ALREADY_EXISTS:
- return clib_error_return (0, "Already exists");
- case VNET_API_ERROR_UNEXPECTED_INTF_STATE:
- return clib_error_return (0, "Interface still in use");
- default:
- return clib_error_return (0, "Unknown error");
- }
- }
-
- return 0;
+ return memif_socket_filename_add_del (0, socket_id, 0);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (memif_socket_filename_delete_command, static) = {
.path = "delete memif socket",
.short_help = "delete memif socket [id <id>]",
.function = memif_socket_filename_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- int r;
+ clib_error_t *err;
u32 ring_size = MEMIF_DEFAULT_RING_SIZE;
memif_create_if_args_t args = { 0 };
args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE;
@@ -207,14 +168,19 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
args.is_master = 0;
else if (unformat (line_input, "no-zero-copy"))
args.is_zero_copy = 0;
+ else if (unformat (line_input, "use-dma"))
+ args.use_dma = 1;
else if (unformat (line_input, "mode ip"))
args.mode = MEMIF_INTERFACE_MODE_IP;
else if (unformat (line_input, "hw-addr %U",
unformat_ethernet_address, args.hw_addr))
args.hw_addr_set = 1;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
}
unformat_free (line_input);
@@ -234,27 +200,13 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
args.rx_queues = rx_queues;
args.tx_queues = tx_queues;
- r = memif_create_if (vm, &args);
+ err = memif_create_if (vm, &args);
vec_free (args.secret);
- if (r <= VNET_API_ERROR_SYSCALL_ERROR_1
- && r >= VNET_API_ERROR_SYSCALL_ERROR_10)
- return clib_error_return (0, "%s (errno %d)", strerror (errno), errno);
-
- if (r == VNET_API_ERROR_INVALID_ARGUMENT)
- return clib_error_return (0, "Invalid argument");
-
- if (r == VNET_API_ERROR_INVALID_INTERFACE)
- return clib_error_return (0, "Invalid interface name");
-
- if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS)
- return clib_error_return (0, "Interface with same id already exists");
-
- return 0;
+ return err;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (memif_create_command, static) = {
.path = "create interface memif",
.short_help = "create interface memif [id <id>] [socket-id <socket-id>] "
@@ -264,7 +216,6 @@ VLIB_CLI_COMMAND (memif_create_command, static) = {
"[mode ip] [secret <string>]",
.function = memif_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -289,8 +240,11 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
vnm, &sw_if_index))
;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
}
unformat_free (line_input);
@@ -308,13 +262,11 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (memif_delete_command, static) = {
.path = "delete interface memif",
.short_help = "delete interface memif {<interface> | sw_if_index <sw_idx>}",
.function = memif_delete_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_memif_if_flags (u8 * s, va_list * args)
@@ -378,23 +330,22 @@ format_memif_descriptor (u8 * s, va_list * args)
if (ring)
{
s = format (s, "%Udescriptor table:\n", format_white_space, indent);
- s =
- format (s,
- "%Uid flags len address offset user address\n",
- format_white_space, indent);
- s =
- format (s,
- "%U===== ===== ======== ================== ====== ==================\n",
- format_white_space, indent);
+ s = format (s,
+ "%Uid flags region len address offset "
+ " user address\n",
+ format_white_space, indent);
+ s = format (s,
+ "%U===== ===== ====== ======== ================== "
+ "========== ==================\n",
+ format_white_space, indent);
for (slot = 0; slot < ring_size; slot++)
{
- s = format (s, "%U%-5d %-5d %-7d 0x%016lx %-6d 0x%016lx\n",
- format_white_space, indent, slot,
- ring->desc[slot].flags,
- ring->desc[slot].length,
+ s = format (s, "%U%-5d %-5d %-6d %-7d 0x%016lx %-10d 0x%016lx\n",
+ format_white_space, indent, slot, ring->desc[slot].flags,
+ ring->desc[slot].region, ring->desc[slot].length,
mif->regions[ring->desc[slot].region].shm,
- ring->desc[slot].offset, memif_get_buffer (mif, ring,
- slot));
+ ring->desc[slot].offset,
+ memif_get_buffer (mif, ring, slot));
}
s = format (s, "\n");
}
@@ -437,7 +388,6 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "sockets\n");
vlib_cli_output (vm, " %-3s %-11s %s\n", "id", "listener", "filename");
- /* *INDENT-OFF* */
hash_foreach (sock_id, msf_idx, mm->socket_file_index_by_sock_id,
({
memif_socket_file_t *msf;
@@ -453,17 +403,14 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output(vm, " %-3u %-11v %s\n", sock_id, s, filename);
vec_reset_length (s);
}));
- /* *INDENT-ON* */
vec_free (s);
vlib_cli_output (vm, "\n");
if (vec_len (hw_if_indices) == 0)
{
- /* *INDENT-OFF* */
pool_foreach (mif, mm->interfaces)
vec_add1 (hw_if_indices, mif->hw_if_index);
- /* *INDENT-ON* */
}
for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++)
@@ -498,7 +445,6 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, " remote-disc-reason \"%s\"",
mif->remote_disc_string);
- /* *INDENT-OFF* */
vec_foreach_index (i, mif->regions)
{
mr = vec_elt_at_index (mif->regions, i);
@@ -519,20 +465,17 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (show_descr)
vlib_cli_output (vm, " %U", format_memif_descriptor, mif, mq);
}
- /* *INDENT-ON* */
}
done:
vec_free (hw_if_indices);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (memif_show_command, static) = {
.path = "show memif",
.short_help = "show memif [<interface>] [descriptors]",
.function = memif_show_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
memif_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c
index fc66420a6ad..017a001168b 100644
--- a/src/plugins/memif/device.c
+++ b/src/plugins/memif/device.c
@@ -194,8 +194,8 @@ retry:
else
{
/* we need to rollback vectors before bailing out */
- _vec_len (ptd->buffers) = saved_ptd_buffers_len;
- _vec_len (ptd->copy_ops) = saved_ptd_copy_ops_len;
+ vec_set_len (ptd->buffers, saved_ptd_buffers_len);
+ vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len);
vlib_error_count (vm, node->node_index,
MEMIF_TX_ERROR_ROLLBACK, 1);
slot = saved_slot;
@@ -369,6 +369,270 @@ no_free_slots:
return n_left;
}
+CLIB_MARCH_FN (memif_tx_dma_completion_cb, void, vlib_main_t *vm,
+ vlib_dma_batch_t *b)
+{
+ memif_main_t *mm = &memif_main;
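+  /* the batch cookie packs the interface index in the upper 16 bits
+     and the tx queue index in the lower 16 bits; it is set at submit
+     time in memif_interface_tx_dma_inline () */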
+ memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16);
+ memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, b->cookie & 0xffff);
+ memif_dma_info_t *dma_info = mq->dma_info + mq->dma_info_head;
+ memif_per_thread_data_t *ptd = &dma_info->data;
+
+ vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));
+
+ dma_info->finished = 1;
+ vec_reset_length (ptd->buffers);
+ vec_reset_length (ptd->copy_ops);
+
+ __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE);
+
+ mq->dma_info_head++;
+ if (mq->dma_info_head == mq->dma_info_size)
+ mq->dma_info_head = 0;
+ mq->dma_info_full = 0;
+}
+
+#ifndef CLIB_MARCH_VARIANT
+void
+memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b)
+{
+ return CLIB_MARCH_FN_SELECT (memif_tx_dma_completion_cb) (vm, b);
+}
+#endif
+
+static_always_inline uword
+memif_interface_tx_dma_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 *buffers, memif_if_t *mif,
+ memif_ring_type_t type, memif_queue_t *mq,
+ u32 n_left)
+{
+ memif_ring_t *ring;
+ u32 n_copy_op;
+ u16 ring_size, mask, slot, free_slots;
+ int n_retries = 5, fallback = 0;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ memif_copy_op_t *co;
+ memif_region_index_t last_region = ~0;
+ void *last_region_shm = 0;
+ u16 head, tail;
+ memif_dma_info_t *dma_info;
+ memif_per_thread_data_t *ptd;
+ memif_main_t *mm = &memif_main;
+ u16 mif_id = mif - mm->interfaces;
+
+ ring = mq->ring;
+ ring_size = 1 << mq->log2_ring_size;
+ mask = ring_size - 1;
+
+ dma_info = mq->dma_info + mq->dma_info_tail;
+ ptd = &dma_info->data;
+
+ /* do software fallback if dma info ring is full */
+ u16 dma_mask = mq->dma_info_size - 1;
+ if ((((mq->dma_info_tail + 1) & dma_mask) == mq->dma_info_head) ||
+ ((mq->dma_info_head == dma_mask) && (mq->dma_info_tail == 0)))
+ {
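+      /* the first time the ring looks full we only mark it; if it is
+         still full on a later frame, fall back to the CPU copy path */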
+ if (!mq->dma_info_full)
+ mq->dma_info_full = 1;
+ else
+ fallback = 1;
+ }
+
+ vlib_dma_batch_t *b = NULL;
+ if (PREDICT_TRUE (!fallback))
+ b = vlib_dma_batch_new (vm, mif->dma_tx_config);
+ if (!b)
+ return n_left;
+
+retry:
+
+ slot = tail = mq->dma_tail;
+ head = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE);
+ mq->last_tail += tail - mq->last_tail;
+ free_slots = head - mq->dma_tail;
+
+ while (n_left && free_slots)
+ {
+ memif_desc_t *d0;
+ void *mb0;
+ i32 src_off;
+ u32 bi0, dst_off, src_left, dst_left, bytes_to_copy;
+ u32 saved_ptd_copy_ops_len = _vec_len (ptd->copy_ops);
+ u32 saved_ptd_buffers_len = _vec_len (ptd->buffers);
+ u16 saved_slot = slot;
+
+ clib_prefetch_load (&ring->desc[(slot + 8) & mask]);
+
+ d0 = &ring->desc[slot & mask];
+ if (PREDICT_FALSE (last_region != d0->region))
+ {
+ last_region_shm = mif->regions[d0->region].shm;
+ last_region = d0->region;
+ }
+ mb0 = last_region_shm + d0->offset;
+
+ dst_off = 0;
+
+ /* slave is the producer, so it should be able to reset buffer length */
+ dst_left = d0->length;
+
+ if (PREDICT_TRUE (n_left >= 4))
+ vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);
+ bi0 = buffers[0];
+
+ next_in_chain:
+
+ b0 = vlib_get_buffer (vm, bi0);
+ src_off = b0->current_data;
+ src_left = b0->current_length;
+
+ while (src_left)
+ {
+ if (PREDICT_FALSE (dst_left == 0))
+ {
+ if (free_slots)
+ {
+ d0->length = dst_off;
+ d0->flags = MEMIF_DESC_FLAG_NEXT;
+ d0 = &ring->desc[slot & mask];
+ dst_off = 0;
+ dst_left = (type == MEMIF_RING_S2M) ? mif->run.buffer_size :
+ d0->length;
+
+ if (PREDICT_FALSE (last_region != d0->region))
+ {
+ last_region_shm = mif->regions[d0->region].shm;
+ last_region = d0->region;
+ }
+ mb0 = last_region_shm + d0->offset;
+ }
+ else
+ {
+ /* we need to rollback vectors before bailing out */
+ vec_set_len (ptd->buffers, saved_ptd_buffers_len);
+ vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len);
+ vlib_error_count (vm, node->node_index,
+ MEMIF_TX_ERROR_ROLLBACK, 1);
+ slot = saved_slot;
+ goto no_free_slots;
+ }
+ }
+ bytes_to_copy = clib_min (src_left, dst_left);
+ memif_add_copy_op (ptd, mb0 + dst_off, bytes_to_copy, src_off,
+ vec_len (ptd->buffers));
+ src_off += bytes_to_copy;
+ dst_off += bytes_to_copy;
+ src_left -= bytes_to_copy;
+ dst_left -= bytes_to_copy;
+ }
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ slot++;
+ free_slots--;
+ bi0 = b0->next_buffer;
+ goto next_in_chain;
+ }
+
+ vec_add1_aligned (ptd->buffers, buffers[0], CLIB_CACHE_LINE_BYTES);
+ d0->length = dst_off;
+ d0->flags = 0;
+
+ free_slots -= 1;
+ slot += 1;
+
+ buffers++;
+ n_left--;
+ }
+no_free_slots:
+
+ /* copy data */
+ n_copy_op = vec_len (ptd->copy_ops);
+ co = ptd->copy_ops;
+ while (n_copy_op >= 8)
+ {
+ clib_prefetch_load (co[4].data);
+ clib_prefetch_load (co[5].data);
+ clib_prefetch_load (co[6].data);
+ clib_prefetch_load (co[7].data);
+
+ b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+ b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
+ b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
+ b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);
+
+ if (PREDICT_TRUE (!fallback))
+ {
+ vlib_dma_batch_add (vm, b, co[0].data,
+ b0->data + co[0].buffer_offset, co[0].data_len);
+ vlib_dma_batch_add (vm, b, co[1].data,
+ b1->data + co[1].buffer_offset, co[1].data_len);
+ vlib_dma_batch_add (vm, b, co[2].data,
+ b2->data + co[2].buffer_offset, co[2].data_len);
+ vlib_dma_batch_add (vm, b, co[3].data,
+ b3->data + co[3].buffer_offset, co[3].data_len);
+ }
+ else
+ {
+ clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+ co[0].data_len);
+ clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset,
+ co[1].data_len);
+ clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset,
+ co[2].data_len);
+ clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset,
+ co[3].data_len);
+ }
+
+ co += 4;
+ n_copy_op -= 4;
+ }
+ while (n_copy_op)
+ {
+ b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+ if (PREDICT_TRUE (!fallback))
+ vlib_dma_batch_add (vm, b, co[0].data, b0->data + co[0].buffer_offset,
+ co[0].data_len);
+ else
+ clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+ co[0].data_len);
+ co += 1;
+ n_copy_op -= 1;
+ }
+
+ /* save dma info before retry */
+ dma_info->dma_tail = slot;
+ mq->dma_tail = slot;
+ vec_reset_length (ptd->copy_ops);
+
+ if (n_left && n_retries--)
+ goto retry;
+
+ if (PREDICT_TRUE (!fallback))
+ {
+ vlib_dma_batch_set_cookie (vm, b,
+ ((u64) mif_id << 16) | (mq - mif->tx_queues));
+ vlib_dma_batch_submit (vm, b);
+ dma_info->finished = 0;
+
+ if (b->n_enq)
+ {
+ mq->dma_info_tail++;
+ if (mq->dma_info_tail == mq->dma_info_size)
+ mq->dma_info_tail = 0;
+ }
+ }
+ else if (fallback && dma_info->finished)
+ {
+ /* if dma has been completed, update ring immediately */
+ vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));
+ vec_reset_length (ptd->buffers);
+ __atomic_store_n (&mq->ring->tail, slot, __ATOMIC_RELEASE);
+ }
+
+ return n_left;
+}
+
VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -376,22 +640,19 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
memif_main_t *nm = &memif_main;
vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
memif_if_t *mif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
memif_queue_t *mq;
+ u32 qid = tf->queue_id;
u32 *from, thread_index = vm->thread_index;
memif_per_thread_data_t *ptd = vec_elt_at_index (memif_main.per_thread_data,
thread_index);
- u8 tx_queues = vec_len (mif->tx_queues);
uword n_left;
- if (tx_queues < vlib_get_n_threads ())
- {
- ASSERT (tx_queues > 0);
- mq = vec_elt_at_index (mif->tx_queues, thread_index % tx_queues);
- }
- else
- mq = vec_elt_at_index (mif->tx_queues, thread_index);
+ ASSERT (vec_len (mif->tx_queues) > qid);
+ mq = vec_elt_at_index (mif->tx_queues, qid);
- clib_spinlock_lock_if_init (&mif->lockp);
+ if (tf->shared_queue)
+ clib_spinlock_lock (&mq->lockp);
from = vlib_frame_vector_args (frame);
n_left = frame->n_vectors;
@@ -402,10 +663,17 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_S2M,
mq, ptd, n_left);
else
- n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_M2S,
- mq, ptd, n_left);
+ {
+ if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
+ n_left = memif_interface_tx_dma_inline (vm, node, from, mif,
+ MEMIF_RING_M2S, mq, n_left);
+ else
+ n_left = memif_interface_tx_inline (vm, node, from, mif,
+ MEMIF_RING_M2S, mq, ptd, n_left);
+ }
- clib_spinlock_unlock_if_init (&mif->lockp);
+ if (tf->shared_queue)
+ clib_spinlock_unlock (&mq->lockp);
if (n_left)
vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS,
@@ -418,7 +686,12 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
mq->int_count++;
}
- if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0)
+ if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
+ {
+ if (n_left)
+ vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
+ }
+ else if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0)
vlib_buffer_free (vm, from, frame->n_vectors);
else if (n_left)
vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
@@ -468,16 +741,6 @@ memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
return 0;
}
-static clib_error_t *
-memif_subif_add_del_function (vnet_main_t * vnm,
- u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- /* Nothing for now */
- return 0;
-}
-
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (memif_device_class) = {
.name = "memif",
.format_device_name = format_memif_device_name,
@@ -488,11 +751,9 @@ VNET_DEVICE_CLASS (memif_device_class) = {
.rx_redirect_to_node = memif_set_interface_next_node,
.clear_counters = memif_clear_hw_interface_counters,
.admin_up_down_function = memif_interface_admin_up_down,
- .subif_add_del_function = memif_subif_add_del_function,
.rx_mode_change_function = memif_interface_rx_mode_change,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api
index 9e32db5b470..5973ad60054 100644
--- a/src/plugins/memif/memif.api
+++ b/src/plugins/memif/memif.api
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-option version = "3.0.0";
+option version = "3.1.0";
import "vnet/interface_types.api";
import "vnet/ethernet/ethernet_types.api";
@@ -43,6 +43,8 @@ enum memif_mode
*/
autoreply define memif_socket_filename_add_del
{
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add; /* 0 = remove, 1 = add association */
@@ -51,6 +53,40 @@ autoreply define memif_socket_filename_add_del
option vat_help = "[add|del] id <id> filename <file>";
};
+/** \brief Create or remove named socket file for memif interfaces
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 0 = remove, 1 = add association
+ @param socket_id - non-0 32-bit integer used to identify a socket file
+ ~0 means autogenerate
+ @param socket_filename - filename of the socket to be used for connection
+ establishment; id 0 always maps to default "/var/vpp/memif.sock";
+ no socket filename needed when is_add == 0.
+ socket_filename starting with '@' will create an abstract socket
+ in the given namespace
+*/
+define memif_socket_filename_add_del_v2
+{
+ u32 client_index;
+ u32 context;
+ bool is_add; /* 0 = remove, 1 = add association */
+ u32 socket_id [default=0xffffffff]; /* unique non-0 id for given socket file name */
+ string socket_filename[]; /* NUL terminated filename */
+ option vat_help = "[add|del] id <id> filename <file>";
+};
+
+/** \brief Create memory interface socket file response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param socket_id - non-0 32-bit integer used to identify a socket file
+*/
+define memif_socket_filename_add_del_v2_reply
+{
+ u32 context;
+ i32 retval;
+ u32 socket_id;
+};
+
/** \brief Create memory interface
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -70,6 +106,8 @@ autoreply define memif_socket_filename_add_del
*/
define memif_create
{
+ option deprecated;
+
u32 client_index;
u32 context;
@@ -94,6 +132,58 @@ define memif_create
*/
define memif_create_reply
{
+ option deprecated;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Create memory interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param role - role of the interface in the connection (master/slave)
+ @param mode - interface mode
+ @param rx_queues - number of rx queues (only valid for slave)
+ @param tx_queues - number of tx queues (only valid for slave)
+ @param id - 32bit integer used to authenticate and match opposite sides
+ of the connection
+ @param socket_id - socket filename id to be used for connection
+ establishment
+ @param ring_size - the number of entries of RX/TX rings
+ @param buffer_size - size of the buffer allocated for each ring entry
+ @param no_zero_copy - if true, disable zero copy
+    @param use_dma - if true, use DMA to accelerate memory copy
+ @param hw_addr - interface MAC address
+ @param secret - optional, default is "", max length 24
+*/
+define memif_create_v2
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_memif_role_t role; /* 0 = master, 1 = slave */
+ vl_api_memif_mode_t mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */
+ u8 rx_queues; /* optional, default is 1 */
+ u8 tx_queues; /* optional, default is 1 */
+ u32 id; /* optional, default is 0 */
+ u32 socket_id; /* optional, default is 0, "/var/vpp/memif.sock" */
+ u32 ring_size; /* optional, default is 1024 entries, must be power of 2 */
+ u16 buffer_size; /* optional, default is 2048 bytes */
+ bool no_zero_copy; /* disable zero copy */
+ bool use_dma; /* use dma acceleration */
+ vl_api_mac_address_t hw_addr; /* optional, randomly generated if zero */
+ string secret[24]; /* optional, default is "", max length 24 */
+ option vat_help = "[id <id>] [socket-id <id>] [ring_size <size>] [buffer_size <size>] [hw_addr <mac_address>] [secret <string>] [mode ip] <master|slave>";
+};
+
+/** \brief Create memory interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param sw_if_index - software index of the newly created interface
+*/
+define memif_create_v2_reply
+{
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c
index 9bbbe7f9d89..7e3dd44db2c 100644
--- a/src/plugins/memif/memif.c
+++ b/src/plugins/memif/memif.c
@@ -26,7 +26,6 @@
#include <sys/un.h>
#include <sys/uio.h>
#include <sys/mman.h>
-#include <sys/prctl.h>
#include <sys/eventfd.h>
#include <inttypes.h>
#include <limits.h>
@@ -36,6 +35,7 @@
#include <vnet/plugin/plugin.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
#include <vpp/app/version.h>
#include <memif/memif.h>
#include <memif/private.h>
@@ -49,6 +49,14 @@ memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
return 0;
}
+static clib_error_t *
+memif_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ u32 flags)
+{
+ /* nothing for now */
+ return 0;
+}
+
static void
memif_queue_intfd_close (memif_queue_t * mq)
{
@@ -91,6 +99,8 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err)
memif_region_t *mr;
memif_queue_t *mq;
int i;
+ vlib_main_t *vm = vlib_get_main ();
+ int with_barrier = 0;
if (mif == 0)
return;
@@ -132,7 +142,12 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err)
clib_mem_free (mif->sock);
}
- /* *INDENT-OFF* */
+ if (vlib_worker_thread_barrier_held () == 0)
+ {
+ with_barrier = 1;
+ vlib_worker_thread_barrier_sync (vm);
+ }
+
vec_foreach_index (i, mif->rx_queues)
{
mq = vec_elt_at_index (mif->rx_queues, i);
@@ -146,9 +161,7 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err)
}
}
vnet_hw_if_unregister_all_rx_queues (vnm, mif->hw_if_index);
- vnet_hw_if_update_runtime_data (vnm, mif->hw_if_index);
- /* *INDENT-OFF* */
vec_foreach_index (i, mif->tx_queues)
{
mq = vec_elt_at_index (mif->tx_queues, i);
@@ -158,9 +171,12 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err)
{
memif_disconnect_free_zc_queue_buffer(mq, 0);
}
+ clib_spinlock_free (&mq->lockp);
}
mq->ring = 0;
}
+ vnet_hw_if_unregister_all_tx_queues (vnm, mif->hw_if_index);
+ vnet_hw_if_update_runtime_data (vnm, mif->hw_if_index);
/* free tx and rx queues */
vec_foreach (mq, mif->rx_queues)
@@ -182,11 +198,13 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err)
if (mr->fd > -1)
close (mr->fd);
}
- /* *INDENT-ON* */
vec_free (mif->regions);
vec_free (mif->remote_name);
vec_free (mif->remote_if_name);
clib_fifo_free (mif->msg_queue);
+
+ if (with_barrier)
+ vlib_worker_thread_barrier_release (vm);
}
static clib_error_t *
@@ -228,19 +246,22 @@ memif_int_fd_read_ready (clib_file_t * uf)
clib_error_t *
memif_connect (memif_if_t * mif)
{
+ memif_main_t *mm = &memif_main;
vlib_main_t *vm = vlib_get_main ();
vnet_main_t *vnm = vnet_get_main ();
clib_file_t template = { 0 };
memif_region_t *mr;
- int i;
+ int i, j;
+ u32 n_txqs = 0, n_threads = vlib_get_n_threads ();
clib_error_t *err = NULL;
+ u8 max_log2_ring_sz = 0;
+ int with_barrier = 0;
memif_log_debug (mif, "connect %u", mif->dev_instance);
vec_free (mif->local_disc_string);
vec_free (mif->remote_disc_string);
- /* *INDENT-OFF* */
vec_foreach (mr, mif->regions)
{
if (mr->shm)
@@ -259,15 +280,21 @@ memif_connect (memif_if_t * mif)
goto error;
}
}
- /* *INDENT-ON* */
template.read_function = memif_int_fd_read_ready;
template.write_function = memif_int_fd_write_ready;
- /* *INDENT-OFF* */
+ with_barrier = 1;
+ if (vlib_worker_thread_barrier_held ())
+ with_barrier = 0;
+
+ if (with_barrier)
+ vlib_worker_thread_barrier_sync (vm);
+
vec_foreach_index (i, mif->tx_queues)
{
memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i);
+ max_log2_ring_sz = clib_max (max_log2_ring_sz, mq->log2_ring_size);
mq->ring = mif->regions[mq->region].shm + mq->offset;
if (mq->ring->cookie != MEMIF_COOKIE)
@@ -275,6 +302,50 @@ memif_connect (memif_if_t * mif)
err = clib_error_return (0, "wrong cookie on tx ring %u", i);
goto error;
}
+ mq->queue_index =
+ vnet_hw_if_register_tx_queue (vnm, mif->hw_if_index, i);
+ clib_spinlock_init (&mq->lockp);
+
+ if (mif->flags & MEMIF_IF_FLAG_USE_DMA)
+ {
+ memif_dma_info_t *dma_info;
+ mq->dma_head = 0;
+ mq->dma_tail = 0;
+ mq->dma_info_head = 0;
+ mq->dma_info_tail = 0;
+ mq->dma_info_size = MEMIF_DMA_INFO_SIZE;
+ vec_validate_aligned (mq->dma_info, MEMIF_DMA_INFO_SIZE,
+ CLIB_CACHE_LINE_BYTES);
+
+ vec_foreach (dma_info, mq->dma_info)
+ {
+ vec_validate_aligned (dma_info->data.desc_data,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.desc_len,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.desc_status,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.copy_ops, 0,
+ CLIB_CACHE_LINE_BYTES);
+ vec_reset_length (dma_info->data.copy_ops);
+ vec_validate_aligned (dma_info->data.buffers, 0,
+ CLIB_CACHE_LINE_BYTES);
+ vec_reset_length (dma_info->data.buffers);
+ }
+ }
+ }
+
+ if (vec_len (mif->tx_queues) > 0)
+ {
+ n_txqs = vec_len (mif->tx_queues);
+ for (j = 0; j < n_threads; j++)
+ {
+ u32 qi = mif->tx_queues[j % n_txqs].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, j);
+ }
}
vec_foreach_index (i, mif->rx_queues)
@@ -284,6 +355,8 @@ memif_connect (memif_if_t * mif)
u32 qi;
int rv;
+ max_log2_ring_sz = clib_max (max_log2_ring_sz, mq->log2_ring_size);
+
mq->ring = mif->regions[mq->region].shm + mq->offset;
if (mq->ring->cookie != MEMIF_COOKIE)
{
@@ -293,6 +366,37 @@ memif_connect (memif_if_t * mif)
qi = vnet_hw_if_register_rx_queue (vnm, mif->hw_if_index, i,
VNET_HW_IF_RXQ_THREAD_ANY);
mq->queue_index = qi;
+
+ if (mif->flags & MEMIF_IF_FLAG_USE_DMA)
+ {
+ memif_dma_info_t *dma_info;
+ mq->dma_head = 0;
+ mq->dma_tail = 0;
+ mq->dma_info_head = 0;
+ mq->dma_info_tail = 0;
+ mq->dma_info_size = MEMIF_DMA_INFO_SIZE;
+ vec_validate_aligned (mq->dma_info, MEMIF_DMA_INFO_SIZE,
+ CLIB_CACHE_LINE_BYTES);
+ vec_foreach (dma_info, mq->dma_info)
+ {
+ vec_validate_aligned (dma_info->data.desc_data,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.desc_len,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.desc_status,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.copy_ops, 0,
+ CLIB_CACHE_LINE_BYTES);
+ vec_reset_length (dma_info->data.copy_ops);
+ vec_validate_aligned (dma_info->data.buffers, 0,
+ CLIB_CACHE_LINE_BYTES);
+ vec_reset_length (dma_info->data.buffers);
+ }
+ }
+
if (mq->int_fd > -1)
{
template.file_descriptor = mq->int_fd;
@@ -324,7 +428,23 @@ memif_connect (memif_if_t * mif)
vnet_hw_if_rx_queue_set_int_pending (vnm, qi);
}
}
- /* *INDENT-ON* */
+
+ if (1 << max_log2_ring_sz > vec_len (mm->per_thread_data[0].desc_data))
+ {
+ memif_per_thread_data_t *ptd;
+
+ vec_foreach (ptd, mm->per_thread_data)
+ {
+ vec_validate_aligned (ptd->desc_data, pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ptd->desc_len, pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ptd->desc_status, pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ }
+ }
+ if (with_barrier)
+ vlib_worker_thread_barrier_release (vm);
mif->flags &= ~MEMIF_IF_FLAG_CONNECTING;
mif->flags |= MEMIF_IF_FLAG_CONNECTED;
@@ -334,6 +454,8 @@ memif_connect (memif_if_t * mif)
return 0;
error:
+ if (with_barrier)
+ vlib_worker_thread_barrier_release (vm);
memif_log_err (mif, "%U", format_clib_error, err);
return err;
}
@@ -405,7 +527,6 @@ memif_init_regions_and_queues (memif_if_t * mif)
if (mif->flags & MEMIF_IF_FLAG_ZERO_COPY)
{
vlib_buffer_pool_t *bp;
- /* *INDENT-OFF* */
vec_foreach (bp, vm->buffer_main->buffer_pools)
{
vlib_physmem_map_t *pm;
@@ -416,7 +537,6 @@ memif_init_regions_and_queues (memif_if_t * mif)
r->shm = pm->base;
r->is_external = 1;
}
- /* *INDENT-ON* */
}
for (i = 0; i < mif->run.num_s2m_rings; i++)
@@ -461,7 +581,6 @@ memif_init_regions_and_queues (memif_if_t * mif)
vec_validate_aligned (mif->tx_queues, mif->run.num_s2m_rings - 1,
CLIB_CACHE_LINE_BYTES);
- /* *INDENT-OFF* */
vec_foreach_index (i, mif->tx_queues)
{
memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i);
@@ -470,6 +589,7 @@ memif_init_regions_and_queues (memif_if_t * mif)
err = clib_error_return_unix (0, "eventfd[tx queue %u]", i);
goto error;
}
+
mq->int_clib_file_index = ~0;
mq->ring = memif_get_ring (mif, MEMIF_RING_S2M, i);
mq->log2_ring_size = mif->cfg.log2_ring_size;
@@ -481,13 +601,11 @@ memif_init_regions_and_queues (memif_if_t * mif)
vec_validate_aligned (mq->buffers, 1 << mq->log2_ring_size,
CLIB_CACHE_LINE_BYTES);
}
- /* *INDENT-ON* */
ASSERT (mif->rx_queues == 0);
vec_validate_aligned (mif->rx_queues, mif->run.num_m2s_rings - 1,
CLIB_CACHE_LINE_BYTES);
- /* *INDENT-OFF* */
vec_foreach_index (i, mif->rx_queues)
{
memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i);
@@ -507,7 +625,6 @@ memif_init_regions_and_queues (memif_if_t * mif)
vec_validate_aligned (mq->buffers, 1 << mq->log2_ring_size,
CLIB_CACHE_LINE_BYTES);
}
- /* *INDENT-ON* */
return 0;
@@ -558,7 +675,6 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
}
last_run_duration = start_time = vlib_time_now (vm);
- /* *INDENT-OFF* */
pool_foreach (mif, mm->interfaces)
{
memif_socket_file_t * msf = vec_elt_at_index (mm->socket_files, mif->socket_file_index);
@@ -583,8 +699,8 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
{
clib_memset (sock, 0, sizeof(clib_socket_t));
sock->config = (char *) msf->filename;
- sock->flags = CLIB_SOCKET_F_IS_CLIENT | CLIB_SOCKET_F_SEQPACKET |
- CLIB_SOCKET_F_BLOCKING;
+ sock->is_seqpacket = 1;
+ sock->is_blocking = 1;
if ((err = clib_socket_init (sock)))
{
@@ -611,162 +727,160 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
}
}
}
- /* *INDENT-ON* */
last_run_duration = vlib_time_now (vm) - last_run_duration;
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (memif_process_node,static) = {
.function = memif_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "memif-process",
};
-/* *INDENT-ON* */
-static int
-memif_add_socket_file (u32 sock_id, u8 * socket_filename)
+/*
+ * Returns an unused socket id, and ~0 if it can't find one.
+ */
+u32
+memif_get_unused_socket_id ()
{
memif_main_t *mm = &memif_main;
uword *p;
- memif_socket_file_t *msf;
+ int i, j;
- p = hash_get (mm->socket_file_index_by_sock_id, sock_id);
- if (p)
+ static u32 seed = 0;
+ /* limit to 1M tries */
+ for (j = 0; j < 1 << 10; j++)
{
- msf = pool_elt_at_index (mm->socket_files, *p);
- if (strcmp ((char *) msf->filename, (char *) socket_filename) == 0)
+ seed = random_u32 (&seed);
+ for (i = 0; i < 1 << 10; i++)
{
- /* Silently accept identical "add". */
- return 0;
+ /* look around randomly generated id */
+ seed += (2 * (i % 2) - 1) * i;
+ if (seed == (u32) ~0)
+ continue;
+ p = hash_get (mm->socket_file_index_by_sock_id, seed);
+ if (!p)
+ return seed;
}
-
- /* But don't allow a direct add of a different filename. */
- return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
}
- pool_get (mm->socket_files, msf);
- clib_memset (msf, 0, sizeof (memif_socket_file_t));
-
- msf->filename = socket_filename;
- msf->socket_id = sock_id;
-
- hash_set (mm->socket_file_index_by_sock_id, sock_id,
- msf - mm->socket_files);
-
- return 0;
+ return ~0;
}
-static int
-memif_delete_socket_file (u32 sock_id)
+clib_error_t *
+memif_socket_filename_add_del (u8 is_add, u32 sock_id, char *sock_filename)
{
memif_main_t *mm = &memif_main;
uword *p;
memif_socket_file_t *msf;
+ clib_error_t *err = 0;
+ char *dir = 0, *tmp;
+ u32 idx = 0;
+ u8 *name = 0;
- p = hash_get (mm->socket_file_index_by_sock_id, sock_id);
- if (!p)
- {
- /* Don't delete non-existent entries. */
- return VNET_API_ERROR_INVALID_ARGUMENT;
- }
+  /* socket id 0 is the default socket; it may be re-added, never deleted */
+ if (sock_id == 0 && is_add == 0)
+ return vnet_error (VNET_ERR_INVALID_ARGUMENT, "cannot delete socket id 0");
- msf = pool_elt_at_index (mm->socket_files, *p);
- if (msf->ref_cnt > 0)
+ if (sock_id == ~0)
+ return vnet_error (VNET_ERR_INVALID_ARGUMENT,
+ "socked id is not specified");
+
+ if (is_add == 0)
{
- return VNET_API_ERROR_UNEXPECTED_INTF_STATE;
- }
+ p = hash_get (mm->socket_file_index_by_sock_id, sock_id);
+ if (!p)
+ /* Don't delete non-existent entries. */
+ return vnet_error (VNET_ERR_INVALID_ARGUMENT,
+ "socket file with id %u does not exist", sock_id);
- vec_free (msf->filename);
- pool_put (mm->socket_files, msf);
+ msf = pool_elt_at_index (mm->socket_files, *p);
+ if (msf->ref_cnt > 0)
+ return vnet_error (VNET_ERR_UNEXPECTED_INTF_STATE,
+ "socket file '%s' is in use", msf->filename);
- hash_unset (mm->socket_file_index_by_sock_id, sock_id);
+ vec_free (msf->filename);
+ pool_put (mm->socket_files, msf);
- return 0;
-}
-
-int
-memif_socket_filename_add_del (u8 is_add, u32 sock_id, u8 * sock_filename)
-{
- char *dir = 0, *tmp;
- u32 idx = 0;
+ hash_unset (mm->socket_file_index_by_sock_id, sock_id);
- /* allow adding socket id 0 */
- if ((sock_id == 0 && is_add == 0) || sock_id == ~0)
- {
- return VNET_API_ERROR_INVALID_ARGUMENT;
+ return 0;
}
- if (is_add == 0)
+ if (sock_filename == 0 || sock_filename[0] == 0)
+ return vnet_error (VNET_ERR_INVALID_ARGUMENT,
+ "socket filename not specified");
+
+ if (clib_socket_prefix_is_valid (sock_filename))
{
- return memif_delete_socket_file (sock_id);
+ name = format (0, "%s%c", sock_filename, 0);
}
-
- if (sock_filename == 0 || sock_filename[0] == 0)
+ else if (sock_filename[0] == '/')
{
- return VNET_API_ERROR_INVALID_ARGUMENT;
+ name = format (0, "%s%c", sock_filename, 0);
}
-
- if (sock_filename[0] != '/')
+ else
{
- clib_error_t *error;
-
/* copy runtime dir path */
vec_add (dir, vlib_unix_get_runtime_dir (),
strlen (vlib_unix_get_runtime_dir ()));
vec_add1 (dir, '/');
/* if sock_filename contains dirs, add them to path */
- tmp = strrchr ((char *) sock_filename, '/');
+ tmp = strrchr (sock_filename, '/');
if (tmp)
{
- idx = tmp - (char *) sock_filename;
+ idx = tmp - sock_filename;
vec_add (dir, sock_filename, idx);
}
vec_add1 (dir, '\0');
/* create socket dir */
- error = vlib_unix_recursive_mkdir (dir);
- if (error)
+ if ((err = vlib_unix_recursive_mkdir (dir)))
{
- clib_error_free (error);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
+ clib_error_free (err);
+ err = vnet_error (VNET_ERR_SYSCALL_ERROR_1,
+ "unable to create socket dir");
+ goto done;
}
- sock_filename = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (),
- sock_filename, 0);
+ name =
+ format (0, "%s/%s%c", vlib_unix_get_runtime_dir (), sock_filename, 0);
}
- else
- {
- sock_filename = vec_dup (sock_filename);
- /* check if directory exists */
- tmp = strrchr ((char *) sock_filename, '/');
- if (tmp)
+ p = hash_get (mm->socket_file_index_by_sock_id, sock_id);
+ if (p)
+ {
+ msf = pool_elt_at_index (mm->socket_files, *p);
+ if (strcmp ((char *) msf->filename, (char *) name) == 0)
{
- idx = tmp - (char *) sock_filename;
- vec_add (dir, sock_filename, idx);
- vec_add1 (dir, '\0');
+ /* Silently accept identical "add". */
+ goto done;
}
- /* check dir existance and access rights for effective user/group IDs */
- if ((dir == NULL)
- ||
- (faccessat ( /* ignored */ -1, dir, F_OK | R_OK | W_OK, AT_EACCESS)
- < 0))
- {
- vec_free (dir);
- return VNET_API_ERROR_INVALID_ARGUMENT;
- }
+ /* But don't allow a direct add of a different filename. */
+ err = vnet_error (VNET_ERR_ENTRY_ALREADY_EXISTS, "entry already exists");
+ goto done;
}
- vec_free (dir);
- return memif_add_socket_file (sock_id, sock_filename);
+ pool_get (mm->socket_files, msf);
+ clib_memset (msf, 0, sizeof (memif_socket_file_t));
+
+ msf->filename = name;
+ msf->socket_id = sock_id;
+ name = 0;
+
+ hash_set (mm->socket_file_index_by_sock_id, sock_id, msf - mm->socket_files);
+
+done:
+ vec_free (name);
+ vec_free (dir);
+ return err;
}
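
/* A sketch of the filename resolution above, assuming "/run/vpp" as the
 * vlib_unix_get_runtime_dir() result (typical for a default install):
 * names with a recognized socket prefix or a leading '/' are used as-is,
 * anything else lands under the runtime directory, whose subdirectories
 * are created first. */
#include <stdio.h>

static void
resolve_socket_path (const char *runtime_dir, const char *filename,
		     char *out, size_t len)
{
  if (filename[0] == '/')
    snprintf (out, len, "%s", filename); /* absolute path: used as-is */
  else
    snprintf (out, len, "%s/%s", runtime_dir, filename);
}

int
main (void)
{
  char path[256];
  resolve_socket_path ("/run/vpp", "memif1/memif.sock", path, sizeof (path));
  /* prints /run/vpp/memif1/memif.sock; the plugin would also have
     recursively created /run/vpp/memif1 before this point */
  printf ("%s\n", path);
  return 0;
}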
-int
-memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
+clib_error_t *
+memif_delete_if (vlib_main_t *vm, memif_if_t *mif)
{
vnet_main_t *vnm = vnet_get_main ();
memif_main_t *mm = &memif_main;
@@ -797,7 +911,6 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
}
/* free interface data structures */
- clib_spinlock_free (&mif->lockp);
mhash_unset (&msf->dev_instance_by_id, &mif->id, 0);
/* remove socket file */
@@ -806,10 +919,8 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
if (msf->is_listener)
{
int i;
- /* *INDENT-OFF* */
vec_foreach_index (i, msf->pending_clients)
memif_socket_close (msf->pending_clients + i);
- /* *INDENT-ON* */
memif_socket_close (&msf->sock);
vec_free (msf->pending_clients);
}
@@ -827,6 +938,7 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
}
}
+ vec_free (mif->local_disc_string);
clib_memset (mif, 0, sizeof (*mif));
pool_put (mm->interfaces, mif);
@@ -837,33 +949,39 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
return 0;
}
-/* *INDENT-OFF* */
-VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) =
-{
+VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) = {
.name = "memif-ip",
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+ .tx_hash_fn_type = VNET_HASH_FN_TYPE_IP,
};
-/* *INDENT-ON* */
-int
-memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
+static void
+memif_prepare_dma_args (vlib_dma_config_t *args)
+{
+ args->max_batches = 256;
+ args->max_transfer_size = VLIB_BUFFER_DEFAULT_DATA_SIZE;
+ args->barrier_before_last = 1;
+ args->sw_fallback = 1;
+ args->callback_fn = NULL;
+}
+
+clib_error_t *
+memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args)
{
memif_main_t *mm = &memif_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_main_t *vnm = vnet_get_main ();
+ vnet_eth_interface_registration_t eir = {};
memif_if_t *mif = 0;
vnet_sw_interface_t *sw;
- clib_error_t *error = 0;
- int ret = 0;
uword *p;
- vnet_hw_interface_t *hw;
memif_socket_file_t *msf = 0;
- int rv = 0;
+ clib_error_t *err = 0;
p = hash_get (mm->socket_file_index_by_sock_id, args->socket_id);
if (p == 0)
{
- rv = VNET_API_ERROR_INVALID_ARGUMENT;
+ err = vnet_error (VNET_ERR_INVALID_ARGUMENT, "unknown socket id");
goto done;
}
@@ -874,14 +992,17 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
{
if ((!msf->is_listener != !args->is_master))
{
- rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+ err =
+ vnet_error (VNET_ERR_SUBIF_ALREADY_EXISTS,
+ "socket file cannot be used by both master and slave");
goto done;
}
p = mhash_get (&msf->dev_instance_by_id, &args->id);
if (p)
{
- rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+ err = vnet_error (VNET_ERR_SUBIF_ALREADY_EXISTS,
+ "interface already exists");
goto done;
}
}
@@ -889,25 +1010,6 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
/* Create new socket file */
if (msf->ref_cnt == 0)
{
- struct stat file_stat;
-
- /* If we are creating listener make sure file doesn't exist or if it
- * exists thn delete it if it is old socket file */
- if (args->is_master && (stat ((char *) msf->filename, &file_stat) == 0))
- {
- if (S_ISSOCK (file_stat.st_mode))
- {
- unlink ((char *) msf->filename);
- }
- else
- {
- error = clib_error_return (0, "File exists for %s",
- msf->filename);
- rv = VNET_API_ERROR_VALUE_EXIST;
- goto done;
- }
- }
-
mhash_init (&msf->dev_instance_by_id, sizeof (uword),
sizeof (memif_interface_id_t));
msf->dev_instance_by_fd = hash_create (0, sizeof (uword));
@@ -933,8 +1035,6 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
bt->total_length_not_including_first_buffer = 0;
vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- /* initially prealloc copy_ops so we can use
- _vec_len instead of vec_elen */
vec_validate_aligned (ptd->copy_ops, 0, CLIB_CACHE_LINE_BYTES);
vec_reset_length (ptd->copy_ops);
vec_validate_aligned (ptd->buffers, 0, CLIB_CACHE_LINE_BYTES);
@@ -952,8 +1052,19 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
if (args->secret)
mif->secret = vec_dup (args->secret);
- if (tm->n_vlib_mains > 1)
- clib_spinlock_init (&mif->lockp);
+ /* register dma config if enabled */
+ if (args->use_dma)
+ {
+ vlib_dma_config_t dma_args;
+ bzero (&dma_args, sizeof (dma_args));
+ memif_prepare_dma_args (&dma_args);
+
+ dma_args.max_transfers = 1 << args->log2_ring_size;
+ dma_args.callback_fn = memif_dma_completion_cb;
+ mif->dma_input_config = vlib_dma_config_add (vm, &dma_args);
+ dma_args.callback_fn = memif_tx_dma_completion_cb;
+ mif->dma_tx_config = vlib_dma_config_add (vm, &dma_args);
+ }
if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
{
@@ -969,10 +1080,13 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
args->hw_addr[0] = 2;
args->hw_addr[1] = 0xfe;
}
- error = ethernet_register_interface (vnm, memif_device_class.index,
- mif->dev_instance, args->hw_addr,
- &mif->hw_if_index,
- memif_eth_flag_change);
+
+ eir.dev_class_index = memif_device_class.index;
+ eir.dev_instance = mif->dev_instance;
+ eir.address = args->hw_addr;
+ eir.cb.flag_change = memif_eth_flag_change;
+ eir.cb.set_max_frame_size = memif_eth_set_max_frame_size;
+ mif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
}
else if (mif->mode == MEMIF_INTERFACE_MODE_IP)
{
@@ -983,11 +1097,9 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
mif->dev_instance);
}
else
- error = clib_error_return (0, "unsupported interface mode");
-
- if (error)
{
- ret = VNET_API_ERROR_SYSCALL_ERROR_2;
+ err =
+ vnet_error (VNET_ERR_SYSCALL_ERROR_2, "unsupported interface mode");
goto error;
}
@@ -1006,7 +1118,6 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
/* If this is new one, start listening */
if (msf->is_listener && msf->ref_cnt == 0)
{
- struct stat file_stat;
clib_socket_t *s = clib_mem_alloc (sizeof (clib_socket_t));
ASSERT (msf->sock == 0);
@@ -1014,19 +1125,15 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
clib_memset (s, 0, sizeof (clib_socket_t));
s->config = (char *) msf->filename;
- s->flags = CLIB_SOCKET_F_IS_SERVER |
- CLIB_SOCKET_F_ALLOW_GROUP_WRITE |
- CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_PASSCRED;
+ s->local_only = 1;
+ s->is_server = 1;
+ s->allow_group_write = 1;
+ s->is_seqpacket = 1;
+ s->passcred = 1;
- if ((error = clib_socket_init (s)))
+ if ((err = clib_socket_init (s)))
{
- ret = VNET_API_ERROR_SYSCALL_ERROR_4;
- goto error;
- }
-
- if (stat ((char *) msf->filename, &file_stat) == -1)
- {
- ret = VNET_API_ERROR_SYSCALL_ERROR_8;
+ err->code = VNET_ERR_SYSCALL_ERROR_4;
goto error;
}
@@ -1047,8 +1154,10 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
mif->flags |= MEMIF_IF_FLAG_ZERO_COPY;
}
- hw = vnet_get_hw_interface (vnm, mif->hw_if_index);
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ if (args->use_dma)
+ mif->flags |= MEMIF_IF_FLAG_USE_DMA;
+
+ vnet_hw_if_set_caps (vnm, mif->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
vnet_hw_if_set_input_node (vnm, mif->hw_if_index, memif_input_node.index);
mhash_set (&msf->dev_instance_by_id, &mif->id, mif->dev_instance, 0);
@@ -1061,15 +1170,12 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
error:
memif_delete_if (vm, mif);
- if (error)
- {
- memif_log_err (mif, "%U", format_clib_error, error);
- clib_error_free (error);
- }
- return ret;
+ if (err)
+ memif_log_err (mif, "%U", format_clib_error, err);
+ return err;
done:
- return rv;
+ return err;
}
clib_error_t *
@@ -1081,7 +1187,14 @@ memif_interface_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
static clib_error_t *error = 0;
if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
- mif->flags |= MEMIF_IF_FLAG_ADMIN_UP;
+ {
+ if (mif->flags & MEMIF_IF_FLAG_CONNECTED)
+ {
+ vnet_hw_interface_set_flags (vnm, mif->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ }
+ mif->flags |= MEMIF_IF_FLAG_ADMIN_UP;
+ }
else
mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP;
@@ -1108,19 +1221,15 @@ memif_init (vlib_main_t * vm)
* for socket-id 0 to MEMIF_DEFAULT_SOCKET_FILENAME in the
* default run-time directory.
*/
- memif_socket_filename_add_del (1, 0, (u8 *) MEMIF_DEFAULT_SOCKET_FILENAME);
-
- return 0;
+ return memif_socket_filename_add_del (1, 0, MEMIF_DEFAULT_SOCKET_FILENAME);
}
VLIB_INIT_FUNCTION (memif_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Packet Memory Interface (memif) -- Experimental",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c
index a50e7ce8882..16a52ffc0dc 100644
--- a/src/plugins/memif/memif_api.c
+++ b/src/plugins/memif/memif_api.c
@@ -48,8 +48,6 @@ void
memif_main_t *mm = &memif_main;
u8 is_add;
u32 socket_id;
- u32 len;
- u8 *socket_filename;
vl_api_memif_socket_filename_add_del_reply_t *rmp;
int rv;
@@ -65,24 +63,52 @@ void
}
/* socket filename */
- socket_filename = 0;
mp->socket_filename[ARRAY_LEN (mp->socket_filename) - 1] = 0;
- len = strlen ((char *) mp->socket_filename);
- if (mp->is_add)
+
+ rv = vnet_get_api_error_and_free (memif_socket_filename_add_del (
+ is_add, socket_id, (char *) mp->socket_filename));
+
+reply:
+ REPLY_MACRO (VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_REPLY);
+}
+
+/**
+ * @brief Message handler for memif_socket_filename_add_del_v2 API.
+ * @param mp the vl_api_memif_socket_filename_add_del_v2_t API message
+ */
+void
+vl_api_memif_socket_filename_add_del_v2_t_handler (
+ vl_api_memif_socket_filename_add_del_v2_t *mp)
+{
+ vl_api_memif_socket_filename_add_del_v2_reply_t *rmp;
+ memif_main_t *mm = &memif_main;
+ char *socket_filename = 0;
+ u32 socket_id;
+ int rv;
+
+ /* socket_id */
+ socket_id = clib_net_to_host_u32 (mp->socket_id);
+ if (socket_id == 0)
{
- vec_validate (socket_filename, len);
- memcpy (socket_filename, mp->socket_filename, len);
+ rv = VNET_API_ERROR_INVALID_ARGUMENT;
+ goto reply;
}
- rv = memif_socket_filename_add_del (is_add, socket_id, socket_filename);
+ /* socket filename */
+ socket_filename = vl_api_from_api_to_new_c_string (&mp->socket_filename);
+ if (mp->is_add && socket_id == (u32) ~0)
+ socket_id = memif_get_unused_socket_id ();
+
+ rv = vnet_get_api_error_and_free (
+ memif_socket_filename_add_del (mp->is_add, socket_id, socket_filename));
vec_free (socket_filename);
reply:
- REPLY_MACRO (VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_REPLY);
+ REPLY_MACRO2 (VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_V2_REPLY,
+ ({ rmp->socket_id = htonl (socket_id); }));
}
-
/**
* @brief Message handler for memif_create API.
* @param mp vl_api_memif_create_t * mp the api message
@@ -164,17 +190,107 @@ vl_api_memif_create_t_handler (vl_api_memif_create_t * mp)
args.hw_addr_set = 1;
}
- rv = memif_create_if (vm, &args);
+ rv = vnet_get_api_error_and_free (memif_create_if (vm, &args));
vec_free (args.secret);
reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_MEMIF_CREATE_REPLY,
({
rmp->sw_if_index = htonl (args.sw_if_index);
}));
- /* *INDENT-ON* */
+}
+
+/**
+ * @brief Message handler for memif_create_v2 API.
+ * @param mp vl_api_memif_create_v2_t * mp the api message
+ */
+void
+vl_api_memif_create_v2_t_handler (vl_api_memif_create_v2_t *mp)
+{
+ memif_main_t *mm = &memif_main;
+ vlib_main_t *vm = vlib_get_main ();
+  vl_api_memif_create_v2_reply_t *rmp;
+ memif_create_if_args_t args = { 0 };
+ u32 ring_size = MEMIF_DEFAULT_RING_SIZE;
+ static const u8 empty_hw_addr[6];
+ int rv = 0;
+ mac_address_t mac;
+
+ /* id */
+ args.id = clib_net_to_host_u32 (mp->id);
+
+ /* socket-id */
+ args.socket_id = clib_net_to_host_u32 (mp->socket_id);
+
+ /* secret */
+ mp->secret[ARRAY_LEN (mp->secret) - 1] = 0;
+ if (strlen ((char *) mp->secret) > 0)
+ {
+ vec_validate (args.secret, strlen ((char *) mp->secret));
+ strncpy ((char *) args.secret, (char *) mp->secret,
+ vec_len (args.secret));
+ }
+
+ /* role */
+ args.is_master = (ntohl (mp->role) == MEMIF_ROLE_API_MASTER);
+
+ /* mode */
+ args.mode = ntohl (mp->mode);
+
+ args.is_zero_copy = mp->no_zero_copy ? 0 : 1;
+
+ args.use_dma = mp->use_dma;
+
+ /* rx/tx queues */
+ if (args.is_master == 0)
+ {
+ args.rx_queues = MEMIF_DEFAULT_RX_QUEUES;
+ args.tx_queues = MEMIF_DEFAULT_TX_QUEUES;
+ if (mp->rx_queues)
+ {
+ args.rx_queues = mp->rx_queues;
+ }
+ if (mp->tx_queues)
+ {
+ args.tx_queues = mp->tx_queues;
+ }
+ }
+
+ /* ring size */
+ if (mp->ring_size)
+ {
+ ring_size = ntohl (mp->ring_size);
+ }
+ if (!is_pow2 (ring_size))
+ {
+ rv = VNET_API_ERROR_INVALID_ARGUMENT;
+ goto reply;
+ }
+ args.log2_ring_size = min_log2 (ring_size);
+
+ /* buffer size */
+ args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE;
+ if (mp->buffer_size)
+ {
+ args.buffer_size = ntohs (mp->buffer_size);
+ }
+
+ /* MAC address */
+ mac_address_decode (mp->hw_addr, &mac);
+ if (memcmp (&mac, empty_hw_addr, 6) != 0)
+ {
+ memcpy (args.hw_addr, &mac, 6);
+ args.hw_addr_set = 1;
+ }
+
+ rv = vnet_api_error (memif_create_if (vm, &args));
+
+ vec_free (args.secret);
+
+reply:
+ REPLY_MACRO2 (VL_API_MEMIF_CREATE_V2_REPLY,
+ ({ rmp->sw_if_index = htonl (args.sw_if_index); }));
}
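
/* The handler accepts a ring size only if it is a power of two and then
 * stores just its base-2 logarithm, mirroring what is_pow2()/min_log2()
 * do in the code above. A standalone sketch of that check and conversion
 * (this variant also rejects zero): */
#include <assert.h>
#include <stdio.h>

static int
is_pow2_u32 (unsigned x)
{
  return x != 0 && (x & (x - 1)) == 0;
}

static unsigned
log2_u32 (unsigned x)
{
  unsigned l = 0;
  while (x >>= 1)
    l++;
  return l;
}

int
main (void)
{
  unsigned ring_size = 1024;
  assert (is_pow2_u32 (ring_size));
  printf ("log2_ring_size = %u\n", log2_u32 (ring_size)); /* prints 10 */
  return 0;
}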
/**
@@ -201,7 +317,7 @@ vl_api_memif_delete_t_handler (vl_api_memif_delete_t * mp)
else
{
mif = pool_elt_at_index (mm->interfaces, hi->dev_instance);
- rv = memif_delete_if (vm, mif);
+ rv = vnet_get_api_error_and_free (memif_delete_if (vm, mif));
}
REPLY_MACRO (VL_API_MEMIF_DELETE_REPLY);
@@ -279,7 +395,6 @@ vl_api_memif_dump_t_handler (vl_api_memif_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (mif, mm->interfaces)
{
swif = vnet_get_sw_interface (vnm, mif->sw_if_index);
@@ -289,9 +404,8 @@ vl_api_memif_dump_t_handler (vl_api_memif_dump_t * mp)
vnm, swif, 0);
send_memif_details (reg, mif, swif, if_name, mp->context);
- _vec_len (if_name) = 0;
+ vec_set_len (if_name, 0);
}
- /* *INDENT-ON* */
vec_free (if_name);
}
@@ -335,7 +449,6 @@ void
if (!reg)
return;
- /* *INDENT-OFF* */
hash_foreach (sock_id, msf_idx, mm->socket_file_index_by_sock_id,
({
memif_socket_file_t *msf;
@@ -345,7 +458,6 @@ void
filename = msf->filename;
send_memif_socket_filename_details(reg, sock_id, filename, mp->context);
}));
- /* *INDENT-ON* */
}
/* Set up the API message handling tables */
diff --git a/src/plugins/memif/memif_test.c b/src/plugins/memif/memif_test.c
index 1ec6703d135..d3290ac0340 100644
--- a/src/plugins/memif/memif_test.c
+++ b/src/plugins/memif/memif_test.c
@@ -33,8 +33,7 @@
#include <vnet/format_fns.h>
#include <memif/memif.api_enum.h>
#include <memif/memif.api_types.h>
-#include <vpp/api/vpe.api_types.h>
-//#include <vnet/ethernet/ethernet_types.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -122,6 +121,86 @@ api_memif_socket_filename_add_del (vat_main_t * vam)
return ret;
}
+/* memif_socket_filename_add_del API */
+static int
+api_memif_socket_filename_add_del_v2 (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_memif_socket_filename_add_del_v2_t *mp;
+ u8 is_add;
+ u32 socket_id;
+ u8 *socket_filename;
+ int ret;
+
+ is_add = 1;
+ socket_id = ~0;
+ socket_filename = 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "id %u", &socket_id))
+ ;
+ else if (unformat (i, "filename %s", &socket_filename))
+ ;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else
+ {
+ vec_free (socket_filename);
+ clib_warning ("unknown input `%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (socket_id == 0 || socket_id == ~0)
+ {
+ vec_free (socket_filename);
+ errmsg ("Invalid socket id");
+ return -99;
+ }
+
+ if (is_add && (!socket_filename || *socket_filename == 0))
+ {
+ vec_free (socket_filename);
+ errmsg ("Invalid socket filename");
+ return -99;
+ }
+
+  M2 (MEMIF_SOCKET_FILENAME_ADD_DEL_V2, mp,
+      socket_filename ? strlen ((char *) socket_filename) : 0);
+
+ mp->is_add = is_add;
+ mp->socket_id = htonl (socket_id);
+ char *p = (char *) &mp->socket_filename;
+ p += vl_api_vec_to_api_string (socket_filename, (vl_api_string_t *) p);
+
+ vec_free (socket_filename);
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
+/* memif_socket_filename_add_del_v2 reply handler */
+static void
+vl_api_memif_socket_filename_add_del_v2_reply_t_handler (
+ vl_api_memif_socket_filename_add_del_v2_reply_t *mp)
+{
+ vat_main_t *vam = memif_test_main.vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created memif socket with socket_id %d\n",
+ ntohl (mp->socket_id));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
/* memif_socket_filename_add_del reply handler */
#define VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_REPLY_T_HANDLER
static void vl_api_memif_socket_filename_add_del_reply_t_handler
@@ -246,6 +325,120 @@ static void vl_api_memif_create_reply_t_handler
vam->regenerate_interface_table = 1;
}
+/* memif-create v2 API */
+static int
+api_memif_create_v2 (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_memif_create_v2_t *mp;
+ u32 id = 0;
+ u32 socket_id = 0;
+ u8 *secret = 0;
+ u8 role = 1;
+ u32 ring_size = 0;
+ u8 use_dma = 0;
+ u32 buffer_size = 0;
+ u8 hw_addr[6] = { 0 };
+ u32 rx_queues = MEMIF_DEFAULT_RX_QUEUES;
+ u32 tx_queues = MEMIF_DEFAULT_TX_QUEUES;
+ int ret;
+ u8 mode = MEMIF_INTERFACE_MODE_ETHERNET;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "id %u", &id))
+ ;
+ else if (unformat (i, "socket-id %u", &socket_id))
+ ;
+ else if (unformat (i, "secret %s", &secret))
+ ;
+ else if (unformat (i, "ring_size %u", &ring_size))
+ ;
+ else if (unformat (i, "buffer_size %u", &buffer_size))
+ ;
+ else if (unformat (i, "master"))
+ role = 0;
+ else if (unformat (i, "use_dma %u", &use_dma))
+ ;
+ else if (unformat (i, "slave %U", unformat_memif_queues, &rx_queues,
+ &tx_queues))
+ role = 1;
+ else if (unformat (i, "mode ip"))
+ mode = MEMIF_INTERFACE_MODE_IP;
+ else if (unformat (i, "hw_addr %U", unformat_ethernet_address, hw_addr))
+ ;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (socket_id == ~0)
+ {
+ errmsg ("invalid socket-id\n");
+ return -99;
+ }
+
+ if (!is_pow2 (ring_size))
+ {
+ errmsg ("ring size must be power of 2\n");
+ return -99;
+ }
+
+ if (rx_queues > 255 || rx_queues < 1)
+ {
+ errmsg ("rx queue must be between 1 - 255\n");
+ return -99;
+ }
+
+ if (tx_queues > 255 || tx_queues < 1)
+ {
+ errmsg ("tx queue must be between 1 - 255\n");
+ return -99;
+ }
+
+  M2 (MEMIF_CREATE_V2, mp, secret ? strlen ((char *) secret) : 0);
+
+ mp->mode = mode;
+ mp->id = clib_host_to_net_u32 (id);
+ mp->role = role;
+ mp->use_dma = use_dma;
+ mp->ring_size = clib_host_to_net_u32 (ring_size);
+ mp->buffer_size = clib_host_to_net_u16 (buffer_size & 0xffff);
+ mp->socket_id = clib_host_to_net_u32 (socket_id);
+
+ char *p = (char *) &mp->secret;
+ p += vl_api_vec_to_api_string (secret, (vl_api_string_t *) p);
+ vec_free (secret);
+
+ memcpy (mp->hw_addr, hw_addr, 6);
+ mp->rx_queues = rx_queues;
+ mp->tx_queues = tx_queues;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+/* memif-create v2 reply handler */
+static void
+vl_api_memif_create_v2_reply_t_handler (vl_api_memif_create_v2_reply_t *mp)
+{
+ vat_main_t *vam = memif_test_main.vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created memif with sw_if_index %d\n",
+ ntohl (mp->sw_if_index));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vam->regenerate_interface_table = 1;
+}
+
/* memif-delete API */
static int
api_memif_delete (vat_main_t * vam)
diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c
index 8bb3758c5fd..70933f4aa9d 100644
--- a/src/plugins/memif/node.c
+++ b/src/plugins/memif/node.c
@@ -30,6 +30,8 @@
#include <memif/memif.h>
#include <memif/private.h>
+#define MEMIF_IP_OFFSET 14
+
#define foreach_memif_input_error \
_ (BUFFER_ALLOC_FAIL, buffer_alloc, ERROR, "buffer allocation failed") \
_ (BAD_DESC, bad_desc, ERROR, "bad descriptor") \
@@ -140,96 +142,141 @@ memif_add_to_chain (vlib_main_t * vm, vlib_buffer_t * b, u32 * buffers,
}
}
-static_always_inline uword
-memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- memif_if_t *mif, memif_ring_type_t type, u16 qid,
- memif_interface_mode_t mode)
+static_always_inline u16
+memif_parse_desc (memif_per_thread_data_t *ptd, memif_if_t *mif,
+ memif_queue_t *mq, u16 next, u16 n_avail)
{
- vnet_main_t *vnm = vnet_get_main ();
- memif_main_t *mm = &memif_main;
- memif_ring_t *ring;
- memif_queue_t *mq;
- u16 buffer_size = vlib_buffer_get_default_data_size (vm);
- uword n_trace;
- u16 nexts[MEMIF_RX_VECTOR_SZ], *next = nexts;
- u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi;
- u32 n_rx_packets = 0, n_rx_bytes = 0;
- u32 n_left, n_left_to_next;
- u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- vlib_buffer_t *b0, *b1, *b2, *b3;
- u32 thread_index = vm->thread_index;
- memif_per_thread_data_t *ptd = vec_elt_at_index (mm->per_thread_data,
- thread_index);
- vlib_buffer_t bt;
- u16 cur_slot, last_slot, ring_size, n_slots, mask;
- i16 start_offset;
- u16 n_buffers = 0, n_alloc;
- memif_copy_op_t *co;
- memif_packet_op_t *po;
- memif_region_index_t last_region = ~0;
- void *last_region_shm = 0;
- void *last_region_max = 0;
+ memif_ring_t *ring = mq->ring;
+ memif_desc_t *descs = ring->desc;
+ void **desc_data = ptd->desc_data;
+ u16 *desc_len = ptd->desc_len;
+ memif_desc_status_t *desc_status = ptd->desc_status;
+ u16 n_desc = 0, n_pkts = 0;
+ u32 i = 0;
+ u16 mask = pow2_mask (mq->log2_ring_size);
+ memif_desc_t *d = 0;
+ u32 slot = next;
+
+ while (i < n_avail)
+ {
+ u8 flags;
+ d = descs + (slot++ & mask);
+ desc_data[i] = (void *) ((u64) d->region << 32 | d->offset);
+ desc_len[i] = d->length;
+ desc_status[i].as_u8 = flags = d->flags;
+ i++;
+ if (PREDICT_FALSE ((flags & MEMIF_DESC_FLAG_NEXT)) == 0)
+ {
+ n_desc = i;
+ if (++n_pkts == MEMIF_RX_VECTOR_SZ)
+ goto frame_full;
+ }
+ }
+frame_full:
- mq = vec_elt_at_index (mif->rx_queues, qid);
- ring = mq->ring;
- ring_size = 1 << mq->log2_ring_size;
- mask = ring_size - 1;
+ /* done */
+ ptd->n_packets = n_pkts;
+ return n_desc;
+}
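
/* memif_parse_desc() compresses each descriptor into a single
 * pointer-sized word: region index in the upper 32 bits, offset in the
 * lower 32. The validate pass later unpacks it and, once checked,
 * replaces it with the real shm address. A sketch of that pack/unpack
 * round trip (assumes 64-bit pointers, as the code above does): */
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint16_t region = 3;
  uint32_t offset = 0x1000;
  void *packed = (void *) (((uint64_t) region << 32) | offset);

  uint16_t r = (uint16_t) (((uint64_t) packed) >> 32);
  uint32_t o = (uint32_t) (uint64_t) packed;
  assert (r == region && o == offset);
  return 0;
}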
- /* assume that somebody will want to add ethernet header on the packet
- so start with IP header at offset 14 */
- start_offset = (mode == MEMIF_INTERFACE_MODE_IP) ? 14 : 0;
+static_always_inline void
+memif_desc_status_set_err (memif_desc_status_t *p,
+ memif_desc_status_err_code_t e)
+{
+ memif_desc_status_t s = { .err = 1, .err_code = e };
+ p->as_u8 |= s.as_u8;
+}
- /* for S2M rings, we are consumers of packet buffers, and for M2S rings we
- are producers of empty buffers */
- cur_slot = (type == MEMIF_RING_S2M) ? mq->last_head : mq->last_tail;
+static_always_inline void
+memif_validate_desc_data (memif_per_thread_data_t *ptd, memif_if_t *mif,
+ u16 n_desc, int is_ethernet)
+{
+ void **desc_data = ptd->desc_data;
+ u16 *desc_len = ptd->desc_len;
+ memif_desc_status_t *desc_status = ptd->desc_status;
+ u16 n_regions = vec_len (mif->regions);
+ u32 n_rx_bytes = 0;
+ u16 max_len = 0;
+ u8 xor_status = 0;
+
+ for (u32 i = 0; i < n_desc; i++)
+ {
+ u16 region = ((u64) desc_data[i]) >> 32;
+ u32 offset = (u64) desc_data[i];
+ u16 len = desc_len[i];
+ memif_region_t *r = mif->regions + region;
+
+ if (region >= n_regions)
+ memif_desc_status_set_err (desc_status + i,
+ MEMIF_DESC_STATUS_ERR_BAD_REGION);
+ else if (offset + len > r->region_size)
+ memif_desc_status_set_err (desc_status + i,
+ MEMIF_DESC_STATUS_ERR_REGION_OVERRUN);
+ else if (is_ethernet && len > ETHERNET_MAX_PACKET_BYTES)
+ memif_desc_status_set_err (desc_status + i,
+ MEMIF_DESC_STATUS_ERR_DATA_TOO_BIG);
+ else if (len == 0)
+ memif_desc_status_set_err (desc_status + i,
+ MEMIF_DESC_STATUS_ERR_ZERO_LENGTH);
+ else
+ {
+ desc_data[i] = r->shm + offset;
+ if (len > max_len)
+ max_len = len;
+ n_rx_bytes += len;
+ }
+ xor_status |= desc_status[i].as_u8;
+ }
- if (type == MEMIF_RING_S2M)
- last_slot = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE);
- else
- last_slot = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE);
+ ptd->max_desc_len = max_len;
+ ptd->xor_status = xor_status;
+ ptd->n_rx_bytes = n_rx_bytes;
+}
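
/* The validate pass folds every per-descriptor status byte into one
 * accumulator (a byte-wise OR, despite the xor_status name), so callers
 * can decide between the simple and slow paths with a single nonzero
 * test instead of re-walking the descriptor vector. A sketch: */
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint8_t status[4] = { 0, 0, 0x2 /* one descriptor flagged bad */, 0 };
  uint8_t agg = 0;
  for (int i = 0; i < 4; i++)
    agg |= status[i];
  /* nonzero: at least one descriptor is chained or in error */
  printf ("take slow path: %d\n", agg != 0);
  return 0;
}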
- if (cur_slot == last_slot)
- goto refill;
- n_slots = last_slot - cur_slot;
+static_always_inline u32
+memif_process_desc (vlib_main_t *vm, vlib_node_runtime_t *node,
+ memif_per_thread_data_t *ptd, memif_if_t *mif)
+{
+ u16 buffer_size = vlib_buffer_get_default_data_size (vm);
+ int is_ip = mif->mode == MEMIF_INTERFACE_MODE_IP;
+ i16 start_offset = (is_ip) ? MEMIF_IP_OFFSET : 0;
+ memif_packet_op_t *po = ptd->packet_ops;
+ void **desc_data = ptd->desc_data;
+ u16 *desc_len = ptd->desc_len;
+ memif_desc_status_t *desc_status = ptd->desc_status;
+ u32 n_buffers = 0;
+ u32 n_left = ptd->n_packets;
+ u32 packet_len;
+ int i = -1;
+ int bad_packets = 0;
/* construct copy and packet vector out of ring slots */
- while (n_slots && n_rx_packets < MEMIF_RX_VECTOR_SZ)
+ while (n_left)
{
u32 dst_off, src_off, n_bytes_left;
- u16 s0;
- memif_desc_t *d0;
void *mb0;
- po = ptd->packet_ops + n_rx_packets;
- n_rx_packets++;
po->first_buffer_vec_index = n_buffers++;
- po->packet_len = 0;
+
+ packet_len = 0;
src_off = 0;
dst_off = start_offset;
next_slot:
- clib_prefetch_load (&ring->desc[(cur_slot + 8) & mask]);
- s0 = cur_slot & mask;
- d0 = &ring->desc[s0];
- n_bytes_left = d0->length;
+ i++; /* next descriptor */
+ n_bytes_left = desc_len[i];
- /* slave resets buffer length,
- * so it can produce full size buffer for master
- */
- if (type == MEMIF_RING_M2S)
- d0->length = mif->run.buffer_size;
+ packet_len += n_bytes_left;
+ mb0 = desc_data[i];
- po->packet_len += n_bytes_left;
- if (PREDICT_FALSE (last_region != d0->region))
+ if (PREDICT_FALSE (desc_status[i].err))
{
- last_region_shm = mif->regions[d0->region].shm;
- last_region = d0->region;
- last_region_max =
- last_region_shm + mif->regions[last_region].region_size;
+ vlib_error_count (vm, node->node_index, MEMIF_INPUT_ERROR_BAD_DESC,
+ 1);
+ bad_packets++;
+ ASSERT (n_buffers > 0);
+ n_buffers--;
+ goto next_packet;
}
- mb0 = last_region_shm + d0->offset;
-
- if (PREDICT_FALSE (mb0 + n_bytes_left > last_region_max))
- vlib_error_count (vm, node->node_index, MEMIF_INPUT_ERROR_BAD_DESC, 1);
else
do
{
@@ -249,115 +296,98 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
while (PREDICT_FALSE (n_bytes_left));
- cur_slot++;
- n_slots--;
- if ((d0->flags & MEMIF_DESC_FLAG_NEXT) && n_slots)
+ if (desc_status[i].next)
{
src_off = 0;
goto next_slot;
}
- }
- /* allocate free buffers */
- vec_validate_aligned (ptd->buffers, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
- n_alloc = vlib_buffer_alloc_from_pool (vm, ptd->buffers, n_buffers,
- mq->buffer_pool_index);
- if (PREDICT_FALSE (n_alloc != n_buffers))
- {
- if (n_alloc)
- vlib_buffer_free (vm, ptd->buffers, n_alloc);
- vlib_error_count (vm, node->node_index,
- MEMIF_INPUT_ERROR_BUFFER_ALLOC_FAIL, 1);
- goto refill;
+ /* update packet op */
+ po->packet_len = packet_len;
+ po++;
+
+ next_packet:
+ /* next packet */
+ n_left--;
}
+ ASSERT (ptd->n_packets >= bad_packets);
+ ptd->n_packets -= bad_packets;
+ return n_buffers;
+}
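
/* memif_process_desc() flattens each (possibly chained) descriptor list
 * into a total packet length plus copy operations that never cross a
 * destination buffer boundary. A sketch of that splitting step, with a
 * hypothetical 2048-byte buffer and hard-coded segment lengths: */
#include <stdio.h>

#define BUF_SZ 2048 /* stands in for vlib_buffer_get_default_data_size() */

int
main (void)
{
  unsigned seg_len[3] = { 1500, 3000, 100 }; /* one chained packet */
  unsigned dst = 0; /* running offset across the buffer chain */

  for (int i = 0; i < 3; i++)
    {
      unsigned src = 0, left = seg_len[i];
      while (left)
	{
	  unsigned room = BUF_SZ - (dst % BUF_SZ);
	  unsigned n = left < room ? left : room;
	  printf ("copy %4u bytes: seg %d+%-4u -> buffer %u+%u\n", n, i, src,
		  dst / BUF_SZ, dst % BUF_SZ);
	  src += n;
	  dst += n;
	  left -= n;
	}
    }
  printf ("packet_len %u, buffers used %u\n", dst, dst / BUF_SZ + 1);
  return 0;
}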
+static_always_inline void
+memif_fill_buffer_mdata_simple (vlib_node_runtime_t *node,
+ memif_per_thread_data_t *ptd,
+ vlib_buffer_t **b, u16 *next, int is_ip)
+{
+ vlib_buffer_t bt;
+ u16 *dl = ptd->desc_len;
+ /* process buffer metadata */
+
+ u32 n_left = ptd->n_packets;
+
+ /* copy template into local variable - will save per packet load */
+ vlib_buffer_copy_template (&bt, &ptd->buffer_template);
- /* copy data */
- n_left = vec_len (ptd->copy_ops);
- co = ptd->copy_ops;
while (n_left >= 8)
{
- clib_prefetch_load (co[4].data);
- clib_prefetch_load (co[5].data);
- clib_prefetch_load (co[6].data);
- clib_prefetch_load (co[7].data);
-
- b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
- b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
- b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
- b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);
-
- clib_memcpy_fast (b0->data + co[0].buffer_offset, co[0].data,
- co[0].data_len);
- clib_memcpy_fast (b1->data + co[1].buffer_offset, co[1].data,
- co[1].data_len);
- clib_memcpy_fast (b2->data + co[2].buffer_offset, co[2].data,
- co[2].data_len);
- clib_memcpy_fast (b3->data + co[3].buffer_offset, co[3].data,
- co[3].data_len);
-
- co += 4;
- n_left -= 4;
- }
- while (n_left)
- {
- b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
- clib_memcpy_fast (b0->data + co[0].buffer_offset, co[0].data,
- co[0].data_len);
- co += 1;
- n_left -= 1;
- }
+ vlib_prefetch_buffer_header (b[4], STORE);
+ vlib_prefetch_buffer_header (b[5], STORE);
+ vlib_prefetch_buffer_header (b[6], STORE);
+ vlib_prefetch_buffer_header (b[7], STORE);
+
+ vlib_buffer_copy_template (b[0], &bt);
+ vlib_buffer_copy_template (b[1], &bt);
+ vlib_buffer_copy_template (b[2], &bt);
+ vlib_buffer_copy_template (b[3], &bt);
+
+ b[0]->current_length = dl[0];
+ b[1]->current_length = dl[1];
+ b[2]->current_length = dl[2];
+ b[3]->current_length = dl[3];
+
+ if (is_ip)
+ {
+ next[0] = memif_next_from_ip_hdr (node, b[0]);
+ next[1] = memif_next_from_ip_hdr (node, b[1]);
+ next[2] = memif_next_from_ip_hdr (node, b[2]);
+ next[3] = memif_next_from_ip_hdr (node, b[3]);
+ }
- /* release slots from the ring */
- if (type == MEMIF_RING_S2M)
- {
- __atomic_store_n (&ring->tail, cur_slot, __ATOMIC_RELEASE);
- mq->last_head = cur_slot;
- }
- else
- {
- mq->last_tail = cur_slot;
+ /* next */
+ n_left -= 4;
+ b += 4;
+ dl += 4;
+ next += 4;
}
- /* prepare buffer template and next indices */
- vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] =
- mif->sw_if_index;
- vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0;
- ptd->buffer_template.current_data = start_offset;
- ptd->buffer_template.current_config_index = 0;
- ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index;
- ptd->buffer_template.ref_count = 1;
-
- if (mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ while (n_left)
{
- next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- if (mif->per_interface_next_index != ~0)
- next_index = mif->per_interface_next_index;
- else
- vnet_feature_start_device_input_x1 (mif->sw_if_index, &next_index,
- &ptd->buffer_template);
-
- vlib_get_new_next_frame (vm, node, next_index, to_next_bufs,
- n_left_to_next);
- if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
- {
- vlib_next_frame_t *nf;
- vlib_frame_t *f;
- ethernet_input_frame_t *ef;
- nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
- f = vlib_get_frame (vm, nf->frame);
- f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+ /* enqueue buffer */
+ vlib_buffer_copy_template (b[0], &bt);
+ b[0]->current_length = dl[0];
+ if (is_ip)
+ next[0] = memif_next_from_ip_hdr (node, b[0]);
- ef = vlib_frame_scalar_args (f);
- ef->sw_if_index = mif->sw_if_index;
- ef->hw_if_index = mif->hw_if_index;
- vlib_frame_no_append (f);
- }
+ /* next */
+ n_left -= 1;
+ b += 1;
+ dl += 1;
+ next += 1;
}
+}
+static_always_inline void
+memif_fill_buffer_mdata (vlib_main_t *vm, vlib_node_runtime_t *node,
+ memif_per_thread_data_t *ptd, memif_if_t *mif,
+ u32 *bi, u16 *next, int is_ip)
+{
+ u16 buffer_size = vlib_buffer_get_default_data_size (vm);
+ vlib_buffer_t *b0, *b1, *b2, *b3, bt;
+ memif_packet_op_t *po;
/* process buffer metadata */
- u32 n_from = n_rx_packets;
+
+ u32 n_from = ptd->n_packets;
po = ptd->packet_ops;
- bi = to_next_bufs;
/* copy template into local variable - will save per packet load */
vlib_buffer_copy_template (&bt, &ptd->buffer_template);
@@ -397,20 +427,16 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_copy_template (b3, &bt);
b0->current_length = po[0].packet_len;
- n_rx_bytes += b0->current_length;
b1->current_length = po[1].packet_len;
- n_rx_bytes += b1->current_length;
b2->current_length = po[2].packet_len;
- n_rx_bytes += b2->current_length;
b3->current_length = po[3].packet_len;
- n_rx_bytes += b3->current_length;
memif_add_to_chain (vm, b0, ptd->buffers + fbvi[0] + 1, buffer_size);
memif_add_to_chain (vm, b1, ptd->buffers + fbvi[1] + 1, buffer_size);
memif_add_to_chain (vm, b2, ptd->buffers + fbvi[2] + 1, buffer_size);
memif_add_to_chain (vm, b3, ptd->buffers + fbvi[3] + 1, buffer_size);
- if (mode == MEMIF_INTERFACE_MODE_IP)
+ if (is_ip)
{
next[0] = memif_next_from_ip_hdr (node, b0);
next[1] = memif_next_from_ip_hdr (node, b1);
@@ -426,21 +452,18 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
while (n_from)
{
- u32 fbvi[4];
+ u32 fbvi[1];
/* enqueue buffer */
fbvi[0] = po[0].first_buffer_vec_index;
bi[0] = ptd->buffers[fbvi[0]];
b0 = vlib_get_buffer (vm, bi[0]);
vlib_buffer_copy_template (b0, &bt);
b0->current_length = po->packet_len;
- n_rx_bytes += b0->current_length;
memif_add_to_chain (vm, b0, ptd->buffers + fbvi[0] + 1, buffer_size);
- if (mode == MEMIF_INTERFACE_MODE_IP)
- {
- next[0] = memif_next_from_ip_hdr (node, b0);
- }
+ if (is_ip)
+ next[0] = memif_next_from_ip_hdr (node, b0);
/* next */
n_from -= 1;
@@ -448,11 +471,216 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
bi += 1;
next += 1;
}
+}
+
+static_always_inline void
+memif_advance_ring (memif_ring_type_t type, memif_queue_t *mq,
+ memif_ring_t *ring, u16 cur_slot)
+{
+ if (type == MEMIF_RING_S2M)
+ {
+ __atomic_store_n (&ring->tail, cur_slot, __ATOMIC_RELEASE);
+ mq->last_head = cur_slot;
+ }
+ else
+ {
+ mq->last_tail = cur_slot;
+ }
+}
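
/* For S2M rings VPP is the consumer: the input node reads the producer's
 * head with acquire semantics and memif_advance_ring() publishes how far
 * it consumed by storing tail with release semantics. A minimal
 * single-producer/single-consumer sketch of that pairing, assuming C11
 * atomics: */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct
{
  _Atomic uint16_t head; /* written by the producer */
  _Atomic uint16_t tail; /* written by the consumer */
} ring_t;

static uint16_t
consume (ring_t *r, uint16_t last)
{
  uint16_t head = atomic_load_explicit (&r->head, memory_order_acquire);
  uint16_t n = (uint16_t) (head - last); /* u16 wrap-around, like the ring */
  /* ... process n slots starting at 'last' ... */
  atomic_store_explicit (&r->tail, (uint16_t) (last + n),
			 memory_order_release);
  return (uint16_t) (last + n);
}

int
main (void)
{
  ring_t r;
  atomic_init (&r.head, 5);
  atomic_init (&r.tail, 0);
  printf ("consumed up to slot %u\n", consume (&r, 0)); /* prints 5 */
  return 0;
}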
+
+static_always_inline uword
+memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ memif_if_t *mif, memif_ring_type_t type, u16 qid,
+ memif_interface_mode_t mode)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ memif_main_t *mm = &memif_main;
+ memif_ring_t *ring;
+ memif_queue_t *mq;
+ u16 buffer_size = vlib_buffer_get_default_data_size (vm);
+ uword n_trace;
+ u16 nexts[MEMIF_RX_VECTOR_SZ], *next = nexts;
+ u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi;
+ u32 n_left_to_next;
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ vlib_buffer_t *buffer_ptrs[MEMIF_RX_VECTOR_SZ];
+ u32 thread_index = vm->thread_index;
+ memif_per_thread_data_t *ptd =
+ vec_elt_at_index (mm->per_thread_data, thread_index);
+ u16 cur_slot, ring_size, n_slots, mask;
+ u16 n_buffers, n_alloc, n_desc;
+ i16 start_offset;
+ memif_copy_op_t *co;
+ int is_slave = (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) != 0;
+ int is_simple = 1;
+ int i;
+
+ mq = vec_elt_at_index (mif->rx_queues, qid);
+ ring = mq->ring;
+ ring_size = 1 << mq->log2_ring_size;
+ mask = ring_size - 1;
+
+ start_offset = (mode == MEMIF_INTERFACE_MODE_IP) ? MEMIF_IP_OFFSET : 0;
+
+ if (is_slave)
+ {
+ cur_slot = mq->last_tail;
+ n_slots = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE) - cur_slot;
+ }
+ else
+ {
+ cur_slot = mq->last_head;
+ n_slots = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE) - cur_slot;
+ }
+
+ if (n_slots == 0)
+ {
+ ptd->n_packets = 0;
+ goto refill;
+ }
+
+ n_desc = memif_parse_desc (ptd, mif, mq, cur_slot, n_slots);
+
+ if (n_desc != ptd->n_packets)
+ is_simple = 0;
+
+ cur_slot += n_desc;
+
+ if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ memif_validate_desc_data (ptd, mif, n_desc, /* is_ethernet */ 1);
+ else
+ memif_validate_desc_data (ptd, mif, n_desc, /* is_ethernet */ 0);
+
+ if (ptd->max_desc_len > buffer_size - start_offset)
+ is_simple = 0;
+
+ if (ptd->xor_status != 0)
+ is_simple = 0;
+
+ if (is_simple)
+ n_buffers = ptd->n_packets;
+ else
+ n_buffers = memif_process_desc (vm, node, ptd, mif);
+
+ if (PREDICT_FALSE (n_buffers == 0))
+ {
+ /* All descriptors are bad. Release slots in the ring and bail */
+ memif_advance_ring (type, mq, ring, cur_slot);
+ goto refill;
+ }
+
+ /* allocate free buffers */
+ vec_validate_aligned (ptd->buffers, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ n_alloc = vlib_buffer_alloc_from_pool (vm, ptd->buffers, n_buffers,
+ mq->buffer_pool_index);
+ if (PREDICT_FALSE (n_alloc != n_buffers))
+ {
+ if (n_alloc)
+ vlib_buffer_free (vm, ptd->buffers, n_alloc);
+ vlib_error_count (vm, node->node_index,
+ MEMIF_INPUT_ERROR_BUFFER_ALLOC_FAIL, 1);
+ goto refill;
+ }
+
+ /* copy data */
+ if (is_simple)
+ {
+ int n_pkts = ptd->n_packets;
+ void **desc_data = ptd->desc_data;
+ u16 *desc_len = ptd->desc_len;
+
+ vlib_get_buffers (vm, ptd->buffers, buffer_ptrs, n_buffers);
+
+ for (i = 0; i + 8 < n_pkts; i++)
+ {
+ clib_prefetch_load (desc_data[i + 8]);
+ clib_prefetch_store (buffer_ptrs[i + 8]->data);
+ clib_memcpy_fast (buffer_ptrs[i]->data + start_offset, desc_data[i],
+ desc_len[i]);
+ }
+ for (; i < n_pkts; i++)
+ clib_memcpy_fast (buffer_ptrs[i]->data + start_offset, desc_data[i],
+ desc_len[i]);
+ }
+ else
+ {
+ vlib_buffer_t *b;
+ u32 n_pkts = vec_len (ptd->copy_ops);
+ co = ptd->copy_ops;
+
+ for (i = 0; i + 8 < n_pkts; i++)
+ {
+ clib_prefetch_load (co[i + 8].data);
+ b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+ clib_memcpy_fast (b->data + co[i].buffer_offset, co[i].data,
+ co[i].data_len);
+ }
+ for (; i < n_pkts; i++)
+ {
+ b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+ clib_memcpy_fast (b->data + co[i].buffer_offset, co[i].data,
+ co[i].data_len);
+ }
+ }
+
+ /* release slots from the ring */
+ memif_advance_ring (type, mq, ring, cur_slot);
+
+ /* prepare buffer template and next indices */
+ vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] = mif->sw_if_index;
+ vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0;
+ ptd->buffer_template.current_data = start_offset;
+ ptd->buffer_template.current_config_index = 0;
+ ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index;
+ ptd->buffer_template.ref_count = 1;
+
+ if (mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ {
+ next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ if (mif->per_interface_next_index != ~0)
+ next_index = mif->per_interface_next_index;
+ else
+ vnet_feature_start_device_input (mif->sw_if_index, &next_index,
+ &ptd->buffer_template);
+
+ vlib_get_new_next_frame (vm, node, next_index, to_next_bufs,
+ n_left_to_next);
+ if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
+ {
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+ nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = mif->sw_if_index;
+ ef->hw_if_index = mif->hw_if_index;
+ vlib_frame_no_append (f);
+ }
+ }
+
+ if (is_simple)
+ {
+ vlib_buffer_copy_indices (to_next_bufs, ptd->buffers, ptd->n_packets);
+ if (mode == MEMIF_INTERFACE_MODE_IP)
+ memif_fill_buffer_mdata_simple (node, ptd, buffer_ptrs, nexts, 1);
+ else
+ memif_fill_buffer_mdata_simple (node, ptd, buffer_ptrs, nexts, 0);
+ }
+ else
+ {
+ if (mode == MEMIF_INTERFACE_MODE_IP)
+ memif_fill_buffer_mdata (vm, node, ptd, mif, to_next_bufs, nexts, 1);
+ else
+ memif_fill_buffer_mdata (vm, node, ptd, mif, to_next_bufs, nexts, 0);
+ }
/* packet trace if enabled */
if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
{
- u32 n_left = n_rx_packets;
+ u32 n_left = ptd->n_packets;
bi = to_next_bufs;
next = nexts;
u32 ni = next_index;
@@ -483,16 +711,16 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (mode == MEMIF_INTERFACE_MODE_ETHERNET)
{
- n_left_to_next -= n_rx_packets;
+ n_left_to_next -= ptd->n_packets;
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
else
- vlib_buffer_enqueue_to_next (vm, node, to_next_bufs, nexts, n_rx_packets);
+ vlib_buffer_enqueue_to_next (vm, node, to_next_bufs, nexts,
+ ptd->n_packets);
- vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX, thread_index,
- mif->sw_if_index, n_rx_packets,
- n_rx_bytes);
+ vlib_increment_combined_counter (
+ vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, mif->sw_if_index, ptd->n_packets, ptd->n_rx_bytes);
/* refill ring with empty buffers */
refill:
@@ -514,7 +742,7 @@ refill:
__atomic_store_n (&ring->head, head, __ATOMIC_RELEASE);
}
- return n_rx_packets;
+ return ptd->n_packets;
}
static_always_inline uword
@@ -675,14 +903,14 @@ memif_device_input_zc_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
next0 = next1 = next2 = next3 = next_index;
/* redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next0, b0);
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next1, b1);
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next2, b2);
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next3, b3);
+ vnet_feature_start_device_input (mif->sw_if_index, &next0,
+ b0);
+ vnet_feature_start_device_input (mif->sw_if_index, &next1,
+ b1);
+ vnet_feature_start_device_input (mif->sw_if_index, &next2,
+ b2);
+ vnet_feature_start_device_input (mif->sw_if_index, &next3,
+ b3);
}
}
@@ -730,8 +958,8 @@ memif_device_input_zc_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
next0 = next_index;
/* redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next0, b0);
+ vnet_feature_start_device_input (mif->sw_if_index, &next0,
+ b0);
}
}
@@ -774,7 +1002,7 @@ refill:
n_alloc = vlib_buffer_alloc_to_ring_from_pool (
vm, mq->buffers, slot, ring_size, n_slots, mq->buffer_pool_index);
dt->region = mq->buffer_pool_index + 1;
- offset = (u64) mif->regions[dt->region].shm + start_offset;
+ offset = (u64) mif->regions[dt->region].shm - start_offset;
if (PREDICT_FALSE (n_alloc != n_slots))
vlib_error_count (vm, node->node_index,
@@ -826,6 +1054,244 @@ done:
return n_rx_packets;
}
+CLIB_MARCH_FN (memif_dma_completion_cb, void, vlib_main_t *vm,
+ vlib_dma_batch_t *b)
+{
+ memif_main_t *mm = &memif_main;
+ memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16);
+ u32 thread_index = vm->thread_index;
+ u32 n_left_to_next = 0;
+ u16 nexts[MEMIF_RX_VECTOR_SZ], *next;
+ u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi;
+ uword n_trace;
+ memif_dma_info_t *dma_info;
+ u16 qid = b->cookie & 0xffff;
+ memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid);
+ dma_info = mq->dma_info + mq->dma_info_head;
+ memif_per_thread_data_t *ptd = &dma_info->data;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+
+ __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE);
+
+ /* prepare buffer template and next indices */
+ i16 start_offset =
+ (dma_info->mode == MEMIF_INTERFACE_MODE_IP) ? MEMIF_IP_OFFSET : 0;
+ vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] = mif->sw_if_index;
+ vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0;
+ ptd->buffer_template.current_data = start_offset;
+ ptd->buffer_template.current_config_index = 0;
+ ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index;
+ ptd->buffer_template.ref_count = 1;
+
+ if (dma_info->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ {
+ next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ if (mif->per_interface_next_index != ~0)
+ next_index = mif->per_interface_next_index;
+ else
+ vnet_feature_start_device_input (mif->sw_if_index, &next_index,
+ &ptd->buffer_template);
+
+ vlib_get_new_next_frame (vm, dma_info->node, next_index, to_next_bufs,
+ n_left_to_next);
+ if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
+ {
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+ nf =
+ vlib_node_runtime_get_next_frame (vm, dma_info->node, next_index);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = mif->sw_if_index;
+ ef->hw_if_index = mif->hw_if_index;
+ vlib_frame_no_append (f);
+ }
+ }
+
+ vec_reset_length (ptd->buffers);
+
+ if (dma_info->mode == MEMIF_INTERFACE_MODE_IP)
+ memif_fill_buffer_mdata (vm, dma_info->node, ptd, mif, to_next_bufs, nexts,
+ 1);
+ else
+ memif_fill_buffer_mdata (vm, dma_info->node, ptd, mif, to_next_bufs, nexts,
+ 0);
+
+ /* packet trace if enabled */
+ if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, dma_info->node))))
+ {
+ u32 n_left = ptd->n_packets;
+ bi = to_next_bufs;
+ next = nexts;
+ u32 ni = next_index;
+ while (n_trace && n_left)
+ {
+ vlib_buffer_t *b;
+ memif_input_trace_t *tr;
+ if (dma_info->mode != MEMIF_INTERFACE_MODE_ETHERNET)
+ ni = next[0];
+ b = vlib_get_buffer (vm, bi[0]);
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, dma_info->node, ni, b,
+ /* follow_chain */ 0)))
+ {
+ tr = vlib_add_trace (vm, dma_info->node, b, sizeof (*tr));
+ tr->next_index = ni;
+ tr->hw_if_index = mif->hw_if_index;
+ tr->ring = qid;
+ n_trace--;
+ }
+
+ /* next */
+ n_left--;
+ bi++;
+ next++;
+ }
+ vlib_set_trace_count (vm, dma_info->node, n_trace);
+ }
+
+ if (dma_info->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ {
+ n_left_to_next -= ptd->n_packets;
+ vlib_put_next_frame (vm, dma_info->node, next_index, n_left_to_next);
+ }
+ else
+ vlib_buffer_enqueue_to_next (vm, dma_info->node, to_next_bufs, nexts,
+ ptd->n_packets);
+
+ vlib_increment_combined_counter (
+ vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, mif->sw_if_index, ptd->n_packets, ptd->n_rx_bytes);
+
+ mq->dma_info_head++;
+ if (mq->dma_info_head == mq->dma_info_size)
+ mq->dma_info_head = 0;
+
+ return;
+}
+
+#ifndef CLIB_MARCH_VARIANT
+void
+memif_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b)
+{
+ return CLIB_MARCH_FN_SELECT (memif_dma_completion_cb) (vm, b);
+}
+#endif
+
+static_always_inline uword
+memif_device_input_inline_dma (vlib_main_t *vm, vlib_node_runtime_t *node,
+ memif_if_t *mif, memif_ring_type_t type,
+ u16 qid, memif_interface_mode_t mode)
+{
+ memif_main_t *mm = &memif_main;
+ memif_ring_t *ring;
+ memif_queue_t *mq;
+ memif_per_thread_data_t *ptd;
+ u16 cur_slot, n_slots;
+ u16 n_buffers, n_alloc, n_desc;
+ memif_copy_op_t *co;
+ memif_dma_info_t *dma_info;
+
+ u16 mif_id = mif - mm->interfaces;
+ u32 i;
+
+ mq = vec_elt_at_index (mif->rx_queues, qid);
+ ring = mq->ring;
+
+ cur_slot = mq->last_head;
+ n_slots = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE) - cur_slot;
+
+ if (n_slots == 0)
+ return 0;
+
+ if ((mq->dma_info_tail + 1 == mq->dma_info_head) ||
+ ((mq->dma_info_head == mq->dma_info_size - 1) &&
+ (mq->dma_info_tail == 0)))
+ return 0;
+
+ vlib_dma_batch_t *db;
+ db = vlib_dma_batch_new (vm, mif->dma_input_config);
+ if (!db)
+ return 0;
+
+ dma_info = mq->dma_info + mq->dma_info_tail;
+ dma_info->node = node;
+ dma_info->mode = mode;
+ ptd = &dma_info->data;
+ vec_validate_aligned (dma_info->data.desc_len,
+ pow2_mask (mq->log2_ring_size), CLIB_CACHE_LINE_BYTES);
+
+ n_desc = memif_parse_desc (&dma_info->data, mif, mq, cur_slot, n_slots);
+ cur_slot += n_desc;
+
+ if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ memif_validate_desc_data (&dma_info->data, mif, n_desc,
+ /* is_ethernet */ 1);
+ else
+ memif_validate_desc_data (&dma_info->data, mif, n_desc,
+ /* is_ethernet */ 0);
+
+ n_buffers = memif_process_desc (vm, node, ptd, mif);
+
+ if (PREDICT_FALSE (n_buffers == 0))
+ {
+ /* All descriptors are bad. Release slots in the ring and bail */
+ memif_advance_ring (type, mq, ring, cur_slot);
+ goto done;
+ }
+
+ /* allocate free buffers */
+ vec_validate_aligned (dma_info->data.buffers, n_buffers - 1,
+ CLIB_CACHE_LINE_BYTES);
+ n_alloc = vlib_buffer_alloc_from_pool (vm, dma_info->data.buffers, n_buffers,
+ mq->buffer_pool_index);
+ if (PREDICT_FALSE (n_alloc != n_buffers))
+ {
+ if (n_alloc)
+ vlib_buffer_free (vm, dma_info->data.buffers, n_alloc);
+ vlib_error_count (vm, node->node_index,
+ MEMIF_INPUT_ERROR_BUFFER_ALLOC_FAIL, 1);
+ goto done;
+ }
+
+ dma_info->data.n_rx_bytes = ptd->n_rx_bytes;
+ dma_info->data.n_packets = ptd->n_packets;
+ /* copy data */
+ vlib_buffer_t *b;
+ u32 n_pkts = clib_min (MEMIF_RX_VECTOR_SZ, vec_len (ptd->copy_ops));
+ co = ptd->copy_ops;
+
+ for (i = 0; i < n_pkts; i++)
+ {
+ b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+ vlib_dma_batch_add (vm, db, b->data + co[i].buffer_offset, co[i].data,
+ co[i].data_len);
+ }
+
+ for (i = n_pkts; i < vec_len (ptd->copy_ops); i++)
+ {
+ b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+ vlib_dma_batch_add (vm, db, b->data + co[i].buffer_offset, co[i].data,
+ co[i].data_len);
+ }
+
+ dma_info->dma_tail = cur_slot;
+ mq->last_head = dma_info->dma_tail;
+ mq->dma_info_tail++;
+ if (mq->dma_info_tail == mq->dma_info_size)
+ mq->dma_info_tail = 0;
+
+done:
+ vlib_dma_batch_set_cookie (vm, db, ((u64) mif_id << 16) | qid);
+ vlib_dma_batch_submit (vm, db);
+ vec_reset_length (ptd->copy_ops);
+
+ return ptd->n_packets;
+}
VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -867,12 +1333,25 @@ VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm,
}
else
{
- if (mif->mode == MEMIF_INTERFACE_MODE_IP)
- n_rx += memif_device_input_inline (
- vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+ if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) &&
+ (mif->dma_input_config >= 0))
+ {
+ if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+ n_rx += memif_device_input_inline_dma (
+ vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+ else
+ n_rx += memif_device_input_inline_dma (
+ vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+ }
else
- n_rx += memif_device_input_inline (
- vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+ {
+ if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+ n_rx += memif_device_input_inline (
+ vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+ else
+ n_rx += memif_device_input_inline (
+ vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+ }
}
}
}
@@ -880,7 +1359,6 @@ VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (memif_input_node) = {
.name = "memif-input",
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -892,7 +1370,6 @@ VLIB_REGISTER_NODE (memif_input_node) = {
.error_counters = memif_input_error_counters,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h
index 838651abc27..f6335410ba8 100644
--- a/src/plugins/memif/private.h
+++ b/src/plugins/memif/private.h
@@ -16,6 +16,7 @@
*/
#include <vppinfra/lock.h>
+#include <vlib/dma/dma.h>
#include <vlib/log.h>
#define MEMIF_DEFAULT_SOCKET_FILENAME "memif.sock"
@@ -24,7 +25,7 @@
#define MEMIF_DEFAULT_TX_QUEUES 1
#define MEMIF_DEFAULT_BUFFER_SIZE 2048
-#define MEMIF_MAX_M2S_RING (vlib_get_n_threads ())
+#define MEMIF_MAX_M2S_RING 256
#define MEMIF_MAX_S2M_RING 256
#define MEMIF_MAX_REGION 256
#define MEMIF_MAX_LOG2_RING_SIZE 14
@@ -120,9 +121,15 @@ typedef struct
int fd;
} memif_msg_fifo_elt_t;
+#define MEMIF_RX_VECTOR_SZ VLIB_FRAME_SIZE
+#define MEMIF_DMA_INFO_SIZE VLIB_FRAME_SIZE
+
+struct memif_dma_info;
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ clib_spinlock_t lockp;
/* ring data */
memif_ring_t *ring;
memif_log2_ring_size_t log2_ring_size;
@@ -134,6 +141,15 @@ typedef struct
u32 *buffers;
u8 buffer_pool_index;
+ /* dma data */
+ u16 dma_head;
+ u16 dma_tail;
+ struct memif_dma_info *dma_info;
+ u16 dma_info_head;
+ u16 dma_info_tail;
+ u16 dma_info_size;
+ u8 dma_info_full;
+
/* interrupts */
int int_fd;
uword int_clib_file_index;
@@ -144,14 +160,15 @@ typedef struct
u32 queue_index;
} memif_queue_t;
-#define foreach_memif_if_flag \
- _(0, ADMIN_UP, "admin-up") \
- _(1, IS_SLAVE, "slave") \
- _(2, CONNECTING, "connecting") \
- _(3, CONNECTED, "connected") \
- _(4, DELETING, "deleting") \
- _(5, ZERO_COPY, "zero-copy") \
- _(6, ERROR, "error")
+#define foreach_memif_if_flag \
+ _ (0, ADMIN_UP, "admin-up") \
+ _ (1, IS_SLAVE, "slave") \
+ _ (2, CONNECTING, "connecting") \
+ _ (3, CONNECTED, "connected") \
+ _ (4, DELETING, "deleting") \
+ _ (5, ZERO_COPY, "zero-copy") \
+ _ (6, ERROR, "error") \
+ _ (7, USE_DMA, "use_dma")
typedef enum
{
@@ -163,7 +180,6 @@ typedef enum
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- clib_spinlock_t lockp;
u32 flags;
memif_interface_id_t id;
u32 hw_if_index;
@@ -207,11 +223,15 @@ typedef struct
/* disconnect strings */
u8 *local_disc_string;
u8 *remote_disc_string;
+
+ /* dma config index */
+ int dma_input_config;
+ int dma_tx_config;
} memif_if_t;
typedef struct
{
- u32 packet_len;
+ u16 packet_len;
u16 first_buffer_vec_index;
} memif_packet_op_t;
@@ -224,21 +244,61 @@ typedef struct
u16 buffer_vec_index;
} memif_copy_op_t;
-#define MEMIF_RX_VECTOR_SZ VLIB_FRAME_SIZE
+typedef enum
+{
+ MEMIF_DESC_STATUS_OK = 0,
+ MEMIF_DESC_STATUS_ERR_BAD_REGION,
+ MEMIF_DESC_STATUS_ERR_REGION_OVERRUN,
+ MEMIF_DESC_STATUS_ERR_DATA_TOO_BIG,
+ MEMIF_DESC_STATUS_ERR_ZERO_LENGTH
+} __clib_packed memif_desc_status_err_code_t;
+
+typedef union
+{
+ struct
+ {
+ u8 next : 1;
+ u8 err : 1;
+ u8 reserved : 2;
+ memif_desc_status_err_code_t err_code : 4;
+ };
+ u8 as_u8;
+} memif_desc_status_t;
+
+STATIC_ASSERT_SIZEOF (memif_desc_status_t, 1);
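
memif_desc_status_t packs a continuation bit, an error bit and a 4-bit error code into a single byte; the STATIC_ASSERT pins that layout, which is what lets the per-thread data below fold the status of a whole descriptor batch into the single xor_status byte. A compilable illustration of the packing (GCC/clang bit-field layout assumed):

#include <assert.h>
#include <stdint.h>

typedef union
{
  struct
  {
    uint8_t next : 1;     /* chained descriptor follows */
    uint8_t err : 1;      /* any error at all */
    uint8_t reserved : 2;
    uint8_t err_code : 4; /* which error, 16 codes max */
  };
  uint8_t as_u8;
} status_t;

int
main (void)
{
  status_t s = { .as_u8 = 0 };
  s.err = 1;
  s.err_code = 2;
  assert (sizeof (s) == 1); /* mirrors STATIC_ASSERT_SIZEOF above */
  return s.as_u8 ? 0 : 1;   /* the whole status reads back as one byte */
}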
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
+ u16 n_packets;
+ u16 max_desc_len;
+ u32 n_rx_bytes;
+ u8 xor_status;
/* copy vector */
- memif_packet_op_t packet_ops[MEMIF_RX_VECTOR_SZ];
memif_copy_op_t *copy_ops;
u32 *buffers;
+ memif_packet_op_t packet_ops[MEMIF_RX_VECTOR_SZ];
+
+ /* temp storage for compressed descriptors */
+ void **desc_data;
+ u16 *desc_len;
+ memif_desc_status_t *desc_status;
/* buffer template */
vlib_buffer_t buffer_template;
} memif_per_thread_data_t;
+typedef struct memif_dma_info
+{
+ /* per thread data */
+ memif_interface_mode_t mode;
+ vlib_node_runtime_t *node;
+ u32 dma_head;
+ u32 dma_tail;
+ u8 finished;
+ memif_per_thread_data_t data;
+} memif_dma_info_t;
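
The head/tail counters here (and dma_info_head/dma_info_tail plus dma_info_full in the queue above) implement a fixed-size ring of in-flight DMA batches; the explicit full flag is needed because head == tail is otherwise ambiguous between empty and full. A minimal sketch of that occupancy scheme (helper names are assumptions; size is a power of two, like VLIB_FRAME_SIZE in default builds):

#include <stdint.h>

typedef struct
{
  uint16_t head, tail, size; /* size must be a power of two */
  uint8_t full;
} dma_ring_t;

static inline int
dma_ring_enq (dma_ring_t *r) /* claim a slot for a new batch */
{
  if (r->full)
    return -1;
  r->head = (r->head + 1) & (r->size - 1);
  if (r->head == r->tail)
    r->full = 1;
  return 0;
}

static inline void
dma_ring_deq (dma_ring_t *r) /* release from the completion callback */
{
  r->tail = (r->tail + 1) & (r->size - 1);
  r->full = 0;
}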
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -278,6 +338,7 @@ typedef struct
u8 *secret;
u8 is_master;
u8 is_zero_copy;
+ u8 use_dma;
memif_interface_mode_t mode:8;
memif_log2_ring_size_t log2_ring_size;
u16 buffer_size;
@@ -290,10 +351,11 @@ typedef struct
u32 sw_if_index;
} memif_create_if_args_t;
-int memif_socket_filename_add_del (u8 is_add, u32 sock_id,
- u8 * sock_filename);
-int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args);
-int memif_delete_if (vlib_main_t * vm, memif_if_t * mif);
+u32 memif_get_unused_socket_id ();
+clib_error_t *memif_socket_filename_add_del (u8 is_add, u32 sock_id,
+ char *sock_filename);
+clib_error_t *memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args);
+clib_error_t *memif_delete_if (vlib_main_t *vm, memif_if_t *mif);
clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm);
clib_error_t *memif_interface_admin_up_down (vnet_main_t *vnm, u32 hw_if_index,
u32 flags);
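
The exported API moves from int return codes to clib_error_t *, so a failure now carries a formatted message that the caller must report and free rather than a bare number. Caller-side sketch (the wrapper and the plugin-relative include path are assumptions; the error idiom is standard vppinfra):

#include <vlib/vlib.h>
#include <memif/private.h> /* assumed include path for the plugin header */

static int
demo_create (vlib_main_t *vm, memif_create_if_args_t *args)
{
  clib_error_t *err = memif_create_if (vm, args);
  if (err)
    {
      clib_warning ("memif create failed: %U", format_clib_error, err);
      clib_error_free (err); /* the callee allocated the error object */
      return -1;
    }
  return 0;
}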
@@ -322,7 +384,8 @@ clib_error_t *memif_slave_conn_fd_error (clib_file_t * uf);
clib_error_t *memif_msg_send_disconnect (memif_if_t * mif,
clib_error_t * err);
u8 *format_memif_device_name (u8 * s, va_list * args);
-
+void memif_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b);
+void memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c
index 5a381a7c7cd..001f26f13ef 100644
--- a/src/plugins/memif/socket.c
+++ b/src/plugins/memif/socket.c
@@ -25,7 +25,6 @@
#include <sys/un.h>
#include <sys/uio.h>
#include <sys/mman.h>
-#include <sys/prctl.h>
#include <sys/eventfd.h>
#include <inttypes.h>
#include <limits.h>
@@ -446,14 +445,12 @@ memif_msg_receive (memif_if_t ** mifp, clib_socket_t * sock, clib_file_t * uf)
if ((err = memif_init_regions_and_queues (mif)))
goto error;
memif_msg_enq_init (mif);
- /* *INDENT-OFF* */
vec_foreach_index (i, mif->regions)
memif_msg_enq_add_region (mif, i);
vec_foreach_index (i, mif->tx_queues)
memif_msg_enq_add_ring (mif, i, MEMIF_RING_S2M);
vec_foreach_index (i, mif->rx_queues)
memif_msg_enq_add_ring (mif, i, MEMIF_RING_M2S);
- /* *INDENT-ON* */
memif_msg_enq_connect (mif);
break;
@@ -648,7 +645,8 @@ memif_master_conn_fd_error (clib_file_t * uf)
memif_log_warn (0, "Error on unknown file descriptor %d",
uf->file_descriptor);
- memif_file_del (uf);
+ if (uf->file_descriptor != ~0)
+ memif_file_del (uf);
return 0;
}
diff --git a/src/plugins/mss_clamp/mss_clamp_node.c b/src/plugins/mss_clamp/mss_clamp_node.c
index 4a40b2329e1..de00a5a8094 100644
--- a/src/plugins/mss_clamp/mss_clamp_node.c
+++ b/src/plugins/mss_clamp/mss_clamp_node.c
@@ -24,6 +24,7 @@
#include <vnet/feature/feature.h>
#include <vnet/ip/ip4.h>
#include <vnet/ip/ip6.h>
+#include <vnet/tcp/tcp_packet.h>
extern vlib_node_registration_t mssc_ip4_in_node, mssc_ip4_out_node;
extern vlib_node_registration_t mssc_ip6_in_node, mssc_ip6_out_node;
@@ -181,17 +182,15 @@ mssc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
{
ip6_header_t *ip0 = (ip6_header_t *) h0;
ip6_header_t *ip1 = (ip6_header_t *) h1;
-
- if (IP_PROTOCOL_TCP == ip0->protocol)
- {
- clamped0 = mssc_mss_fixup (b[0], ip6_next_header (ip0),
- cm->max_mss6[sw_if_index0]);
- }
- if (IP_PROTOCOL_TCP == ip1->protocol)
- {
- clamped1 = mssc_mss_fixup (b[1], ip6_next_header (ip1),
- cm->max_mss6[sw_if_index1]);
- }
+ tcp_header_t *tcp0 =
+ ip6_ext_header_find (vm, b[0], ip0, IP_PROTOCOL_TCP, NULL);
+ tcp_header_t *tcp1 =
+ ip6_ext_header_find (vm, b[1], ip1, IP_PROTOCOL_TCP, NULL);
+
+ if (tcp0)
+ clamped0 = mssc_mss_fixup (b[0], tcp0, cm->max_mss6[sw_if_index0]);
+ if (tcp1)
+ clamped1 = mssc_mss_fixup (b[1], tcp1, cm->max_mss6[sw_if_index1]);
}
pkts_clamped += clamped0 + clamped1;
@@ -254,12 +253,11 @@ mssc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
else if (FIB_PROTOCOL_IP6 == fproto)
{
ip6_header_t *ip0 = (ip6_header_t *) h0;
+ tcp_header_t *tcp0 =
+ ip6_ext_header_find (vm, b[0], ip0, IP_PROTOCOL_TCP, NULL);
- if (IP_PROTOCOL_TCP == ip0->protocol)
- {
- clamped0 = mssc_mss_fixup (b[0], ip6_next_header (ip0),
- cm->max_mss6[sw_if_index0]);
- }
+ if (tcp0)
+ clamped0 = mssc_mss_fixup (b[0], tcp0, cm->max_mss6[sw_if_index0]);
}
pkts_clamped += clamped0;
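
The reasoning behind both mss-clamp hunks: when IPv6 extension headers are present, ip0->protocol names the first extension header rather than TCP, so the old equality test silently skipped those packets, and ip6_next_header() would have pointed at the extension header anyway. ip6_ext_header_find() walks the header chain to the transport header, making the clamp extension-header safe:

/* the pattern both hunks converge on */
tcp_header_t *tcp0 =
  ip6_ext_header_find (vm, b[0], ip0, IP_PROTOCOL_TCP, NULL);
if (tcp0)
  clamped0 = mssc_mss_fixup (b[0], tcp0, cm->max_mss6[sw_if_index0]);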
diff --git a/src/plugins/nat/CMakeLists.txt b/src/plugins/nat/CMakeLists.txt
index 2545da6da18..c53e0e39c7c 100644
--- a/src/plugins/nat/CMakeLists.txt
+++ b/src/plugins/nat/CMakeLists.txt
@@ -62,12 +62,10 @@ add_vpp_plugin(nat44_ei
nat44-ei/nat44_ei_in2out.c
nat44-ei/nat44_ei_out2in.c
nat44-ei/nat44_ei_handoff.c
- nat44-ei/nat44_ei_hairpinning.c
MULTIARCH_SOURCES
nat44-ei/nat44_ei_in2out.c
nat44-ei/nat44_ei_out2in.c
- nat44-ei/nat44_ei_hairpinning.c
API_FILES
nat44-ei/nat44_ei.api
@@ -199,7 +197,7 @@ add_custom_target(test_pnat-run
DEPENDS test_pnat
)
-if("${CMAKE_VERSION}" VERSION_GREATER_EQUAL "3.13" AND "${CMAKE_C_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang")
+if(VPP_BUILD_TESTS_WITH_COVERAGE)
set(TARGET_NAME test_pnat)
set(COV_SOURCES ${CMAKE_SOURCE_DIR}/plugins/nat/pnat/pnat.c ${CMAKE_SOURCE_DIR}/plugins/nat/pnat/pnat_node.h ${CMAKE_SOURCE_DIR}/plugins/nat/pnat/pnat_node.c)
diff --git a/src/plugins/nat/FEATURE.yaml b/src/plugins/nat/FEATURE.yaml
index bbb8586390e..e2efdf5618d 100644
--- a/src/plugins/nat/FEATURE.yaml
+++ b/src/plugins/nat/FEATURE.yaml
@@ -2,7 +2,7 @@
name: Network Address Translation
maintainer:
- Ole Troan <ot@cisco.com>
- - Filip Varga <fivarga@cisco.com>
+ - Filip Varga <filipvarga89@gmail.com>
features:
- NAT44-EI - IPv4 Endpoint Independent NAT
- 1:1 NAT
diff --git a/src/plugins/nat/det44/det44.api b/src/plugins/nat/det44/det44.api
index 7b6aef70883..ddb9c497ea0 100644
--- a/src/plugins/nat/det44/det44.api
+++ b/src/plugins/nat/det44/det44.api
@@ -39,7 +39,6 @@ autoreply define det44_plugin_enable_disable {
u32 inside_vrf;
u32 outside_vrf;
bool enable;
- option status="in_progress";
};
/** \brief Enable/disable DET44 feature on the interface
@@ -55,7 +54,6 @@ autoreply define det44_interface_add_del_feature {
bool is_add;
bool is_inside;
vl_api_interface_index_t sw_if_index;
- option status="in_progress";
};
/** \brief Dump interfaces with DET44 feature
@@ -65,7 +63,6 @@ autoreply define det44_interface_add_del_feature {
define det44_interface_dump {
u32 client_index;
u32 context;
- option status="in_progress";
};
/** \brief DET44 interface details response
@@ -78,7 +75,6 @@ define det44_interface_details {
bool is_inside;
bool is_outside;
vl_api_interface_index_t sw_if_index;
- option status="in_progress";
};
/** \brief Add/delete DET44 mapping
diff --git a/src/plugins/nat/det44/det44.c b/src/plugins/nat/det44/det44.c
index 1dbbfdfdebe..f251bc9c608 100644
--- a/src/plugins/nat/det44/det44.c
+++ b/src/plugins/nat/det44/det44.c
@@ -29,7 +29,6 @@
det44_main_t det44_main;
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_det44_in2out, static) = {
.arc_name = "ip4-unicast",
.node_name = "det44-in2out",
@@ -47,7 +46,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Deterministic NAT (CGN)",
};
-/* *INDENT-ON* */
void
det44_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
@@ -150,14 +148,12 @@ snat_det_add_map (ip4_address_t * in_addr, u8 in_plen,
}
/* Add/del external address range to FIB */
- /* *INDENT-OFF* */
pool_foreach (i, dm->interfaces) {
if (det44_interface_is_inside(i))
continue;
det44_add_del_addr_to_fib(out_addr, out_plen, i->sw_if_index, is_add);
goto out;
}
- /* *INDENT-ON* */
out:
return 0;
}
@@ -203,7 +199,6 @@ det44_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
// rather make a structure and when enable call is used
// then register nodes
- /* *INDENT-OFF* */
pool_foreach (tmp, dm->interfaces) {
if (tmp->sw_if_index == sw_if_index)
{
@@ -211,7 +206,6 @@ det44_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
goto out;
}
}
- /* *INDENT-ON* */
out:
feature_name = is_inside ? "det44-in2out" : "det44-out2in";
@@ -270,7 +264,6 @@ out:
// add/del outside interface fib to registry
u8 found = 0;
det44_fib_t *outside_fib;
- /* *INDENT-OFF* */
vec_foreach (outside_fib, dm->outside_fibs)
{
if (outside_fib->fib_index == fib_index)
@@ -292,7 +285,6 @@ out:
break;
}
}
- /* *INDENT-ON* */
if (!is_del && !found)
{
vec_add2 (dm->outside_fibs, outside_fib, 1);
@@ -301,12 +293,10 @@ out:
}
// add/del outside address to FIB
snat_det_map_t *mp;
- /* *INDENT-OFF* */
pool_foreach (mp, dm->det_maps) {
det44_add_del_addr_to_fib(&mp->out_addr,
mp->out_plen, sw_if_index, !is_del);
}
- /* *INDENT-ON* */
}
return 0;
}
@@ -324,19 +314,29 @@ det44_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
snat_det_session_t *ses;
snat_det_map_t *mp;
- vlib_process_wait_for_event_or_clock (vm, 10.0);
- vlib_process_get_events (vm, NULL);
- u32 now = (u32) vlib_time_now (vm);
- /* *INDENT-OFF* */
- pool_foreach (mp, dm->det_maps) {
- vec_foreach(ses, mp->sessions)
- {
- /* Delete if session expired */
- if (ses->in_port && (ses->expire < now))
- snat_det_ses_close (mp, ses);
- }
- }
- /* *INDENT-ON* */
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, 10.0);
+ vlib_process_get_events (vm, NULL);
+ u32 now = (u32) vlib_time_now (vm);
+
+ if (!plugin_enabled ())
+ {
+ continue;
+ }
+
+ pool_foreach (mp, dm->det_maps)
+ {
+ vec_foreach (ses, mp->sessions)
+ {
+ // close expired sessions
+ if (ses->in_port && (ses->expire < now))
+ {
+ snat_det_ses_close (mp, ses);
+ }
+ }
+ }
+ }
return 0;
}
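
The substantive fix in this hunk is the while (1): a VPP process node function that returns is done for good, so the previous body performed exactly one 10-second wait and one expiry scan over the lifetime of the process. The corrected skeleton, reduced to its control flow:

static uword
expire_walk_skeleton (vlib_main_t *vm, vlib_node_runtime_t *rt,
                      vlib_frame_t *f)
{
  while (1)
    {
      vlib_process_wait_for_event_or_clock (vm, 10.0);
      vlib_process_get_events (vm, NULL);
      if (!plugin_enabled ())
        continue; /* keep sleeping until the plugin is enabled */
      /* ... walk det_maps and close expired sessions ... */
    }
  return 0; /* not reached */
}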
@@ -374,10 +374,11 @@ det44_plugin_enable (det44_config_t c)
c.inside_vrf_id,
dm->fib_src_hi);
- det44_create_expire_walk_process ();
dm->mss_clamping = 0;
dm->config = c;
dm->enabled = 1;
+
+ det44_create_expire_walk_process ();
return 0;
}
@@ -395,6 +396,8 @@ det44_plugin_disable ()
return 1;
}
+ dm->enabled = 0;
+
// DET44 cleanup (order dependent)
// 1) remove interfaces (det44_interface_add_del) removes map ranges from fib
// 2) free sessions
@@ -428,15 +431,12 @@ det44_plugin_disable ()
}
vec_free (interfaces);
- /* *INDENT-OFF* */
pool_foreach (mp, dm->det_maps)
{
vec_free (mp->sessions);
}
- /* *INDENT-ON* */
det44_reset_timeouts ();
- dm->enabled = 0;
pool_free (dm->interfaces);
pool_free (dm->det_maps);
@@ -467,7 +467,6 @@ det44_update_outside_fib (ip4_main_t * im,
if (!vec_len (dm->outside_fibs))
return;
- /* *INDENT-OFF* */
pool_foreach (i, dm->interfaces)
{
if (i->sw_if_index == sw_if_index)
@@ -477,7 +476,6 @@ det44_update_outside_fib (ip4_main_t * im,
match = 1;
}
}
- /* *INDENT-ON* */
if (!match)
return;
diff --git a/src/plugins/nat/det44/det44.h b/src/plugins/nat/det44/det44.h
index 02b0fa7e81d..e576bfb65e8 100644
--- a/src/plugins/nat/det44/det44.h
+++ b/src/plugins/nat/det44/det44.h
@@ -40,6 +40,7 @@
#include <nat/lib/lib.h>
#include <nat/lib/inlines.h>
#include <nat/lib/ipfix_logging.h>
+#include <nat/lib/nat_proto.h>
/* Session state */
#define foreach_det44_session_state \
@@ -228,7 +229,7 @@ plugin_enabled ()
extern vlib_node_registration_t det44_in2out_node;
extern vlib_node_registration_t det44_out2in_node;
-int det44_plugin_enable ();
+int det44_plugin_enable (det44_config_t);
int det44_plugin_disable ();
int det44_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del);
@@ -278,13 +279,11 @@ snat_det_map_by_user (ip4_address_t * user_addr)
{
det44_main_t *dm = &det44_main;
snat_det_map_t *mp;
- /* *INDENT-OFF* */
pool_foreach (mp, dm->det_maps)
{
if (is_addr_in_net(user_addr, &mp->in_addr, mp->in_plen))
return mp;
}
- /* *INDENT-ON* */
return 0;
}
@@ -293,13 +292,11 @@ snat_det_map_by_out (ip4_address_t * out_addr)
{
det44_main_t *dm = &det44_main;
snat_det_map_t *mp;
- /* *INDENT-OFF* */
pool_foreach (mp, dm->det_maps)
{
if (is_addr_in_net(out_addr, &mp->out_addr, mp->out_plen))
return mp;
}
- /* *INDENT-ON* */
return 0;
}
diff --git a/src/plugins/nat/det44/det44_api.c b/src/plugins/nat/det44/det44_api.c
index 1486180aa99..c7e17dfd147 100644
--- a/src/plugins/nat/det44/det44_api.c
+++ b/src/plugins/nat/det44/det44_api.c
@@ -67,14 +67,12 @@ vl_api_det44_forward_t_handler (vl_api_det44_forward_t * mp)
hi_port = lo_port + m->ports_per_host - 1;
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DET44_FORWARD_REPLY,
({
rmp->out_port_lo = ntohs (lo_port);
rmp->out_port_hi = ntohs (hi_port);
clib_memcpy (rmp->out_addr, &out_addr, 4);
}))
- /* *INDENT-ON* */
}
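
Context for these handler hunks: REPLY_MACRO2 allocates the reply message, runs the caller-supplied statement block to fill the message-specific fields, and sends it back on the client's registration; the /* *INDENT-OFF* */ guards only shielded that block from the old GNU indent checker and are dead weight now that the tree is clang-formatted. A rough sketch of what such a macro does (an approximation, not the vlibapi definition, which also resolves the registration and handles tracing):

#define DEMO_REPLY_MACRO2(t, body)                                            \
  do                                                                          \
    {                                                                         \
      rmp = vl_msg_api_alloc (sizeof (*rmp));                                 \
      clib_memset (rmp, 0, sizeof (*rmp));                                    \
      rmp->_vl_msg_id = htons (t);                                            \
      rmp->context = mp->context;                                             \
      rmp->retval = htonl (rv);                                               \
      body;                                                                   \
      vl_api_send_msg (reg, (u8 *) rmp);                                      \
    }                                                                         \
  while (0)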
static void
@@ -98,12 +96,10 @@ vl_api_det44_reverse_t_handler (vl_api_det44_reverse_t * mp)
snat_det_reverse (m, &out_addr, htons (mp->out_port), &in_addr);
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DET44_REVERSE_REPLY,
({
clib_memcpy (rmp->in_addr, &in_addr, 4);
}))
- /* *INDENT-ON* */
}
static void
@@ -139,10 +135,8 @@ vl_api_det44_map_dump_t_handler (vl_api_det44_map_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
vec_foreach(m, dm->det_maps)
sent_det44_map_details(m, reg, mp->context);
- /* *INDENT-ON* */
}
static void
@@ -328,12 +322,10 @@ vl_api_det44_interface_dump_t_handler (vl_api_det44_interface_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (i, dm->interfaces)
{
det44_send_interface_details(i, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -359,7 +351,6 @@ vl_api_det44_get_timeouts_t_handler (vl_api_det44_get_timeouts_t * mp)
nat_timeouts_t timeouts;
int rv = 0;
timeouts = det44_get_timeouts ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DET44_GET_TIMEOUTS_REPLY,
({
rmp->udp = htonl (timeouts.udp);
@@ -367,7 +358,6 @@ vl_api_det44_get_timeouts_t_handler (vl_api_det44_get_timeouts_t * mp)
rmp->tcp_transitory = htonl (timeouts.tcp.transitory);
rmp->icmp = htonl (timeouts.icmp);
}))
- /* *INDENT-ON* */
}
/*
@@ -412,14 +402,12 @@ vl_api_nat_det_forward_t_handler (vl_api_nat_det_forward_t * mp)
hi_port = lo_port + m->ports_per_host - 1;
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_NAT_DET_FORWARD_REPLY,
({
rmp->out_port_lo = ntohs (lo_port);
rmp->out_port_hi = ntohs (hi_port);
clib_memcpy (rmp->out_addr, &out_addr, 4);
}))
- /* *INDENT-ON* */
}
static void
@@ -443,12 +431,10 @@ vl_api_nat_det_reverse_t_handler (vl_api_nat_det_reverse_t * mp)
snat_det_reverse (m, &out_addr, htons (mp->out_port), &in_addr);
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_NAT_DET_REVERSE_REPLY,
({
clib_memcpy (rmp->in_addr, &in_addr, 4);
}))
- /* *INDENT-ON* */
}
static void
@@ -484,10 +470,8 @@ vl_api_nat_det_map_dump_t_handler (vl_api_nat_det_map_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
vec_foreach(m, dm->det_maps)
sent_nat_det_map_details(m, reg, mp->context);
- /* *INDENT-ON* */
}
static void
diff --git a/src/plugins/nat/det44/det44_cli.c b/src/plugins/nat/det44/det44_cli.c
index 5bd81d306f4..5d0ad04363e 100644
--- a/src/plugins/nat/det44/det44_cli.c
+++ b/src/plugins/nat/det44/det44_cli.c
@@ -512,7 +512,7 @@ VLIB_CLI_COMMAND (det44_map_command, static) = {
/*?
* @cliexpar
- * @cliexpstart{show det44 mappings}
+ * @cliexstart{show det44 mappings}
* Show DET44 mappings
* vpp# show det44 mappings
* DET44 mappings:
diff --git a/src/plugins/nat/det44/det44_in2out.c b/src/plugins/nat/det44/det44_in2out.c
index 5fe4a9a0658..3f5e05a064c 100644
--- a/src/plugins/nat/det44/det44_in2out.c
+++ b/src/plugins/nat/det44/det44_in2out.c
@@ -1011,7 +1011,6 @@ VLIB_NODE_FN (det44_in2out_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (det44_in2out_node) = {
.name = "det44-in2out",
.vector_size = sizeof (u32),
@@ -1028,7 +1027,6 @@ VLIB_REGISTER_NODE (det44_in2out_node) = {
[DET44_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/det44/det44_inlines.h b/src/plugins/nat/det44/det44_inlines.h
index aeb55b385d3..e5e70bbaebc 100644
--- a/src/plugins/nat/det44/det44_inlines.h
+++ b/src/plugins/nat/det44/det44_inlines.h
@@ -91,7 +91,6 @@ det44_translate (vlib_node_runtime_t * node, u32 sw_if_index0,
if (sw_if_index == ~0)
{
// TODO: go over use cases
- /* *INDENT-OFF* */
vec_foreach (outside_fib, dm->outside_fibs)
{
fei = fib_table_lookup (outside_fib->fib_index, &pfx);
@@ -102,18 +101,15 @@ det44_translate (vlib_node_runtime_t * node, u32 sw_if_index0,
break;
}
}
- /* *INDENT-ON* */
}
if (sw_if_index != ~0)
{
det44_interface_t *i;
- /* *INDENT-OFF* */
pool_foreach (i, dm->interfaces) {
/* NAT packet aimed at outside interface */
if ((det44_interface_is_outside (i)) && (sw_if_index == i->sw_if_index))
return 0;
}
- /* *INDENT-ON* */
}
}
return 1;
diff --git a/src/plugins/nat/det44/det44_out2in.c b/src/plugins/nat/det44/det44_out2in.c
index 111bc61c476..ab6acd4f8e9 100644
--- a/src/plugins/nat/det44/det44_out2in.c
+++ b/src/plugins/nat/det44/det44_out2in.c
@@ -173,6 +173,9 @@ icmp_match_out2in_det (vlib_node_runtime_t * node,
}
det44_log_info ("unknown dst address: %U",
format_ip4_address, &ip0->dst_address);
+ b0->error = node->errors[DET44_OUT2IN_ERROR_NO_TRANSLATION];
+ next0 = DET44_OUT2IN_NEXT_DROP;
+
goto out;
}
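
This is a behavior fix, not just extra logging: an ICMP packet aimed at an unknown outside address previously only logged the miss before bailing out, while now it is also counted against the node's NO_TRANSLATION error and steered to the drop next. The two added lines are the standard VPP drop idiom:

/* generic form of the drop idiom used above */
b0->error = node->errors[MY_NODE_ERROR_NO_TRANSLATION]; /* bump counter */
next0 = MY_NODE_NEXT_DROP; /* hand the buffer to error-drop */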
@@ -815,7 +818,6 @@ VLIB_NODE_FN (det44_out2in_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (det44_out2in_node) = {
.name = "det44-out2in",
.vector_size = sizeof (u32),
@@ -832,7 +834,6 @@ VLIB_REGISTER_NODE (det44_out2in_node) = {
[DET44_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite.c b/src/plugins/nat/dslite/dslite.c
index 4fe4422df13..a2654b5a44b 100644
--- a/src/plugins/nat/dslite/dslite.c
+++ b/src/plugins/nat/dslite/dslite.c
@@ -101,7 +101,6 @@ dslite_init_datastructures (void)
u32 b4_buckets = 128;
u32 b4_memory_size = 64 << 20;
- /* *INDENT-OFF* */
vec_foreach (td, dm->per_thread_data)
{
clib_bihash_init_24_8 (&td->in2out, "dslite in2out", translation_buckets,
@@ -112,7 +111,6 @@ dslite_init_datastructures (void)
clib_bihash_init_16_8 (&td->b4_hash, "dslite b4s", b4_buckets, b4_memory_size);
}
- /* *INDENT-ON* */
dm->is_enabled = 1;
}
@@ -281,13 +279,11 @@ format_dslite_ce_trace (u8 * s, va_list * args)
VLIB_INIT_FUNCTION (dslite_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Dual-Stack Lite",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite.h b/src/plugins/nat/dslite/dslite.h
index 3c798bf54fe..f05670c9bf5 100644
--- a/src/plugins/nat/dslite/dslite.h
+++ b/src/plugins/nat/dslite/dslite.h
@@ -61,7 +61,6 @@ typedef struct
};
} dslite_session_key_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
nat_session_key_t out2in;
@@ -72,7 +71,6 @@ typedef CLIB_PACKED (struct
u64 total_bytes;
u32 total_pkts;
}) dslite_session_t;
-/* *INDENT-ON* */
typedef struct
{
diff --git a/src/plugins/nat/dslite/dslite_api.c b/src/plugins/nat/dslite/dslite_api.c
index 420e8212ad9..4bb53c37660 100644
--- a/src/plugins/nat/dslite/dslite_api.c
+++ b/src/plugins/nat/dslite/dslite_api.c
@@ -53,13 +53,11 @@ vl_api_dslite_get_aftr_addr_t_handler (vl_api_dslite_get_aftr_addr_t * mp)
dslite_main_t *dm = &dslite_main;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DSLITE_GET_AFTR_ADDR_REPLY,
({
memcpy (rmp->ip4_addr, &dm->aftr_ip4_addr.as_u8, 4);
memcpy (rmp->ip6_addr, &dm->aftr_ip6_addr.as_u8, 16);
}))
- /* *INDENT-ON* */
}
static void
@@ -88,13 +86,11 @@ vl_api_dslite_get_b4_addr_t_handler (vl_api_dslite_get_b4_addr_t * mp)
dslite_main_t *dm = &dslite_main;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DSLITE_GET_B4_ADDR_REPLY,
({
memcpy (rmp->ip4_addr, &dm->b4_ip4_addr.as_u8, 4);
memcpy (rmp->ip6_addr, &dm->b4_ip6_addr.as_u8, 16);
}))
- /* *INDENT-ON* */
}
static void
@@ -154,12 +150,10 @@ vl_api_dslite_address_dump_t_handler (vl_api_dslite_address_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
vec_foreach (a, dm->pool.pool_addr)
{
send_dslite_address_details (a, reg, mp->context);
}
- /* *INDENT-ON* */
}
/* API definitions */
diff --git a/src/plugins/nat/dslite/dslite_ce_decap.c b/src/plugins/nat/dslite/dslite_ce_decap.c
index f36a87f8bc1..b5bdafc0e26 100644
--- a/src/plugins/nat/dslite/dslite_ce_decap.c
+++ b/src/plugins/nat/dslite/dslite_ce_decap.c
@@ -114,7 +114,6 @@ VLIB_NODE_FN (dslite_ce_decap_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dslite_ce_decap_node) = {
.name = "dslite-ce-decap",
.vector_size = sizeof (u32),
@@ -130,7 +129,6 @@ VLIB_REGISTER_NODE (dslite_ce_decap_node) = {
[DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-icmp-input",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite_ce_encap.c b/src/plugins/nat/dslite/dslite_ce_encap.c
index d8d0e400919..19596efd32d 100644
--- a/src/plugins/nat/dslite/dslite_ce_encap.c
+++ b/src/plugins/nat/dslite/dslite_ce_encap.c
@@ -107,7 +107,6 @@ VLIB_NODE_FN (dslite_ce_encap_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dslite_ce_encap_node) = {
.name = "dslite-ce-encap",
.vector_size = sizeof (u32),
@@ -122,7 +121,6 @@ VLIB_REGISTER_NODE (dslite_ce_encap_node) = {
[DSLITE_CE_ENCAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite_cli.c b/src/plugins/nat/dslite/dslite_cli.c
index 193cb3fe248..8ed9deb2a2d 100644
--- a/src/plugins/nat/dslite/dslite_cli.c
+++ b/src/plugins/nat/dslite/dslite_cli.c
@@ -95,12 +95,10 @@ dslite_show_pool_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "DS-Lite pool:");
- /* *INDENT-OFF* */
vec_foreach (a, dm->pool.pool_addr)
{
vlib_cli_output (vm, "%U", format_ip4_address, &a->addr);
}
- /* *INDENT-ON* */
return 0;
}
@@ -267,7 +265,6 @@ dslite_show_sessions_command_fn (vlib_main_t * vm,
dslite_per_thread_data_t *td;
dslite_b4_t *b4;
- /* *INDENT-OFF* */
vec_foreach (td, dm->per_thread_data)
{
pool_foreach (b4, td->b4s)
@@ -275,12 +272,10 @@ dslite_show_sessions_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_dslite_b4, td, b4);
}
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
/*?
* @cliexpar
@@ -394,7 +389,6 @@ VLIB_CLI_COMMAND (dslite_show_sessions, static) = {
.function = dslite_show_sessions_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite_in2out.c b/src/plugins/nat/dslite/dslite_in2out.c
index 409c59c218c..522c3cf4123 100644
--- a/src/plugins/nat/dslite/dslite_in2out.c
+++ b/src/plugins/nat/dslite/dslite_in2out.c
@@ -460,7 +460,6 @@ VLIB_NODE_FN (dslite_in2out_node) (vlib_main_t * vm,
return dslite_in2out_node_fn_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dslite_in2out_node) = {
.name = "dslite-in2out",
.vector_size = sizeof (u32),
@@ -477,7 +476,6 @@ VLIB_REGISTER_NODE (dslite_in2out_node) = {
[DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (dslite_in2out_slowpath_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -486,7 +484,6 @@ VLIB_NODE_FN (dslite_in2out_slowpath_node) (vlib_main_t * vm,
return dslite_in2out_node_fn_inline (vm, node, frame, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
.name = "dslite-in2out-slowpath",
.vector_size = sizeof (u32),
@@ -503,7 +500,6 @@ VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
[DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite_out2in.c b/src/plugins/nat/dslite/dslite_out2in.c
index c2fa767bd7d..531bbb468bb 100644
--- a/src/plugins/nat/dslite/dslite_out2in.c
+++ b/src/plugins/nat/dslite/dslite_out2in.c
@@ -266,7 +266,6 @@ VLIB_NODE_FN (dslite_out2in_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dslite_out2in_node) = {
.name = "dslite-out2in",
.vector_size = sizeof (u32),
@@ -282,7 +281,6 @@ VLIB_REGISTER_NODE (dslite_out2in_node) = {
[DSLITE_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/extras/nat_100ks.py b/src/plugins/nat/extras/nat_100ks.py
index c85a4591cd3..4e8dc2486d6 100644
--- a/src/plugins/nat/extras/nat_100ks.py
+++ b/src/plugins/nat/extras/nat_100ks.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(dst="2.2.0.1") / UDP(dport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="10.0.0.3", ip_max="10.0.3.234", port_min=1025, port_max=1124, limit_flows = 100000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="10.0.0.3",
+ ip_max="10.0.3.234",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=100000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.src")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.sport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_10Ms.py b/src/plugins/nat/extras/nat_10Ms.py
index 6ce62a0b5e7..96a18ec018a 100644
--- a/src/plugins/nat/extras/nat_10Ms.py
+++ b/src/plugins/nat/extras/nat_10Ms.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(dst="2.2.0.1") / UDP(dport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="10.0.0.3", ip_max="10.1.134.162", port_min=1025, port_max=1124, limit_flows=10000000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="10.0.0.3",
+ ip_max="10.1.134.162",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=10000000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.src")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.sport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_10ks.py b/src/plugins/nat/extras/nat_10ks.py
index 33c7196eb9e..c210d5e81d8 100644
--- a/src/plugins/nat/extras/nat_10ks.py
+++ b/src/plugins/nat/extras/nat_10ks.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(dst="2.2.0.1") / UDP(dport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="10.0.0.3", ip_max="10.0.0.102", port_min=1025, port_max=1124, limit_flows = 10000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="10.0.0.3",
+ ip_max="10.0.0.102",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=10000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.src")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.sport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_1Ms.py b/src/plugins/nat/extras/nat_1Ms.py
index 73a91a70985..7271cf73781 100644
--- a/src/plugins/nat/extras/nat_1Ms.py
+++ b/src/plugins/nat/extras/nat_1Ms.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(dst="2.2.0.1") / UDP(dport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="10.0.0.3", ip_max="10.0.39.18", port_min=1025, port_max=1124, limit_flows = 1000000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="10.0.0.3",
+ ip_max="10.0.39.18",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=1000000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.src")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.sport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_out2in_100ks.py b/src/plugins/nat/extras/nat_out2in_100ks.py
index 55ab5d42ee1..911f2cefda4 100644
--- a/src/plugins/nat/extras/nat_out2in_100ks.py
+++ b/src/plugins/nat/extras/nat_out2in_100ks.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(src="2.2.0.1")/UDP(sport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(src="2.2.0.1") / UDP(sport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="173.16.1.3", ip_max="173.16.4.234", port_min=1025, port_max=1124, limit_flows = 100000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="173.16.1.3",
+ ip_max="173.16.4.234",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=100000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.dst")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.dport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_out2in_10Ms.py b/src/plugins/nat/extras/nat_out2in_10Ms.py
index 48d3d199080..b3493641ea0 100644
--- a/src/plugins/nat/extras/nat_out2in_10Ms.py
+++ b/src/plugins/nat/extras/nat_out2in_10Ms.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(src="2.2.0.1")/UDP(sport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(src="2.2.0.1") / UDP(sport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="173.16.1.3", ip_max="173.17.135.162", port_min=1025, port_max=1124, limit_flows = 10000000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="173.16.1.3",
+ ip_max="173.17.135.162",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=10000000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.dst")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.dport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_out2in_10ks.py b/src/plugins/nat/extras/nat_out2in_10ks.py
index e961504fcf9..abd82ce320d 100644
--- a/src/plugins/nat/extras/nat_out2in_10ks.py
+++ b/src/plugins/nat/extras/nat_out2in_10ks.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(src="2.2.0.1")/UDP(sport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(src="2.2.0.1") / UDP(sport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="173.16.1.3", ip_max="173.16.1.102", port_min=1025, port_max=1124, limit_flows = 100000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="173.16.1.3",
+ ip_max="173.16.1.102",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=100000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.dst")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.dport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_out2in_1Ms.py b/src/plugins/nat/extras/nat_out2in_1Ms.py
index d2cb0810263..c08ef191e6c 100644
--- a/src/plugins/nat/extras/nat_out2in_1Ms.py
+++ b/src/plugins/nat/extras/nat_out2in_1Ms.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(src="2.2.0.1")/UDP(sport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(src="2.2.0.1") / UDP(sport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="173.16.1.3", ip_max="173.16.40.18", port_min=1025, port_max=1124, limit_flows = 1000000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="173.16.1.3",
+ ip_max="173.16.40.18",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=1000000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.dst")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.dport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_ses_open.py b/src/plugins/nat/extras/nat_ses_open.py
index d614d4e7356..a267a6b67fb 100644
--- a/src/plugins/nat/extras/nat_ses_open.py
+++ b/src/plugins/nat/extras/nat_ses_open.py
@@ -1,44 +1,72 @@
from trex_stl_lib.api import *
-class STLS1:
- def __init__ (self):
- self.ip_range = {'local': {'start': "10.0.0.3", 'end': "10.1.255.255"},
- 'external': {'start': "172.16.1.3", 'end': "172.16.1.3"},
- 'remote': {'start': "2.2.0.1", 'end': "2.2.0.1"}}
- self.port_range = {'local': {'start': 1025, 'end': 65535},
- 'remote': {'start': 12, 'end': 12}}
+class STLS1:
+ def __init__(self):
+ self.ip_range = {
+ "local": {"start": "10.0.0.3", "end": "10.1.255.255"},
+ "external": {"start": "172.16.1.3", "end": "172.16.1.3"},
+ "remote": {"start": "2.2.0.1", "end": "2.2.0.1"},
+ }
+ self.port_range = {
+ "local": {"start": 1025, "end": 65535},
+ "remote": {"start": 12, "end": 12},
+ }
- def create_stream (self, vm):
- base_pkt = Ether()/IP()/UDP()
+ def create_stream(self, vm):
+ base_pkt = Ether() / IP() / UDP()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
pad = Padding()
- pad.load = '\x00' * pad_len
- base_pkt = base_pkt/pad
-
+ pad.load = "\x00" * pad_len
+ base_pkt = base_pkt / pad
+
pkt = STLPktBuilder(pkt=base_pkt, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
if direction == 0:
- ip_src = self.ip_range['remote']
- ip_dst = self.ip_range['external']
- src_port = self.port_range['remote']
- dst_port = self.port_range['local']
+ ip_src = self.ip_range["remote"]
+ ip_dst = self.ip_range["external"]
+ src_port = self.port_range["remote"]
+ dst_port = self.port_range["local"]
else:
- ip_src = self.ip_range['local']
- ip_dst = self.ip_range['remote']
- src_port = self.port_range['local']
- dst_port = self.port_range['remote']
+ ip_src = self.ip_range["local"]
+ ip_dst = self.ip_range["remote"]
+ src_port = self.port_range["local"]
+ dst_port = self.port_range["remote"]
vm = STLVM()
- vm.var(name="ip_src", min_value=ip_src['start'], max_value=ip_src['end'], size=4, op="random")
- vm.var(name="ip_dst", min_value=ip_dst['start'], max_value=ip_dst['end'], size=4, op="random")
- vm.var(name="src_port", min_value=src_port['start'], max_value=src_port['end'], size=2, op="random")
- vm.var(name="dst_port", min_value=dst_port['start'], max_value=dst_port['end'], size=2, op="random")
+ vm.var(
+ name="ip_src",
+ min_value=ip_src["start"],
+ max_value=ip_src["end"],
+ size=4,
+ op="random",
+ )
+ vm.var(
+ name="ip_dst",
+ min_value=ip_dst["start"],
+ max_value=ip_dst["end"],
+ size=4,
+ op="random",
+ )
+ vm.var(
+ name="src_port",
+ min_value=src_port["start"],
+ max_value=src_port["end"],
+ size=2,
+ op="random",
+ )
+ vm.var(
+ name="dst_port",
+ min_value=dst_port["start"],
+ max_value=dst_port["end"],
+ size=2,
+ op="random",
+ )
vm.write(fv_name="ip_src", pkt_offset="IP.src")
vm.write(fv_name="ip_dst", pkt_offset="IP.dst")
@@ -47,12 +75,9 @@ class STLS1:
vm.fix_chksum()
- return [ self.create_stream(vm) ]
+ return [self.create_stream(vm)]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_static_gen_cfg.py b/src/plugins/nat/extras/nat_static_gen_cfg.py
index 9e59bbfc0c2..009cf099582 100755
--- a/src/plugins/nat/extras/nat_static_gen_cfg.py
+++ b/src/plugins/nat/extras/nat_static_gen_cfg.py
@@ -2,24 +2,24 @@
import ipaddress
import argparse
-parser = argparse.ArgumentParser(description='Generate NAT plugin config.')
-parser.add_argument('static_map_num', metavar='N', type=int, nargs=1,
- help='number of static mappings')
+parser = argparse.ArgumentParser(description="Generate NAT plugin config.")
+parser.add_argument(
+ "static_map_num", metavar="N", type=int, nargs=1, help="number of static mappings"
+)
args = parser.parse_args()
-file_name = 'nat_static_%s' % (args.static_map_num[0])
-outfile = open(file_name, 'w')
+file_name = "nat_static_%s" % (args.static_map_num[0])
+outfile = open(file_name, "w")
-outfile.write('set int ip address TenGigabitEthernet4/0/0 172.16.2.1/24\n')
-outfile.write('set int ip address TenGigabitEthernet4/0/1 173.16.1.1/24\n')
-outfile.write('set int state TenGigabitEthernet4/0/0 up\n')
-outfile.write('set int state TenGigabitEthernet4/0/1 up\n')
-outfile.write('ip route add 2.2.0.0/16 via 173.16.1.2 TenGigabitEthernet4/0/1\n')
-outfile.write('ip route add 10.0.0.0/24 via 172.16.2.2 TenGigabitEthernet4/0/0\n')
-outfile.write('set int nat44 in TenGigabitEthernet4/0/0 out TenGigabitEthernet4/0/1\n')
-
-for i in range (0, args.static_map_num[0]):
- local = str(ipaddress.IPv4Address(u'10.0.0.3') + i)
- external = str(ipaddress.IPv4Address(u'173.16.1.3') + i)
- outfile.write('nat44 add static mapping local %s external %s\n' % (local, external))
+outfile.write("set int ip address TenGigabitEthernet4/0/0 172.16.2.1/24\n")
+outfile.write("set int ip address TenGigabitEthernet4/0/1 173.16.1.1/24\n")
+outfile.write("set int state TenGigabitEthernet4/0/0 up\n")
+outfile.write("set int state TenGigabitEthernet4/0/1 up\n")
+outfile.write("ip route add 2.2.0.0/16 via 173.16.1.2 TenGigabitEthernet4/0/1\n")
+outfile.write("ip route add 10.0.0.0/24 via 172.16.2.2 TenGigabitEthernet4/0/0\n")
+outfile.write("set int nat44 in TenGigabitEthernet4/0/0 out TenGigabitEthernet4/0/1\n")
+for i in range(0, args.static_map_num[0]):
+ local = str(ipaddress.IPv4Address("10.0.0.3") + i)
+ external = str(ipaddress.IPv4Address("173.16.1.3") + i)
+ outfile.write("nat44 add static mapping local %s external %s\n" % (local, external))
diff --git a/src/plugins/nat/extras/nat_test_fast_path.py b/src/plugins/nat/extras/nat_test_fast_path.py
index e869d40872a..fb880fb9e96 100644
--- a/src/plugins/nat/extras/nat_test_fast_path.py
+++ b/src/plugins/nat/extras/nat_test_fast_path.py
@@ -2,7 +2,6 @@ from trex_stl_lib.api import *
class STLS1:
-
def create_stream(self):
# base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
@@ -24,37 +23,46 @@ class STLS1:
# return STLStream(packet=pkt, mode=STLTXCont())
- vm = STLScVmRaw([STLVmTupleGen(ip_min="10.0.0.1", ip_max="10.255.255.254",
- port_min=1025, port_max=65535,
- # name="stuple", limit_flows=10000),
- name="stuple", limit_flows=100),
- STLVmTupleGen(ip_min="2.0.0.1", ip_max="2.255.255.254",
- port_min=1025, port_max=65535,
- # name="dtuple", limit_flows=100000000),
- name="dtuple", limit_flows=100),
-
- # write ip to packet IP.src
- STLVmWrFlowVar(fv_name="stuple.ip",
- pkt_offset="IP.src"),
- STLVmWrFlowVar(fv_name="dtuple.ip",
- pkt_offset="IP.dst"),
- # fix checksum
- STLVmFixIpv4(offset="IP"),
- # write udp.port
- STLVmWrFlowVar(fv_name="stuple.port",
- pkt_offset="UDP.sport"),
- STLVmWrFlowVar(fv_name="dtuple.port",
- pkt_offset="UDP.dport"),
- ]
- )
-
- base_pkt = Ether()/IP(src="16.0.0.1", dst="2.0.0.1")/UDP(dport=12, sport=1025)
+ vm = STLScVmRaw(
+ [
+ STLVmTupleGen(
+ ip_min="10.0.0.1",
+ ip_max="10.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ # name="stuple", limit_flows=10000),
+ name="stuple",
+ limit_flows=100,
+ ),
+ STLVmTupleGen(
+ ip_min="2.0.0.1",
+ ip_max="2.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ # name="dtuple", limit_flows=100000000),
+ name="dtuple",
+ limit_flows=100,
+ ),
+ # write ip to packet IP.src
+ STLVmWrFlowVar(fv_name="stuple.ip", pkt_offset="IP.src"),
+ STLVmWrFlowVar(fv_name="dtuple.ip", pkt_offset="IP.dst"),
+ # fix checksum
+ STLVmFixIpv4(offset="IP"),
+ # write udp.port
+ STLVmWrFlowVar(fv_name="stuple.port", pkt_offset="UDP.sport"),
+ STLVmWrFlowVar(fv_name="dtuple.port", pkt_offset="UDP.dport"),
+ ]
+ )
+
+ base_pkt = (
+ Ether() / IP(src="16.0.0.1", dst="2.0.0.1") / UDP(dport=12, sport=1025)
+ )
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
diff --git a/src/plugins/nat/extras/nat_test_slow_path.py b/src/plugins/nat/extras/nat_test_slow_path.py
index a6351b98adf..3145a2c6a59 100644
--- a/src/plugins/nat/extras/nat_test_slow_path.py
+++ b/src/plugins/nat/extras/nat_test_slow_path.py
@@ -2,7 +2,6 @@ from trex_stl_lib.api import *
class STLS1:
-
def create_stream(self):
# base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
@@ -24,35 +23,44 @@ class STLS1:
# return STLStream(packet=pkt, mode=STLTXCont())
- vm = STLScVmRaw([STLVmTupleGen(ip_min="10.0.0.1", ip_max="10.255.255.254",
- port_min=1025, port_max=65535,
- name="stuple", limit_flows=10000),
- STLVmTupleGen(ip_min="2.0.0.1", ip_max="2.255.255.254",
- port_min=1025, port_max=65535,
- name="dtuple", limit_flows=100000000),
-
- # write ip to packet IP.src
- STLVmWrFlowVar(fv_name="stuple.ip",
- pkt_offset="IP.src"),
- STLVmWrFlowVar(fv_name="dtuple.ip",
- pkt_offset="IP.dst"),
- # fix checksum
- STLVmFixIpv4(offset="IP"),
- # write udp.port
- STLVmWrFlowVar(fv_name="stuple.port",
- pkt_offset="UDP.sport"),
- STLVmWrFlowVar(fv_name="dtuple.port",
- pkt_offset="UDP.dport"),
- ]
- )
-
- base_pkt = Ether()/IP(src="16.0.0.1", dst="2.0.0.1")/UDP(dport=12, sport=1025)
+ vm = STLScVmRaw(
+ [
+ STLVmTupleGen(
+ ip_min="10.0.0.1",
+ ip_max="10.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ name="stuple",
+ limit_flows=10000,
+ ),
+ STLVmTupleGen(
+ ip_min="2.0.0.1",
+ ip_max="2.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ name="dtuple",
+ limit_flows=100000000,
+ ),
+ # write ip to packet IP.src
+ STLVmWrFlowVar(fv_name="stuple.ip", pkt_offset="IP.src"),
+ STLVmWrFlowVar(fv_name="dtuple.ip", pkt_offset="IP.dst"),
+ # fix checksum
+ STLVmFixIpv4(offset="IP"),
+ # write udp.port
+ STLVmWrFlowVar(fv_name="stuple.port", pkt_offset="UDP.sport"),
+ STLVmWrFlowVar(fv_name="dtuple.port", pkt_offset="UDP.dport"),
+ ]
+ )
+
+ base_pkt = (
+ Ether() / IP(src="16.0.0.1", dst="2.0.0.1") / UDP(dport=12, sport=1025)
+ )
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
diff --git a/src/plugins/nat/extras/nat_test_slow_path_with_latency.py b/src/plugins/nat/extras/nat_test_slow_path_with_latency.py
index 6c7663434c5..0c08e7a5e80 100644
--- a/src/plugins/nat/extras/nat_test_slow_path_with_latency.py
+++ b/src/plugins/nat/extras/nat_test_slow_path_with_latency.py
@@ -2,7 +2,6 @@ from trex_stl_lib.api import *
class STLS1:
-
def create_stream(self, port_id):
# base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
@@ -24,49 +23,61 @@ class STLS1:
# return STLStream(packet=pkt, mode=STLTXCont())
- vm = STLScVmRaw([STLVmTupleGen(ip_min="10.0.0.1", ip_max="10.255.255.254",
- port_min=1025, port_max=65535,
- name="stuple", limit_flows=10000),
- STLVmTupleGen(ip_min="2.0.0.1", ip_max="2.255.255.254",
- port_min=1025, port_max=65535,
- name="dtuple", limit_flows=100000000),
-
- # write ip to packet IP.src
- STLVmWrFlowVar(fv_name="stuple.ip",
- pkt_offset="IP.src"),
- STLVmWrFlowVar(fv_name="dtuple.ip",
- pkt_offset="IP.dst"),
- # fix checksum
- STLVmFixIpv4(offset="IP"),
- # write udp.port
- STLVmWrFlowVar(fv_name="stuple.port",
- pkt_offset="UDP.sport"),
- STLVmWrFlowVar(fv_name="dtuple.port",
- pkt_offset="UDP.dport"),
- ]
- )
-
- base_pkt = Ether()/IP(src="16.0.0.1", dst="2.0.0.1")/UDP(dport=12, sport=1025)
+ vm = STLScVmRaw(
+ [
+ STLVmTupleGen(
+ ip_min="10.0.0.1",
+ ip_max="10.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ name="stuple",
+ limit_flows=10000,
+ ),
+ STLVmTupleGen(
+ ip_min="2.0.0.1",
+ ip_max="2.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ name="dtuple",
+ limit_flows=100000000,
+ ),
+ # write ip to packet IP.src
+ STLVmWrFlowVar(fv_name="stuple.ip", pkt_offset="IP.src"),
+ STLVmWrFlowVar(fv_name="dtuple.ip", pkt_offset="IP.dst"),
+ # fix checksum
+ STLVmFixIpv4(offset="IP"),
+ # write udp.port
+ STLVmWrFlowVar(fv_name="stuple.port", pkt_offset="UDP.sport"),
+ STLVmWrFlowVar(fv_name="dtuple.port", pkt_offset="UDP.dport"),
+ ]
+ )
+
+ base_pkt = (
+ Ether() / IP(src="16.0.0.1", dst="2.0.0.1") / UDP(dport=12, sport=1025)
+ )
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
- pad = max(0, 64 - len(base_pkt)) * 'x'
- pad_latency = max(0, (64-4) - len(base_pkt)) * 'x'
+ pad = max(0, 64 - len(base_pkt)) * "x"
+ pad_latency = max(0, (64 - 4) - len(base_pkt)) * "x"
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
- return [STLStream(packet=pkt, mode=STLTXCont()),
- # latency stream
- STLStream(packet = STLPktBuilder(pkt = base_pkt/pad_latency),
- mode = STLTXCont(pps=1000),
- flow_stats = STLFlowLatencyStats(pg_id = 12+port_id))
- ]
+ return [
+ STLStream(packet=pkt, mode=STLTXCont()),
+ # latency stream
+ STLStream(
+ packet=STLPktBuilder(pkt=base_pkt / pad_latency),
+ mode=STLTXCont(pps=1000),
+ flow_stats=STLFlowLatencyStats(pg_id=12 + port_id),
+ ),
+ ]
def get_streams(self, direction=0, **kwargs):
# return [self.create_stream()]
- return self.create_stream(kwargs['port_id'])
+ return self.create_stream(kwargs["port_id"])
# dynamic load - used for trex console or simulator
diff --git a/src/plugins/nat/lib/alloc.h b/src/plugins/nat/lib/alloc.h
index a9a2c15fedc..882809e829c 100644
--- a/src/plugins/nat/lib/alloc.h
+++ b/src/plugins/nat/lib/alloc.h
@@ -21,6 +21,7 @@
#define included_nat_lib_alloc_h__
#include <vnet/ip/ip.h>
+#include <nat/lib/nat_proto.h>
typedef struct nat_ip4_pool_addr_s nat_ip4_pool_addr_t;
typedef struct nat_ip4_addr_port_s nat_ip4_addr_port_t;
@@ -41,14 +42,12 @@ struct nat_ip4_pool_addr_s
{
ip4_address_t addr;
u32 fib_index;
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
u16 busy_##n##_ports; \
u16 * busy_##n##_ports_per_thread; \
uword * busy_##n##_port_bitmap;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
};
struct nat_ip4_addr_port_s
diff --git a/src/plugins/nat/lib/inlines.h b/src/plugins/nat/lib/inlines.h
index fe1f7dd27bc..24e3ba83a5b 100644
--- a/src/plugins/nat/lib/inlines.h
+++ b/src/plugins/nat/lib/inlines.h
@@ -20,51 +20,18 @@
#include <vnet/ip/icmp46_packet.h>
-always_inline nat_protocol_t
-ip_proto_to_nat_proto (u8 ip_proto)
-{
- static const nat_protocol_t lookup_table[256] = {
- [IP_PROTOCOL_TCP] = NAT_PROTOCOL_TCP,
- [IP_PROTOCOL_UDP] = NAT_PROTOCOL_UDP,
- [IP_PROTOCOL_ICMP] = NAT_PROTOCOL_ICMP,
- [IP_PROTOCOL_ICMP6] = NAT_PROTOCOL_ICMP,
- };
-
- return lookup_table[ip_proto];
-}
-
-static_always_inline u8
-nat_proto_to_ip_proto (nat_protocol_t nat_proto)
-{
- ASSERT (nat_proto <= NAT_PROTOCOL_ICMP);
-
- static const u8 lookup_table[256] = {
- [NAT_PROTOCOL_OTHER] = ~0,
- [NAT_PROTOCOL_TCP] = IP_PROTOCOL_TCP,
- [NAT_PROTOCOL_UDP] = IP_PROTOCOL_UDP,
- [NAT_PROTOCOL_ICMP] = IP_PROTOCOL_ICMP,
- };
-
- ASSERT (NAT_PROTOCOL_OTHER == nat_proto || NAT_PROTOCOL_TCP == nat_proto
- || NAT_PROTOCOL_UDP == nat_proto || NAT_PROTOCOL_ICMP == nat_proto);
-
- return lookup_table[nat_proto];
-}
-
-static_always_inline u8
+static_always_inline u64
icmp_type_is_error_message (u8 icmp_type)
{
- switch (icmp_type)
- {
- case ICMP4_destination_unreachable:
- case ICMP4_time_exceeded:
- case ICMP4_parameter_problem:
- case ICMP4_source_quench:
- case ICMP4_redirect:
- case ICMP4_alternate_host_address:
- return 1;
- }
- return 0;
+ int bmp = 0;
+ bmp |= 1 << ICMP4_destination_unreachable;
+ bmp |= 1 << ICMP4_time_exceeded;
+ bmp |= 1 << ICMP4_parameter_problem;
+ bmp |= 1 << ICMP4_source_quench;
+ bmp |= 1 << ICMP4_redirect;
+ bmp |= 1 << ICMP4_alternate_host_address;
+
+ return (1ULL << icmp_type) & bmp;
}
#endif /* included_nat_inlines_h__ */
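
The rewrite trades a six-case switch for one bitmap membership test; all six ICMP error types are small constants (3, 4, 5, 6, 11, 12), so they fit comfortably in the low bits of the mask. A standalone sketch of the technique, with an explicit range guard added as a defensive assumption (the version above shifts 1ULL by the raw type byte):

#include <stdint.h>

static inline uint64_t
is_icmp_error (uint8_t type)
{
  const uint64_t bmp = (1ULL << 3)    /* destination_unreachable */
                       | (1ULL << 4)  /* source_quench */
                       | (1ULL << 5)  /* redirect */
                       | (1ULL << 6)  /* alternate_host_address */
                       | (1ULL << 11) /* time_exceeded */
                       | (1ULL << 12) /* parameter_problem */;
  /* guard: shifting by >= 64 is undefined, so reject large types */
  return type < 64 ? ((1ULL << type) & bmp) : 0;
}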
diff --git a/src/plugins/nat/lib/ipfix_logging.c b/src/plugins/nat/lib/ipfix_logging.c
index 6e5e4b6c750..593fa09f7e2 100644
--- a/src/plugins/nat/lib/ipfix_logging.c
+++ b/src/plugins/nat/lib/ipfix_logging.c
@@ -51,7 +51,7 @@ typedef struct
u8 nat_event;
u32 src_ip;
u32 nat_src_ip;
- nat_protocol_t nat_proto;
+ ip_protocol_t proto;
u16 src_port;
u16 nat_src_port;
u32 vrf_id;
@@ -143,12 +143,9 @@ do { \
* @returns template packet
*/
static inline u8 *
-nat_template_rewrite (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- nat_event_t event, quota_exceed_event_t quota_event)
+nat_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port, nat_event_t event,
+ quota_exceed_event_t quota_event)
{
nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
ip4_header_t *ip;
@@ -164,7 +161,7 @@ nat_template_rewrite (flow_report_main_t * frm,
flow_report_stream_t *stream;
u32 stream_index;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
stream_index = clib_atomic_fetch_or(&silm->stream_index, 0);
clib_atomic_cmp_and_swap (&silm->stream_index,
@@ -241,8 +238,8 @@ nat_template_rewrite (flow_report_main_t * frm,
ip->ip_version_and_header_length = 0x45;
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
- ip->dst_address.as_u32 = collector_address->as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
udp->dst_port = clib_host_to_net_u16 (collector_port);
udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
@@ -390,97 +387,72 @@ nat_template_rewrite (flow_report_main_t * frm,
}
u8 *
-nat_template_rewrite_addr_exhausted (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+nat_template_rewrite_addr_exhausted (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port,
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, NAT_ADDRESSES_EXHAUTED, 0);
+ return nat_template_rewrite (exp, fr, collector_port, NAT_ADDRESSES_EXHAUTED,
+ 0);
}
u8 *
-nat_template_rewrite_nat44_session (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+nat_template_rewrite_nat44_session (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port,
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, NAT44_SESSION_CREATE, 0);
+ return nat_template_rewrite (exp, fr, collector_port, NAT44_SESSION_CREATE,
+ 0);
}
u8 *
-nat_template_rewrite_max_entries_per_usr (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+nat_template_rewrite_max_entries_per_usr (
+ ipfix_exporter_t *exp, flow_report_t *fr, ip4_address_t *collector_address,
+ ip4_address_t *src_address, u16 collector_port, ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, QUOTA_EXCEEDED,
- MAX_ENTRIES_PER_USER);
+ return nat_template_rewrite (exp, fr, collector_port, QUOTA_EXCEEDED,
+ MAX_ENTRIES_PER_USER);
}
u8 *
-nat_template_rewrite_max_sessions (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+nat_template_rewrite_max_sessions (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, QUOTA_EXCEEDED,
- MAX_SESSION_ENTRIES);
+ return nat_template_rewrite (exp, fr, collector_port, QUOTA_EXCEEDED,
+ MAX_SESSION_ENTRIES);
}
u8 *
-nat_template_rewrite_max_bibs (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+nat_template_rewrite_max_bibs (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, QUOTA_EXCEEDED,
- MAX_BIB_ENTRIES);
+ return nat_template_rewrite (exp, fr, collector_port, QUOTA_EXCEEDED,
+ MAX_BIB_ENTRIES);
}
u8 *
-nat_template_rewrite_nat64_bib (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+nat_template_rewrite_nat64_bib (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port,
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, NAT64_BIB_CREATE, 0);
+ return nat_template_rewrite (exp, fr, collector_port, NAT64_BIB_CREATE, 0);
}
u8 *
-nat_template_rewrite_nat64_session (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+nat_template_rewrite_nat64_session (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port,
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, NAT64_SESSION_CREATE, 0);
+ return nat_template_rewrite (exp, fr, collector_port, NAT64_SESSION_CREATE,
+ 0);
}
static inline void
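
All of the NAT template-rewrite callbacks above converge on the same new shape: the ipfix_exporter_t replaces the explicit collector/source address parameters, which nat_template_rewrite can now obtain from exp itself. A minimal sketch of a callback under that signature (the function name is hypothetical, not part of the patch):

    static u8 *
    example_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
                              u16 collector_port, ipfix_report_element_t *elts,
                              u32 n_elts, u32 *stream_index)
    {
      /* collector and source addresses travel inside exp
         (exp->ipfix_collector, exp->src_address), so they no longer
         appear in the argument list */
      return nat_template_rewrite (exp, fr, collector_port,
                                   NAT44_SESSION_CREATE, 0);
    }
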
@@ -497,16 +469,17 @@ nat_ipfix_header_create (flow_report_main_t * frm,
ip4_header_t *ip;
udp_header_t *udp;
vlib_main_t *vm = vlib_get_main ();
-
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
+
stream_index = clib_atomic_fetch_or(&silm->stream_index, 0);
- stream = &frm->streams[stream_index];
+ stream = &exp->streams[stream_index];
b0->current_data = 0;
b0->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h) +
sizeof (*s);
b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_F_FLOW_REPORT);
vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = exp->fib_index;
tp = vlib_buffer_get_current (b0);
ip = (ip4_header_t *) & tp->ip4;
udp = (udp_header_t *) (ip + 1);
@@ -517,10 +490,10 @@ nat_ipfix_header_create (flow_report_main_t * frm,
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
ip->flags_and_fragment_offset = 0;
- ip->src_address.as_u32 = frm->src_address.as_u32;
- ip->dst_address.as_u32 = frm->ipfix_collector.as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
- udp->dst_port = clib_host_to_net_u16 (frm->collector_port);
+ udp->dst_port = clib_host_to_net_u16 (exp->collector_port);
udp->checksum = 0;
h->export_time = clib_host_to_net_u32 ((u32)
@@ -545,6 +518,7 @@ nat_ipfix_send (flow_report_main_t *frm, vlib_frame_t *f, vlib_buffer_t *b0,
ip4_header_t *ip;
udp_header_t *udp;
vlib_main_t *vm = vlib_get_main ();
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
tp = vlib_buffer_get_current (b0);
ip = (ip4_header_t *) & tp->ip4;
@@ -563,7 +537,7 @@ nat_ipfix_send (flow_report_main_t *frm, vlib_frame_t *f, vlib_buffer_t *b0,
ip->checksum = ip4_header_checksum (ip);
udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
- if (frm->udp_checksum)
+ if (exp->udp_checksum)
{
udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
if (udp->checksum == 0)
@@ -577,9 +551,8 @@ nat_ipfix_send (flow_report_main_t *frm, vlib_frame_t *f, vlib_buffer_t *b0,
static void
nat_ipfix_logging_nat44_ses (u32 thread_index, u8 nat_event, u32 src_ip,
- u32 nat_src_ip, nat_protocol_t nat_proto,
- u16 src_port, u16 nat_src_port, u32 fib_index,
- int do_flush)
+ u32 nat_src_ip, ip_protocol_t proto, u16 src_port,
+ u16 nat_src_port, u32 fib_index, int do_flush)
{
nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
nat_ipfix_per_thread_data_t *sitd = &silm->per_thread_data[thread_index];
@@ -590,11 +563,9 @@ nat_ipfix_logging_nat44_ses (u32 thread_index, u8 nat_event, u32 src_ip,
u32 offset;
vlib_main_t *vm = vlib_get_main ();
u64 now;
- u8 proto;
u16 template_id;
u32 vrf_id;
-
- proto = nat_proto_to_ip_proto (nat_proto);
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -667,8 +638,8 @@ nat_ipfix_logging_nat44_ses (u32 thread_index, u8 nat_event, u32 src_ip,
b0->current_length += NAT44_SESSION_CREATE_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + NAT44_SESSION_CREATE_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush ||
+ (offset + NAT44_SESSION_CREATE_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->nat44_session_template_id,
@@ -695,6 +666,7 @@ nat_ipfix_logging_addr_exhausted (u32 thread_index, u32 pool_id, int do_flush)
u64 now;
u8 nat_event = NAT_ADDRESSES_EXHAUTED;
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -750,8 +722,8 @@ nat_ipfix_logging_addr_exhausted (u32 thread_index, u32 pool_id, int do_flush)
b0->current_length += NAT_ADDRESSES_EXHAUTED_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + NAT_ADDRESSES_EXHAUTED_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush ||
+ (offset + NAT_ADDRESSES_EXHAUTED_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->addr_exhausted_template_id,
@@ -780,6 +752,7 @@ nat_ipfix_logging_max_entries_per_usr (u32 thread_index,
u8 nat_event = QUOTA_EXCEEDED;
u32 quota_event = clib_host_to_net_u32 (MAX_ENTRIES_PER_USER);
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -842,8 +815,8 @@ nat_ipfix_logging_max_entries_per_usr (u32 thread_index,
b0->current_length += MAX_ENTRIES_PER_USER_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + MAX_ENTRIES_PER_USER_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush ||
+ (offset + MAX_ENTRIES_PER_USER_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->max_entries_per_user_template_id,
@@ -871,6 +844,7 @@ nat_ipfix_logging_max_ses (u32 thread_index, u32 limit, int do_flush)
u8 nat_event = QUOTA_EXCEEDED;
u32 quota_event = clib_host_to_net_u32 (MAX_SESSION_ENTRIES);
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -930,8 +904,7 @@ nat_ipfix_logging_max_ses (u32 thread_index, u32 limit, int do_flush)
b0->current_length += MAX_SESSIONS_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + MAX_SESSIONS_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush || (offset + MAX_SESSIONS_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->max_sessions_template_id,
@@ -959,6 +932,7 @@ nat_ipfix_logging_max_bib (u32 thread_index, u32 limit, int do_flush)
u8 nat_event = QUOTA_EXCEEDED;
u32 quota_event = clib_host_to_net_u32 (MAX_BIB_ENTRIES);
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -1018,8 +992,7 @@ nat_ipfix_logging_max_bib (u32 thread_index, u32 limit, int do_flush)
b0->current_length += MAX_BIBS_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + MAX_BIBS_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush || (offset + MAX_BIBS_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->max_bibs_template_id,
@@ -1048,6 +1021,7 @@ nat_ipfix_logging_nat64_bibe (u32 thread_index, u8 nat_event,
vlib_main_t *vm = vlib_get_main ();
u64 now;
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -1119,8 +1093,7 @@ nat_ipfix_logging_nat64_bibe (u32 thread_index, u8 nat_event,
b0->current_length += NAT64_BIB_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + NAT64_BIB_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush || (offset + NAT64_BIB_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->nat64_bib_template_id,
@@ -1151,6 +1124,7 @@ nat_ipfix_logging_nat64_ses (u32 thread_index, u8 nat_event,
vlib_main_t *vm = vlib_get_main ();
u64 now;
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -1234,8 +1208,7 @@ nat_ipfix_logging_nat64_ses (u32 thread_index, u8 nat_event,
b0->current_length += NAT64_SES_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + NAT64_SES_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush || (offset + NAT64_SES_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->nat64_ses_template_id,
@@ -1307,54 +1280,34 @@ nat_ipfix_flush_from_main (void)
/**
* @brief Generate NAT44 session create event
- *
- * @param thread_index thread index
- * @param src_ip source IPv4 address
- * @param nat_src_ip transaltes source IPv4 address
- * @param nat_proto NAT transport protocol
- * @param src_port source port
- * @param nat_src_port translated source port
- * @param vrf_id VRF ID
*/
void
-nat_ipfix_logging_nat44_ses_create (u32 thread_index,
- u32 src_ip,
- u32 nat_src_ip,
- nat_protocol_t nat_proto,
- u16 src_port,
- u16 nat_src_port, u32 fib_index)
+nat_ipfix_logging_nat44_ses_create (u32 thread_index, u32 src_ip,
+ u32 nat_src_ip, ip_protocol_t proto,
+ u16 src_port, u16 nat_src_port,
+ u32 fib_index)
{
skip_if_disabled ();
nat_ipfix_logging_nat44_ses (thread_index, NAT44_SESSION_CREATE, src_ip,
- nat_src_ip, nat_proto, src_port, nat_src_port,
- fib_index, 0);
+ nat_src_ip, proto, src_port, nat_src_port,
+ fib_index, 0);
}
/**
* @brief Generate NAT44 session delete event
- *
- * @param thread_index thread index
- * @param src_ip source IPv4 address
- * @param nat_src_ip transaltes source IPv4 address
- * @param nat_proto NAT transport protocol
- * @param src_port source port
- * @param nat_src_port translated source port
- * @param vrf_id VRF ID
*/
void
-nat_ipfix_logging_nat44_ses_delete (u32 thread_index,
- u32 src_ip,
- u32 nat_src_ip,
- nat_protocol_t nat_proto,
- u16 src_port,
- u16 nat_src_port, u32 fib_index)
+nat_ipfix_logging_nat44_ses_delete (u32 thread_index, u32 src_ip,
+ u32 nat_src_ip, ip_protocol_t proto,
+ u16 src_port, u16 nat_src_port,
+ u32 fib_index)
{
skip_if_disabled ();
nat_ipfix_logging_nat44_ses (thread_index, NAT44_SESSION_DELETE, src_ip,
- nat_src_ip, nat_proto, src_port, nat_src_port,
- fib_index, 0);
+ nat_src_ip, proto, src_port, nat_src_port,
+ fib_index, 0);
}
/**
@@ -1366,9 +1319,23 @@ nat_ipfix_logging_nat44_ses_delete (u32 thread_index,
void
nat_ipfix_logging_addresses_exhausted (u32 thread_index, u32 pool_id)
{
- //TODO: This event SHOULD be rate limited
+ nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
+ static f64 *last_sent = 0;
+
skip_if_disabled ();
+ /* TODO: make rate configurable; use 1 pps for now */
+ clib_spinlock_lock_if_init (&silm->addr_exhausted_lock);
+ f64 now = vlib_time_now (vlib_get_main ());
+ vec_validate (last_sent, pool_id);
+ if (now < last_sent[pool_id] + 1.0)
+ {
+ clib_spinlock_unlock_if_init (&silm->addr_exhausted_lock);
+ return;
+ }
+ last_sent[pool_id] = now;
+ clib_spinlock_unlock_if_init (&silm->addr_exhausted_lock);
+
nat_ipfix_logging_addr_exhausted (thread_index, pool_id, 0);
}
@@ -1409,9 +1376,22 @@ deterministic_nat_data_callback
void
nat_ipfix_logging_max_sessions (u32 thread_index, u32 limit)
{
- //TODO: This event SHOULD be rate limited
+ nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
+ static f64 last_sent = 0;
+
skip_if_disabled ();
+ /* TODO: make rate configurable; use 1 pps for now */
+ clib_spinlock_lock_if_init (&silm->max_sessions_lock);
+ f64 now = vlib_time_now (vlib_get_main ());
+ if (now < last_sent + 1.0)
+ {
+ clib_spinlock_unlock_if_init (&silm->max_sessions_lock);
+ return;
+ }
+ last_sent = now;
+ clib_spinlock_unlock_if_init (&silm->max_sessions_lock);
+
nat_ipfix_logging_max_ses (thread_index, limit, 0);
}
@@ -1424,9 +1404,22 @@ nat_ipfix_logging_max_sessions (u32 thread_index, u32 limit)
void
nat_ipfix_logging_max_bibs (u32 thread_index, u32 limit)
{
- //TODO: This event SHOULD be rate limited
+ nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
+ static f64 last_sent = 0;
+
skip_if_disabled ();
+ /* TODO: make rate configurable; use 1 pps for now */
+ clib_spinlock_lock_if_init (&silm->max_bibs_lock);
+ f64 now = vlib_time_now (vlib_get_main ());
+ if (now < last_sent + 1.0)
+ {
+ clib_spinlock_unlock_if_init (&silm->max_bibs_lock);
+ return;
+ }
+ last_sent = now;
+ clib_spinlock_unlock_if_init (&silm->max_bibs_lock);
+
nat_ipfix_logging_max_bib (thread_index, limit, 0);
}
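
The three limit events above guard their IPFIX messages with the same 1 pps limiter: take the event's spinlock, compare against a static last-sent timestamp, and return early if less than a second has elapsed. The pattern, factored out as a sketch (the function and parameter names are placeholders, not part of the patch):

    static int
    nat_event_rate_limited (clib_spinlock_t *lock, f64 *last_sent)
    {
      f64 now = vlib_time_now (vlib_get_main ());
      int limited = 0;

      clib_spinlock_lock_if_init (lock);
      if (now < *last_sent + 1.0) /* at most one event per second */
        limited = 1;
      else
        *last_sent = now;
      clib_spinlock_unlock_if_init (lock);

      return limited;
    }

The addresses-exhausted variant applies the same check per pool by keeping the timestamps in a vector indexed by pool_id.
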
@@ -1497,12 +1490,13 @@ nat_ipfix_logging_nat64_session (u32 thread_index,
}
vlib_frame_t *
-data_callback (flow_report_main_t * frm, flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+data_callback (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
- if (PREDICT_FALSE (++silm->call_counter >= vec_len (frm->reports)))
+ if (PREDICT_FALSE (++silm->call_counter >= vec_len (exp->reports)))
{
nat_ipfix_flush_from_main();
silm->call_counter = 0;
@@ -1524,7 +1518,7 @@ int
nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
{
nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
vnet_flow_report_add_del_args_t a;
int rv;
u8 e = enable ? 1 : 0;
@@ -1539,7 +1533,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
a.flow_data_callback = data_callback;
a.rewrite_callback = nat_template_rewrite_nat44_session;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1547,7 +1541,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
}
a.rewrite_callback = nat_template_rewrite_addr_exhausted;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1555,7 +1549,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
}
a.rewrite_callback = nat_template_rewrite_max_sessions;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1563,7 +1557,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
}
a.rewrite_callback = nat_template_rewrite_max_bibs;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1571,7 +1565,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
}
a.rewrite_callback = nat_template_rewrite_nat64_bib;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1579,7 +1573,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
}
a.rewrite_callback = nat_template_rewrite_nat64_session;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1589,7 +1583,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
// if endpoint dependent per user max entries is also required
/*
a.rewrite_callback = nat_template_rewrite_max_entries_per_usr;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1620,6 +1614,11 @@ nat_ipfix_logging_init (vlib_main_t * vm)
silm->milisecond_time_0 = unix_time_now_nsec () * 1e-6;
vec_validate (silm->per_thread_data, tm->n_vlib_mains - 1);
+
+ /* Set up rate-limit */
+ clib_spinlock_init (&silm->addr_exhausted_lock);
+ clib_spinlock_init (&silm->max_sessions_lock);
+ clib_spinlock_init (&silm->max_bibs_lock);
}
static uword
@@ -1631,11 +1630,9 @@ ipfix_flush_process (vlib_main_t *vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_ipfix_flush_node) = {
.function = ipfix_flush_process,
.name = "nat-ipfix-flush",
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
};
-/* *INDENT-ON* */
diff --git a/src/plugins/nat/lib/ipfix_logging.h b/src/plugins/nat/lib/ipfix_logging.h
index 0b2357a2604..dc7927a160c 100644
--- a/src/plugins/nat/lib/ipfix_logging.h
+++ b/src/plugins/nat/lib/ipfix_logging.h
@@ -108,6 +108,10 @@ typedef struct {
/** nat data callbacks call counter */
u16 call_counter;
+ /** rate-limit locks */
+ clib_spinlock_t addr_exhausted_lock;
+ clib_spinlock_t max_sessions_lock;
+ clib_spinlock_t max_bibs_lock;
} nat_ipfix_logging_main_t;
extern nat_ipfix_logging_main_t nat_ipfix_logging_main;
@@ -117,15 +121,13 @@ int nat_ipfix_logging_enabled ();
void nat_ipfix_logging_init (vlib_main_t * vm);
int nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port);
void nat_ipfix_logging_nat44_ses_create (u32 thread_index, u32 src_ip,
- u32 nat_src_ip,
- nat_protocol_t nat_proto,
- u16 src_port, u16 nat_src_port,
- u32 fib_index);
+ u32 nat_src_ip, ip_protocol_t proto,
+ u16 src_port, u16 nat_src_port,
+ u32 fib_index);
void nat_ipfix_logging_nat44_ses_delete (u32 thread_index, u32 src_ip,
- u32 nat_src_ip,
- nat_protocol_t nat_proto,
- u16 src_port, u16 nat_src_port,
- u32 fib_index);
+ u32 nat_src_ip, ip_protocol_t proto,
+ u16 src_port, u16 nat_src_port,
+ u32 fib_index);
void nat_ipfix_logging_addresses_exhausted(u32 thread_index, u32 pool_id);
void nat_ipfix_logging_max_entries_per_user(u32 thread_index,
u32 limit, u32 src_ip);
diff --git a/src/plugins/nat/lib/lib.c b/src/plugins/nat/lib/lib.c
index d2def2cc480..30bafac73c0 100644
--- a/src/plugins/nat/lib/lib.c
+++ b/src/plugins/nat/lib/lib.c
@@ -14,6 +14,7 @@
*/
#include <nat/lib/lib.h>
+#include <nat/lib/nat_proto.h>
uword
unformat_nat_protocol (unformat_input_t *input, va_list *args)
diff --git a/src/plugins/nat/lib/lib.h b/src/plugins/nat/lib/lib.h
index b0b5229b337..dc2c43beaaf 100644
--- a/src/plugins/nat/lib/lib.h
+++ b/src/plugins/nat/lib/lib.h
@@ -21,6 +21,17 @@
#include <vlibapi/api.h>
+typedef struct
+{
+ u16 identifier;
+ u16 sequence;
+} nat_icmp_echo_header_t;
+
+typedef struct
+{
+ u16 src_port, dst_port;
+} nat_tcp_udp_header_t;
+
/* NAT API Configuration flags */
#define foreach_nat_config_flag \
_(0x01, IS_TWICE_NAT) \
@@ -54,19 +65,6 @@ typedef enum
#undef _
} nat_error_t;
-#define foreach_nat_protocol \
- _ (OTHER, 0, other, "other") \
- _ (UDP, 1, udp, "udp") \
- _ (TCP, 2, tcp, "tcp") \
- _ (ICMP, 3, icmp, "icmp")
-
-typedef enum
-{
-#define _(N, i, n, s) NAT_PROTOCOL_##N = i,
- foreach_nat_protocol
-#undef _
-} nat_protocol_t;
-
/* default protocol timeouts */
#define NAT_UDP_TIMEOUT 300
#define NAT_TCP_TRANSITORY_TIMEOUT 240
@@ -96,29 +94,6 @@ nat_reset_timeouts (nat_timeouts_t * timeouts)
}
static_always_inline u32
-nat_session_get_timeout (nat_timeouts_t *timeouts, nat_protocol_t proto,
- u8 state)
-{
- switch (proto)
- {
- case NAT_PROTOCOL_ICMP:
- return timeouts->icmp;
- case NAT_PROTOCOL_UDP:
- return timeouts->udp;
- case NAT_PROTOCOL_TCP:
- {
- if (state)
- return timeouts->tcp.transitory;
- else
- return timeouts->tcp.established;
- }
- default:
- return timeouts->udp;
- }
- return 0;
-}
-
-static_always_inline u32
nat_calc_bihash_buckets (u32 n_elts)
{
n_elts = n_elts / 2.5;
@@ -138,10 +113,6 @@ nat_calc_bihash_buckets (u32 n_elts)
return lower_pow2;
}
-u8 *format_nat_protocol (u8 *s, va_list *args);
-
-uword unformat_nat_protocol (unformat_input_t *input, va_list *args);
-
#endif /* included_nat_lib_h__ */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/lib/log.h b/src/plugins/nat/lib/log.h
index 26bd93f2589..a82028ed8bf 100644
--- a/src/plugins/nat/lib/log.h
+++ b/src/plugins/nat/lib/log.h
@@ -21,20 +21,7 @@
#include <vppinfra/elog.h>
-#define foreach_nat_log_level \
- _ (0x00, LOG_NONE) \
- _ (0x01, LOG_ERROR) \
- _ (0x02, LOG_WARNING) \
- _ (0x03, LOG_NOTICE) \
- _ (0x04, LOG_INFO) \
- _ (0x05, LOG_DEBUG)
-
-typedef enum nat_log_level_t_
-{
-#define _(n, f) NAT_##f = n,
- foreach_nat_log_level
-#undef _
-} nat_log_level_t;
+#include <nat/lib/nat_types.api_types.h>
#define nat_elog(_pm, _level, _str) \
do \
diff --git a/src/plugins/nat/lib/nat_proto.h b/src/plugins/nat/lib/nat_proto.h
new file mode 100644
index 00000000000..4b57b994e22
--- /dev/null
+++ b/src/plugins/nat/lib/nat_proto.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_nat_proto_h__
+#define included_nat_proto_h__
+
+#include <vnet/ip/ip.h>
+
+#define foreach_nat_protocol \
+ _ (OTHER, 0, other, "other") \
+ _ (UDP, 1, udp, "udp") \
+ _ (TCP, 2, tcp, "tcp") \
+ _ (ICMP, 3, icmp, "icmp")
+
+typedef enum
+{
+#define _(N, i, n, s) NAT_PROTOCOL_##N = i,
+ foreach_nat_protocol
+#undef _
+ NAT_N_PROTOCOLS
+} nat_protocol_t;
+
+always_inline nat_protocol_t
+ip_proto_to_nat_proto (ip_protocol_t ip_proto)
+{
+ static const nat_protocol_t lookup_table[256] = {
+ [IP_PROTOCOL_TCP] = NAT_PROTOCOL_TCP,
+ [IP_PROTOCOL_UDP] = NAT_PROTOCOL_UDP,
+ [IP_PROTOCOL_ICMP] = NAT_PROTOCOL_ICMP,
+ [IP_PROTOCOL_ICMP6] = NAT_PROTOCOL_ICMP,
+ };
+
+ return lookup_table[ip_proto];
+}
+
+static_always_inline ip_protocol_t
+nat_proto_to_ip_proto (nat_protocol_t nat_proto)
+{
+ ASSERT (nat_proto <= NAT_PROTOCOL_ICMP);
+
+ static const u8 lookup_table[256] = {
+ [NAT_PROTOCOL_OTHER] = ~0,
+ [NAT_PROTOCOL_TCP] = IP_PROTOCOL_TCP,
+ [NAT_PROTOCOL_UDP] = IP_PROTOCOL_UDP,
+ [NAT_PROTOCOL_ICMP] = IP_PROTOCOL_ICMP,
+ };
+
+ ASSERT (NAT_PROTOCOL_OTHER == nat_proto || NAT_PROTOCOL_TCP == nat_proto ||
+ NAT_PROTOCOL_UDP == nat_proto || NAT_PROTOCOL_ICMP == nat_proto);
+
+ return lookup_table[nat_proto];
+}
+
+u8 *format_nat_protocol (u8 *s, va_list *args);
+
+uword unformat_nat_protocol (unformat_input_t *input, va_list *args);
+
+#endif /* included_nat_proto_h__ */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
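
The new nat_proto.h owns the protocol mapping in both directions via dense lookup tables, so callers translate without switch statements. A small usage sketch under the definitions above (the function name is hypothetical):

    #include <nat/lib/nat_proto.h>

    static int
    nat_wants_port_rewrite (ip_protocol_t ip_proto)
    {
      nat_protocol_t np = ip_proto_to_nat_proto (ip_proto);

      if (np == NAT_PROTOCOL_OTHER)
        return 0; /* not TCP/UDP/ICMP: nothing port-based to rewrite */

      /* map back to the IANA protocol number, e.g. for IPFIX/syslog;
         note the round trip is lossy for ICMPv6, which collapses to
         NAT_PROTOCOL_ICMP and comes back as IP_PROTOCOL_ICMP */
      ip_protocol_t wire = nat_proto_to_ip_proto (np);
      return wire == IP_PROTOCOL_TCP || wire == IP_PROTOCOL_UDP ||
             wire == IP_PROTOCOL_ICMP;
    }
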
diff --git a/src/plugins/nat/lib/nat_syslog.c b/src/plugins/nat/lib/nat_syslog.c
index 2c395bf7fd8..98777ebf280 100644
--- a/src/plugins/nat/lib/nat_syslog.c
+++ b/src/plugins/nat/lib/nat_syslog.c
@@ -23,38 +23,7 @@
#include <nat/lib/nat_syslog.h>
#include <nat/lib/inlines.h>
-#define NAT_FACILITY SYSLOG_FACILITY_LOCAL0
-
-#define NAT_APPNAME "NAT"
-
-#define SADD_SDEL_SEVERITY SYSLOG_SEVERITY_INFORMATIONAL
-#define APMADD_APMDEL_SEVERITY SYSLOG_SEVERITY_INFORMATIONAL
-
-#define SADD_MSGID "SADD"
-#define SDEL_MSGID "SDEL"
-#define APMADD_MSGID "APMADD"
-#define APMDEL_MSGID "APMDEL"
-
-#define NSESS_SDID "nsess"
-#define NAPMAP_SDID "napmap"
-
-#define SSUBIX_SDPARAM_NAME "SSUBIX"
-#define SVLAN_SDPARAM_NAME "SVLAN"
-#define IATYP_SDPARAM_NAME "IATYP"
-#define ISADDR_SDPARAM_NAME "ISADDR"
-#define ISPORT_SDPARAM_NAME "ISPORT"
-#define IDADDR_SDPARAM_NAME "IDADDR"
-#define IDPORT_SDPARAM_NAME "IDPORT"
-#define XATYP_SDPARAM_NAME "XATYP"
-#define XSADDR_SDPARAM_NAME "XSADDR"
-#define XSPORT_SDPARAM_NAME "XSPORT"
-#define XDADDR_SDPARAM_NAME "XDADDR"
-#define XDPORT_SDPARAM_NAME "XDPORT"
-#define PROTO_SDPARAM_NAME "PROTO"
-#define SV6ENC_SDPARAM_NAME "SV6ENC"
-
-#define IATYP_IPV4 "IPv4"
-#define IATYP_IPV6 "IPv6"
+#include <nat/lib/nat_syslog_constants.h>
static inline void
nat_syslog_nat44_apmap (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
@@ -142,82 +111,6 @@ nat_syslog_dslite_apmdel (u32 ssubix, ip6_address_t * sv6enc,
}
static inline void
-nat_syslog_nat44_sess (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
- u16 isport, ip4_address_t * xsaddr, u16 xsport,
- ip4_address_t * idaddr, u16 idport,
- ip4_address_t * xdaddr, u16 xdport,
- nat_protocol_t proto, u8 is_add, u8 is_twicenat)
-{
- syslog_msg_t syslog_msg;
- fib_table_t *fib;
-
- if (!syslog_is_enabled ())
- return;
-
- if (syslog_severity_filter_block (SADD_SDEL_SEVERITY))
- return;
-
- fib = fib_table_get (sfibix, FIB_PROTOCOL_IP4);
-
- syslog_msg_init (&syslog_msg, NAT_FACILITY, SADD_SDEL_SEVERITY, NAT_APPNAME,
- is_add ? SADD_MSGID : SDEL_MSGID);
-
- syslog_msg_sd_init (&syslog_msg, NSESS_SDID);
- syslog_msg_add_sd_param (&syslog_msg, SSUBIX_SDPARAM_NAME, "%d", ssubix);
- syslog_msg_add_sd_param (&syslog_msg, SVLAN_SDPARAM_NAME, "%d",
- fib->ft_table_id);
- syslog_msg_add_sd_param (&syslog_msg, IATYP_SDPARAM_NAME, IATYP_IPV4);
- syslog_msg_add_sd_param (&syslog_msg, ISADDR_SDPARAM_NAME, "%U",
- format_ip4_address, isaddr);
- syslog_msg_add_sd_param (&syslog_msg, ISPORT_SDPARAM_NAME, "%d",
- clib_net_to_host_u16 (isport));
- syslog_msg_add_sd_param (&syslog_msg, XATYP_SDPARAM_NAME, IATYP_IPV4);
- syslog_msg_add_sd_param (&syslog_msg, XSADDR_SDPARAM_NAME, "%U",
- format_ip4_address, xsaddr);
- syslog_msg_add_sd_param (&syslog_msg, XSPORT_SDPARAM_NAME, "%d",
- clib_net_to_host_u16 (xsport));
- syslog_msg_add_sd_param (&syslog_msg, PROTO_SDPARAM_NAME, "%d",
- nat_proto_to_ip_proto (proto));
- syslog_msg_add_sd_param (&syslog_msg, XDADDR_SDPARAM_NAME, "%U",
- format_ip4_address, xdaddr);
- syslog_msg_add_sd_param (&syslog_msg, XDPORT_SDPARAM_NAME, "%d",
- clib_net_to_host_u16 (xdport));
- if (is_twicenat)
- {
- syslog_msg_add_sd_param (&syslog_msg, IDADDR_SDPARAM_NAME, "%U",
- format_ip4_address, idaddr);
- syslog_msg_add_sd_param (&syslog_msg, IDPORT_SDPARAM_NAME, "%d",
- clib_net_to_host_u16 (idport));
- }
-
- syslog_msg_send (&syslog_msg);
-}
-
-void
-nat_syslog_nat44_sadd (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
- u16 isport, ip4_address_t * idaddr, u16 idport,
- ip4_address_t * xsaddr, u16 xsport,
- ip4_address_t * xdaddr, u16 xdport,
- nat_protocol_t proto, u8 is_twicenat)
-{
- nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
- idaddr, idport, xdaddr, xdport, proto, 1,
- is_twicenat);
-}
-
-void
-nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
- u16 isport, ip4_address_t * idaddr, u16 idport,
- ip4_address_t * xsaddr, u16 xsport,
- ip4_address_t * xdaddr, u16 xdport,
- nat_protocol_t proto, u8 is_twicenat)
-{
- nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
- idaddr, idport, xdaddr, xdport, proto, 0,
- is_twicenat);
-}
-
-static inline void
nat_syslog_nat64_sess (u32 sfibix, ip6_address_t * isaddr, u16 isport,
ip4_address_t * xsaddr, u16 xsport,
ip4_address_t * xdaddr, u16 xdport,
diff --git a/src/plugins/nat/lib/nat_syslog.h b/src/plugins/nat/lib/nat_syslog.h
index 9721664cf54..f929bf310b4 100644
--- a/src/plugins/nat/lib/nat_syslog.h
+++ b/src/plugins/nat/lib/nat_syslog.h
@@ -20,6 +20,7 @@
#define __included_nat_syslog_h__
#include <nat/lib/lib.h>
+#include <nat/lib/nat_proto.h>
void nat_syslog_nat44_apmadd (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
u16 isport, ip4_address_t * xsaddr, u16 xsport,
@@ -41,18 +42,6 @@ nat_syslog_dslite_apmdel (u32 ssubix, ip6_address_t * sv6enc,
ip4_address_t * xsaddr, u16 xsport,
nat_protocol_t proto);
-void nat_syslog_nat44_sadd (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
- u16 isport, ip4_address_t * idaddr, u16 idport,
- ip4_address_t * xsaddr, u16 xsport,
- ip4_address_t * xdaddr, u16 xdport,
- nat_protocol_t proto, u8 is_twicenat);
-
-void nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
- u16 isport, ip4_address_t * idaddr, u16 idport,
- ip4_address_t * xsaddr, u16 xsport,
- ip4_address_t * xdaddr, u16 xdport,
- nat_protocol_t proto, u8 is_twicenat);
-
void nat_syslog_nat64_sadd (u32 sfibix, ip6_address_t * isaddr, u16 isport,
ip4_address_t * xsaddr, u16 xsport,
ip4_address_t * xdaddr, u16 xdport,
diff --git a/src/plugins/nat/lib/nat_syslog_constants.h b/src/plugins/nat/lib/nat_syslog_constants.h
new file mode 100644
index 00000000000..eeea7d2654e
--- /dev/null
+++ b/src/plugins/nat/lib/nat_syslog_constants.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT syslog logging constants
+ */
+#ifndef __included_nat_syslog_constants_h__
+#define __included_nat_syslog_constants_h__
+
+#define NAT_FACILITY SYSLOG_FACILITY_LOCAL0
+
+#define NAT_APPNAME "NAT"
+
+#define SADD_SDEL_SEVERITY SYSLOG_SEVERITY_INFORMATIONAL
+#define APMADD_APMDEL_SEVERITY SYSLOG_SEVERITY_INFORMATIONAL
+
+#define SADD_MSGID "SADD"
+#define SDEL_MSGID "SDEL"
+#define APMADD_MSGID "APMADD"
+#define APMDEL_MSGID "APMDEL"
+
+#define NSESS_SDID "nsess"
+#define NAPMAP_SDID "napmap"
+
+#define SSUBIX_SDPARAM_NAME "SSUBIX"
+#define SVLAN_SDPARAM_NAME "SVLAN"
+#define IATYP_SDPARAM_NAME "IATYP"
+#define ISADDR_SDPARAM_NAME "ISADDR"
+#define ISPORT_SDPARAM_NAME "ISPORT"
+#define IDADDR_SDPARAM_NAME "IDADDR"
+#define IDPORT_SDPARAM_NAME "IDPORT"
+#define XATYP_SDPARAM_NAME "XATYP"
+#define XSADDR_SDPARAM_NAME "XSADDR"
+#define XSPORT_SDPARAM_NAME "XSPORT"
+#define XDADDR_SDPARAM_NAME "XDADDR"
+#define XDPORT_SDPARAM_NAME "XDPORT"
+#define PROTO_SDPARAM_NAME "PROTO"
+#define SV6ENC_SDPARAM_NAME "SV6ENC"
+
+#define IATYP_IPV4 "IPv4"
+#define IATYP_IPV6 "IPv6"
+
+#endif /* __included_nat_syslog_constants_h__ */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
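
These constants feed the structured-data syslog helpers in nat_syslog.c. A trimmed sketch of that call sequence, mirroring the nat_syslog_nat44_sess body removed above (the vnet syslog include path is assumed; the function itself is illustrative only):

    #include <vnet/syslog/syslog.h>
    #include <nat/lib/nat_syslog_constants.h>

    static void
    log_sadd_example (u32 ssubix)
    {
      syslog_msg_t syslog_msg;

      if (!syslog_is_enabled ())
        return;
      if (syslog_severity_filter_block (SADD_SDEL_SEVERITY))
        return;

      /* RFC 5424-style message: facility/severity/app-name/msgid,
         then one structured-data element with its parameters */
      syslog_msg_init (&syslog_msg, NAT_FACILITY, SADD_SDEL_SEVERITY,
                       NAT_APPNAME, SADD_MSGID);
      syslog_msg_sd_init (&syslog_msg, NSESS_SDID);
      syslog_msg_add_sd_param (&syslog_msg, SSUBIX_SDPARAM_NAME, "%d", ssubix);
      syslog_msg_send (&syslog_msg);
    }
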
diff --git a/src/plugins/nat/nat44-ed/nat44_ed.api b/src/plugins/nat/nat44-ed/nat44_ed.api
index c65b7a81166..322260f7f96 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed.api
+++ b/src/plugins/nat/nat44-ed/nat44_ed.api
@@ -13,13 +13,13 @@
* limitations under the License.
*/
-option version = "5.3.0";
+option version = "5.5.0";
import "vnet/ip/ip_types.api";
import "vnet/interface_types.api";
import "plugins/nat/lib/nat_types.api";
/**
- * @file nat44.api
+ * @file nat44_ed.api
* @brief VPP control-plane API messages.
*
* This file defines VPP control-plane API messages which are generally
@@ -35,41 +35,6 @@ enum nat44_config_flags : u8
NAT44_IS_OUT2IN_DPO = 0x08,
};
-/** \brief Enable/disable NAT44 plugin
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param inside_vrf - inside vrf id
- @param outside_vrf - outside vrf id
- @param users - maximum number of users per thread
- (NAT44_IS_ENDPOINT_INDEPENDENT)
- @param user_memory - overwrite hash allocation parameter
- (NAT44_IS_ENDPOINT_INDEPENDENT)
- @param sessions - maximum number of sessions per thread
- @param session_memory - overwrite hash allocation parameter
- @param user_sessions - maximum number of sessions per user
- (NAT44_IS_ENDPOINT_INDEPENDENT)
- @param enable - true if enable, false if disable
- @param flags - flag NAT44_IS_ENDPOINT_INDEPENDENT,
- NAT44_IS_ENDPOINT_DEPENDENT,
- NAT44_IS_STATIC_MAPPING_ONLY,
- NAT44_IS_CONNECTION_TRACKING,
- NAT44_IS_OUT2IN_DPO
-*/
-autoreply define nat44_plugin_enable_disable {
- option deprecated;
- u32 client_index;
- u32 context;
- u32 inside_vrf;
- u32 outside_vrf;
- u32 users;
- u32 user_memory;
- u32 sessions;
- u32 session_memory;
- u32 user_sessions;
- bool enable;
- vl_api_nat44_config_flags_t flags;
-};
-
/** \brief Enable/disable NAT44ED plugin
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -82,7 +47,6 @@ autoreply define nat44_plugin_enable_disable {
NAT44_IS_CONNECTION_TRACKING
*/
autoreply define nat44_ed_plugin_enable_disable {
- option in_progress;
u32 client_index;
u32 context;
u32 inside_vrf;
@@ -93,146 +57,65 @@ autoreply define nat44_ed_plugin_enable_disable {
vl_api_nat44_config_flags_t flags;
};
-/** \brief Control ping from client to api server request
+/** \brief Enable/disable forwarding for NAT44
+ Forward packets which don't match existing translation
+ or static mapping instead of dropping them.
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param enable - true for enable, false for disable
*/
-define nat_control_ping
-{
+autoreply define nat44_forwarding_enable_disable {
option deprecated;
u32 client_index;
u32 context;
+ bool enable;
};
-/** \brief Control ping from the client to the server response
+/** \brief Enable/disable NAT IPFIX logging
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param vpe_pid - the pid of the vpe, returned by the server
+ @param domain_id - observation domain ID
+ @param src_port - source port number
+ @param enable - true if enable, false if disable
*/
-define nat_control_ping_reply
-{
+autoreply define nat_ipfix_enable_disable {
option deprecated;
- u32 context;
- i32 retval;
u32 client_index;
- u32 vpe_pid;
+ u32 context;
+ u32 domain_id;
+ u16 src_port;
+ bool enable;
};
-/** \brief Show NAT plugin startup config
+/** \brief Set values of timeouts for NAT sessions (seconds)
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param udp - UDP timeout (default 300sec)
+ @param tcp_established - TCP established timeout (default 7440sec)
+ @param tcp_transitory - TCP transitory timeout (default 240sec)
+ @param icmp - ICMP timeout (default 60sec)
*/
-define nat_show_config
-{
+autoreply define nat_set_timeouts {
option deprecated;
u32 client_index;
u32 context;
+ u32 udp;
+ u32 tcp_established;
+ u32 tcp_transitory;
+ u32 icmp;
};
-/** \brief DEPRECATED: Show NAT plugin startup config reply
- @param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param static_mapping_only - if true dynamic translations disabled
- @param static_mapping_connection_tracking - if true create session data
- @param deterministic - if true deterministic mapping
- @param endpoint_dependent - if true endpoint-dependent mode
- @param out2in_dpo - if true out2in dpo mode
- @param dslite_ce - if true DS-Lite is CE/B4 element, if false AFTR elemet
- @param translation_buckets - number of translation hash buckets
- @param translation_memory_size - translation hash memory size
- @param user_buckets - number of user hash buckets
- @param user_memory_size - user hash memory size
- @param max_translations_per_user - maximum number of translations per user
- @param outside_vrf_id - outside VRF id
- @param inside_vrf_id - default inside VRF id
- @param nat64_bib_buckets - number of NAT64 BIB hash buckets
- @param nat64_bib_memory_size - memory size of NAT64 BIB hash
- @param nat64_st_buckets - number of NAT64 session table hash buckets
- @param nat64_st_memory_size - memory size of NAT64 session table hash
-*/
-define nat_show_config_reply
-{
- option deprecated;
- u32 context;
- i32 retval;
- bool static_mapping_only;
- bool static_mapping_connection_tracking;
- bool deterministic;
- bool endpoint_dependent;
- bool out2in_dpo;
- bool dslite_ce;
- u32 translation_buckets;
- u32 translation_memory_size;
- u32 user_buckets;
- u64 user_memory_size;
- u32 max_translations_per_user;
- u32 outside_vrf_id;
- u32 inside_vrf_id;
- u32 nat64_bib_buckets;
- u64 nat64_bib_memory_size;
- u32 nat64_st_buckets;
- u64 nat64_st_memory_size;
-};
-
-/** \brief Show NAT plugin startup config
+/** \brief NAT44 set session limit
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param session_limit - session limit
+ @param vrf_id - vrf id
*/
-define nat_show_config_2
-{
- option deprecated;
+autoreply define nat44_set_session_limit {
u32 client_index;
u32 context;
-};
-
-/** \brief Show NAT plugin startup config reply
- @param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param static_mapping_only - if true dynamic translations disabled
- @param static_mapping_connection_tracking - if true create session data
- @param deterministic - if true deterministic mapping
- @param endpoint_dependent - if true endpoint-dependent mode
- @param out2in_dpo - if true out2in dpo mode
- @param dslite_ce - if true DS-Lite is CE/B4 element, if false AFTR elemet
- @param translation_buckets - number of translation hash buckets
- @param translation_memory_size - translation hash memory size
- @param user_buckets - number of user hash buckets
- @param user_memory_size - user hash memory size
- @param max_translations_per_user - maximum number of translations per user
- @param outside_vrf_id - outside VRF id
- @param inside_vrf_id - default inside VRF id
- @param nat64_bib_buckets - number of NAT64 BIB hash buckets
- @param nat64_bib_memory_size - memory size of NAT64 BIB hash
- @param nat64_st_buckets - number of NAT64 session table hash buckets
- @param nat64_st_memory_size - memory size of NAT64 session table hash
- @param max_translations_per_thread - max translations per worker thread
- @param max_users_per_thread - max users per worker thread
-*/
-define nat_show_config_2_reply
-{
- option deprecated;
- u32 context;
- i32 retval;
- bool static_mapping_only;
- bool static_mapping_connection_tracking;
- bool deterministic;
- bool endpoint_dependent;
- bool out2in_dpo;
- bool dslite_ce;
- u32 translation_buckets;
- u64 translation_memory_size;
- u32 user_buckets;
- u64 user_memory_size;
- u32 max_translations_per_user;
- u32 outside_vrf_id;
- u32 inside_vrf_id;
- u32 nat64_bib_buckets;
- u64 nat64_bib_memory_size;
- u32 nat64_st_buckets;
- u64 nat64_st_memory_size;
- u32 max_translations_per_thread;
- u32 max_users_per_thread;
+ u32 session_limit;
+ u32 vrf_id;
};
/** \brief Show NAT44 plugin running config
@@ -241,7 +124,6 @@ define nat_show_config_2_reply
*/
define nat44_show_running_config
{
- option in_progress;
u32 client_index;
u32 context;
};
@@ -267,7 +149,6 @@ define nat44_show_running_config
*/
define nat44_show_running_config_reply
{
- option in_progress;
u32 context;
i32 retval;
u32 inside_vrf;
@@ -284,41 +165,6 @@ define nat44_show_running_config_reply
vl_api_nat44_config_flags_t flags;
};
-/** \brief Run nat44 garbage collection
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-autoreply define nat44_session_cleanup {
- option deprecated;
- u32 client_index;
- u32 context;
-};
-
-/** \brief NAT44 set session limit
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param session_limit - session limit
- @param vrf_id - vrf id
-*/
-autoreply define nat44_set_session_limit {
- u32 client_index;
- u32 context;
- u32 session_limit;
- u32 vrf_id;
-};
-
-/** \brief Set NAT logging level
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param log_level - logging level
-*/
-autoreply define nat_set_log_level {
- option deprecated;
- u32 client_index;
- u32 context;
- vl_api_nat_log_level_t log_level;
-};
-
/** \brief Set NAT workers
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -352,121 +198,82 @@ define nat_worker_details {
string name[64];
};
-/** \brief Enable/disable NAT IPFIX logging
+/** \brief Add/delete inter VRF NAT44-ED routing table
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param domain_id - observation domain ID
- @param src_port - source port number
- @param enable - true if enable, false if disable
+ @param table_vrf_id - id of (rx) VRF used for resolving
+ destination (tx) VRF during dynamic
+ session creation
+ @param is_add - if true add else del
*/
-autoreply define nat_ipfix_enable_disable {
- option deprecated;
+autoreply define nat44_ed_add_del_vrf_table {
u32 client_index;
u32 context;
- u32 domain_id;
- u16 src_port;
- bool enable;
+ u32 table_vrf_id;
+ bool is_add;
};
-/** \brief Set values of timeouts for NAT sessions (seconds)
+/** \brief Add/del inter VRF NAT44-ED route record
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param udp - UDP timeout (default 300sec)
- @param tcp_established - TCP established timeout (default 7440sec)
- @param tcp_transitory - TCP transitory timeout (default 240sec)
- @param icmp - ICMP timeout (default 60sec)
+ @param table_vrf_id - id of the VRF NAT routing table
+ @param vrf_id - id of resolving destination (tx) VRF table
+ @param is_add - if true add else del
*/
-autoreply define nat_set_timeouts {
- option deprecated;
+autoreply define nat44_ed_add_del_vrf_route {
u32 client_index;
u32 context;
- u32 udp;
- u32 tcp_established;
- u32 tcp_transitory;
- u32 icmp;
+ u32 table_vrf_id;
+ u32 vrf_id;
+ bool is_add;
};
-/** \brief Get values of timeouts for NAT sessions (seconds)
+/** \brief Dump NAT44-ED inter VRF NAT routing tables
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
-define nat_get_timeouts {
- option deprecated;
+define nat44_ed_vrf_tables_dump {
u32 client_index;
u32 context;
-};
-
-/** \brief Get values of timeouts for NAT sessions reply
- @param context - sender context, to match reply w/ request
- @param retval - return code
- @param udp - UDP timeout
- @param tcp_established - TCP established timeout
- @param tcp_transitory - TCP transitory timeout
- @param icmp - ICMP timeout
-*/
-define nat_get_timeouts_reply {
option deprecated;
- u32 context;
- i32 retval;
- u32 udp;
- u32 tcp_established;
- u32 tcp_transitory;
- u32 icmp;
};
-/** \brief Set address and port assignment algorithm
- @param client_index - opaque cookie to identify the sender
+/** \brief NAT44-ED inter VRF NAT routing table details response
@param context - sender context, to match reply w/ request
- @param alg - address and port assignment algorithm:
- 0 - default, 1 - MAP-E, 2 - port range
- (see nat_addr_and_port_alloc_alg_t in nat.h)
- @param psid_offset - number of offset bits (valid only for MAP-E alg)
- @param psid_length - length of PSID (valid only for MAP-E alg)
- @param psid - Port Set Identifier (PSID) value (valid only for MAP-E alg)
- @param start_port - beginning of the port range
- @param end_port - end of the port range
+ @param table_vrf_id - id of the VRF NAT routing table
+ @param n_vrf_ids - number of vrf_ids
+ @param vrf_ids - ids of resolving destination (tx) VRFs
*/
-autoreply define nat_set_addr_and_port_alloc_alg {
- u32 client_index;
+define nat44_ed_vrf_tables_details {
u32 context;
- u8 alg;
- u8 psid_offset;
- u8 psid_length;
- u16 psid;
- u16 start_port;
- u16 end_port;
+ u32 table_vrf_id;
+ u32 n_vrf_ids;
+ u32 vrf_ids[n_vrf_ids];
+ option deprecated;
};
-/** \brief Get address and port assignment algorithm
+/** \brief Dump NAT44-ED inter VRF NAT routing tables
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
-define nat_get_addr_and_port_alloc_alg {
+define nat44_ed_vrf_tables_v2_dump {
u32 client_index;
u32 context;
+ option status="in_progress";
};
-/** \brief Get address and port assignment algorithm reply
+/** \brief NAT44-ED inter VRF NAT routing table details response
@param context - sender context, to match reply w/ request
- @param retval - return code
- @param alg - address and port assignment algorithm:
- 0 - default, 1 - MAP-E, 2 - port range
- (see nat_addr_and_port_alloc_alg_t in nat.h)
- @param psid_offset - number of offset bits (valid only for MAP-E alg)
- @param psid_length - length of PSID (valid only for MAP-E alg)
- @param psid - Port Set Identifier (PSID) value (valid only for MAP-E alg)
- @param start_port - beginning of the port range
- @param end_port - end of the port range
+ @param table_vrf_id - id of the VRF NAT routing table
+ @param n_vrf_ids - number of vrf_ids
+ @param vrf_ids - ids of resolving destination (tx) VRFs
*/
-define nat_get_addr_and_port_alloc_alg_reply {
+define nat44_ed_vrf_tables_v2_details {
u32 context;
- i32 retval;
- u8 alg;
- u8 psid_offset;
- u8 psid_length;
- u16 psid;
- u16 start_port;
- u16 end_port;
+ u32 table_vrf_id;
+ u32 n_vrf_ids;
+ u32 vrf_ids[n_vrf_ids];
+ option status="in_progress";
};
/** \brief Set TCP MSS rewriting configuration
@@ -504,138 +311,73 @@ define nat_get_mss_clamping_reply {
bool enable;
};
-/** \brief Set HA listener (local settings)
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param ip_address - local IP4 address
- @param port - local UDP port number
- @param path_mtu - path MTU between local and failover
-*/
-autoreply define nat_ha_set_listener {
- u32 client_index;
- u32 context;
- vl_api_ip4_address_t ip_address;
- u16 port;
- u32 path_mtu;
-};
-
-/** \brief Set HA failover (remote settings)
+/** \brief Set NAT handoff frame queue options
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param ip_address - failover IP4 address
- @param port - failvoer UDP port number
- @param session_refresh_interval - number of seconds after which to send
- session counters refresh
+ @param frame_queue_nelts - number of worker handoff frame queue elements
*/
-autoreply define nat_ha_set_failover {
+autoreply define nat44_ed_set_fq_options {
u32 client_index;
u32 context;
- vl_api_ip4_address_t ip_address;
- u16 port;
- u32 session_refresh_interval;
+ u32 frame_queue_nelts;
};
-/** \brief Get HA listener/local configuration
+/** \brief Show NAT handoff frame queue options
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
-define nat_ha_get_listener {
+define nat44_ed_show_fq_options
+{
u32 client_index;
u32 context;
};
-/** \brief Get HA listener/local configuration reply
+/** \brief Show NAT handoff frame queue options reply
@param context - sender context, to match reply w/ request
- @param retval - return code
- @param ip_address - local IP4 address
- @param port - local UDP port number
- @param path_mtu - Path MTU between local and failover
+ @param retval - return code for the request
+ @param frame_queue_nelts - number of worker handoff frame queue elements
*/
-define nat_ha_get_listener_reply {
+define nat44_ed_show_fq_options_reply
+{
u32 context;
i32 retval;
- vl_api_ip4_address_t ip_address;
- u16 port;
- u32 path_mtu;
+ u32 frame_queue_nelts;
};
-/** \brief Get HA failover/remote settings
+/** \brief Add/delete NAT44 pool address from a specific interface
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param is_add - true if add, false if delete
+ @param sw_if_index - software index of the interface
+ @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts
*/
-define nat_ha_get_failover {
+autoreply define nat44_add_del_interface_addr {
u32 client_index;
u32 context;
+ bool is_add;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_nat_config_flags_t flags;
};
-/** \brief Get HA failover/remote settings reply
- @param context - sender context, to match reply w/ request
- @param retval - return code
- @param ip_address - failover IP4 address
- @param port - failvoer UDP port number
- @param session_refresh_interval - number of seconds after which to send
- session counters refresh
-*/
-define nat_ha_get_failover_reply {
- u32 context;
- i32 retval;
- vl_api_ip4_address_t ip_address;
- u16 port;
- u32 session_refresh_interval;
-};
-
-/** \brief Flush the current HA data (for testing)
+/** \brief Dump NAT44 pool addresses interfaces
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
-autoreply define nat_ha_flush {
+define nat44_interface_addr_dump {
u32 client_index;
u32 context;
};
-/** \brief Resync HA (resend existing sessions to new failover)
- @param client_index - opaque cookie to identify the sender
+/** \brief NAT44 pool addresses interfaces details response
@param context - sender context, to match reply w/ request
- @param want_resync_event - resync completed event sent to the sender via
- nat_ha_resync_completed_event API message if
- non-zero
- @param pid - sender's pid
-*/
-autoreply define nat_ha_resync
-{
- u32 client_index;
- u32 context;
- u8 want_resync_event;
- u32 pid;
-};
-
-/** \brief Tell client about a HA resync completion event
- @param client_index - opaque cookie to identify the sender
- @param pid - client pid registered to receive notification
- @param missed_count - number of missed (not ACKed) messages
-*/
-define nat_ha_resync_completed_event
-{
- u32 client_index;
- u32 pid;
- u32 missed_count;
-};
-
-service {
- rpc nat_ha_resync returns nat_ha_resync_reply events nat_ha_resync_completed_event;
-};
+ @param sw_if_index - software index of the interface
+ @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts
-/** \brief Del NAT44 user
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param ip_address - IPv4 address
- @param fib_index - FIB index
*/
-autoreply define nat44_del_user {
- u32 client_index;
+define nat44_interface_addr_details {
u32 context;
- vl_api_ip4_address_t ip_address;
- u32 fib_index;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_nat_config_flags_t flags;
};
/** \brief Add/del NAT44 address range
@@ -719,41 +461,42 @@ define nat44_interface_details {
vl_api_interface_index_t sw_if_index;
};
-/** \brief Enable/disbale NAT44 as an interface output feature (postrouting
+/** \brief Add/del NAT output interface (postrouting
in2out translation)
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param is_add - true if add, false if delete
- @param flags - flag NAT_IS_INSIDE if interface is inside else
- interface is outside
@param sw_if_index - software index of the interface
*/
-autoreply define nat44_interface_add_del_output_feature {
+autoendian autoreply define nat44_ed_add_del_output_interface {
u32 client_index;
u32 context;
bool is_add;
- vl_api_nat_config_flags_t flags;
vl_api_interface_index_t sw_if_index;
};
-/** \brief Dump interfaces with NAT44 output feature
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-define nat44_interface_output_feature_dump {
+service {
+ rpc nat44_ed_output_interface_get returns nat44_ed_output_interface_get_reply
+ stream nat44_ed_output_interface_details;
+};
+
+define nat44_ed_output_interface_get
+{
u32 client_index;
u32 context;
+ u32 cursor;
};
-/** \brief NAT44 interface with output feature details response
- @param context - sender context, to match reply w/ request
- @param flags - flag NAT_IS_INSIDE if interface is inside else
- interface is outside
- @param sw_if_index - software index of the interface
-*/
-define nat44_interface_output_feature_details {
+define nat44_ed_output_interface_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+define nat44_ed_output_interface_details
+{
u32 context;
- vl_api_nat_config_flags_t flags;
vl_api_interface_index_t sw_if_index;
};
@@ -779,6 +522,8 @@ define nat44_interface_output_feature_details {
@param tag - opaque string tag
*/
autoreply define nat44_add_del_static_mapping {
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
@@ -817,7 +562,6 @@ autoreply define nat44_add_del_static_mapping {
@param tag - opaque string tag
*/
autoreply define nat44_add_del_static_mapping_v2 {
- option in_progress;
u32 client_index;
u32 context;
bool is_add;
@@ -929,117 +673,6 @@ define nat44_identity_mapping_details {
string tag[64];
};
-/** \brief Add/delete NAT44 pool address from specific interfce
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - true if add, false if delete
- @param sw_if_index - software index of the interface
- @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts
-*/
-autoreply define nat44_add_del_interface_addr {
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_interface_index_t sw_if_index;
- vl_api_nat_config_flags_t flags;
-};
-
-/** \brief Dump NAT44 pool addresses interfaces
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-define nat44_interface_addr_dump {
- u32 client_index;
- u32 context;
-};
-
-/** \brief NAT44 pool addresses interfaces details response
- @param context - sender context, to match reply w/ request
- @param sw_if_index - software index of the interface
- @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts
-
-*/
-define nat44_interface_addr_details {
- u32 context;
- vl_api_interface_index_t sw_if_index;
- vl_api_nat_config_flags_t flags;
-};
-
-/** \brief Dump NAT44 users
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-define nat44_user_dump {
- u32 client_index;
- u32 context;
-};
-
-/** \brief NAT44 users response
- @param context - sender context, to match reply w/ request
- @vrf_id - VRF ID
- @param ip_address - IPv4 address
- @param nsessions - number of dynamic sessions
- @param nstaticsessions - number of static sessions
-*/
-define nat44_user_details {
- u32 context;
- u32 vrf_id;
- vl_api_ip4_address_t ip_address;
- u32 nsessions;
- u32 nstaticsessions;
-};
-
-/** \brief NAT44 user's sessions
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param ip_address - IPv4 address of the user to dump
- @param vrf_id - VRF_ID
-*/
-define nat44_user_session_dump {
- u32 client_index;
- u32 context;
- vl_api_ip4_address_t ip_address;
- u32 vrf_id;
-};
-
-/** \brief NAT44 user's sessions response
- @param context - sender context, to match reply w/ request
- @param outside_ip_address - outside IPv4 address
- @param outside_port - outside port
- @param inside_ip_address - inside IPv4 address
- @param inside_port - inside port
- @param protocol - protocol
- @param flags - flag NAT_IS_STATIC if session is static,
- flag NAT_IS_TWICE_NAT if session is twice-nat,
- flag NAT_IS_EXT_HOST_VALID if external host address
- and port are valid
- @param last_heard - last heard timer
- @param total_bytes - count of bytes sent through session
- @param total_pkts - count of pakets sent through session
- @param ext_host_address - external host IPv4 address
- @param ext_host_port - external host port
- @param ext_host_nat_address - post-NAT external host IPv4 address (valid
- only if twice-nat session)
- @param ext_host_nat_port - post-NAT external host port (valid only if
- twice-nat session)
-*/
-define nat44_user_session_details {
- u32 context;
- vl_api_ip4_address_t outside_ip_address;
- u16 outside_port;
- vl_api_ip4_address_t inside_ip_address;
- u16 inside_port;
- u16 protocol;
- vl_api_nat_config_flags_t flags;
- u64 last_heard;
- u64 total_bytes;
- u32 total_pkts;
- vl_api_ip4_address_t ext_host_address;
- u16 ext_host_port;
- vl_api_ip4_address_t ext_host_nat_address;
- u16 ext_host_nat_port;
-};
-
/** \brief NAT44 load-balancing address and port pair
@param addr - IPv4 address of the internal node
@param port - L4 port number of the internal node
@@ -1167,72 +800,195 @@ autoreply define nat44_del_session {
u16 ext_host_port;
};
-/** \brief Enable/disable forwarding for NAT44
- Forward packets which don't match existing translation
- or static mapping instead of dropping them.
+/** \brief Dump NAT44 users
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param enable - true for enable, false for disable
*/
-autoreply define nat44_forwarding_enable_disable {
- option deprecated;
+define nat44_user_dump {
u32 client_index;
u32 context;
- bool enable;
};
-/** \brief Check if forwarding is enabled or disabled
+/** \brief NAT44 users response
+ @param context - sender context, to match reply w/ request
+ @param vrf_id - VRF ID
+ @param ip_address - IPv4 address
+ @param nsessions - number of dynamic sessions
+ @param nstaticsessions - number of static sessions
+*/
+define nat44_user_details {
+ u32 context;
+ u32 vrf_id;
+ vl_api_ip4_address_t ip_address;
+ u32 nsessions;
+ u32 nstaticsessions;
+};
+
+/** \brief NAT44 user's sessions
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param ip_address - IPv4 address of the user to dump
+ @param vrf_id - VRF_ID
*/
-define nat44_forwarding_is_enabled {
+define nat44_user_session_dump {
option deprecated;
+
u32 client_index;
u32 context;
+ vl_api_ip4_address_t ip_address;
+ u32 vrf_id;
};
-/** \brief Response to check if forwarding is enabled or disabled
+/** \brief NAT44 user's sessions response
@param context - sender context, to match reply w/ request
- @param enabled - true if enabled, false if disabled
+ @param outside_ip_address - outside IPv4 address
+ @param outside_port - outside port
+ @param inside_ip_address - inside IPv4 address
+ @param inside_port - inside port
+ @param protocol - protocol
+ @param flags - flag NAT_IS_STATIC if session is static,
+ flag NAT_IS_TWICE_NAT if session is twice-nat,
+ flag NAT_IS_EXT_HOST_VALID if external host address
+ and port are valid
+ @param last_heard - last heard timer
+ @param total_bytes - count of bytes sent through session
+ @param total_pkts - count of packets sent through session
+ @param ext_host_address - external host IPv4 address
+ @param ext_host_port - external host port
+ @param ext_host_nat_address - post-NAT external host IPv4 address (valid
+ only if twice-nat session)
+ @param ext_host_nat_port - post-NAT external host port (valid only if
+ twice-nat session)
*/
-define nat44_forwarding_is_enabled_reply {
+define nat44_user_session_details {
option deprecated;
+
u32 context;
- bool enabled;
+ vl_api_ip4_address_t outside_ip_address;
+ u16 outside_port;
+ vl_api_ip4_address_t inside_ip_address;
+ u16 inside_port;
+ u16 protocol;
+ vl_api_nat_config_flags_t flags;
+ u64 last_heard;
+ u64 total_bytes;
+ u32 total_pkts;
+ vl_api_ip4_address_t ext_host_address;
+ u16 ext_host_port;
+ vl_api_ip4_address_t ext_host_nat_address;
+ u16 ext_host_nat_port;
};
-/** \brief Set NAT handoff frame queue options
+/** \brief NAT44 user's sessions
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param frame_queue_nelts - number of worker handoff frame queue elements
+ @param ip_address - IPv4 address of the user to dump
+ @param vrf_id - VRF_ID
*/
-autoreply define nat44_ed_set_fq_options {
- option in_progress;
+define nat44_user_session_v2_dump {
+ option deprecated;
+
u32 client_index;
u32 context;
- u32 frame_queue_nelts;
+ vl_api_ip4_address_t ip_address;
+ u32 vrf_id;
};
-/** \brief Show NAT handoff frame queue options
- @param client_index - opaque cookie to identify the sender
+/** \brief NAT44 user's sessions response
@param context - sender context, to match reply w/ request
+ @param outside_ip_address - outside IPv4 address
+ @param outside_port - outside port
+ @param inside_ip_address - inside IPv4 address
+ @param inside_port - inside port
+ @param protocol - protocol
+ @param flags - flag NAT_IS_STATIC if session is static,
+ flag NAT_IS_TWICE_NAT if session is twice-nat,
+ flag NAT_IS_EXT_HOST_VALID if external host address
+ and port are valid
+ @param last_heard - last heard timer
+ @param total_bytes - count of bytes sent through session
+ @param total_pkts - count of packets sent through session
+ @param ext_host_address - external host IPv4 address
+ @param ext_host_port - external host port
+ @param ext_host_nat_address - post-NAT external host IPv4 address (valid
+ only if twice-nat session)
+ @param ext_host_nat_port - post-NAT external host port (valid only if
+ twice-nat session)
+ @param is_timed_out - true if the session is timed out, false if it
+ is active
*/
-define nat44_ed_show_fq_options
-{
- option in_progress;
- u32 client_index;
+define nat44_user_session_v2_details {
+ option deprecated;
+
u32 context;
+ vl_api_ip4_address_t outside_ip_address;
+ u16 outside_port;
+ vl_api_ip4_address_t inside_ip_address;
+ u16 inside_port;
+ u16 protocol;
+ vl_api_nat_config_flags_t flags;
+ u64 last_heard;
+ u64 total_bytes;
+ u32 total_pkts;
+ vl_api_ip4_address_t ext_host_address;
+ u16 ext_host_port;
+ vl_api_ip4_address_t ext_host_nat_address;
+ u16 ext_host_nat_port;
+ bool is_timed_out;
};
-/** \brief Show NAT handoff frame queue options reply
+/** \brief NAT44 user's sessions response
@param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param frame_queue_nelts - number of worker handoff frame queue elements
+ @param outside_ip_address - outside IPv4 address
+ @param outside_port - outside port
+ @param inside_ip_address - inside IPv4 address
+ @param inside_port - inside port
+ @param protocol - protocol
+ @param flags - flag NAT_IS_STATIC if session is static,
+ flag NAT_IS_TWICE_NAT if session is twice-nat,
+ flag NAT_IS_EXT_HOST_VALID if external host address
+ and port are valid
+ @param last_heard - last heard timestamp, relative to VPP start
+ @param time_since_last_heard - difference between the current VPP time and the last_heard value
+ @param total_bytes - count of bytes sent through session
+ @param total_pkts - count of packets sent through session
+ @param ext_host_address - external host IPv4 address
+ @param ext_host_port - external host port
+ @param ext_host_nat_address - post-NAT external host IPv4 address (valid
+ only if twice-nat session)
+ @param ext_host_nat_port - post-NAT external host port (valid only if
+ twice-nat session)
+ @param is_timed_out - true if the session is timed out, false if it
+ is active
*/
-define nat44_ed_show_fq_options_reply
-{
- option in_progress;
+define nat44_user_session_v3_details {
u32 context;
- i32 retval;
- u32 frame_queue_nelts;
+ vl_api_ip4_address_t outside_ip_address;
+ u16 outside_port;
+ vl_api_ip4_address_t inside_ip_address;
+ u16 inside_port;
+ u16 protocol;
+ vl_api_nat_config_flags_t flags;
+ u64 last_heard;
+ u64 time_since_last_heard;
+ u64 total_bytes;
+ u32 total_pkts;
+ vl_api_ip4_address_t ext_host_address;
+ u16 ext_host_port;
+ vl_api_ip4_address_t ext_host_nat_address;
+ u16 ext_host_nat_port;
+ bool is_timed_out;
+};
+
+/** \brief NAT44 user's sessions
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip_address - IPv4 address of the user to dump
+ @param vrf_id - VRF_ID
+*/
+define nat44_user_session_v3_dump {
+ u32 client_index;
+ u32 context;
+ vl_api_ip4_address_t ip_address;
+ u32 vrf_id;
};
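
A minimal client-side sketch (not part of the patch) of how the v3 details
reply might be consumed. The vl_api_nat44_user_session_v3_details_t name
follows VPP's generated-API convention for the message defined above; the
handler registration and byte-order handling are assumptions here.

static void
on_nat44_user_session_v3_details (vl_api_nat44_user_session_v3_details_t *mp)
{
  if (mp->is_timed_out)
    return; /* session only awaits cleanup */

  /* v3 adds time_since_last_heard, so idle time no longer has to be
   * derived from last_heard, which counts from VPP start */
  fformat (stdout, "session idle %llu s, %llu bytes / %u pkts\n",
	   clib_net_to_host_u64 (mp->time_since_last_heard),
	   clib_net_to_host_u64 (mp->total_bytes),
	   clib_net_to_host_u32 (mp->total_pkts));
}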
diff --git a/src/plugins/nat/nat44-ed/nat44_ed.c b/src/plugins/nat/nat44-ed/nat44_ed.c
index 8ad971decea..08e577747c3 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed.c
@@ -1,6 +1,4 @@
/*
- * snat.c - simple nat plugin
- *
* Copyright (c) 2016 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,15 +26,17 @@
#include <vppinfra/bihash_16_8.h>
#include <nat/lib/log.h>
-#include <nat/lib/nat_syslog.h>
#include <nat/lib/nat_inlines.h>
#include <nat/lib/ipfix_logging.h>
+#include <vnet/syslog/syslog.h>
+#include <nat/lib/nat_syslog_constants.h>
+#include <nat/lib/nat_syslog.h>
#include <nat/nat44-ed/nat44_ed.h>
#include <nat/nat44-ed/nat44_ed_affinity.h>
#include <nat/nat44-ed/nat44_ed_inlines.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/stats/stats.h>
snat_main_t snat_main;
@@ -59,7 +59,7 @@ static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
if (PREDICT_FALSE (sm->enabled)) \
{ \
nat_log_err ("plugin enabled"); \
- return 1; \
+ return VNET_API_ERROR_FEATURE_ALREADY_ENABLED; \
} \
} \
while (0)
@@ -71,12 +71,11 @@ static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
if (PREDICT_FALSE (!sm->enabled)) \
{ \
nat_log_err ("plugin disabled"); \
- return 1; \
+ return VNET_API_ERROR_FEATURE_ALREADY_DISABLED; \
} \
} \
while (0)
-/* Hook up input features */
VNET_FEATURE_INIT (nat_pre_in2out, static) = {
.arc_name = "ip4-unicast",
.node_name = "nat-pre-in2out",
@@ -90,6 +89,18 @@ VNET_FEATURE_INIT (nat_pre_out2in, static) = {
"ip4-dhcp-client-detect",
"ip4-sv-reassembly-feature"),
};
+VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-ed-classify",
+ .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
+ "ip4-sv-reassembly-feature"),
+};
+VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-handoff-classify",
+ .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
+ "ip4-sv-reassembly-feature"),
+};
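
Feature placement on an arc is declared purely through ordering
constraints, so moving these registrations within the file does not change
the resolved graph order. A sketch with an assumed node name:

VNET_FEATURE_INIT (example_node, static) = {
  .arc_name = "ip4-unicast",
  .node_name = "example-node", /* hypothetical node, for illustration */
  .runs_after = VNET_FEATURES ("ip4-sv-reassembly-feature"),
};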
VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
.arc_name = "ip4-unicast",
.node_name = "nat44-in2out-worker-handoff",
@@ -101,17 +112,6 @@ VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
.runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
"ip4-dhcp-client-detect"),
};
-VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-in2out",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-out2in",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
- "ip4-dhcp-client-detect"),
-};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
.arc_name = "ip4-unicast",
.node_name = "nat44-ed-in2out",
@@ -123,32 +123,9 @@ VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
.runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
"ip4-dhcp-client-detect"),
};
-VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-ed-classify",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-handoff-classify",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-in2out-fast",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-out2in-fast",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
- "ip4-dhcp-client-detect"),
-};
-
-/* Hook up output features */
-VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
+VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
.arc_name = "ip4-output",
- .node_name = "nat44-in2out-output",
+ .node_name = "nat-pre-in2out-output",
.runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
.runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
@@ -158,12 +135,6 @@ VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
.runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
.runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
-VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
- .arc_name = "ip4-output",
- .node_name = "nat-pre-in2out-output",
- .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
- .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
-};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
.arc_name = "ip4-output",
.node_name = "nat44-ed-in2out-output",
@@ -176,156 +147,375 @@ VLIB_PLUGIN_REGISTER () = {
.description = "Network Address Translation (NAT)",
};
-static void nat44_ed_db_init (u32 translations, u32 translation_buckets);
-
+static void nat44_ed_db_init ();
static void nat44_ed_db_free ();
+static void nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm);
+
+static int nat44_ed_add_static_mapping_internal (
+ ip4_address_t l_addr, ip4_address_t e_addr, u16 l_port, u16 e_port,
+ ip_protocol_t proto, u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag);
+static int nat44_ed_del_static_mapping_internal (ip4_address_t l_addr,
+ ip4_address_t e_addr,
+ u16 l_port, u16 e_port,
+ ip_protocol_t proto,
+ u32 vrf_id, u32 flags);
u32 nat_calc_bihash_buckets (u32 n_elts);
-u8 *
-format_session_kvp (u8 * s, va_list * args)
+static_always_inline int
+nat44_ed_sm_i2o_add (snat_main_t *sm, snat_static_mapping_t *m,
+ ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
{
- clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
+ ASSERT (!pool_is_free (sm->static_mappings, m));
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_i2o_kv (&kv, addr.as_u32, port, fib_index, proto,
+ m - sm->static_mappings);
+ return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 1 /*is_add*/);
+}
- s = format (s, "%U thread-index %llu session-index %llu", format_snat_key,
- v->key, nat_value_get_thread_index (v),
- nat_value_get_session_index (v));
+static_always_inline int
+nat44_ed_sm_i2o_del (snat_main_t *sm, ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto)
+{
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_i2o_k (&kv, addr.as_u32, port, fib_index, proto);
+ return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 0 /*is_add*/);
+}
- return s;
+static_always_inline int
+nat44_ed_sm_o2i_add (snat_main_t *sm, snat_static_mapping_t *m,
+ ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
+{
+ ASSERT (!pool_is_free (sm->static_mappings, m));
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_o2i_kv (&kv, addr.as_u32, port, fib_index, proto,
+ m - sm->static_mappings);
+ return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 1 /*is_add*/);
}
-u8 *
-format_static_mapping_kvp (u8 * s, va_list * args)
+static_always_inline int
+nat44_ed_sm_o2i_del (snat_main_t *sm, ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto)
+{
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_o2i_k (&kv, addr.as_u32, port, fib_index, proto);
+ return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 0 /*is_add*/);
+}
+
+void
+nat44_ed_free_session_data (snat_main_t *sm, snat_session_t *s,
+ u32 thread_index, u8 is_ha)
{
- clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
+ per_vrf_sessions_unregister_session (s, thread_index);
- s = format (s, "%U static-mapping-index %llu",
- format_snat_key, v->key, v->value);
+ if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
+ nat_elog_warn (sm, "flow hash del failed");
- return s;
+ if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
+ nat_elog_warn (sm, "flow hash del failed");
+
+ if (na44_ed_is_fwd_bypass_session (s))
+ {
+ return;
+ }
+
+ if (nat44_ed_is_affinity_session (s))
+ nat_affinity_unlock (s->ext_host_addr, s->out2in.addr, s->proto,
+ s->out2in.port);
+
+ if (!is_ha)
+ nat_syslog_nat44_sdel (0, s->in2out.fib_index, &s->in2out.addr,
+ s->in2out.port, &s->ext_host_nat_addr,
+ s->ext_host_nat_port, &s->out2in.addr,
+ s->out2in.port, &s->ext_host_addr, s->ext_host_port,
+ s->proto, nat44_ed_is_twice_nat_session (s));
+
+ if (!is_ha)
+ {
+ /* log NAT event */
+ nat_ipfix_logging_nat44_ses_delete (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
+ }
}
-u8 *
-format_ed_session_kvp (u8 * s, va_list * args)
+static ip_interface_address_t *
+nat44_ed_get_ip_interface_address (u32 sw_if_index, ip4_address_t addr)
{
- clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
+ snat_main_t *sm = &snat_main;
+
+ ip_lookup_main_t *lm = &sm->ip4_main->lookup_main;
+ ip_interface_address_t *ia;
+ ip4_address_t *ip4a;
- u8 proto;
- u16 r_port, l_port;
- ip4_address_t l_addr, r_addr;
+ foreach_ip_interface_address (
+ lm, ia, sw_if_index, 1, ({
+ ip4a = ip_interface_address_get_address (lm, ia);
+ nat_log_debug ("sw_if_idx: %u addr: %U ? %U", sw_if_index,
+ format_ip4_address, ip4a, format_ip4_address, &addr);
+ if (ip4a->as_u32 == addr.as_u32)
+ {
+ return ia;
+ }
+ }));
+ return NULL;
+}
+
+static int
+nat44_ed_resolve_nat_addr_len (snat_address_t *ap,
+ snat_interface_t *interfaces)
+{
+ ip_interface_address_t *ia;
+ snat_interface_t *i;
u32 fib_index;
- split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
- s = format (s,
- "local %U:%d remote %U:%d proto %U fib %d thread-index %u "
- "session-index %u",
- format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
- format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
- format_ip_protocol, proto, fib_index,
- ed_value_get_thread_index (v), ed_value_get_session_index (v));
+ pool_foreach (i, interfaces)
+ {
+ if (!nat44_ed_is_interface_outside (i))
+ {
+ continue;
+ }
- return s;
+ fib_index = ip4_fib_table_get_index_for_sw_if_index (i->sw_if_index);
+ if (fib_index != ap->fib_index)
+ {
+ continue;
+ }
+
+ if ((ia = nat44_ed_get_ip_interface_address (i->sw_if_index, ap->addr)))
+ {
+ ap->addr_len = ia->address_length;
+ ap->sw_if_index = i->sw_if_index;
+ ap->net.as_u32 = ap->addr.as_u32 & ip4_main.fib_masks[ap->addr_len];
+
+ nat_log_debug ("pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
+ format_ip4_address, &ap->addr, ap->sw_if_index,
+ format_ip4_address, &ap->net, ap->addr_len);
+ return 0;
+ }
+ }
+ return 1;
}
-void
-nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
- u8 is_ha)
+static void
+nat44_ed_update_outside_if_addresses (snat_address_t *ap)
{
- per_vrf_sessions_unregister_session (s, thread_index);
-
- if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
- nat_elog_warn (sm, "flow hash del failed");
+ snat_main_t *sm = &snat_main;
- if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
- nat_elog_warn (sm, "flow hash del failed");
+ if (!nat44_ed_resolve_nat_addr_len (ap, sm->interfaces))
+ {
+ return;
+ }
- if (is_fwd_bypass_session (s))
+ if (!nat44_ed_resolve_nat_addr_len (ap, sm->output_feature_interfaces))
{
return;
}
+}
+
+static void
+nat44_ed_bind_if_addr_to_nat_addr (u32 sw_if_index)
+{
+ snat_main_t *sm = &snat_main;
+ ip_interface_address_t *ia;
+ snat_address_t *ap;
+
+ u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+
+ vec_foreach (ap, sm->addresses)
+ {
+ if (fib_index != ap->fib_index)
+ {
+ continue;
+ }
+
+ if ((ia = nat44_ed_get_ip_interface_address (sw_if_index, ap->addr)))
+ {
+ ap->addr_len = ia->address_length;
+ ap->sw_if_index = sw_if_index;
+ ap->net.as_u32 = ap->addr.as_u32 & ip4_main.fib_masks[ap->addr_len];
+
+ nat_log_debug ("pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
+ format_ip4_address, &ap->addr, ap->sw_if_index,
+ format_ip4_address, &ap->net, ap->addr_len);
+ return;
+ }
+ }
+}
- if (is_affinity_sessions (s))
- nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
- s->nat_proto, s->out2in.port);
+static_always_inline snat_fib_entry_reg_t *
+nat44_ed_get_fib_entry_reg (ip4_address_t addr, u32 sw_if_index, int *out_idx)
+{
+ snat_main_t *sm = &snat_main;
+ snat_fib_entry_reg_t *fe;
+ int i;
- if (!is_ha)
- nat_syslog_nat44_sdel (
- 0, s->in2out.fib_index, &s->in2out.addr, s->in2out.port,
- &s->ext_host_nat_addr, s->ext_host_nat_port, &s->out2in.addr,
- s->out2in.port, &s->ext_host_addr, s->ext_host_port, s->nat_proto,
- is_twice_nat_session (s));
+ for (i = 0; i < vec_len (sm->fib_entry_reg); i++)
+ {
+ fe = sm->fib_entry_reg + i;
+ if ((addr.as_u32 == fe->addr.as_u32) && (sw_if_index == fe->sw_if_index))
+ {
+ if (out_idx)
+ {
+ *out_idx = i;
+ }
+ return fe;
+ }
+ }
+ return NULL;
+}
- if (snat_is_unk_proto_session (s))
- return;
+static void
+nat44_ed_add_fib_entry_reg (ip4_address_t addr, u32 sw_if_index)
+{
+ // Add the external NAT address to the FIB as receive entries. This ensures
+ // that VPP will reply to ARP for this address and we don't need to enable
+ // proxy ARP on the outside interface.
+ snat_main_t *sm = &snat_main;
+ snat_fib_entry_reg_t *fe;
- if (!is_ha)
+ if (!(fe = nat44_ed_get_fib_entry_reg (addr, sw_if_index, 0)))
{
- /* log NAT event */
- nat_ipfix_logging_nat44_ses_delete (thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port,
- s->in2out.fib_index);
+ fib_prefix_t prefix = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = addr.as_u32,
+ },
+ };
+ u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+ fib_table_entry_update_one_path (fib_index, &prefix, sm->fib_src_low,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL |
+ FIB_ENTRY_FLAG_EXCLUSIVE),
+ DPO_PROTO_IP4, NULL, sw_if_index, ~0, 1,
+ NULL, FIB_ROUTE_PATH_FLAG_NONE);
+
+ vec_add2 (sm->fib_entry_reg, fe, 1);
+ clib_memset (fe, 0, sizeof (*fe));
+ fe->addr.as_u32 = addr.as_u32;
+ fe->sw_if_index = sw_if_index;
+ }
+ fe->count++;
+}
+
+static void
+nat44_ed_del_fib_entry_reg (ip4_address_t addr, u32 sw_if_index)
+{
+ snat_main_t *sm = &snat_main;
+ snat_fib_entry_reg_t *fe;
+ int i;
+
+ if ((fe = nat44_ed_get_fib_entry_reg (addr, sw_if_index, &i)))
+ {
+ fe->count--;
+ if (0 == fe->count)
+ {
+ fib_prefix_t prefix = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = addr.as_u32,
+ },
+ };
+ u32 fib_index =
+ ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+ fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
+ vec_del1 (sm->fib_entry_reg, i);
+ }
}
+}
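
The registry above pairs a use count with each (addr, sw_if_index) FIB
entry, so the different triggers that need the same receive route compose
without duplicating it. A usage sketch:

nat44_ed_add_fib_entry_reg (addr, sw_if_index); /* count 0 -> 1, entry installed */
nat44_ed_add_fib_entry_reg (addr, sw_if_index); /* count 1 -> 2, nothing new */
nat44_ed_del_fib_entry_reg (addr, sw_if_index); /* count 2 -> 1, entry kept */
nat44_ed_del_fib_entry_reg (addr, sw_if_index); /* count 1 -> 0, entry removed */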
- /* Twice NAT address and port for external host */
- if (is_twice_nat_session (s))
+static void
+nat44_ed_add_del_interface_fib_reg_entries (ip4_address_t addr, u8 is_add)
+{
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i;
+
+ pool_foreach (i, sm->interfaces)
{
- snat_free_outside_address_and_port (sm->twice_nat_addresses,
- thread_index,
- &s->ext_host_nat_addr,
- s->ext_host_nat_port, s->nat_proto);
+ if (nat44_ed_is_interface_outside (i))
+ {
+ if (is_add)
+ {
+ nat44_ed_add_fib_entry_reg (addr, i->sw_if_index);
+ }
+ else
+ {
+ nat44_ed_del_fib_entry_reg (addr, i->sw_if_index);
+ }
+ }
+ }
+ pool_foreach (i, sm->output_feature_interfaces)
+ {
+ if (nat44_ed_is_interface_outside (i))
+ {
+ if (is_add)
+ {
+ nat44_ed_add_fib_entry_reg (addr, i->sw_if_index);
+ }
+ else
+ {
+ nat44_ed_del_fib_entry_reg (addr, i->sw_if_index);
+ }
+ }
}
+}
- if (snat_is_session_static (s))
- return;
+static_always_inline void
+nat44_ed_add_del_nat_addr_fib_reg_entries (u32 sw_if_index, u8 is_add)
+{
+ snat_main_t *sm = &snat_main;
+ snat_address_t *ap;
- snat_free_outside_address_and_port (sm->addresses, thread_index,
- &s->out2in.addr, s->out2in.port,
- s->nat_proto);
+ vec_foreach (ap, sm->addresses)
+ {
+ if (is_add)
+ {
+ nat44_ed_add_fib_entry_reg (ap->addr, sw_if_index);
+ }
+ else
+ {
+ nat44_ed_del_fib_entry_reg (ap->addr, sw_if_index);
+ }
+ }
}
-void
-snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
- int is_add)
+static_always_inline void
+nat44_ed_add_del_sm_fib_reg_entries (u32 sw_if_index, u8 is_add)
{
snat_main_t *sm = &snat_main;
- fib_prefix_t prefix = {
- .fp_len = p_len,
- .fp_proto = FIB_PROTOCOL_IP4,
- .fp_addr = {
- .ip4.as_u32 = addr->as_u32,
- },
- };
- u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+ snat_static_mapping_t *m;
- if (is_add)
- fib_table_entry_update_one_path (fib_index,
- &prefix,
- sm->fib_src_low,
- (FIB_ENTRY_FLAG_CONNECTED |
- FIB_ENTRY_FLAG_LOCAL |
- FIB_ENTRY_FLAG_EXCLUSIVE),
- DPO_PROTO_IP4,
- NULL,
- sw_if_index,
- ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
- else
- fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
+ pool_foreach (m, sm->static_mappings)
+ {
+ if (is_add)
+ {
+ nat44_ed_add_fib_entry_reg (m->external_addr, sw_if_index);
+ }
+ else
+ {
+ nat44_ed_del_fib_entry_reg (m->external_addr, sw_if_index);
+ }
+ }
}
int
-snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
- u8 twice_nat)
+nat44_ed_add_address (ip4_address_t *addr, u32 vrf_id, u8 twice_nat)
{
- snat_address_t *ap;
- snat_interface_t *i;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
+ snat_main_t *sm = &snat_main;
+ snat_address_t *ap, *addresses;
+
+ addresses = twice_nat ? sm->twice_nat_addresses : sm->addresses;
- /* Check if address already exists */
- vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ // check if address already exists
+ vec_foreach (ap, addresses)
{
if (ap->addr.as_u32 == addr->as_u32)
{
@@ -335,62 +525,231 @@ snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
}
if (twice_nat)
- vec_add2 (sm->twice_nat_addresses, ap, 1);
+ {
+ vec_add2 (sm->twice_nat_addresses, ap, 1);
+ }
else
- vec_add2 (sm->addresses, ap, 1);
+ {
+ vec_add2 (sm->addresses, ap, 1);
+ }
+ ap->addr_len = ~0;
+ ap->fib_index = ~0;
ap->addr = *addr;
+
if (vrf_id != ~0)
- ap->fib_index =
- fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
- sm->fib_src_low);
+ {
+ ap->fib_index = fib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, vrf_id, sm->fib_src_low);
+ }
+
+ if (!twice_nat)
+ {
+ // without an enabled outside interface the address is not added
+ // to the FIB
+ nat44_ed_add_del_interface_fib_reg_entries (*addr, 1);
+ nat44_ed_update_outside_if_addresses (ap);
+ }
+ return 0;
+}
+
+int
+nat44_ed_del_address (ip4_address_t addr, u8 twice_nat)
+{
+ snat_main_t *sm = &snat_main;
+ snat_address_t *a = 0, *addresses;
+ snat_session_t *ses;
+ u32 *ses_to_be_removed = 0, *ses_index;
+ snat_main_per_thread_data_t *tsm;
+ int j;
+
+ addresses = twice_nat ? sm->twice_nat_addresses : sm->addresses;
+
+ for (j = 0; j < vec_len (addresses); j++)
+ {
+ if (addresses[j].addr.as_u32 == addr.as_u32)
+ {
+ a = addresses + j;
+ break;
+ }
+ }
+ if (!a)
+ {
+ nat_log_err ("no such address");
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ // delete dynamic sessions only
+ vec_foreach (tsm, sm->per_thread_data)
+ {
+ pool_foreach (ses, tsm->sessions)
+ {
+ if (ses->flags & SNAT_SESSION_FLAG_STATIC_MAPPING)
+ {
+ continue;
+ }
+ if (ses->out2in.addr.as_u32 == addr.as_u32)
+ {
+ nat44_ed_free_session_data (sm, ses, tsm - sm->per_thread_data,
+ 0);
+ vec_add1 (ses_to_be_removed, ses - tsm->sessions);
+ }
+ }
+ vec_foreach (ses_index, ses_to_be_removed)
+ {
+ ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
+ nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
+ }
+ vec_free (ses_to_be_removed);
+ }
+
+ if (!twice_nat)
+ {
+ nat44_ed_add_del_interface_fib_reg_entries (addr, 0);
+ }
+
+ if (a->fib_index != ~0)
+ {
+ fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+ }
+
+ if (!twice_nat)
+ {
+ vec_del1 (sm->addresses, j);
+ }
else
- ap->fib_index = ~0;
+ {
+ vec_del1 (sm->twice_nat_addresses, j);
+ }
- #define _(N, i, n, s) \
- clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
- ap->busy_##n##_ports = 0; \
- ap->busy_##n##_ports_per_thread = 0;\
- vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
- foreach_nat_protocol
- #undef _
+ return 0;
+}
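
A usage sketch for the pair above, assuming a caller inside the plugin;
the exact error for a duplicate address is outside this hunk:

ip4_address_t pool = { .as_u32 = clib_host_to_net_u32 (0xc0000201) }; /* 192.0.2.1 */
int rv;

rv = nat44_ed_add_address (&pool, ~0 /* no vrf */, 0 /* twice_nat */);
/* rv == VNET_API_ERROR_UNSUPPORTED when the plugin is disabled */
if (!rv)
  rv = nat44_ed_del_address (pool, 0 /* twice_nat */);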
- if (twice_nat)
- return 0;
+vrf_table_t *
+nat44_ed_get_vrf_table (u32 table_vrf_id)
+{
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
- /* Add external address to FIB */
- pool_foreach (i, sm->interfaces)
- {
- if (nat_interface_is_inside (i))
- continue;
+ pool_foreach (t, sm->vrf_tables)
+ {
+ if (table_vrf_id == t->table_vrf_id)
+ {
+ return t;
+ }
+ }
+ return NULL;
+}
- snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
- break;
- }
- pool_foreach (i, sm->output_feature_interfaces)
- {
- if (nat_interface_is_inside (i))
- continue;
+vrf_route_t *
+nat44_ed_get_vrf_route (vrf_table_t *t, u32 vrf_id)
+{
+ vrf_route_t *r;
- snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
- break;
- }
+ pool_foreach (r, t->routes)
+ {
+ if (vrf_id == r->vrf_id)
+ {
+ return r;
+ }
+ }
+ return NULL;
+}
+
+int
+nat44_ed_add_del_vrf_table (u32 table_vrf_id, bool is_add)
+{
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
+ vrf_route_t *r;
+
+ t = nat44_ed_get_vrf_table (table_vrf_id);
+ if (t)
+ {
+ if (is_add)
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+ pool_foreach (r, t->routes)
+ {
+ fib_table_unlock (r->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+ }
+ fib_table_unlock (t->table_fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+
+ pool_free (t->routes);
+ pool_put (sm->vrf_tables, t);
+ }
+ else
+ {
+ if (!is_add)
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+ pool_get (sm->vrf_tables, t);
+ clib_memset (t, 0, sizeof (*t));
+ t->table_vrf_id = table_vrf_id;
+ t->table_fib_index = fib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, table_vrf_id, sm->fib_src_low);
+ }
return 0;
}
-static int
-is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
+void
+nat44_ed_del_vrf_tables ()
{
- snat_static_mapping_t *m;
- pool_foreach (m, sm->static_mappings)
- {
- if (is_sm_addr_only (m->flags) || is_sm_out2in_only (m->flags) ||
- is_sm_identity_nat (m->flags))
- continue;
- if (m->external_addr.as_u32 == addr.as_u32)
- return 1;
- }
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
+ vrf_route_t *r;
+
+ pool_foreach (t, sm->vrf_tables)
+ {
+ pool_foreach (r, t->routes)
+ {
+ fib_table_unlock (r->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+ }
+ fib_table_unlock (t->table_fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+ pool_free (t->routes);
+ }
+ pool_free (sm->vrf_tables);
+}
+
+int
+nat44_ed_add_del_vrf_route (u32 table_vrf_id, u32 vrf_id, bool is_add)
+{
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
+ vrf_route_t *r;
+
+ t = nat44_ed_get_vrf_table (table_vrf_id);
+ if (!t)
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ r = nat44_ed_get_vrf_route (t, vrf_id);
+ if (r)
+ {
+ if (is_add)
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+ fib_table_unlock (r->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+ pool_put (t->routes, r);
+ }
+ else
+ {
+ if (!is_add)
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+ pool_get (t->routes, r);
+ clib_memset (r, 0, sizeof (*r));
+ r->vrf_id = vrf_id;
+ r->fib_index = fib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, vrf_id, sm->fib_src_low);
+ }
+
return 0;
}
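
A usage sketch: a NAT VRF table groups destination VRFs ("routes"),
presumably tried in order by the in2out lookup path. Both calls return
VNET_API_ERROR_VALUE_EXIST / VNET_API_ERROR_NO_SUCH_ENTRY on duplicate
adds and missing deletes, as shown above.

nat44_ed_add_del_vrf_table (10 /* table_vrf_id */, true);
nat44_ed_add_del_vrf_route (10, 20 /* vrf_id */, true);
nat44_ed_add_del_vrf_route (10, 20, false);
nat44_ed_add_del_vrf_table (10, false); /* also unlocks any remaining routes */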
@@ -401,9 +760,9 @@ get_thread_idx_by_port (u16 e_port)
u32 thread_idx = sm->num_workers;
if (sm->num_workers > 1)
{
- thread_idx =
- sm->first_worker_index +
- sm->workers[(e_port - 1024) / sm->port_per_thread];
+ thread_idx = sm->first_worker_index +
+ sm->workers[(e_port - ED_USER_PORT_OFFSET) /
+ sm->port_per_thread % _vec_len (sm->workers)];
}
return thread_idx;
}
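
A worked example for the change above, assuming ED_USER_PORT_OFFSET is
1024 (the literal it replaces), 4 workers, first_worker_index == 1 and
port_per_thread == 16128 ((65536 - 1024) / 4):

/* e_port = 20000:
 *   (20000 - 1024) / 16128 = 1, 1 % 4 = 1  ->  thread_idx = 1 + workers[1]
 * The added "% _vec_len (sm->workers)" keeps the index in range even for
 * ports outside the dynamically allocated port space. */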
@@ -427,18 +786,17 @@ nat_ed_static_mapping_del_sessions (snat_main_t * sm,
}
if (!addr_only)
{
- if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
- s->out2in.port != e_port ||
- s->in2out.port != l_port ||
- s->nat_proto != protocol)
- continue;
+ if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
+ s->out2in.port != e_port || s->in2out.port != l_port ||
+ s->proto != protocol)
+ continue;
}
- if (is_lb_session (s))
+ if (nat44_ed_is_lb_session (s))
continue;
- if (!snat_is_session_static (s))
+ if (!nat44_ed_is_session_static (s))
continue;
- nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
+ nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
vec_add1 (indexes_to_free, s - tsm->sessions);
if (!addr_only)
break;
@@ -452,118 +810,50 @@ nat_ed_static_mapping_del_sessions (snat_main_t * sm,
vec_free (indexes_to_free);
}
-int
-nat44_ed_reserve_port (ip4_address_t addr, u16 port, nat_protocol_t proto)
+static_always_inline snat_static_mapping_t *
+nat44_ed_sm_lookup (snat_main_t *sm, clib_bihash_kv_16_8_t *kv)
{
- u32 ti = get_thread_idx_by_port (port);
- snat_main_t *sm = &snat_main;
- snat_address_t *a = 0;
- int i;
-
- for (i = 0; i < vec_len (sm->addresses); i++)
+ clib_bihash_kv_16_8_t v;
+ int rc = clib_bihash_search_16_8 (&sm->flow_hash, kv, &v);
+ if (!rc)
{
- a = sm->addresses + i;
-
- if (a->addr.as_u32 != addr.as_u32)
- continue;
-
- switch (proto)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_port_refcounts[port]) \
- goto done; \
- ++a->busy_##n##_port_refcounts[port]; \
- if (port > 1024) \
- { \
- a->busy_##n##_ports++; \
- a->busy_##n##_ports_per_thread[ti]++; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- goto done;
- }
-
- return 0;
+ ASSERT (0 == ed_value_get_thread_index (&v));
+ return pool_elt_at_index (sm->static_mappings,
+ ed_value_get_session_index (&v));
}
-
-done:
- return 1;
+ return NULL;
}
-int
-nat44_ed_free_port (ip4_address_t addr, u16 port, nat_protocol_t proto)
+snat_static_mapping_t *
+nat44_ed_sm_o2i_lookup (snat_main_t *sm, ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto)
{
- u32 ti = get_thread_idx_by_port (port);
- snat_main_t *sm = &snat_main;
- snat_address_t *a = 0;
- int i;
-
- for (i = 0; i < vec_len (sm->addresses); i++)
- {
- a = sm->addresses + i;
-
- if (a->addr.as_u32 != addr.as_u32)
- continue;
-
- switch (proto)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- --a->busy_##n##_port_refcounts[port]; \
- if (port > 1024) \
- { \
- a->busy_##n##_ports--; \
- a->busy_##n##_ports_per_thread[ti]--; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- goto done;
- }
-
- return 0;
- }
-
-done:
- return 1;
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_o2i_k (&kv, addr.as_u32, port, fib_index, proto);
+ return nat44_ed_sm_lookup (sm, &kv);
}
-void
-nat44_ed_add_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
- nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
- u32 flags, ip4_address_t pool_addr, u8 *tag)
+snat_static_mapping_t *
+nat44_ed_sm_i2o_lookup (snat_main_t *sm, ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto)
{
- snat_static_map_resolve_t *rp;
- snat_main_t *sm = &snat_main;
-
- vec_add2 (sm->to_resolve, rp, 1);
- rp->l_addr.as_u32 = l_addr.as_u32;
- rp->l_port = l_port;
- rp->e_port = e_port;
- rp->sw_if_index = sw_if_index;
- rp->vrf_id = vrf_id;
- rp->proto = proto;
- rp->flags = flags;
- rp->pool_addr = pool_addr;
- rp->tag = vec_dup (tag);
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_i2o_k (&kv, addr.as_u32, port, fib_index, proto);
+ return nat44_ed_sm_lookup (sm, &kv);
}
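
A round-trip sketch for the helpers above (sm, m and fib_index assumed in
scope): both sides build the identical 16_8 key, and the value encodes the
mapping's pool index with thread index 0 (asserted in nat44_ed_sm_lookup)
to distinguish static mappings from sessions in the shared flow hash.

nat44_ed_sm_i2o_add (sm, m, m->local_addr, m->local_port, fib_index,
		     m->proto);			       /* insert mapping m */
ASSERT (m == nat44_ed_sm_i2o_lookup (sm, m->local_addr, m->local_port,
				     fib_index, m->proto)); /* find it again */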
-int
+static snat_static_mapping_resolve_t *
nat44_ed_get_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
- nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
- u32 flags, int *out)
+ ip_protocol_t proto, u32 vrf_id, u32 sw_if_index,
+ u32 flags, int *out_idx)
{
- snat_static_map_resolve_t *rp;
+ snat_static_mapping_resolve_t *rp;
snat_main_t *sm = &snat_main;
int i;
- for (i = 0; i < vec_len (sm->to_resolve); i++)
+ for (i = 0; i < vec_len (sm->sm_to_resolve); i++)
{
- rp = sm->to_resolve + i;
+ rp = sm->sm_to_resolve + i;
if (rp->sw_if_index == sw_if_index && rp->vrf_id == vrf_id)
{
@@ -592,27 +882,27 @@ nat44_ed_get_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
{
continue;
}
- if (out)
+ if (out_idx)
{
- *out = i;
+ *out_idx = i;
}
- return 0;
+ return rp;
}
}
- return 1;
+ return NULL;
}
-int
+static int
nat44_ed_del_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
- nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
+ ip_protocol_t proto, u32 vrf_id, u32 sw_if_index,
u32 flags)
{
snat_main_t *sm = &snat_main;
int i;
- if (!nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
- sw_if_index, flags, &i))
+ if (nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags, &i))
{
- vec_del1 (sm->to_resolve, i);
+ vec_del1 (sm->sm_to_resolve, i);
return 0;
}
return 1;
@@ -639,45 +929,98 @@ nat44_ed_validate_sm_input (u32 flags)
int
nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
- u16 l_port, u16 e_port, nat_protocol_t proto,
+ u16 l_port, u16 e_port, ip_protocol_t proto,
u32 vrf_id, u32 sw_if_index, u32 flags,
ip4_address_t pool_addr, u8 *tag)
{
+ snat_static_mapping_resolve_t *rp;
snat_main_t *sm = &snat_main;
- clib_bihash_kv_8_8_t kv, value;
- snat_interface_t *interface;
- nat44_lb_addr_port_t *local;
- snat_static_mapping_t *m;
- u32 fib_index = ~0;
int rv;
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
rv = nat44_ed_validate_sm_input (flags);
if (rv != 0)
{
return rv;
}
- if (is_sm_addr_only (flags))
+ // interface-bound mapping
+ if (is_sm_switch_address (flags))
{
- e_port = l_port = proto = 0;
+ if (nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags, 0))
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+ vec_add2 (sm->sm_to_resolve, rp, 1);
+ rp->l_addr.as_u32 = l_addr.as_u32;
+ rp->l_port = l_port;
+ rp->e_port = e_port;
+ rp->sw_if_index = sw_if_index;
+ rp->vrf_id = vrf_id;
+ rp->proto = proto;
+ rp->flags = flags;
+ rp->pool_addr = pool_addr;
+ rp->tag = vec_dup (tag);
+ rp->is_resolved = 0;
+
+ ip4_address_t *first_int_addr =
+ ip4_interface_first_address (sm->ip4_main, sw_if_index, 0);
+ if (!first_int_addr)
+ {
+ return 0;
+ }
+
+ e_addr.as_u32 = first_int_addr->as_u32;
+ rp->is_resolved = 1;
}
- if (is_sm_switch_address (flags))
+ rv = nat44_ed_add_static_mapping_internal (l_addr, e_addr, l_port, e_port,
+ proto, vrf_id, sw_if_index, flags,
+ pool_addr, tag);
+ if ((0 != rv) && is_sm_switch_address (flags))
+ {
+ nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags);
+ }
+
+ return rv;
+}
+
+int
+nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, ip_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags)
+{
+ snat_main_t *sm = &snat_main;
+ int rv;
+
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ rv = nat44_ed_validate_sm_input (flags);
+ if (rv != 0)
{
- // this mapping is interface bound
- ip4_address_t *first_int_addr;
+ return rv;
+ }
- // check if this record isn't registered for resolve
- if (!nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
- sw_if_index, flags, 0))
+ // interface-bound mapping
+ if (is_sm_switch_address (flags))
+ {
+ if (nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags))
{
- return VNET_API_ERROR_VALUE_EXIST;
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
}
- // register record for resolve
- nat44_ed_add_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
- sw_if_index, flags, pool_addr, tag);
- first_int_addr =
+ ip4_address_t *first_int_addr =
ip4_interface_first_address (sm->ip4_main, sw_if_index, 0);
if (!first_int_addr)
{
@@ -688,25 +1031,44 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
e_addr.as_u32 = first_int_addr->as_u32;
}
+ return nat44_ed_del_static_mapping_internal (l_addr, e_addr, l_port, e_port,
+ proto, vrf_id, flags);
+}
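
A usage sketch for an interface-bound port forward, assuming a caller
where sw_if_index, pool_addr and rv are in scope; the flag constant name
is assumed (only the is_sm_switch_address predicate is visible here) and
byte-order conventions for the ports are elided:

ip4_address_t l_addr = { .as_u32 = clib_host_to_net_u32 (0x0a000002) }; /* 10.0.0.2 */
ip4_address_t unused = { 0 }; /* resolved from sw_if_index instead */

rv = nat44_ed_add_static_mapping (l_addr, unused, 8080 /* l_port */,
				  80 /* e_port */, IP_PROTOCOL_TCP,
				  ~0 /* vrf_id */, sw_if_index,
				  NAT_SM_FLAG_SWITCH_ADDRESS /* assumed */,
				  pool_addr, 0 /* tag */);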
+
+static int
+nat44_ed_add_static_mapping_internal (ip4_address_t l_addr,
+ ip4_address_t e_addr, u16 l_port,
+ u16 e_port, ip_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag)
+{
+ snat_main_t *sm = &snat_main;
+ nat44_lb_addr_port_t *local;
+ snat_static_mapping_t *m;
+ u32 fib_index = ~0;
+
+ if (is_sm_addr_only (flags))
+ {
+ e_port = l_port = proto = 0;
+ }
+
if (is_sm_identity_nat (flags))
{
l_port = e_port;
l_addr.as_u32 = e_addr.as_u32;
}
- // fib index 0
- init_nat_k (&kv, e_addr, e_port, 0, proto);
-
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value.value);
+ // case: adding a local identity NAT record for a different VRF table
+
if (!is_sm_identity_nat (m->flags))
{
return VNET_API_ERROR_VALUE_EXIST;
}
- // case:
- // adding local identity nat record for different vrf table
pool_foreach (local, m->locals)
{
if (local->vrf_id == vrf_id)
@@ -721,9 +1083,8 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
local->fib_index = fib_table_find_or_create_and_lock (
FIB_PROTOCOL_IP4, vrf_id, sm->fib_src_low);
- init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
- m->proto, 0, m - sm->static_mappings);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
+ nat44_ed_sm_i2o_add (sm, m, m->local_addr, m->local_port,
+ local->fib_index, m->proto);
return 0;
}
@@ -745,28 +1106,12 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
// identity nat supports multiple records in local mapping
if (!(is_sm_out2in_only (flags) || is_sm_identity_nat (flags)))
{
- init_nat_k (&kv, l_addr, l_port, fib_index, proto);
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
+ if (nat44_ed_sm_i2o_lookup (sm, l_addr, l_port, fib_index, proto))
{
return VNET_API_ERROR_VALUE_EXIST;
}
}
- if (!(is_sm_out2in_only (flags) || is_sm_addr_only (flags) ||
- sm->static_mapping_only))
- {
- if (nat44_ed_reserve_port (e_addr, e_port, proto))
- {
- // remove resolve record
- if (is_sm_switch_address (flags) && !is_sm_identity_nat (flags))
- {
- nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto,
- vrf_id, sw_if_index, flags);
- }
- return VNET_API_ERROR_NO_SUCH_ENTRY;
- }
- }
-
pool_get (sm->static_mappings, m);
clib_memset (m, 0, sizeof (*m));
@@ -774,13 +1119,9 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
m->local_addr = l_addr;
m->external_addr = e_addr;
+ m->pool_addr = pool_addr;
m->tag = vec_dup (tag);
- if (is_sm_exact_address (flags) && is_sm_twice_nat (flags))
- {
- m->pool_addr = pool_addr;
- }
-
if (!is_sm_addr_only (flags))
{
m->local_port = l_port;
@@ -803,14 +1144,11 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
if (!is_sm_out2in_only (flags))
{
- init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto, 0,
- m - sm->static_mappings);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
+ nat44_ed_sm_i2o_add (sm, m, m->local_addr, m->local_port, fib_index,
+ m->proto);
}
- init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
- m - sm->static_mappings);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
+ nat44_ed_sm_o2i_add (sm, m, m->external_addr, m->external_port, 0, m->proto);
if (sm->num_workers > 1)
{
@@ -824,79 +1162,29 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
vec_add1 (m->workers, worker_index);
}
- if (is_sm_identity_nat (flags) || !is_sm_addr_only (flags))
- return 0;
-
- pool_foreach (interface, sm->interfaces)
- {
- if (nat_interface_is_inside (interface))
- continue;
-
- snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, 1);
- break;
- }
-
- pool_foreach (interface, sm->output_feature_interfaces)
- {
- if (nat_interface_is_inside (interface))
- continue;
-
- snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, 1);
- break;
- }
+ nat44_ed_add_del_interface_fib_reg_entries (e_addr, 1);
return 0;
}
-int
-nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
- u16 l_port, u16 e_port, nat_protocol_t proto,
- u32 vrf_id, u32 sw_if_index, u32 flags)
+static int
+nat44_ed_del_static_mapping_internal (ip4_address_t l_addr,
+ ip4_address_t e_addr, u16 l_port,
+ u16 e_port, ip_protocol_t proto,
+ u32 vrf_id, u32 flags)
{
snat_main_per_thread_data_t *tsm;
snat_main_t *sm = &snat_main;
- clib_bihash_kv_8_8_t kv, value;
- snat_interface_t *interface;
nat44_lb_addr_port_t *local;
snat_static_mapping_t *m;
u32 fib_index = ~0;
- int rv;
-
- rv = nat44_ed_validate_sm_input (flags);
- if (rv != 0)
- {
- return rv;
- }
if (is_sm_addr_only (flags))
{
e_port = l_port = proto = 0;
}
- if (is_sm_switch_address (flags))
- {
- // this mapping is interface bound
- ip4_address_t *first_int_addr;
-
- // delete record registered for resolve
- if (nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
- sw_if_index, flags))
- {
- return VNET_API_ERROR_NO_SUCH_ENTRY;
- }
-
- first_int_addr =
- ip4_interface_first_address (sm->ip4_main, sw_if_index, 0);
- if (!first_int_addr)
- {
- // dhcp resolution required
- return 0;
- }
-
- e_addr.as_u32 = first_int_addr->as_u32;
- }
-
if (is_sm_identity_nat (flags))
{
l_port = e_port;
@@ -904,24 +1192,17 @@ nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
}
// fib index 0
- init_nat_k (&kv, e_addr, e_port, 0, proto);
-
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
+ if (!m)
{
- if (is_sm_switch_address (flags))
- {
- return 0;
- }
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
- m = pool_elt_at_index (sm->static_mappings, value.value);
-
if (is_sm_identity_nat (flags))
{
- u8 failure = 1;
+ u8 found = 0;
- if (!is_sm_switch_address (flags))
+ if (vrf_id == ~0)
{
vrf_id = sm->inside_vrf_id;
}
@@ -933,11 +1214,11 @@ nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
local = pool_elt_at_index (m->locals, local - m->locals);
fib_index = local->fib_index;
pool_put (m->locals, local);
- failure = 0;
+ found = 1;
}
}
- if (failure)
+ if (!found)
{
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
@@ -947,66 +1228,38 @@ nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
fib_index = m->fib_index;
}
- if (!(is_sm_out2in_only (flags) || is_sm_addr_only (flags) ||
- sm->static_mapping_only))
+ if (!is_sm_out2in_only (flags))
{
- if (nat44_ed_free_port (e_addr, e_port, proto))
- {
- return VNET_API_ERROR_INVALID_VALUE;
- }
+ nat44_ed_sm_i2o_del (sm, l_addr, l_port, fib_index, proto);
}
- if (!is_sm_out2in_only (flags))
+ // delete sessions for static mapping
+ if (sm->num_workers > 1)
{
- init_nat_k (&kv, l_addr, l_port, fib_index, proto);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
+ tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
}
-
- if (!sm->static_mapping_only || sm->static_mapping_connection_tracking)
+ else
{
- // delete sessions for static mapping
- if (sm->num_workers > 1)
- tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
- else
- tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
-
- nat_ed_static_mapping_del_sessions (
- sm, tsm, m->local_addr, m->local_port, m->proto, fib_index,
- is_sm_addr_only (flags), e_addr, e_port);
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
}
- fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
-
- if (pool_elts (m->locals))
- return 0;
-
- // fib_index 0
- init_nat_k (&kv, e_addr, e_port, 0, proto);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
+ nat_ed_static_mapping_del_sessions (sm, tsm, m->local_addr, m->local_port,
+ m->proto, fib_index,
+ is_sm_addr_only (flags), e_addr, e_port);
- vec_free (m->tag);
- vec_free (m->workers);
- pool_put (sm->static_mappings, m);
-
- if (is_sm_identity_nat (flags) || !is_sm_addr_only (flags))
- return 0;
+ fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
- pool_foreach (interface, sm->interfaces)
+ if (!pool_elts (m->locals))
{
- if (nat_interface_is_inside (interface))
- continue;
-
- snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, 0);
- break;
- }
+ // this is the last local record; remove the mapping and its state
+ // fib_index 0
+ nat44_ed_sm_o2i_del (sm, e_addr, e_port, 0, proto);
- pool_foreach (interface, sm->output_feature_interfaces)
- {
- if (nat_interface_is_inside (interface))
- continue;
+ vec_free (m->tag);
+ vec_free (m->workers);
+ pool_put (sm->static_mappings, m);
- snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, 0);
- break;
+ nat44_ed_add_del_interface_fib_reg_entries (e_addr, 0);
}
return 0;
@@ -1014,66 +1267,59 @@ nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
int
nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
- nat_protocol_t proto,
+ ip_protocol_t proto,
nat44_lb_addr_port_t *locals, u32 flags,
u8 *tag, u32 affinity)
{
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
- clib_bihash_kv_8_8_t kv, value;
snat_address_t *a = 0;
nat44_lb_addr_port_t *local;
uword *bitmap = 0;
+ int rc = 0;
int i;
- init_nat_k (&kv, e_addr, e_port, 0, proto);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
- m = 0;
- else
- m = pool_elt_at_index (sm->static_mappings, value.value);
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
if (m)
- return VNET_API_ERROR_VALUE_EXIST;
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
if (vec_len (locals) < 2)
- return VNET_API_ERROR_INVALID_VALUE;
+ {
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
- /* Find external address in allocated addresses and reserve port for
- address and port pair mapping when dynamic translations enabled */
- if (!(sm->static_mapping_only || is_sm_out2in_only (flags)))
+ if (!is_sm_out2in_only (flags))
{
+ /* Find external address in allocated addresses and reserve port for
+ address and port pair mapping when dynamic translations enabled */
for (i = 0; i < vec_len (sm->addresses); i++)
{
if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
{
- a = sm->addresses + i;
/* External port must be unused */
- switch (proto)
+ a = sm->addresses + i;
+ if (nat44_ed_sm_o2i_lookup (sm, a->addr, e_port, 0, proto))
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_port_refcounts[e_port]) \
- return VNET_API_ERROR_INVALID_VALUE; \
- ++a->busy_##n##_port_refcounts[e_port]; \
- if (e_port > 1024) \
- { \
- a->busy_##n##_ports++; \
- a->busy_##n##_ports_per_thread[get_thread_idx_by_port (e_port)]++; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return VNET_API_ERROR_INVALID_VALUE_2;
+ return VNET_API_ERROR_VALUE_EXIST;
}
break;
}
}
- /* External address must be allocated */
+ // external address must be allocated
if (!a)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
}
pool_get (sm->static_mappings, m);
@@ -1093,11 +1339,10 @@ nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
else
m->affinity_per_service_list_head_index = ~0;
- init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
- m - sm->static_mappings);
- if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
+ if (nat44_ed_sm_o2i_add (sm, m, m->external_addr, m->external_port, 0,
+ m->proto))
{
- nat_elog_err (sm, "static_mapping_by_external key add failed");
+ nat_log_err ("sm o2i key add failed");
return VNET_API_ERROR_UNSPECIFIED;
}
@@ -1107,10 +1352,17 @@ nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
FIB_PROTOCOL_IP4, locals[i].vrf_id, sm->fib_src_low);
if (!is_sm_out2in_only (flags))
{
- init_nat_kv (&kv, locals[i].addr, locals[i].port,
- locals[i].fib_index, m->proto, 0,
- m - sm->static_mappings);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
+ if (nat44_ed_sm_i2o_add (sm, m, locals[i].addr, locals[i].port, 0,
+ proto))
+ {
+ nat_log_err ("sm i2o key add failed");
+ rc = VNET_API_ERROR_UNSPECIFIED;
+ // continue with the add operation so that the delete path can
+ // safely reverse it; otherwise we would have to track what was
+ // done and handle partial cleanups, and since these bihash adds
+ // are the only (extremely improbable) points of failure, it is
+ // simpler to do it this way
+ }
}
locals[i].prefix = (i == 0) ?
locals[i].probability :
@@ -1137,68 +1389,36 @@ nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
}
}
- return 0;
+ return rc;
}
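
A worked example for the prefix field set above, probability accumulating
into a cumulative prefix:

nat44_lb_addr_port_t locals[3] = {
  { .probability = 10 }, /* prefix = 10 */
  { .probability = 20 }, /* prefix = 10 + 20 = 30 */
  { .probability = 70 }, /* prefix = 30 + 70 = 100 */
};
/* a pseudo-random value r in [0, 100) presumably selects the first local
 * with r < prefix, giving each backend its configured share */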
int
nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
- nat_protocol_t proto, u32 flags)
+ ip_protocol_t proto, u32 flags)
{
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
- clib_bihash_kv_8_8_t kv, value;
- snat_address_t *a = 0;
nat44_lb_addr_port_t *local;
snat_main_per_thread_data_t *tsm;
snat_session_t *s;
- int i;
- init_nat_k (&kv, e_addr, e_port, 0, proto);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
- m = 0;
- else
- m = pool_elt_at_index (sm->static_mappings, value.value);
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+ m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
if (!m)
return VNET_API_ERROR_NO_SUCH_ENTRY;
if (!is_sm_lb (m->flags))
return VNET_API_ERROR_INVALID_VALUE;
- /* Free external address port */
- if (!(sm->static_mapping_only || is_sm_out2in_only (flags)))
- {
- for (i = 0; i < vec_len (sm->addresses); i++)
- {
- if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
- {
- a = sm->addresses + i;
- switch (proto)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- --a->busy_##n##_port_refcounts[e_port]; \
- if (e_port > 1024) \
- { \
- a->busy_##n##_ports--; \
- a->busy_##n##_ports_per_thread[get_thread_idx_by_port (e_port)]--; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return VNET_API_ERROR_INVALID_VALUE_2;
- }
- break;
- }
- }
- }
-
- init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
- if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
+ if (nat44_ed_sm_o2i_del (sm, m->external_addr, m->external_port, 0,
+ m->proto))
{
- nat_elog_err (sm, "static_mapping_by_external key del failed");
+ nat_log_err ("sm o2i key del failed");
return VNET_API_ERROR_UNSPECIFIED;
}
@@ -1207,12 +1427,11 @@ nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
if (!is_sm_out2in_only (flags))
{
- init_nat_k (&kv, local->addr, local->port, local->fib_index,
- m->proto);
- if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
+ if (nat44_ed_sm_i2o_del (sm, local->addr, local->port,
+ local->fib_index, m->proto))
{
- nat_elog_err (sm, "static_mapping_by_local key del failed");
- return VNET_API_ERROR_UNSPECIFIED;
+ nat_log_err ("sm i2o key del failed");
+ // as in the add path, continue rather than fail partway through
}
}
@@ -1231,14 +1450,14 @@ nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
/* Delete sessions */
pool_foreach (s, tsm->sessions)
{
- if (!(is_lb_session (s)))
+ if (!(nat44_ed_is_lb_session (s)))
continue;
if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
s->in2out.port != local->port)
continue;
- nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
+ nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
}
}
@@ -1259,12 +1478,11 @@ nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
int
nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
ip4_address_t l_addr, u16 l_port,
- nat_protocol_t proto, u32 vrf_id,
+ ip_protocol_t proto, u32 vrf_id,
u8 probability, u8 is_add)
{
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m = 0;
- clib_bihash_kv_8_8_t kv, value;
nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
snat_main_per_thread_data_t *tsm;
snat_session_t *s;
@@ -1272,15 +1490,22 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
uword *bitmap = 0;
int i;
- init_nat_k (&kv, e_addr, e_port, 0, proto);
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
- m = pool_elt_at_index (sm->static_mappings, value.value);
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
if (!m)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
if (!is_sm_lb (m->flags))
- return VNET_API_ERROR_INVALID_VALUE;
+ {
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
pool_foreach (local, m->locals)
{
@@ -1295,7 +1520,9 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
if (is_add)
{
if (match_local)
- return VNET_API_ERROR_VALUE_EXIST;
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
pool_get (m->locals, local);
clib_memset (local, 0, sizeof (*local));
@@ -1309,10 +1536,13 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
if (!is_sm_out2in_only (m->flags))
{
- init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto, 0,
- m - sm->static_mappings);
- if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
- nat_elog_err (sm, "static_mapping_by_local key add failed");
+ if (nat44_ed_sm_i2o_add (sm, m, l_addr, l_port, local->fib_index,
+ proto))
+ {
+ nat_log_err ("sm i2o key add failed");
+ pool_put (m->locals, local);
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
}
}
else
@@ -1328,9 +1558,9 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
if (!is_sm_out2in_only (m->flags))
{
- init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
- if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
- nat_elog_err (sm, "static_mapping_by_local key del failed");
+ if (nat44_ed_sm_i2o_del (sm, l_addr, l_port, match_local->fib_index,
+ proto))
+ nat_log_err ("sm i2o key del failed");
}
if (sm->num_workers > 1)
@@ -1347,15 +1577,15 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
/* Delete sessions */
pool_foreach (s, tsm->sessions) {
- if (!(is_lb_session (s)))
- continue;
+ if (!(nat44_ed_is_lb_session (s)))
+ continue;
- if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
- s->in2out.port != match_local->port)
- continue;
+ if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
+ s->in2out.port != match_local->port)
+ continue;
- nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
- nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
+ nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
+ nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
}
pool_put (m->locals, match_local);
@@ -1396,116 +1626,6 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
return 0;
}
-int
-snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
- u8 twice_nat)
-{
- snat_address_t *a = 0;
- snat_session_t *ses;
- u32 *ses_to_be_removed = 0, *ses_index;
- snat_main_per_thread_data_t *tsm;
- snat_static_mapping_t *m;
- snat_interface_t *interface;
- int i;
- snat_address_t *addresses =
- twice_nat ? sm->twice_nat_addresses : sm->addresses;
-
- /* Find SNAT address */
- for (i = 0; i < vec_len (addresses); i++)
- {
- if (addresses[i].addr.as_u32 == addr.as_u32)
- {
- a = addresses + i;
- break;
- }
- }
- if (!a)
- {
- nat_log_err ("no such address");
- return VNET_API_ERROR_NO_SUCH_ENTRY;
- }
-
- if (delete_sm)
- {
- pool_foreach (m, sm->static_mappings)
- {
- if (m->external_addr.as_u32 == addr.as_u32)
- {
- nat44_ed_del_static_mapping (m->local_addr, m->external_addr,
- m->local_port, m->external_port,
- m->proto, m->vrf_id, ~0, m->flags);
- }
- }
- }
- else
- {
- /* Check if address is used in some static mapping */
- if (is_snat_address_used_in_static_mapping (sm, addr))
- {
- nat_log_err ("address used in static mapping");
- return VNET_API_ERROR_UNSPECIFIED;
- }
- }
-
- if (a->fib_index != ~0)
- fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
-
- /* Delete sessions using address */
- if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
- {
- vec_foreach (tsm, sm->per_thread_data)
- {
- pool_foreach (ses, tsm->sessions) {
- if (ses->out2in.addr.as_u32 == addr.as_u32)
- {
- nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
- vec_add1 (ses_to_be_removed, ses - tsm->sessions);
- }
- }
-
- vec_foreach (ses_index, ses_to_be_removed)
- {
- ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
- nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
- }
-
- vec_free (ses_to_be_removed);
- }
- }
-
-#define _(N, i, n, s) \
- vec_free (a->busy_##n##_ports_per_thread);
- foreach_nat_protocol
-#undef _
-
- if (twice_nat)
- {
- vec_del1 (sm->twice_nat_addresses, i);
- return 0;
- }
- else vec_del1 (sm->addresses, i);
-
- /* Delete external address from FIB */
- pool_foreach (interface, sm->interfaces)
- {
- if (nat_interface_is_inside (interface))
- continue;
-
- snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
- break;
- }
- pool_foreach (interface, sm->output_feature_interfaces)
- {
- if (nat_interface_is_inside (interface))
- continue;
-
- snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
- break;
- }
-
- return 0;
-}
-
void
expire_per_vrf_sessions (u32 fib_index)
{
@@ -1515,19 +1635,19 @@ expire_per_vrf_sessions (u32 fib_index)
vec_foreach (tsm, sm->per_thread_data)
{
- vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
- {
- if ((per_vrf_sessions->rx_fib_index == fib_index) ||
- (per_vrf_sessions->tx_fib_index == fib_index))
- {
- per_vrf_sessions->expired = 1;
- }
- }
+ pool_foreach (per_vrf_sessions, tsm->per_vrf_sessions_pool)
+ {
+ if ((per_vrf_sessions->rx_fib_index == fib_index) ||
+ (per_vrf_sessions->tx_fib_index == fib_index))
+ {
+ per_vrf_sessions->expired = 1;
+ }
+ }
}
}
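
The per-VRF session bookkeeping above moves from a vector to a pool, so iteration now uses pool_foreach, which visits only allocated elements. A minimal sketch of the same pattern, with a simplified element type (the struct below is an assumption for illustration, not the plugin's real layout):

typedef struct
{
  u32 rx_fib_index;
  u32 tx_fib_index;
  u8 expired;
} demo_per_vrf_sessions_t;

static void
demo_expire (demo_per_vrf_sessions_t *pool, u32 fib_index)
{
  demo_per_vrf_sessions_t *e;
  /* pool_foreach skips free slots, so entries deleted elsewhere
   * are passed over safely */
  pool_foreach (e, pool)
    {
      if (e->rx_fib_index == fib_index || e->tx_fib_index == fib_index)
	e->expired = 1;
    }
}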
void
-update_per_vrf_sessions_vec (u32 fib_index, int is_del)
+update_per_vrf_sessions_pool (u32 fib_index, int is_del)
{
snat_main_t *sm = &snat_main;
nat_fib_t *fib;
@@ -1563,10 +1683,10 @@ update_per_vrf_sessions_vec (u32 fib_index, int is_del)
}
}
-static_always_inline nat_outside_fib_t *
-nat44_ed_get_outside_fib (nat_outside_fib_t *outside_fibs, u32 fib_index)
+static_always_inline nat_fib_t *
+nat44_ed_get_outside_fib (nat_fib_t *outside_fibs, u32 fib_index)
{
- nat_outside_fib_t *f;
+ nat_fib_t *f;
vec_foreach (f, outside_fibs)
{
if (f->fib_index == fib_index)
@@ -1597,10 +1717,8 @@ nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
const char *del_feature_name, *feature_name;
snat_main_t *sm = &snat_main;
- nat_outside_fib_t *outside_fib;
- snat_static_mapping_t *m;
+ nat_fib_t *outside_fib;
snat_interface_t *i;
- snat_address_t *ap;
u32 fib_index;
int rv;
@@ -1619,8 +1737,8 @@ nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
i = nat44_ed_get_interface (sm->interfaces, sw_if_index);
if (i)
{
- if ((nat_interface_is_inside (i) && is_inside) ||
- (nat_interface_is_outside (i) && !is_inside))
+ if ((nat44_ed_is_interface_inside (i) && is_inside) ||
+ (nat44_ed_is_interface_outside (i) && !is_inside))
{
return 0;
}
@@ -1672,7 +1790,7 @@ nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
fib_index =
fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
- update_per_vrf_sessions_vec (fib_index, 0 /*is_del*/);
+ update_per_vrf_sessions_pool (fib_index, 0 /*is_del*/);
if (!is_inside)
{
@@ -1681,28 +1799,19 @@ nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
if (outside_fib)
{
- outside_fib->refcount++;
+ outside_fib->ref_count++;
}
else
{
vec_add2 (sm->outside_fibs, outside_fib, 1);
outside_fib->fib_index = fib_index;
- outside_fib->refcount = 1;
+ outside_fib->ref_count = 1;
}
- vec_foreach (ap, sm->addresses)
- {
- snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 1);
- }
- pool_foreach (m, sm->static_mappings)
- {
- if (!(is_sm_addr_only (m->flags)) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- {
- continue;
- }
- snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 1);
- }
+ nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 1);
+ nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 1);
+
+ nat44_ed_bind_if_addr_to_nat_addr (sw_if_index);
}
else
{
@@ -1718,10 +1827,8 @@ nat44_ed_del_interface (u32 sw_if_index, u8 is_inside)
const char *del_feature_name, *feature_name;
snat_main_t *sm = &snat_main;
- nat_outside_fib_t *outside_fib;
- snat_static_mapping_t *m;
+ nat_fib_t *outside_fib;
snat_interface_t *i;
- snat_address_t *ap;
u32 fib_index;
int rv;
@@ -1738,7 +1845,7 @@ nat44_ed_del_interface (u32 sw_if_index, u8 is_inside)
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
- if (nat_interface_is_inside (i) && nat_interface_is_outside (i))
+ if (nat44_ed_is_interface_inside (i) && nat44_ed_is_interface_outside (i))
{
if (sm->num_workers > 1)
{
@@ -1798,34 +1905,22 @@ nat44_ed_del_interface (u32 sw_if_index, u8 is_inside)
fib_index =
fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
- update_per_vrf_sessions_vec (fib_index, 1 /*is_del*/);
+ update_per_vrf_sessions_pool (fib_index, 1 /*is_del*/);
if (!is_inside)
{
outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
if (outside_fib)
{
- outside_fib->refcount--;
- if (!outside_fib->refcount)
+ outside_fib->ref_count--;
+ if (!outside_fib->ref_count)
{
vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
}
}
- vec_foreach (ap, sm->addresses)
- {
- snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 0);
- }
-
- pool_foreach (m, sm->static_mappings)
- {
- if (!(is_sm_addr_only (m->flags)) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- {
- continue;
- }
- snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 0);
- }
+ nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 0);
+ nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 0);
}
return 0;
@@ -1836,10 +1931,8 @@ nat44_ed_add_output_interface (u32 sw_if_index)
{
snat_main_t *sm = &snat_main;
- nat_outside_fib_t *outside_fib;
- snat_static_mapping_t *m;
+ nat_fib_t *outside_fib;
snat_interface_t *i;
- snat_address_t *ap;
u32 fib_index;
int rv;
@@ -1911,34 +2004,24 @@ nat44_ed_add_output_interface (u32 sw_if_index)
fib_index =
fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
- update_per_vrf_sessions_vec (fib_index, 0 /*is_del*/);
+ update_per_vrf_sessions_pool (fib_index, 0 /*is_del*/);
outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
if (outside_fib)
{
- outside_fib->refcount++;
+ outside_fib->ref_count++;
}
else
{
vec_add2 (sm->outside_fibs, outside_fib, 1);
outside_fib->fib_index = fib_index;
- outside_fib->refcount = 1;
+ outside_fib->ref_count = 1;
}
- vec_foreach (ap, sm->addresses)
- {
- snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 1);
- }
+ nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 1);
+ nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 1);
- pool_foreach (m, sm->static_mappings)
- {
- if (!((is_sm_addr_only (m->flags))) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- {
- continue;
- }
- snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 1);
- }
+ nat44_ed_bind_if_addr_to_nat_addr (sw_if_index);
return 0;
}
@@ -1948,10 +2031,8 @@ nat44_ed_del_output_interface (u32 sw_if_index)
{
snat_main_t *sm = &snat_main;
- nat_outside_fib_t *outside_fib;
- snat_static_mapping_t *m;
+ nat_fib_t *outside_fib;
snat_interface_t *i;
- snat_address_t *ap;
u32 fib_index;
int rv;
@@ -2013,32 +2094,20 @@ nat44_ed_del_output_interface (u32 sw_if_index)
fib_index =
fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
- update_per_vrf_sessions_vec (fib_index, 1 /*is_del*/);
+ update_per_vrf_sessions_pool (fib_index, 1 /*is_del*/);
outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
if (outside_fib)
{
- outside_fib->refcount--;
- if (!outside_fib->refcount)
+ outside_fib->ref_count--;
+ if (!outside_fib->ref_count)
{
vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
}
}
- vec_foreach (ap, sm->addresses)
- {
- snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 0);
- }
-
- pool_foreach (m, sm->static_mappings)
- {
- if (!((is_sm_addr_only (m->flags))) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- {
- continue;
- }
- snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 0);
- }
+ nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 0);
+ nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 0);
return 0;
}
@@ -2064,7 +2133,7 @@ snat_set_workers (uword * bitmap)
j++;
}
- sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
+ sm->port_per_thread = (65536 - ED_USER_PORT_OFFSET) / _vec_len (sm->workers);
return 0;
}
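
Worked example for the port split above, assuming ED_USER_PORT_OFFSET is 1024 (the value the removed literal used); note the new form also replaces 0xffff (65535) with 65536, so the last port is no longer dropped from the range:

/* with 4 workers, each thread owns a disjoint range of
 * (65536 - 1024) / 4 = 16128 dynamic ports */
u32 port_per_thread = (65536 - 1024) / 4;	/* == 16128 */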
@@ -2074,17 +2143,28 @@ nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts)
{
fail_if_enabled ();
snat_main_t *sm = &snat_main;
+
+ if ((sm->fq_in2out_index != ~0) || (sm->fq_out2in_index != ~0) ||
+ (sm->fq_in2out_output_index != ~0))
+ {
+      // frame queue nelts can be set only before the first
+      // call to nat44_plugin_enable; after that it
+      // doesn't make sense
+ nat_log_err ("Frame queue was already initialized. "
+ "Change is not possible");
+ return 1;
+ }
+
sm->frame_queue_nelts = frame_queue_nelts;
return 0;
}
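
A sketch of the ordering this guard enforces (hypothetical caller, error handling elided): the nelts value is only honored while all three fq_*_index fields are still ~0, i.e. before nat44_plugin_enable creates the handoff frame queues.

nat44_ed_set_frame_queue_nelts (128); /* ok: queues not created yet */
nat44_plugin_enable (c);	      /* allocates the frame queues */
nat44_ed_set_frame_queue_nelts (256); /* rejected: returns nonzero */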
static void
-snat_update_outside_fib (ip4_main_t * im, uword opaque,
- u32 sw_if_index, u32 new_fib_index,
- u32 old_fib_index)
+nat44_ed_update_outside_fib_cb (ip4_main_t *im, uword opaque, u32 sw_if_index,
+ u32 new_fib_index, u32 old_fib_index)
{
snat_main_t *sm = &snat_main;
- nat_outside_fib_t *outside_fib;
+ nat_fib_t *outside_fib;
snat_interface_t *i;
u8 is_add = 1;
u8 match = 0;
@@ -2096,20 +2176,20 @@ snat_update_outside_fib (ip4_main_t * im, uword opaque,
}
pool_foreach (i, sm->interfaces)
- {
+ {
if (i->sw_if_index == sw_if_index)
{
- if (!(nat_interface_is_outside (i)))
+ if (!(nat44_ed_is_interface_outside (i)))
return;
match = 1;
}
}
pool_foreach (i, sm->output_feature_interfaces)
- {
+ {
if (i->sw_if_index == sw_if_index)
{
- if (!(nat_interface_is_outside (i)))
+ if (!(nat44_ed_is_interface_outside (i)))
return;
match = 1;
}
@@ -2119,54 +2199,45 @@ snat_update_outside_fib (ip4_main_t * im, uword opaque,
return;
vec_foreach (outside_fib, sm->outside_fibs)
- {
- if (outside_fib->fib_index == old_fib_index)
- {
- outside_fib->refcount--;
- if (!outside_fib->refcount)
- vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
- break;
- }
- }
+ {
+ if (outside_fib->fib_index == old_fib_index)
+ {
+ outside_fib->ref_count--;
+ if (!outside_fib->ref_count)
+ vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
+ break;
+ }
+ }
vec_foreach (outside_fib, sm->outside_fibs)
- {
- if (outside_fib->fib_index == new_fib_index)
- {
- outside_fib->refcount++;
- is_add = 0;
- break;
- }
- }
+ {
+ if (outside_fib->fib_index == new_fib_index)
+ {
+ outside_fib->ref_count++;
+ is_add = 0;
+ break;
+ }
+ }
if (is_add)
{
vec_add2 (sm->outside_fibs, outside_fib, 1);
- outside_fib->refcount = 1;
+ outside_fib->ref_count = 1;
outside_fib->fib_index = new_fib_index;
}
}
-static void
-snat_update_outside_fib (ip4_main_t * im, uword opaque,
- u32 sw_if_index, u32 new_fib_index,
- u32 old_fib_index);
+static void nat44_ed_update_outside_fib_cb (ip4_main_t *im, uword opaque,
+ u32 sw_if_index, u32 new_fib_index,
+ u32 old_fib_index);
-static void
-snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
- uword opaque,
- u32 sw_if_index,
- ip4_address_t * address,
- u32 address_length,
- u32 if_address_index, u32 is_delete);
+static void nat44_ed_add_del_interface_address_cb (
+ ip4_main_t *im, uword opaque, u32 sw_if_index, ip4_address_t *address,
+ u32 address_length, u32 if_address_index, u32 is_delete);
-static void
-nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
- uword opaque,
- u32 sw_if_index,
- ip4_address_t * address,
- u32 address_length,
- u32 if_address_index, u32 is_delete);
+static void nat44_ed_add_del_static_mapping_cb (
+ ip4_main_t *im, uword opaque, u32 sw_if_index, ip4_address_t *address,
+ u32 address_length, u32 if_address_index, u32 is_delete);
void
test_key_calc_split ()
@@ -2188,8 +2259,8 @@ test_key_calc_split ()
u32 thread_index = 3000000001;
u32 session_index = 3000000221;
clib_bihash_kv_16_8_t kv;
- init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
- thread_index, session_index);
+ init_ed_kv (&kv, l_addr.as_u32, l_port, r_addr.as_u32, r_port, fib_index,
+ proto, thread_index, session_index);
ip4_address_t l_addr2;
ip4_address_t r_addr2;
clib_memset (&l_addr2, 0, sizeof (l_addr2));
@@ -2208,16 +2279,6 @@ test_key_calc_split ()
ASSERT (fib_index == fib_index2);
ASSERT (thread_index == ed_value_get_thread_index (&kv));
ASSERT (session_index == ed_value_get_session_index (&kv));
-
- fib_index = 7001;
- proto = 5;
- nat_protocol_t proto3 = ~0;
- u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
- split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
- ASSERT (l_addr.as_u32 == l_addr2.as_u32);
- ASSERT (l_port == l_port2);
- ASSERT (proto == proto3);
- ASSERT (fib_index == fib_index2);
}
static clib_error_t *
@@ -2237,21 +2298,6 @@ nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
-void
-nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
-{
- vlib_node_t *node;
-
- node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
- sm->out2in_node_index = node->index;
-
- node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
- sm->in2out_node_index = node->index;
-
- node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
- sm->in2out_output_node_index = node->index;
-}
-
#define nat_validate_simple_counter(c, i) \
do \
{ \
@@ -2295,12 +2341,8 @@ nat_init (vlib_main_t * vm)
clib_memset (sm, 0, sizeof (*sm));
- // required
- sm->vnet_main = vnet_get_main ();
// convenience
sm->ip4_main = &ip4_main;
- sm->api_main = vlibapi_get_main ();
- sm->ip4_lookup_main = &ip4_main.lookup_main;
// frame queue indices used for handoff
sm->fq_out2in_index = ~0;
@@ -2309,15 +2351,13 @@ nat_init (vlib_main_t * vm)
sm->log_level = NAT_LOG_ERROR;
- nat44_set_node_indexes (sm, vm);
-
sm->log_class = vlib_log_register_class ("nat", 0);
nat_ipfix_logging_init (vm);
nat_init_simple_counter (sm->total_sessions, "total-sessions",
"/nat44-ed/total-sessions");
- sm->max_cfg_sessions_gauge = stat_segment_new_entry (
- (u8 *) "/nat44-ed/max-cfg-sessions", STAT_DIR_TYPE_SCALAR_INDEX);
+ sm->max_cfg_sessions_gauge =
+ vlib_stats_add_gauge ("/nat44-ed/max-cfg-sessions");
#define _(x) \
nat_init_simple_counter (sm->counters.fastpath.in2out.x, #x, \
@@ -2344,7 +2384,7 @@ nat_init (vlib_main_t * vm)
}
}
num_threads = tm->n_vlib_mains - 1;
- sm->port_per_thread = 0xffff - 1024;
+ sm->port_per_thread = 65536 - ED_USER_PORT_OFFSET;
vec_validate (sm->per_thread_data, num_threads);
/* Use all available workers by default */
@@ -2361,13 +2401,13 @@ nat_init (vlib_main_t * vm)
}
/* callbacks to call when interface address changes. */
- cbi.function = snat_ip4_add_del_interface_address_cb;
+ cbi.function = nat44_ed_add_del_interface_address_cb;
vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
- cbi.function = nat_ip4_add_del_addr_only_sm_cb;
+ cbi.function = nat44_ed_add_del_static_mapping_cb;
vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
/* callbacks to call when interface to table binding changes */
- cbt.function = snat_update_outside_fib;
+ cbt.function = nat44_ed_update_outside_fib_cb;
vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);
sm->fib_src_low =
@@ -2392,26 +2432,15 @@ nat44_plugin_enable (nat44_config_t c)
fail_if_enabled ();
- if (c.static_mapping_only && !c.connection_tracking)
- {
- nat_log_err ("unsupported combination of configuration");
- return 1;
- }
-
- sm->static_mapping_only = c.static_mapping_only;
- sm->static_mapping_connection_tracking = c.connection_tracking;
-
sm->forwarding_enabled = 0;
sm->mss_clamping = 0;
- sm->pat = (!c.static_mapping_only ||
- (c.static_mapping_only && c.connection_tracking));
if (!c.sessions)
c.sessions = 63 * 1024;
sm->max_translations_per_thread = c.sessions;
- stat_segment_set_state_counter (sm->max_cfg_sessions_gauge,
- sm->max_translations_per_thread);
+ vlib_stats_set_gauge (sm->max_cfg_sessions_gauge,
+ sm->max_translations_per_thread);
sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);
vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
@@ -2425,7 +2454,7 @@ nat44_plugin_enable (nat44_config_t c)
sm->outside_fib_index = fib_table_find_or_create_and_lock (
FIB_PROTOCOL_IP4, c.outside_vrf, sm->fib_src_hi);
- nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
+ nat44_ed_db_init ();
nat_affinity_enable ();
@@ -2440,20 +2469,26 @@ nat44_plugin_enable (nat44_config_t c)
if (sm->num_workers > 1)
{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_t *node;
+
if (sm->fq_in2out_index == ~0)
{
- sm->fq_in2out_index = vlib_frame_queue_main_init (
- sm->in2out_node_index, sm->frame_queue_nelts);
+ node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
+ sm->fq_in2out_index =
+ vlib_frame_queue_main_init (node->index, sm->frame_queue_nelts);
}
if (sm->fq_out2in_index == ~0)
{
- sm->fq_out2in_index = vlib_frame_queue_main_init (
- sm->out2in_node_index, sm->frame_queue_nelts);
+ node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
+ sm->fq_out2in_index =
+ vlib_frame_queue_main_init (node->index, sm->frame_queue_nelts);
}
if (sm->fq_in2out_output_index == ~0)
{
- sm->fq_in2out_output_index = vlib_frame_queue_main_init (
- sm->in2out_output_node_index, sm->frame_queue_nelts);
+ node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
+ sm->fq_in2out_output_index =
+ vlib_frame_queue_main_init (node->index, sm->frame_queue_nelts);
}
}
@@ -2463,86 +2498,213 @@ nat44_plugin_enable (nat44_config_t c)
return 0;
}
-void
-nat44_addresses_free (snat_address_t ** addresses)
+int
+nat44_ed_del_addresses ()
{
- snat_address_t *ap;
- vec_foreach (ap, *addresses)
+ snat_main_t *sm = &snat_main;
+ snat_address_t *a, *vec;
+ int error = 0;
+
+ vec = vec_dup (sm->addresses);
+ vec_foreach (a, vec)
{
- #define _(N, i, n, s) \
- vec_free (ap->busy_##n##_ports_per_thread);
- foreach_nat_protocol
- #undef _
+ error = nat44_ed_del_address (a->addr, 0);
+ if (error)
+ {
+	  nat_log_err ("error occurred while removing address");
+ }
}
- vec_free (*addresses);
- *addresses = 0;
+ vec_free (vec);
+ vec_free (sm->addresses);
+ sm->addresses = 0;
+
+ vec = vec_dup (sm->twice_nat_addresses);
+ vec_foreach (a, vec)
+ {
+ error = nat44_ed_del_address (a->addr, 1);
+ if (error)
+ {
+	  nat_log_err ("error occurred while removing address");
+ }
+ }
+ vec_free (vec);
+ vec_free (sm->twice_nat_addresses);
+ sm->twice_nat_addresses = 0;
+
+ vec_free (sm->addr_to_resolve);
+ sm->addr_to_resolve = 0;
+
+ return error;
}
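
nat44_ed_del_addresses and the interface teardown below share one pattern: deleting elements while walking the live vector or pool would invalidate the iterator, so they iterate a duplicate (vec_dup / pool_dup) and delete from the original. A minimal sketch:

snat_address_t *a, *copy = vec_dup (sm->addresses);
vec_foreach (a, copy)
  {
    /* mutates sm->addresses, not the copy we iterate */
    nat44_ed_del_address (a->addr, 0 /* twice_nat */);
  }
vec_free (copy);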
int
-nat44_plugin_disable ()
+nat44_ed_del_interfaces ()
{
snat_main_t *sm = &snat_main;
snat_interface_t *i, *pool;
int error = 0;
- fail_if_disabled ();
-
pool = pool_dup (sm->interfaces);
pool_foreach (i, pool)
{
- if (nat_interface_is_inside (i))
+ if (nat44_ed_is_interface_inside (i))
{
error = nat44_ed_del_interface (i->sw_if_index, 1);
}
- if (nat_interface_is_outside (i))
+ if (nat44_ed_is_interface_outside (i))
{
error = nat44_ed_del_interface (i->sw_if_index, 0);
}
+
if (error)
- {
- nat_log_err ("error occurred while removing interface %u",
- i->sw_if_index);
- }
+ {
+ nat_log_err ("error occurred while removing interface");
+ }
}
- pool_free (sm->interfaces);
pool_free (pool);
+ pool_free (sm->interfaces);
sm->interfaces = 0;
+ return error;
+}
+
+int
+nat44_ed_del_output_interfaces ()
+{
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i, *pool;
+ int error = 0;
pool = pool_dup (sm->output_feature_interfaces);
pool_foreach (i, pool)
{
error = nat44_ed_del_output_interface (i->sw_if_index);
if (error)
- {
- nat_log_err ("error occurred while removing interface %u",
- i->sw_if_index);
- }
+ {
+ nat_log_err ("error occurred while removing output interface");
+ }
}
- pool_free (sm->output_feature_interfaces);
pool_free (pool);
+ pool_free (sm->output_feature_interfaces);
sm->output_feature_interfaces = 0;
+ return error;
+}
+
+static clib_error_t *
+nat44_ed_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i;
+ int error = 0;
+
+ if (is_add)
+ return 0;
+
+ if (!sm->enabled)
+ return 0;
+
+ i = nat44_ed_get_interface (sm->interfaces, sw_if_index);
+ if (i)
+ {
+ bool is_inside = nat44_ed_is_interface_inside (i);
+ bool is_outside = nat44_ed_is_interface_outside (i);
+
+ if (is_inside)
+ {
+ error |= nat44_ed_del_interface (sw_if_index, 1);
+ }
+ if (is_outside)
+ {
+ error |= nat44_ed_del_interface (sw_if_index, 0);
+ }
+
+ if (error)
+ {
+ nat_log_err ("error occurred while removing interface");
+ }
+ }
+
+ i = nat44_ed_get_interface (sm->output_feature_interfaces, sw_if_index);
+ if (i)
+ {
+ error = nat44_ed_del_output_interface (sw_if_index);
+ if (error)
+ {
+ nat_log_err ("error occurred while removing output interface");
+ }
+ }
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (nat44_ed_sw_interface_add_del);
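
The macro registers the callback with vnet so it fires on every software interface add or delete; only the delete path is handled above, to drop NAT configuration from interfaces that disappear. A minimal sketch of the same registration (names below are hypothetical):

static clib_error_t *
demo_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
{
  if (!is_add)
    clib_warning ("sw_if_index %u is going away", sw_if_index);
  return 0;
}

VNET_SW_INTERFACE_ADD_DEL_FUNCTION (demo_sw_interface_add_del);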
+
+int
+nat44_ed_del_static_mappings ()
+{
+ snat_main_t *sm = &snat_main;
+ snat_static_mapping_t *m, *pool;
+ int error = 0;
+
+ pool = pool_dup (sm->static_mappings);
+ pool_foreach (m, pool)
+ {
+ error = nat44_ed_del_static_mapping_internal (
+ m->local_addr, m->external_addr, m->local_port, m->external_port,
+ m->proto, m->vrf_id, m->flags);
+ if (error)
+ {
+ nat_log_err ("error occurred while removing mapping");
+ }
+ }
+ pool_free (pool);
+ pool_free (sm->static_mappings);
+ sm->static_mappings = 0;
+
+ vec_free (sm->sm_to_resolve);
+ sm->sm_to_resolve = 0;
+
+ return error;
+}
+
+int
+nat44_plugin_disable ()
+{
+ snat_main_t *sm = &snat_main;
+ int rc, error = 0;
+
+ fail_if_disabled ();
+
+ rc = nat44_ed_del_static_mappings ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ed_del_addresses ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ed_del_interfaces ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ed_del_output_interfaces ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ nat44_ed_del_vrf_tables ();
vec_free (sm->max_translations_per_fib);
+ sm->max_translations_per_fib = 0;
nat44_ed_db_free ();
- nat44_addresses_free (&sm->addresses);
- nat44_addresses_free (&sm->twice_nat_addresses);
-
- vec_free (sm->to_resolve);
- vec_free (sm->auto_add_sw_if_indices);
- vec_free (sm->auto_add_sw_if_indices_twice_nat);
+ clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
- sm->to_resolve = 0;
- sm->auto_add_sw_if_indices = 0;
- sm->auto_add_sw_if_indices_twice_nat = 0;
+ nat_affinity_disable ();
sm->forwarding_enabled = 0;
-
sm->enabled = 0;
- clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
- return 0;
+ return error;
}
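
Disable now tears configuration down in dependency order: static mappings first (they may pin addresses), then addresses, plain interfaces, and output-feature interfaces, before the per-VRF tables and databases are freed. A hedged sketch of a full cycle:

nat44_config_t c = { .sessions = 63 * 1024 };
nat44_plugin_enable (c);
/* ... configure addresses, interfaces, mappings; pass traffic ... */
nat44_plugin_disable (); /* returns VNET_API_ERROR_BUG if any step failed */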
void
@@ -2556,14 +2718,16 @@ nat44_ed_forwarding_enable_disable (u8 is_enable)
sm->forwarding_enabled = is_enable != 0;
- if (is_enable)
- return;
+ if (!sm->enabled || is_enable)
+ {
+ return;
+ }
vec_foreach (tsm, sm->per_thread_data)
{
pool_foreach (s, tsm->sessions)
{
- if (is_fwd_bypass_session (s))
+ if (na44_ed_is_fwd_bypass_session (s))
{
vec_add1 (ses_to_be_removed, s - tsm->sessions);
}
@@ -2571,7 +2735,7 @@ nat44_ed_forwarding_enable_disable (u8 is_enable)
vec_foreach (ses_index, ses_to_be_removed)
{
s = pool_elt_at_index (tsm->sessions, ses_index[0]);
- nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
+ nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
}
@@ -2579,125 +2743,91 @@ nat44_ed_forwarding_enable_disable (u8 is_enable)
}
}
-void
-snat_free_outside_address_and_port (snat_address_t *addresses,
- u32 thread_index, ip4_address_t *addr,
- u16 port, nat_protocol_t protocol)
+static_always_inline snat_static_mapping_t *
+nat44_ed_sm_match (snat_main_t *sm, ip4_address_t match_addr, u16 match_port,
+ u32 match_fib_index, ip_protocol_t match_protocol,
+ int by_external)
{
- snat_main_t *sm = &snat_main;
- snat_address_t *a;
- u32 address_index;
- u16 port_host_byte_order = clib_net_to_host_u16 (port);
-
- for (address_index = 0; address_index < vec_len (addresses);
- address_index++)
+ snat_static_mapping_t *m;
+ if (!by_external)
{
- if (addresses[address_index].addr.as_u32 == addr->as_u32)
- break;
- }
-
- ASSERT (address_index < vec_len (addresses));
-
- a = addresses + address_index;
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port, match_fib_index,
+ match_protocol);
+ if (m)
+ return m;
- switch (protocol)
- {
-#define _(N, i, n, s) \
- case NAT_PROTOCOL_##N: \
- ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
- --a->busy_##n##_port_refcounts[port_host_byte_order]; \
- a->busy_##n##_ports--; \
- a->busy_##n##_ports_per_thread[thread_index]--; \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return;
- }
-}
+ // try address only mapping
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, match_fib_index, 0);
+ if (m)
+ return m;
-int
-nat_set_outside_address_and_port (snat_address_t *addresses, u32 thread_index,
- ip4_address_t addr, u16 port,
- nat_protocol_t protocol)
-{
- snat_main_t *sm = &snat_main;
- snat_address_t *a = 0;
- u32 address_index;
- u16 port_host_byte_order = clib_net_to_host_u16 (port);
-
- for (address_index = 0; address_index < vec_len (addresses);
- address_index++)
- {
- if (addresses[address_index].addr.as_u32 != addr.as_u32)
- continue;
-
- a = addresses + address_index;
- switch (protocol)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
- return VNET_API_ERROR_INSTANCE_IN_USE; \
- ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- return 0;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return 1;
+ // default static mapping fib index (based on configuration)
+ if (sm->inside_fib_index != match_fib_index)
+ {
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port,
+ sm->inside_fib_index, match_protocol);
+ if (m)
+ return m;
+
+ // try address only mapping
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, sm->inside_fib_index,
+ 0);
+ if (m)
+ return m;
+ }
+ // TODO: this specific use case may be deprecated (needs testing)
+ if (sm->outside_fib_index != match_fib_index)
+ {
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port,
+ sm->outside_fib_index, match_protocol);
+ if (m)
+ return m;
+
+ // try address only mapping
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, sm->outside_fib_index,
+ 0);
+ if (m)
+ return m;
}
}
+ else
+ {
+ m =
+ nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
+ if (m)
+ return m;
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ // try address only mapping
+ m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
+ if (m)
+ return m;
+ }
+ return 0;
}
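
The i2o side of nat44_ed_sm_match tries progressively weaker keys; flattened, the cascade looks like this (a sketch, each step running only on a miss):

m = nat44_ed_sm_i2o_lookup (sm, addr, port, fib, proto); /* exact match */
if (!m)
  m = nat44_ed_sm_i2o_lookup (sm, addr, 0, fib, 0);	 /* address-only */
if (!m && sm->inside_fib_index != fib)			 /* configured default fib */
  m = nat44_ed_sm_i2o_lookup (sm, addr, port, sm->inside_fib_index, proto);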
int
-snat_static_mapping_match (vlib_main_t *vm, snat_main_t *sm,
- ip4_address_t match_addr, u16 match_port,
- u32 match_fib_index, nat_protocol_t match_protocol,
+snat_static_mapping_match (vlib_main_t *vm, ip4_address_t match_addr,
+ u16 match_port, u32 match_fib_index,
+ ip_protocol_t match_protocol,
ip4_address_t *mapping_addr, u16 *mapping_port,
- u32 *mapping_fib_index, u8 by_external,
+ u32 *mapping_fib_index, int by_external,
u8 *is_addr_only, twice_nat_type_t *twice_nat,
lb_nat_type_t *lb, ip4_address_t *ext_host_addr,
u8 *is_identity_nat, snat_static_mapping_t **out)
{
- clib_bihash_kv_8_8_t kv, value;
- clib_bihash_8_8_t *mapping_hash;
+ snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
u32 rand, lo = 0, hi, mid, *tmp = 0, i;
nat44_lb_addr_port_t *local;
u8 backend_index;
- if (!by_external)
- {
- mapping_hash = &sm->static_mapping_by_local;
- init_nat_k (&kv, match_addr, match_port, match_fib_index,
- match_protocol);
- if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
- {
- /* Try address only mapping */
- init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
- if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
- return 1;
- }
- }
- else
+ m = nat44_ed_sm_match (sm, match_addr, match_port, match_fib_index,
+ match_protocol, by_external);
+ if (!m)
{
- mapping_hash = &sm->static_mapping_by_external;
- init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
- if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
- {
- /* Try address only mapping */
- init_nat_k (&kv, match_addr, 0, 0, 0);
- if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
- return 1;
- }
+ return 1;
}
- m = pool_elt_at_index (sm->static_mappings, value.value);
-
if (by_external)
{
if (is_sm_lb (m->flags))
@@ -2829,7 +2959,7 @@ nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
if (PREDICT_FALSE (is_output))
{
fib_index = sm->outside_fib_index;
- nat_outside_fib_t *outside_fib;
+ nat_fib_t *outside_fib;
fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
fib_prefix_t pfx = {
.fp_proto = FIB_PROTOCOL_IP4,
@@ -2864,9 +2994,33 @@ nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
}
}
- init_ed_k (&kv16, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
- ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
- fib_index, ip->protocol);
+ if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
+ {
+ ip4_address_t lookup_saddr, lookup_daddr;
+ u16 lookup_sport, lookup_dport;
+ u8 lookup_protocol;
+
+ if (!nat_get_icmp_session_lookup_values (
+ b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr,
+ &lookup_dport, &lookup_protocol))
+ {
+ init_ed_k (&kv16, lookup_saddr.as_u32, lookup_sport,
+ lookup_daddr.as_u32, lookup_dport, rx_fib_index,
+ lookup_protocol);
+ if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
+ {
+ next_worker_index = ed_value_get_thread_index (&value16);
+ vnet_buffer2 (b)->nat.cached_session_index =
+ ed_value_get_session_index (&value16);
+ goto out;
+ }
+ }
+ }
+
+ init_ed_k (&kv16, ip->src_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_dst_port, fib_index,
+ ip->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
{
@@ -2877,9 +3031,10 @@ nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
}
// dst NAT
- init_ed_k (&kv16, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
- ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
- rx_fib_index, ip->protocol);
+ init_ed_k (&kv16, ip->dst_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_src_port, rx_fib_index,
+ ip->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
{
next_worker_index = ed_value_get_thread_index (&value16);
@@ -2890,7 +3045,9 @@ nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
}
hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
- (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
+ (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24) +
+ rx_fib_index + (rx_fib_index >> 8) + (rx_fib_index >> 16) +
+ (rx_fib_index >> 24);
if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
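
The hash now folds the rx FIB index in alongside the source address bytes, so identical inner sources in different VRFs can land on different workers. A worked example with illustrative values:

u32 x = 0xc0000201 /* src address as u32 */, fib = 3;
u32 hash = x + (x >> 8) + (x >> 16) + (x >> 24) + fib + (fib >> 8) +
	   (fib >> 16) + (fib >> 24);
u32 idx = hash & (4 - 1); /* 4 workers: power of two, so mask, not modulo */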
@@ -2921,17 +3078,16 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
u32 rx_fib_index, u8 is_output)
{
snat_main_t *sm = &snat_main;
- clib_bihash_kv_8_8_t kv, value;
clib_bihash_kv_16_8_t kv16, value16;
- u32 proto, next_worker_index = 0;
+  u8 proto;
+  u32 next_worker_index = 0;
u16 port;
snat_static_mapping_t *m;
u32 hash;
- proto = ip_proto_to_nat_proto (ip->protocol);
+ proto = ip->protocol;
- if (PREDICT_FALSE (proto == NAT_PROTOCOL_ICMP))
+ if (PREDICT_FALSE (IP_PROTOCOL_ICMP == proto))
{
ip4_address_t lookup_saddr, lookup_daddr;
u16 lookup_sport, lookup_dport;
@@ -2940,8 +3096,9 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport,
&lookup_protocol))
{
- init_ed_k (&kv16, lookup_saddr, lookup_sport, lookup_daddr,
- lookup_dport, rx_fib_index, lookup_protocol);
+ init_ed_k (&kv16, lookup_saddr.as_u32, lookup_sport,
+ lookup_daddr.as_u32, lookup_dport, rx_fib_index,
+ lookup_protocol);
if (PREDICT_TRUE (
!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
{
@@ -2955,9 +3112,10 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
}
}
- init_ed_k (&kv16, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
- ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
- rx_fib_index, ip->protocol);
+ init_ed_k (&kv16, ip->src_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_dst_port, rx_fib_index,
+ ip->protocol);
if (PREDICT_TRUE (
!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
@@ -2975,18 +3133,18 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
/* first try static mappings without port */
if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
{
- init_nat_k (&kv, ip->dst_address, 0, 0, 0);
- if (!clib_bihash_search_8_8
- (&sm->static_mapping_by_external, &kv, &value))
+ m = nat44_ed_sm_o2i_lookup (sm, ip->dst_address, 0, 0, proto);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value.value);
- next_worker_index = m->workers[0];
- goto done;
+	  next_worker_index = m->workers[0];
+	  goto done;
}
}
/* unknown protocol */
- if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
+ if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto)))
{
/* use current thread */
next_worker_index = vlib_get_thread_index ();
@@ -2999,7 +3157,7 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
{
udp_header_t *udp = ip4_next_header (ip);
icmp46_header_t *icmp = (icmp46_header_t *) udp;
- icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
+ nat_icmp_echo_header_t *echo = (nat_icmp_echo_header_t *) (icmp + 1);
if (!icmp_type_is_error_message
(vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
port = vnet_buffer (b)->ip.reass.l4_src_port;
@@ -3007,18 +3165,19 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
{
/* if error message, then it's not fragmented and we can access it */
ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
- proto = ip_proto_to_nat_proto (inner_ip->protocol);
+ proto = inner_ip->protocol;
void *l4_header = ip4_next_header (inner_ip);
switch (proto)
{
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
icmp = (icmp46_header_t *) l4_header;
- echo = (icmp_echo_header_t *) (icmp + 1);
+ echo = (nat_icmp_echo_header_t *) (icmp + 1);
port = echo->identifier;
break;
- case NAT_PROTOCOL_UDP:
- case NAT_PROTOCOL_TCP:
- port = ((tcp_udp_header_t *) l4_header)->src_port;
+ case IP_PROTOCOL_UDP:
+	      /* fallthrough */
+ case IP_PROTOCOL_TCP:
+ port = ((nat_tcp_udp_header_t *) l4_header)->src_port;
break;
default:
next_worker_index = vlib_get_thread_index ();
@@ -3030,11 +3189,9 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
/* try static mappings with port */
if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
{
- init_nat_k (&kv, ip->dst_address, port, 0, proto);
- if (!clib_bihash_search_8_8
- (&sm->static_mapping_by_external, &kv, &value))
+ m = nat44_ed_sm_o2i_lookup (sm, ip->dst_address, port, 0, proto);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value.value);
if (!is_sm_lb (m->flags))
{
next_worker_index = m->workers[0];
@@ -3054,9 +3211,7 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
}
/* worker by outside port */
- next_worker_index = sm->first_worker_index;
- next_worker_index +=
- sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
+ next_worker_index = get_thread_idx_by_port (clib_net_to_host_u16 (port));
done:
nat_elog_debug_handoff (sm, "HANDOFF OUT2IN", next_worker_index,
@@ -3085,16 +3240,12 @@ nat44_set_session_limit (u32 session_limit, u32 vrf_id)
{
snat_main_t *sm = &snat_main;
u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
- u32 len = vec_len (sm->max_translations_per_fib);
- if (len <= fib_index)
- {
- vec_validate (sm->max_translations_per_fib, fib_index + 1);
-
- for (; len < vec_len (sm->max_translations_per_fib); len++)
- sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
- }
+ if (~0 == fib_index)
+ return -1;
+ vec_validate_init_empty (sm->max_translations_per_fib, fib_index,
+ sm->max_translations_per_thread);
sm->max_translations_per_fib[fib_index] = session_limit;
return 0;
}
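
vec_validate_init_empty grows the vector up to the requested index and seeds every newly created slot with the supplied default, which is exactly what the removed loop implemented by hand. Sketch:

u32 *limits = 0;
vec_validate_init_empty (limits, 5, 1000); /* limits[0..5] all == 1000 */
limits[3] = 50;	 /* per-VRF override, as above */
vec_free (limits);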
@@ -3108,8 +3259,8 @@ nat44_update_session_limit (u32 session_limit, u32 vrf_id)
return 1;
sm->max_translations_per_thread = nat44_get_max_session_limit ();
- stat_segment_set_state_counter (sm->max_cfg_sessions_gauge,
- sm->max_translations_per_thread);
+ vlib_stats_set_gauge (sm->max_cfg_sessions_gauge,
+ sm->max_translations_per_thread);
sm->translation_buckets =
nat_calc_bihash_buckets (sm->max_translations_per_thread);
@@ -3119,11 +3270,11 @@ nat44_update_session_limit (u32 session_limit, u32 vrf_id)
}
static void
-nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
- u32 translation_buckets)
+nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations)
{
dlist_elt_t *head;
+ pool_alloc (tsm->per_vrf_sessions_pool, translations);
pool_alloc (tsm->sessions, translations);
pool_alloc (tsm->lru_pool, translations);
@@ -3149,7 +3300,7 @@ nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
}
static void
-reinit_ed_flow_hash ()
+nat44_ed_flow_hash_init ()
{
snat_main_t *sm = &snat_main;
// we expect 2 flows per session, so multiply translation_buckets by 2
@@ -3160,34 +3311,16 @@ reinit_ed_flow_hash ()
}
static void
-nat44_ed_db_init (u32 translations, u32 translation_buckets)
+nat44_ed_db_init ()
{
snat_main_t *sm = &snat_main;
snat_main_per_thread_data_t *tsm;
- u32 static_mapping_buckets = 1024;
- u32 static_mapping_memory_size = 64 << 20;
-
- reinit_ed_flow_hash ();
-
- clib_bihash_init_8_8 (&sm->static_mapping_by_local,
- "static_mapping_by_local", static_mapping_buckets,
- static_mapping_memory_size);
- clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
- format_static_mapping_kvp);
- clib_bihash_init_8_8 (&sm->static_mapping_by_external,
- "static_mapping_by_external", static_mapping_buckets,
- static_mapping_memory_size);
- clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
- format_static_mapping_kvp);
+ nat44_ed_flow_hash_init ();
- if (sm->pat)
+ vec_foreach (tsm, sm->per_thread_data)
{
- vec_foreach (tsm, sm->per_thread_data)
- {
- nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
- sm->translation_buckets);
- }
+ nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread);
}
}
@@ -3196,283 +3329,299 @@ nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm)
{
pool_free (tsm->lru_pool);
pool_free (tsm->sessions);
- vec_free (tsm->per_vrf_sessions_vec);
+ pool_free (tsm->per_vrf_sessions_pool);
}
static void
-nat44_ed_db_free ()
+nat44_ed_flow_hash_free ()
{
snat_main_t *sm = &snat_main;
- snat_main_per_thread_data_t *tsm;
- pool_free (sm->static_mappings);
clib_bihash_free_16_8 (&sm->flow_hash);
- clib_bihash_free_8_8 (&sm->static_mapping_by_local);
- clib_bihash_free_8_8 (&sm->static_mapping_by_external);
+}
- if (sm->pat)
+static void
+nat44_ed_db_free ()
+{
+ snat_main_t *sm = &snat_main;
+ snat_main_per_thread_data_t *tsm;
+
+ vec_foreach (tsm, sm->per_thread_data)
{
- vec_foreach (tsm, sm->per_thread_data)
- {
- nat44_ed_worker_db_free (tsm);
- }
+ nat44_ed_worker_db_free (tsm);
}
+
+ nat44_ed_flow_hash_free ();
}
void
nat44_ed_sessions_clear ()
{
snat_main_t *sm = &snat_main;
- snat_main_per_thread_data_t *tsm;
-
- reinit_ed_flow_hash ();
- if (sm->pat)
- {
- vec_foreach (tsm, sm->per_thread_data)
- {
-
- nat44_ed_worker_db_free (tsm);
- nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
- sm->translation_buckets);
- }
- }
+ nat44_ed_db_free ();
+ nat44_ed_db_init ();
vlib_zero_simple_counter (&sm->total_sessions, 0);
}
static void
-nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
- uword opaque,
- u32 sw_if_index,
- ip4_address_t * address,
- u32 address_length,
- u32 if_address_index, u32 is_delete)
+nat44_ed_add_del_static_mapping_cb (ip4_main_t *im, uword opaque,
+ u32 sw_if_index, ip4_address_t *address,
+ u32 address_length, u32 if_address_index,
+ u32 is_delete)
{
+ snat_static_mapping_resolve_t *rp;
snat_main_t *sm = &snat_main;
- snat_static_map_resolve_t *rp;
- snat_static_mapping_t *m;
- clib_bihash_kv_8_8_t kv, value;
- ip4_address_t l_addr;
- int i, rv;
+ int rv = 0;
if (!sm->enabled)
- return;
-
- for (i = 0; i < vec_len (sm->to_resolve); i++)
{
- rp = sm->to_resolve + i;
- if (rp->addr_only == 0)
- continue;
- if (rp->sw_if_index == sw_if_index)
- goto match;
+ return;
}
- return;
-
-match:
- init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
- sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
- m = 0;
- else
- m = pool_elt_at_index (sm->static_mappings, value.value);
-
- if (!is_delete)
- {
- /* Don't trip over lease renewal, static config */
- if (m)
- return;
- }
- else
+ vec_foreach (rp, sm->sm_to_resolve)
{
- if (!m)
- return;
+ if (sw_if_index == rp->sw_if_index)
+ {
+ if (is_delete)
+ {
+ if (rp->is_resolved)
+ {
+ rv = nat44_ed_del_static_mapping_internal (
+ rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
+ rp->vrf_id, rp->flags);
+ if (rv)
+ {
+ nat_log_err ("ed del static mapping failed");
+ }
+ else
+ {
+ rp->is_resolved = 0;
+ }
+ }
+ }
+ else
+ {
+ if (!rp->is_resolved)
+ {
+ rv = nat44_ed_add_static_mapping_internal (
+ rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
+ rp->vrf_id, ~0, rp->flags, rp->pool_addr, rp->tag);
+ if (rv)
+ {
+ nat_log_err ("ed add static mapping failed");
+ }
+ else
+ {
+ rp->is_resolved = 1;
+ }
+ }
+ }
+ }
}
+}
- /* Indetity mapping? */
- if (rp->l_addr.as_u32 == 0)
- l_addr.as_u32 = address[0].as_u32;
- else
- l_addr.as_u32 = rp->l_addr.as_u32;
+static int
+nat44_ed_get_addr_resolve_record (u32 sw_if_index, u8 twice_nat, int *out)
+{
+ snat_main_t *sm = &snat_main;
+ snat_address_resolve_t *rp;
+ int i;
- if (is_delete)
+ for (i = 0; i < vec_len (sm->addr_to_resolve); i++)
{
- rv = nat44_ed_del_static_mapping (l_addr, address[0], rp->l_port,
- rp->e_port, rp->proto, rp->vrf_id, ~0,
- rp->flags);
- }
- else
- {
- rv = nat44_ed_add_static_mapping (l_addr, address[0], rp->l_port,
- rp->e_port, rp->proto, rp->vrf_id, ~0,
- rp->flags, rp->pool_addr, rp->tag);
+ rp = sm->addr_to_resolve + i;
+
+ if ((rp->sw_if_index == sw_if_index) && (rp->is_twice_nat == twice_nat))
+ {
+ if (out)
+ {
+ *out = i;
+ }
+ return 0;
+ }
}
- if (rv)
+ return 1;
+}
+static int
+nat44_ed_del_addr_resolve_record (u32 sw_if_index, u8 twice_nat)
+{
+ snat_main_t *sm = &snat_main;
+ int i;
+ if (!nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
{
- nat_elog_notice_X1 (sm, "add_static_mapping returned %d", "i4", rv);
+ vec_del1 (sm->addr_to_resolve, i);
+ return 0;
}
+ return 1;
}
static void
-snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
- uword opaque,
- u32 sw_if_index,
- ip4_address_t * address,
+nat44_ed_add_del_interface_address_cb (ip4_main_t *im, uword opaque,
+ u32 sw_if_index, ip4_address_t *address,
u32 address_length,
u32 if_address_index, u32 is_delete)
{
snat_main_t *sm = &snat_main;
- snat_static_map_resolve_t *rp;
- ip4_address_t l_addr;
- int i, j;
- int rv;
+ snat_address_resolve_t *arp;
+ snat_address_t *ap;
u8 twice_nat = 0;
- snat_address_t *addresses = sm->addresses;
+ int i, rv;
if (!sm->enabled)
- return;
-
- for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
{
- if (sw_if_index == sm->auto_add_sw_if_indices[i])
- goto match;
+ return;
}
- for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
+ if (nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
{
twice_nat = 1;
- addresses = sm->twice_nat_addresses;
- if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
- goto match;
+ if (nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
+ {
+ u32 fib_index =
+ ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+ vec_foreach (ap, sm->addresses)
+ {
+ if ((fib_index == ap->fib_index) &&
+ (address->as_u32 == ap->addr.as_u32))
+ {
+ if (!is_delete)
+ {
+ ap->addr_len = address_length;
+ ap->sw_if_index = sw_if_index;
+ ap->net.as_u32 =
+ ap->addr.as_u32 & ip4_main.fib_masks[ap->addr_len];
+
+ nat_log_debug (
+ "pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
+ format_ip4_address, &ap->addr, ap->sw_if_index,
+ format_ip4_address, &ap->net, ap->addr_len);
+ }
+ else
+ {
+ ap->addr_len = ~0;
+ }
+ break;
+ }
+ }
+ return;
+ }
}
- return;
+ arp = sm->addr_to_resolve + i;
-match:
if (!is_delete)
{
- /* Don't trip over lease renewal, static config */
- for (j = 0; j < vec_len (addresses); j++)
- if (addresses[j].addr.as_u32 == address->as_u32)
+ if (arp->is_resolved)
+ {
return;
+ }
- (void) snat_add_address (sm, address, ~0, twice_nat);
- /* Scan static map resolution vector */
- for (j = 0; j < vec_len (sm->to_resolve); j++)
+ rv = nat44_ed_add_address (address, ~0, arp->is_twice_nat);
+ if (0 == rv)
{
- rp = sm->to_resolve + j;
- if (rp->addr_only)
- continue;
- /* On this interface? */
- if (rp->sw_if_index == sw_if_index)
- {
-
- // TODO: remove if not needed (handled by function)
- /* Indetity mapping? */
- if (rp->l_addr.as_u32 == 0)
- l_addr.as_u32 = address[0].as_u32;
- else
- l_addr.as_u32 = rp->l_addr.as_u32;
-
- /* Add the static mapping */
- rv = nat44_ed_add_static_mapping (
- l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
- rp->vrf_id, ~0, rp->flags, rp->pool_addr, rp->tag);
- if (rv)
- {
- nat_elog_notice_X1 (sm, "add_static_mapping returned %d",
- "i4", rv);
- }
- }
+ arp->is_resolved = 1;
}
- return;
}
else
{
- (void) snat_del_address (sm, address[0], 1, twice_nat);
- return;
+ if (!arp->is_resolved)
+ {
+ return;
+ }
+
+ rv = nat44_ed_del_address (address[0], arp->is_twice_nat);
+ if (0 == rv)
+ {
+ arp->is_resolved = 0;
+ }
}
}
int
-snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
- u8 twice_nat)
+nat44_ed_add_interface_address (u32 sw_if_index, u8 twice_nat)
{
+ snat_main_t *sm = &snat_main;
ip4_main_t *ip4_main = sm->ip4_main;
ip4_address_t *first_int_addr;
- snat_static_map_resolve_t *rp;
- u32 *indices_to_delete = 0;
- int i, j;
- u32 *auto_add_sw_if_indices =
- twice_nat ? sm->
- auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
+ snat_address_resolve_t *ap;
+ int rv;
- first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0 /* just want the address */
- );
+ if (!sm->enabled)
+ {
+ nat_log_err ("nat44 is disabled");
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
- for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
+ if (!nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, 0))
{
- if (auto_add_sw_if_indices[i] == sw_if_index)
- {
- if (is_del)
- {
- /* if have address remove it */
- if (first_int_addr)
- (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
- else
- {
- for (j = 0; j < vec_len (sm->to_resolve); j++)
- {
- rp = sm->to_resolve + j;
- if (rp->sw_if_index == sw_if_index)
- vec_add1 (indices_to_delete, j);
- }
- if (vec_len (indices_to_delete))
- {
- for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
- vec_del1 (sm->to_resolve, j);
- vec_free (indices_to_delete);
- }
- }
- if (twice_nat)
- vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
- else
- vec_del1 (sm->auto_add_sw_if_indices, i);
- }
- else
- return VNET_API_ERROR_VALUE_EXIST;
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
- return 0;
+ vec_add2 (sm->addr_to_resolve, ap, 1);
+ ap->sw_if_index = sw_if_index;
+ ap->is_twice_nat = twice_nat;
+ ap->is_resolved = 0;
+
+ first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
+ if (first_int_addr)
+ {
+ rv = nat44_ed_add_address (first_int_addr, ~0, twice_nat);
+ if (0 != rv)
+ {
+ nat44_ed_del_addr_resolve_record (sw_if_index, twice_nat);
+ return rv;
}
+ ap->is_resolved = 1;
}
- if (is_del)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ return 0;
+}
- /* add to the auto-address list */
- if (twice_nat)
- vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
- else
- vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
+int
+nat44_ed_del_interface_address (u32 sw_if_index, u8 twice_nat)
+{
+ snat_main_t *sm = &snat_main;
+ ip4_main_t *ip4_main = sm->ip4_main;
+ ip4_address_t *first_int_addr;
- /* If the address is already bound - or static - add it now */
+ if (!sm->enabled)
+ {
+ nat_log_err ("nat44 is disabled");
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ if (nat44_ed_del_addr_resolve_record (sw_if_index, twice_nat))
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
if (first_int_addr)
- (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
+ {
+ return nat44_ed_del_address (first_int_addr[0], twice_nat);
+ }
return 0;
}
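
Together the pair replaces snat_add_interface_address: add records the interface (and immediately promotes its first address to a pool address if one is already bound), del drops the record and releases the address. A hypothetical configuration sequence:

int rv = nat44_ed_add_interface_address (sw_if_index, 0 /* twice_nat */);
/* ... later, on deconfiguration ... */
if (0 == rv)
  rv = nat44_ed_del_interface_address (sw_if_index, 0 /* twice_nat */);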
int
-nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
- ip4_address_t * eh_addr, u16 eh_port, u8 proto,
+nat44_ed_del_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
+ ip4_address_t *eh_addr, u16 eh_port, u8 proto,
u32 vrf_id, int is_in)
{
ip4_header_t ip;
clib_bihash_kv_16_8_t kv, value;
- u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+ u32 fib_index;
snat_session_t *s;
snat_main_per_thread_data_t *tsm;
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
if (sm->num_workers > 1)
tsm = vec_elt_at_index (
@@ -3481,7 +3630,8 @@ nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
else
tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
- init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
+ init_ed_k (&kv, addr->as_u32, port, eh_addr->as_u32, eh_port, fib_index,
+ proto);
if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
{
return VNET_API_ERROR_NO_SUCH_ENTRY;
@@ -3490,7 +3640,7 @@ nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value)))
return VNET_API_ERROR_UNSPECIFIED;
s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
- nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
+ nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
return 0;
}
@@ -3596,13 +3746,13 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
static_always_inline void
nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
- nat_6t_flow_t *f, nat_protocol_t proto,
+ nat_6t_flow_t *f, ip_protocol_t proto,
int is_icmp_inner_ip4, int skip_saddr_rewrite)
{
udp_header_t *udp = ip4_next_header (ip);
tcp_header_t *tcp = (tcp_header_t *) udp;
- if ((NAT_PROTOCOL_TCP == proto || NAT_PROTOCOL_UDP == proto) &&
+ if ((IP_PROTOCOL_TCP == proto || IP_PROTOCOL_UDP == proto) &&
!vnet_buffer (b)->ip.reass.is_non_first_fragment)
{
if (!is_icmp_inner_ip4)
@@ -3620,7 +3770,7 @@ nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
udp->dst_port = f->rewrite.sport;
}
- if (NAT_PROTOCOL_TCP == proto)
+ if (IP_PROTOCOL_TCP == proto)
{
ip_csum_t tcp_sum = tcp->checksum;
tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta);
@@ -3628,7 +3778,7 @@ nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
mss_clamping (sm->mss_clamping, tcp, &tcp_sum);
tcp->checksum = ip_csum_fold (tcp_sum);
}
- else if (proto == NAT_PROTOCOL_UDP && udp->checksum)
+ else if (IP_PROTOCOL_UDP == proto && udp->checksum)
{
ip_csum_t udp_sum = udp->checksum;
udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta);
@@ -3685,7 +3835,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
icmp46_header_t *icmp = ip4_next_header (ip);
- icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
+ nat_icmp_echo_header_t *echo = (nat_icmp_echo_header_t *) (icmp + 1);
if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment))
{
@@ -3701,7 +3851,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
{
ip_csum_t sum = icmp->checksum;
sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id,
- icmp_echo_header_t,
+ nat_icmp_echo_header_t,
identifier /* changed member */);
echo->identifier = f->rewrite.icmp_id;
icmp->checksum = ip_csum_fold (sum);
@@ -3726,8 +3876,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
return NAT_ED_TRNSL_ERR_INNER_IP_CORRUPT;
}
- nat_protocol_t inner_proto =
- ip_proto_to_nat_proto (inner_ip->protocol);
+ ip_protocol_t inner_proto = inner_ip->protocol;
ip_csum_t old_icmp_sum = icmp->checksum;
ip_csum_t old_inner_ip_sum = inner_ip->checksum;
@@ -3739,7 +3888,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
switch (inner_proto)
{
- case NAT_PROTOCOL_UDP:
+ case IP_PROTOCOL_UDP:
udp = (udp_header_t *) (inner_ip + 1);
if (!it_fits (vm, b, udp, sizeof (*udp)))
{
@@ -3760,7 +3909,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
new_icmp_sum = ip_csum_fold (new_icmp_sum);
icmp->checksum = new_icmp_sum;
break;
- case NAT_PROTOCOL_TCP:
+ case IP_PROTOCOL_TCP:
tcp = (tcp_header_t *) (inner_ip + 1);
if (!it_fits (vm, b, tcp, sizeof (*tcp)))
{
@@ -3781,7 +3930,10 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
new_icmp_sum = ip_csum_fold (new_icmp_sum);
icmp->checksum = new_icmp_sum;
break;
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
+ nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
+ 1 /* is_icmp_inner_ip4 */,
+ 0 /* skip_saddr_rewrite */);
if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
{
icmp46_header_t *inner_icmp = ip4_next_header (inner_ip);
@@ -3789,19 +3941,21 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
{
return NAT_ED_TRNSL_ERR_PACKET_TRUNCATED;
}
- icmp_echo_header_t *inner_echo =
- (icmp_echo_header_t *) (inner_icmp + 1);
+ nat_icmp_echo_header_t *inner_echo =
+ (nat_icmp_echo_header_t *) (inner_icmp + 1);
if (f->rewrite.icmp_id != inner_echo->identifier)
{
ip_csum_t sum = icmp->checksum;
- sum = ip_csum_update (
- sum, inner_echo->identifier, f->rewrite.icmp_id,
- icmp_echo_header_t, identifier /* changed member */);
+ sum = ip_csum_update (sum, inner_echo->identifier,
+ f->rewrite.icmp_id,
+ nat_icmp_echo_header_t,
+ identifier /* changed member */);
icmp->checksum = ip_csum_fold (sum);
ip_csum_t inner_sum = inner_icmp->checksum;
inner_sum = ip_csum_update (
sum /* bug: should be inner_sum */, inner_echo->identifier, f->rewrite.icmp_id,
- icmp_echo_header_t, identifier /* changed member */);
+ nat_icmp_echo_header_t,
+ identifier /* changed member */);
inner_icmp->checksum = ip_csum_fold (inner_sum);
inner_echo->identifier = f->rewrite.icmp_id;
}
@@ -3820,7 +3974,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
static_always_inline nat_translation_error_e
nat_6t_flow_buf_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
ip4_header_t *ip, nat_6t_flow_t *f,
- nat_protocol_t proto, int is_output_feature,
+ ip_protocol_t proto, int is_output_feature,
int is_i2o)
{
if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
@@ -3828,7 +3982,7 @@ nat_6t_flow_buf_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index;
}
- if (NAT_PROTOCOL_ICMP == proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
if (ip->src_address.as_u32 != f->rewrite.saddr.as_u32)
{
@@ -3856,7 +4010,7 @@ nat_6t_flow_buf_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
nat_translation_error_e
nat_6t_flow_buf_translate_i2o (vlib_main_t *vm, snat_main_t *sm,
vlib_buffer_t *b, ip4_header_t *ip,
- nat_6t_flow_t *f, nat_protocol_t proto,
+ nat_6t_flow_t *f, ip_protocol_t proto,
int is_output_feature)
{
return nat_6t_flow_buf_translate (vm, sm, b, ip, f, proto, is_output_feature,
@@ -3866,116 +4020,130 @@ nat_6t_flow_buf_translate_i2o (vlib_main_t *vm, snat_main_t *sm,
nat_translation_error_e
nat_6t_flow_buf_translate_o2i (vlib_main_t *vm, snat_main_t *sm,
vlib_buffer_t *b, ip4_header_t *ip,
- nat_6t_flow_t *f, nat_protocol_t proto,
+ nat_6t_flow_t *f, ip_protocol_t proto,
int is_output_feature)
{
return nat_6t_flow_buf_translate (vm, sm, b, ip, f, proto, is_output_feature,
0 /* is_i2o */);
}
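
A hedged caller-side sketch of the new buf-translate signatures, assuming a session s with a populated i2o flow inside a node function where vm, sm, b and ip are in scope; proto is now the wire protocol number taken straight from the IP header instead of the retired NAT-private enum:

    nat_translation_error_e err = nat_6t_flow_buf_translate_i2o (
      vm, sm, b, ip, &s->i2o, ip->protocol, 0 /* is_output_feature */);
    if (NAT_ED_TRNSL_ERR_SUCCESS != err)
      {
	/* drop the buffer; the reason can be rendered with
	 * format_nat_ed_translation_error */
      }
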
-u8 *
-format_nat_6t (u8 *s, va_list *args)
+static_always_inline void
+nat_syslog_nat44_sess (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
+ u16 isport, ip4_address_t *xsaddr, u16 xsport,
+ ip4_address_t *idaddr, u16 idport,
+ ip4_address_t *xdaddr, u16 xdport, u8 proto, u8 is_add,
+ u8 is_twicenat)
{
- nat_6t_t *t = va_arg (*args, nat_6t_t *);
+ syslog_msg_t syslog_msg;
+ fib_table_t *fib;
- s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u",
- format_ip4_address, t->saddr.as_u8,
- clib_net_to_host_u16 (t->sport), format_ip4_address,
- t->daddr.as_u8, clib_net_to_host_u16 (t->dport),
- format_ip_protocol, t->proto, t->fib_index);
- return s;
+ if (!syslog_is_enabled ())
+ return;
+
+ if (syslog_severity_filter_block (SADD_SDEL_SEVERITY))
+ return;
+
+ fib = fib_table_get (sfibix, FIB_PROTOCOL_IP4);
+
+ syslog_msg_init (&syslog_msg, NAT_FACILITY, SADD_SDEL_SEVERITY, NAT_APPNAME,
+ is_add ? SADD_MSGID : SDEL_MSGID);
+
+ syslog_msg_sd_init (&syslog_msg, NSESS_SDID);
+ syslog_msg_add_sd_param (&syslog_msg, SSUBIX_SDPARAM_NAME, "%d", ssubix);
+ syslog_msg_add_sd_param (&syslog_msg, SVLAN_SDPARAM_NAME, "%d",
+ fib->ft_table_id);
+ syslog_msg_add_sd_param (&syslog_msg, IATYP_SDPARAM_NAME, IATYP_IPV4);
+ syslog_msg_add_sd_param (&syslog_msg, ISADDR_SDPARAM_NAME, "%U",
+ format_ip4_address, isaddr);
+ syslog_msg_add_sd_param (&syslog_msg, ISPORT_SDPARAM_NAME, "%d",
+ clib_net_to_host_u16 (isport));
+ syslog_msg_add_sd_param (&syslog_msg, XATYP_SDPARAM_NAME, IATYP_IPV4);
+ syslog_msg_add_sd_param (&syslog_msg, XSADDR_SDPARAM_NAME, "%U",
+ format_ip4_address, xsaddr);
+ syslog_msg_add_sd_param (&syslog_msg, XSPORT_SDPARAM_NAME, "%d",
+ clib_net_to_host_u16 (xsport));
+ syslog_msg_add_sd_param (&syslog_msg, PROTO_SDPARAM_NAME, "%d", proto);
+ syslog_msg_add_sd_param (&syslog_msg, XDADDR_SDPARAM_NAME, "%U",
+ format_ip4_address, xdaddr);
+ syslog_msg_add_sd_param (&syslog_msg, XDPORT_SDPARAM_NAME, "%d",
+ clib_net_to_host_u16 (xdport));
+ if (is_twicenat)
+ {
+ syslog_msg_add_sd_param (&syslog_msg, IDADDR_SDPARAM_NAME, "%U",
+ format_ip4_address, idaddr);
+ syslog_msg_add_sd_param (&syslog_msg, IDPORT_SDPARAM_NAME, "%d",
+ clib_net_to_host_u16 (idport));
+ }
+
+ syslog_msg_send (&syslog_msg);
}
-u8 *
-format_nat_ed_translation_error (u8 *s, va_list *args)
+void
+nat_syslog_nat44_sadd (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
+ u16 isport, ip4_address_t *idaddr, u16 idport,
+ ip4_address_t *xsaddr, u16 xsport,
+ ip4_address_t *xdaddr, u16 xdport, u8 proto,
+ u8 is_twicenat)
{
- nat_translation_error_e e = va_arg (*args, nat_translation_error_e);
-
- switch (e)
- {
- case NAT_ED_TRNSL_ERR_SUCCESS:
- s = format (s, "success");
- break;
- case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED:
- s = format (s, "translation-failed");
- break;
- case NAT_ED_TRNSL_ERR_FLOW_MISMATCH:
- s = format (s, "flow-mismatch");
- break;
- case NAT_ED_TRNSL_ERR_PACKET_TRUNCATED:
- s = format (s, "packet-truncated");
- break;
- case NAT_ED_TRNSL_ERR_INNER_IP_CORRUPT:
- s = format (s, "inner-ip-corrupted");
- break;
- case NAT_ED_TRNSL_ERR_INVALID_CSUM:
- s = format (s, "invalid-checksum");
- break;
- }
- return s;
+ nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
+ idaddr, idport, xdaddr, xdport, proto, 1,
+ is_twicenat);
}
-u8 *
-format_nat_6t_flow (u8 *s, va_list *args)
+void
+nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
+ u16 isport, ip4_address_t *idaddr, u16 idport,
+ ip4_address_t *xsaddr, u16 xsport,
+ ip4_address_t *xdaddr, u16 xdport, u8 proto,
+ u8 is_twicenat)
+{
+ nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
+ idaddr, idport, xdaddr, xdport, proto, 0,
+ is_twicenat);
+}
+__clib_export void
+nat44_original_dst_lookup (ip4_address_t *i2o_src, u16 i2o_src_port,
+ ip4_address_t *i2o_dst, u16 i2o_dst_port,
+ ip_protocol_t proto, u32 *original_dst,
+ u16 *original_dst_port)
{
- nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *);
+ snat_main_per_thread_data_t *tsm;
+ snat_main_t *sm = &snat_main;
+ u32 fib_index = 0;
+ snat_session_t *s;
+ ip4_header_t ip;
- s = format (s, "match: %U ", format_nat_6t, &f->match);
- int r = 0;
- if (f->ops & NAT_FLOW_OP_SADDR_REWRITE)
- {
- s = format (s, "rewrite: saddr %U ", format_ip4_address,
- f->rewrite.saddr.as_u8);
- r = 1;
- }
- if (f->ops & NAT_FLOW_OP_SPORT_REWRITE)
- {
- if (!r)
- {
- s = format (s, "rewrite: ");
- r = 1;
- }
- s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport));
- }
- if (f->ops & NAT_FLOW_OP_DADDR_REWRITE)
+ ip.src_address.as_u32 = i2o_src->as_u32;
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, 0);
+
+ if (sm->num_workers > 1)
{
- if (!r)
- {
- s = format (s, "rewrite: ");
- r = 1;
- }
- s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8);
+ tsm = vec_elt_at_index (
+ sm->per_thread_data,
+ nat44_ed_get_in2out_worker_index (0, &ip, fib_index, 0));
}
- if (f->ops & NAT_FLOW_OP_DPORT_REWRITE)
+ else
{
- if (!r)
- {
- s = format (s, "rewrite: ");
- r = 1;
- }
- s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport));
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
}
- if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
+
+ /* query */
+ clib_bihash_kv_16_8_t kv = { 0 }, value;
+ init_ed_k (&kv, i2o_src->as_u32, i2o_src_port, i2o_dst->as_u32, i2o_dst_port,
+ fib_index, proto);
+ if (tsm->sessions == NULL ||
+ clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
{
- if (!r)
- {
- s = format (s, "rewrite: ");
- r = 1;
- }
- s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id));
+ return;
}
- if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
+ s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
+ if (s)
{
- if (!r)
- {
- s = format (s, "rewrite: ");
- r = 1;
- }
- s = format (s, "txfib %u ", f->rewrite.fib_index);
+ *original_dst = s->i2o.rewrite.saddr.as_u32;
+ *original_dst_port = s->i2o.rewrite.sport;
}
- return s;
+ return;
}
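
Because the lookup is exported, code outside the plugin can ask for a flow's pre-translation destination. A hedged sketch, with placeholder addresses and ports in network byte order as stored in the flow hash:

    ip4_address_t cli_addr = { 0 }, dst_addr = { 0 }; /* placeholders */
    u32 orig_dst = 0;
    u16 orig_dst_port = 0;

    nat44_original_dst_lookup (&cli_addr, clib_host_to_net_u16 (40000),
			       &dst_addr, clib_host_to_net_u16 (80),
			       IP_PROTOCOL_TCP, &orig_dst, &orig_dst_port);
    /* on a hit, the outputs carry the session's i2o rewrite source
     * address/port, which this API reports as the original destination */
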
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/nat/nat44-ed/nat44_ed.h b/src/plugins/nat/nat44-ed/nat44_ed.h
index b74b46f81b7..706511475cf 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed.h
+++ b/src/plugins/nat/nat44-ed/nat44_ed.h
@@ -12,10 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-/**
- * @file nat.c
- * NAT plugin global declarations
- */
+
#ifndef __included_nat44_ed_h__
#define __included_nat44_ed_h__
@@ -39,6 +36,16 @@
/* default number of worker handoff frame queue elements */
#define NAT_FQ_NELTS_DEFAULT 64
+/* number of attempts to get a port for the ED overloading algorithm;
+ * if rolling the dice this many times doesn't produce a free port, it is
+ * treated as if there were no free ports available, to conserve resources */
+#define ED_PORT_ALLOC_ATTEMPTS (10)
+
+/* system ports range is 0-1023, first user port is 1024 per
+ * https://www.rfc-editor.org/rfc/rfc6335#section-6
+ */
+#define ED_USER_PORT_OFFSET 1024
+
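
A hedged sketch of the bounded retry these constants imply; random_u32 is the vppinfra PRNG, while port_is_free is a hypothetical stand-in for the real availability check, which is not shown here:

    /* sketch only: probe up to ED_PORT_ALLOC_ATTEMPTS random user ports */
    static int
    sketch_alloc_user_port (u32 *seed, u16 *out_port)
    {
      int i;
      for (i = 0; i < ED_PORT_ALLOC_ATTEMPTS; i++)
	{
	  u16 port = ED_USER_PORT_OFFSET +
		     (random_u32 (seed) % (65536 - ED_USER_PORT_OFFSET));
	  if (port_is_free (port)) /* hypothetical predicate */
	    {
	      *out_port = port;
	      return 0;
	    }
	}
      return -1; /* behave as if no free ports were available */
    }
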
/* NAT buffer flags */
#define SNAT_FLAG_HAIRPINNING (1 << 0)
@@ -58,16 +65,9 @@ typedef enum nat44_config_flags_t_
typedef struct
{
- /* nat44 plugin features */
- u8 static_mapping_only;
- u8 connection_tracking;
-
u32 inside_vrf;
u32 outside_vrf;
-
- /* maximum number of sessions */
u32 sessions;
-
} nat44_config_t;
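
With the static-mapping-only and connection-tracking knobs removed, enabling the plugin reduces to picking VRFs and a session ceiling. A minimal sketch against the nat44_plugin_enable entry point declared further down in this header; the numbers are illustrative only:

    nat44_config_t c = {
      .inside_vrf = 0,
      .outside_vrf = 0,
      .sessions = 64 << 10, /* illustrative ceiling, not a recommendation */
    };
    int rv = nat44_plugin_enable (c);
    if (rv)
      /* non-zero means the enable failed */;
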
typedef enum
@@ -91,46 +91,12 @@ typedef struct
u32 arc_next_index;
} nat_pre_trace_t;
-/* External address and port allocation modes */
-#define foreach_nat_addr_and_port_alloc_alg \
- _(0, DEFAULT, "default") \
- _(1, MAPE, "map-e") \
- _(2, RANGE, "port-range")
-
-typedef enum
-{
-#define _(v, N, s) NAT_ADDR_AND_PORT_ALLOC_ALG_##N = v,
- foreach_nat_addr_and_port_alloc_alg
-#undef _
-} nat_addr_and_port_alloc_alg_t;
-
-/* Session state */
-#define foreach_snat_session_state \
- _(0, UNKNOWN, "unknown") \
- _(1, UDP_ACTIVE, "udp-active") \
- _(2, TCP_SYN_SENT, "tcp-syn-sent") \
- _(3, TCP_ESTABLISHED, "tcp-established") \
- _(4, TCP_FIN_WAIT, "tcp-fin-wait") \
- _(5, TCP_CLOSE_WAIT, "tcp-close-wait") \
- _(6, TCP_CLOSING, "tcp-closing") \
- _(7, TCP_LAST_ACK, "tcp-last-ack") \
- _(8, TCP_CLOSED, "tcp-closed") \
- _(9, ICMP_ACTIVE, "icmp-active")
-
-typedef enum
-{
-#define _(v, N, s) SNAT_SESSION_##N = v,
- foreach_snat_session_state
-#undef _
-} snat_session_state_t;
-
#define foreach_nat_in2out_ed_error \
_ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \
_ (OUT_OF_PORTS, "out of ports") \
_ (BAD_ICMP_TYPE, "unsupported ICMP type") \
_ (MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \
_ (NON_SYN, "non-SYN packet try to create session") \
- _ (TCP_CLOSED, "drops due to TCP in transitory timeout") \
_ (TRNSL_FAILED, "couldn't translate packet")
typedef enum
@@ -160,19 +126,42 @@ typedef enum
NAT_OUT2IN_ED_N_ERROR,
} nat_out2in_ed_error_t;
+typedef enum
+{
+ NAT44_ED_TCP_FLAG_FIN = 0,
+ NAT44_ED_TCP_FLAG_SYN,
+ NAT44_ED_TCP_FLAG_RST,
+ NAT44_ED_TCP_FLAG_ACK,
+ NAT44_ED_TCP_N_FLAG,
+} nat44_ed_tcp_flag_e;
+
+typedef enum
+{
+ NAT44_ED_DIR_I2O = 0,
+ NAT44_ED_DIR_O2I,
+ NAT44_ED_N_DIR,
+} nat44_ed_dir_e;
/* Endpoint dependent TCP session state */
-#define NAT44_SES_I2O_FIN 1
-#define NAT44_SES_O2I_FIN 2
-#define NAT44_SES_I2O_FIN_ACK 4
-#define NAT44_SES_O2I_FIN_ACK 8
-#define NAT44_SES_I2O_SYN 16
-#define NAT44_SES_O2I_SYN 32
-#define NAT44_SES_RST 64
+typedef enum
+{
+ NAT44_ED_TCP_STATE_CLOSED = 0,
+ NAT44_ED_TCP_STATE_ESTABLISHED,
+ NAT44_ED_TCP_STATE_CLOSING,
+ NAT44_ED_TCP_N_STATE,
+} nat44_ed_tcp_state_e;
+
+format_function_t format_ed_session_kvp;
+format_function_t format_snat_session;
+format_function_t format_snat_static_mapping;
+format_function_t format_snat_static_map_to_resolve;
+format_function_t format_nat_ed_translation_error;
+format_function_t format_nat_6t_flow;
+format_function_t format_nat_6t;
+format_function_t format_nat44_ed_tcp_state;
/* Session flags */
#define SNAT_SESSION_FLAG_STATIC_MAPPING (1 << 0)
-#define SNAT_SESSION_FLAG_UNKNOWN_PROTO (1 << 1)
#define SNAT_SESSION_FLAG_LOAD_BALANCING (1 << 2)
#define SNAT_SESSION_FLAG_TWICE_NAT (1 << 3)
#define SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT (1 << 4)
@@ -308,7 +297,7 @@ typedef CLIB_PACKED(struct
u16 port;
} in2out;
- nat_protocol_t nat_proto;
+ ip_protocol_t proto;
nat_6t_flow_t i2o;
nat_6t_flow_t o2i;
@@ -341,10 +330,8 @@ typedef CLIB_PACKED(struct
u16 ext_host_nat_port;
/* TCP session state */
- u8 state;
- u32 i2o_fin_seq;
- u32 o2i_fin_seq;
- u64 tcp_closed_timestamp;
+ u8 tcp_flags[NAT44_ED_N_DIR];
+ nat44_ed_tcp_state_e tcp_state;
/* per vrf sessions index */
u32 per_vrf_sessions_index;
@@ -355,29 +342,14 @@ typedef CLIB_PACKED(struct
typedef struct
{
ip4_address_t addr;
+ ip4_address_t net;
+ u32 sw_if_index;
u32 fib_index;
-#define _(N, i, n, s) \
- u32 busy_##n##_ports; \
- u32 * busy_##n##_ports_per_thread; \
- u32 busy_##n##_port_refcounts[65535];
- foreach_nat_protocol
-#undef _
+ u32 addr_len;
} snat_address_t;
typedef struct
{
- u32 fib_index;
- u32 ref_count;
-} nat_fib_t;
-
-typedef struct
-{
- u32 fib_index;
- u32 refcount;
-} nat_outside_fib_t;
-
-typedef struct
-{
/* backend IP address */
ip4_address_t addr;
/* backend port number */
@@ -412,7 +384,7 @@ typedef enum
typedef struct
{
- /* prefered pool address */
+ /* preferred pool address */
ip4_address_t pool_addr;
/* local IP address */
ip4_address_t local_addr;
@@ -426,7 +398,7 @@ typedef struct
u32 vrf_id;
u32 fib_index;
/* protocol */
- nat_protocol_t proto;
+ ip_protocol_t proto;
/* 0 = disabled, otherwise client IP affinity sticky time in seconds */
u32 affinity;
/* worker threads used by backends/local host */
@@ -449,21 +421,31 @@ typedef struct
typedef struct
{
+ u8 is_resolved;
ip4_address_t l_addr;
ip4_address_t pool_addr;
u16 l_port;
u16 e_port;
u32 sw_if_index;
u32 vrf_id;
- nat_protocol_t proto;
+ ip_protocol_t proto;
u32 flags;
- int addr_only;
- int twice_nat;
- int out2in_only;
- int identity_nat;
- int exact;
u8 *tag;
-} snat_static_map_resolve_t;
+} snat_static_mapping_resolve_t;
+
+typedef struct
+{
+ u8 is_resolved;
+ u8 is_twice_nat;
+ u32 sw_if_index;
+} snat_address_resolve_t;
+
+typedef struct
+{
+ u32 count;
+ u32 sw_if_index;
+ ip4_address_t addr;
+} snat_fib_entry_reg_t;
typedef struct
{
@@ -487,7 +469,7 @@ typedef struct
/* real thread index */
u32 thread_index;
- per_vrf_sessions_t *per_vrf_sessions_vec;
+ per_vrf_sessions_t *per_vrf_sessions_pool;
} snat_main_per_thread_data_t;
@@ -498,17 +480,24 @@ u32 nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
u32 nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
u32 rx_fib_index, u8 is_output);
-/* Return worker thread index for given packet */
-/* NAT address and port allocation function */
-typedef int (nat_alloc_out_addr_and_port_function_t) (snat_address_t *
- addresses,
- u32 fib_index,
- u32 thread_index,
- nat_protocol_t proto,
- ip4_address_t * addr,
- u16 * port,
- u16 port_per_thread,
- u32 snat_thread_index);
+typedef struct nat_fib_s
+{
+ u32 fib_index;
+ u32 ref_count;
+} nat_fib_t;
+
+typedef struct vrf_route_s
+{
+ u32 vrf_id;
+ u32 fib_index;
+} vrf_route_t;
+
+typedef struct vrf_table_s
+{
+ u32 table_vrf_id;
+ u32 table_fib_index;
+ vrf_route_t *routes;
+} vrf_table_t;
typedef struct snat_main_s
{
@@ -521,12 +510,6 @@ typedef struct snat_main_s
/* Per thread data */
snat_main_per_thread_data_t *per_thread_data;
- /* Find a static mapping by local */
- clib_bihash_8_8_t static_mapping_by_local;
-
- /* Find a static mapping by external */
- clib_bihash_8_8_t static_mapping_by_external;
-
/* Static mapping pool */
snat_static_mapping_t *static_mappings;
@@ -537,39 +520,40 @@ typedef struct snat_main_s
/* Endpoint dependent lookup table */
clib_bihash_16_8_t flow_hash;
+ // vector of fibs
+ nat_fib_t *fibs;
+
+ u32 inside_vrf_id;
+ u32 inside_fib_index;
+
+ u32 outside_vrf_id;
+ u32 outside_fib_index;
+
+ // vector of outside fibs
+ nat_fib_t *outside_fibs;
+
+ // VRF routing table for dynamic sessions
+ vrf_table_t *vrf_tables;
+
/* Interface pool */
snat_interface_t *interfaces;
snat_interface_t *output_feature_interfaces;
+ // kept for backward compatibility with a broken API
+ snat_interface_t *output_feature_dummy_interfaces;
/* Vector of outside addresses */
snat_address_t *addresses;
- /* Address and port allocation function */
- nat_alloc_out_addr_and_port_function_t *alloc_addr_and_port;
- /* Address and port allocation type */
- nat_addr_and_port_alloc_alg_t addr_and_port_alloc_alg;
- /* Port set parameters (MAP-E) */
- u8 psid_offset;
- u8 psid_length;
- u16 psid;
- /* Port range parameters */
- u16 start_port;
- u16 end_port;
-
- /* vector of fibs */
- nat_fib_t *fibs;
-
- /* vector of outside fibs */
- nat_outside_fib_t *outside_fibs;
-
/* Vector of twice NAT addresses for external hosts */
snat_address_t *twice_nat_addresses;
- /* sw_if_indices whose intfc addresses should be auto-added */
- u32 *auto_add_sw_if_indices;
- u32 *auto_add_sw_if_indices_twice_nat;
+ /* interfaces whose first address should be auto-added */
+ snat_address_resolve_t *addr_to_resolve;
+
+ /* vector of fib entries */
+ snat_fib_entry_reg_t *fib_entry_reg;
/* vector of interface address static mappings to resolve. */
- snat_static_map_resolve_t *to_resolve;
+ snat_static_mapping_resolve_t *sm_to_resolve;
/* Randomize port allocation order */
u32 random_seed;
@@ -579,20 +563,11 @@ typedef struct snat_main_s
u32 fq_in2out_output_index;
u32 fq_out2in_index;
- u32 out2in_node_index;
- u32 in2out_node_index;
- u32 in2out_output_node_index;
-
nat44_config_t rconfig;
- //nat44_config_t cconfig;
/* If forwarding is enabled */
u8 forwarding_enabled;
- /* static mapping config */
- u8 static_mapping_only;
- u8 static_mapping_connection_tracking;
-
/* Is translation memory size calculated or user defined */
u8 translation_memory_size_set;
@@ -600,11 +575,6 @@ typedef struct snat_main_s
u32 max_translations_per_thread;
u32 *max_translations_per_fib;
- u32 outside_vrf_id;
- u32 outside_fib_index;
- u32 inside_vrf_id;
- u32 inside_fib_index;
-
nat_timeouts_t timeouts;
/* TCP MSS clamping */
@@ -657,24 +627,27 @@ typedef struct snat_main_s
u8 log_level;
/* convenience */
- api_main_t *api_main;
ip4_main_t *ip4_main;
- ip_lookup_main_t *ip4_lookup_main;
fib_source_t fib_src_hi;
fib_source_t fib_src_low;
- /* pat - dynamic mapping enabled or conneciton tracking */
- u8 pat;
-
/* number of worker handoff frame queue elements */
u32 frame_queue_nelts;
/* nat44 plugin enabled */
u8 enabled;
- vnet_main_t *vnet_main;
-
+ /* TCP session state machine table:
+ * first dimension is possible states
+ * second dimension is direction (in2out/out2in)
+ * third dimension is TCP flag (SYN, RST, FIN)
+ *
+ * value is next state to change to
+ */
+ nat44_ed_tcp_state_e tcp_state_change_table[NAT44_ED_TCP_N_STATE]
+ [NAT44_ED_N_DIR]
+ [NAT44_ED_TCP_N_FLAG];
} snat_main_t;
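
A hedged sketch of how the table is meant to be consulted, per the comment above (its initialization is not part of this diff): index by current state, packet direction, and observed TCP flag to obtain the next state.

    /* sketch: advance the session TCP state on an observed flag */
    static_always_inline void
    sketch_tcp_state_advance (snat_main_t *sm, snat_session_t *s,
			      nat44_ed_dir_e dir, nat44_ed_tcp_flag_e flag)
    {
      s->tcp_state = sm->tcp_state_change_table[s->tcp_state][dir][flag];
    }
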
typedef struct
@@ -689,106 +662,109 @@ typedef struct
uword *cached_presence_by_ip4_address;
} snat_runtime_t;
+/*
+ * Why is this here? Because we don't need to touch this layer to
+ * simply reply to an icmp. We need to change id to a unique
+ * value to NAT an echo request/reply.
+ */
+
extern snat_main_t snat_main;
-// nat pre ed next_node feature classification
extern vlib_node_registration_t nat_default_node;
extern vlib_node_registration_t nat_pre_in2out_node;
extern vlib_node_registration_t nat_pre_out2in_node;
-extern vlib_node_registration_t snat_in2out_node;
-extern vlib_node_registration_t snat_in2out_output_node;
-extern vlib_node_registration_t snat_out2in_node;
-extern vlib_node_registration_t snat_in2out_worker_handoff_node;
-extern vlib_node_registration_t snat_in2out_output_worker_handoff_node;
-extern vlib_node_registration_t snat_out2in_worker_handoff_node;
extern vlib_node_registration_t nat44_ed_in2out_node;
extern vlib_node_registration_t nat44_ed_in2out_output_node;
extern vlib_node_registration_t nat44_ed_out2in_node;
-extern fib_source_t nat_fib_src_hi;
-extern fib_source_t nat_fib_src_low;
-
-/* format functions */
-format_function_t format_snat_static_mapping;
-format_function_t format_snat_static_map_to_resolve;
-format_function_t format_snat_session;
-format_function_t format_snat_key;
-format_function_t format_static_mapping_key;
-format_function_t format_nat_protocol;
-format_function_t format_nat_addr_and_port_alloc_alg;
-/* unformat functions */
-unformat_function_t unformat_nat_protocol;
+extern vlib_node_registration_t snat_in2out_worker_handoff_node;
+extern vlib_node_registration_t snat_in2out_output_worker_handoff_node;
+extern vlib_node_registration_t snat_out2in_worker_handoff_node;
/** \brief Check if SNAT session is created from static mapping.
@param s SNAT session
- @return 1 if SNAT session is created from static mapping otherwise 0
-*/
-#define snat_is_session_static(s) (s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING)
-
-/** \brief Check if SNAT session for unknown protocol.
- @param s SNAT session
- @return 1 if SNAT session for unknown protocol otherwise 0
+ @return true if SNAT session is created from static mapping, otherwise false
*/
-#define snat_is_unk_proto_session(s) (s->flags & SNAT_SESSION_FLAG_UNKNOWN_PROTO)
+always_inline bool
+nat44_ed_is_session_static (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING;
+}
/** \brief Check if NAT session is twice NAT.
@param s NAT session
- @return 1 if NAT session is twice NAT
+ @return true if NAT session is twice NAT
*/
-#define is_twice_nat_session(s) (s->flags & SNAT_SESSION_FLAG_TWICE_NAT)
+always_inline bool
+nat44_ed_is_twice_nat_session (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_TWICE_NAT;
+}
/** \brief Check if NAT session is load-balancing.
@param s NAT session
- @return 1 if NAT session is load-balancing
+ @return true if NAT session is load-balancing
*/
-#define is_lb_session(s) (s->flags & SNAT_SESSION_FLAG_LOAD_BALANCING)
+always_inline bool
+nat44_ed_is_lb_session (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_LOAD_BALANCING;
+}
/** \brief Check if NAT session is forwarding bypass.
@param s NAT session
- @return 1 if NAT session is load-balancing
-*/
-#define is_fwd_bypass_session(s) (s->flags & SNAT_SESSION_FLAG_FWD_BYPASS)
-
-/** \brief Check if NAT session is endpoint dependent.
- @param s NAT session
- @return 1 if NAT session is endpoint dependent
+ @return true if NAT session is forwarding bypass
*/
-#define is_ed_session(s) (s->flags & SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT)
+always_inline bool
+na44_ed_is_fwd_bypass_session (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_FWD_BYPASS;
+}
/** \brief Check if NAT session has affinity record.
@param s NAT session
- @return 1 if NAT session has affinity record
+ @return true if NAT session has affinity record
*/
-#define is_affinity_sessions(s) (s->flags & SNAT_SESSION_FLAG_AFFINITY)
+always_inline bool
+nat44_ed_is_affinity_session (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_AFFINITY;
+}
/** \brief Check if exact pool address should be used.
@param s SNAT session
- @return 1 if exact pool address or 0
+ @return true if exact pool address should be used
*/
-#define is_exact_address_session(s) (s->flags & SNAT_SESSION_FLAG_EXACT_ADDRESS)
+always_inline bool
+nat44_ed_is_exact_address_session (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_EXACT_ADDRESS;
+}
/** \brief Check if NAT interface is inside.
@param i NAT interface
- @return 1 if inside interface
+ @return true if inside interface
*/
-#define nat_interface_is_inside(i) i->flags & NAT_INTERFACE_FLAG_IS_INSIDE
+always_inline bool
+nat44_ed_is_interface_inside (snat_interface_t *i)
+{
+ return i->flags & NAT_INTERFACE_FLAG_IS_INSIDE;
+}
/** \brief Check if NAT interface is outside.
@param i NAT interface
- @return 1 if outside interface
-*/
-#define nat_interface_is_outside(i) i->flags & NAT_INTERFACE_FLAG_IS_OUTSIDE
-
-/** \brief Check if NAT44 endpoint-dependent TCP session is closed.
- @param s NAT session
- @return 1 if session is closed
+ @return true if outside interface
*/
-#define nat44_is_ses_closed(s) s->state == 0xf
+always_inline bool
+nat44_ed_is_interface_outside (snat_interface_t *i)
+{
+ return i->flags & NAT_INTERFACE_FLAG_IS_OUTSIDE;
+}
/** \brief Check if client initiating TCP connection (received SYN from client)
@param t TCP header
- @return 1 if client initiating TCP connection
+ @return true if client is initiating a TCP connection
*/
always_inline bool
tcp_flags_is_init (u8 f)
@@ -844,111 +820,59 @@ is_sm_switch_address (u32 f)
return (f & NAT_SM_FLAG_SWITCH_ADDRESS);
}
-/* logging */
#define nat_log_err(...) \
vlib_log(VLIB_LOG_LEVEL_ERR, snat_main.log_class, __VA_ARGS__)
#define nat_log_warn(...) \
vlib_log(VLIB_LOG_LEVEL_WARNING, snat_main.log_class, __VA_ARGS__)
-#define nat_log_notice(...) \
- vlib_log(VLIB_LOG_LEVEL_NOTICE, snat_main.log_class, __VA_ARGS__)
#define nat_log_info(...) \
vlib_log(VLIB_LOG_LEVEL_INFO, snat_main.log_class, __VA_ARGS__)
#define nat_log_debug(...)\
vlib_log(VLIB_LOG_LEVEL_DEBUG, snat_main.log_class, __VA_ARGS__)
+clib_error_t *nat44_api_hookup (vlib_main_t *vm);
+
+int snat_set_workers (uword *bitmap);
+
+int nat44_plugin_enable (nat44_config_t c);
+int nat44_plugin_disable ();
+
+int nat44_ed_add_interface (u32 sw_if_index, u8 is_inside);
+int nat44_ed_del_interface (u32 sw_if_index, u8 is_inside);
+int nat44_ed_add_output_interface (u32 sw_if_index);
+int nat44_ed_del_output_interface (u32 sw_if_index);
+
+int nat44_ed_add_address (ip4_address_t *addr, u32 vrf_id, u8 twice_nat);
+int nat44_ed_del_address (ip4_address_t addr, u8 twice_nat);
+int nat44_ed_add_interface_address (u32 sw_if_index, u8 twice_nat);
+int nat44_ed_del_interface_address (u32 sw_if_index, u8 twice_nat);
+
+int nat44_ed_add_del_vrf_table (u32 table_vrf_id, bool is_add);
+int nat44_ed_add_del_vrf_route (u32 table_vrf_id, u32 vrf_id, bool is_add);
+void nat44_ed_del_vrf_tables ();
+
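
A hedged sketch of driving the new per-VRF session routing through the helpers declared above; VRF ids 10 and 20 are arbitrary examples:

    /* create a session routing table for VRF 10, then let its sessions
     * resolve destinations through VRF 20 as well */
    if (nat44_ed_add_del_vrf_table (10, 1 /* is_add */))
      /* table add failed */;
    if (nat44_ed_add_del_vrf_route (10, 20, 1 /* is_add */))
      /* route add failed */;
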
int nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
- u16 l_port, u16 e_port, nat_protocol_t proto,
+ u16 l_port, u16 e_port, ip_protocol_t proto,
u32 vrf_id, u32 sw_if_index, u32 flags,
ip4_address_t pool_addr, u8 *tag);
int nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
- u16 l_port, u16 e_port, nat_protocol_t proto,
+ u16 l_port, u16 e_port, ip_protocol_t proto,
u32 vrf_id, u32 sw_if_index, u32 flags);
int nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
- nat_protocol_t proto,
+ ip_protocol_t proto,
nat44_lb_addr_port_t *locals, u32 flags,
u8 *tag, u32 affinity);
int nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
- nat_protocol_t proto, u32 flags);
+ ip_protocol_t proto, u32 flags);
int nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
ip4_address_t l_addr, u16 l_port,
- nat_protocol_t proto, u32 vrf_id,
+ ip_protocol_t proto, u32 vrf_id,
u8 probability, u8 is_add);
/**
- * @brief Enable NAT44 plugin
- *
- * @param c nat44_config_t
- *
- * @return 0 on success, non-zero value otherwise
- */
-int nat44_plugin_enable (nat44_config_t c);
-
-/**
- * @brief Disable NAT44 plugin
- *
- * @return 0 on success, non-zero value otherwise
- */
-int nat44_plugin_disable ();
-
-/**
- * @brief Add external address to NAT44 pool
- *
- * @param sm snat global configuration data
- * @param addr IPv4 address
- * @param vrf_id VRF id of tenant, ~0 means independent of VRF
- * @param twice_nat 1 if twice NAT address
- *
- * @return 0 on success, non-zero value otherwise
- */
-int snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
- u8 twice_nat);
-
-/**
- * @brief Delete external address from NAT44 pool
- *
- * @param sm snat global configuration data
- * @param addr IPv4 address
- * @param delete_sm 1 if delete static mapping using address
- * @param twice_nat 1 if twice NAT address
- *
- * @return 0 on success, non-zero value otherwise
- */
-int snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
- u8 twice_nat);
-
-clib_error_t *nat44_api_hookup (vlib_main_t * vm);
-
-/**
- * @brief Set NAT plugin workers
- *
- * @param bitmap NAT workers bitmap
- *
- * @return 0 on success, non-zero value otherwise
- */
-int snat_set_workers (uword * bitmap);
-
-int nat44_ed_add_interface (u32 sw_if_index, u8 is_inside);
-int nat44_ed_del_interface (u32 sw_if_index, u8 is_inside);
-int nat44_ed_add_output_interface (u32 sw_if_index);
-int nat44_ed_del_output_interface (u32 sw_if_index);
-
-/**
- * @brief Add/delete NAT44 pool address from specific interface
- *
- * @param sw_if_index software index of the interface
- * @param is_del 1 = delete, 0 = add
- * @param twice_nat 1 = twice NAT address for external hosts
- *
- * @return 0 on success, non-zero value otherwise
- */
-int snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
- u8 twice_nat);
-
-/**
* @brief Delete NAT44 endpoint-dependent session
*
* @param sm snat global configuration data
@@ -960,20 +884,12 @@ int snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
*
* @return 0 on success, non-zero value otherwise
*/
-int nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
- ip4_address_t * eh_addr, u16 eh_port, u8 proto,
+int nat44_ed_del_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
+ ip4_address_t *eh_addr, u16 eh_port, u8 proto,
u32 vrf_id, int is_in);
-/**
- * @brief Free NAT44 session data (lookup keys, external address port)
- *
- * @param sm snat global configuration data
- * @param s NAT session
- * @param thread_index thread index
- * @param is_ha is HA event
- */
-void nat_free_session_data (snat_main_t * sm, snat_session_t * s,
- u32 thread_index, u8 is_ha);
+void nat44_ed_free_session_data (snat_main_t *sm, snat_session_t *s,
+ u32 thread_index, u8 is_ha);
/**
* @brief Set NAT44 session limit (session limit, vrf id)
@@ -993,89 +909,20 @@ int nat44_set_session_limit (u32 session_limit, u32 vrf_id);
*/
int nat44_update_session_limit (u32 session_limit, u32 vrf_id);
-/**
- * @brief Free outside address and port pair
- *
- * @param addresses vector of outside addresses
- * @param thread_index thread index
- * @param key address, port and protocol
- */
-void
-snat_free_outside_address_and_port (snat_address_t * addresses,
- u32 thread_index,
- ip4_address_t * addr,
- u16 port, nat_protocol_t protocol);
-
void expire_per_vrf_sessions (u32 fib_index);
-/**
- * @brief Match NAT44 static mapping.
- *
- * @param key address and port to match
- * @param addr external/local address of the matched mapping
- * @param port port of the matched mapping
- * @param fib_index fib index of the matched mapping
- * @param by_external if 0 match by local address otherwise match by external
- * address
- * @param is_addr_only 1 if matched mapping is address only
- * @param twice_nat matched mapping is twice NAT type
- * @param lb 1 if matched mapping is load-balanced
- * @param ext_host_addr external host address
- * @param is_identity_nat 1 if indentity mapping
- * @param out if !=0 set to pointer of the mapping structure
- *
- * @returns 0 if match found otherwise 1.
- */
-int snat_static_mapping_match (
- vlib_main_t *vm, snat_main_t *sm, ip4_address_t match_addr, u16 match_port,
- u32 match_fib_index, nat_protocol_t match_protocol,
- ip4_address_t *mapping_addr, u16 *mapping_port, u32 *mapping_fib_index,
- u8 by_external, u8 *is_addr_only, twice_nat_type_t *twice_nat,
- lb_nat_type_t *lb, ip4_address_t *ext_host_addr, u8 *is_identity_nat,
- snat_static_mapping_t **out);
-
-/**
- * @brief Add/del NAT address to FIB.
- *
- * Add the external NAT address to the FIB as receive entries. This ensures
- * that VPP will reply to ARP for this address and we don't need to enable
- * proxy ARP on the outside interface.
- *
- * @param addr IPv4 address
- * @param plen address prefix length
- * @param sw_if_index software index of the outside interface
- * @param is_add 0 = delete, 1 = add.
- */
-void snat_add_del_addr_to_fib (ip4_address_t * addr,
- u8 p_len, u32 sw_if_index, int is_add);
-
-int nat_set_outside_address_and_port (snat_address_t *addresses,
- u32 thread_index, ip4_address_t addr,
- u16 port, nat_protocol_t protocol);
-
-/*
- * Why is this here? Because we don't need to touch this layer to
- * simply reply to an icmp. We need to change id to a unique
- * value to NAT an echo request/reply.
- */
-
-typedef struct
-{
- u16 identifier;
- u16 sequence;
-} icmp_echo_header_t;
-
-typedef struct
-{
- u16 src_port, dst_port;
-} tcp_udp_header_t;
+int snat_static_mapping_match (vlib_main_t *vm, ip4_address_t match_addr,
+ u16 match_port, u32 match_fib_index,
+ ip_protocol_t match_protocol,
+ ip4_address_t *mapping_addr, u16 *mapping_port,
+ u32 *mapping_fib_index, int by_external,
+ u8 *is_addr_only, twice_nat_type_t *twice_nat,
+ lb_nat_type_t *lb, ip4_address_t *ext_host_addr,
+ u8 *is_identity_nat,
+ snat_static_mapping_t **out);
u32 get_thread_idx_by_port (u16 e_port);
-u8 *format_static_mapping_kvp (u8 *s, va_list *args);
-
-u8 *format_session_kvp (u8 *s, va_list *args);
-
u32 nat_calc_bihash_buckets (u32 n_elts);
void nat44_addresses_free (snat_address_t **addresses);
@@ -1084,6 +931,28 @@ void nat44_ed_sessions_clear ();
int nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts);
+void nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f);
+
+snat_static_mapping_t *nat44_ed_sm_i2o_lookup (snat_main_t *sm,
+ ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto);
+
+snat_static_mapping_t *nat44_ed_sm_o2i_lookup (snat_main_t *sm,
+ ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto);
+
+void nat_syslog_nat44_sadd (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
+ u16 isport, ip4_address_t *idaddr, u16 idport,
+ ip4_address_t *xsaddr, u16 xsport,
+ ip4_address_t *xdaddr, u16 xdport, u8 proto,
+ u8 is_twicenat);
+
+void nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
+ u16 isport, ip4_address_t *idaddr, u16 idport,
+ ip4_address_t *xsaddr, u16 xsport,
+ ip4_address_t *xdaddr, u16 xdport, u8 proto,
+ u8 is_twicenat);
+
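
A hedged call-site sketch for the session-add record; the variables are placeholders, ports are in network byte order, and the inside-destination pair may be left empty because the underlying helper only reads it when is_twicenat is set:

    ip4_address_t in_src_addr, ext_src_addr, dst_addr; /* placeholders */
    u16 in_src_port, ext_src_port, dst_port;	       /* network order */
    u32 session_idx = 0, fib_idx = 0;

    nat_syslog_nat44_sadd (session_idx, fib_idx, &in_src_addr, in_src_port,
			   NULL, 0 /* idaddr/idport, unused here */,
			   &ext_src_addr, ext_src_port, &dst_addr, dst_port,
			   IP_PROTOCOL_TCP, 0 /* is_twicenat */);
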
typedef enum
{
NAT_ED_TRNSL_ERR_SUCCESS = 0,
@@ -1096,17 +965,11 @@ typedef enum
nat_translation_error_e nat_6t_flow_buf_translate_i2o (
vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
- nat_6t_flow_t *f, nat_protocol_t proto, int is_output_feature);
+ nat_6t_flow_t *f, ip_protocol_t proto, int is_output_feature);
nat_translation_error_e nat_6t_flow_buf_translate_o2i (
vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
- nat_6t_flow_t *f, nat_protocol_t proto, int is_output_feature);
-
-void nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f);
-
-format_function_t format_nat_ed_translation_error;
-format_function_t format_nat_6t_flow;
-format_function_t format_ed_session_kvp;
+ nat_6t_flow_t *f, ip_protocol_t proto, int is_output_feature);
#endif /* __included_nat44_ed_h__ */
/*
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_affinity.c b/src/plugins/nat/nat44-ed/nat44_ed_affinity.c
index 89f11c64ef3..178671c6b7e 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_affinity.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_affinity.c
@@ -37,10 +37,9 @@ format_affinity_kvp (u8 * s, va_list * args)
k.as_u64[1] = v->key[1];
s = format (s, "client %U backend %U:%d proto %U index %llu",
- format_ip4_address, &k.client_addr,
- format_ip4_address, &k.service_addr,
- clib_net_to_host_u16 (k.service_port),
- format_nat_protocol, k.proto);
+ format_ip4_address, &k.client_addr, format_ip4_address,
+ &k.service_addr, clib_net_to_host_u16 (k.service_port),
+ format_ip_protocol, k.proto);
return s;
}
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_api.c b/src/plugins/nat/nat44-ed/nat44_ed_api.c
index 74d48b2d821..1f01410afce 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_api.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_api.c
@@ -31,6 +31,8 @@
#include <nat/nat44-ed/nat44_ed.api_enum.h>
#include <nat/nat44-ed/nat44_ed.api_types.h>
+#include <nat/nat44-ed/nat44_ed_inlines.h>
+
#define REPLY_MSG_ID_BASE sm->msg_id_base
#include <vlibapi/api_helper_macros.h>
@@ -47,15 +49,19 @@ vl_api_nat44_ed_plugin_enable_disable_t_handler (
if (mp->enable)
{
- c.static_mapping_only = mp->flags & NAT44_API_IS_STATIC_MAPPING_ONLY;
- c.connection_tracking = mp->flags & NAT44_API_IS_CONNECTION_TRACKING;
-
- c.inside_vrf = ntohl (mp->inside_vrf);
- c.outside_vrf = ntohl (mp->outside_vrf);
-
- c.sessions = ntohl (mp->sessions);
+ if ((mp->flags & NAT44_API_IS_STATIC_MAPPING_ONLY) ||
+ (mp->flags & NAT44_API_IS_CONNECTION_TRACKING))
+ {
+ rv = VNET_API_ERROR_UNSUPPORTED;
+ }
+ else
+ {
+ c.sessions = ntohl (mp->sessions);
+ c.inside_vrf = ntohl (mp->inside_vrf);
+ c.outside_vrf = ntohl (mp->outside_vrf);
- rv = nat44_plugin_enable (c);
+ rv = nat44_plugin_enable (c);
+ }
}
else
{
@@ -171,21 +177,6 @@ vl_api_nat44_set_session_limit_t_handler (vl_api_nat44_set_session_limit_t *
}
static void
-vl_api_nat_set_log_level_t_handler (vl_api_nat_set_log_level_t * mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_set_log_level_reply_t *rmp;
- int rv = 0;
-
- if (sm->log_level > NAT_LOG_DEBUG)
- rv = VNET_API_ERROR_UNSUPPORTED;
- else
- sm->log_level = mp->log_level;
-
- REPLY_MACRO (VL_API_NAT_SET_WORKERS_REPLY);
-}
-
-static void
vl_api_nat_ipfix_enable_disable_t_handler (vl_api_nat_ipfix_enable_disable_t *
mp)
{
@@ -217,22 +208,6 @@ vl_api_nat_set_timeouts_t_handler (vl_api_nat_set_timeouts_t * mp)
}
static void
-vl_api_nat_get_timeouts_t_handler (vl_api_nat_get_timeouts_t * mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_get_timeouts_reply_t *rmp;
- int rv = 0;
-
- REPLY_MACRO2 (VL_API_NAT_GET_TIMEOUTS_REPLY,
- ({
- rmp->udp = htonl (sm->timeouts.udp);
- rmp->tcp_established = htonl (sm->timeouts.tcp.established);
- rmp->tcp_transitory = htonl (sm->timeouts.tcp.transitory);
- rmp->icmp = htonl (sm->timeouts.icmp);
- }))
-}
-
-static void
vl_api_nat_set_mss_clamping_t_handler (vl_api_nat_set_mss_clamping_t * mp)
{
snat_main_t *sm = &snat_main;
@@ -275,12 +250,6 @@ static void
int rv = 0;
u32 *tmp;
- if (sm->static_mapping_only)
- {
- rv = VNET_API_ERROR_FEATURE_DISABLED;
- goto send_reply;
- }
-
is_add = mp->is_add;
twice_nat = mp->flags & NAT_API_IS_TWICE_NAT;
@@ -303,9 +272,13 @@ static void
for (i = 0; i < count; i++)
{
if (is_add)
- rv = snat_add_address (sm, &this_addr, vrf_id, twice_nat);
+ {
+ rv = nat44_ed_add_address (&this_addr, vrf_id, twice_nat);
+ }
else
- rv = snat_del_address (sm, this_addr, 0, twice_nat);
+ {
+ rv = nat44_ed_del_address (this_addr, twice_nat);
+ }
if (rv)
goto send_reply;
@@ -400,9 +373,9 @@ send_nat44_interface_details (snat_interface_t * i,
rmp->_vl_msg_id = ntohs (VL_API_NAT44_INTERFACE_DETAILS + sm->msg_id_base);
rmp->sw_if_index = ntohl (i->sw_if_index);
- if (nat_interface_is_inside (i))
+ if (nat44_ed_is_interface_inside (i))
rmp->flags |= NAT_API_IS_INSIDE;
- if (nat_interface_is_outside (i))
+ if (nat44_ed_is_interface_outside (i))
rmp->flags |= NAT_API_IS_OUTSIDE;
rmp->context = context;
@@ -422,74 +395,76 @@ vl_api_nat44_interface_dump_t_handler (vl_api_nat44_interface_dump_t * mp)
return;
pool_foreach (i, sm->interfaces)
- {
- send_nat44_interface_details(i, reg, mp->context);
- }
+ {
+ send_nat44_interface_details (i, reg, mp->context);
+ }
}
static void
- vl_api_nat44_interface_add_del_output_feature_t_handler
- (vl_api_nat44_interface_add_del_output_feature_t * mp)
+vl_api_nat44_ed_add_del_output_interface_t_handler (
+ vl_api_nat44_ed_add_del_output_interface_t *mp)
{
- vl_api_nat44_interface_add_del_output_feature_reply_t *rmp;
+ vl_api_nat44_ed_add_del_output_interface_reply_t *rmp;
snat_main_t *sm = &snat_main;
- u32 sw_if_index;
int rv = 0;
- VALIDATE_SW_IF_INDEX (mp);
-
- sw_if_index = ntohl (mp->sw_if_index);
+ VALIDATE_SW_IF_INDEX_END (mp);
if (mp->is_add)
{
- rv = nat44_ed_add_output_interface (sw_if_index);
+ rv = nat44_ed_add_output_interface (mp->sw_if_index);
}
else
{
- rv = nat44_ed_del_output_interface (sw_if_index);
+ rv = nat44_ed_del_output_interface (mp->sw_if_index);
}
- BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (VL_API_NAT44_INTERFACE_ADD_DEL_OUTPUT_FEATURE_REPLY);
+bad_sw_if_index:
+ REPLY_MACRO_END (VL_API_NAT44_ED_ADD_DEL_OUTPUT_INTERFACE_REPLY);
}
+#define vl_endianfun
+#include <nat/nat44-ed/nat44_ed.api.h>
+#undef vl_endianfun
static void
-send_nat44_interface_output_feature_details (snat_interface_t * i,
- vl_api_registration_t * reg,
- u32 context)
+send_nat44_ed_output_interface_details (u32 index, vl_api_registration_t *rp,
+ u32 context)
{
- vl_api_nat44_interface_output_feature_details_t *rmp;
snat_main_t *sm = &snat_main;
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id =
- ntohs (VL_API_NAT44_INTERFACE_OUTPUT_FEATURE_DETAILS + sm->msg_id_base);
- rmp->sw_if_index = ntohl (i->sw_if_index);
- rmp->context = context;
-
- if (nat_interface_is_inside (i))
- rmp->flags |= NAT_API_IS_INSIDE;
-
- vl_api_send_msg (reg, (u8 *) rmp);
+ vl_api_nat44_ed_output_interface_details_t *rmp;
+ snat_interface_t *i =
+ pool_elt_at_index (sm->output_feature_interfaces, index);
+
+ /* Make sure every field is initialized (or don't skip the clib_memset()) */
+ REPLY_MACRO_DETAILS4 (
+ VL_API_NAT44_ED_OUTPUT_INTERFACE_DETAILS, rp, context, ({
+ rmp->sw_if_index = i->sw_if_index;
+
+ /* Endian hack until apigen registers _details
+ * endian functions */
+ vl_api_nat44_ed_output_interface_details_t_endian (rmp);
+ rmp->_vl_msg_id = htons (rmp->_vl_msg_id);
+ rmp->context = htonl (rmp->context);
+ }));
}
static void
- vl_api_nat44_interface_output_feature_dump_t_handler
- (vl_api_nat44_interface_output_feature_dump_t * mp)
+vl_api_nat44_ed_output_interface_get_t_handler (
+ vl_api_nat44_ed_output_interface_get_t *mp)
{
- vl_api_registration_t *reg;
+ vl_api_nat44_ed_output_interface_get_reply_t *rmp;
snat_main_t *sm = &snat_main;
- snat_interface_t *i;
+ i32 rv = 0;
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
+ if (pool_elts (sm->output_feature_interfaces) == 0)
+ {
+ REPLY_MACRO (VL_API_NAT44_ED_OUTPUT_INTERFACE_GET_REPLY);
+ return;
+ }
- pool_foreach (i, sm->output_feature_interfaces)
- {
- send_nat44_interface_output_feature_details (i, reg, mp->context);
- }
+ REPLY_AND_DETAILS_MACRO (
+ VL_API_NAT44_ED_OUTPUT_INTERFACE_GET_REPLY, sm->output_feature_interfaces,
+ ({ send_nat44_ed_output_interface_details (cursor, rp, mp->context); }));
}
static void
@@ -504,7 +479,7 @@ static void
ip4_address_t l_addr, e_addr, pool_addr = { 0 };
u32 sw_if_index, flags = 0, vrf_id;
u16 l_port = 0, e_port = 0;
- nat_protocol_t proto = 0;
+ ip_protocol_t proto = 0;
u8 *tag = 0;
memcpy (&l_addr.as_u8, mp->local_ip_address, 4);
@@ -517,7 +492,7 @@ static void
{
l_port = mp->local_port;
e_port = mp->external_port;
- proto = ip_proto_to_nat_proto (mp->protocol);
+ proto = mp->protocol;
}
if (mp->flags & NAT_API_IS_TWICE_NAT)
@@ -578,7 +553,7 @@ static void
ip4_address_t l_addr, e_addr, pool_addr;
u32 sw_if_index, flags = 0, vrf_id;
u16 l_port = 0, e_port = 0;
- nat_protocol_t proto;
+ ip_protocol_t proto;
u8 *tag = 0;
memcpy (&l_addr.as_u8, mp->local_ip_address, 4);
@@ -615,7 +590,7 @@ static void
}
sw_if_index = clib_net_to_host_u32 (mp->external_sw_if_index);
- if (sw_if_index)
+ if (sw_if_index != ~0)
{
flags |= NAT_SM_FLAG_SWITCH_ADDRESS;
}
@@ -624,7 +599,7 @@ static void
memcpy (&e_addr.as_u8, mp->external_ip_address, 4);
}
- proto = ip_proto_to_nat_proto (mp->protocol);
+ proto = mp->protocol;
vrf_id = clib_net_to_host_u32 (mp->vrf_id);
if (mp->is_add)
@@ -688,7 +663,7 @@ send_nat44_static_mapping_details (snat_static_mapping_t * m,
}
else
{
- rmp->protocol = nat_proto_to_ip_proto (m->proto);
+ rmp->protocol = m->proto;
rmp->external_port = m->external_port;
rmp->local_port = m->local_port;
}
@@ -700,9 +675,8 @@ send_nat44_static_mapping_details (snat_static_mapping_t * m,
}
static void
-send_nat44_static_map_resolve_details (snat_static_map_resolve_t * m,
- vl_api_registration_t * reg,
- u32 context)
+send_nat44_static_map_resolve_details (snat_static_mapping_resolve_t *m,
+ vl_api_registration_t *reg, u32 context)
{
vl_api_nat44_static_mapping_details_t *rmp;
snat_main_t *sm = &snat_main;
@@ -716,19 +690,22 @@ send_nat44_static_map_resolve_details (snat_static_map_resolve_t * m,
rmp->vrf_id = htonl (m->vrf_id);
rmp->context = context;
- if (m->twice_nat)
- rmp->flags |= NAT_API_IS_TWICE_NAT;
+ if (is_sm_twice_nat (m->flags))
+ {
+ rmp->flags |= NAT_API_IS_TWICE_NAT;
+ }
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
{
rmp->flags |= NAT_API_IS_ADDR_ONLY;
}
else
{
- rmp->protocol = nat_proto_to_ip_proto (m->proto);
+ rmp->protocol = m->proto;
rmp->external_port = m->e_port;
rmp->local_port = m->l_port;
}
+
if (m->tag)
strncpy ((char *) rmp->tag, (char *) m->tag, vec_len (m->tag));
@@ -742,7 +719,7 @@ vl_api_nat44_static_mapping_dump_t_handler (vl_api_nat44_static_mapping_dump_t
vl_api_registration_t *reg;
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
- snat_static_map_resolve_t *rp;
+ snat_static_mapping_resolve_t *rp;
int j;
reg = vl_api_client_index_to_registration (mp->client_index);
@@ -755,10 +732,10 @@ vl_api_nat44_static_mapping_dump_t_handler (vl_api_nat44_static_mapping_dump_t
send_nat44_static_mapping_details (m, reg, mp->context);
}
- for (j = 0; j < vec_len (sm->to_resolve); j++)
+ for (j = 0; j < vec_len (sm->sm_to_resolve); j++)
{
- rp = sm->to_resolve + j;
- if (!rp->identity_nat)
+ rp = sm->sm_to_resolve + j;
+ if (!is_sm_identity_nat (rp->flags))
send_nat44_static_map_resolve_details (rp, reg, mp->context);
}
}
@@ -774,7 +751,7 @@ static void
ip4_address_t addr, pool_addr = { 0 };
u32 sw_if_index, flags, vrf_id;
- nat_protocol_t proto = 0;
+ ip_protocol_t proto = 0;
u16 port = 0;
u8 *tag = 0;
@@ -787,7 +764,7 @@ static void
else
{
port = mp->port;
- proto = ip_proto_to_nat_proto (mp->protocol);
+ proto = mp->protocol;
}
sw_if_index = clib_net_to_host_u32 (mp->sw_if_index);
@@ -840,7 +817,7 @@ send_nat44_identity_mapping_details (snat_static_mapping_t * m, int index,
rmp->port = m->local_port;
rmp->sw_if_index = ~0;
rmp->vrf_id = htonl (local->vrf_id);
- rmp->protocol = nat_proto_to_ip_proto (m->proto);
+ rmp->protocol = m->proto;
rmp->context = context;
if (m->tag)
strncpy ((char *) rmp->tag, (char *) m->tag, vec_len (m->tag));
@@ -849,8 +826,8 @@ send_nat44_identity_mapping_details (snat_static_mapping_t * m, int index,
}
static void
-send_nat44_identity_map_resolve_details (snat_static_map_resolve_t * m,
- vl_api_registration_t * reg,
+send_nat44_identity_map_resolve_details (snat_static_mapping_resolve_t *m,
+ vl_api_registration_t *reg,
u32 context)
{
vl_api_nat44_identity_mapping_details_t *rmp;
@@ -861,13 +838,13 @@ send_nat44_identity_map_resolve_details (snat_static_map_resolve_t * m,
rmp->_vl_msg_id =
ntohs (VL_API_NAT44_IDENTITY_MAPPING_DETAILS + sm->msg_id_base);
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
rmp->flags = (vl_api_nat_config_flags_t) NAT_API_IS_ADDR_ONLY;
rmp->port = m->l_port;
rmp->sw_if_index = htonl (m->sw_if_index);
rmp->vrf_id = htonl (m->vrf_id);
- rmp->protocol = nat_proto_to_ip_proto (m->proto);
+ rmp->protocol = m->proto;
rmp->context = context;
if (m->tag)
strncpy ((char *) rmp->tag, (char *) m->tag, vec_len (m->tag));
@@ -882,7 +859,7 @@ static void
vl_api_registration_t *reg;
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
- snat_static_map_resolve_t *rp;
+ snat_static_mapping_resolve_t *rp;
int j;
reg = vl_api_client_index_to_registration (mp->client_index);
@@ -890,20 +867,20 @@ static void
return;
pool_foreach (m, sm->static_mappings)
- {
- if (is_sm_identity_nat (m->flags) && !is_sm_lb (m->flags))
- {
- pool_foreach_index (j, m->locals)
- {
- send_nat44_identity_mapping_details (m, j, reg, mp->context);
- }
- }
- }
+ {
+ if (is_sm_identity_nat (m->flags) && !is_sm_lb (m->flags))
+ {
+ pool_foreach_index (j, m->locals)
+ {
+ send_nat44_identity_mapping_details (m, j, reg, mp->context);
+ }
+ }
+ }
- for (j = 0; j < vec_len (sm->to_resolve); j++)
+ for (j = 0; j < vec_len (sm->sm_to_resolve); j++)
{
- rp = sm->to_resolve + j;
- if (rp->identity_nat)
+ rp = sm->sm_to_resolve + j;
+ if (is_sm_identity_nat (rp->flags))
send_nat44_identity_map_resolve_details (rp, reg, mp->context);
}
}
@@ -915,25 +892,24 @@ static void
snat_main_t *sm = &snat_main;
vl_api_nat44_add_del_interface_addr_reply_t *rmp;
u32 sw_if_index = ntohl (mp->sw_if_index);
+ u8 twice_nat;
int rv = 0;
- u8 is_del;
-
- if (sm->static_mapping_only)
- {
- rv = VNET_API_ERROR_FEATURE_DISABLED;
- goto send_reply;
- }
-
- is_del = !mp->is_add;
VALIDATE_SW_IF_INDEX (mp);
- rv = snat_add_interface_address (sm, sw_if_index, is_del,
- mp->flags & NAT_API_IS_TWICE_NAT);
+ twice_nat = mp->flags & NAT_API_IS_TWICE_NAT;
+
+ if (mp->is_add)
+ {
+ rv = nat44_ed_add_interface_address (sw_if_index, twice_nat);
+ }
+ else
+ {
+ rv = nat44_ed_del_interface_address (sw_if_index, twice_nat);
+ }
BAD_SW_IF_INDEX_LABEL;
-send_reply:
REPLY_MACRO (VL_API_NAT44_ADD_DEL_INTERFACE_ADDR_REPLY);
}
@@ -962,21 +938,18 @@ static void
vl_api_nat44_interface_addr_dump_t_handler (vl_api_nat44_interface_addr_dump_t
* mp)
{
- vl_api_registration_t *reg;
snat_main_t *sm = &snat_main;
- u32 *i;
+ vl_api_registration_t *reg;
+ snat_address_resolve_t *ap;
reg = vl_api_client_index_to_registration (mp->client_index);
if (!reg)
return;
- vec_foreach (i, sm->auto_add_sw_if_indices)
- {
- send_nat44_interface_addr_details (*i, reg, mp->context, 0);
- }
- vec_foreach (i, sm->auto_add_sw_if_indices_twice_nat)
+ vec_foreach (ap, sm->addr_to_resolve)
{
- send_nat44_interface_addr_details (*i, reg, mp->context, 1);
+ send_nat44_interface_addr_details (ap->sw_if_index, reg, mp->context,
+ ap->is_twice_nat);
}
}
@@ -1010,7 +983,7 @@ vl_api_nat44_add_del_lb_static_mapping_t_handler (
vl_api_nat44_add_del_lb_static_mapping_reply_t *rmp;
nat44_lb_addr_port_t *locals = 0;
ip4_address_t e_addr;
- nat_protocol_t proto;
+ ip_protocol_t proto;
u32 flags = 0;
u8 *tag = 0;
int rv = 0;
@@ -1018,7 +991,7 @@ vl_api_nat44_add_del_lb_static_mapping_t_handler (
locals = unformat_nat44_lb_addr_port (mp->locals,
clib_net_to_host_u32 (mp->local_num));
clib_memcpy (&e_addr, mp->external_addr, 4);
- proto = ip_proto_to_nat_proto (mp->protocol);
+ proto = mp->protocol;
if (mp->flags & NAT_API_IS_TWICE_NAT)
{
@@ -1063,11 +1036,11 @@ vl_api_nat44_lb_static_mapping_add_del_local_t_handler (
vl_api_nat44_lb_static_mapping_add_del_local_reply_t *rmp;
int rv = 0;
ip4_address_t e_addr, l_addr;
- nat_protocol_t proto;
+ ip_protocol_t proto;
clib_memcpy (&e_addr, mp->external_addr, 4);
clib_memcpy (&l_addr, mp->local.addr, 4);
- proto = ip_proto_to_nat_proto (mp->protocol);
+ proto = mp->protocol;
rv = nat44_ed_add_del_lb_static_mapping_local (
e_addr, mp->external_port, l_addr, mp->local.port, proto,
@@ -1089,13 +1062,14 @@ send_nat44_lb_static_mapping_details (snat_static_mapping_t *m,
rmp = vl_msg_api_alloc (
sizeof (*rmp) + (pool_elts (m->locals) * sizeof (nat44_lb_addr_port_t)));
+
clib_memset (rmp, 0, sizeof (*rmp));
rmp->_vl_msg_id =
ntohs (VL_API_NAT44_LB_STATIC_MAPPING_DETAILS + sm->msg_id_base);
clib_memcpy (rmp->external_addr, &(m->external_addr), 4);
rmp->external_port = m->external_port;
- rmp->protocol = nat_proto_to_ip_proto (m->proto);
+ rmp->protocol = m->proto;
rmp->context = context;
if (is_sm_self_twice_nat (m->flags))
@@ -1169,7 +1143,7 @@ vl_api_nat44_del_session_t_handler (vl_api_nat44_del_session_t *mp)
is_in = mp->flags & NAT_API_IS_INSIDE;
- rv = nat44_del_ed_session (sm, &addr, port, &eh_addr, eh_port, mp->protocol,
+ rv = nat44_ed_del_session (sm, &addr, port, &eh_addr, eh_port, mp->protocol,
vrf_id, is_in);
REPLY_MACRO (VL_API_NAT44_DEL_SESSION_REPLY);
@@ -1187,253 +1161,160 @@ vl_api_nat44_forwarding_enable_disable_t_handler (
}
static void
-vl_api_nat44_forwarding_is_enabled_t_handler (
- vl_api_nat44_forwarding_is_enabled_t *mp)
+vl_api_nat44_show_running_config_t_handler (
+ vl_api_nat44_show_running_config_t *mp)
{
- vl_api_registration_t *reg;
+ vl_api_nat44_show_running_config_reply_t *rmp;
snat_main_t *sm = &snat_main;
- vl_api_nat44_forwarding_is_enabled_reply_t *rmp;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
+ nat44_config_t *rc = &sm->rconfig;
+ int rv = 0;
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id =
- ntohs (VL_API_NAT44_FORWARDING_IS_ENABLED_REPLY + sm->msg_id_base);
- rmp->context = mp->context;
+ REPLY_MACRO2_ZERO (
+ VL_API_NAT44_SHOW_RUNNING_CONFIG_REPLY, ({
+ rmp->inside_vrf = htonl (rc->inside_vrf);
+ rmp->outside_vrf = htonl (rc->outside_vrf);
- rmp->enabled = sm->forwarding_enabled;
+ rmp->sessions = htonl (rc->sessions);
+ rmp->translation_buckets = htonl (sm->translation_buckets);
- vl_api_send_msg (reg, (u8 *) rmp);
-}
+ // OBSOLETE
+ rmp->users = 0;
+ rmp->user_buckets = 0;
+ rmp->user_sessions = 0;
-/* Obsolete calls hold back because of deprecation
- * should not be used */
+ rmp->timeouts.udp = htonl (sm->timeouts.udp);
+ rmp->timeouts.tcp_established = htonl (sm->timeouts.tcp.established);
+ rmp->timeouts.tcp_transitory = htonl (sm->timeouts.tcp.transitory);
+ rmp->timeouts.icmp = htonl (sm->timeouts.icmp);
-static void
-vl_api_nat_set_addr_and_port_alloc_alg_t_handler (
- vl_api_nat_set_addr_and_port_alloc_alg_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_set_addr_and_port_alloc_alg_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_SET_ADDR_AND_PORT_ALLOC_ALG_REPLY);
+ rmp->forwarding_enabled = sm->forwarding_enabled == 1;
+ // consider how to split functionality between subplugins
+ rmp->ipfix_logging_enabled = nat_ipfix_logging_enabled ();
+ rmp->flags |= NAT44_IS_ENDPOINT_DEPENDENT;
+ }));
}
static void
-vl_api_nat_get_addr_and_port_alloc_alg_t_handler (
- vl_api_nat_get_addr_and_port_alloc_alg_t *mp)
+vl_api_nat44_ed_add_del_vrf_table_t_handler (
+ vl_api_nat44_ed_add_del_vrf_table_t *mp)
{
snat_main_t *sm = &snat_main;
- vl_api_nat_get_addr_and_port_alloc_alg_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_GET_ADDR_AND_PORT_ALLOC_ALG_REPLY);
+ vl_api_nat44_ed_add_del_vrf_table_reply_t *rmp;
+ int rv = nat44_ed_add_del_vrf_table (clib_net_to_host_u32 (mp->table_vrf_id),
+ mp->is_add);
+ REPLY_MACRO (VL_API_NAT44_ED_ADD_DEL_VRF_TABLE_REPLY);
}
static void
-vl_api_nat_ha_set_listener_t_handler (vl_api_nat_ha_set_listener_t *mp)
+vl_api_nat44_ed_add_del_vrf_route_t_handler (
+ vl_api_nat44_ed_add_del_vrf_route_t *mp)
{
snat_main_t *sm = &snat_main;
- vl_api_nat_ha_set_listener_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_SET_LISTENER_REPLY);
+ vl_api_nat44_ed_add_del_vrf_route_reply_t *rmp;
+ int rv =
+ nat44_ed_add_del_vrf_route (clib_net_to_host_u32 (mp->table_vrf_id),
+ clib_net_to_host_u32 (mp->vrf_id), mp->is_add);
+ REPLY_MACRO (VL_API_NAT44_ED_ADD_DEL_VRF_ROUTE_REPLY);
}
static void
-vl_api_nat_ha_get_listener_t_handler (vl_api_nat_ha_get_listener_t *mp)
+nat44_ed_vrf_tables_send_details (vl_api_registration_t *rp, u32 context,
+ vrf_table_t *t)
{
snat_main_t *sm = &snat_main;
- vl_api_nat_ha_get_listener_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_GET_LISTENER_REPLY);
-}
+ vl_api_nat44_ed_vrf_tables_details_t *mp;
-static void
-vl_api_nat_ha_set_failover_t_handler (vl_api_nat_ha_set_failover_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_ha_set_failover_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_SET_FAILOVER_REPLY);
-}
+ u32 *vrf_ids = 0;
+ vrf_route_t *r;
-static void
-vl_api_nat_ha_get_failover_t_handler (vl_api_nat_ha_get_failover_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_ha_get_failover_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_GET_FAILOVER_REPLY);
-}
+ mp = vl_msg_api_alloc_zero (sizeof (*mp) +
+ sizeof (mp->vrf_ids[0]) * vec_len (t->routes));
+ mp->_vl_msg_id =
+ ntohs (VL_API_NAT44_ED_VRF_TABLES_DETAILS + sm->msg_id_base);
+ mp->context = context;
+ mp->n_vrf_ids = clib_host_to_net_u32 (vec_len (t->routes));
-static void
-vl_api_nat_ha_flush_t_handler (vl_api_nat_ha_flush_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_ha_flush_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_FLUSH_REPLY);
-}
+ pool_foreach (r, t->routes)
+ {
+ vec_add1 (vrf_ids, r->vrf_id);
+ }
-static void
-vl_api_nat_ha_resync_t_handler (vl_api_nat_ha_resync_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_ha_resync_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_RESYNC_REPLY);
-}
+ // copy the records
+ clib_memcpy (mp->vrf_ids, vrf_ids,
+ sizeof (mp->vrf_ids[0]) * vec_len (t->routes));
-static void
-vl_api_nat44_del_user_t_handler (vl_api_nat44_del_user_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat44_del_user_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT44_DEL_USER_REPLY);
-}
+ vec_free (vrf_ids);
-static void
-vl_api_nat44_session_cleanup_t_handler (vl_api_nat44_session_cleanup_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat44_session_cleanup_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT44_SESSION_CLEANUP_REPLY);
+ // send the message
+ vl_api_send_msg (rp, (u8 *) mp);
}
static void
-vl_api_nat44_plugin_enable_disable_t_handler (
- vl_api_nat44_plugin_enable_disable_t *mp)
+nat44_ed_vrf_tables_send_details_v2 (vl_api_registration_t *rp, u32 context,
+ vrf_table_t *t)
{
snat_main_t *sm = &snat_main;
- nat44_config_t c = { 0 };
- vl_api_nat44_plugin_enable_disable_reply_t *rmp;
- int rv = 0;
+ vl_api_nat44_ed_vrf_tables_v2_details_t *mp;
- if (mp->enable)
- {
- if (mp->users || mp->user_sessions)
- {
- rv = VNET_API_ERROR_UNSUPPORTED;
- }
- else
- {
- c.static_mapping_only = mp->flags & NAT44_API_IS_STATIC_MAPPING_ONLY;
- c.connection_tracking = mp->flags & NAT44_API_IS_CONNECTION_TRACKING;
+ u32 *vrf_ids = 0;
+ vrf_route_t *r;
- c.inside_vrf = ntohl (mp->inside_vrf);
- c.outside_vrf = ntohl (mp->outside_vrf);
-
- c.sessions = ntohl (mp->sessions);
-
- rv = nat44_plugin_enable (c);
- }
- }
- else
+ mp = vl_msg_api_alloc_zero (sizeof (*mp) +
+ sizeof (mp->vrf_ids[0]) * vec_len (t->routes));
+ mp->_vl_msg_id = clib_net_to_host_u16 (VL_API_NAT44_ED_VRF_TABLES_V2_DETAILS +
+ sm->msg_id_base);
+ mp->context = context;
+ mp->n_vrf_ids = clib_net_to_host_u32 (vec_len (t->routes));
+ mp->table_vrf_id = clib_net_to_host_u32 (t->table_vrf_id);
+ pool_foreach (r, t->routes)
{
- rv = nat44_plugin_disable ();
+ vec_add1 (vrf_ids, clib_net_to_host_u32 (r->vrf_id));
}
- REPLY_MACRO (VL_API_NAT44_PLUGIN_ENABLE_DISABLE_REPLY);
-}
+ // copy the records
+ clib_memcpy (mp->vrf_ids, vrf_ids,
+ sizeof (mp->vrf_ids[0]) * vec_len (t->routes));
-static void
-vl_api_nat_control_ping_t_handler (vl_api_nat_control_ping_t *mp)
-{
- vl_api_nat_control_ping_reply_t *rmp;
- snat_main_t *sm = &snat_main;
- int rv = 0;
+ vec_free (vrf_ids);
- REPLY_MACRO2 (VL_API_NAT_CONTROL_PING_REPLY,
- ({ rmp->vpe_pid = ntohl (getpid ()); }));
+ // send the message
+ vl_api_send_msg (rp, (u8 *) mp);
}
static void
-vl_api_nat_show_config_t_handler (vl_api_nat_show_config_t *mp)
+vl_api_nat44_ed_vrf_tables_dump_t_handler (
+ vl_api_nat44_ed_vrf_tables_dump_t *mp)
{
- vl_api_nat_show_config_reply_t *rmp;
snat_main_t *sm = &snat_main;
- int rv = 0;
+ vl_api_registration_t *rp;
+ vrf_table_t *t;
- REPLY_MACRO2_ZERO (VL_API_NAT_SHOW_CONFIG_REPLY, ({
- rmp->translation_buckets =
- htonl (sm->translation_buckets);
- rmp->user_buckets = 0;
- rmp->max_translations_per_user = 0;
- rmp->outside_vrf_id = htonl (sm->outside_vrf_id);
- rmp->inside_vrf_id = htonl (sm->inside_vrf_id);
- rmp->static_mapping_only = sm->static_mapping_only;
- rmp->static_mapping_connection_tracking =
- sm->static_mapping_connection_tracking;
- rmp->endpoint_dependent = 1;
- rmp->out2in_dpo = 0;
- }));
-}
-
-static void
-vl_api_nat_show_config_2_t_handler (vl_api_nat_show_config_2_t *mp)
-{
- vl_api_nat_show_config_2_reply_t *rmp;
- snat_main_t *sm = &snat_main;
- int rv = 0;
+ rp = vl_api_client_index_to_registration (mp->client_index);
+ if (rp == 0)
+ return;
- REPLY_MACRO2_ZERO (
- VL_API_NAT_SHOW_CONFIG_2_REPLY, ({
- rmp->translation_buckets = htonl (sm->translation_buckets);
- rmp->user_buckets = 0;
- rmp->max_translations_per_user = 0;
- rmp->outside_vrf_id = htonl (sm->outside_vrf_id);
- rmp->inside_vrf_id = htonl (sm->inside_vrf_id);
- rmp->static_mapping_only = sm->static_mapping_only;
- rmp->static_mapping_connection_tracking =
- sm->static_mapping_connection_tracking;
- rmp->endpoint_dependent = 1;
- rmp->out2in_dpo = 0;
- rmp->max_translations_per_thread =
- clib_net_to_host_u32 (sm->max_translations_per_thread);
- rmp->max_users_per_thread = 0;
- }));
+ pool_foreach (t, sm->vrf_tables)
+ {
+ nat44_ed_vrf_tables_send_details (rp, mp->context, t);
+ }
}
static void
-vl_api_nat44_show_running_config_t_handler (
- vl_api_nat44_show_running_config_t *mp)
+vl_api_nat44_ed_vrf_tables_v2_dump_t_handler (
+ vl_api_nat44_ed_vrf_tables_v2_dump_t *mp)
{
- vl_api_nat44_show_running_config_reply_t *rmp;
snat_main_t *sm = &snat_main;
- nat44_config_t *rc = &sm->rconfig;
- int rv = 0;
-
- REPLY_MACRO2_ZERO (
- VL_API_NAT44_SHOW_RUNNING_CONFIG_REPLY, ({
- rmp->inside_vrf = htonl (rc->inside_vrf);
- rmp->outside_vrf = htonl (rc->outside_vrf);
+ vl_api_registration_t *rp;
+ vrf_table_t *t;
- rmp->sessions = htonl (rc->sessions);
- rmp->translation_buckets = htonl (sm->translation_buckets);
-
- // OBSOLETE
- rmp->users = 0;
- rmp->user_buckets = 0;
- rmp->user_sessions = 0;
-
- rmp->timeouts.udp = htonl (sm->timeouts.udp);
- rmp->timeouts.tcp_established = htonl (sm->timeouts.tcp.established);
- rmp->timeouts.tcp_transitory = htonl (sm->timeouts.tcp.transitory);
- rmp->timeouts.icmp = htonl (sm->timeouts.icmp);
+ rp = vl_api_client_index_to_registration (mp->client_index);
+ if (rp == 0)
+ return;
- rmp->forwarding_enabled = sm->forwarding_enabled == 1;
- // consider how to split functionality between subplugins
- rmp->ipfix_logging_enabled = nat_ipfix_logging_enabled ();
- rmp->flags |= NAT44_IS_ENDPOINT_DEPENDENT;
- if (rc->static_mapping_only)
- rmp->flags |= NAT44_IS_STATIC_MAPPING_ONLY;
- if (rc->connection_tracking)
- rmp->flags |= NAT44_IS_CONNECTION_TRACKING;
- }));
+ pool_foreach (t, sm->vrf_tables)
+ {
+ nat44_ed_vrf_tables_send_details_v2 (rp, mp->context, t);
+ }
}
/* user (internal host) key */
@@ -1515,7 +1396,7 @@ nat_ed_user_create_helper (user_create_helper_t *uch, snat_session_t *s)
{
u = pool_elt_at_index (uch->users, value.value);
}
- if (snat_is_session_static (s))
+ if (nat44_ed_is_session_static (s))
{
++u->nstaticsessions;
}
@@ -1598,40 +1479,27 @@ send_nat44_user_session_details (snat_session_t * s,
clib_memcpy (rmp->outside_ip_address, (&s->out2in.addr), 4);
clib_memcpy (rmp->inside_ip_address, (&s->in2out.addr), 4);
- if (snat_is_session_static (s))
+ if (nat44_ed_is_session_static (s))
rmp->flags |= NAT_API_IS_STATIC;
- if (is_twice_nat_session (s))
+ if (nat44_ed_is_twice_nat_session (s))
rmp->flags |= NAT_API_IS_TWICE_NAT;
- if (is_ed_session (s) || is_fwd_bypass_session (s))
- rmp->flags |= NAT_API_IS_EXT_HOST_VALID;
+ rmp->flags |= NAT_API_IS_EXT_HOST_VALID;
rmp->last_heard = clib_host_to_net_u64 ((u64) s->last_heard);
rmp->total_bytes = clib_host_to_net_u64 (s->total_bytes);
rmp->total_pkts = ntohl (s->total_pkts);
rmp->context = context;
- if (snat_is_unk_proto_session (s))
- {
- rmp->outside_port = 0;
- rmp->inside_port = 0;
- rmp->protocol = ntohs (s->in2out.port);
- }
- else
+ rmp->outside_port = s->out2in.port;
+ rmp->inside_port = s->in2out.port;
+ rmp->protocol = clib_host_to_net_u16 (s->proto);
+ clib_memcpy (rmp->ext_host_address, &s->ext_host_addr, 4);
+ rmp->ext_host_port = s->ext_host_port;
+ if (nat44_ed_is_twice_nat_session (s))
{
- rmp->outside_port = s->out2in.port;
- rmp->inside_port = s->in2out.port;
- rmp->protocol = ntohs (nat_proto_to_ip_proto (s->nat_proto));
- }
- if (is_ed_session (s) || is_fwd_bypass_session (s))
- {
- clib_memcpy (rmp->ext_host_address, &s->ext_host_addr, 4);
- rmp->ext_host_port = s->ext_host_port;
- if (is_twice_nat_session (s))
- {
- clib_memcpy (rmp->ext_host_nat_address, &s->ext_host_nat_addr, 4);
- rmp->ext_host_nat_port = s->ext_host_nat_port;
- }
+ clib_memcpy (rmp->ext_host_nat_address, &s->ext_host_nat_addr, 4);
+ rmp->ext_host_nat_port = s->ext_host_nat_port;
}
vl_api_send_msg (reg, (u8 *) rmp);
@@ -1670,6 +1538,167 @@ vl_api_nat44_user_session_dump_t_handler (vl_api_nat44_user_session_dump_t *
}
}
+static void
+send_nat44_user_session_v2_details (snat_session_t *s,
+ vl_api_registration_t *reg, u32 context)
+{
+ vl_api_nat44_user_session_v2_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ u64 now = vlib_time_now (vnm->vlib_main);
+ u64 sess_timeout_time = 0;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_USER_SESSION_V2_DETAILS + sm->msg_id_base);
+ clib_memcpy (rmp->outside_ip_address, (&s->out2in.addr), 4);
+ clib_memcpy (rmp->inside_ip_address, (&s->in2out.addr), 4);
+
+ if (nat44_ed_is_session_static (s))
+ rmp->flags |= NAT_API_IS_STATIC;
+
+ if (nat44_ed_is_twice_nat_session (s))
+ rmp->flags |= NAT_API_IS_TWICE_NAT;
+
+ rmp->flags |= NAT_API_IS_EXT_HOST_VALID;
+
+ rmp->last_heard = clib_host_to_net_u64 ((u64) s->last_heard);
+ rmp->total_bytes = clib_host_to_net_u64 (s->total_bytes);
+ rmp->total_pkts = ntohl (s->total_pkts);
+ rmp->context = context;
+ rmp->outside_port = s->out2in.port;
+ rmp->inside_port = s->in2out.port;
+ rmp->protocol = clib_host_to_net_u16 (s->proto);
+ clib_memcpy (rmp->ext_host_address, &s->ext_host_addr, 4);
+ rmp->ext_host_port = s->ext_host_port;
+ if (nat44_ed_is_twice_nat_session (s))
+ {
+ clib_memcpy (rmp->ext_host_nat_address, &s->ext_host_nat_addr, 4);
+ rmp->ext_host_nat_port = s->ext_host_nat_port;
+ }
+
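+ /* v2 additionally reports whether the session has already timed out,
+  * i.e. its per-protocol timeout elapsed since it was last heard from */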
+ sess_timeout_time = s->last_heard + nat44_session_get_timeout (sm, s);
+ rmp->is_timed_out = (now >= sess_timeout_time);
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+send_nat44_user_session_v3_details (snat_session_t *s,
+ vl_api_registration_t *reg, u32 context)
+{
+ vl_api_nat44_user_session_v3_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ u64 now = vlib_time_now (vlib_get_main ());
+ u64 sess_timeout_time = 0;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_USER_SESSION_V3_DETAILS + sm->msg_id_base);
+ clib_memcpy (rmp->outside_ip_address, (&s->out2in.addr), 4);
+ clib_memcpy (rmp->inside_ip_address, (&s->in2out.addr), 4);
+
+ if (nat44_ed_is_session_static (s))
+ rmp->flags |= NAT_API_IS_STATIC;
+
+ if (nat44_ed_is_twice_nat_session (s))
+ rmp->flags |= NAT_API_IS_TWICE_NAT;
+
+ rmp->flags |= NAT_API_IS_EXT_HOST_VALID;
+
+ rmp->last_heard = clib_host_to_net_u64 ((u64) s->last_heard);
+ rmp->time_since_last_heard =
+ clib_host_to_net_u64 ((u64) (now - s->last_heard));
+ rmp->total_bytes = clib_host_to_net_u64 (s->total_bytes);
+ rmp->total_pkts = ntohl (s->total_pkts);
+ rmp->context = context;
+ rmp->outside_port = s->out2in.port;
+ rmp->inside_port = s->in2out.port;
+ rmp->protocol = clib_host_to_net_u16 (s->proto);
+ clib_memcpy (rmp->ext_host_address, &s->ext_host_addr, 4);
+ rmp->ext_host_port = s->ext_host_port;
+ if (nat44_ed_is_twice_nat_session (s))
+ {
+ clib_memcpy (rmp->ext_host_nat_address, &s->ext_host_nat_addr, 4);
+ rmp->ext_host_nat_port = s->ext_host_nat_port;
+ }
+
+ sess_timeout_time = s->last_heard + nat44_session_get_timeout (sm, s);
+ rmp->is_timed_out = (now >= sess_timeout_time);
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_nat44_user_session_v2_dump_t_handler (
+ vl_api_nat44_user_session_v2_dump_t *mp)
+{
+ snat_main_per_thread_data_t *tsm;
+ snat_main_t *sm = &snat_main;
+ vl_api_registration_t *reg;
+ snat_user_key_t ukey;
+ snat_session_t *s;
+ ip4_header_t ip;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ clib_memcpy (&ukey.addr, mp->ip_address, 4);
+ ip.src_address.as_u32 = ukey.addr.as_u32;
+ ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id));
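+ /* sessions are kept per worker thread: pick the worker owning flows
+  * for this inside address, or the main-thread data (stored at index
+  * num_workers) when no workers are configured */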
+ if (sm->num_workers > 1)
+ tsm = vec_elt_at_index (
+ sm->per_thread_data,
+ nat44_ed_get_in2out_worker_index (0, &ip, ukey.fib_index, 0));
+ else
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+
+ pool_foreach (s, tsm->sessions)
+ {
+ if (s->in2out.addr.as_u32 == ukey.addr.as_u32)
+ {
+ send_nat44_user_session_v2_details (s, reg, mp->context);
+ }
+ }
+}
+
+static void
+vl_api_nat44_user_session_v3_dump_t_handler (
+ vl_api_nat44_user_session_v3_dump_t *mp)
+{
+ snat_main_per_thread_data_t *tsm;
+ snat_main_t *sm = &snat_main;
+ vl_api_registration_t *reg;
+ snat_user_key_t ukey;
+ snat_session_t *s;
+ ip4_header_t ip;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ clib_memcpy (&ukey.addr, mp->ip_address, 4);
+ ip.src_address.as_u32 = ukey.addr.as_u32;
+ ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id));
+ if (sm->num_workers > 1)
+ tsm = vec_elt_at_index (
+ sm->per_thread_data,
+ nat44_ed_get_in2out_worker_index (0, &ip, ukey.fib_index, 0));
+ else
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+
+ pool_foreach (s, tsm->sessions)
+ {
+ if (s->in2out.addr.as_u32 == ukey.addr.as_u32)
+ {
+ send_nat44_user_session_v3_details (s, reg, mp->context);
+ }
+ }
+}
+
/* API definitions */
#include <vnet/format_fns.h>
#include <nat/nat44-ed/nat44_ed.api.c>
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_classify.c b/src/plugins/nat/nat44-ed/nat44_ed_classify.c
index 5a9f4e42657..229cf3669e6 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_classify.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_classify.c
@@ -98,7 +98,6 @@ nat44_handoff_classify_node_fn_inline (vlib_main_t * vm,
u32 next0 = NAT_NEXT_IN2OUT_CLASSIFY;
ip4_header_t *ip0;
snat_address_t *ap;
- clib_bihash_kv_8_8_t kv0, value0;
/* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
@@ -122,23 +121,19 @@ nat44_handoff_classify_node_fn_inline (vlib_main_t * vm,
if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
{
- init_nat_k (&kv0, ip0->dst_address, 0, 0, 0);
/* try to classify the fragment based on IP header alone */
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external,
- &kv0, &value0))
+ m = nat44_ed_sm_o2i_lookup (sm, ip0->dst_address, 0, 0, 0);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value0.value);
if (m->local_addr.as_u32 != m->external_addr.as_u32)
next0 = NAT_NEXT_OUT2IN_CLASSIFY;
goto enqueue0;
}
- init_nat_k (&kv0, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port, 0,
- ip_proto_to_nat_proto (ip0->protocol));
- if (!clib_bihash_search_8_8
- (&sm->static_mapping_by_external, &kv0, &value0))
+ m = nat44_ed_sm_o2i_lookup (
+ sm, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port,
+ 0, ip0->protocol);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value0.value);
if (m->local_addr.as_u32 != m->external_addr.as_u32)
next0 = NAT_NEXT_OUT2IN_CLASSIFY;
}
@@ -202,7 +197,6 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm,
u32 sw_if_index0, rx_fib_index0;
ip4_header_t *ip0;
snat_address_t *ap;
- clib_bihash_kv_8_8_t kv0, value0;
clib_bihash_kv_16_8_t ed_kv0, ed_value0;
/* speculatively enqueue b0 to the current next frame */
@@ -227,11 +221,11 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm,
rx_fib_index0 =
fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
sw_if_index0);
- init_ed_k (&ed_kv0, ip0->src_address,
+ init_ed_k (&ed_kv0, ip0->src_address.as_u32,
vnet_buffer (b0)->ip.reass.l4_src_port,
- ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port,
- rx_fib_index0, ip0->protocol);
+ ip0->dst_address.as_u32,
+ vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
+ ip0->protocol);
/* process whole packet */
if (!clib_bihash_search_16_8 (&sm->flow_hash, &ed_kv0,
&ed_value0))
@@ -272,23 +266,19 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm,
if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
{
- init_nat_k (&kv0, ip0->dst_address, 0, 0, 0);
/* try to classify the fragment based on IP header alone */
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external,
- &kv0, &value0))
+ m = nat44_ed_sm_o2i_lookup (sm, ip0->dst_address, 0, 0, 0);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value0.value);
if (m->local_addr.as_u32 != m->external_addr.as_u32)
next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH;
goto enqueue0;
}
- init_nat_k (&kv0, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port, 0,
- ip_proto_to_nat_proto (ip0->protocol));
- if (!clib_bihash_search_8_8
- (&sm->static_mapping_by_external, &kv0, &value0))
+ m = nat44_ed_sm_o2i_lookup (
+ sm, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port,
+ 0, ip0->protocol);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value0.value);
if (m->local_addr.as_u32 != m->external_addr.as_u32)
next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH;
}
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_cli.c b/src/plugins/nat/nat44-ed/nat44_ed_cli.c
index acf9069af2b..14313d05a35 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_cli.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_cli.c
@@ -38,23 +38,15 @@ nat44_ed_enable_disable_command_fn (vlib_main_t *vm, unformat_input_t *input,
clib_error_t *error = 0;
nat44_config_t c = { 0 };
- u8 enable_set = 0, enable = 0, mode_set = 0;
+ u8 enable_set = 0, enable = 0;
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (!mode_set && unformat (line_input, "static-mapping-only"))
- {
- mode_set = 1;
- c.static_mapping_only = 1;
- if (unformat (line_input, "connection-tracking"))
- {
- c.connection_tracking = 1;
- }
- }
- else if (unformat (line_input, "inside-vrf %u", &c.inside_vrf));
+ if (unformat (line_input, "inside-vrf %u", &c.inside_vrf))
+ ;
else if (unformat (line_input, "outside-vrf %u", &c.outside_vrf));
else if (unformat (line_input, "sessions %u", &c.sessions));
else if (!enable_set)
@@ -116,7 +108,6 @@ set_workers_command_fn (vlib_main_t * vm,
int rv = 0;
clib_error_t *error = 0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -162,8 +153,8 @@ done:
}
static clib_error_t *
-nat_show_workers_commnad_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
+nat_show_workers_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
snat_main_t *sm = &snat_main;
u32 *worker;
@@ -189,10 +180,9 @@ snat_set_log_level_command_fn (vlib_main_t * vm,
{
unformat_input_t _line_input, *line_input = &_line_input;
snat_main_t *sm = &snat_main;
- u8 log_level = NAT_LOG_NONE;
+ u32 log_level = NAT_LOG_NONE;
clib_error_t *error = 0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -283,12 +273,7 @@ nat44_show_hash_command_fn (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (input, "verbose"))
verbose = 2;
- vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->static_mapping_by_local,
- verbose);
- vlib_cli_output (vm, "%U",
- format_bihash_8_8, &sm->static_mapping_by_external,
- verbose);
- vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose);
+ vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose);
vec_foreach_index (i, sm->per_thread_data)
{
vlib_cli_output (vm, "-------- thread %d %s --------\n",
@@ -296,8 +281,7 @@ nat44_show_hash_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose);
}
- vlib_cli_output (vm, "%U", format_bihash_16_8, &nam->affinity_hash,
- verbose);
+ vlib_cli_output (vm, "%U", format_bihash_16_8, &nam->affinity_hash, verbose);
vlib_cli_output (vm, "-------- hash table parameters --------\n");
vlib_cli_output (vm, "translation buckets: %u", sm->translation_buckets);
@@ -313,7 +297,6 @@ nat_set_mss_clamping_command_fn (vlib_main_t * vm, unformat_input_t * input,
clib_error_t *error = 0;
u32 mss;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -356,7 +339,6 @@ add_address_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- snat_main_t *sm = &snat_main;
ip4_address_t start_addr, end_addr, this_addr;
u32 start_host_order, end_host_order;
u32 vrf_id = ~0;
@@ -366,7 +348,6 @@ add_address_command_fn (vlib_main_t * vm,
clib_error_t *error = 0;
u8 twice_nat = 0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -392,12 +373,6 @@ add_address_command_fn (vlib_main_t * vm,
}
}
- if (sm->static_mapping_only)
- {
- error = clib_error_return (0, "static mapping only mode");
- goto done;
- }
-
start_host_order = clib_host_to_net_u32 (start_addr.as_u32);
end_host_order = clib_host_to_net_u32 (end_addr.as_u32);
@@ -419,9 +394,13 @@ add_address_command_fn (vlib_main_t * vm,
for (i = 0; i < count; i++)
{
if (is_add)
- rv = snat_add_address (sm, &this_addr, vrf_id, twice_nat);
+ {
+ rv = nat44_ed_add_address (&this_addr, vrf_id, twice_nat);
+ }
else
- rv = snat_del_address (sm, this_addr, 0, twice_nat);
+ {
+ rv = nat44_ed_del_address (this_addr, twice_nat);
+ }
switch (rv)
{
@@ -495,15 +474,12 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input,
u64 now = vlib_time_now (vm);
u64 sess_timeout_time = 0;
- u32 udp_sessions = 0;
- u32 tcp_sessions = 0;
- u32 icmp_sessions = 0;
-
- u32 timed_out = 0;
- u32 transitory = 0;
- u32 transitory_wait_closed = 0;
- u32 transitory_closed = 0;
- u32 established = 0;
+ struct
+ {
+ u32 total;
+ u32 timed_out;
+ } udp = { 0 }, tcp = { 0 }, tcp_established = { 0 }, tcp_transitory = { 0 },
+ icmp = { 0 }, other = { 0 };
u32 fib;
@@ -517,45 +493,48 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
pool_foreach (s, tsm->sessions)
{
- sess_timeout_time = s->last_heard +
- (f64) nat44_session_get_timeout (sm, s);
- if (now >= sess_timeout_time)
- timed_out++;
-
- switch (s->nat_proto)
- {
- case NAT_PROTOCOL_ICMP:
- icmp_sessions++;
- break;
- case NAT_PROTOCOL_TCP:
- tcp_sessions++;
- if (s->state)
- {
- if (s->tcp_closed_timestamp)
- {
- if (now >= s->tcp_closed_timestamp)
- {
- ++transitory_closed;
- }
- else
- {
- ++transitory_wait_closed;
- }
- }
- transitory++;
- }
- else
- established++;
- break;
- case NAT_PROTOCOL_UDP:
- default:
- udp_sessions++;
- break;
- }
- }
- nat44_show_lru_summary (vm, tsm, now, sess_timeout_time);
- count += pool_elts (tsm->sessions);
- }
+ sess_timeout_time =
+ s->last_heard + (f64) nat44_session_get_timeout (sm, s);
+
+ switch (s->proto)
+ {
+ case IP_PROTOCOL_ICMP:
+ ++icmp.total;
+ if (now >= sess_timeout_time)
+ ++icmp.timed_out;
+ break;
+ case IP_PROTOCOL_TCP:
+ ++tcp.total;
+ if (now >= sess_timeout_time)
+ ++tcp.timed_out;
+ if (nat44_ed_tcp_is_established (s->tcp_state))
+ {
+ ++tcp_established.total;
+ if (now >= sess_timeout_time)
+ ++tcp_established.timed_out;
+ }
+ else
+ {
+ ++tcp_transitory.total;
+ if (now >= sess_timeout_time)
+ ++tcp_transitory.timed_out;
+ }
+ break;
+ case IP_PROTOCOL_UDP:
+ ++udp.total;
+ if (now >= sess_timeout_time)
+ ++udp.timed_out;
+ break;
+ default:
+ ++other.total;
+ if (now >= sess_timeout_time)
+ ++other.timed_out;
+ break;
+ }
+ }
+ nat44_show_lru_summary (vm, tsm, now, sess_timeout_time);
+ count += pool_elts (tsm->sessions);
+ }
}
else
{
@@ -564,55 +543,66 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
sess_timeout_time = s->last_heard +
(f64) nat44_session_get_timeout (sm, s);
- if (now >= sess_timeout_time)
- timed_out++;
-
- switch (s->nat_proto)
- {
- case NAT_PROTOCOL_ICMP:
- icmp_sessions++;
- break;
- case NAT_PROTOCOL_TCP:
- tcp_sessions++;
- if (s->state)
- {
- if (s->tcp_closed_timestamp)
- {
- if (now >= s->tcp_closed_timestamp)
- {
- ++transitory_closed;
- }
- else
- {
- ++transitory_wait_closed;
- }
- }
- transitory++;
- }
- else
- established++;
- break;
- case NAT_PROTOCOL_UDP:
- default:
- udp_sessions++;
- break;
- }
+
+ switch (s->proto)
+ {
+ case IP_PROTOCOL_ICMP:
+ ++icmp.total;
+ if (now >= sess_timeout_time)
+ ++icmp.timed_out;
+ break;
+ case IP_PROTOCOL_TCP:
+ ++tcp.total;
+ if (now >= sess_timeout_time)
+ ++tcp.timed_out;
+ if (nat44_ed_tcp_is_established (s->tcp_state))
+ {
+ ++tcp_established.total;
+ if (now >= sess_timeout_time)
+ ++tcp_established.timed_out;
+ }
+ else
+ {
+ ++tcp_transitory.total;
+ if (now >= sess_timeout_time)
+ ++tcp_transitory.timed_out;
+ }
+ break;
+ case IP_PROTOCOL_UDP:
+ ++udp.total;
+ if (now >= sess_timeout_time)
+ ++udp.timed_out;
+ break;
+ default:
+ ++other.total;
+ if (now >= sess_timeout_time)
+ ++other.timed_out;
+ break;
+ }
}
nat44_show_lru_summary (vm, tsm, now, sess_timeout_time);
count = pool_elts (tsm->sessions);
}
- vlib_cli_output (vm, "total timed out sessions: %u", timed_out);
- vlib_cli_output (vm, "total sessions: %u", count);
- vlib_cli_output (vm, "total tcp sessions: %u", tcp_sessions);
- vlib_cli_output (vm, "total tcp established sessions: %u", established);
- vlib_cli_output (vm, "total tcp transitory sessions: %u", transitory);
- vlib_cli_output (vm, "total tcp transitory (WAIT-CLOSED) sessions: %u",
- transitory_wait_closed);
- vlib_cli_output (vm, "total tcp transitory (CLOSED) sessions: %u",
- transitory_closed);
- vlib_cli_output (vm, "total udp sessions: %u", udp_sessions);
- vlib_cli_output (vm, "total icmp sessions: %u", icmp_sessions);
+ u32 timed_out =
+ tcp.timed_out + icmp.timed_out + udp.timed_out + other.timed_out;
+ vlib_cli_output (vm, "total sessions: %u (timed out: %u)", count, timed_out);
+ vlib_cli_output (vm, "tcp sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", tcp.total,
+ tcp.timed_out);
+ vlib_cli_output (vm, " established: %u (timed out: %u)",
+ tcp_established.total, tcp_established.timed_out);
+ vlib_cli_output (vm, " transitory: %u (timed out: %u)",
+ tcp_transitory.total, tcp_transitory.timed_out);
+ vlib_cli_output (vm, "udp sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", udp.total,
+ udp.timed_out);
+ vlib_cli_output (vm, "icmp sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", icmp.total,
+ icmp.timed_out);
+ vlib_cli_output (vm, "other sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", other.total,
+ other.timed_out);
return 0;
}
@@ -628,14 +618,14 @@ nat44_show_addresses_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
vlib_cli_output (vm, "%U", format_ip4_address, &ap->addr);
if (ap->fib_index != ~0)
- vlib_cli_output (vm, " tenant VRF: %u",
- fib_table_get(ap->fib_index, FIB_PROTOCOL_IP4)->ft_table_id);
+ vlib_cli_output (
+ vm, " tenant VRF: %u",
+ fib_table_get (ap->fib_index, FIB_PROTOCOL_IP4)->ft_table_id);
else
vlib_cli_output (vm, " tenant VRF independent");
- #define _(N, i, n, s) \
- vlib_cli_output (vm, " %d busy %s ports", ap->busy_##n##_ports, s);
- foreach_nat_protocol
- #undef _
+
+ if (ap->addr_len != ~0)
+ vlib_cli_output (vm, " synced with interface address");
}
vlib_cli_output (vm, "NAT44 twice-nat pool addresses:");
vec_foreach (ap, sm->twice_nat_addresses)
@@ -646,10 +636,9 @@ nat44_show_addresses_command_fn (vlib_main_t * vm, unformat_input_t * input,
fib_table_get(ap->fib_index, FIB_PROTOCOL_IP4)->ft_table_id);
else
vlib_cli_output (vm, " tenant VRF independent");
- #define _(N, i, n, s) \
- vlib_cli_output (vm, " %d busy %s ports", ap->busy_##n##_ports, s);
- foreach_nat_protocol
- #undef _
+
+ if (ap->addr_len != ~0)
+ vlib_cli_output (vm, " synced with interface address");
}
return 0;
}
@@ -669,7 +658,6 @@ snat_feature_command_fn (vlib_main_t * vm,
sw_if_index = ~0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -804,21 +792,22 @@ nat44_show_interfaces_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "NAT44 interfaces:");
pool_foreach (i, sm->interfaces)
{
- vlib_cli_output (vm, " %U %s", format_vnet_sw_if_index_name, vnm,
- i->sw_if_index,
- (nat_interface_is_inside(i) &&
- nat_interface_is_outside(i)) ? "in out" :
- (nat_interface_is_inside(i) ? "in" : "out"));
+ vlib_cli_output (vm, " %U %s", format_vnet_sw_if_index_name, vnm,
+ i->sw_if_index,
+ (nat44_ed_is_interface_inside (i) &&
+ nat44_ed_is_interface_outside (i)) ?
+ "in out" :
+ (nat44_ed_is_interface_inside (i) ? "in" : "out"));
}
pool_foreach (i, sm->output_feature_interfaces)
{
- vlib_cli_output (vm, " %U output-feature %s",
- format_vnet_sw_if_index_name, vnm,
- i->sw_if_index,
- (nat_interface_is_inside(i) &&
- nat_interface_is_outside(i)) ? "in out" :
- (nat_interface_is_inside(i) ? "in" : "out"));
+ vlib_cli_output (vm, " %U output-feature %s",
+ format_vnet_sw_if_index_name, vnm, i->sw_if_index,
+ (nat44_ed_is_interface_inside (i) &&
+ nat44_ed_is_interface_outside (i)) ?
+ "in out" :
+ (nat44_ed_is_interface_inside (i) ? "in" : "out"));
}
return 0;
@@ -832,14 +821,13 @@ add_static_mapping_command_fn (vlib_main_t * vm,
unformat_input_t _line_input, *line_input = &_line_input;
vnet_main_t *vnm = vnet_get_main ();
clib_error_t *error = 0;
- int rv;
-
- nat_protocol_t proto = NAT_PROTOCOL_OTHER;
ip4_address_t l_addr, e_addr, pool_addr;
u32 l_port = 0, e_port = 0, vrf_id = ~0;
u8 l_port_set = 0, e_port_set = 0;
- u32 sw_if_index, flags = 0;
- int is_add = 1;
+ int is_add = 1, rv;
+ u32 flags = 0;
+ u32 sw_if_index = ~0;
+ ip_protocol_t proto = 0;
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -881,7 +869,7 @@ add_static_mapping_command_fn (vlib_main_t * vm,
}
else if (unformat (line_input, "vrf %u", &vrf_id))
;
- else if (unformat (line_input, "%U", unformat_nat_protocol, &proto))
+ else if (unformat (line_input, "%U", unformat_ip_protocol, &proto))
;
else if (unformat (line_input, "self-twice-nat"))
{
@@ -923,8 +911,6 @@ add_static_mapping_command_fn (vlib_main_t * vm,
e_port = clib_host_to_net_u16 (e_port);
}
- // TODO: specific pool_addr for both pool & twice nat pool ?
-
if (is_add)
{
rv =
@@ -937,25 +923,17 @@ add_static_mapping_command_fn (vlib_main_t * vm,
vrf_id, sw_if_index, flags);
}
- // TODO: fix returns
-
switch (rv)
{
- case VNET_API_ERROR_INVALID_VALUE:
- error = clib_error_return (0, "External port already in use.");
- goto done;
+ case VNET_API_ERROR_UNSUPPORTED:
+ error = clib_error_return (0, "Plugin disabled.");
+ break;
case VNET_API_ERROR_NO_SUCH_ENTRY:
- if (is_add)
- error = clib_error_return (0, "External address must be allocated.");
- else
- error = clib_error_return (0, "Mapping not exist.");
- goto done;
- case VNET_API_ERROR_NO_SUCH_FIB:
- error = clib_error_return (0, "No such VRF id.");
- goto done;
+ error = clib_error_return (0, "Mapping not exist.");
+ break;
case VNET_API_ERROR_VALUE_EXIST:
error = clib_error_return (0, "Mapping already exist.");
- goto done;
+ break;
default:
break;
}
@@ -966,7 +944,6 @@ done:
return error;
}
-// TODO: either delete this bullshit or update it
static clib_error_t *
add_identity_mapping_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -978,12 +955,11 @@ add_identity_mapping_command_fn (vlib_main_t * vm,
int rv, is_add = 1, port_set = 0;
u32 sw_if_index, port, flags, vrf_id = ~0;
- nat_protocol_t proto;
+ ip_protocol_t proto = 0;
ip4_address_t addr;
flags = NAT_SM_FLAG_IDENTITY_NAT;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -998,7 +974,7 @@ add_identity_mapping_command_fn (vlib_main_t * vm,
}
else if (unformat (line_input, "vrf %u", &vrf_id))
;
- else if (unformat (line_input, "%U %u", unformat_nat_protocol, &proto,
+ else if (unformat (line_input, "%U %u", unformat_ip_protocol, &proto,
&port))
{
port_set = 1;
@@ -1036,25 +1012,17 @@ add_identity_mapping_command_fn (vlib_main_t * vm,
sw_if_index, flags);
}
- // TODO: fix returns
-
switch (rv)
{
- case VNET_API_ERROR_INVALID_VALUE:
- error = clib_error_return (0, "External port already in use.");
- goto done;
+ case VNET_API_ERROR_UNSUPPORTED:
+ error = clib_error_return (0, "Plugin disabled.");
+ break;
case VNET_API_ERROR_NO_SUCH_ENTRY:
- if (is_add)
- error = clib_error_return (0, "External address must be allocated.");
- else
- error = clib_error_return (0, "Mapping not exist.");
- goto done;
- case VNET_API_ERROR_NO_SUCH_FIB:
- error = clib_error_return (0, "No such VRF id.");
- goto done;
+ error = clib_error_return (0, "Mapping not exist.");
+ break;
case VNET_API_ERROR_VALUE_EXIST:
error = clib_error_return (0, "Mapping already exist.");
- goto done;
+ break;
default:
break;
}
@@ -1075,12 +1043,11 @@ add_lb_static_mapping_command_fn (vlib_main_t * vm,
ip4_address_t l_addr, e_addr;
u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0, affinity = 0;
u8 proto_set = 0;
- nat_protocol_t proto;
+ ip_protocol_t proto;
nat44_lb_addr_port_t *locals = 0, local;
int rv, is_add = 1;
u32 flags = 0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -1091,6 +1058,7 @@ add_lb_static_mapping_command_fn (vlib_main_t * vm,
{
clib_memset (&local, 0, sizeof (local));
local.addr = l_addr;
+ l_port = clib_host_to_net_u16 (l_port);
local.port = (u16) l_port;
local.probability = (u8) probability;
vec_add1 (locals, local);
@@ -1101,6 +1069,7 @@ add_lb_static_mapping_command_fn (vlib_main_t * vm,
{
clib_memset (&local, 0, sizeof (local));
local.addr = l_addr;
+ l_port = clib_host_to_net_u16 (l_port);
local.port = (u16) l_port;
local.probability = (u8) probability;
local.vrf_id = vrf_id;
@@ -1108,8 +1077,10 @@ add_lb_static_mapping_command_fn (vlib_main_t * vm,
}
else if (unformat (line_input, "external %U:%u", unformat_ip4_address,
&e_addr, &e_port))
- ;
- else if (unformat (line_input, "protocol %U", unformat_nat_protocol,
+ {
+ e_port = clib_host_to_net_u16 (e_port);
+ }
+ else if (unformat (line_input, "protocol %U", unformat_ip_protocol,
&proto))
{
proto_set = 1;
@@ -1197,10 +1168,9 @@ add_lb_backend_command_fn (vlib_main_t * vm,
u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0;
int is_add = 1;
int rv;
- nat_protocol_t proto;
+ ip_protocol_t proto;
u8 proto_set = 0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -1216,7 +1186,7 @@ add_lb_backend_command_fn (vlib_main_t * vm,
else if (unformat (line_input, "external %U:%u", unformat_ip4_address,
&e_addr, &e_port))
;
- else if (unformat (line_input, "protocol %U", unformat_nat_protocol,
+ else if (unformat (line_input, "protocol %U", unformat_ip_protocol,
&proto))
proto_set = 1;
else if (unformat (line_input, "del"))
@@ -1276,14 +1246,14 @@ nat44_show_static_mappings_command_fn (vlib_main_t * vm,
{
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
- snat_static_map_resolve_t *rp;
+ snat_static_mapping_resolve_t *rp;
vlib_cli_output (vm, "NAT44 static mappings:");
pool_foreach (m, sm->static_mappings)
{
vlib_cli_output (vm, " %U", format_snat_static_mapping, m);
}
- vec_foreach (rp, sm->to_resolve)
+ vec_foreach (rp, sm->sm_to_resolve)
vlib_cli_output (vm, " %U", format_snat_static_map_to_resolve, rp);
return 0;
@@ -1294,27 +1264,31 @@ snat_add_interface_address_command_fn (vlib_main_t * vm,
unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- snat_main_t *sm = &snat_main;
unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index;
- int rv;
- int is_del = 0;
+ vnet_main_t *vnm = vnet_get_main ();
clib_error_t *error = 0;
+ int rv, is_del = 0;
u8 twice_nat = 0;
+ u32 sw_if_index;
+
+ sw_if_index = ~0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "%U", unformat_vnet_sw_interface,
- sm->vnet_main, &sw_if_index))
+ if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
;
else if (unformat (line_input, "twice-nat"))
- twice_nat = 1;
+ {
+ twice_nat = 1;
+ }
else if (unformat (line_input, "del"))
- is_del = 1;
+ {
+ is_del = 1;
+ }
else
{
error = clib_error_return (0, "unknown input '%U'",
@@ -1323,19 +1297,86 @@ snat_add_interface_address_command_fn (vlib_main_t * vm,
}
}
- rv = snat_add_interface_address (sm, sw_if_index, is_del, twice_nat);
+ if (is_del)
+ {
+ rv = nat44_ed_del_interface_address (sw_if_index, twice_nat);
+ }
+ else
+ {
+ rv = nat44_ed_add_interface_address (sw_if_index, twice_nat);
+ }
- switch (rv)
+ if (0 != rv)
{
- case 0:
- break;
+ error =
+ clib_error_return (0, "%s %U address failed", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnm, sw_if_index);
+ goto done;
+ }
- default:
- error = clib_error_return (0, "snat_add_interface_address returned %d",
- rv);
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+nat44_ed_add_del_vrf_table_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ bool is_add = true, not_set = true;
+ u32 vrf_id = ~0;
+ int rv;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%u", &vrf_id))
+ ;
+ else if (not_set)
+ {
+ if (unformat (line_input, "add"))
+ {
+ is_add = true;
+ }
+ else if (unformat (line_input, "del"))
+ {
+ is_add = false;
+ }
+ not_set = false;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (not_set)
+ {
+ error = clib_error_return (0, "missing required parameter");
+ goto done;
+ }
+
+ if (~0 == vrf_id)
+ {
+ error = clib_error_return (0, "missing vrf id");
goto done;
}
+ rv = nat44_ed_add_del_vrf_table (vrf_id, is_add);
+ if (rv)
+ {
+ error = clib_error_return (0, "%s vrf table returned %d",
+ is_add ? "add" : "del", rv);
+ }
+
done:
unformat_free (line_input);
@@ -1343,27 +1384,107 @@ done:
}
static clib_error_t *
+nat44_ed_add_del_vrf_route_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ bool is_add = true, not_set = true;
+ u32 vrf_id = ~0, table_vrf_id = ~0;
+ int rv;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "table %u", &table_vrf_id))
+ ;
+ else if (unformat (line_input, "%u", &vrf_id))
+ ;
+ else if (not_set)
+ {
+ if (unformat (line_input, "add"))
+ {
+ is_add = true;
+ }
+ else if (unformat (line_input, "del"))
+ {
+ is_add = false;
+ }
+ not_set = false;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (not_set)
+ {
+ error = clib_error_return (0, "missing required parameter");
+ goto done;
+ }
+
+ if ((~0 == vrf_id) || (~0 == table_vrf_id))
+ {
+ error = clib_error_return (0, "missing vrf id");
+ goto done;
+ }
+
+ rv = nat44_ed_add_del_vrf_route (table_vrf_id, vrf_id, is_add);
+ if (rv)
+ {
+ error = clib_error_return (0, "%s vrf table returned %d",
+ is_add ? "add" : "del", rv);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+nat44_ed_show_vrf_tables_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
+ vrf_route_t *r;
+ int i = 0;
+
+ pool_foreach (t, sm->vrf_tables)
+ {
+ vlib_cli_output (vm, "table %u:", t->table_vrf_id);
+ pool_foreach (r, t->routes)
+ {
+ vlib_cli_output (vm, "[%u] vrf-id %u", i, r->vrf_id);
+ i++;
+ }
+ }
+
+ return 0;
+}
+
+static clib_error_t *
nat44_show_interface_address_command_fn (vlib_main_t * vm,
unformat_input_t * input,
vlib_cli_command_t * cmd)
{
snat_main_t *sm = &snat_main;
vnet_main_t *vnm = vnet_get_main ();
- u32 *sw_if_index;
+ snat_address_resolve_t *ap;
vlib_cli_output (vm, "NAT44 pool address interfaces:");
- vec_foreach (sw_if_index, sm->auto_add_sw_if_indices)
- {
- vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, vnm,
- *sw_if_index);
- }
- vlib_cli_output (vm, "NAT44 twice-nat pool address interfaces:");
- vec_foreach (sw_if_index, sm->auto_add_sw_if_indices_twice_nat)
+ vec_foreach (ap, sm->addr_to_resolve)
{
- vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, vnm,
- *sw_if_index);
+ vlib_cli_output (vm, " %U%s", format_vnet_sw_if_index_name, vnm,
+ ap->sw_if_index, ap->is_twice_nat ? " twice-nat" : "");
}
-
return 0;
}
@@ -1375,22 +1496,61 @@ nat44_show_sessions_command_fn (vlib_main_t * vm, unformat_input_t * input,
clib_error_t *error = 0;
snat_main_per_thread_data_t *tsm;
snat_main_t *sm = &snat_main;
-
- int i = 0;
+ ip4_address_t i2o_sa, i2o_da, o2i_sa, o2i_da;
+ u8 filter_i2o_sa = 0, filter_i2o_da = 0;
+ u8 filter_o2i_sa = 0, filter_o2i_da = 0;
+ u32 i2o_sp, i2o_dp, o2i_sp, o2i_dp;
+ u8 filter_i2o_sp = 0, filter_i2o_dp = 0;
+ u8 filter_o2i_sp = 0, filter_o2i_dp = 0;
+ ip_protocol_t proto;
+ u8 filter_proto = 0;
+ u8 had_input = 1, filtering = 0;
+ int i = 0, showed_sessions;
if (!unformat_user (input, unformat_line_input, line_input))
- goto print;
+ {
+ had_input = 0;
+ goto print;
+ }
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- error = clib_error_return (0, "unknown input '%U'",
- format_unformat_error, line_input);
- break;
+ if (unformat (line_input, "filter i2o saddr %U", unformat_ip4_address,
+ &i2o_sa))
+ filter_i2o_sa = filtering = 1;
+ else if (unformat (line_input, "filter i2o daddr %U",
+ unformat_ip4_address, &i2o_da))
+ filter_i2o_da = filtering = 1;
+ else if (unformat (line_input, "filter o2i saddr %U",
+ unformat_ip4_address, &o2i_sa))
+ filter_o2i_sa = filtering = 1;
+ else if (unformat (line_input, "filter o2i daddr %U",
+ unformat_ip4_address, &o2i_da))
+ filter_o2i_da = filtering = 1;
+ else if (unformat (line_input, "filter i2o sport %u", &i2o_sp))
+ filter_i2o_sp = filtering = 1;
+ else if (unformat (line_input, "filter i2o dport %u", &i2o_dp))
+ filter_i2o_dp = filtering = 1;
+ else if (unformat (line_input, "filter o2i sport %u", &o2i_sp))
+ filter_o2i_sp = filtering = 1;
+ else if (unformat (line_input, "filter o2i dport %u", &o2i_dp))
+ filter_o2i_dp = filtering = 1;
+ else if (unformat (line_input, "filter i2o proto %U",
+ unformat_ip_protocol, &proto))
+ filter_proto = filtering = 1;
+ else if (unformat (line_input, "filter o2i proto %U",
+ unformat_ip_protocol, &proto))
+ filter_proto = filtering = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
}
- unformat_free (line_input);
print:
- vlib_cli_output (vm, "NAT44 ED sessions:");
+ vlib_cli_output (vm, "NAT44 ED sessions:");
vec_foreach_index (i, sm->per_thread_data)
{
@@ -1400,12 +1560,53 @@ print:
i, vlib_worker_threads[i].name,
pool_elts (tsm->sessions));
- snat_session_t *s;
- pool_foreach (s, tsm->sessions)
- {
- vlib_cli_output (vm, " %U\n", format_snat_session, tsm, s);
- }
+ showed_sessions = 0;
+ snat_session_t *s;
+ pool_foreach (s, tsm->sessions)
+ {
+ if (filtering)
+ {
+ if (filter_i2o_sa && i2o_sa.as_u32 != s->i2o.match.saddr.as_u32)
+ continue;
+ if (filter_i2o_da && i2o_da.as_u32 != s->i2o.match.daddr.as_u32)
+ continue;
+ if (filter_o2i_sa && o2i_sa.as_u32 != s->o2i.match.saddr.as_u32)
+ continue;
+ if (filter_o2i_da && o2i_da.as_u32 != s->o2i.match.daddr.as_u32)
+ continue;
+ if (filter_i2o_sp &&
+ i2o_sp != clib_net_to_host_u16 (s->i2o.match.sport))
+ continue;
+ if (filter_i2o_dp &&
+ i2o_dp != clib_net_to_host_u16 (s->i2o.match.dport))
+ continue;
+ if (filter_o2i_sp &&
+ o2i_sp != clib_net_to_host_u16 (s->o2i.match.sport))
+ continue;
+ if (filter_o2i_dp &&
+ o2i_dp != clib_net_to_host_u16 (s->o2i.match.dport))
+ continue;
+ if (filter_proto && proto != s->proto)
+ continue;
+ showed_sessions++;
+ }
+ vlib_cli_output (vm, " %U\n", format_snat_session, sm, tsm, s,
+ vlib_time_now (vm));
+ }
+ if (filtering)
+ {
+ vlib_cli_output (vm,
+ "Showed: %d, Filtered: %d of total %d "
+ "sessions of thread %d\n\n",
+ showed_sessions,
+ pool_elts (tsm->sessions) - showed_sessions,
+ pool_elts (tsm->sessions), i);
+ }
}
+
+done:
+ if (had_input)
+ unformat_free (line_input);
return error;
}
@@ -1457,7 +1658,7 @@ nat44_del_session_command_fn (vlib_main_t * vm,
u32 port = 0, eh_port = 0, vrf_id = sm->outside_vrf_id;
clib_error_t *error = 0;
ip4_address_t addr, eh_addr;
- nat_protocol_t proto;
+ ip_protocol_t proto;
int is_in = 0;
int rv;
@@ -1466,9 +1667,8 @@ nat44_del_session_command_fn (vlib_main_t * vm,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat
- (line_input, "%U:%u %U", unformat_ip4_address, &addr, &port,
- unformat_nat_protocol, &proto))
+ if (unformat (line_input, "%U:%u %U", unformat_ip4_address, &addr, &port,
+ unformat_ip_protocol, &proto))
;
else if (unformat (line_input, "in"))
{
@@ -1493,10 +1693,9 @@ nat44_del_session_command_fn (vlib_main_t * vm,
}
}
- rv =
- nat44_del_ed_session (sm, &addr, clib_host_to_net_u16 (port), &eh_addr,
- clib_host_to_net_u16 (eh_port),
- nat_proto_to_ip_proto (proto), vrf_id, is_in);
+ rv = nat44_ed_del_session (sm, &addr, clib_host_to_net_u16 (port), &eh_addr,
+ clib_host_to_net_u16 (eh_port), proto, vrf_id,
+ is_in);
switch (rv)
{
@@ -1648,21 +1847,19 @@ done:
* @cliexstart{nat44}
* Enable nat44 plugin
* To enable nat44-ed, use:
- * vpp# nat44 enable
+ * vpp# nat44 plugin enable
* To disable nat44-ed, use:
- * vpp# nat44 disable
- * To enable nat44-ed static mapping with connection tracking, use:
- * vpp# nat44-ed enable static-mapping connection-tracking
+ * vpp# nat44 plugin disable
* To set inside-vrf outside-vrf, use:
- * vpp# nat44 enable inside-vrf <id> outside-vrf <id>
+ * vpp# nat44 plugin enable inside-vrf <id> outside-vrf <id>
* @cliexend
?*/
VLIB_CLI_COMMAND (nat44_ed_enable_disable_command, static) = {
- .path = "nat44",
- .short_help = "nat44 <enable [sessions <max-number>] [static-mapping-only "
- "connection-tracking] [inside-vrf <vrf-id>] "
- "[outside-vrf <vrf-id>]>|disable",
+ .path = "nat44 plugin",
.function = nat44_ed_enable_disable_command_fn,
+ .short_help =
+ "nat44 plugin <enable [sessions <max-number>] [inside-vrf <vrf-id>] "
+ "[outside-vrf <vrf-id>]>|disable",
};
/*?
@@ -1691,7 +1888,7 @@ VLIB_CLI_COMMAND (set_workers_command, static) = {
VLIB_CLI_COMMAND (nat_show_workers_command, static) = {
.path = "show nat workers",
.short_help = "show nat workers",
- .function = nat_show_workers_commnad_fn,
+ .function = nat_show_workers_command_fn,
};
/*?
@@ -2019,9 +2216,48 @@ VLIB_CLI_COMMAND (nat44_show_static_mappings_command, static) = {
* @cliexend
?*/
VLIB_CLI_COMMAND (snat_add_interface_address_command, static) = {
- .path = "nat44 add interface address",
- .short_help = "nat44 add interface address <interface> [twice-nat] [del]",
- .function = snat_add_interface_address_command_fn,
+ .path = "nat44 add interface address",
+ .function = snat_add_interface_address_command_fn,
+ .short_help = "nat44 add interface address <interface> [twice-nat] [del]",
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{nat44 vrf table}
+ * Add empty inter VRF routing table
+ * vpp# nat44 vrf table add 10
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat44_ed_add_del_vrf_table_command, static) = {
+ .path = "nat44 vrf table",
+ .short_help = "nat44 vrf table [add|del] <vrf-id>",
+ .function = nat44_ed_add_del_vrf_table_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{nat44 vrf route}
+ * Add inter VRF route record to VRF routing table
+ * vpp# nat44 vrf route add table 10 20
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat44_ed_add_del_vrf_route_command, static) = {
+ .path = "nat44 vrf route",
+ .short_help = "nat44 vrf route [add|del] table <vrf-id> <vrf-id>",
+ .function = nat44_ed_add_del_vrf_route_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{show nat44 vrf tables}
+ * Show inter VRF route tables
+ * vpp# show nat44 vrf tables
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat44_ed_show_vrf_tables_command, static) = {
+ .path = "show nat44 vrf tables",
+ .short_help = "show nat44 vrf tables",
+ .function = nat44_ed_show_vrf_tables_command_fn,
};
/*?
@@ -2049,7 +2285,9 @@ VLIB_CLI_COMMAND (nat44_show_interface_address_command, static) = {
?*/
VLIB_CLI_COMMAND (nat44_show_sessions_command, static) = {
.path = "show nat44 sessions",
- .short_help = "show nat44 sessions",
+ .short_help = "show nat44 sessions [filter {i2o | o2i} {saddr <ip4-addr> "
+ "| sport <n> | daddr <ip4-addr> | dport <n> | proto <proto>} "
+ "[filter .. [..]]]",
.function = nat44_show_sessions_command_fn,
};
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_doc.rst b/src/plugins/nat/nat44-ed/nat44_ed_doc.rst
new file mode 100644
index 00000000000..a6c461d4260
--- /dev/null
+++ b/src/plugins/nat/nat44-ed/nat44_ed_doc.rst
@@ -0,0 +1,729 @@
+.. _NAT44_Endpoint_Dependent:
+
+.. toctree::
+
+NAT44-ED: NAT44 Endpoint Dependent
+==================================
+
+Introduction
+------------
+
+NAT44-ED is the IPv4 endpoint dependent network address translation
+plugin. The component implements an address and port-dependent mapping
+and address and port-dependent filtering NAT as described in
+`RFC4787 <https://tools.ietf.org/html/rfc4787>`__.
+
+The outside address and port (X1’:x1’) is reused for internal hosts
+(X:x) for different values of the external destination Y:y. A flow is
+matched by {source address, destination address, protocol, transport
+source port, transport destination port, fib index}. As long as all of
+these are unique, the
+mapping is valid. While a single outside address in theory allows for
+2^16 source ports \* 2^32 destination IP addresses \* 2^16 destination
+ports = 2^64 sessions, this number is much smaller in practice. Few
+destination ports are generally used (80, 443) and a fraction of the IP
+address space is available. The limitation is 2^16 bindings per outside
+IP address to a single destination address and port (Y:y).
+
+The implementation is split into a control-plane / slow-path and a
+data-plane / fast-path, essentially acting as a flow router. The
+data-plane does a 6-tuple flow lookup (SA, DA, P, SP, DP, FIB) and, on
+a match, runs the per-flow packet handling instructions on the packet.
+On a flow lookup miss, the packet is punted to the slow-path, where new
+sessions are created depending on policy.
+
+The supported set of packet handling instructions is ever-increasing.
+Currently, the implementation supports rewrite of SA, DA, SP, DP and TCP
+MSS. The fast-path also does connection tracking and expiry of older
+sessions.
+
+NAT44-ED matches communication on the 6-tuple
+``(src address, src port, dst address, dst port, protocol, fib)``,
+as sketched below.
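+
+The following is a minimal, self-contained C sketch of this matching
+idea; all type and function names here are illustrative and do not
+correspond to the plugin's actual data structures (the in-tree fast
+path uses bihash tables rather than a linear scan):
+
+::
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    /* illustrative 6-tuple flow key */
+    typedef struct
+    {
+      uint32_t saddr, daddr; /* IPv4 source / destination address */
+      uint16_t sport, dport; /* transport source / destination port */
+      uint8_t proto;         /* IP protocol number */
+      uint32_t fib_index;    /* FIB of the ingress interface */
+    } flow_key_t;
+
+    typedef struct
+    {
+      flow_key_t key;
+      int in_use;
+    } session_t;
+
+    #define MAX_SESSIONS 16
+    static session_t sessions[MAX_SESSIONS];
+
+    static int
+    key_equal (const flow_key_t *a, const flow_key_t *b)
+    {
+      return a->saddr == b->saddr && a->daddr == b->daddr &&
+             a->sport == b->sport && a->dport == b->dport &&
+             a->proto == b->proto && a->fib_index == b->fib_index;
+    }
+
+    /* fast path: exact 6-tuple match; NULL means a lookup miss */
+    static session_t *
+    fast_path_lookup (const flow_key_t *k)
+    {
+      for (int i = 0; i < MAX_SESSIONS; i++)
+        if (sessions[i].in_use && key_equal (&sessions[i].key, k))
+          return &sessions[i];
+      return NULL;
+    }
+
+    /* slow path: called on a miss; policy checks are omitted here */
+    static session_t *
+    slow_path_create (const flow_key_t *k)
+    {
+      for (int i = 0; i < MAX_SESSIONS; i++)
+        if (!sessions[i].in_use)
+          {
+            sessions[i].key = *k;
+            sessions[i].in_use = 1;
+            return &sessions[i];
+          }
+      return NULL; /* session table full */
+    }
+
+    int
+    main (void)
+    {
+      flow_key_t k = { 0x0a000001, 0x0a000101, 1024, 80, 6, 0 };
+      session_t *s = fast_path_lookup (&k);
+      if (!s)
+        s = slow_path_create (&k); /* punt to the slow path */
+      printf ("session %s\n", s ? "created" : "dropped");
+      return 0;
+    }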
+
+Structure
+~~~~~~~~~
+
+1) Dynamic NAT
+
+- also called PAT (Port Address Translation)
+- supports port overloading
+
+2) Static NAT
+
+- types of Static NAT:
+
+ a) identity mapping
+
+ - exceptions to translations
+
+ b) static mapping
+
+ - supported features:
+
+ 1. address only mapping
+
+ - one to one translation without ports
+
+ 2. twice-nat
+
+ - double-nat, translation of source and destination
+
+ 3. self-twice-nat
+
+ - double nat, translation of source and destination, where
+ external host address is the same as local host address
+
+ 4. out2in-only mapping
+
+ - session is created only from outside interface (out2in feature)
+
+ c) load balanced static mapping
+
+ - translates one frontend (``addr``:``port``) to multiple backends
+ (``addr``:``port``)
+
+3) Interfaces
+
+a) inside interface (in2out feature) - local to external network
+ translation - feature is before ip4-lookup
+b) outside interface (out2in feature) - external to local network
+ translation - feature is before ip4-lookup
+c) inside & outside interface (classify feature) - local or external
+ network translation - correct type of translation is determined per
+ communication - feature is before ip4-lookup
+d) output interface (output feature) - used for post routing translation
+ - feature is after ip4-lookup
+
+4) Addresses
+
+a) interface address - automatically managed external address - first
+ address of VPP interface
+b) pool address - range of external addresses
+
+5) Logging and Accounting
+
+a) ipfix logging
+b) syslog
+
+6) Miscellaneous Features
+
+a) inter-vrf translation control
+
+   1. basic
+
+      - nat44 plugin enable inside-vrf / outside-vrf
+      - inside/outside interface vrf’s
+
+   2. advanced
+
+      - vrf table routing feature
+
+b) udp/tcp/icmp timeouts - configurable timeouts for these protocols
+c) session limiting
+
+   1. basic (nat44 plugin enable [sessions <max-number>])
+   2. advanced (per vrf table / global limiting)
+d) mss-clamping - MSS (maximum segment size) is by default determined by
+ egress interface MTU (maximum transmission unit) size - used to lower
+ MSS value in VPN tunnel scenarios where additional headers can
+ enlarge the packet beyond MTU causing drops
+e) hairpinning - hosts on the same LAN segment communicating via an
+   external address
+f) forwarding - if enabled, translation only occurs if an active session
+   or a static configuration exists; the rest of the traffic is passed
+   through without being translated
+
+Session Table
+-------------
+
+A session table exists per thread and contains a pool of sessions, some
+of which may be expired. The NAT44-ED plugin does not use scavenging to
+clear expired sessions; instead it maintains LRU doubly-linked lists of
+session indices ordered by last update time. The head of a list is the
+least recently used session; each time a packet arrives on a session,
+its index is moved to the tail. Each session records which LRU list it
+belongs to (tcp transitory, tcp established, udp, icmp or unknown), and
+these per-protocol lists exist for better performance. Because of this,
+the plugin can run at the maximum number of sessions without having to
+scan for old ones: during session creation, if the maximum number of
+sessions has been reached, the LRU head is checked, and if it has
+expired it is deleted and the new session is created in its place. A
+minimal sketch of this mechanism follows.
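+
+The sketch below is self-contained C with illustrative names and a
+single list; the plugin itself keeps several per-protocol LRU lists and
+stores sessions in per-thread pools:
+
+::
+
+    #include <stdio.h>
+
+    #define MAX_SESSIONS 4
+
+    typedef struct
+    {
+      int prev, next;  /* LRU list neighbours, -1 = none */
+      double expires;  /* absolute expiry time */
+      int in_use;
+    } session_t;
+
+    static session_t pool[MAX_SESSIONS];
+    static int lru_head = -1; /* least recently used */
+    static int lru_tail = -1; /* most recently used */
+
+    static void
+    lru_remove (int i)
+    {
+      if (pool[i].prev != -1) pool[pool[i].prev].next = pool[i].next;
+      else lru_head = pool[i].next;
+      if (pool[i].next != -1) pool[pool[i].next].prev = pool[i].prev;
+      else lru_tail = pool[i].prev;
+    }
+
+    static void
+    lru_append (int i)
+    {
+      pool[i].prev = lru_tail;
+      pool[i].next = -1;
+      if (lru_tail != -1) pool[lru_tail].next = i;
+      else lru_head = i;
+      lru_tail = i;
+    }
+
+    /* a packet arrived on session i: refresh and move to the tail */
+    static void
+    session_touch (int i, double now, double timeout)
+    {
+      pool[i].expires = now + timeout;
+      lru_remove (i);
+      lru_append (i);
+    }
+
+    /* create a session; when the pool is full, reuse the LRU head,
+     * but only if it has already expired */
+    static int
+    session_create (double now, double timeout)
+    {
+      int i;
+      for (i = 0; i < MAX_SESSIONS; i++)
+        if (!pool[i].in_use)
+          break;
+      if (i == MAX_SESSIONS)
+        {
+          if (lru_head == -1 || pool[lru_head].expires > now)
+            return -1; /* full and nothing expired */
+          i = lru_head;
+          lru_remove (i);
+        }
+      pool[i].in_use = 1;
+      pool[i].expires = now + timeout;
+      lru_append (i);
+      return i;
+    }
+
+    int
+    main (void)
+    {
+      for (int i = 0; i < MAX_SESSIONS; i++)
+        pool[i].prev = pool[i].next = -1;
+      for (int i = 0; i < 5; i++)
+        printf ("create at t=0 -> %d\n", session_create (0.0, 10.0));
+      session_touch (0, 5.0, 10.0); /* session 0 is now the newest */
+      printf ("create at t=12 -> %d\n", session_create (12.0, 10.0));
+      return 0;
+    }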
+
+Terminology
+-----------
+
+- IN2OUT (inside to outside translation)
+- OUT2IN (outside to inside translation)
+- NAT (network address translation)
+- PAT (port address translation)
+- MSS (maximum segment size)
+- MTU (maximum transmission unit)
+- VRF (virtual routing and forwarding)
+- HAIRPINNING (hosts on the same LAN segment communicating via an
+  external address)
+
+Dynamic NAT (Minimal Required Configuration)
+--------------------------------------------
+
+::
+
+ +-------------+
+ | 10.0.0.0/24 |
+ +-------------+
+ |
+ +----------------------+
+ | GigabitEthernet0/8/0 |
+ +----------------------+
+ +----------------------+
+ | GigabitEthernet0/a/0 |
+ +----------------------+
+ |
+ +-------------+
+ | 10.0.1.0/24 |
+ +-------------+
+
+1) enable nat plugin
+
+..
+
+ nat44 plugin enable sessions 10000
+
+2) configure NAT interfaces, two options:
+
+a) add inside NAT interface on local VPP interface, add outside NAT
+ interface on external VPP interface
+
+..
+
+ set interface nat44 in GigabitEthernet0/8/0 out GigabitEthernet0/a/0
+
+b) add output NAT interface on external VPP interface
+
+..
+
+ set interface nat44 in GigabitEthernet0/a/0 output-feature
+
+3) configure NAT address
+
+a) add external address range
+
+..
+
+ nat44 add address 10.0.1.1
+
+b) add external VPP interface address
+
+..
+
+ nat44 add interface address GigabitEthernet0/a/0
+
+Static NAT
+----------
+
+Identity Mapping
+~~~~~~~~~~~~~~~~
+
+ nat44 add identity mapping ``ip4-addr``\ \|external ``interface``
+ [``protocol`` ``port``] [vrf ``table-id``] [del]
+
+Static Mapping
+~~~~~~~~~~~~~~
+
+ nat44 add static mapping tcp|udp|icmp local ``addr``
+ [``port|icmp-echo-id``] external ``addr`` [``port|icmp-echo-id``]
+ [vrf ``table-id``] [twice-nat|self-twice-nat] [out2in-only] [exact
+ ``pool-addr``] [del]
+
+Load Balanced Static Mapping
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ nat44 add load-balancing back-end protocol tcp|udp external
+ ``addr``:``port`` local ``addr``:``port`` [vrf ``table-id``]
+ probability ``n`` [del]
+
+..
+
+ nat44 add load-balancing static mapping protocol tcp|udp external
+ ``addr``:``port`` local ``addr``:``port`` [vrf ``table-id``]
+ probability ``n`` [twice-nat|self-twice-nat] [out2in-only] [affinity
+ ``timeout-seconds``] [del]
+
+Interfaces
+----------
+
+Inside Interface
+~~~~~~~~~~~~~~~~
+
+::
+
+ NAT INSIDE IF
+ +----------------------+
+ | GigabitEthernet0/8/0 |
+ +----------------------+
+
+..
+
+ set interface nat44 in GigabitEthernet0/8/0 [del]
+
+The NAT inside interface is used for translating local to external
+communication and handles both Dynamic and Static NAT traffic. If no
+matching session is found, a new session is created for both Dynamic
+NAT and Static NAT. Dynamic NAT sessions can be created only on an
+inside interface.
+
+Outside Interface
+~~~~~~~~~~~~~~~~~
+
+::
+
+ NAT OUTSIDE IF
+ +----------------------+
+ | GigabitEthernet0/a/0 |
+ +----------------------+
+
+..
+
+ set interface nat44 out GigabitEthernet0/a/0 [del]
+
+The NAT outside interface is used for translating external to local
+communication and handles both Dynamic and Static NAT traffic. A new
+session is created only if no matching session is found and a matching
+Static NAT configuration exists.
+
+Inside & Outside Interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ NAT IN AND OUT IF
+ +----------------------+
+ | GigabitEthernet0/8/0 |
+ +----------------------+
+ NAT IN AND OUT IF
+ +----------------------+
+ | GigabitEthernet0/a/0 |
+ +----------------------+
+
+..
+
+ set interface nat44 in GigabitEthernet0/8/0 out GigabitEthernet0/8/0
+ [del]
+
+ set interface nat44 in GigabitEthernet0/a/0 out GigabitEthernet0/a/0
+ [del]
+
+If one VPP interface is configured as both an inside and an outside
+NAT interface, the classification feature is used. By default a NAT
+inside interface uses the in2out feature and a NAT outside interface
+uses the out2in feature. The classification feature determines whether
+the communication should be passed to the in2out feature or to the
+out2in feature. Traffic is passed to the out2in feature if the
+destination address is one of the NAT addresses or if a static mapping
+in the out2in direction matches this communication. By default all
+traffic is passed to the in2out feature.
+
+Output Interface
+~~~~~~~~~~~~~~~~
+
+::
+
+ +-------------+ +-------------+
+ | 10.0.2.0/24 | | 10.0.3.0/24 |
+ +-------------+ +-------------+
+ | |
+ +----------------------+ +----------------------+
+ | GigabitEthernet0/7/0 | | GigabitEthernet0/8/0 |
+ +----------------------+ +----------------------+
+ NAT OUTPUT IF
+ +----------------------+
+ | GigabitEthernet0/a/0 |
+ +----------------------+
+ +----------+
+ | 10.0.1.1 |
+ +----------+
+ |
+ +-------------+
+ | 10.0.1.0/24 |
+ +-------------+
+
+..
+
+ set interface nat44 in GigabitEthernet0/a/0 output-feature [del]
+
+The NAT output interface acts as both an inside and an outside
+interface. Inside rules apply to all egress communication on the VPP
+interface and outside rules apply to all ingress communication.
+Compared to the inside/outside configuration method, none of the local
+interfaces need to be configured as inside NAT interfaces. Translation
+occurs only after the routing decision has been made, just before the
+packet leaves the NAT output interface. In the above example all
+traffic destined for 10.0.1.0/24 from 10.0.2.0/24 or 10.0.3.0/24 gets
+translated. The NAT output interface acts as a post-routing feature.
+
+Addresses
+---------
+
+Interface Address
+~~~~~~~~~~~~~~~~~
+
+ nat44 add interface address ``interface`` `twice-nat <#twice-nat>`__
+ [del]
+
+The NAT interface address is a standard external pool address that is
+added automatically once the first VPP interface address is resolved.
+Both standard and twice-nat addresses are supported. A twice-nat
+address is used in conjunction with the static mapping twice-nat and
+self-twice-nat features.
+
+Pool Address
+~~~~~~~~~~~~
+
+ nat44 add address ``ip4-range-start`` [- ``ip4-range-end``]
+ [tenant-vrf ``vrf-id``] `twice-nat <#twice-nat>`__ [del]
+
+A statically configured address or range of addresses that supports
+both standard and twice-nat use. Specifying a tenant vrf-id lets the
+user constrain the address or addresses to the NAT inside interfaces
+that belong to the same VRF table.
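+
+For example, to add the range 10.0.1.1 - 10.0.1.4 for tenants in VRF
+10 (hypothetical values):
+
+..
+
+   nat44 add address 10.0.1.1 - 10.0.1.4 tenant-vrf 10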
+
+Logging
+-------
+
+ nat set logging level ``level``
+
+The logging level configuration applies only to internal VPP logging.
+
+ nat ipfix logging [domain ``domain-id``] [src-port ``port``]
+ [disable]
+
+Both syslog and IPFIX provide connection tracking capabilities.
+Session creation, session deletion, and exceeding the maximum number
+of sessions, among other events, are logged via syslog and IPFIX.
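+
+For example, to enable IPFIX logging with a hypothetical observation
+domain and source port:
+
+..
+
+   nat ipfix logging domain 1 src-port 4739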
+
+Miscellaneous
+-------------
+
+VRFs
+~~~~
+
+::
+
+ VRF 0 VRF 1
+ +-------------+ +-------------+
+ | 10.0.2.0/24 | | 10.0.3.0/24 |
+ +-------------+ +-------------+
+ | |
+ NAT INSIDE IF NAT INSIDE IF
+ +----------------------+ +----------------------+
+ | GigabitEthernet0/7/0 | | GigabitEthernet0/8/0 |
+ +----------------------+ +----------------------+
+ NAT OUTSIDE IF NAT OUTSIDE IF
+ +----------------------+ +----------------------+
+ | GigabitEthernet0/a/0 | | GigabitEthernet0/b/0 |
+ +----------------------+ +----------------------+
+ VRF 2 VRF 3
+ | |
+ +--------------------------+
+ |
+ +------------+------------+------------+
+ | | | |
+ +----------+ +----------+ +----------+ +----------+
+ | 10.0.0.1 | | 10.0.0.2 | | 10.0.1.1 | | 10.0.1.2 |
+ +----------+ +----------+ +----------+ +----------+
+ VRF 0 POOL VRF 1 POOL VRF 0 POOL VRF 1 POOL
+
+..
+
+ nat44 add address ``ip4-addr`` [tenant-vrf ``vrf-id``] [del]
+
+ nat44 plugin enable inside-vrf ``vrf-id`` outside-vrf ``vrf-id``
+   [disable]
+
+Default behavior
+^^^^^^^^^^^^^^^^
+
+By design NAT supports passing communication between VRFs. Passing
+communication between multiple different VRFs is also supported
+(GE0/7/0 -> GE0/b/0, GE0/8/0 -> GE0/a/0).
+
+The NAT pool address tenant-vrf configuration parameter is used to
+constrain a pool address to a specific inside VRF. Example
+communication (in the above diagram):
+
+1) from GE0/7/0 -> GE0/b/0 would choose pool address 10.0.1.1
+2) from GE0/8/0 -> GE0/b/0 would choose pool address 10.0.1.2
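+
+The pool layout in the diagram above would be configured as:
+
+..
+
+   nat44 add address 10.0.0.1 tenant-vrf 0
+   nat44 add address 10.0.0.2 tenant-vrf 1
+   nat44 add address 10.0.1.1 tenant-vrf 0
+   nat44 add address 10.0.1.2 tenant-vrf 1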
+
+The plugin enable parameters inside-vrf and outside-vrf are used as
+follows:
+
+Both ``inside-vrf`` and ``outside-vrf`` configuration parameters are
+used in conjunction with Static NAT; ``inside-vrf`` is used only for
+Static NAT.
+
+inside VRF:
+
+- used only in conjunction with static mappings
+- the default inside VRF parameter is used in the in2out feature to
+  look up a static mapping if the mapping can't be found by the inside
+  interface VRF
+- used as the default in2out VRF when adding static mappings
+
+outside VRF:
+
+- used in conjunction with static mappings
+- a secondary option for looking up static mappings in the in2out
+  feature, based on the outside VRF
+- used as the default destination VRF in the in2out feature during
+  session creation if none of the outside interfaces can resolve the
+  destination IP address
+
+Session creation default behavior (in2out only):
+
+- the ingress interface FIB is used as the inside FIB
+- the outside FIB is chosen based on the ability to resolve the
+  destination address in one of the outside interface networks; if no
+  such network can resolve the destination, the default outside FIB
+  (outside VRF index) is used
+
+The default behavior enables the use of multiple outside and inside
+FIBs, with some limitations. The limitation is that if each interface
+belonging to a different FIB contains a default gateway, the FIB of
+the first interface network is always used as the outside FIB index
+during session creation.
+
+VRF tables
+^^^^^^^^^^
+
+ nat44 vrf table [add|del] ``vrf-id``
+
+..
+
+ nat44 vrf route [add|del] table ``vrf-id`` ``vrf-id``
+
+VRF tables change the default behavior of inter-VRF communication.
+Adding an empty VRF table disables passing communication between VRFs.
+Adding routes to the table makes the destination-VRF decision
+algorithm do lookups in these tables. During session creation the
+destination VRF in the in2out feature is resolved by traversing the
+VRF routes in the matching VRF table. If a VRF route resolves the
+destination IPv4 address, that VRF is used. If no VRF route can
+resolve the destination IPv4 address, the source VRF is used. The
+priority of VRF routes is based on the order of configuration.
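+
+For example, to allow in2out traffic arriving in VRF 0 to create
+sessions destined to VRF 1 (hypothetical table ids):
+
+..
+
+   nat44 vrf table add 0
+   nat44 vrf route add table 0 1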
+
+Timeouts
+~~~~~~~~
+
+ set nat timeout [udp ``sec`` \| tcp-established ``sec``
+ tcp-transitory ``sec`` \| icmp ``sec`` \| reset]
+
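+For example, with hypothetical timeout values in seconds:
+
+..
+
+   set nat timeout udp 120 tcp-established 7440 tcp-transitory 240 icmp 60
+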
+Session Limiting
+~~~~~~~~~~~~~~~~
+
+ nat44 plugin enable sessions ``max-number``
+
+The maximum number of sessions is applied on a per-thread (per-worker)
+basis.
+
+ set nat44 session limit ``limit`` [vrf ``table-id``]
+
+Per-VRF session limiting makes it possible to split the maximum number
+of sessions between different VRFs.
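+
+For example, to cap sessions in VRF 10 (hypothetical values):
+
+..
+
+   set nat44 session limit 1000 vrf 10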
+
+MSS Clamping
+~~~~~~~~~~~~
+
+ nat mss-clamping ``mss-value``\ \|disable
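+
+For example, to clamp the TCP MSS to a hypothetical value suitable for
+a tunnel-encapsulated path (see the MSS clamping feature description
+above):
+
+..
+
+   nat mss-clamping 1400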
+
+Forwarding
+~~~~~~~~~~
+
+ nat44 forwarding enable|disable
+
+Additional Configuration Commands
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ | set nat frame-queue-nelts ``number``
+ | set nat workers ``workers-list``
+ | nat44 del session in|out ``addr``:``port`` tcp|udp|icmp [vrf
+ ``id``] [external-host ``addr``:``port``]
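+
+For example, to delete a hypothetical inside TCP session:
+
+..
+
+   nat44 del session in 10.0.0.2:5201 tcp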
+
+Show commands
+^^^^^^^^^^^^^
+
+::
+
+ show nat workers
+ show nat timeouts
+ show nat44 summary
+ show nat44 sessions
+ show nat44 addresses
+ show nat mss-clamping
+ show nat44 interfaces
+ show nat44 vrf tables
+ show nat44 hash tables
+ nat44 show static mappings
+ show nat44 interface address
+
+Configuration Examples
+----------------------
+
+TWICE-NAT
+~~~~~~~~~
+
+Twice NAT lets you translate both the source and destination address
+in a single rule. Currently, twice NAT44 is supported only for
+sessions to a local network service initiated from the outside
+network. Twice NAT static mapping sessions can only be initiated
+(created) from the outside network.
+
+Topology
+^^^^^^^^
+
+::
+
+ +--------------------------+
+ | 10.0.0.2/24 (local host) |
+ +--------------------------+
+ |
+ +---------------------------------+
+ | 10.0.0.1/24 (eth0) (nat inside) |
+ | 20.0.0.1/24 (eth1) (nat outside)|
+ +---------------------------------+
+ |
+ +---------------------------+
+ | 20.0.0.2/24 (remote host) |
+ +---------------------------+
+
+In this example traffic is initiated from the remote host, which
+accesses the local host via the twice-nat mapping.
+
+Translation will occur as follows:
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+outside to inside translation:
+
+
+ | src address: 20.0.0.2 -> 192.168.160.101
+ | dst address: 20.0.0.1 -> 10.0.0.2
+
+inside to outside translation:
+
+
+ | src address: 10.0.0.2 -> 20.0.0.1
+ | dst address: 192.168.160.101 -> 20.0.0.2
+
+Configuration
+^^^^^^^^^^^^^
+
+Enable nat44-ed plugin:
+
+
+::
+
+ nat44 plugin enable sessions 1000
+
+Configure inside interface:
+
+
+::
+
+ set int state eth0 up
+ set int ip address eth0 10.0.0.1/24
+ set int nat44 in eth0
+
+Configure outside interface:
+
+
+::
+
+ set int state eth1 up
+ set int ip address eth1 20.0.0.1/24
+ set int nat44 out eth1
+
+Configure nat address pools:
+
+
+::
+
+ nat44 add address 20.0.0.1
+ nat44 add address 192.168.160.101 twice-nat
+
+- alternatively we could use ``nat44 add interface address eth1``
+- both pools are required
+- pool ``20.0.0.1`` is used for out2in incoming traffic
+- special twice-nat pool ``192.168.160.101`` is used for secondary
+ translation
+
+Finally, add twice-nat mapping:
+
+
+ nat44 add static mapping tcp local 10.0.0.2 5201 external 20.0.0.1
+ 5201 twice-nat
+
+SELF TWICE-NAT
+~~~~~~~~~~~~~~
+
+Self twice NAT works similarly to twice NAT, with a few exceptions. It
+is a feature that lets a client and a service running on the same host
+communicate via the NAT device. This means that the external address
+is the same as the local address. Self twice NAT static mapping
+sessions can only be initiated (created) from the outside network.
+
+.. _topology-self-twice-nat:
+
+Topology
+^^^^^^^^
+
+::
+
+ +--------------------------+
+ | 10.0.0.2/24 (local host) |
+ +--------------------------+
+ |
+ +-------------------------------------------+
+ | 10.0.0.1/24 (eth0) (nat inside & outside) |
+ +-------------------------------------------+
+
+In this example traffic is initiated from the local host, which
+accesses itself via the self-twice-nat mapping.
+
+.. _translation-will-occur-as-follows-1:
+
+Translation will occur as follows:
+''''''''''''''''''''''''''''''''''
+
+.. _outside-to-inside-translation-1:
+
+outside to inside translation:
+
+
+ | src address: 10.0.0.2 -> 192.168.160.101
+ | dst address: 10.0.0.1 -> 10.0.0.2
+
+.. _inside-to-outside-translation-1:
+
+inside to outside translation:
+
+
+ | src address: 10.0.0.2 -> 10.0.0.1
+ | dst address: 192.168.160.101 -> 10.0.0.2
+
+.. _configuration-1:
+
+Configuration
+^^^^^^^^^^^^^
+
+.. _enable-nat44-ed-plugin-1:
+
+Enable nat44-ed plugin:
+
+
+::
+
+ nat44 plugin enable sessions 1000
+
+Configure NAT interface:
+
+
+::
+
+ set int state eth0 up
+ set int ip address eth0 10.0.0.1/24
+ set int nat44 in eth0
+ set int nat44 out eth0
+
+.. _configure-nat-address-pools-1:
+
+Configure nat address pools:
+
+
+::
+
+ nat44 add address 10.0.0.1
+ nat44 add address 192.168.160.101 twice-nat
+
+Finally, add self-twice-nat mapping:
+
+
+ nat44 add static mapping tcp local 10.0.0.2 5201 external 10.0.0.1
+ 5201 self-twice-nat
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_format.c b/src/plugins/nat/nat44-ed/nat44_ed_format.c
index 597bc2b4d0b..ee3e925e529 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_format.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_format.c
@@ -12,111 +12,42 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-/**
- * @file
- * @brief NAT formatting
- */
#include <nat/nat44-ed/nat44_ed.h>
#include <nat/nat44-ed/nat44_ed_inlines.h>
-uword
-unformat_nat_protocol (unformat_input_t * input, va_list * args)
-{
- u32 *r = va_arg (*args, u32 *);
-
- if (0);
-#define _(N, i, n, s) else if (unformat (input, s)) *r = NAT_PROTOCOL_##N;
- foreach_nat_protocol
-#undef _
- else
- return 0;
- return 1;
-}
-
-u8 *
-format_nat_protocol (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- u8 *t = 0;
-
- switch (i)
- {
-#define _(N, j, n, str) case NAT_PROTOCOL_##N: t = (u8 *) str; break;
- foreach_nat_protocol
-#undef _
- default:
- s = format (s, "unknown");
- return s;
- }
- s = format (s, "%s", t);
- return s;
-}
-
-u8 *
-format_nat_addr_and_port_alloc_alg (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- u8 *t = 0;
-
- switch (i)
- {
-#define _(v, N, s) case NAT_ADDR_AND_PORT_ALLOC_ALG_##N: t = (u8 *) s; break;
- foreach_nat_addr_and_port_alloc_alg
-#undef _
- default:
- s = format (s, "unknown");
- return s;
- }
- s = format (s, "%s", t);
- return s;
-}
-
u8 *
-format_snat_key (u8 * s, va_list * args)
+format_ed_session_kvp (u8 *s, va_list *args)
{
- u64 key = va_arg (*args, u64);
+ clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
- ip4_address_t addr;
- u16 port;
- nat_protocol_t protocol;
+ u8 proto;
+ u16 r_port, l_port;
+ ip4_address_t l_addr, r_addr;
u32 fib_index;
- split_nat_key (key, &addr, &port, &fib_index, &protocol);
+ split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
+ s = format (s,
+ "local %U:%d remote %U:%d proto %U fib %d thread-index %u "
+ "session-index %u",
+ format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
+ format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
+ format_ip_protocol, proto, fib_index,
+ ed_value_get_thread_index (v), ed_value_get_session_index (v));
- s = format (s, "%U proto %U port %d fib %d",
- format_ip4_address, &addr,
- format_nat_protocol, protocol,
- clib_net_to_host_u16 (port), fib_index);
- return s;
-}
-
-u8 *
-format_snat_session_state (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- u8 *t = 0;
-
- switch (i)
- {
-#define _(v, N, str) case SNAT_SESSION_##N: t = (u8 *) str; break;
- foreach_snat_session_state
-#undef _
- default:
- t = format (t, "unknown");
- }
- s = format (s, "%s", t);
return s;
}
u8 *
format_snat_session (u8 * s, va_list * args)
{
+ snat_main_t *sm = va_arg (*args, snat_main_t *);
snat_main_per_thread_data_t *tsm =
va_arg (*args, snat_main_per_thread_data_t *);
snat_session_t *sess = va_arg (*args, snat_session_t *);
+ f64 now = va_arg (*args, f64);
- if (snat_is_unk_proto_session (sess))
+ if (nat44_ed_is_unk_proto (sess->proto))
{
s = format (s, " i2o %U proto %u fib %u\n",
format_ip4_address, &sess->in2out.addr,
@@ -127,26 +58,23 @@ format_snat_session (u8 * s, va_list * args)
}
else
{
- s = format (s, " i2o %U proto %U port %d fib %d\n",
- format_ip4_address, &sess->in2out.addr,
- format_nat_protocol, sess->nat_proto,
+ s = format (s, " i2o %U proto %U port %d fib %d\n", format_ip4_address,
+ &sess->in2out.addr, format_ip_protocol, sess->proto,
clib_net_to_host_u16 (sess->in2out.port),
sess->in2out.fib_index);
s = format (s, " o2i %U proto %U port %d fib %d\n",
- format_ip4_address, &sess->out2in.addr, format_nat_protocol,
- sess->nat_proto, clib_net_to_host_u16 (sess->out2in.port),
+ format_ip4_address, &sess->out2in.addr, format_ip_protocol,
+ sess->proto, clib_net_to_host_u16 (sess->out2in.port),
sess->out2in.fib_index);
}
- if (is_ed_session (sess) || is_fwd_bypass_session (sess))
+ if (nat44_ed_is_twice_nat_session (sess))
{
- if (is_twice_nat_session (sess))
- {
- s = format (s, " external host o2i %U:%d i2o %U:%d\n",
- format_ip4_address, &sess->ext_host_addr,
- clib_net_to_host_u16 (sess->ext_host_port),
- format_ip4_address, &sess->ext_host_nat_addr,
- clib_net_to_host_u16 (sess->ext_host_nat_port));
- }
+ s = format (s, " external host o2i %U:%d i2o %U:%d\n",
+ format_ip4_address, &sess->ext_host_addr,
+ clib_net_to_host_u16 (sess->ext_host_port),
+ format_ip4_address, &sess->ext_host_nat_addr,
+ clib_net_to_host_u16 (sess->ext_host_nat_port));
+ }
else
{
if (sess->ext_host_addr.as_u32)
@@ -156,20 +84,21 @@ format_snat_session (u8 * s, va_list * args)
}
s = format (s, " i2o flow: %U\n", format_nat_6t_flow, &sess->i2o);
s = format (s, " o2i flow: %U\n", format_nat_6t_flow, &sess->o2i);
- }
s = format (s, " index %llu\n", sess - tsm->sessions);
s = format (s, " last heard %.2f\n", sess->last_heard);
- s = format (s, " total pkts %d, total bytes %lld\n",
- sess->total_pkts, sess->total_bytes);
- if (snat_is_session_static (sess))
+ s = format (s, " timeout in %.2f\n",
+ nat44_session_get_timeout (sm, sess) - (now - sess->last_heard));
+ s = format (s, " total pkts %d, total bytes %lld\n", sess->total_pkts,
+ sess->total_bytes);
+ if (nat44_ed_is_session_static (sess))
s = format (s, " static translation\n");
else
s = format (s, " dynamic translation\n");
- if (is_fwd_bypass_session (sess))
+ if (na44_ed_is_fwd_bypass_session (sess))
s = format (s, " forwarding-bypass\n");
- if (is_lb_session (sess))
+ if (nat44_ed_is_lb_session (sess))
s = format (s, " load-balancing\n");
- if (is_twice_nat_session (sess))
+ if (nat44_ed_is_twice_nat_session (sess))
s = format (s, " twice-nat\n");
return s;
}
@@ -186,9 +115,8 @@ format_snat_static_mapping (u8 * s, va_list * args)
s = format (s, "identity mapping %U",
format_ip4_address, &m->local_addr);
else
- s = format (s, "identity mapping %U %U:%d",
- format_nat_protocol, m->proto,
- format_ip4_address, &m->local_addr,
+ s = format (s, "identity mapping %U %U:%d", format_ip_protocol,
+ m->proto, format_ip4_address, &m->local_addr,
clib_net_to_host_u16 (m->local_port));
pool_foreach (local, m->locals)
@@ -212,8 +140,8 @@ format_snat_static_mapping (u8 * s, va_list * args)
if (is_sm_lb (m->flags))
{
s =
- format (s, "%U external %U:%d %s %s", format_nat_protocol,
- m->proto, format_ip4_address, &m->external_addr,
+ format (s, "%U external %U:%d %s %s", format_ip_protocol, m->proto,
+ format_ip4_address, &m->external_addr,
clib_net_to_host_u16 (m->external_port),
is_sm_twice_nat (m->flags) ?
"twice-nat" :
@@ -230,7 +158,7 @@ format_snat_static_mapping (u8 * s, va_list * args)
}
else
s = format (s, "%U local %U:%d external %U:%d vrf %d %s %s",
- format_nat_protocol, m->proto, format_ip4_address,
+ format_ip_protocol, m->proto, format_ip4_address,
&m->local_addr, clib_net_to_host_u16 (m->local_port),
format_ip4_address, &m->external_addr,
clib_net_to_host_u16 (m->external_port), m->vrf_id,
@@ -245,21 +173,146 @@ format_snat_static_mapping (u8 * s, va_list * args)
u8 *
format_snat_static_map_to_resolve (u8 * s, va_list * args)
{
- snat_static_map_resolve_t *m = va_arg (*args, snat_static_map_resolve_t *);
+ snat_static_mapping_resolve_t *m =
+ va_arg (*args, snat_static_mapping_resolve_t *);
vnet_main_t *vnm = vnet_get_main ();
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
s = format (s, "local %U external %U vrf %d",
format_ip4_address, &m->l_addr,
format_vnet_sw_if_index_name, vnm, m->sw_if_index, m->vrf_id);
else
- s = format (s, "%U local %U:%d external %U:%d vrf %d",
- format_nat_protocol, m->proto,
- format_ip4_address, &m->l_addr,
- clib_net_to_host_u16 (m->l_port),
- format_vnet_sw_if_index_name, vnm, m->sw_if_index,
- clib_net_to_host_u16 (m->e_port), m->vrf_id);
+ s = format (s, "%U local %U:%d external %U:%d vrf %d", format_ip_protocol,
+ m->proto, format_ip4_address, &m->l_addr,
+ clib_net_to_host_u16 (m->l_port), format_vnet_sw_if_index_name,
+ vnm, m->sw_if_index, clib_net_to_host_u16 (m->e_port),
+ m->vrf_id);
+
+ return s;
+}
+
+u8 *
+format_nat_ed_translation_error (u8 *s, va_list *args)
+{
+ nat_translation_error_e e = va_arg (*args, nat_translation_error_e);
+
+ switch (e)
+ {
+ case NAT_ED_TRNSL_ERR_SUCCESS:
+ s = format (s, "success");
+ break;
+ case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED:
+ s = format (s, "translation-failed");
+ break;
+ case NAT_ED_TRNSL_ERR_FLOW_MISMATCH:
+ s = format (s, "flow-mismatch");
+ break;
+ case NAT_ED_TRNSL_ERR_PACKET_TRUNCATED:
+ s = format (s, "packet-truncated");
+ break;
+ case NAT_ED_TRNSL_ERR_INNER_IP_CORRUPT:
+ s = format (s, "inner-ip-corrupted");
+ break;
+ case NAT_ED_TRNSL_ERR_INVALID_CSUM:
+ s = format (s, "invalid-checksum");
+ break;
+ }
+ return s;
+}
+
+u8 *
+format_nat_6t_flow (u8 *s, va_list *args)
+{
+ nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *);
+
+ s = format (s, "match: %U ", format_nat_6t, &f->match);
+ int r = 0;
+ if (f->ops & NAT_FLOW_OP_SADDR_REWRITE)
+ {
+ s = format (s, "rewrite: saddr %U ", format_ip4_address,
+ f->rewrite.saddr.as_u8);
+ r = 1;
+ }
+ if (f->ops & NAT_FLOW_OP_SPORT_REWRITE)
+ {
+ if (!r)
+ {
+ s = format (s, "rewrite: ");
+ r = 1;
+ }
+ s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport));
+ }
+ if (f->ops & NAT_FLOW_OP_DADDR_REWRITE)
+ {
+ if (!r)
+ {
+ s = format (s, "rewrite: ");
+ r = 1;
+ }
+ s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8);
+ }
+ if (f->ops & NAT_FLOW_OP_DPORT_REWRITE)
+ {
+ if (!r)
+ {
+ s = format (s, "rewrite: ");
+ r = 1;
+ }
+ s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport));
+ }
+ if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
+ {
+ if (!r)
+ {
+ s = format (s, "rewrite: ");
+ r = 1;
+ }
+ s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id));
+ }
+ if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
+ {
+ if (!r)
+ {
+ s = format (s, "rewrite: ");
+ r = 1;
+ }
+ s = format (s, "txfib %u ", f->rewrite.fib_index);
+ }
+ return s;
+}
+
+u8 *
+format_nat_6t (u8 *s, va_list *args)
+{
+ nat_6t_t *t = va_arg (*args, nat_6t_t *);
+ s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u",
+ format_ip4_address, t->saddr.as_u8,
+ clib_net_to_host_u16 (t->sport), format_ip4_address,
+ t->daddr.as_u8, clib_net_to_host_u16 (t->dport),
+ format_ip_protocol, t->proto, t->fib_index);
+ return s;
+}
+
+u8 *
+format_nat44_ed_tcp_state (u8 *s, va_list *args)
+{
+ nat44_ed_tcp_state_e e = va_arg (*args, nat44_ed_tcp_state_e);
+ switch (e)
+ {
+ case NAT44_ED_TCP_STATE_CLOSED:
+ s = format (s, "closed");
+ break;
+ case NAT44_ED_TCP_STATE_ESTABLISHED:
+ s = format (s, "established");
+ break;
+ case NAT44_ED_TCP_STATE_CLOSING:
+ s = format (s, "closing");
+ break;
+ case NAT44_ED_TCP_N_STATE:
+ s = format (s, "BUG! unexpected N_STATE! BUG!");
+ break;
+ }
return s;
}
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_handoff.c b/src/plugins/nat/nat44-ed/nat44_ed_handoff.c
index c5ceff4e454..5cb4effb6c4 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_handoff.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_handoff.c
@@ -19,7 +19,6 @@
#include <vlib/vlib.h>
#include <vnet/vnet.h>
-#include <vnet/handoff.h>
#include <vnet/fib/ip4_fib.h>
#include <vppinfra/error.h>
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
index 0065d7703b1..9b4dac3b356 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
@@ -25,18 +25,12 @@
#include <vnet/udp/udp_local.h>
#include <vppinfra/error.h>
-#include <nat/lib/nat_syslog.h>
#include <nat/lib/nat_inlines.h>
#include <nat/lib/ipfix_logging.h>
#include <nat/nat44-ed/nat44_ed.h>
#include <nat/nat44-ed/nat44_ed_inlines.h>
-/* number of attempts to get a port for ED overloading algorithm, if rolling
- * a dice this many times doesn't produce a free port, it's treated
- * as if there were no free ports available to conserve resources */
-#define ED_PORT_ALLOC_ATTEMPTS (10)
-
static char *nat_in2out_ed_error_strings[] = {
#define _(sym,string) string,
foreach_nat_in2out_ed_error
@@ -55,6 +49,7 @@ typedef struct
u8 is_slow_path;
u8 translation_via_i2of;
u8 lookup_skipped;
+ u8 tcp_state;
} nat_in2out_ed_trace_t;
static u8 *
@@ -84,7 +79,7 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args)
{
if (t->lookup_skipped)
{
- s = format (s, "\n lookup skipped - cached session index used");
+ s = format (s, "\n lookup skipped - cached session index used");
}
else
{
@@ -92,93 +87,33 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args)
&t->search_key);
}
}
-
- return s;
-}
-
-/**
- * @brief Check if packet should be translated
- *
- * Packets aimed at outside interface and external address with active session
- * should be translated.
- *
- * @param sm NAT main
- * @param rt NAT runtime data
- * @param sw_if_index0 index of the inside interface
- * @param ip0 IPv4 header
- * @param proto0 NAT protocol
- * @param rx_fib_index0 RX FIB index
- *
- * @returns 0 if packet should be translated otherwise 1
- */
-static inline int
-snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
- u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
- u32 rx_fib_index0)
-{
- fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
- nat_outside_fib_t *outside_fib;
- fib_prefix_t pfx = {
- .fp_proto = FIB_PROTOCOL_IP4,
- .fp_len = 32,
- .fp_addr = {
- .ip4.as_u32 = ip0->dst_address.as_u32,
- }
- ,
- };
-
- /* Don't NAT packet aimed at the intfc address */
- if (PREDICT_FALSE (
- is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
- return 1;
-
- fei = fib_table_lookup (rx_fib_index0, &pfx);
- if (FIB_NODE_INDEX_INVALID != fei)
+ if (IP_PROTOCOL_TCP == t->i2of.match.proto)
{
- u32 sw_if_index = fib_entry_get_resolving_interface (fei);
- if (sw_if_index == ~0)
- {
- vec_foreach (outside_fib, sm->outside_fibs)
- {
- fei = fib_table_lookup (outside_fib->fib_index, &pfx);
- if (FIB_NODE_INDEX_INVALID != fei)
- {
- sw_if_index = fib_entry_get_resolving_interface (fei);
- if (sw_if_index != ~0)
- break;
- }
- }
- }
- if (sw_if_index == ~0)
- return 1;
-
- snat_interface_t *i;
- pool_foreach (i, sm->interfaces)
- {
- /* NAT packet aimed at outside interface */
- if ((nat_interface_is_outside (i)) &&
- (sw_if_index == i->sw_if_index))
- return 0;
- }
+ s = format (s, "\n TCP state: %U", format_nat44_ed_tcp_state,
+ t->tcp_state);
}
- return 1;
+ return s;
}
static int
nat_ed_alloc_addr_and_port_with_snat_address (
- snat_main_t *sm, u32 nat_proto, u32 thread_index, snat_address_t *a,
+ snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a,
u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
ip4_address_t *outside_addr, u16 *outside_port)
{
- const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
+ const u16 port_thread_offset =
+ (port_per_thread * snat_thread_index) + ED_USER_PORT_OFFSET;
+
+ /* Backup original match in case of failure */
+ const nat_6t_t match = s->o2i.match;
s->o2i.match.daddr = a->addr;
/* first try port suggested by caller */
u16 port = clib_net_to_host_u16 (*outside_port);
u16 port_offset = port - port_thread_offset;
- if (port <= port_thread_offset ||
- port > port_thread_offset + port_per_thread)
+ if (port < port_thread_offset ||
+ port >= port_thread_offset + port_per_thread)
{
/* need to pick a different port, suggested port doesn't fit in
* this thread's port range */
@@ -188,27 +123,13 @@ nat_ed_alloc_addr_and_port_with_snat_address (
u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
do
{
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
s->o2i.match.sport = clib_host_to_net_u16 (port);
}
s->o2i.match.dport = clib_host_to_net_u16 (port);
if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
{
-#define _(N, i, n, s) \
- case NAT_PROTOCOL_##N: \
- ++a->busy_##n##_port_refcounts[port]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- break;
- switch (nat_proto)
- {
- foreach_nat_protocol;
- default:
- nat_elog_info (sm, "unknown protocol");
- return 1;
- }
-#undef _
*outside_addr = a->addr;
*outside_port = clib_host_to_net_u16 (port);
return 0;
@@ -218,58 +139,141 @@ nat_ed_alloc_addr_and_port_with_snat_address (
--attempts;
}
while (attempts > 0);
+
+ /* Revert match */
+ s->o2i.match = match;
return 1;
}
static int
-nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
+nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index,
+ u32 tx_sw_if_index, u32 nat_proto,
u32 thread_index, ip4_address_t s_addr,
- u16 port_per_thread, u32 snat_thread_index,
+ ip4_address_t d_addr, u32 snat_thread_index,
snat_session_t *s, ip4_address_t *outside_addr,
u16 *outside_port)
{
- int i;
- snat_address_t *a, *ga = 0;
-
if (vec_len (sm->addresses) > 0)
{
- int s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
-
- for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
+ u32 s_addr_offset = (s_addr.as_u32 + (s_addr.as_u32 >> 8) +
+ (s_addr.as_u32 >> 16) + (s_addr.as_u32 >> 24)) %
+ vec_len (sm->addresses);
+ snat_address_t *a, *ja = 0, *ra = 0, *ba = 0;
+ int i;
+
+ // output feature
+ if (tx_sw_if_index != ~0)
{
- a = sm->addresses + i;
- if (a->fib_index == rx_fib_index)
+ for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
{
- return nat_ed_alloc_addr_and_port_with_snat_address (
- sm, nat_proto, thread_index, a, port_per_thread,
- snat_thread_index, s, outside_addr, outside_port);
+ a = sm->addresses + i;
+ if (a->fib_index == rx_fib_index)
+ {
+ if (a->sw_if_index == tx_sw_if_index)
+ {
+ if ((a->addr_len != ~0) &&
+ (a->net.as_u32 ==
+ (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+
+ {
+ return nat_ed_alloc_addr_and_port_with_snat_address (
+ sm, nat_proto, thread_index, a,
+ sm->port_per_thread, snat_thread_index, s,
+ outside_addr, outside_port);
+ }
+ ra = a;
+ }
+ ja = a;
+ }
+ else if (a->fib_index == ~0)
+ {
+ ba = a;
+ }
}
- else if (a->fib_index == ~0)
+ for (i = 0; i < s_addr_offset; ++i)
{
- ga = a;
+ a = sm->addresses + i;
+ if (a->fib_index == rx_fib_index)
+ {
+ if (a->sw_if_index == tx_sw_if_index)
+ {
+ if ((a->addr_len != ~0) &&
+ (a->net.as_u32 ==
+ (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+
+ {
+ return nat_ed_alloc_addr_and_port_with_snat_address (
+ sm, nat_proto, thread_index, a,
+ sm->port_per_thread, snat_thread_index, s,
+ outside_addr, outside_port);
+ }
+ ra = a;
+ }
+ ja = a;
+ }
+ else if (a->fib_index == ~0)
+ {
+ ba = a;
+ }
}
- }
-
- for (i = 0; i < s_addr_offset; ++i)
- {
- a = sm->addresses + i;
- if (a->fib_index == rx_fib_index)
+ if (ra)
{
return nat_ed_alloc_addr_and_port_with_snat_address (
- sm, nat_proto, thread_index, a, port_per_thread,
+ sm, nat_proto, thread_index, ra, sm->port_per_thread,
snat_thread_index, s, outside_addr, outside_port);
}
- else if (a->fib_index == ~0)
+ }
+ else
+ {
+ // first try nat pool addresses to sw interface addresses mappings
+ for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
{
- ga = a;
+ a = sm->addresses + i;
+ if (a->fib_index == rx_fib_index)
+ {
+ if ((a->addr_len != ~0) &&
+ (a->net.as_u32 ==
+ (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+ {
+ return nat_ed_alloc_addr_and_port_with_snat_address (
+ sm, nat_proto, thread_index, a, sm->port_per_thread,
+ snat_thread_index, s, outside_addr, outside_port);
+ }
+ ja = a;
+ }
+ else if (a->fib_index == ~0)
+ {
+ ba = a;
+ }
+ }
+ for (i = 0; i < s_addr_offset; ++i)
+ {
+ a = sm->addresses + i;
+ if (a->fib_index == rx_fib_index)
+ {
+ if ((a->addr_len != ~0) &&
+ (a->net.as_u32 ==
+ (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+ {
+ return nat_ed_alloc_addr_and_port_with_snat_address (
+ sm, nat_proto, thread_index, a, sm->port_per_thread,
+ snat_thread_index, s, outside_addr, outside_port);
+ }
+ ja = a;
+ }
+ else if (a->fib_index == ~0)
+ {
+ ba = a;
+ }
}
}
- if (ga)
+ if (ja || ba)
{
+ a = ja ? ja : ba;
return nat_ed_alloc_addr_and_port_with_snat_address (
- sm, nat_proto, thread_index, a, port_per_thread, snat_thread_index,
- s, outside_addr, outside_port);
+ sm, nat_proto, thread_index, a, sm->port_per_thread,
+ snat_thread_index, s, outside_addr, outside_port);
}
}
/* Totally out of translations to use... */
@@ -277,74 +281,167 @@ nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
return 1;
}
+static_always_inline int
+nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
+ u16 match_port, ip_protocol_t match_protocol,
+ ip4_address_t *daddr, u16 *dport)
+{
+ snat_static_mapping_t *m =
+ nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
+ if (!m)
+ {
+ /* Try address only mapping */
+ m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
+ if (!m)
+ return 0;
+ }
+ *daddr = m->local_addr;
+ if (dport)
+ {
+ /* Address only mapping doesn't change port */
+ *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
+ }
+ return 1;
+}
+
+static_always_inline vrf_table_t *
+get_vrf_table_by_fib (u32 fib_index)
+{
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
+
+ pool_foreach (t, sm->vrf_tables)
+ {
+ if (fib_index == t->table_fib_index)
+ {
+ return t;
+ }
+ }
+
+ return 0;
+}
+
static_always_inline u32
-nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
+get_tx_fib_index (u32 rx_fib_index, ip4_address_t addr)
{
fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
- nat_outside_fib_t *outside_fib;
fib_prefix_t pfx = {
.fp_proto = FIB_PROTOCOL_IP4,
.fp_len = 32,
.fp_addr = {.ip4.as_u32 = addr.as_u32,}
,
};
- // TODO: multiple vrfs none can resolve addr
- vec_foreach (outside_fib, sm->outside_fibs)
+
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
+ // default to rx fib
+ u32 tx_fib_index = rx_fib_index;
+
+ if (0 != t)
{
- fei = fib_table_lookup (outside_fib->fib_index, &pfx);
- if (FIB_NODE_INDEX_INVALID != fei)
- {
- if (fib_entry_get_resolving_interface (fei) != ~0)
- {
- return outside_fib->fib_index;
- }
- }
+ // managed routes to other fibs
+ vrf_route_t *r;
+ pool_foreach (r, t->routes)
+ {
+ fei = fib_table_lookup (r->fib_index, &pfx);
+ if ((FIB_NODE_INDEX_INVALID != fei) &&
+ (~0 != fib_entry_get_resolving_interface (fei)))
+ {
+ tx_fib_index = r->fib_index;
+ break;
+ }
+ }
}
- return ~0;
+ else
+ {
+ // default to configured fib
+ tx_fib_index = sm->outside_fib_index;
+
+ // default routes to other fibs
+ nat_fib_t *f;
+ vec_foreach (f, sm->outside_fibs)
+ {
+ fei = fib_table_lookup (f->fib_index, &pfx);
+ if ((FIB_NODE_INDEX_INVALID != fei) &&
+ (~0 != fib_entry_get_resolving_interface (fei)))
+ {
+ tx_fib_index = f->fib_index;
+ break;
+ }
+ }
+ }
+
+ return tx_fib_index;
}
static_always_inline int
-nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
- u16 match_port, nat_protocol_t match_protocol,
- u32 match_fib_index, ip4_address_t *daddr,
- u16 *dport)
+is_destination_resolvable (u32 rx_fib_index, ip4_address_t addr)
{
- clib_bihash_kv_8_8_t kv, value;
- init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+ .fp_addr = {.ip4.as_u32 = addr.as_u32,}
+ ,
+ };
+
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
+ u32 ii;
+
+ if (0 != t)
{
- /* Try address only mapping */
- init_nat_k (&kv, match_addr, 0, 0, 0);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv,
- &value))
- return 0;
+ // managed routes to other fibs
+ vrf_route_t *r;
+ pool_foreach (r, t->routes)
+ {
+ fei = fib_table_lookup (r->fib_index, &pfx);
+ if ((FIB_NODE_INDEX_INVALID != fei) &&
+ (~0 != (ii = fib_entry_get_resolving_interface (fei))))
+ {
+ return 1;
+ }
+ }
}
-
- snat_static_mapping_t *m =
- pool_elt_at_index (sm->static_mappings, value.value);
- *daddr = m->local_addr;
- if (dport)
+ else
{
- /* Address only mapping doesn't change port */
- *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
+ // default routes to other fibs
+ nat_fib_t *f;
+ vec_foreach (f, sm->outside_fibs)
+ {
+ fei = fib_table_lookup (f->fib_index, &pfx);
+ if ((FIB_NODE_INDEX_INVALID != fei) &&
+ (~0 != (ii = fib_entry_get_resolving_interface (fei))))
+ {
+ snat_interface_t *i;
+ pool_foreach (i, sm->interfaces)
+ {
+ if ((nat44_ed_is_interface_outside (i)) &&
+ (ii == i->sw_if_index))
+ {
+ return 1;
+ }
+ }
+ }
+ }
}
- return 1;
+
+ return 0;
}
static u32
slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
- u16 r_port, u8 proto, u32 rx_fib_index,
+ u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index,
snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
u32 thread_index, f64 now)
{
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
ip4_address_t outside_addr;
u16 outside_port;
- u32 outside_fib_index;
+ u32 tx_fib_index;
u8 is_identity_nat = 0;
- u32 nat_proto = ip_proto_to_nat_proto (proto);
snat_session_t *s = NULL;
lb_nat_type_t lb = 0;
ip4_address_t daddr = r_addr;
@@ -363,33 +460,14 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
}
}
- outside_fib_index = sm->outside_fib_index;
-
- switch (vec_len (sm->outside_fibs))
- {
- case 0:
- outside_fib_index = sm->outside_fib_index;
- break;
- case 1:
- outside_fib_index = sm->outside_fibs[0].fib_index;
- break;
- default:
- outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
- break;
- }
-
ip4_address_t sm_addr;
u16 sm_port;
u32 sm_fib_index;
- /* First try to match static mapping by local address and port */
- int is_sm;
- if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index,
- nat_proto, &sm_addr, &sm_port, &sm_fib_index,
- 0, 0, 0, &lb, 0, &is_identity_nat, 0))
- {
- is_sm = 0;
- }
- else
+ int is_sm = 0;
+ // First try to match static mapping by local address and port
+ if (!snat_static_mapping_match (vm, l_addr, l_port, rx_fib_index, proto,
+ &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
+ &lb, 0, &is_identity_nat, 0))
{
if (PREDICT_FALSE (is_identity_nat))
{
@@ -399,7 +477,7 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
is_sm = 1;
}
- if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
+ if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP))
{
if (PREDICT_FALSE (!tcp_flags_is_init (
vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
@@ -412,28 +490,31 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
s = nat_ed_session_alloc (sm, thread_index, now, proto);
ASSERT (s);
+ tx_fib_index = get_tx_fib_index (rx_fib_index, r_addr);
+
if (!is_sm)
{
s->in2out.addr = l_addr;
s->in2out.port = l_port;
- s->nat_proto = nat_proto;
+ s->proto = proto;
s->in2out.fib_index = rx_fib_index;
- s->out2in.fib_index = outside_fib_index;
+ s->out2in.fib_index = tx_fib_index;
// suggest using local port to allocation function
outside_port = l_port;
- // hairpinning?
- int is_hairpinning = nat44_ed_external_sm_lookup (
- sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
- s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
+ if (PREDICT_FALSE (nat44_ed_external_sm_lookup (sm, r_addr, r_port,
+ proto, &daddr, &dport)))
+ {
+ s->flags |= SNAT_SESSION_FLAG_HAIRPINNING;
+ }
// destination addr/port updated with real values in
// nat_ed_alloc_addr_and_port
nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
s->out2in.fib_index, proto);
nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
}
@@ -442,11 +523,11 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
}
nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
+ nat_6t_flow_saddr_rewrite_set (&s->o2i, r_addr.as_u32);
if (nat_ed_alloc_addr_and_port (
- sm, rx_fib_index, nat_proto, thread_index, l_addr,
- sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr,
- &outside_port))
+ sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
+ r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port))
{
nat_elog_notice (sm, "addresses exhausted");
b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
@@ -463,17 +544,17 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
s->out2in.port = outside_port = sm_port;
s->in2out.addr = l_addr;
s->in2out.port = l_port;
- s->nat_proto = nat_proto;
+ s->proto = proto;
s->in2out.fib_index = rx_fib_index;
- s->out2in.fib_index = outside_fib_index;
+ s->out2in.fib_index = tx_fib_index;
s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
// hairpinning?
- int is_hairpinning = nat44_ed_external_sm_lookup (
- sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
+ int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
+ proto, &daddr, &dport);
s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
sm_port, s->out2in.fib_index, proto);
@@ -487,6 +568,7 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
}
nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
+ nat_6t_flow_saddr_rewrite_set (&s->o2i, r_addr.as_u32);
if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
{
nat_elog_notice (sm, "out2in key add failed");
@@ -496,7 +578,6 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
if (lb)
s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
- s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
s->ext_host_addr = r_addr;
s->ext_host_port = r_port;
@@ -505,7 +586,7 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
}
@@ -514,7 +595,7 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
}
- nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
+ nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
{
@@ -523,17 +604,14 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
}
/* log NAT event */
- nat_ipfix_logging_nat44_ses_create (thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port, s->in2out.fib_index);
+ nat_ipfix_logging_nat44_ses_create (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
s->in2out.port, &s->ext_host_nat_addr,
s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
- &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0);
+ &s->ext_host_addr, s->ext_host_port, s->proto, 0);
per_vrf_sessions_register_session (s, thread_index);
@@ -542,12 +620,6 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
error:
if (s)
{
- if (!is_sm)
- {
- snat_free_outside_address_and_port (sm->addresses, thread_index,
- &outside_addr, outside_port,
- nat_proto);
- }
nat_ed_session_delete (sm, s, thread_index, 1);
}
*sessionp = s = NULL;
@@ -555,38 +627,55 @@ error:
}
static_always_inline int
-nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
- vlib_node_runtime_t *node, u32 sw_if_index,
- vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
- u32 rx_fib_index, u32 thread_index)
+nat44_ed_not_translate (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 sw_if_index, vlib_buffer_t *b, ip4_header_t *ip,
+ u32 proto, u32 rx_fib_index)
{
+ snat_main_t *sm = &snat_main;
+
clib_bihash_kv_16_8_t kv, value;
+ ip4_address_t placeholder_addr;
+ u32 placeholder_fib_index;
+ u16 placeholder_port;
+
+ init_ed_k (&kv, ip->dst_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index,
+ ip->protocol);
+
+ // do nat if active session or is static mapping
+ if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value) ||
+ !snat_static_mapping_match (
+ vm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
+ sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
+ &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
+ {
+ return 0;
+ }
- init_ed_k (&kv, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
- ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
- sm->outside_fib_index, ip->protocol);
+ // do not nat if forwarding enabled
+ if (sm->forwarding_enabled)
+ {
+ return 1;
+ }
- /* NAT packet aimed at external address if has active sessions */
- if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
+ // do not nat packet aimed at the interface address
+ if (PREDICT_FALSE (
+ is_interface_addr (sm, node, sw_if_index, ip->dst_address.as_u32)))
{
- /* or is static mappings */
- ip4_address_t placeholder_addr;
- u16 placeholder_port;
- u32 placeholder_fib_index;
- if (!snat_static_mapping_match (
- vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
- sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
- &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
- return 0;
+ return 1;
}
- else
- return 0;
- if (sm->forwarding_enabled)
- return 1;
+ // do nat packets with resolvable destination
+ // destination can be resolved either by:
+ // a) vrf routing table entry
+ // b) (non output feature) outside interface fib
+ if (is_destination_resolvable (rx_fib_index, ip->dst_address))
+ {
+ return 0;
+ }
- return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
- rx_fib_index);
+ return 1;
}
static_always_inline int
@@ -610,18 +699,18 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
&lookup_sport, &lookup_daddr,
&lookup_dport, &lookup_protocol))
return 0;
- init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
- 0, lookup_protocol);
+ init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
+ lookup_dport, 0, lookup_protocol);
}
else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
{
- init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
- ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
- ip->protocol);
+ init_ed_k (&kv, ip->src_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol);
}
else
{
- init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
+ init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0,
ip->protocol);
}
@@ -632,11 +721,13 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
pool_elt_at_index (tsm->sessions,
ed_value_get_session_index (&value));
- if (is_fwd_bypass_session (s))
+ if (na44_ed_is_fwd_bypass_session (s))
{
if (ip->protocol == IP_PROTOCOL_TCP)
{
- nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
+ nat44_set_tcp_session_state_i2o (
+ sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
}
/* Accounting */
nat44_session_update_counters (s, now,
@@ -658,7 +749,7 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
ip4_header_t *ip, u16 src_port,
u16 dst_port, u32 thread_index,
u32 rx_sw_if_index, u32 tx_sw_if_index,
- f64 now, int is_multi_worker)
+ int is_multi_worker)
{
clib_bihash_kv_16_8_t kv, value;
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
@@ -668,20 +759,14 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
/* src NAT check */
- init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
- tx_fib_index, ip->protocol);
+ init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
+ dst_port, tx_fib_index, ip->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
{
ASSERT (thread_index == ed_value_get_thread_index (&value));
s =
pool_elt_at_index (tsm->sessions,
ed_value_get_session_index (&value));
- if (nat44_is_ses_closed (s)
- && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
- {
- nat_free_session_data (sm, s, thread_index, 0);
- nat_ed_session_delete (sm, s, thread_index, 1);
- }
return 1;
}
@@ -706,8 +791,8 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
s = NULL;
}
- init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
- rx_fib_index, ip->protocol);
+ init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32,
+ src_port, rx_fib_index, ip->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
{
ASSERT (thread_index == ed_value_get_thread_index (&value));
@@ -716,15 +801,16 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
ed_value_get_session_index (&value));
skip_dst_nat_lookup:
- if (is_fwd_bypass_session (s))
+ if (na44_ed_is_fwd_bypass_session (s))
return 0;
/* hairpinning */
pool_foreach (i, sm->output_feature_interfaces)
- {
- if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
- return 0;
- }
+ {
+ if ((nat44_ed_is_interface_inside (i)) &&
+ (rx_sw_if_index == i->sw_if_index))
+ return 0;
+ }
return 1;
}
@@ -734,9 +820,9 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
static inline u32
icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
icmp46_header_t *icmp, u32 sw_if_index,
- u32 rx_fib_index, vlib_node_runtime_t *node,
- u32 next, f64 now, u32 thread_index,
- nat_protocol_t nat_proto, snat_session_t **s_p,
+ u32 tx_sw_if_index, u32 rx_fib_index,
+ vlib_node_runtime_t *node, u32 next, f64 now,
+ u32 thread_index, snat_session_t **s_p,
int is_multi_worker)
{
vlib_main_t *vm = vlib_get_main ();
@@ -756,20 +842,19 @@ icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
return NAT_NEXT_DROP;
}
- if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
+ if (tx_sw_if_index != ~0)
{
if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
- vnet_buffer (b)->sw_if_index[VLIB_TX], now, is_multi_worker)))
+ tx_sw_if_index, is_multi_worker)))
{
return next;
}
}
else
{
- if (PREDICT_FALSE (nat44_ed_not_translate (vm, sm, node, sw_if_index, b,
- ip, NAT_PROTOCOL_ICMP,
- rx_fib_index, thread_index)))
+ if (PREDICT_FALSE (nat44_ed_not_translate (
+ vm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index)))
{
return next;
}
@@ -782,9 +867,10 @@ icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
return NAT_NEXT_DROP;
}
- next = slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address,
- lookup_sport, lookup_dport, ip->protocol, rx_fib_index,
- &s, node, next, thread_index, vlib_time_now (vm));
+ next =
+ slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport,
+ lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s,
+ node, next, thread_index, vlib_time_now (vm));
if (NAT_NEXT_DROP == next)
goto out;
@@ -822,12 +908,11 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
vlib_main_t *vm,
vlib_node_runtime_t *node)
{
- clib_bihash_kv_8_8_t kv, value;
clib_bihash_kv_16_8_t s_kv, s_value;
snat_static_mapping_t *m = NULL;
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
snat_session_t *s = NULL;
- u32 outside_fib_index = sm->outside_fib_index;
+ u32 tx_fib_index;
int i;
ip4_address_t new_src_addr = { 0 };
ip4_address_t new_dst_addr = ip->dst_address;
@@ -842,25 +927,13 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
return 0;
}
- switch (vec_len (sm->outside_fibs))
- {
- case 0:
- outside_fib_index = sm->outside_fib_index;
- break;
- case 1:
- outside_fib_index = sm->outside_fibs[0].fib_index;
- break;
- default:
- outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
- break;
- }
+ tx_fib_index = get_tx_fib_index (rx_fib_index, ip->dst_address);
- init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
-
- /* Try to find static mapping first */
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
+ // Try to find static mapping first
+ m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index,
+ ip->protocol);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value.value);
new_src_addr = m->external_addr;
}
else
@@ -869,8 +942,9 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
{
if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
{
- init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
- outside_fib_index, ip->protocol);
+ init_ed_k (&s_kv, s->out2in.addr.as_u32, 0,
+ ip->dst_address.as_u32, 0, tx_fib_index,
+ ip->protocol);
if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
{
new_src_addr = s->out2in.addr;
@@ -883,8 +957,9 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
{
for (i = 0; i < vec_len (sm->addresses); i++)
{
- init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
- outside_fib_index, ip->protocol);
+ init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0,
+ ip->dst_address.as_u32, 0, tx_fib_index,
+ ip->protocol);
if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
{
new_src_addr = sm->addresses[i].addr;
@@ -910,28 +985,25 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
ip->dst_address, 0, rx_fib_index, ip->protocol);
nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
- nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
+ nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
// hairpinning?
- int is_hairpinning =
- nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER,
- outside_fib_index, &new_dst_addr, NULL);
+ int is_hairpinning = nat44_ed_external_sm_lookup (
+ sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL);
s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
- nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
+ nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
- outside_fib_index, ip->protocol);
+ tx_fib_index, ip->protocol);
nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
- s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
- s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
s->out2in.addr.as_u32 = new_src_addr.as_u32;
- s->out2in.fib_index = outside_fib_index;
+ s->out2in.fib_index = tx_fib_index;
s->in2out.addr.as_u32 = ip->src_address.as_u32;
s->in2out.fib_index = rx_fib_index;
s->in2out.port = s->out2in.port = ip->protocol;
@@ -988,11 +1060,13 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
while (n_left_from > 0)
{
vlib_buffer_t *b0;
- u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
- nat_protocol_t proto0;
+ u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
+ u32 tx_sw_if_index0;
+ u32 cntr_sw_if_index0;
+ ip_protocol_t proto0;
ip4_header_t *ip0;
snat_session_t *s0 = 0;
- clib_bihash_kv_16_8_t kv0, value0;
+ clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
nat_6t_flow_t *f = 0;
nat_6t_t lookup;
@@ -1023,9 +1097,12 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
ip0 =
(ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- rx_fib_index0 =
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
+ rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ cntr_sw_if_index0 =
+ is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
+ rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ rx_sw_if_index0);
lookup.fib_index = rx_fib_index0;
if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
@@ -1038,7 +1115,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
goto trace0;
}
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
+ proto0 = ip0->protocol;
if (is_output_feature)
{
@@ -1048,7 +1125,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
goto trace0;
}
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
+ if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
{
if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
ICMP4_echo_request &&
@@ -1102,8 +1179,8 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
s0 = NULL;
}
- init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
- lookup.fib_index, lookup.proto);
+ init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
+ lookup.dport, lookup.fib_index, lookup.proto);
// lookup flow
if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
@@ -1125,25 +1202,10 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
{
// session is closed, go slow path
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
- next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
- goto trace0;
- }
-
- if (s0->tcp_closed_timestamp)
- {
- if (now >= s0->tcp_closed_timestamp)
- {
- // session is closed, go slow path, freed in slow path
- next[0] = def_slow;
- }
- else
- {
- // session in transitory timeout, drop
- b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
- next[0] = NAT_NEXT_DROP;
- }
+ s0 = 0;
+ next[0] = def_slow;
goto trace0;
}
@@ -1153,8 +1215,9 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
if (now >= sess_timeout_time)
{
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
// session is closed, go slow path
next[0] = def_slow;
goto trace0;
@@ -1174,8 +1237,9 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
else
{
translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
goto trace0;
@@ -1185,8 +1249,9 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
(translation_error = nat_6t_flow_buf_translate_i2o (
vm, sm, b0, ip0, f, proto0, is_output_feature)))
{
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
goto trace0;
@@ -1194,22 +1259,24 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
switch (proto0)
{
- case NAT_PROTOCOL_TCP:
+ case IP_PROTOCOL_TCP:
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
- thread_index, sw_if_index0, 1);
- nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
+ thread_index, cntr_sw_if_index0, 1);
+ nat44_set_tcp_session_state_i2o (
+ sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
break;
- case NAT_PROTOCOL_UDP:
+ case IP_PROTOCOL_UDP:
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
break;
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
break;
- case NAT_PROTOCOL_OTHER:
+ default:
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
break;
}
@@ -1227,7 +1294,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
{
nat_in2out_ed_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sw_if_index = sw_if_index0;
+ t->sw_if_index = rx_sw_if_index0;
t->next_index = next[0];
t->is_slow_path = 0;
t->translation_error = translation_error;
@@ -1240,6 +1307,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
t->translation_via_i2of = (&s0->i2o == f);
+ t->tcp_state = s0->tcp_state;
}
else
{
@@ -1250,7 +1318,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
if (next[0] == NAT_NEXT_DROP)
{
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
n_left_from--;
@@ -1285,13 +1353,15 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
while (n_left_from > 0)
{
vlib_buffer_t *b0;
- u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
- nat_protocol_t proto0;
+ u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
+ u32 tx_sw_if_index0;
+ u32 cntr_sw_if_index0;
+ ip_protocol_t proto0;
ip4_header_t *ip0;
udp_header_t *udp0;
icmp46_header_t *icmp0;
snat_session_t *s0 = 0;
- clib_bihash_kv_16_8_t kv0, value0;
+ clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
b0 = *b;
@@ -1304,9 +1374,12 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
iph_offset0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- rx_fib_index0 =
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
+ rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ cntr_sw_if_index0 =
+ is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
+ rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ rx_sw_if_index0);
if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
{
@@ -1320,9 +1393,9 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
udp0 = ip4_next_header (ip0);
icmp0 = (icmp46_header_t *) udp0;
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
+ proto0 = ip0->protocol;
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
+ if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
{
s0 = nat44_ed_in2out_slowpath_unknown_proto (
sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
@@ -1334,57 +1407,57 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
(translation_error = nat_6t_flow_buf_translate_i2o (
vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
{
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
goto trace0;
}
vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
goto trace0;
}
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
+ if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
{
next[0] = icmp_in2out_ed_slow_path (
- sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next[0],
- now, thread_index, proto0, &s0, is_multi_worker);
+ sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0,
+ rx_fib_index0, node, next[0], now, thread_index, &s0,
+ is_multi_worker);
if (NAT_NEXT_DROP != next[0] && s0 &&
NAT_ED_TRNSL_ERR_SUCCESS !=
(translation_error = nat_6t_flow_buf_translate_i2o (
vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
{
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
goto trace0;
}
- vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
- thread_index, sw_if_index0, 1);
+ if (NAT_NEXT_DROP != next[0])
+ {
+ vlib_increment_simple_counter (
+ &sm->counters.slowpath.in2out.icmp, thread_index,
+ cntr_sw_if_index0, 1);
+ }
goto trace0;
}
- init_ed_k (&kv0, ip0->src_address,
- vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
- ip0->protocol);
+ init_ed_k (
+ &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
+ ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
+ rx_fib_index0, ip0->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
{
ASSERT (thread_index == ed_value_get_thread_index (&value0));
s0 =
pool_elt_at_index (tsm->sessions,
ed_value_get_session_index (&value0));
-
- if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
- {
- nat_free_session_data (sm, s0, thread_index, 0);
- nat_ed_session_delete (sm, s0, thread_index, 1);
- s0 = NULL;
- }
}
if (!s0)
@@ -1394,34 +1467,34 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
- sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX], now,
- is_multi_worker)))
+ rx_sw_if_index0, tx_sw_if_index0, is_multi_worker)))
goto trace0;
/*
* Send DHCP packets to the ipv4 stack, or we won't
* be able to use dhcp client on the outside interface
*/
- if (PREDICT_FALSE
- (proto0 == NAT_PROTOCOL_UDP
- && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
- clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
- && ip0->dst_address.as_u32 == 0xffffffff))
+ if (PREDICT_FALSE (
+ proto0 == IP_PROTOCOL_UDP &&
+ (vnet_buffer (b0)->ip.reass.l4_dst_port ==
+ clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) &&
+ ip0->dst_address.as_u32 == 0xffffffff))
goto trace0;
}
else
{
- if (PREDICT_FALSE (nat44_ed_not_translate (
- vm, sm, node, sw_if_index0, b0, ip0, proto0, rx_fib_index0,
- thread_index)))
+ if (PREDICT_FALSE (
+ nat44_ed_not_translate (vm, node, rx_sw_if_index0, b0, ip0,
+ proto0, rx_fib_index0)))
goto trace0;
}
- next[0] = slow_path_ed (
- vm, sm, b0, ip0->src_address, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->protocol,
- rx_fib_index0, &s0, node, next[0], thread_index, now);
+ next[0] =
+ slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address,
+ vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port,
+ ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0,
+ node, next[0], thread_index, now);
if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
goto trace0;
@@ -1437,23 +1510,26 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
(translation_error = nat_6t_flow_buf_translate_i2o (
vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
{
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
goto trace0;
}
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
+ if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
{
vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
- thread_index, sw_if_index0, 1);
- nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
+ thread_index, cntr_sw_if_index0, 1);
+ nat44_set_tcp_session_state_i2o (
+ sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
}
else
{
vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
/* Accounting */
@@ -1469,7 +1545,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
{
nat_in2out_ed_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sw_if_index = sw_if_index0;
+ t->sw_if_index = rx_sw_if_index0;
t->next_index = next[0];
t->is_slow_path = 1;
t->translation_error = translation_error;
@@ -1481,6 +1557,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
t->translation_via_i2of = 1;
+ t->tcp_state = s0->tcp_state;
}
else
@@ -1492,7 +1569,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
if (next[0] == NAT_NEXT_DROP)
{
vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
n_left_from--;
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
index 0d75e736849..04e5236b7f9 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
+++ b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
@@ -12,6 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/**
* @brief The NAT inline functions
*/
@@ -24,116 +25,53 @@
#include <vnet/fib/ip4_fib.h>
#include <nat/lib/log.h>
+#include <nat/lib/ipfix_logging.h>
#include <nat/nat44-ed/nat44_ed.h>
-always_inline u64
-calc_nat_key (ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
-{
- ASSERT (fib_index <= (1 << 14) - 1);
- ASSERT (proto <= (1 << 3) - 1);
- return (u64) addr.as_u32 << 32 | (u64) port << 16 | fib_index << 3 |
- (proto & 0x7);
-}
-
-always_inline void
-split_nat_key (u64 key, ip4_address_t *addr, u16 *port, u32 *fib_index,
- nat_protocol_t *proto)
-{
- if (addr)
- {
- addr->as_u32 = key >> 32;
- }
- if (port)
- {
- *port = (key >> 16) & (u16) ~0;
- }
- if (fib_index)
- {
- *fib_index = key >> 3 & ((1 << 13) - 1);
- }
- if (proto)
- {
- *proto = key & 0x7;
- }
-}
-
always_inline void
-init_nat_k (clib_bihash_kv_8_8_t *kv, ip4_address_t addr, u16 port,
- u32 fib_index, nat_protocol_t proto)
+init_ed_k (clib_bihash_kv_16_8_t *kv, u32 l_addr, u16 l_port, u32 r_addr,
+ u16 r_port, u32 fib_index, ip_protocol_t proto)
{
- kv->key = calc_nat_key (addr, port, fib_index, proto);
- kv->value = ~0ULL;
+ kv->key[0] = (u64) r_addr << 32 | l_addr;
+ kv->key[1] =
+ (u64) r_port << 48 | (u64) l_port << 32 | fib_index << 8 | proto;
}
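The new init_ed_k packs the full 6-tuple into the 16-byte bihash key: remote address in the upper word of key[0], local address in the lower word, ports, fib index and protocol in key[1]. A standalone sketch of the same packing with hypothetical values (plain stdint types standing in for VPP's u32/u64):

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  /* hypothetical 6-tuple: 10.0.0.1:1234 -> 8.8.8.8:53, fib 0, UDP (17) */
  uint32_t l_addr = 0x0a000001, r_addr = 0x08080808;
  uint16_t l_port = 1234, r_port = 53;
  uint32_t fib_index = 0; /* must fit in bits 8..31, below l_port << 32 */
  uint8_t proto = 17;

  /* same shifts as init_ed_k above */
  uint64_t key0 = (uint64_t) r_addr << 32 | l_addr;
  uint64_t key1 = (uint64_t) r_port << 48 | (uint64_t) l_port << 32 |
		  (uint64_t) fib_index << 8 | proto;

  printf ("key[0] = 0x%016llx\n", (unsigned long long) key0);
  printf ("key[1] = 0x%016llx\n", (unsigned long long) key1);
  return 0;
}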
always_inline void
-init_nat_kv (clib_bihash_kv_8_8_t *kv, ip4_address_t addr, u16 port,
- u32 fib_index, nat_protocol_t proto, u32 thread_index,
- u32 session_index)
+init_ed_kv (clib_bihash_kv_16_8_t *kv, u32 l_addr, u16 l_port, u32 r_addr,
+ u16 r_port, u32 fib_index, u8 proto, u32 thread_index,
+ u32 session_index)
{
- init_nat_k (kv, addr, port, fib_index, proto);
+ init_ed_k (kv, l_addr, l_port, r_addr, r_port, fib_index, proto);
kv->value = (u64) thread_index << 32 | session_index;
}
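The kv->value packing here is what ed_value_get_thread_index() and ed_value_get_session_index() unpack again on lookup. A minimal round-trip check, with hypothetical indices:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t thread_index = 3, session_index = 42; /* hypothetical */
  uint64_t value = (uint64_t) thread_index << 32 | session_index;

  /* mirrors ed_value_get_thread_index / ed_value_get_session_index */
  assert ((uint32_t) (value >> 32) == thread_index);
  assert ((uint32_t) (value & 0xffffffff) == session_index);
  return 0;
}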
always_inline void
-init_nat_i2o_k (clib_bihash_kv_8_8_t *kv, snat_session_t *s)
+nat44_ed_sm_init_i2o_kv (clib_bihash_kv_16_8_t *kv, u32 addr, u16 port,
+ u32 fib_index, u8 proto, u32 sm_index)
{
- return init_nat_k (kv, s->in2out.addr, s->in2out.port, s->in2out.fib_index,
- s->nat_proto);
+ return init_ed_kv (kv, addr, port, 0, 0, fib_index, proto, 0, sm_index);
}
always_inline void
-init_nat_i2o_kv (clib_bihash_kv_8_8_t *kv, snat_session_t *s, u32 thread_index,
- u32 session_index)
+nat44_ed_sm_init_o2i_kv (clib_bihash_kv_16_8_t *kv, u32 e_addr, u16 e_port,
+ u32 fib_index, u8 proto, u32 sm_index)
{
- init_nat_k (kv, s->in2out.addr, s->in2out.port, s->in2out.fib_index,
- s->nat_proto);
- kv->value = (u64) thread_index << 32 | session_index;
+ return init_ed_kv (kv, 0, 0, e_addr, e_port, fib_index, proto, 0, sm_index);
}
always_inline void
-init_nat_o2i_k (clib_bihash_kv_8_8_t *kv, snat_session_t *s)
+nat44_ed_sm_init_i2o_k (clib_bihash_kv_16_8_t *kv, u32 addr, u16 port,
+ u32 fib_index, u8 proto)
{
- return init_nat_k (kv, s->out2in.addr, s->out2in.port, s->out2in.fib_index,
- s->nat_proto);
+ return nat44_ed_sm_init_i2o_kv (kv, addr, port, fib_index, proto, 0);
}
always_inline void
-init_nat_o2i_kv (clib_bihash_kv_8_8_t *kv, snat_session_t *s, u32 thread_index,
- u32 session_index)
+nat44_ed_sm_init_o2i_k (clib_bihash_kv_16_8_t *kv, u32 e_addr, u16 e_port,
+ u32 fib_index, u8 proto)
{
- init_nat_k (kv, s->out2in.addr, s->out2in.port, s->out2in.fib_index,
- s->nat_proto);
- kv->value = (u64) thread_index << 32 | session_index;
-}
-
-always_inline u32
-nat_value_get_thread_index (clib_bihash_kv_8_8_t *value)
-{
- return value->value >> 32;
-}
-
-always_inline u32
-nat_value_get_session_index (clib_bihash_kv_8_8_t *value)
-{
- return value->value & ~(u32) 0;
-}
-
-always_inline void
-init_ed_k (clib_bihash_kv_16_8_t *kv, ip4_address_t l_addr, u16 l_port,
- ip4_address_t r_addr, u16 r_port, u32 fib_index, u8 proto)
-{
- kv->key[0] = (u64) r_addr.as_u32 << 32 | l_addr.as_u32;
- kv->key[1] =
- (u64) r_port << 48 | (u64) l_port << 32 | fib_index << 8 | proto;
-}
-
-always_inline void
-init_ed_kv (clib_bihash_kv_16_8_t *kv, ip4_address_t l_addr, u16 l_port,
- ip4_address_t r_addr, u16 r_port, u32 fib_index, u8 proto,
- u32 thread_index, u32 session_index)
-{
- init_ed_k (kv, l_addr, l_port, r_addr, r_port, fib_index, proto);
- kv->value = (u64) thread_index << 32 | session_index;
+ return nat44_ed_sm_init_o2i_kv (kv, e_addr, e_port, fib_index, proto, 0);
}
always_inline u32
@@ -187,13 +125,13 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
u16 *lookup_dport, u8 *lookup_protocol)
{
icmp46_header_t *icmp0;
- icmp_echo_header_t *echo0, *inner_echo0 = 0;
+ nat_icmp_echo_header_t *echo0, *inner_echo0 = 0;
ip4_header_t *inner_ip0 = 0;
void *l4_header = 0;
icmp46_header_t *inner_icmp0;
icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
- echo0 = (icmp_echo_header_t *) (icmp0 + 1);
+ echo0 = (nat_icmp_echo_header_t *) (icmp0 + 1);
// avoid warning about unused variables in caller by setting to bogus values
*lookup_sport = 0;
@@ -215,18 +153,18 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
*lookup_protocol = inner_ip0->protocol;
lookup_saddr->as_u32 = inner_ip0->dst_address.as_u32;
lookup_daddr->as_u32 = inner_ip0->src_address.as_u32;
- switch (ip_proto_to_nat_proto (inner_ip0->protocol))
+ switch (inner_ip0->protocol)
{
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
inner_icmp0 = (icmp46_header_t *) l4_header;
- inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
+ inner_echo0 = (nat_icmp_echo_header_t *) (inner_icmp0 + 1);
*lookup_sport = inner_echo0->identifier;
*lookup_dport = inner_echo0->identifier;
break;
- case NAT_PROTOCOL_UDP:
- case NAT_PROTOCOL_TCP:
- *lookup_sport = ((tcp_udp_header_t *) l4_header)->dst_port;
- *lookup_dport = ((tcp_udp_header_t *) l4_header)->src_port;
+ case IP_PROTOCOL_UDP:
+ case IP_PROTOCOL_TCP:
+ *lookup_sport = ((nat_tcp_udp_header_t *) l4_header)->dst_port;
+ *lookup_dport = ((nat_tcp_udp_header_t *) l4_header)->src_port;
break;
default:
return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL;
@@ -235,21 +173,29 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
return 0;
}
+always_inline int
+nat44_ed_tcp_is_established (nat44_ed_tcp_state_e state)
+{
+ return state == NAT44_ED_TCP_STATE_ESTABLISHED ? 1 : 0;
+}
+
always_inline u32
nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s)
{
- switch (s->nat_proto)
+ switch (s->proto)
{
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
+ /* fallthrough */
+ case IP_PROTOCOL_ICMP6:
return sm->timeouts.icmp;
- case NAT_PROTOCOL_UDP:
+ case IP_PROTOCOL_UDP:
return sm->timeouts.udp;
- case NAT_PROTOCOL_TCP:
+ case IP_PROTOCOL_TCP:
{
- if (s->state)
- return sm->timeouts.tcp.transitory;
- else
+ if (nat44_ed_tcp_is_established (s->tcp_state))
return sm->timeouts.tcp.established;
+ else
+ return sm->timeouts.tcp.transitory;
}
default:
return sm->timeouts.udp;
@@ -300,7 +246,7 @@ nat_ed_lru_insert (snat_main_per_thread_data_t *tsm, snat_session_t *s,
static_always_inline void
nat_6t_flow_to_ed_k (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f)
{
- init_ed_k (kv, f->match.saddr, f->match.sport, f->match.daddr,
+ init_ed_k (kv, f->match.saddr.as_u32, f->match.sport, f->match.daddr.as_u32,
f->match.dport, f->match.fib_index, f->match.proto);
}
@@ -308,7 +254,7 @@ static_always_inline void
nat_6t_flow_to_ed_kv (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f,
u32 thread_idx, u32 session_idx)
{
- init_ed_kv (kv, f->match.saddr, f->match.sport, f->match.daddr,
+ init_ed_kv (kv, f->match.saddr.as_u32, f->match.sport, f->match.daddr.as_u32,
f->match.dport, f->match.fib_index, f->match.proto, thread_idx,
session_idx);
}
@@ -348,6 +294,15 @@ nat_ed_ses_o2i_flow_hash_add_del (snat_main_t *sm, u32 thread_idx,
else
{
nat_6t_flow_to_ed_kv (&kv, &s->o2i, thread_idx, s - tsm->sessions);
+ if (!(s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
+ {
+ if (nat44_ed_sm_o2i_lookup (sm, s->o2i.match.daddr,
+ s->o2i.match.dport, 0,
+ s->o2i.match.proto))
+ {
+ return -1;
+ }
+ }
nat_6t_l3_l4_csum_calc (&s->o2i);
}
ASSERT (thread_idx == s->thread_index);
@@ -393,10 +348,9 @@ nat_lru_free_one_with_head (snat_main_t *sm, int thread_index, f64 now,
sess_timeout_time =
s->last_heard + (f64) nat44_session_get_timeout (sm, s);
- if (now >= sess_timeout_time ||
- (s->tcp_closed_timestamp && now >= s->tcp_closed_timestamp))
+ if (now >= sess_timeout_time)
{
- nat_free_session_data (sm, s, thread_index, 0);
+ nat44_ed_free_session_data (sm, s, thread_index, 0);
nat_ed_session_delete (sm, s, thread_index, 0);
return 1;
}
@@ -460,23 +414,16 @@ per_vrf_sessions_cleanup (u32 thread_index)
per_vrf_sessions_t *per_vrf_sessions;
u32 *to_free = 0, *i;
- vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
+ pool_foreach (per_vrf_sessions, tsm->per_vrf_sessions_pool)
{
- if (per_vrf_sessions->expired)
- {
- if (per_vrf_sessions->ses_count == 0)
- {
- vec_add1 (to_free, per_vrf_sessions - tsm->per_vrf_sessions_vec);
- }
- }
+ if (per_vrf_sessions->expired && per_vrf_sessions->ses_count == 0)
+ vec_add1 (to_free, per_vrf_sessions - tsm->per_vrf_sessions_pool);
}
- if (vec_len (to_free))
+ vec_foreach (i, to_free)
{
- vec_foreach (i, to_free)
- {
- vec_del1 (tsm->per_vrf_sessions_vec, *i);
- }
+ per_vrf_sessions = pool_elt_at_index (tsm->per_vrf_sessions_pool, *i);
+ pool_put (tsm->per_vrf_sessions_pool, per_vrf_sessions);
}
vec_free (to_free);
@@ -495,7 +442,7 @@ per_vrf_sessions_register_session (snat_session_t *s, u32 thread_index)
// s->per_vrf_sessions_index == ~0 ... reuse of old session
- vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
+ pool_foreach (per_vrf_sessions, tsm->per_vrf_sessions_pool)
{
// ignore already expired registrations
if (per_vrf_sessions->expired)
@@ -514,14 +461,13 @@ per_vrf_sessions_register_session (snat_session_t *s, u32 thread_index)
}
// create a new registration
- vec_add2 (tsm->per_vrf_sessions_vec, per_vrf_sessions, 1);
+ pool_get (tsm->per_vrf_sessions_pool, per_vrf_sessions);
clib_memset (per_vrf_sessions, 0, sizeof (*per_vrf_sessions));
-
per_vrf_sessions->rx_fib_index = s->in2out.fib_index;
per_vrf_sessions->tx_fib_index = s->out2in.fib_index;
done:
- s->per_vrf_sessions_index = per_vrf_sessions - tsm->per_vrf_sessions_vec;
+ s->per_vrf_sessions_index = per_vrf_sessions - tsm->per_vrf_sessions_pool;
per_vrf_sessions->ses_count++;
}
@@ -537,7 +483,7 @@ per_vrf_sessions_unregister_session (snat_session_t *s, u32 thread_index)
tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
per_vrf_sessions =
- vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index);
+ pool_elt_at_index (tsm->per_vrf_sessions_pool, s->per_vrf_sessions_index);
ASSERT (per_vrf_sessions->ses_count != 0);
@@ -557,7 +503,7 @@ per_vrf_sessions_is_expired (snat_session_t *s, u32 thread_index)
tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
per_vrf_sessions =
- vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index);
+ pool_elt_at_index (tsm->per_vrf_sessions_pool, s->per_vrf_sessions_index);
return per_vrf_sessions->expired;
}
@@ -754,100 +700,120 @@ is_interface_addr (snat_main_t *sm, vlib_node_runtime_t *node,
}
always_inline void
-nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses,
- vlib_buffer_t *b, u32 thread_index)
+nat44_ed_session_reopen (u32 thread_index, snat_session_t *s)
+{
+ nat_syslog_nat44_sdel (0, s->in2out.fib_index, &s->in2out.addr,
+ s->in2out.port, &s->ext_host_nat_addr,
+ s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
+ &s->ext_host_addr, s->ext_host_port, s->proto,
+ nat44_ed_is_twice_nat_session (s));
+
+ nat_ipfix_logging_nat44_ses_delete (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
+ nat_ipfix_logging_nat44_ses_create (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
+
+ nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
+ s->in2out.port, &s->ext_host_nat_addr,
+ s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
+ &s->ext_host_addr, s->ext_host_port, s->proto, 0);
+ s->total_pkts = 0;
+ s->total_bytes = 0;
+}
+
+/*
+ * "Some rise by SYN, and some by virtue FIN" - William Shakespeare
+ * TCP state tracking is patterned after RFC 7857 (and RFC 6146, which
+ * RFC 7857 references). In contrast to the RFC 7857 state machine, we only
+ * transition to ESTABLISHED after seeing a full 3-way handshake (SYNs and
+ * ACKs in both directions). RFC 7857, as a means of protecting against
+ * spurious RSTs closing a session, goes back to ESTABLISHED if a data packet
+ * is received after the RST; this state machine instead leaves the session
+ * in the transitory state when an RST is seen. We also go beyond RFC 7857 by
+ * allowing a new session to be created while the old session is in its
+ * transitory timeout, after FIN packets have been seen from both sides.
+ */
+always_inline void
+nat44_set_tcp_session_state (snat_main_t *sm, f64 now, snat_session_t *ses,
+ u8 tcp_flags, u32 thread_index,
+ nat44_ed_dir_e dir)
{
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
- u8 tcp_flags = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags;
- u32 tcp_ack_number = vnet_buffer (b)->ip.reass.tcp_ack_number;
- u32 tcp_seq_number = vnet_buffer (b)->ip.reass.tcp_seq_number;
- if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
- ses->state = NAT44_SES_RST;
- if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
- ses->state = 0;
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
- (ses->state & NAT44_SES_O2I_SYN))
- ses->state = 0;
- if (tcp_flags & TCP_FLAG_SYN)
- ses->state |= NAT44_SES_I2O_SYN;
- if (tcp_flags & TCP_FLAG_FIN)
- {
- ses->i2o_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
- ses->state |= NAT44_SES_I2O_FIN;
- }
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN))
+ u8 old_flags = ses->tcp_flags[dir];
+ ses->tcp_flags[dir] |=
+ tcp_flags & (TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK);
+ if (old_flags == ses->tcp_flags[dir])
+ return;
+
+ u8 old_state = ses->tcp_state;
+
+ switch (old_state)
{
- if (clib_net_to_host_u32 (tcp_ack_number) > ses->o2i_fin_seq)
+ case NAT44_ED_TCP_STATE_CLOSED:
+      // ESTABLISHED once a SYN and an ACK have been seen from both sides
+ if ((ses->tcp_flags[NAT44_ED_DIR_I2O] &
+ ses->tcp_flags[NAT44_ED_DIR_O2I]) == (TCP_FLAG_SYN | TCP_FLAG_ACK))
{
- ses->state |= NAT44_SES_O2I_FIN_ACK;
- if (nat44_is_ses_closed (ses))
- { // if session is now closed, save the timestamp
- ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory;
- ses->last_lru_update = now;
- }
+ ses->tcp_state = NAT44_ED_TCP_STATE_ESTABLISHED;
+ ses->lru_head_index = tsm->tcp_estab_lru_head_index;
}
+ break;
+ case NAT44_ED_TCP_STATE_ESTABLISHED:
+      // CLOSING when a FIN is seen from either side or the session is RST
+ if ((ses->tcp_flags[dir] & TCP_FLAG_FIN) ||
+ (ses->tcp_flags[dir] & TCP_FLAG_RST))
+ {
+ ses->tcp_state = NAT44_ED_TCP_STATE_CLOSING;
+ ses->tcp_flags[NAT44_ED_DIR_I2O] = 0;
+ ses->tcp_flags[NAT44_ED_DIR_O2I] = 0;
+	  // need to update last_heard, otherwise the session might get
+	  // timed out immediately if it has been idle longer than the
+	  // transitory timeout
+ ses->last_heard = now;
+ ses->lru_head_index = tsm->tcp_trans_lru_head_index;
+ }
+ break;
+ case NAT44_ED_TCP_STATE_CLOSING:
+ // Allow a transitory session to reopen
+ if ((ses->tcp_flags[NAT44_ED_DIR_I2O] &
+ ses->tcp_flags[NAT44_ED_DIR_O2I]) == (TCP_FLAG_SYN | TCP_FLAG_ACK))
+ {
+ nat44_ed_session_reopen (thread_index, ses);
+ ses->tcp_state = NAT44_ED_TCP_STATE_ESTABLISHED;
+ ses->lru_head_index = tsm->tcp_estab_lru_head_index;
+ }
+ break;
}
-
- // move the session to proper LRU
- if (ses->state)
- {
- ses->lru_head_index = tsm->tcp_trans_lru_head_index;
- }
- else
- {
- ses->lru_head_index = tsm->tcp_estab_lru_head_index;
- }
+ if (old_state == ses->tcp_state)
+ return;
+ ses->last_lru_update = now;
clib_dlist_remove (tsm->lru_pool, ses->lru_index);
clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
}
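To see the CLOSED -> ESTABLISHED condition above in isolation: flags are OR-accumulated per direction, and the intersection of the two directions must contain both SYN and ACK, i.e. a full 3-way handshake. A toy sketch, assuming the standard TCP header flag bit values rather than VPP's definitions:

#include <stdio.h>
#include <stdint.h>

enum
{
  TCP_FLAG_FIN = 0x01,
  TCP_FLAG_SYN = 0x02,
  TCP_FLAG_RST = 0x04,
  TCP_FLAG_ACK = 0x10,
};

int
main (void)
{
  uint8_t i2o = 0, o2i = 0;
  i2o |= TCP_FLAG_SYN;			/* client SYN */
  o2i |= TCP_FLAG_SYN | TCP_FLAG_ACK;	/* server SYN-ACK */
  i2o |= TCP_FLAG_ACK;			/* client ACK completes handshake */
  if ((i2o & o2i) == (TCP_FLAG_SYN | TCP_FLAG_ACK))
    printf ("session would move to ESTABLISHED\n");
  return 0;
}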
always_inline void
+nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses,
+ u8 tcp_flags, u32 thread_index)
+{
+ return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index,
+ NAT44_ED_DIR_I2O);
+}
+
+always_inline void
nat44_set_tcp_session_state_o2i (snat_main_t *sm, f64 now, snat_session_t *ses,
- u8 tcp_flags, u32 tcp_ack_number,
- u32 tcp_seq_number, u32 thread_index)
+ u8 tcp_flags, u32 thread_index)
{
- snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
- if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
- ses->state = NAT44_SES_RST;
- if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
- ses->state = 0;
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
- (ses->state & NAT44_SES_O2I_SYN))
- ses->state = 0;
- if (tcp_flags & TCP_FLAG_SYN)
- ses->state |= NAT44_SES_O2I_SYN;
- if (tcp_flags & TCP_FLAG_FIN)
- {
- ses->o2i_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
- ses->state |= NAT44_SES_O2I_FIN;
- }
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN))
- {
- if (clib_net_to_host_u32 (tcp_ack_number) > ses->i2o_fin_seq)
- ses->state |= NAT44_SES_I2O_FIN_ACK;
- if (nat44_is_ses_closed (ses))
- { // if session is now closed, save the timestamp
- ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory;
- ses->last_lru_update = now;
- }
- }
- // move the session to proper LRU
- if (ses->state)
- {
- ses->lru_head_index = tsm->tcp_trans_lru_head_index;
- }
- else
- {
- ses->lru_head_index = tsm->tcp_estab_lru_head_index;
- }
- clib_dlist_remove (tsm->lru_pool, ses->lru_index);
- clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
+ return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index,
+ NAT44_ED_DIR_O2I);
}
always_inline void
nat44_session_update_counters (snat_session_t *s, f64 now, uword bytes,
u32 thread_index)
{
+  // regardless of TCP state, reset the timer if a data packet is seen.
s->last_heard = now;
s->total_pkts++;
s->total_bytes += bytes;
@@ -868,6 +834,19 @@ nat44_session_update_lru (snat_main_t *sm, snat_session_t *s, u32 thread_index)
}
}
+static_always_inline int
+nat44_ed_is_unk_proto (u8 proto)
+{
+ static const int lookup_table[256] = {
+ [IP_PROTOCOL_TCP] = 1,
+ [IP_PROTOCOL_UDP] = 1,
+ [IP_PROTOCOL_ICMP] = 1,
+ [IP_PROTOCOL_ICMP6] = 1,
+ };
+
+ return 1 - lookup_table[proto];
+}
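The 256-entry table makes the known/unknown test branch-free; any protocol not listed counts as unknown and is handled by the other-protocol slow path. A standalone sketch of the same idiom, assuming the IANA protocol numbers (ICMP=1, TCP=6, UDP=17, GRE=47, ICMP6=58):

#include <stdio.h>

static int
is_unk_proto (unsigned char proto)
{
  static const int known[256] = {
    [1] = 1,  /* ICMP */
    [6] = 1,  /* TCP */
    [17] = 1, /* UDP */
    [58] = 1, /* ICMP6 */
  };
  return 1 - known[proto];
}

int
main (void)
{
  printf ("TCP unknown? %d\n", is_unk_proto (6));  /* 0 */
  printf ("GRE unknown? %d\n", is_unk_proto (47)); /* 1 */
  return 0;
}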
+
#endif /* __included_nat44_ed_inlines_h__ */
/*
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_out2in.c b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c
index 186d1d6c004..fe4a41c5e08 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_out2in.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c
@@ -25,7 +25,6 @@
#include <vnet/udp/udp_local.h>
#include <vppinfra/error.h>
-#include <nat/lib/nat_syslog.h>
#include <nat/lib/ipfix_logging.h>
#include <nat/nat44-ed/nat44_ed.h>
@@ -42,7 +41,6 @@ typedef enum
NAT_ED_SP_REASON_NO_REASON,
NAT_ED_SP_REASON_LOOKUP_FAILED,
NAT_ED_SP_REASON_VRF_EXPIRED,
- NAT_ED_SP_TCP_CLOSED,
NAT_ED_SP_SESS_EXPIRED,
} nat_slow_path_reason_e;
@@ -58,6 +56,7 @@ typedef struct
u8 is_slow_path;
u8 translation_via_i2of;
u8 lookup_skipped;
+ u8 tcp_state;
nat_slow_path_reason_e slow_path_reason;
} nat44_ed_out2in_trace_t;
@@ -73,8 +72,6 @@ format_slow_path_reason (u8 *s, va_list *args)
return format (s, "slow path because lookup failed");
case NAT_ED_SP_REASON_VRF_EXPIRED:
return format (s, "slow path because vrf expired");
- case NAT_ED_SP_TCP_CLOSED:
- return format (s, "slow path because tcp closed");
case NAT_ED_SP_SESS_EXPIRED:
return format (s, "slow path because session expired");
}
@@ -108,14 +105,19 @@ format_nat44_ed_out2in_trace (u8 * s, va_list * args)
{
if (t->lookup_skipped)
{
- s = format (s, "\n lookup skipped - cached session index used");
+ s = format (s, "\n lookup skipped - cached session index used");
}
else
{
s = format (s, "\n search key %U", format_ed_session_kvp,
&t->search_key);
}
- s = format (s, "\n %U", format_slow_path_reason, t->slow_path_reason);
+ s = format (s, "\n %U", format_slow_path_reason, t->slow_path_reason);
+ }
+ if (IP_PROTOCOL_TCP == t->i2of.match.proto)
+ {
+ s = format (s, "\n TCP state: %U", format_nat44_ed_tcp_state,
+ t->tcp_state);
}
return s;
@@ -123,12 +125,12 @@ format_nat44_ed_out2in_trace (u8 * s, va_list * args)
static int
next_src_nat (snat_main_t *sm, ip4_header_t *ip, u16 src_port, u16 dst_port,
- u32 thread_index, u32 rx_fib_index)
+ u32 rx_fib_index)
{
clib_bihash_kv_16_8_t kv, value;
- init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
- rx_fib_index, ip->protocol);
+ init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
+ dst_port, rx_fib_index, ip->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
return 1;
@@ -142,8 +144,8 @@ static void create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b,
static snat_session_t *create_session_for_static_mapping_ed (
snat_main_t *sm, vlib_buffer_t *b, ip4_address_t i2o_addr, u16 i2o_port,
u32 i2o_fib_index, ip4_address_t o2i_addr, u16 o2i_port, u32 o2i_fib_index,
- nat_protocol_t nat_proto, vlib_node_runtime_t *node, u32 rx_fib_index,
- u32 thread_index, twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now,
+ ip_protocol_t proto, vlib_node_runtime_t *node, u32 thread_index,
+ twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now,
snat_static_mapping_t *mapping);
static inline u32
@@ -180,10 +182,10 @@ icmp_out2in_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
goto out;
}
- if (snat_static_mapping_match (
- vm, sm, ip->dst_address, lookup_sport, rx_fib_index,
- ip_proto_to_nat_proto (ip->protocol), &sm_addr, &sm_port,
- &sm_fib_index, 1, &is_addr_only, 0, 0, 0, &identity_nat, &m))
+ if (snat_static_mapping_match (vm, ip->dst_address, lookup_sport,
+ rx_fib_index, ip->protocol, &sm_addr,
+ &sm_port, &sm_fib_index, 1, &is_addr_only, 0,
+ 0, 0, &identity_nat, &m))
{
// static mapping not matched
if (!sm->forwarding_enabled)
@@ -198,8 +200,7 @@ icmp_out2in_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
}
else
{
- if (next_src_nat (sm, ip, lookup_sport, lookup_dport, thread_index,
- rx_fib_index))
+ if (next_src_nat (sm, ip, lookup_sport, lookup_dport, rx_fib_index))
{
next = NAT_NEXT_IN2OUT_ED_FAST_PATH;
}
@@ -230,8 +231,8 @@ icmp_out2in_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
/* Create session initiated by host from external network */
s = create_session_for_static_mapping_ed (
sm, b, sm_addr, sm_port, sm_fib_index, ip->dst_address, lookup_sport,
- rx_fib_index, ip_proto_to_nat_proto (lookup_protocol), node, rx_fib_index,
- thread_index, 0, 0, vlib_time_now (vm), m);
+ rx_fib_index, lookup_protocol, node, thread_index, 0, 0,
+ vlib_time_now (vm), m);
if (!s)
next = NAT_NEXT_DROP;
@@ -266,44 +267,30 @@ out:
return next;
}
-// allocate exact address based on preference
static_always_inline int
-nat_alloc_addr_and_port_exact (snat_address_t * a,
- u32 thread_index,
- nat_protocol_t proto,
- ip4_address_t * addr,
- u16 * port,
- u16 port_per_thread, u32 snat_thread_index)
+nat44_ed_alloc_i2o_port (snat_main_t *sm, snat_address_t *a, snat_session_t *s,
+ ip4_address_t i2o_addr, u16 i2o_port,
+ u32 i2o_fib_index, ip_protocol_t proto,
+ u32 thread_index, u32 snat_thread_index,
+ ip4_address_t *outside_addr, u16 *outside_port)
{
- snat_main_t *sm = &snat_main;
u32 portnum;
- switch (proto)
+ for (int i = 0; i < ED_PORT_ALLOC_ATTEMPTS; ++i)
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
- { \
- while (1) \
- { \
- portnum = (port_per_thread * \
- snat_thread_index) + \
- snat_random_port(0, port_per_thread - 1) + 1024; \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- --a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16(portnum); \
- return 0; \
- } \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return 1;
+ portnum = (sm->port_per_thread * snat_thread_index) +
+ snat_random_port (0, sm->port_per_thread - 1) +
+ ED_USER_PORT_OFFSET;
+ portnum = clib_host_to_net_u16 (portnum);
+ nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, a->addr,
+ portnum, i2o_fib_index, proto);
+ if (!nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s,
+ 1 /* is_add */))
+ {
+ *outside_addr = a->addr;
+ *outside_port = portnum;
+ return 0;
+ }
}
/* Totally out of translations to use... */
@@ -312,80 +299,56 @@ nat_alloc_addr_and_port_exact (snat_address_t * a,
}
static_always_inline int
-nat44_ed_alloc_outside_addr_and_port (snat_address_t *addresses, u32 fib_index,
- u32 thread_index, nat_protocol_t proto,
- ip4_address_t *addr, u16 *port,
- u16 port_per_thread,
- u32 snat_thread_index)
+nat44_ed_alloc_i2o_addr_and_port (snat_main_t *sm, snat_address_t *addresses,
+ snat_session_t *s, ip4_address_t i2o_addr,
+ u16 i2o_port, u32 i2o_fib_index,
+ ip_protocol_t proto, u32 thread_index,
+ u32 snat_thread_index,
+ ip4_address_t *outside_addr,
+ u16 *outside_port)
{
- snat_main_t *sm = &snat_main;
snat_address_t *a, *ga = 0;
- u32 portnum;
int i;
- for (i = 0; i < vec_len (addresses); i++)
+ if (vec_len (addresses) > 0)
{
- a = addresses + i;
- switch (proto)
+ int s_addr_offset = i2o_addr.as_u32 % vec_len (addresses);
+
+ for (i = s_addr_offset; i < vec_len (addresses); ++i)
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
- { \
- if (a->fib_index == fib_index) \
- { \
- while (1) \
- { \
- portnum = (port_per_thread * snat_thread_index) + \
- snat_random_port (0, port_per_thread - 1) + 1024; \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- --a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16 (portnum); \
- return 0; \
- } \
- } \
- else if (a->fib_index == ~0) \
- { \
- ga = a; \
- } \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return 1;
+ a = addresses + i;
+ if (a->fib_index == i2o_fib_index)
+ {
+ return nat44_ed_alloc_i2o_port (
+ sm, a, s, i2o_addr, i2o_port, i2o_fib_index, proto,
+ thread_index, snat_thread_index, outside_addr, outside_port);
+ }
+ else if (a->fib_index == ~0)
+ {
+ ga = a;
+ }
}
- }
- if (ga)
- {
- a = ga;
- switch (proto)
+ for (i = 0; i < s_addr_offset; ++i)
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- while (1) \
- { \
- portnum = (port_per_thread * snat_thread_index) + \
- snat_random_port (0, port_per_thread - 1) + 1024; \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- ++a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16 (portnum); \
- return 0; \
- }
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return 1;
+ a = addresses + i;
+ if (a->fib_index == i2o_fib_index)
+ {
+ return nat44_ed_alloc_i2o_port (
+ sm, a, s, i2o_addr, i2o_port, i2o_fib_index, proto,
+ thread_index, snat_thread_index, outside_addr, outside_port);
+ }
+ else if (a->fib_index == ~0)
+ {
+ ga = a;
+ }
+ }
+
+ if (ga)
+ {
+ return nat44_ed_alloc_i2o_port (
+	    sm, ga, s, i2o_addr, i2o_port, i2o_fib_index, proto, thread_index,
+ snat_thread_index, outside_addr, outside_port);
}
}
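The two loops start at an offset derived from the inside address and wrap around, so a given inside host tends to land on the same outside address while different hosts spread across the pool; an address bound to no particular fib (fib_index == ~0) is remembered as a fallback. The traversal order, sketched standalone with a hypothetical pool size:

#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  uint32_t i2o_addr = 0x0a000001; /* 10.0.0.1 */
  int n_addresses = 4;		  /* hypothetical pool size */
  int start = i2o_addr % n_addresses;

  /* the loops above visit [start, n) then [0, start), i.e.: */
  for (int k = 0; k < n_addresses; k++)
    printf ("try address pool index %d\n", (start + k) % n_addresses);
  return 0;
}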
@@ -398,23 +361,23 @@ static snat_session_t *
create_session_for_static_mapping_ed (
snat_main_t *sm, vlib_buffer_t *b, ip4_address_t i2o_addr, u16 i2o_port,
u32 i2o_fib_index, ip4_address_t o2i_addr, u16 o2i_port, u32 o2i_fib_index,
- nat_protocol_t nat_proto, vlib_node_runtime_t *node, u32 rx_fib_index,
- u32 thread_index, twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now,
+ ip_protocol_t proto, vlib_node_runtime_t *node, u32 thread_index,
+ twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now,
snat_static_mapping_t *mapping)
{
snat_session_t *s;
ip4_header_t *ip;
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
- if (PREDICT_FALSE
- (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
+ if (PREDICT_FALSE (
+ nat44_ed_maximum_sessions_exceeded (sm, o2i_fib_index, thread_index)))
{
b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
nat_elog_notice (sm, "maximum sessions exceeded");
return 0;
}
- s = nat_ed_session_alloc (sm, thread_index, now, nat_proto);
+ s = nat_ed_session_alloc (sm, thread_index, now, proto);
if (!s)
{
b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
@@ -426,22 +389,21 @@ create_session_for_static_mapping_ed (
s->ext_host_addr.as_u32 = ip->src_address.as_u32;
s->ext_host_port =
- nat_proto == NAT_PROTOCOL_ICMP ? 0 : vnet_buffer (b)->ip.reass.l4_src_port;
+ proto == IP_PROTOCOL_ICMP ? 0 : vnet_buffer (b)->ip.reass.l4_src_port;
s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
if (lb_nat)
s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
if (lb_nat == AFFINITY_LB_NAT)
s->flags |= SNAT_SESSION_FLAG_AFFINITY;
- s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
s->out2in.addr = o2i_addr;
s->out2in.port = o2i_port;
s->out2in.fib_index = o2i_fib_index;
s->in2out.addr = i2o_addr;
s->in2out.port = i2o_port;
s->in2out.fib_index = i2o_fib_index;
- s->nat_proto = nat_proto;
+ s->proto = proto;
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_o2i_flow_init (sm, thread_index, s, s->ext_host_addr, o2i_port,
o2i_addr, o2i_port, o2i_fib_index, ip->protocol);
@@ -487,33 +449,23 @@ create_session_for_static_mapping_ed (
if (filter)
{
- rc = nat_alloc_addr_and_port_exact (filter,
- thread_index,
- nat_proto,
- &s->ext_host_nat_addr,
- &s->ext_host_nat_port,
- sm->port_per_thread,
- tsm->snat_thread_index);
+ rc = nat44_ed_alloc_i2o_port (
+ sm, filter, s, i2o_addr, i2o_port, i2o_fib_index, proto,
+ thread_index, tsm->snat_thread_index, &s->ext_host_nat_addr,
+ &s->ext_host_nat_port);
s->flags |= SNAT_SESSION_FLAG_EXACT_ADDRESS;
}
else
{
- rc = nat44_ed_alloc_outside_addr_and_port (
- sm->twice_nat_addresses, 0, thread_index, nat_proto,
- &s->ext_host_nat_addr, &s->ext_host_nat_port, sm->port_per_thread,
- tsm->snat_thread_index);
+ rc = nat44_ed_alloc_i2o_addr_and_port (
+ sm, sm->twice_nat_addresses, s, i2o_addr, i2o_port, i2o_fib_index,
+ proto, thread_index, tsm->snat_thread_index, &s->ext_host_nat_addr,
+ &s->ext_host_nat_port);
}
if (rc)
{
b->error = node->errors[NAT_OUT2IN_ED_ERROR_OUT_OF_PORTS];
- if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
- {
- nat_elog_warn (sm, "out2in flow hash del failed");
- }
- snat_free_outside_address_and_port (
- sm->twice_nat_addresses, thread_index, &s->ext_host_nat_addr,
- s->ext_host_nat_port, s->nat_proto);
nat_ed_session_delete (sm, s, thread_index, 1);
return 0;
}
@@ -521,7 +473,7 @@ create_session_for_static_mapping_ed (
s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
nat_6t_flow_saddr_rewrite_set (&s->o2i, s->ext_host_nat_addr.as_u32);
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_flow_icmp_id_rewrite_set (&s->o2i, s->ext_host_nat_port);
}
@@ -532,11 +484,8 @@ create_session_for_static_mapping_ed (
nat_6t_l3_l4_csum_calc (&s->o2i);
- nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port,
- s->ext_host_nat_addr, s->ext_host_nat_port,
- i2o_fib_index, ip->protocol);
nat_6t_flow_daddr_rewrite_set (&s->i2o, s->ext_host_addr.as_u32);
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_flow_icmp_id_rewrite_set (&s->i2o, s->ext_host_port);
}
@@ -544,10 +493,21 @@ create_session_for_static_mapping_ed (
{
nat_6t_flow_dport_rewrite_set (&s->i2o, s->ext_host_port);
}
+
+ nat_6t_flow_saddr_rewrite_set (&s->i2o, o2i_addr.as_u32);
+ if (IP_PROTOCOL_ICMP == proto)
+ {
+ nat_6t_flow_icmp_id_rewrite_set (&s->i2o, o2i_port);
+ }
+ else
+ {
+ nat_6t_flow_sport_rewrite_set (&s->i2o, o2i_port);
+ }
+ nat_6t_l3_l4_csum_calc (&s->i2o);
}
else
{
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port,
s->ext_host_addr, i2o_port, i2o_fib_index,
@@ -559,10 +519,9 @@ create_session_for_static_mapping_ed (
s->ext_host_addr, s->ext_host_port,
i2o_fib_index, ip->protocol);
}
- }
nat_6t_flow_saddr_rewrite_set (&s->i2o, o2i_addr.as_u32);
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_flow_icmp_id_rewrite_set (&s->i2o, o2i_port);
}
@@ -581,19 +540,16 @@ create_session_for_static_mapping_ed (
nat_ed_session_delete (sm, s, thread_index, 1);
return 0;
}
-
- nat_ipfix_logging_nat44_ses_create (thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port, s->in2out.fib_index);
+ }
+ nat_ipfix_logging_nat44_ses_create (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
s->in2out.port, &s->ext_host_nat_addr,
s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
- &s->ext_host_addr, s->ext_host_port, s->nat_proto,
- is_twice_nat_session (s));
+ &s->ext_host_addr, s->ext_host_port, s->proto,
+ nat44_ed_is_twice_nat_session (s));
per_vrf_sessions_register_session (s, thread_index);
@@ -636,8 +592,8 @@ create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s,
lookup_protocol = ip->protocol;
}
- init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
- rx_fib_index, lookup_protocol);
+ init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
+ lookup_dport, rx_fib_index, lookup_protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
{
@@ -654,8 +610,6 @@ create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s,
}
else
{
- u32 proto;
-
if (PREDICT_FALSE
(nat44_ed_maximum_sessions_exceeded
(sm, rx_fib_index, thread_index)))
@@ -668,19 +622,12 @@ create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s,
return;
}
- proto = ip_proto_to_nat_proto (ip->protocol);
-
s->ext_host_addr = ip->src_address;
s->ext_host_port = lookup_dport;
s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS;
s->out2in.addr = ip->dst_address;
s->out2in.port = lookup_sport;
- s->nat_proto = proto;
- if (proto == NAT_PROTOCOL_OTHER)
- {
- s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
- s->out2in.port = ip->protocol;
- }
+ s->proto = ip->protocol;
s->out2in.fib_index = rx_fib_index;
s->in2out.addr = s->out2in.addr;
s->in2out.port = s->out2in.port;
@@ -702,10 +649,9 @@ create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s,
if (ip->protocol == IP_PROTOCOL_TCP)
{
- tcp_header_t *tcp = ip4_next_header (ip);
- nat44_set_tcp_session_state_o2i (sm, now, s, tcp->flags,
- tcp->ack_number, tcp->seq_number,
- thread_index);
+ nat44_set_tcp_session_state_o2i (
+ sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
}
/* Accounting */
@@ -721,7 +667,6 @@ nat44_ed_out2in_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
vlib_main_t *vm,
vlib_node_runtime_t *node)
{
- clib_bihash_kv_8_8_t kv, value;
snat_static_mapping_t *m;
snat_session_t *s;
@@ -733,15 +678,13 @@ nat44_ed_out2in_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
return 0;
}
- init_nat_k (&kv, ip->dst_address, 0, 0, 0);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ m = nat44_ed_sm_o2i_lookup (sm, ip->dst_address, 0, 0, ip->protocol);
+ if (!m)
{
b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
return 0;
}
- m = pool_elt_at_index (sm->static_mappings, value.value);
-
/* Create a new session */
s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
if (!s)
@@ -752,9 +695,7 @@ nat44_ed_out2in_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
}
s->ext_host_addr.as_u32 = ip->src_address.as_u32;
- s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
- s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
s->out2in.addr.as_u32 = ip->dst_address.as_u32;
s->out2in.fib_index = rx_fib_index;
s->in2out.addr.as_u32 = m->local_addr.as_u32;
@@ -816,10 +757,10 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
{
vlib_buffer_t *b0;
u32 sw_if_index0, rx_fib_index0;
- nat_protocol_t proto0;
+ ip_protocol_t proto0;
ip4_header_t *ip0;
snat_session_t *s0 = 0;
- clib_bihash_kv_16_8_t kv0, value0;
+ clib_bihash_kv_16_8_t kv0 = {}, value0;
nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
nat_slow_path_reason_e slow_path_reason = NAT_ED_SP_REASON_NO_REASON;
nat_6t_flow_t *f = 0;
@@ -865,9 +806,9 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
goto trace0;
}
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
+ proto0 = ip0->protocol;
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
+ if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
{
if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
ICMP4_echo_request &&
@@ -916,8 +857,8 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
s0 = NULL;
}
- init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
- lookup.fib_index, lookup.proto);
+ init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
+ lookup.dport, lookup.fib_index, lookup.proto);
// lookup flow
if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
@@ -938,30 +879,14 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
{
// session is closed, go slow path
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
slow_path_reason = NAT_ED_SP_REASON_VRF_EXPIRED;
next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
goto trace0;
}
- if (s0->tcp_closed_timestamp)
- {
- if (now >= s0->tcp_closed_timestamp)
- {
- // session is closed, go slow path, freed in slow path
- slow_path_reason = NAT_ED_SP_TCP_CLOSED;
- next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
- }
- else
- {
- // session in transitory timeout, drop
- b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TCP_CLOSED];
- next[0] = NAT_NEXT_DROP;
- }
- goto trace0;
- }
-
// drop if session expired
u64 sess_timeout_time;
sess_timeout_time =
@@ -969,8 +894,9 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
if (now >= sess_timeout_time)
{
// session is closed, go slow path
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
slow_path_reason = NAT_ED_SP_SESS_EXPIRED;
next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
goto trace0;
@@ -992,7 +918,7 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
* be able to use dhcp client on the outside interface
*/
if (PREDICT_FALSE (
- proto0 == NAT_PROTOCOL_UDP &&
+ proto0 == IP_PROTOCOL_UDP &&
(vnet_buffer (b0)->ip.reass.l4_dst_port ==
clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client))))
{
@@ -1017,8 +943,9 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
// create_bypass_for_fwd (sm, b0, s0, ip0, rx_fib_index0,
// thread_index);
translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TRNSL_FAILED];
goto trace0;
@@ -1037,27 +964,23 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
switch (proto0)
{
- case NAT_PROTOCOL_TCP:
+ case IP_PROTOCOL_TCP:
vlib_increment_simple_counter (&sm->counters.fastpath.out2in.tcp,
thread_index, sw_if_index0, 1);
nat44_set_tcp_session_state_o2i (sm, now, s0,
vnet_buffer (b0)->ip.
reass.icmp_type_or_tcp_flags,
- vnet_buffer (b0)->ip.
- reass.tcp_ack_number,
- vnet_buffer (b0)->ip.
- reass.tcp_seq_number,
thread_index);
break;
- case NAT_PROTOCOL_UDP:
+ case IP_PROTOCOL_UDP:
vlib_increment_simple_counter (&sm->counters.fastpath.out2in.udp,
thread_index, sw_if_index0, 1);
break;
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
vlib_increment_simple_counter (&sm->counters.fastpath.out2in.icmp,
thread_index, sw_if_index0, 1);
break;
- case NAT_PROTOCOL_OTHER:
+ default:
vlib_increment_simple_counter (&sm->counters.fastpath.out2in.other,
thread_index, sw_if_index0, 1);
break;
@@ -1090,6 +1013,7 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
t->translation_via_i2of = (&s0->i2o == f);
+ t->tcp_state = s0->tcp_state;
}
else
{
@@ -1135,12 +1059,12 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
{
vlib_buffer_t *b0;
u32 sw_if_index0, rx_fib_index0;
- nat_protocol_t proto0;
+ ip_protocol_t proto0;
ip4_header_t *ip0;
udp_header_t *udp0;
icmp46_header_t *icmp0;
snat_session_t *s0 = 0;
- clib_bihash_kv_16_8_t kv0, value0;
+ clib_bihash_kv_16_8_t kv0 = {}, value0;
lb_nat_type_t lb_nat0;
twice_nat_type_t twice_nat0;
u8 identity_nat0;
@@ -1171,9 +1095,9 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
udp0 = ip4_next_header (ip0);
icmp0 = (icmp46_header_t *) udp0;
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
+ proto0 = ip0->protocol;
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
+ if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
{
s0 = nat44_ed_out2in_slowpath_unknown_proto (
sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
@@ -1198,7 +1122,7 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
goto trace0;
}
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
+ if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
{
next[0] = icmp_out2in_ed_slow_path
(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
@@ -1215,15 +1139,19 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
goto trace0;
}
- vlib_increment_simple_counter (&sm->counters.slowpath.out2in.icmp,
- thread_index, sw_if_index0, 1);
+ if (NAT_NEXT_DROP != next[0])
+ {
+ vlib_increment_simple_counter (
+ &sm->counters.slowpath.out2in.icmp, thread_index, sw_if_index0,
+ 1);
+ }
goto trace0;
}
- init_ed_k (&kv0, ip0->src_address,
- vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
- ip0->protocol);
+ init_ed_k (
+ &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
+ ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
+ rx_fib_index0, ip0->protocol);
s0 = NULL;
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
@@ -1232,13 +1160,6 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
s0 =
pool_elt_at_index (tsm->sessions,
ed_value_get_session_index (&value0));
-
- if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
- {
- nat_free_session_data (sm, s0, thread_index, 0);
- nat_ed_session_delete (sm, s0, thread_index, 1);
- s0 = NULL;
- }
}
if (!s0)
@@ -1247,19 +1168,18 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
destination address and port in packet */
if (snat_static_mapping_match (
- vm, sm, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, proto0,
- &sm_addr, &sm_port, &sm_fib_index, 1, 0, &twice_nat0, &lb_nat0,
- &ip0->src_address, &identity_nat0, &m))
+ vm, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port,
+ rx_fib_index0, proto0, &sm_addr, &sm_port, &sm_fib_index, 1, 0,
+ &twice_nat0, &lb_nat0, &ip0->src_address, &identity_nat0, &m))
{
/*
* Send DHCP packets to the ipv4 stack, or we won't
* be able to use dhcp client on the outside interface
*/
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_UDP
- && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
- clib_host_to_net_u16
- (UDP_DST_PORT_dhcp_to_client))))
+ if (PREDICT_FALSE (
+ proto0 == IP_PROTOCOL_UDP &&
+ (vnet_buffer (b0)->ip.reass.l4_dst_port ==
+ clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client))))
{
goto trace0;
}
@@ -1272,10 +1192,9 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
}
else
{
- if (next_src_nat
- (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port,
- thread_index, rx_fib_index0))
+ if (next_src_nat (
+ sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0))
{
next[0] = NAT_NEXT_IN2OUT_ED_FAST_PATH;
}
@@ -1291,9 +1210,9 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
if (PREDICT_FALSE (identity_nat0))
goto trace0;
- if ((proto0 == NAT_PROTOCOL_TCP)
- && !tcp_flags_is_init (vnet_buffer (b0)->ip.
- reass.icmp_type_or_tcp_flags))
+ if ((proto0 == IP_PROTOCOL_TCP) &&
+ !tcp_flags_is_init (
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
{
b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN];
next[0] = NAT_NEXT_DROP;
@@ -1301,16 +1220,10 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
}
/* Create session initiated by host from external network */
- s0 = create_session_for_static_mapping_ed (sm, b0,
- sm_addr, sm_port,
- sm_fib_index,
- ip0->dst_address,
- vnet_buffer (b0)->
- ip.reass.l4_dst_port,
- rx_fib_index0, proto0,
- node, rx_fib_index0,
- thread_index, twice_nat0,
- lb_nat0, now, m);
+ s0 = create_session_for_static_mapping_ed (
+ sm, b0, sm_addr, sm_port, sm_fib_index, ip0->dst_address,
+ vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, proto0,
+ node, thread_index, twice_nat0, lb_nat0, now, m);
if (!s0)
{
next[0] = NAT_NEXT_DROP;
@@ -1326,17 +1239,13 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
goto trace0;
}
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
+ if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
{
vlib_increment_simple_counter (&sm->counters.slowpath.out2in.tcp,
thread_index, sw_if_index0, 1);
nat44_set_tcp_session_state_o2i (sm, now, s0,
vnet_buffer (b0)->ip.
reass.icmp_type_or_tcp_flags,
- vnet_buffer (b0)->ip.
- reass.tcp_ack_number,
- vnet_buffer (b0)->ip.
- reass.tcp_seq_number,
thread_index);
}
else
@@ -1369,6 +1278,7 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
t->session_index = s0 - tsm->sessions;
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
+ t->tcp_state = s0->tcp_state;
}
else
{
diff --git a/src/plugins/nat/nat44-ed/tcp_conn_track.rst b/src/plugins/nat/nat44-ed/tcp_conn_track.rst
new file mode 100644
index 00000000000..faf0dec8b06
--- /dev/null
+++ b/src/plugins/nat/nat44-ed/tcp_conn_track.rst
@@ -0,0 +1,65 @@
+NAT44ED TCP connection tracking
+===============================
+
+TCP connection tracking in endpoint-dependent NAT is based on RFC 7857
+and RFC 6146, which RFC 7857 references.
+
+See RFC 7857 for the original state graph. Our graph differs slightly in
+that it allows creation of a new session while an old session for the
+same 5-tuple is in transitory timeout after FIN packets have been seen
+from both sides.
+
+After discussion on vpp-dev and with Andrew Yourtschenko, we agreed that
+it is friendlier behaviour to allow creating a new session while the old
+one is closed and in transitory timeout. The alternative would mean VPP
+insisting that no connection for a 5-tuple can be created while an old
+one is finished and timing out. There is no apparent reason why this
+change would break anything, and we agreed it can only help users.
+
+::
+
+
+ +------------transitory timeout----------------+
+ | |
+ | +-------------+ |
+ | session created---->+ CLOSED | |
+ | +-------------+ |
+ | | | |
++-----+ | SYN SYN |
+| v v IN2OUT OUT2IN |
+| +->session removed | | |
+| | ^ ^ ^ ^ ^ v v |
+| | | | | | | +-------+ +-------+ |
+| | | | | | +----transitory timeout---+SYN_I2O| |SYN_O2I+--+
+| | | | | | +---------+ |-------| |-------|
+| | | | | +-transitory---+RST_TRANS| | |
+| | | | | timeout +---------+ SYN SYN
+| | | | | | ^ OUT2IN IN2OUT
+| | | | | | | | |
+| | | | | | | v v
+| | | | | | | +-----------+
+| | | | | | +--RST----+ESTABLISHED+<-SYN IN2OUT-+
+| | | | | | +-----------+ |
+| | | | | +---data pkt-----^ | | | ^ |
+| | | | | | | | | |
+| | | | +----established timeout---------------+ | | | |
+| | | | | | | |
+| | | | +-----FIN IN2OUT---------+ | | |
+| | | | v | | |
+| | | | +-------+ +--FIN OUT2IN----+ | |
+| | | +--established---+FIN_I2O| | | |
+| | | timeout +-------+ v +-SYN OUT2IN-+ |
+| | | | +-------+ | |
+| | +----established-------------+FIN_O2I| +--------------+ |
+| | timeout | +-------+ |REOPEN_SYN_I2O| +--------------+
+| | | | +--------------+ |REOPEN_SYN_O2I|
+| | FIN FIN ^ | +--------------+
+| | OUT2IN IN2OUT | | ^ |
+| | | | | | | |
+| | v v | | | |
+| | +-------------+ | | | |
+| +--transitory timeout---+ FIN_TRANS +-SYN IN2OUT-+ | | |
+| +-------------+ | | |
+| | | | |
+| +--------SYN OUT2IN----|-----------+ |
+| v |
++------------------transitory timeout-------------------+<-------------+
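+
+A minimal sketch of the reopen rule the graph adds on top of RFC 7857
+(hypothetical helper and enum names, not the actual data-plane code):
+
+::
+
+   /* illustrative only - a new session for the same 5-tuple may be
+      created once the old one has seen FINs from both sides and is
+      merely waiting out its transitory timeout */
+   static inline int
+   tcp_session_allows_reopen (const snat_session_t *s)
+   {
+     return s->tcp_state == NAT44_ED_TCP_STATE_FIN_TRANS;
+   }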
diff --git a/src/plugins/nat/nat44-ei/nat44_ei.api b/src/plugins/nat/nat44-ei/nat44_ei.api
index 9ea1a3a1dde..6d24b541e8d 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei.api
+++ b/src/plugins/nat/nat44-ei/nat44_ei.api
@@ -550,6 +550,45 @@ define nat44_ei_interface_output_feature_details {
vl_api_interface_index_t sw_if_index;
};
/** \brief Add/del NAT output interface (postrouting
+ in2out translation)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - true if add, false if delete
+ @param sw_if_index - software index of the interface
+*/
+autoendian autoreply define nat44_ei_add_del_output_interface {
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_interface_index_t sw_if_index;
+};
+
+service {
+ rpc nat44_ei_output_interface_get returns nat44_ei_output_interface_get_reply
+ stream nat44_ei_output_interface_details;
+};
+
+define nat44_ei_output_interface_get
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+};
+
+define nat44_ei_output_interface_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+define nat44_ei_output_interface_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+
/** \brief Add/delete NAT44 static mapping
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -784,6 +823,52 @@ define nat44_ei_user_session_details {
u16 ext_host_port;
};
+/** \brief NAT44 user's sessions
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip_address - IPv4 address of the user to dump
+ @param vrf_id - VRF_ID
+*/
+define nat44_ei_user_session_v2_dump {
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_ip4_address_t ip_address;
+ u32 vrf_id;
+};
+
+/** \brief NAT44 user's sessions response
+ @param context - sender context, to match reply w/ request
+ @param outside_ip_address - outside IPv4 address
+ @param outside_port - outside port
+ @param inside_ip_address - inside IPv4 address
+ @param inside_port - inside port
+ @param protocol - protocol
+ @param flags - flag NAT_IS_STATIC if session is static
+ @param last_heard - time the session was last heard, since VPP start
+ @param time_since_last_heard - difference between current vpp time and last_heard value
+ @param total_bytes - count of bytes sent through session
+ @param total_pkts - count of packets sent through session
+ @param ext_host_address - external host IPv4 address
+ @param ext_host_port - external host port
+*/
+define nat44_ei_user_session_v2_details {
+ option in_progress;
+ u32 context;
+ vl_api_ip4_address_t outside_ip_address;
+ u16 outside_port;
+ vl_api_ip4_address_t inside_ip_address;
+ u16 inside_port;
+ u16 protocol;
+ vl_api_nat44_ei_config_flags_t flags;
+ u64 last_heard;
+ u64 time_since_last_heard;
+ u64 total_bytes;
+ u32 total_pkts;
+ vl_api_ip4_address_t ext_host_address;
+ u16 ext_host_port;
+};
+
/** \brief Delete NAT44 session
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/plugins/nat/nat44-ei/nat44_ei.c b/src/plugins/nat/nat44-ei/nat44_ei.c
index 3c9a9a85346..e16625a2946 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei.c
@@ -40,7 +40,6 @@
nat44_ei_main_t nat44_ei_main;
extern vlib_node_registration_t nat44_ei_hairpinning_node;
-extern vlib_node_registration_t nat44_ei_hairpin_dst_node;
extern vlib_node_registration_t
nat44_ei_in2out_hairpinning_finish_ip4_lookup_node;
extern vlib_node_registration_t
@@ -62,7 +61,7 @@ extern vlib_node_registration_t
if (PREDICT_FALSE (nm->enabled)) \
{ \
nat44_ei_log_err ("plugin enabled"); \
- return 1; \
+ return VNET_API_ERROR_FEATURE_ALREADY_ENABLED; \
} \
} \
while (0)
@@ -74,7 +73,7 @@ extern vlib_node_registration_t
if (PREDICT_FALSE (!nm->enabled)) \
{ \
nat44_ei_log_err ("plugin disabled"); \
- return 1; \
+ return VNET_API_ERROR_FEATURE_ALREADY_DISABLED; \
} \
} \
while (0)
@@ -111,31 +110,6 @@ VNET_FEATURE_INIT (ip4_nat44_ei_in2out_output, static) = {
.runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa",
"ip4-sv-reassembly-output-feature"),
};
-VNET_FEATURE_INIT (ip4_nat44_ei_in2out_fast, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-ei-in2out-fast",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
- "ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_nat44_ei_out2in_fast, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-ei-out2in-fast",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
- "ip4-sv-reassembly-feature",
- "ip4-dhcp-client-detect"),
-};
-VNET_FEATURE_INIT (ip4_nat44_ei_hairpin_dst, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-ei-hairpin-dst",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
- "ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_nat44_ei_hairpin_src, static) = {
- .arc_name = "ip4-output",
- .node_name = "nat44-ei-hairpin-src",
- .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa",
- "ip4-sv-reassembly-output-feature"),
-};
VNET_FEATURE_INIT (ip4_nat44_ei_hairpinning, static) = {
.arc_name = "ip4-local",
.node_name = "nat44-ei-hairpinning",
@@ -200,6 +174,39 @@ typedef struct
void nat44_ei_add_del_addr_to_fib (ip4_address_t *addr, u8 p_len,
u32 sw_if_index, int is_add);
+static void nat44_ei_worker_db_free (nat44_ei_main_per_thread_data_t *tnm);
+
+static int nat44_ei_add_static_mapping_internal (
+ ip4_address_t l_addr, ip4_address_t e_addr, u16 l_port, u16 e_port,
+ nat_protocol_t proto, u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag);
+
+static int nat44_ei_del_static_mapping_internal (
+ ip4_address_t l_addr, ip4_address_t e_addr, u16 l_port, u16 e_port,
+ nat_protocol_t proto, u32 vrf_id, u32 sw_if_index, u32 flags);
+
+always_inline bool
+nat44_ei_port_is_used (nat44_ei_address_t *a, u8 proto, u16 port)
+{
+ return clib_bitmap_get (a->busy_port_bitmap[proto], port);
+}
+
+always_inline void
+nat44_ei_port_get (nat44_ei_address_t *a, u8 proto, u16 port)
+{
+ ASSERT (!nat44_ei_port_is_used (a, proto, port));
+ a->busy_port_bitmap[proto] =
+ clib_bitmap_set (a->busy_port_bitmap[proto], port, 1);
+}
+
+always_inline void
+nat44_ei_port_put (nat44_ei_address_t *a, u8 proto, u16 port)
+{
+ ASSERT (nat44_ei_port_is_used (a, proto, port));
+ a->busy_port_bitmap[proto] =
+ clib_bitmap_set (a->busy_port_bitmap[proto], port, 0);
+}
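+
+/* Usage sketch (illustrative only): one bitmap per protocol per address
+ * tracks busy ports, so a reserve/release pair looks like:
+ *
+ *   if (!nat44_ei_port_is_used (a, proto, port))
+ *     nat44_ei_port_get (a, proto, port);
+ *   ...
+ *   nat44_ei_port_put (a, proto, port);
+ */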
+
static u8 *
format_nat44_ei_classify_trace (u8 *s, va_list *args)
{
@@ -219,8 +226,6 @@ format_nat44_ei_classify_trace (u8 *s, va_list *args)
return s;
}
-static void nat44_ei_db_free ();
-
static void nat44_ei_db_init (u32 translations, u32 translation_buckets,
u32 user_buckets);
@@ -304,6 +309,76 @@ nat_validate_interface_counters (nat44_ei_main_t *nm, u32 sw_if_index)
nat_validate_simple_counter (nm->counters.hairpinning, sw_if_index);
}
+static void
+nat44_ei_add_del_addr_to_fib_foreach_out_if (ip4_address_t *addr, u8 is_add)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_interface_t *i;
+
+ pool_foreach (i, nm->interfaces)
+ {
+ if (nat44_ei_interface_is_outside (i) && !nm->out2in_dpo)
+ {
+ nat44_ei_add_del_addr_to_fib (addr, 32, i->sw_if_index, is_add);
+ }
+ }
+ pool_foreach (i, nm->output_feature_interfaces)
+ {
+ if (nat44_ei_interface_is_outside (i) && !nm->out2in_dpo)
+ {
+ nat44_ei_add_del_addr_to_fib (addr, 32, i->sw_if_index, is_add);
+ }
+ }
+}
+
+static_always_inline void
+nat44_ei_add_del_addr_to_fib_foreach_addr (u32 sw_if_index, u8 is_add)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_address_t *ap;
+
+ vec_foreach (ap, nm->addresses)
+ {
+ nat44_ei_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, is_add);
+ }
+}
+
+static_always_inline void
+nat44_ei_add_del_addr_to_fib_foreach_addr_only_sm (u32 sw_if_index, u8 is_add)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_static_mapping_t *m;
+
+ pool_foreach (m, nm->static_mappings)
+ {
+ if (is_sm_addr_only (m->flags) &&
+ !(m->local_addr.as_u32 == m->external_addr.as_u32))
+ {
+ nat44_ei_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index,
+ is_add);
+ }
+ }
+}
+
+static int
+nat44_ei_is_address_used_in_static_mapping (ip4_address_t addr)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_static_mapping_t *m;
+ pool_foreach (m, nm->static_mappings)
+ {
+ if (is_sm_addr_only (m->flags) || is_sm_identity_nat (m->flags))
+ {
+ continue;
+ }
+ if (m->external_addr.as_u32 == addr.as_u32)
+ {
+ return 1;
+ }
+ }
+ return 0;
+}
+
clib_error_t *
nat44_ei_init (vlib_main_t *vm)
{
@@ -372,14 +447,15 @@ nat44_ei_init (vlib_main_t *vm)
/* Use all available workers by default */
if (nm->num_workers > 1)
{
-
for (i = 0; i < nm->num_workers; i++)
bitmap = clib_bitmap_set (bitmap, i, 1);
nat44_ei_set_workers (bitmap);
clib_bitmap_free (bitmap);
}
else
- nm->per_thread_data[0].snat_thread_index = 0;
+ {
+ nm->per_thread_data[0].snat_thread_index = 0;
+ }
/* callbacks to call when interface address changes. */
cbi.function = nat44_ei_ip4_add_del_interface_address_cb;
@@ -402,8 +478,6 @@ nat44_ei_init (vlib_main_t *vm)
nm->hairpinning_fq_index =
vlib_frame_queue_main_init (nat44_ei_hairpinning_node.index, 0);
- nm->hairpin_dst_fq_index =
- vlib_frame_queue_main_init (nat44_ei_hairpin_dst_node.index, 0);
nm->in2out_hairpinning_finish_ip4_lookup_node_fq_index =
vlib_frame_queue_main_init (
nat44_ei_in2out_hairpinning_finish_ip4_lookup_node.index, 0);
@@ -466,43 +540,104 @@ nat44_ei_plugin_enable (nat44_ei_config_t c)
nm->user_buckets);
nat44_ei_set_alloc_default ();
- // TODO: zero simple counter for all counters missing
-
vlib_zero_simple_counter (&nm->total_users, 0);
vlib_zero_simple_counter (&nm->total_sessions, 0);
vlib_zero_simple_counter (&nm->user_limit_reached, 0);
+ if (nm->num_workers > 1)
+ {
+ if (nm->fq_in2out_index == ~0)
+ {
+ nm->fq_in2out_index = vlib_frame_queue_main_init (
+ nm->in2out_node_index, nm->frame_queue_nelts);
+ }
+ if (nm->fq_out2in_index == ~0)
+ {
+ nm->fq_out2in_index = vlib_frame_queue_main_init (
+ nm->out2in_node_index, nm->frame_queue_nelts);
+ }
+ if (nm->fq_in2out_output_index == ~0)
+ {
+ nm->fq_in2out_output_index = vlib_frame_queue_main_init (
+ nm->in2out_output_node_index, nm->frame_queue_nelts);
+ }
+ }
+
nat_ha_enable ();
nm->enabled = 1;
return 0;
}
-void
-nat44_ei_addresses_free (nat44_ei_address_t **addresses)
+static_always_inline nat44_ei_outside_fib_t *
+nat44_ei_get_outside_fib (nat44_ei_outside_fib_t *outside_fibs, u32 fib_index)
{
- nat44_ei_address_t *ap;
- vec_foreach (ap, *addresses)
+ nat44_ei_outside_fib_t *f;
+ vec_foreach (f, outside_fibs)
{
-#define _(N, i, n, s) vec_free (ap->busy_##n##_ports_per_thread);
- foreach_nat_protocol
-#undef _
+ if (f->fib_index == fib_index)
+ {
+ return f;
+ }
}
- vec_free (*addresses);
- *addresses = 0;
+ return 0;
+}
+
+static_always_inline nat44_ei_interface_t *
+nat44_ei_get_interface (nat44_ei_interface_t *interfaces, u32 sw_if_index)
+{
+ nat44_ei_interface_t *i;
+ pool_foreach (i, interfaces)
+ {
+ if (i->sw_if_index == sw_if_index)
+ {
+ return i;
+ }
+ }
+ return 0;
+}
+
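+/* Hairpinning is reference counted: the ip4-local feature is enabled on
+ * the first registration and disabled again on the last; disabling with
+ * no registrations outstanding returns 1. */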
+static_always_inline int
+nat44_ei_hairpinning_enable (u8 is_enable)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ u32 sw_if_index = 0; // local0
+
+ if (is_enable)
+ {
+ nm->hairpin_reg += 1;
+ if (1 == nm->hairpin_reg)
+ {
+ return vnet_feature_enable_disable (
+ "ip4-local", "nat44-ei-hairpinning", sw_if_index, is_enable, 0, 0);
+ }
+ }
+ else
+ {
+ if (0 == nm->hairpin_reg)
+ return 1;
+
+ nm->hairpin_reg -= 1;
+ if (0 == nm->hairpin_reg)
+ {
+ return vnet_feature_enable_disable (
+ "ip4-local", "nat44-ei-hairpinning", sw_if_index, is_enable, 0, 0);
+ }
+ }
+
+ return 0;
}
int
-nat44_ei_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
+nat44_ei_add_interface (u32 sw_if_index, u8 is_inside)
{
const char *feature_name, *del_feature_name;
nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_interface_t *i;
- nat44_ei_address_t *ap;
- nat44_ei_static_mapping_t *m;
+
nat44_ei_outside_fib_t *outside_fib;
- u32 fib_index =
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+ nat44_ei_interface_t *i;
+ u32 fib_index;
+ int rv;
fail_if_disabled ();
@@ -512,470 +647,665 @@ nat44_ei_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
return VNET_API_ERROR_UNSUPPORTED;
}
- pool_foreach (i, nm->output_feature_interfaces)
+ if (nat44_ei_get_interface (nm->output_feature_interfaces, sw_if_index))
{
- if (i->sw_if_index == sw_if_index)
+ nat44_ei_log_err ("error interface already configured");
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+ i = nat44_ei_get_interface (nm->interfaces, sw_if_index);
+ if (i)
+ {
+ if ((nat44_ei_interface_is_inside (i) && is_inside) ||
+ (nat44_ei_interface_is_outside (i) && !is_inside))
{
- nat44_ei_log_err ("error interface already configured");
- return VNET_API_ERROR_VALUE_EXIST;
+ return 0;
+ }
+ if (nm->num_workers > 1)
+ {
+ del_feature_name = !is_inside ? "nat44-ei-in2out-worker-handoff" :
+ "nat44-ei-out2in-worker-handoff";
+ feature_name = "nat44-ei-handoff-classify";
+ }
+ else
+ {
+ del_feature_name =
+ !is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
+
+ feature_name = "nat44-ei-classify";
}
- }
- if (nm->static_mapping_only && !(nm->static_mapping_connection_tracking))
- feature_name = is_inside ? "nat44-ei-in2out-fast" : "nat44-ei-out2in-fast";
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
+ sw_if_index, 0, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
+ sw_if_index, 1, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ if (!is_inside)
+ {
+ rv = nat44_ei_hairpinning_enable (0);
+ if (rv)
+ {
+ return rv;
+ }
+ }
+ }
else
{
if (nm->num_workers > 1)
- feature_name = is_inside ? "nat44-ei-in2out-worker-handoff" :
- "nat44-ei-out2in-worker-handoff";
+ {
+ feature_name = is_inside ? "nat44-ei-in2out-worker-handoff" :
+ "nat44-ei-out2in-worker-handoff";
+ }
else
- feature_name = is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
- }
+ {
+ feature_name = is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
+ }
+ nat_validate_interface_counters (nm, sw_if_index);
+
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
+ sw_if_index, 1, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ if (is_inside && !nm->out2in_dpo)
+ {
+ rv = nat44_ei_hairpinning_enable (1);
+ if (rv)
+ {
+ return rv;
+ }
+ }
- if (nm->fq_in2out_index == ~0 && nm->num_workers > 1)
- nm->fq_in2out_index = vlib_frame_queue_main_init (nm->in2out_node_index,
- nm->frame_queue_nelts);
+ pool_get (nm->interfaces, i);
+ i->sw_if_index = sw_if_index;
+ i->flags = 0;
+ }
- if (nm->fq_out2in_index == ~0 && nm->num_workers > 1)
- nm->fq_out2in_index = vlib_frame_queue_main_init (nm->out2in_node_index,
- nm->frame_queue_nelts);
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
if (!is_inside)
{
- vec_foreach (outside_fib, nm->outside_fibs)
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+
+ outside_fib = nat44_ei_get_outside_fib (nm->outside_fibs, fib_index);
+ if (outside_fib)
{
- if (outside_fib->fib_index == fib_index)
- {
- if (is_del)
- {
- outside_fib->refcount--;
- if (!outside_fib->refcount)
- vec_del1 (nm->outside_fibs,
- outside_fib - nm->outside_fibs);
- }
- else
- outside_fib->refcount++;
- goto feature_set;
- }
+ outside_fib->refcount++;
}
- if (!is_del)
+ else
{
vec_add2 (nm->outside_fibs, outside_fib, 1);
- outside_fib->refcount = 1;
outside_fib->fib_index = fib_index;
+ outside_fib->refcount = 1;
}
- }
-feature_set:
- pool_foreach (i, nm->interfaces)
+ nat44_ei_add_del_addr_to_fib_foreach_addr (sw_if_index, 1);
+ nat44_ei_add_del_addr_to_fib_foreach_addr_only_sm (sw_if_index, 1);
+ }
+ else
{
- if (i->sw_if_index == sw_if_index)
- {
- if (is_del)
- {
- if (nat44_ei_interface_is_inside (i) &&
- nat44_ei_interface_is_outside (i))
- {
- if (is_inside)
- i->flags &= ~NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
- else
- i->flags &= ~NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
+ }
- if (nm->num_workers > 1)
- {
- del_feature_name = "nat44-ei-handoff-classify";
- clib_warning (
- "del_feature_name = nat44-ei-handoff-classify");
- feature_name = !is_inside ?
- "nat44-ei-in2out-worker-handoff" :
- "nat44-ei-out2in-worker-handoff";
- }
- else
- {
- del_feature_name = "nat44-ei-classify";
- clib_warning ("del_feature_name = nat44-ei-classify");
- feature_name =
- !is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
- }
+ return 0;
+}
- int rv =
- ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
- if (rv)
- return rv;
- rv = vnet_feature_enable_disable (
- "ip4-unicast", del_feature_name, sw_if_index, 0, 0, 0);
- if (rv)
- return rv;
- rv = vnet_feature_enable_disable (
- "ip4-unicast", feature_name, sw_if_index, 1, 0, 0);
- if (rv)
- return rv;
- if (!is_inside)
- {
- rv = vnet_feature_enable_disable ("ip4-local",
- "nat44-ei-hairpinning",
- sw_if_index, 1, 0, 0);
- if (rv)
- return rv;
- }
- }
- else
- {
- int rv =
- ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
- if (rv)
- return rv;
- rv = vnet_feature_enable_disable (
- "ip4-unicast", feature_name, sw_if_index, 0, 0, 0);
- if (rv)
- return rv;
- pool_put (nm->interfaces, i);
- if (is_inside)
- {
- rv = vnet_feature_enable_disable ("ip4-local",
- "nat44-ei-hairpinning",
- sw_if_index, 0, 0, 0);
- if (rv)
- return rv;
- }
- }
- }
- else
- {
- if ((nat44_ei_interface_is_inside (i) && is_inside) ||
- (nat44_ei_interface_is_outside (i) && !is_inside))
- return 0;
+int
+nat44_ei_del_interface (u32 sw_if_index, u8 is_inside)
+{
+ const char *feature_name, *del_feature_name;
+ nat44_ei_main_t *nm = &nat44_ei_main;
- if (nm->num_workers > 1)
- {
- del_feature_name = !is_inside ?
- "nat44-ei-in2out-worker-handoff" :
- "nat44-ei-out2in-worker-handoff";
- feature_name = "nat44-ei-handoff-classify";
- clib_warning ("feature_name = nat44-ei-handoff-classify");
- }
- else
- {
- del_feature_name =
- !is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
- feature_name = "nat44-ei-classify";
- clib_warning ("feature_name = nat44-ei-classify");
- }
+ nat44_ei_outside_fib_t *outside_fib;
+ nat44_ei_interface_t *i;
+ u32 fib_index;
+ int rv;
- int rv =
- ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
- if (rv)
- return rv;
- rv = vnet_feature_enable_disable (
- "ip4-unicast", del_feature_name, sw_if_index, 0, 0, 0);
- if (rv)
- return rv;
- rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
- sw_if_index, 1, 0, 0);
- if (rv)
- return rv;
- if (!is_inside)
- {
- rv = vnet_feature_enable_disable (
- "ip4-local", "nat44-ei-hairpinning", sw_if_index, 0, 0, 0);
- if (rv)
- return rv;
- }
- goto set_flags;
- }
+ fail_if_disabled ();
- goto fib;
- }
+ if (nm->out2in_dpo && !is_inside)
+ {
+ nat44_ei_log_err ("error unsupported");
+ return VNET_API_ERROR_UNSUPPORTED;
}
- if (is_del)
+ i = nat44_ei_get_interface (nm->interfaces, sw_if_index);
+ if (i == 0)
{
nat44_ei_log_err ("error interface couldn't be found");
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
- pool_get (nm->interfaces, i);
- i->sw_if_index = sw_if_index;
- i->flags = 0;
- nat_validate_interface_counters (nm, sw_if_index);
-
- int rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
- sw_if_index, 1, 0, 0);
- if (rv)
- return rv;
-
- rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
- if (rv)
- return rv;
-
- if (is_inside && !nm->out2in_dpo)
+ if (nat44_ei_interface_is_inside (i) && nat44_ei_interface_is_outside (i))
{
- rv = vnet_feature_enable_disable ("ip4-local", "nat44-ei-hairpinning",
+ if (nm->num_workers > 1)
+ {
+ del_feature_name = "nat44-ei-handoff-classify";
+ feature_name = !is_inside ? "nat44-ei-in2out-worker-handoff" :
+ "nat44-ei-out2in-worker-handoff";
+ }
+ else
+ {
+ del_feature_name = "nat44-ei-classify";
+ feature_name = !is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
+ }
+
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
+ sw_if_index, 0, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
sw_if_index, 1, 0, 0);
if (rv)
- return rv;
- }
-
-set_flags:
- if (is_inside)
- {
- i->flags |= NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
- return 0;
+ {
+ return rv;
+ }
+ if (is_inside)
+ {
+ i->flags &= ~NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
+ }
+ else
+ {
+ rv = nat44_ei_hairpinning_enable (1);
+ if (rv)
+ {
+ return rv;
+ }
+ i->flags &= ~NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+ }
}
else
- i->flags |= NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+ {
+ if (nm->num_workers > 1)
+ {
+ feature_name = is_inside ? "nat44-ei-in2out-worker-handoff" :
+ "nat44-ei-out2in-worker-handoff";
+ }
+ else
+ {
+ feature_name = is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
+ }
- /* Add/delete external addresses to FIB */
-fib:
- vec_foreach (ap, nm->addresses)
- nat44_ei_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, !is_del);
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
+ sw_if_index, 0, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ if (is_inside)
+ {
+ rv = nat44_ei_hairpinning_enable (0);
+ if (rv)
+ {
+ return rv;
+ }
+ }
- pool_foreach (m, nm->static_mappings)
+ // remove interface
+ pool_put (nm->interfaces, i);
+ }
+
+ if (!is_inside)
{
- if (!(nat44_ei_is_addr_only_static_mapping (m)) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- continue;
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+ outside_fib = nat44_ei_get_outside_fib (nm->outside_fibs, fib_index);
+ if (outside_fib)
+ {
+ outside_fib->refcount--;
+ if (!outside_fib->refcount)
+ {
+ vec_del1 (nm->outside_fibs, outside_fib - nm->outside_fibs);
+ }
+ }
- nat44_ei_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index,
- !is_del);
+ nat44_ei_add_del_addr_to_fib_foreach_addr (sw_if_index, 0);
+ nat44_ei_add_del_addr_to_fib_foreach_addr_only_sm (sw_if_index, 0);
}
return 0;
}
int
-nat44_ei_interface_add_del_output_feature (u32 sw_if_index, u8 is_inside,
- int is_del)
+nat44_ei_add_output_interface (u32 sw_if_index)
{
nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_interface_t *i;
- nat44_ei_address_t *ap;
- nat44_ei_static_mapping_t *m;
+
nat44_ei_outside_fib_t *outside_fib;
- u32 fib_index =
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+ nat44_ei_interface_t *i;
+ u32 fib_index;
+ int rv;
fail_if_disabled ();
- if (nm->static_mapping_only && !(nm->static_mapping_connection_tracking))
+ if (nat44_ei_get_interface (nm->interfaces, sw_if_index))
{
- nat44_ei_log_err ("error unsupported");
- return VNET_API_ERROR_UNSUPPORTED;
+ nat44_ei_log_err ("error interface already configured");
+ return VNET_API_ERROR_VALUE_EXIST;
}
- pool_foreach (i, nm->interfaces)
+ if (nat44_ei_get_interface (nm->output_feature_interfaces, sw_if_index))
{
- if (i->sw_if_index == sw_if_index)
- {
- nat44_ei_log_err ("error interface already configured");
- return VNET_API_ERROR_VALUE_EXIST;
- }
+ nat44_ei_log_err ("error interface already configured");
+ return VNET_API_ERROR_VALUE_EXIST;
}
- if (!is_inside)
+ if (nm->num_workers > 1)
{
- vec_foreach (outside_fib, nm->outside_fibs)
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
+ if (rv)
{
- if (outside_fib->fib_index == fib_index)
- {
- if (is_del)
- {
- outside_fib->refcount--;
- if (!outside_fib->refcount)
- vec_del1 (nm->outside_fibs,
- outside_fib - nm->outside_fibs);
- }
- else
- outside_fib->refcount++;
- goto feature_set;
- }
+ return rv;
}
- if (!is_del)
+ rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 1);
+ if (rv)
{
- vec_add2 (nm->outside_fibs, outside_fib, 1);
- outside_fib->refcount = 1;
- outside_fib->fib_index = fib_index;
+ return rv;
+ }
+ rv = vnet_feature_enable_disable (
+ "ip4-unicast", "nat44-ei-out2in-worker-handoff", sw_if_index, 1, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable (
+ "ip4-output", "nat44-ei-in2out-output-worker-handoff", sw_if_index, 1,
+ 0, 0);
+ if (rv)
+ {
+ return rv;
}
}
-
-feature_set:
- if (is_inside)
+ else
{
- int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
if (rv)
- return rv;
- rv =
- ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
+ {
+ return rv;
+ }
+ rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 1);
if (rv)
- return rv;
- rv = vnet_feature_enable_disable ("ip4-unicast", "nat44-ei-hairpin-dst",
- sw_if_index, !is_del, 0, 0);
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", "nat44-ei-out2in",
+ sw_if_index, 1, 0, 0);
if (rv)
- return rv;
- rv = vnet_feature_enable_disable ("ip4-output", "nat44-ei-hairpin-src",
- sw_if_index, !is_del, 0, 0);
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-output", "nat44-ei-in2out-output",
+ sw_if_index, 1, 0, 0);
if (rv)
- return rv;
- goto fq;
+ {
+ return rv;
+ }
+ }
+
+ nat_validate_interface_counters (nm, sw_if_index);
+
+ pool_get (nm->output_feature_interfaces, i);
+ i->sw_if_index = sw_if_index;
+ i->flags = 0;
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+ outside_fib = nat44_ei_get_outside_fib (nm->outside_fibs, fib_index);
+ if (outside_fib)
+ {
+ outside_fib->refcount++;
+ }
+ else
+ {
+ vec_add2 (nm->outside_fibs, outside_fib, 1);
+ outside_fib->fib_index = fib_index;
+ outside_fib->refcount = 1;
+ }
+
+ nat44_ei_add_del_addr_to_fib_foreach_addr (sw_if_index, 1);
+ nat44_ei_add_del_addr_to_fib_foreach_addr_only_sm (sw_if_index, 1);
+
+ return 0;
+}
+
+int
+nat44_ei_del_output_interface (u32 sw_if_index)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+
+ nat44_ei_outside_fib_t *outside_fib;
+ nat44_ei_interface_t *i;
+ u32 fib_index;
+ int rv;
+
+ fail_if_disabled ();
+
+ i = nat44_ei_get_interface (nm->output_feature_interfaces, sw_if_index);
+ if (!i)
+ {
+ nat44_ei_log_err ("error interface couldn't be found");
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
}
if (nm->num_workers > 1)
{
- int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
if (rv)
- return rv;
- rv =
- ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
+ {
+ return rv;
+ }
+ rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 0);
if (rv)
- return rv;
- rv = vnet_feature_enable_disable ("ip4-unicast",
- "nat44-ei-out2in-worker-handoff",
- sw_if_index, !is_del, 0, 0);
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable (
+ "ip4-unicast", "nat44-ei-out2in-worker-handoff", sw_if_index, 0, 0, 0);
if (rv)
- return rv;
+ {
+ return rv;
+ }
rv = vnet_feature_enable_disable (
- "ip4-output", "nat44-ei-in2out-output-worker-handoff", sw_if_index,
- !is_del, 0, 0);
+ "ip4-output", "nat44-ei-in2out-output-worker-handoff", sw_if_index, 0,
+ 0, 0);
if (rv)
- return rv;
+ {
+ return rv;
+ }
}
else
{
- int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
if (rv)
- return rv;
- rv =
- ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
+ {
+ return rv;
+ }
+ rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 0);
if (rv)
- return rv;
+ {
+ return rv;
+ }
rv = vnet_feature_enable_disable ("ip4-unicast", "nat44-ei-out2in",
- sw_if_index, !is_del, 0, 0);
+ sw_if_index, 0, 0, 0);
if (rv)
- return rv;
+ {
+ return rv;
+ }
rv = vnet_feature_enable_disable ("ip4-output", "nat44-ei-in2out-output",
- sw_if_index, !is_del, 0, 0);
+ sw_if_index, 0, 0, 0);
if (rv)
- return rv;
+ {
+ return rv;
+ }
}
-fq:
- if (nm->fq_in2out_output_index == ~0 && nm->num_workers > 1)
- nm->fq_in2out_output_index =
- vlib_frame_queue_main_init (nm->in2out_output_node_index, 0);
+ pool_put (nm->output_feature_interfaces, i);
- if (nm->fq_out2in_index == ~0 && nm->num_workers > 1)
- nm->fq_out2in_index =
- vlib_frame_queue_main_init (nm->out2in_node_index, 0);
-
- pool_foreach (i, nm->output_feature_interfaces)
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+ outside_fib = nat44_ei_get_outside_fib (nm->outside_fibs, fib_index);
+ if (outside_fib)
{
- if (i->sw_if_index == sw_if_index)
+ outside_fib->refcount--;
+ if (!outside_fib->refcount)
{
- if (is_del)
- pool_put (nm->output_feature_interfaces, i);
- else
- return VNET_API_ERROR_VALUE_EXIST;
-
- goto fib;
+ vec_del1 (nm->outside_fibs, outside_fib - nm->outside_fibs);
}
}
+ nat44_ei_add_del_addr_to_fib_foreach_addr (sw_if_index, 0);
+ nat44_ei_add_del_addr_to_fib_foreach_addr_only_sm (sw_if_index, 0);
+
+ return 0;
+}
+
+int
+nat44_ei_add_del_output_interface (u32 sw_if_index, int is_del)
+{
if (is_del)
{
- nat44_ei_log_err ("error interface couldn't be found");
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ return nat44_ei_del_output_interface (sw_if_index);
}
-
- pool_get (nm->output_feature_interfaces, i);
- i->sw_if_index = sw_if_index;
- i->flags = 0;
- nat_validate_interface_counters (nm, sw_if_index);
- if (is_inside)
- i->flags |= NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
else
- i->flags |= NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
-
- /* Add/delete external addresses to FIB */
-fib:
- if (is_inside)
- return 0;
+ {
+ return nat44_ei_add_output_interface (sw_if_index);
+ }
+}
- vec_foreach (ap, nm->addresses)
- nat44_ei_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, !is_del);
+int
+nat44_ei_del_addresses ()
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_address_t *a, *vec;
+ int error = 0;
- pool_foreach (m, nm->static_mappings)
+ vec = vec_dup (nm->addresses);
+ vec_foreach (a, vec)
{
- if (!((nat44_ei_is_addr_only_static_mapping (m))) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- continue;
+ error = nat44_ei_del_address (a->addr, 0);
- nat44_ei_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index,
- !is_del);
+ if (error)
+ {
+ nat44_ei_log_err ("error occurred while removing adderess");
+ }
}
+ vec_free (vec);
+ vec_free (nm->addresses);
+ nm->addresses = 0;
- return 0;
+ vec_free (nm->auto_add_sw_if_indices);
+ nm->auto_add_sw_if_indices = 0;
+ return error;
}
int
-nat44_ei_plugin_disable ()
+nat44_ei_del_interfaces ()
{
nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_interface_t *i, *vec;
+ nat44_ei_interface_t *i, *pool;
int error = 0;
- // first unregister all nodes from interfaces
- vec = vec_dup (nm->interfaces);
- vec_foreach (i, vec)
+ pool = pool_dup (nm->interfaces);
+ pool_foreach (i, pool)
{
if (nat44_ei_interface_is_inside (i))
- error = nat44_ei_interface_add_del (i->sw_if_index, 1, 1);
+ {
+ error = nat44_ei_del_interface (i->sw_if_index, 1);
+ }
if (nat44_ei_interface_is_outside (i))
- error = nat44_ei_interface_add_del (i->sw_if_index, 0, 1);
+ {
+ error = nat44_ei_del_interface (i->sw_if_index, 0);
+ }
if (error)
{
- nat44_ei_log_err ("error occurred while removing interface %u",
- i->sw_if_index);
+ nat44_ei_log_err ("error occurred while removing interface");
}
}
- vec_free (vec);
+ pool_free (pool);
+ pool_free (nm->interfaces);
nm->interfaces = 0;
+ return error;
+}
- vec = vec_dup (nm->output_feature_interfaces);
- vec_foreach (i, vec)
- {
- if (nat44_ei_interface_is_inside (i))
- error =
- nat44_ei_interface_add_del_output_feature (i->sw_if_index, 1, 1);
- if (nat44_ei_interface_is_outside (i))
- error =
- nat44_ei_interface_add_del_output_feature (i->sw_if_index, 0, 1);
+int
+nat44_ei_del_output_interfaces ()
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_interface_t *i, *pool;
+ int error = 0;
+ pool = pool_dup (nm->output_feature_interfaces);
+ pool_foreach (i, pool)
+ {
+ error = nat44_ei_del_output_interface (i->sw_if_index);
if (error)
{
- nat44_ei_log_err ("error occurred while removing interface %u",
- i->sw_if_index);
+ nat44_ei_log_err ("error occurred while removing output interface");
}
}
- vec_free (vec);
+ pool_free (pool);
+ pool_free (nm->output_feature_interfaces);
nm->output_feature_interfaces = 0;
+ return error;
+}
- nat_ha_disable ();
- nat44_ei_db_free ();
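+/* Interface add/del callback: nothing to do on add, but when an
+ * interface is deleted, tear down any NAT44-EI inside/outside or
+ * output-feature configuration still attached to it. */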
+static clib_error_t *
+nat44_ei_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_interface_t *i;
+ int error = 0;
- nat44_ei_addresses_free (&nm->addresses);
+ if (is_add)
+ return 0;
- vec_free (nm->to_resolve);
- vec_free (nm->auto_add_sw_if_indices);
+ if (!nm->enabled)
+ return 0;
+
+ i = nat44_ei_get_interface (nm->interfaces, sw_if_index);
+ if (i)
+ {
+ bool is_inside = nat44_ei_interface_is_inside (i);
+ bool is_outside = nat44_ei_interface_is_outside (i);
+
+ if (is_inside)
+ {
+ error |= nat44_ei_del_interface (sw_if_index, 1);
+ }
+ if (is_outside)
+ {
+ error |= nat44_ei_del_interface (sw_if_index, 0);
+ }
+
+ if (error)
+ {
+ nat44_ei_log_err ("error occurred while removing interface");
+ }
+ }
+
+ i = nat44_ei_get_interface (nm->output_feature_interfaces, sw_if_index);
+ if (i)
+ {
+ error = nat44_ei_del_output_interface (sw_if_index);
+ if (error)
+ {
+ nat44_ei_log_err ("error occurred while removing output interface");
+ }
+ }
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (nat44_ei_sw_interface_add_del);
+
+int
+nat44_ei_del_static_mappings ()
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_static_mapping_t *m, *pool;
+ int error = 0;
+ pool = pool_dup (nm->static_mappings);
+ pool_foreach (m, pool)
+ {
+ error = nat44_ei_del_static_mapping_internal (
+ m->local_addr, m->external_addr, m->local_port, m->external_port,
+ m->proto, m->vrf_id, ~0, m->flags);
+ if (error)
+ {
+ nat44_ei_log_err ("error occurred while removing mapping");
+ }
+ }
+ pool_free (pool);
+ pool_free (nm->static_mappings);
+ nm->static_mappings = 0;
+
+ vec_free (nm->to_resolve);
nm->to_resolve = 0;
- nm->auto_add_sw_if_indices = 0;
- nm->forwarding_enabled = 0;
+ clib_bihash_free_8_8 (&nm->static_mapping_by_local);
+ clib_bihash_free_8_8 (&nm->static_mapping_by_external);
+
+ return error;
+}
+
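+/* Disable the plugin: remove static mappings, addresses and (output)
+ * interfaces, then, when dynamic translation is configured, free the
+ * lookup tables and per-thread session databases; any partial failure
+ * is reported as VNET_API_ERROR_BUG. */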
+int
+nat44_ei_plugin_disable ()
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_main_per_thread_data_t *tnm;
+ int rc, error = 0;
+
+ fail_if_disabled ();
+
+ nat_ha_disable ();
+
+ rc = nat44_ei_del_static_mappings ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ei_del_addresses ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ei_del_interfaces ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ei_del_output_interfaces ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ if (nm->pat)
+ {
+ clib_bihash_free_8_8 (&nm->in2out);
+ clib_bihash_free_8_8 (&nm->out2in);
+
+ vec_foreach (tnm, nm->per_thread_data)
+ {
+ nat44_ei_worker_db_free (tnm);
+ }
+ }
- nm->enabled = 0;
clib_memset (&nm->rconfig, 0, sizeof (nm->rconfig));
+ nm->forwarding_enabled = 0;
+ nm->enabled = 0;
+
return error;
}
@@ -984,7 +1314,6 @@ nat44_ei_set_outside_address_and_port (nat44_ei_address_t *addresses,
u32 thread_index, ip4_address_t addr,
u16 port, nat_protocol_t protocol)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
nat44_ei_address_t *a = 0;
u32 address_index;
u16 port_host_byte_order = clib_net_to_host_u16 (port);
@@ -995,21 +1324,13 @@ nat44_ei_set_outside_address_and_port (nat44_ei_address_t *addresses,
continue;
a = addresses + address_index;
- switch (protocol)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
- return VNET_API_ERROR_INSTANCE_IN_USE; \
- ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- return 0;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (nm, "unknown protocol");
- return 1;
- }
+ if (nat44_ei_port_is_used (a, protocol, port_host_byte_order))
+ return VNET_API_ERROR_INSTANCE_IN_USE;
+
+ nat44_ei_port_get (a, protocol, port_host_byte_order);
+ a->busy_ports_per_thread[protocol][thread_index]++;
+ a->busy_ports[protocol]++;
+ return 0;
}
return VNET_API_ERROR_NO_SUCH_ENTRY;
@@ -1044,7 +1365,6 @@ nat44_ei_free_outside_address_and_port (nat44_ei_address_t *addresses,
u32 thread_index, ip4_address_t *addr,
u16 port, nat_protocol_t protocol)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
nat44_ei_address_t *a;
u32 address_index;
u16 port_host_byte_order = clib_net_to_host_u16 (port);
@@ -1058,21 +1378,9 @@ nat44_ei_free_outside_address_and_port (nat44_ei_address_t *addresses,
ASSERT (address_index < vec_len (addresses));
a = addresses + address_index;
-
- switch (protocol)
- {
-#define _(N, i, n, s) \
- case NAT_PROTOCOL_##N: \
- ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
- --a->busy_##n##_port_refcounts[port_host_byte_order]; \
- a->busy_##n##_ports--; \
- a->busy_##n##_ports_per_thread[thread_index]--; \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (nm, "unknown protocol");
- return;
- }
+ nat44_ei_port_put (a, protocol, port_host_byte_order);
+ a->busy_ports[protocol]--;
+ a->busy_ports_per_thread[protocol][thread_index]--;
}
void
@@ -1102,7 +1410,8 @@ nat44_ei_free_session_data_v2 (nat44_ei_main_t *nm, nat44_ei_session_t *s,
/* log NAT event */
nat_ipfix_logging_nat44_ses_delete (
thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
- s->nat_proto, s->in2out.port, s->out2in.port, s->in2out.fib_index);
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
s->ext_host_port, s->nat_proto, s->out2in.fib_index,
@@ -1270,7 +1579,8 @@ nat44_ei_free_session_data (nat44_ei_main_t *nm, nat44_ei_session_t *s,
nat_ipfix_logging_nat44_ses_delete (
thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
- s->nat_proto, s->in2out.port, s->out2in.port, s->in2out.fib_index);
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
s->ext_host_port, s->nat_proto, s->out2in.fib_index,
@@ -1425,6 +1735,20 @@ nat44_ei_get_in2out_worker_index (ip4_header_t *ip0, u32 rx_fib_index0,
}
u32
+nat44_ei_get_thread_idx_by_port (u16 e_port)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ u32 thread_idx = nm->num_workers;
+ if (nm->num_workers > 1)
+ {
+ thread_idx = nm->first_worker_index +
+ nm->workers[(e_port - 1024) / nm->port_per_thread %
+ _vec_len (nm->workers)];
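+ /* example values: with 4 workers and port_per_thread 16000, external
+ * port 20000 maps to workers[(20000 - 1024) / 16000 % 4] = workers[1] */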
+ }
+ return thread_idx;
+}
+
+u32
nat44_ei_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip0,
u32 rx_fib_index0, u8 is_output)
{
@@ -1502,9 +1826,8 @@ nat44_ei_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip0,
}
/* worker by outside port */
- next_worker_index = nm->first_worker_index;
- next_worker_index +=
- nm->workers[(clib_net_to_host_u16 (port) - 1024) / nm->port_per_thread];
+ next_worker_index =
+ nat44_ei_get_thread_idx_by_port (clib_net_to_host_u16 (port));
return next_worker_index;
}
@@ -1522,75 +1845,95 @@ nat44_ei_alloc_default_cb (nat44_ei_address_t *addresses, u32 fib_index,
if (vec_len (addresses) > 0)
{
-
int s_addr_offset = s_addr.as_u32 % vec_len (addresses);
for (i = s_addr_offset; i < vec_len (addresses); ++i)
{
a = addresses + i;
- switch (proto)
+
+ if (a->busy_ports_per_thread[proto][thread_index] < port_per_thread)
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
- { \
- if (a->fib_index == fib_index) \
- { \
- while (1) \
- { \
- portnum = (port_per_thread * snat_thread_index) + \
- nat_random_port (&nm->random_seed, 0, \
- port_per_thread - 1) + \
- 1024; \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- --a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16 (portnum); \
- return 0; \
- } \
- } \
- else if (a->fib_index == ~0) \
- { \
- ga = a; \
- } \
- } \
- break;
- foreach_nat_protocol;
- default:
- nat_elog_info (nm, "unknown protocol");
- return 1;
+ if (a->fib_index == fib_index)
+ {
+ while (1)
+ {
+ portnum = (port_per_thread * snat_thread_index) +
+ nat_random_port (&nm->random_seed, 0,
+ port_per_thread - 1) +
+ 1024;
+ if (nat44_ei_port_is_used (a, proto, portnum))
+ continue;
+ nat44_ei_port_get (a, proto, portnum);
+ a->busy_ports_per_thread[proto][thread_index]++;
+ a->busy_ports[proto]++;
+ *addr = a->addr;
+ *port = clib_host_to_net_u16 (portnum);
+ return 0;
+ }
+ }
+ else if (a->fib_index == ~0)
+ {
+ ga = a;
+ }
}
}
for (i = 0; i < s_addr_offset; ++i)
{
a = addresses + i;
- switch (proto)
+ if (a->busy_ports_per_thread[proto][thread_index] < port_per_thread)
{
- foreach_nat_protocol;
- default:
- nat_elog_info (nm, "unknown protocol");
- return 1;
+ if (a->fib_index == fib_index)
+ {
+ while (1)
+ {
+ portnum = (port_per_thread * snat_thread_index) +
+ nat_random_port (&nm->random_seed, 0,
+ port_per_thread - 1) +
+ 1024;
+ if (nat44_ei_port_is_used (a, proto, portnum))
+ continue;
+ nat44_ei_port_get (a, proto, portnum);
+ a->busy_ports_per_thread[proto][thread_index]++;
+ a->busy_ports[proto]++;
+ *addr = a->addr;
+ *port = clib_host_to_net_u16 (portnum);
+ return 0;
+ }
+ }
+ else if (a->fib_index == ~0)
+ {
+ ga = a;
+ }
}
}
- if (ga)
- {
- a = ga;
- // fake fib index to reuse macro
- fib_index = ~0;
- switch (proto)
+
+ if (ga)
{
- foreach_nat_protocol;
- default : nat_elog_info (nm, "unknown protocol");
- return 1;
+ a = ga;
+ if (a->busy_ports_per_thread[proto][thread_index] < port_per_thread)
+ {
+ if (a->fib_index == ~0)
+ {
+ while (1)
+ {
+ portnum = (port_per_thread * snat_thread_index) +
+ nat_random_port (&nm->random_seed, 0,
+ port_per_thread - 1) +
+ 1024;
+ if (nat44_ei_port_is_used (a, proto, portnum))
+ continue;
+ nat44_ei_port_get (a, proto, portnum);
+ a->busy_ports_per_thread[proto][thread_index]++;
+ a->busy_ports[proto]++;
+ *addr = a->addr;
+ *port = clib_host_to_net_u16 (portnum);
+ return 0;
+ }
+ }
+ }
}
}
- }
-
-#undef _
/* Totally out of translations to use... */
nat_ipfix_logging_addresses_exhausted (thread_index, 0);
@@ -1612,30 +1955,20 @@ nat44_ei_alloc_range_cb (nat44_ei_address_t *addresses, u32 fib_index,
if (!vec_len (addresses))
goto exhausted;
- switch (proto)
- {
-#define _(N, i, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_ports < ports) \
- { \
- while (1) \
- { \
- portnum = nat_random_port (&nm->random_seed, nm->start_port, \
- nm->end_port); \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- ++a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16 (portnum); \
- return 0; \
- } \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (nm, "unknown protocol");
- return 1;
+ if (a->busy_ports[proto] < ports)
+ {
+ while (1)
+ {
+ portnum =
+ nat_random_port (&nm->random_seed, nm->start_port, nm->end_port);
+ if (nat44_ei_port_is_used (a, proto, portnum))
+ continue;
+ nat44_ei_port_get (a, proto, portnum);
+ a->busy_ports[proto]++;
+ *addr = a->addr;
+ *port = clib_host_to_net_u16 (portnum);
+ return 0;
+ }
}
exhausted:
@@ -1659,32 +1992,22 @@ nat44_ei_alloc_mape_cb (nat44_ei_address_t *addresses, u32 fib_index,
if (!vec_len (addresses))
goto exhausted;
- switch (proto)
- {
-#define _(N, i, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_ports < ports) \
- { \
- while (1) \
- { \
- A = nat_random_port (&nm->random_seed, 1, \
- pow2_mask (nm->psid_offset)); \
- j = nat_random_port (&nm->random_seed, 0, pow2_mask (m)); \
- portnum = A | (nm->psid << nm->psid_offset) | (j << (16 - m)); \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- ++a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16 (portnum); \
- return 0; \
- } \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (nm, "unknown protocol");
- return 1;
+ if (a->busy_ports[proto] < ports)
+ {
+ while (1)
+ {
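+ /* compose the MAP-E port: A fills the low psid_offset bits (A >= 1),
+ * the PSID sits above it, and j fills the top m bits; e.g. with
+ * psid_offset 6 and m 4: portnum = A | (psid << 6) | (j << 12) */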
+ A =
+ nat_random_port (&nm->random_seed, 1, pow2_mask (nm->psid_offset));
+ j = nat_random_port (&nm->random_seed, 0, pow2_mask (m));
+ portnum = A | (nm->psid << nm->psid_offset) | (j << (16 - m));
+ if (nat44_ei_port_is_used (a, proto, portnum))
+ continue;
+ nat44_ei_port_get (a, proto, portnum);
+ a->busy_ports[proto]++;
+ *addr = a->addr;
+ *port = clib_host_to_net_u16 (portnum);
+ return 0;
+ }
}
exhausted:
@@ -1725,30 +2048,6 @@ nat44_ei_set_alloc_mape (u16 psid, u16 psid_offset, u16 psid_length)
nm->psid_length = psid_length;
}
-static void
-nat44_ei_add_static_mapping_when_resolved (ip4_address_t l_addr, u16 l_port,
- u16 e_port, nat_protocol_t proto,
- u32 sw_if_index, u32 vrf_id,
- int addr_only, int identity_nat,
- u8 *tag)
-{
- nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_static_map_resolve_t *rp;
-
- vec_add2 (nm->to_resolve, rp, 1);
- clib_memset (rp, 0, sizeof (*rp));
-
- rp->l_addr.as_u32 = l_addr.as_u32;
- rp->l_port = l_port;
- rp->e_port = e_port;
- rp->sw_if_index = sw_if_index;
- rp->vrf_id = vrf_id;
- rp->proto = proto;
- rp->addr_only = addr_only;
- rp->identity_nat = identity_nat;
- rp->tag = vec_dup (tag);
-}
-
void
nat44_ei_delete_session (nat44_ei_main_t *nm, nat44_ei_session_t *ses,
u32 thread_index)
@@ -1787,10 +2086,13 @@ nat44_ei_del_session (nat44_ei_main_t *nm, ip4_address_t *addr, u16 port,
{
nat44_ei_main_per_thread_data_t *tnm;
clib_bihash_kv_8_8_t kv, value;
- u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+ u32 fib_index;
nat44_ei_session_t *s;
clib_bihash_8_8_t *t;
+ fail_if_disabled ();
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
init_nat_k (&kv, *addr, port, fib_index, proto);
t = is_in ? &nm->in2out : &nm->out2in;
if (!clib_bihash_search_8_8 (t, &kv, &value))
@@ -1812,19 +2114,6 @@ nat44_ei_del_session (nat44_ei_main_t *nm, ip4_address_t *addr, u16 port,
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
-u32
-nat44_ei_get_thread_idx_by_port (u16 e_port)
-{
- nat44_ei_main_t *nm = &nat44_ei_main;
- u32 thread_idx = nm->num_workers;
- if (nm->num_workers > 1)
- {
- thread_idx = nm->first_worker_index +
- nm->workers[(e_port - 1024) / nm->port_per_thread];
- }
- return thread_idx;
-}
-
void
nat44_ei_add_del_addr_to_fib (ip4_address_t *addr, u8 p_len, u32 sw_if_index,
int is_add)
@@ -1840,412 +2129,560 @@ nat44_ei_add_del_addr_to_fib (ip4_address_t *addr, u8 p_len, u32 sw_if_index,
u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
if (is_add)
- fib_table_entry_update_one_path (
- fib_index, &prefix, nm->fib_src_low,
- (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL |
- FIB_ENTRY_FLAG_EXCLUSIVE),
- DPO_PROTO_IP4, NULL, sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
+ {
+ fib_table_entry_update_one_path (fib_index, &prefix, nm->fib_src_low,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL |
+ FIB_ENTRY_FLAG_EXCLUSIVE),
+ DPO_PROTO_IP4, NULL, sw_if_index, ~0, 1,
+ NULL, FIB_ROUTE_PATH_FLAG_NONE);
+ }
else
- fib_table_entry_delete (fib_index, &prefix, nm->fib_src_low);
+ {
+ fib_table_entry_delete (fib_index, &prefix, nm->fib_src_low);
+ }
}
int
-nat44_ei_add_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
- u16 l_port, u16 e_port, nat_protocol_t proto,
- u32 sw_if_index, u32 vrf_id, u8 addr_only,
- u8 identity_nat, u8 *tag, u8 is_add)
+nat44_ei_reserve_port (ip4_address_t addr, u16 port, nat_protocol_t proto)
{
+ u32 ti = nat44_ei_get_thread_idx_by_port (port);
nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_static_mapping_t *m = 0;
- clib_bihash_kv_8_8_t kv, value;
nat44_ei_address_t *a = 0;
- u32 fib_index = ~0;
- nat44_ei_interface_t *interface;
- nat44_ei_main_per_thread_data_t *tnm;
- nat44_ei_user_key_t u_key;
- nat44_ei_user_t *u;
- dlist_elt_t *head, *elt;
- u32 elt_index, head_index;
- u32 ses_index;
- u64 user_index;
- nat44_ei_session_t *s;
- nat44_ei_static_map_resolve_t *rp, *rp_match = 0;
- nat44_ei_lb_addr_port_t *local;
- u32 find = ~0;
int i;
- if (sw_if_index != ~0)
+ for (i = 0; i < vec_len (nm->addresses); i++)
{
- ip4_address_t *first_int_addr;
+ a = nm->addresses + i;
- for (i = 0; i < vec_len (nm->to_resolve); i++)
- {
- rp = nm->to_resolve + i;
- if (rp->sw_if_index != sw_if_index ||
- rp->l_addr.as_u32 != l_addr.as_u32 || rp->vrf_id != vrf_id ||
- rp->addr_only != addr_only)
- continue;
+ if (a->addr.as_u32 != addr.as_u32)
+ continue;
- if (!addr_only)
- {
- if ((rp->l_port != l_port && rp->e_port != e_port) ||
- rp->proto != proto)
- continue;
- }
+ if (nat44_ei_port_is_used (a, proto, port))
+ continue;
- rp_match = rp;
- break;
+ nat44_ei_port_get (a, proto, port);
+ if (port > 1024)
+ {
+ a->busy_ports[proto]++;
+ a->busy_ports_per_thread[proto][ti]++;
}
+ return 0;
+ }
- /* Might be already set... */
- first_int_addr = ip4_interface_first_address (
- nm->ip4_main, sw_if_index, 0 /* just want the address */);
+ return 1;
+}
- if (is_add)
- {
- if (rp_match)
- return VNET_API_ERROR_VALUE_EXIST;
+int
+nat44_ei_free_port (ip4_address_t addr, u16 port, nat_protocol_t proto)
+{
+ u32 ti = nat44_ei_get_thread_idx_by_port (port);
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_address_t *a = 0;
+ int i;
- nat44_ei_add_static_mapping_when_resolved (
- l_addr, l_port, e_port, proto, sw_if_index, vrf_id, addr_only,
- identity_nat, tag);
+ for (i = 0; i < vec_len (nm->addresses); i++)
+ {
+ a = nm->addresses + i;
- /* DHCP resolution required? */
- if (!first_int_addr)
- return 0;
+ if (a->addr.as_u32 != addr.as_u32)
+ continue;
- e_addr.as_u32 = first_int_addr->as_u32;
- /* Identity mapping? */
- if (l_addr.as_u32 == 0)
- l_addr.as_u32 = e_addr.as_u32;
- }
- else
+ nat44_ei_port_put (a, proto, port);
+ if (port > 1024)
{
- if (!rp_match)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- vec_del1 (nm->to_resolve, i);
-
- if (!first_int_addr)
- return 0;
-
- e_addr.as_u32 = first_int_addr->as_u32;
- /* Identity mapping? */
- if (l_addr.as_u32 == 0)
- l_addr.as_u32 = e_addr.as_u32;
+ a->busy_ports[proto]--;
+ a->busy_ports_per_thread[proto][ti]--;
}
+ return 0;
}
- init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
- if (!clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
- m = pool_elt_at_index (nm->static_mappings, value.value);
+ return 1;
+}
- if (is_add)
- {
- if (m)
- {
- // identity mapping for second vrf
- if (nat44_ei_is_identity_static_mapping (m))
- {
- pool_foreach (local, m->locals)
- {
- if (local->vrf_id == vrf_id)
- return VNET_API_ERROR_VALUE_EXIST;
- }
- pool_get (m->locals, local);
- local->vrf_id = vrf_id;
- local->fib_index = fib_table_find_or_create_and_lock (
- FIB_PROTOCOL_IP4, vrf_id, nm->fib_src_low);
- init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
- m->proto, 0, m - nm->static_mappings);
- clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 1);
- return 0;
- }
- return VNET_API_ERROR_VALUE_EXIST;
- }
+void
+nat44_ei_add_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
+ nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
+ u32 flags, ip4_address_t pool_addr, u8 *tag)
+{
+ nat44_ei_static_map_resolve_t *rp;
+ nat44_ei_main_t *nm = &nat44_ei_main;
- /* Convert VRF id to FIB index */
- if (vrf_id != ~0)
- {
- fib_index = fib_table_find_or_create_and_lock (
- FIB_PROTOCOL_IP4, vrf_id, nm->fib_src_low);
- }
- /* If not specified use inside VRF id from NAT44 plugin config */
- else
- {
- fib_index = nm->inside_fib_index;
- vrf_id = nm->inside_vrf_id;
- fib_table_lock (fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
- }
+ vec_add2 (nm->to_resolve, rp, 1);
+ rp->l_addr.as_u32 = l_addr.as_u32;
+ rp->l_port = l_port;
+ rp->e_port = e_port;
+ rp->sw_if_index = sw_if_index;
+ rp->vrf_id = vrf_id;
+ rp->proto = proto;
+ rp->flags = flags;
+ rp->pool_addr = pool_addr;
+ rp->tag = vec_dup (tag);
+}
- if (!identity_nat)
- {
- init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
- addr_only ? 0 : proto);
- if (!clib_bihash_search_8_8 (&nm->static_mapping_by_local, &kv,
- &value))
- return VNET_API_ERROR_VALUE_EXIST;
- }
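+/* Find the resolve record matching the given parameters; on success
+ * optionally return its index in nm->to_resolve via *out.
+ * Returns 0 when found, 1 otherwise. */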
+int
+nat44_ei_get_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
+ nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
+ u32 flags, int *out)
+{
+ nat44_ei_static_map_resolve_t *rp;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ int i;
+
+ for (i = 0; i < vec_len (nm->to_resolve); i++)
+ {
+ rp = nm->to_resolve + i;
- /* Find external address in allocated addresses and reserve port for
- address and port pair mapping when dynamic translations enabled */
- if (!(addr_only || nm->static_mapping_only))
+ if (rp->sw_if_index == sw_if_index && rp->vrf_id == vrf_id)
{
- for (i = 0; i < vec_len (nm->addresses); i++)
+ if (is_sm_identity_nat (rp->flags) && is_sm_identity_nat (flags))
{
- if (nm->addresses[i].addr.as_u32 == e_addr.as_u32)
+ if (!(is_sm_addr_only (rp->flags) && is_sm_addr_only (flags)))
{
- a = nm->addresses + i;
- /* External port must be unused */
- switch (proto)
+ if (rp->e_port != e_port || rp->proto != proto)
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_port_refcounts[e_port]) \
- return VNET_API_ERROR_INVALID_VALUE; \
- ++a->busy_##n##_port_refcounts[e_port]; \
- if (e_port > 1024) \
- { \
- a->busy_##n##_ports++; \
- a->busy_##n##_ports_per_thread[nat44_ei_get_thread_idx_by_port ( \
- e_port)]++; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (nm, "unknown protocol");
- return VNET_API_ERROR_INVALID_VALUE_2;
+ continue;
}
- break;
}
}
- /* External address must be allocated */
- if (!a && (l_addr.as_u32 != e_addr.as_u32))
+ else if (rp->l_addr.as_u32 == l_addr.as_u32)
{
- if (sw_if_index != ~0)
+ if (!(is_sm_addr_only (rp->flags) && is_sm_addr_only (flags)))
{
- for (i = 0; i < vec_len (nm->to_resolve); i++)
+ if (rp->l_port != l_port || rp->e_port != e_port ||
+ rp->proto != proto)
{
- rp = nm->to_resolve + i;
- if (rp->addr_only)
- continue;
- if (rp->sw_if_index != sw_if_index &&
- rp->l_addr.as_u32 != l_addr.as_u32 &&
- rp->vrf_id != vrf_id && rp->l_port != l_port &&
- rp->e_port != e_port && rp->proto != proto)
- continue;
-
- vec_del1 (nm->to_resolve, i);
- break;
+ continue;
}
}
- return VNET_API_ERROR_NO_SUCH_ENTRY;
}
+ else
+ {
+ continue;
+ }
+ if (out)
+ {
+ *out = i;
+ }
+ return 0;
}
+ }
+ return 1;
+}
- pool_get (nm->static_mappings, m);
- clib_memset (m, 0, sizeof (*m));
- m->tag = vec_dup (tag);
- m->local_addr = l_addr;
- m->external_addr = e_addr;
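+/* Remove the matching resolve record from nm->to_resolve.
+ * Returns 0 on success, 1 when no matching record exists. */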
+int
+nat44_ei_del_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
+ nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
+ u32 flags)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ int i;
+ if (!nat44_ei_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags, &i))
+ {
+ vec_del1 (nm->to_resolve, i);
+ return 0;
+ }
+ return 1;
+}
- if (addr_only)
- m->flags |= NAT44_EI_STATIC_MAPPING_FLAG_ADDR_ONLY;
- else
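+/* Delete dynamic sessions on the given worker that collide with the static
+ * mapping's local address (and local port for non-addr-only mappings). */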
+void
+delete_matching_dynamic_sessions (const nat44_ei_static_mapping_t *m,
+ u32 worker_index)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ clib_bihash_kv_8_8_t kv, value;
+ nat44_ei_session_t *s;
+ nat44_ei_user_key_t u_key;
+ nat44_ei_user_t *u;
+ nat44_ei_main_per_thread_data_t *tnm;
+ dlist_elt_t *head, *elt;
+ u32 elt_index, head_index;
+ u32 ses_index;
+ u64 user_index;
+
+ if (nm->static_mapping_only)
+ return;
+
+ tnm = vec_elt_at_index (nm->per_thread_data, worker_index);
+
+ u_key.addr = m->local_addr;
+ u_key.fib_index = m->fib_index;
+ kv.key = u_key.as_u64;
+ if (!clib_bihash_search_8_8 (&tnm->user_hash, &kv, &value))
+ {
+ user_index = value.value;
+ u = pool_elt_at_index (tnm->users, user_index);
+ if (u->nsessions)
{
- m->local_port = l_port;
- m->external_port = e_port;
- m->proto = proto;
+ head_index = u->sessions_per_user_list_head_index;
+ head = pool_elt_at_index (tnm->list_pool, head_index);
+ elt_index = head->next;
+ elt = pool_elt_at_index (tnm->list_pool, elt_index);
+ ses_index = elt->value;
+ while (ses_index != ~0)
+ {
+ s = pool_elt_at_index (tnm->sessions, ses_index);
+ elt = pool_elt_at_index (tnm->list_pool, elt->next);
+ ses_index = elt->value;
+
+ if (nat44_ei_is_session_static (s))
+ continue;
+
+ if (!is_sm_addr_only (m->flags) &&
+ s->in2out.port != m->local_port)
+ continue;
+
+ nat44_ei_free_session_data_v2 (nm, s, tnm - nm->per_thread_data,
+ 0);
+ nat44_ei_delete_session (nm, s, tnm - nm->per_thread_data);
+
+ if (!is_sm_addr_only (m->flags))
+ break;
+ }
}
+ }
+}
+
+int
+nat44_ei_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
- if (identity_nat)
+ if (is_sm_switch_address (flags))
+ {
+ if (!nat44_ei_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags, 0))
{
- m->flags |= NAT44_EI_STATIC_MAPPING_FLAG_IDENTITY_NAT;
- pool_get (m->locals, local);
- local->vrf_id = vrf_id;
- local->fib_index = fib_index;
+ return VNET_API_ERROR_VALUE_EXIST;
}
- else
+
+ nat44_ei_add_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags, pool_addr, tag);
+
+ ip4_address_t *first_int_addr =
+ ip4_interface_first_address (nm->ip4_main, sw_if_index, 0);
+ if (!first_int_addr)
{
- m->vrf_id = vrf_id;
- m->fib_index = fib_index;
+ // dhcp resolution required
+ return 0;
}
- if (nm->num_workers > 1)
+ e_addr.as_u32 = first_int_addr->as_u32;
+ }
+
+ return nat44_ei_add_static_mapping_internal (l_addr, e_addr, l_port, e_port,
+ proto, vrf_id, sw_if_index,
+ flags, pool_addr, tag);
+}
+
+int
+nat44_ei_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+
+ if (is_sm_switch_address (flags))
+ {
+ if (nat44_ei_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags))
{
- ip4_header_t ip = {
- .src_address = m->local_addr,
- };
- vec_add1 (m->workers,
- nat44_ei_get_in2out_worker_index (&ip, m->fib_index, 0));
- tnm = vec_elt_at_index (nm->per_thread_data, m->workers[0]);
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
}
- else
- tnm = vec_elt_at_index (nm->per_thread_data, nm->num_workers);
- init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto, 0,
- m - nm->static_mappings);
- clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 1);
+ ip4_address_t *first_int_addr =
+ ip4_interface_first_address (nm->ip4_main, sw_if_index, 0);
+ if (!first_int_addr)
+ {
+ // dhcp resolution required
+ return 0;
+ }
- init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
- m - nm->static_mappings);
- clib_bihash_add_del_8_8 (&nm->static_mapping_by_external, &kv, 1);
+ e_addr.as_u32 = first_int_addr->as_u32;
+ }
- /* Delete dynamic sessions matching local address (+ local port) */
- // TODO: based on type of NAT EI/ED
- if (!(nm->static_mapping_only))
- {
- u_key.addr = m->local_addr;
- u_key.fib_index = m->fib_index;
- kv.key = u_key.as_u64;
- if (!clib_bihash_search_8_8 (&tnm->user_hash, &kv, &value))
- {
- user_index = value.value;
- u = pool_elt_at_index (tnm->users, user_index);
- if (u->nsessions)
- {
- head_index = u->sessions_per_user_list_head_index;
- head = pool_elt_at_index (tnm->list_pool, head_index);
- elt_index = head->next;
- elt = pool_elt_at_index (tnm->list_pool, elt_index);
- ses_index = elt->value;
- while (ses_index != ~0)
- {
- s = pool_elt_at_index (tnm->sessions, ses_index);
- elt = pool_elt_at_index (tnm->list_pool, elt->next);
- ses_index = elt->value;
+ return nat44_ei_del_static_mapping_internal (
+ l_addr, e_addr, l_port, e_port, proto, vrf_id, sw_if_index, flags);
+}
- if (nat44_ei_is_session_static (s))
- continue;
+static int
+nat44_ei_add_static_mapping_internal (ip4_address_t l_addr,
+ ip4_address_t e_addr, u16 l_port,
+ u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ clib_bihash_kv_8_8_t kv, value;
+ nat44_ei_lb_addr_port_t *local;
+ nat44_ei_static_mapping_t *m;
+ u32 fib_index = ~0;
+ u32 worker_index;
- if (!addr_only && s->in2out.port != m->local_port)
- continue;
+ fail_if_disabled ();
- nat44_ei_free_session_data_v2 (
- nm, s, tnm - nm->per_thread_data, 0);
- nat44_ei_delete_session (nm, s,
- tnm - nm->per_thread_data);
+ if (is_sm_addr_only (flags))
+ {
+ e_port = l_port = proto = 0;
+ }
- if (!addr_only)
- break;
- }
- }
+ if (is_sm_identity_nat (flags))
+ {
+ l_port = e_port;
+ l_addr.as_u32 = e_addr.as_u32;
+ }
+
+ // fib index 0
+ init_nat_k (&kv, e_addr, e_port, 0, proto);
+
+ if (!clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
+ {
+ m = pool_elt_at_index (nm->static_mappings, value.value);
+ if (!is_sm_identity_nat (m->flags))
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+      // case: adding a local identity NAT record for a different VRF table
+ pool_foreach (local, m->locals)
+ {
+ if (local->vrf_id == vrf_id)
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
}
}
+
+ pool_get (m->locals, local);
+
+ local->vrf_id = vrf_id;
+ local->fib_index = fib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, vrf_id, nm->fib_src_low);
+
+ init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
+ m->proto, 0, m - nm->static_mappings);
+ clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 1);
+
+ return 0;
+ }
+
+ if (vrf_id != ~0)
+ {
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
+ nm->fib_src_low);
}
else
{
- if (!m)
+ // fallback to default vrf
+ vrf_id = nm->inside_vrf_id;
+ fib_index = nm->inside_fib_index;
+ fib_table_lock (fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
+ }
+
+ if (!is_sm_identity_nat (flags))
+ {
+ init_nat_k (&kv, l_addr, l_port, fib_index, proto);
+ if (!clib_bihash_search_8_8 (&nm->static_mapping_by_local, &kv, &value))
{
- if (sw_if_index != ~0)
- return 0;
- else
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ return VNET_API_ERROR_VALUE_EXIST;
}
+ }
- if (identity_nat)
+ if (!(is_sm_addr_only (flags) || nm->static_mapping_only))
+ {
+ if (nat44_ei_reserve_port (e_addr, e_port, proto))
{
- if (vrf_id == ~0)
- vrf_id = nm->inside_vrf_id;
-
- pool_foreach (local, m->locals)
+	  // port reservation failed, roll back the resolve record
+ if ((is_sm_switch_address (flags)) && !is_sm_identity_nat (flags))
{
- if (local->vrf_id == vrf_id)
- find = local - m->locals;
+ nat44_ei_del_resolve_record (l_addr, l_port, e_port, proto,
+ vrf_id, sw_if_index, flags);
}
- if (find == ~0)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+ }
+
+ pool_get (nm->static_mappings, m);
+ clib_memset (m, 0, sizeof (*m));
- local = pool_elt_at_index (m->locals, find);
- fib_index = local->fib_index;
- pool_put (m->locals, local);
+ m->flags = flags;
+ m->local_addr = l_addr;
+ m->external_addr = e_addr;
+
+ m->tag = vec_dup (tag);
+
+ if (!is_sm_addr_only (flags))
+ {
+ m->local_port = l_port;
+ m->external_port = e_port;
+ m->proto = proto;
+ }
+
+ if (is_sm_identity_nat (flags))
+ {
+ pool_get (m->locals, local);
+
+ local->vrf_id = vrf_id;
+ local->fib_index = fib_index;
+ }
+ else
+ {
+ m->vrf_id = vrf_id;
+ m->fib_index = fib_index;
+ }
+
+ init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto, 0,
+ m - nm->static_mappings);
+ clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 1);
+
+ init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
+ m - nm->static_mappings);
+ clib_bihash_add_del_8_8 (&nm->static_mapping_by_external, &kv, 1);
+
+ if (nm->num_workers > 1)
+ {
+ // store worker index for this record
+ ip4_header_t ip = {
+ .src_address = m->local_addr,
+ };
+ worker_index = nat44_ei_get_in2out_worker_index (&ip, m->fib_index, 0);
+ vec_add1 (m->workers, worker_index);
+ }
+ else
+ {
+ worker_index = nm->num_workers;
+ }
+ delete_matching_dynamic_sessions (m, worker_index);
+
+ if (is_sm_addr_only (flags))
+ {
+ nat44_ei_add_del_addr_to_fib_foreach_out_if (&e_addr, 1);
+ }
+
+ return 0;
+}
+
+static int
+nat44_ei_del_static_mapping_internal (ip4_address_t l_addr,
+ ip4_address_t e_addr, u16 l_port,
+ u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags)
+{
+ nat44_ei_main_per_thread_data_t *tnm;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ clib_bihash_kv_8_8_t kv, value;
+ nat44_ei_lb_addr_port_t *local;
+ nat44_ei_static_mapping_t *m;
+ u32 fib_index = ~0;
+ nat44_ei_user_key_t u_key;
+
+ fail_if_disabled ();
+
+ if (is_sm_addr_only (flags))
+ {
+ e_port = l_port = proto = 0;
+ }
+
+ if (is_sm_identity_nat (flags))
+ {
+ l_port = e_port;
+ l_addr.as_u32 = e_addr.as_u32;
+ }
+
+ // fib index 0
+ init_nat_k (&kv, e_addr, e_port, 0, proto);
+
+ if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
+ {
+ if (is_sm_switch_address (flags))
+ {
+ return 0;
}
- else
- fib_index = m->fib_index;
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ m = pool_elt_at_index (nm->static_mappings, value.value);
- /* Free external address port */
- if (!(addr_only || nm->static_mapping_only))
+ if (is_sm_identity_nat (flags))
+ {
+ u8 found = 0;
+
+ if (vrf_id == ~0)
{
- for (i = 0; i < vec_len (nm->addresses); i++)
+ vrf_id = nm->inside_vrf_id;
+ }
+
+ pool_foreach (local, m->locals)
+ {
+ if (local->vrf_id == vrf_id)
{
- if (nm->addresses[i].addr.as_u32 == e_addr.as_u32)
- {
- a = nm->addresses + i;
- switch (proto)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- --a->busy_##n##_port_refcounts[e_port]; \
- if (e_port > 1024) \
- { \
- a->busy_##n##_ports--; \
- a->busy_##n##_ports_per_thread[nat44_ei_get_thread_idx_by_port ( \
- e_port)]--; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : return VNET_API_ERROR_INVALID_VALUE_2;
- }
- break;
- }
+ local = pool_elt_at_index (m->locals, local - m->locals);
+ fib_index = local->fib_index;
+ pool_put (m->locals, local);
+ found = 1;
}
}
+ if (!found)
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+ }
+ else
+ {
+ fib_index = m->fib_index;
+ }
+
+ if (!(is_sm_addr_only (flags) || nm->static_mapping_only))
+ {
+ if (nat44_ei_free_port (e_addr, e_port, proto))
+ {
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+ }
+ init_nat_k (&kv, l_addr, l_port, fib_index, proto);
+ clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 0);
+
+ if (!nm->static_mapping_only || nm->static_mapping_connection_tracking)
+ {
+ // delete sessions for static mapping
if (nm->num_workers > 1)
tnm = vec_elt_at_index (nm->per_thread_data, m->workers[0]);
else
tnm = vec_elt_at_index (nm->per_thread_data, nm->num_workers);
- init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
- clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 0);
-
- /* Delete session(s) for static mapping if exist */
- if (!(nm->static_mapping_only) ||
- (nm->static_mapping_only && nm->static_mapping_connection_tracking))
- {
- u_key.addr = m->local_addr;
- u_key.fib_index = fib_index;
- kv.key = u_key.as_u64;
- nat44_ei_static_mapping_del_sessions (nm, tnm, u_key, addr_only,
- e_addr, e_port);
- }
+ u_key.addr = m->local_addr;
+ u_key.fib_index = fib_index;
+ kv.key = u_key.as_u64;
+ nat44_ei_static_mapping_del_sessions (
+ nm, tnm, u_key, is_sm_addr_only (flags), e_addr, e_port);
+ }
- fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
- if (pool_elts (m->locals))
- return 0;
+ fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
- init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
+ if (!pool_elts (m->locals))
+ {
+      // this is the last record, remove all associated state
+ // fib_index 0
+ init_nat_k (&kv, e_addr, e_port, 0, proto);
clib_bihash_add_del_8_8 (&nm->static_mapping_by_external, &kv, 0);
vec_free (m->tag);
vec_free (m->workers);
- /* Delete static mapping from pool */
pool_put (nm->static_mappings, m);
- }
- if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
- return 0;
-
- /* Add/delete external address to FIB */
- pool_foreach (interface, nm->interfaces)
- {
- if (nat44_ei_interface_is_inside (interface) || nm->out2in_dpo)
- continue;
-
- nat44_ei_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index,
- is_add);
- break;
+ if (is_sm_addr_only (flags) && !is_sm_identity_nat (flags))
+ {
+ nat44_ei_add_del_addr_to_fib_foreach_out_if (&e_addr, 0);
+ }
}
- pool_foreach (interface, nm->output_feature_interfaces)
- {
- if (nat44_ei_interface_is_inside (interface) || nm->out2in_dpo)
- continue;
- nat44_ei_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index,
- is_add);
- break;
- }
return 0;
}
@@ -2300,16 +2737,16 @@ nat44_ei_static_mapping_match (ip4_address_t match_addr, u16 match_port,
}
/* Address only mapping doesn't change port */
- if (nat44_ei_is_addr_only_static_mapping (m))
+ if (is_sm_addr_only (m->flags))
*mapping_port = match_port;
else
*mapping_port = port;
if (PREDICT_FALSE (is_addr_only != 0))
- *is_addr_only = nat44_ei_is_addr_only_static_mapping (m);
+ *is_addr_only = is_sm_addr_only (m->flags);
if (PREDICT_FALSE (is_identity_nat != 0))
- *is_identity_nat = nat44_ei_is_identity_static_mapping (m);
+ *is_identity_nat = is_sm_identity_nat (m->flags);
return 0;
}
@@ -2418,27 +2855,6 @@ nat44_ei_worker_db_init (nat44_ei_main_per_thread_data_t *tnm,
}
static void
-nat44_ei_db_free ()
-{
- nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_main_per_thread_data_t *tnm;
-
- pool_free (nm->static_mappings);
- clib_bihash_free_8_8 (&nm->static_mapping_by_local);
- clib_bihash_free_8_8 (&nm->static_mapping_by_external);
-
- if (nm->pat)
- {
- clib_bihash_free_8_8 (&nm->in2out);
- clib_bihash_free_8_8 (&nm->out2in);
- vec_foreach (tnm, nm->per_thread_data)
- {
- nat44_ei_worker_db_free (tnm);
- }
- }
-}
-
-static void
nat44_ei_db_init (u32 translations, u32 translation_buckets, u32 user_buckets)
{
nat44_ei_main_t *nm = &nat44_ei_main;
@@ -2572,11 +2988,13 @@ nat44_ei_update_outside_fib (ip4_main_t *im, uword opaque, u32 sw_if_index,
}
int
-nat44_ei_add_address (nat44_ei_main_t *nm, ip4_address_t *addr, u32 vrf_id)
+nat44_ei_add_address (ip4_address_t *addr, u32 vrf_id)
{
- nat44_ei_address_t *ap;
- nat44_ei_interface_t *i;
+ nat44_ei_main_t *nm = &nat44_ei_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ nat44_ei_address_t *ap;
+
+ fail_if_disabled ();
/* Check if address already exists */
vec_foreach (ap, nm->addresses)
@@ -2590,137 +3008,49 @@ nat44_ei_add_address (nat44_ei_main_t *nm, ip4_address_t *addr, u32 vrf_id)
vec_add2 (nm->addresses, ap, 1);
+ ap->fib_index = ~0;
ap->addr = *addr;
- if (vrf_id != ~0)
- ap->fib_index = fib_table_find_or_create_and_lock (
- FIB_PROTOCOL_IP4, vrf_id, nm->fib_src_low);
- else
- ap->fib_index = ~0;
-
-#define _(N, i, n, s) \
- clib_memset (ap->busy_##n##_port_refcounts, 0, \
- sizeof (ap->busy_##n##_port_refcounts)); \
- ap->busy_##n##_ports = 0; \
- ap->busy_##n##_ports_per_thread = 0; \
- vec_validate_init_empty (ap->busy_##n##_ports_per_thread, \
- tm->n_vlib_mains - 1, 0);
- foreach_nat_protocol
-#undef _
- /* Add external address to FIB */
- pool_foreach (i, nm->interfaces)
- {
- if (nat44_ei_interface_is_inside (i) || nm->out2in_dpo)
- continue;
-
- nat44_ei_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
- break;
- }
- pool_foreach (i, nm->output_feature_interfaces)
+ if (vrf_id != ~0)
{
- if (nat44_ei_interface_is_inside (i) || nm->out2in_dpo)
- continue;
-
- nat44_ei_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
- break;
+ ap->fib_index = fib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, vrf_id, nm->fib_src_low);
}
- return 0;
-}
-
-int
-nat44_ei_add_interface_address (nat44_ei_main_t *nm, u32 sw_if_index,
- int is_del)
-{
- ip4_main_t *ip4_main = nm->ip4_main;
- ip4_address_t *first_int_addr;
- nat44_ei_static_map_resolve_t *rp;
- u32 *indices_to_delete = 0;
- int i, j;
- u32 *auto_add_sw_if_indices = nm->auto_add_sw_if_indices;
-
- first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index,
- 0 /* just want the address */);
-
- for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
+ nat_protocol_t proto;
+ for (proto = 0; proto < NAT_N_PROTOCOLS; ++proto)
{
- if (auto_add_sw_if_indices[i] == sw_if_index)
- {
- if (is_del)
- {
- /* if have address remove it */
- if (first_int_addr)
- (void) nat44_ei_del_address (nm, first_int_addr[0], 1);
- else
- {
- for (j = 0; j < vec_len (nm->to_resolve); j++)
- {
- rp = nm->to_resolve + j;
- if (rp->sw_if_index == sw_if_index)
- vec_add1 (indices_to_delete, j);
- }
- if (vec_len (indices_to_delete))
- {
- for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
- vec_del1 (nm->to_resolve, j);
- vec_free (indices_to_delete);
- }
- }
- vec_del1 (nm->auto_add_sw_if_indices, i);
- }
- else
- return VNET_API_ERROR_VALUE_EXIST;
-
- return 0;
- }
+ ap->busy_port_bitmap[proto] = 0;
+ ap->busy_ports[proto] = 0;
+ ap->busy_ports_per_thread[proto] = 0;
+ vec_validate_init_empty (ap->busy_ports_per_thread[proto],
+ tm->n_vlib_mains - 1, 0);
}
- if (is_del)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- /* add to the auto-address list */
- vec_add1 (nm->auto_add_sw_if_indices, sw_if_index);
-
- /* If the address is already bound - or static - add it now */
- if (first_int_addr)
- (void) nat44_ei_add_address (nm, first_int_addr, ~0);
+ nat44_ei_add_del_addr_to_fib_foreach_out_if (addr, 1);
return 0;
}
-static int
-nat44_ei_is_address_used_in_static_mapping (ip4_address_t addr)
-{
- nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_static_mapping_t *m;
- pool_foreach (m, nm->static_mappings)
- {
- if (nat44_ei_is_addr_only_static_mapping (m) ||
- nat44_ei_is_identity_static_mapping (m))
- continue;
- if (m->external_addr.as_u32 == addr.as_u32)
- return 1;
- }
- return 0;
-}
-
int
-nat44_ei_del_address (nat44_ei_main_t *nm, ip4_address_t addr, u8 delete_sm)
+nat44_ei_del_address (ip4_address_t addr, u8 delete_sm)
{
+ nat44_ei_main_t *nm = &nat44_ei_main;
nat44_ei_address_t *a = 0;
nat44_ei_session_t *ses;
u32 *ses_to_be_removed = 0, *ses_index;
nat44_ei_main_per_thread_data_t *tnm;
- nat44_ei_interface_t *interface;
nat44_ei_static_mapping_t *m;
- int i;
+ int j;
+
+ fail_if_disabled ();
/* Find SNAT address */
- for (i = 0; i < vec_len (nm->addresses); i++)
+ for (j = 0; j < vec_len (nm->addresses); j++)
{
- if (nm->addresses[i].addr.as_u32 == addr.as_u32)
+ if (nm->addresses[j].addr.as_u32 == addr.as_u32)
{
- a = nm->addresses + i;
+ a = nm->addresses + j;
break;
}
}
@@ -2735,11 +3065,9 @@ nat44_ei_del_address (nat44_ei_main_t *nm, ip4_address_t addr, u8 delete_sm)
pool_foreach (m, nm->static_mappings)
{
if (m->external_addr.as_u32 == addr.as_u32)
- (void) nat44_ei_add_del_static_mapping (
+ nat44_ei_del_static_mapping_internal (
m->local_addr, m->external_addr, m->local_port, m->external_port,
- m->proto, ~0 /* sw_if_index */, m->vrf_id,
- nat44_ei_is_addr_only_static_mapping (m),
- nat44_ei_is_identity_static_mapping (m), m->tag, 0);
+ m->proto, m->vrf_id, ~0, m->flags);
}
}
else
@@ -2752,11 +3080,9 @@ nat44_ei_del_address (nat44_ei_main_t *nm, ip4_address_t addr, u8 delete_sm)
}
}
- if (a->fib_index != ~0)
- fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
-
/* Delete sessions using address */
- if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
+ if (a->busy_ports[NAT_PROTOCOL_TCP] || a->busy_ports[NAT_PROTOCOL_UDP] ||
+ a->busy_ports[NAT_PROTOCOL_ICMP])
{
vec_foreach (tnm, nm->per_thread_data)
{
@@ -2778,28 +3104,116 @@ nat44_ei_del_address (nat44_ei_main_t *nm, ip4_address_t addr, u8 delete_sm)
}
}
-#define _(N, i, n, s) vec_free (a->busy_##n##_ports_per_thread);
- foreach_nat_protocol
-#undef _
- vec_del1 (nm->addresses, i);
+ nat44_ei_add_del_addr_to_fib_foreach_out_if (&addr, 0);
- /* Delete external address from FIB */
- pool_foreach (interface, nm->interfaces)
+ if (a->fib_index != ~0)
{
- if (nat44_ei_interface_is_inside (interface) || nm->out2in_dpo)
- continue;
- nat44_ei_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
- break;
+ fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
}
- pool_foreach (interface, nm->output_feature_interfaces)
+ nat_protocol_t proto;
+ for (proto = 0; proto < NAT_N_PROTOCOLS; ++proto)
{
- if (nat44_ei_interface_is_inside (interface) || nm->out2in_dpo)
- continue;
- nat44_ei_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
- break;
+ vec_free (a->busy_ports_per_thread[proto]);
+ }
+
+ vec_del1 (nm->addresses, j);
+ return 0;
+}
+
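+/* Register sw_if_index for automatic NAT pool address assignment; if the
+ * interface already has an address it is added to the pool right away. */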
+int
+nat44_ei_add_interface_address (u32 sw_if_index)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ ip4_main_t *ip4_main = nm->ip4_main;
+ ip4_address_t *first_int_addr;
+ u32 *auto_add_sw_if_indices = nm->auto_add_sw_if_indices;
+ int i;
+
+ for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
+ {
+ if (auto_add_sw_if_indices[i] == sw_if_index)
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+ }
+
+ /* add to the auto-address list */
+ vec_add1 (nm->auto_add_sw_if_indices, sw_if_index);
+
+ // if the address is already bound - or static - add it now
+ first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
+ if (first_int_addr)
+ {
+ (void) nat44_ei_add_address (first_int_addr, ~0);
+ }
+
+ return 0;
+}
+
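+/* Unregister sw_if_index from automatic address assignment and remove its
+ * address (or any pending resolve records) from the NAT pool. */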
+int
+nat44_ei_del_interface_address (u32 sw_if_index)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ ip4_main_t *ip4_main = nm->ip4_main;
+ ip4_address_t *first_int_addr;
+ nat44_ei_static_map_resolve_t *rp;
+ u32 *indices_to_delete = 0;
+ int i, j;
+ u32 *auto_add_sw_if_indices = nm->auto_add_sw_if_indices;
+
+ fail_if_disabled ();
+
+ first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
+
+ for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
+ {
+ if (auto_add_sw_if_indices[i] == sw_if_index)
+ {
+ if (first_int_addr)
+ {
+ (void) nat44_ei_del_address (first_int_addr[0], 1);
+ }
+ else
+ {
+ for (j = 0; j < vec_len (nm->to_resolve); j++)
+ {
+ rp = nm->to_resolve + j;
+ if (rp->sw_if_index == sw_if_index)
+ {
+ vec_add1 (indices_to_delete, j);
+ }
+ }
+ if (vec_len (indices_to_delete))
+ {
+ for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
+ {
+ vec_del1 (nm->to_resolve, j);
+ }
+ vec_free (indices_to_delete);
+ }
+ }
+
+ vec_del1 (nm->auto_add_sw_if_indices, i);
+ return 0;
+ }
}
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+}
+
+static_always_inline int
+is_sw_if_index_reg_for_auto_resolve (u32 *sw_if_indices, u32 sw_if_index)
+{
+ u32 *i;
+ vec_foreach (i, sw_if_indices)
+ {
+ if (*i == sw_if_index)
+ {
+ return 1;
+ }
+ }
return 0;
}
@@ -2812,61 +3226,59 @@ nat44_ei_ip4_add_del_interface_address_cb (ip4_main_t *im, uword opaque,
{
nat44_ei_main_t *nm = &nat44_ei_main;
nat44_ei_static_map_resolve_t *rp;
- ip4_address_t l_addr;
- int i, j;
- int rv;
nat44_ei_address_t *addresses = nm->addresses;
+ int rv, i;
if (!nm->enabled)
- return;
-
- for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
{
- if (sw_if_index == nm->auto_add_sw_if_indices[i])
- goto match;
+ return;
}
- return;
+ if (!is_sw_if_index_reg_for_auto_resolve (nm->auto_add_sw_if_indices,
+ sw_if_index))
+ {
+ return;
+ }
-match:
if (!is_delete)
{
/* Don't trip over lease renewal, static config */
- for (j = 0; j < vec_len (addresses); j++)
- if (addresses[j].addr.as_u32 == address->as_u32)
- return;
+ for (i = 0; i < vec_len (addresses); i++)
+ {
+ if (addresses[i].addr.as_u32 == address->as_u32)
+ {
+ return;
+ }
+ }
+
+ (void) nat44_ei_add_address (address, ~0);
- (void) nat44_ei_add_address (nm, address, ~0);
/* Scan static map resolution vector */
- for (j = 0; j < vec_len (nm->to_resolve); j++)
+ for (i = 0; i < vec_len (nm->to_resolve); i++)
{
- rp = nm->to_resolve + j;
- if (rp->addr_only)
- continue;
+ rp = nm->to_resolve + i;
+ if (is_sm_addr_only (rp->flags))
+ {
+ continue;
+ }
/* On this interface? */
if (rp->sw_if_index == sw_if_index)
{
- /* Indetity mapping? */
- if (rp->l_addr.as_u32 == 0)
- l_addr.as_u32 = address[0].as_u32;
- else
- l_addr.as_u32 = rp->l_addr.as_u32;
- /* Add the static mapping */
- rv = nat44_ei_add_del_static_mapping (
- l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
- ~0 /* sw_if_index */, rp->vrf_id, rp->addr_only,
- rp->identity_nat, rp->tag, 1);
+ rv = nat44_ei_add_static_mapping_internal (
+ rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
+ rp->vrf_id, ~0, rp->flags, rp->pool_addr, rp->tag);
if (rv)
- nat_elog_notice_X1 (
- nm, "nat44_ei_add_del_static_mapping returned %d", "i4", rv);
+ {
+ nat_elog_notice_X1 (
+		nm, "nat44_ei_add_static_mapping_internal returned %d", "i4", rv);
+ }
}
}
- return;
}
else
{
- (void) nat44_ei_del_address (nm, address[0], 1);
- return;
+ // remove all static mapping records
+ (void) nat44_ei_del_address (address[0], 1);
}
}
@@ -2889,57 +3301,64 @@ nat44_ei_ip4_add_del_addr_only_sm_cb (ip4_main_t *im, uword opaque,
nat44_ei_static_map_resolve_t *rp;
nat44_ei_static_mapping_t *m;
clib_bihash_kv_8_8_t kv, value;
- int i, rv;
- ip4_address_t l_addr;
+ int i, rv = 0, match = 0;
if (!nm->enabled)
- return;
+ {
+ return;
+ }
for (i = 0; i < vec_len (nm->to_resolve); i++)
{
rp = nm->to_resolve + i;
- if (rp->addr_only == 0)
- continue;
- if (rp->sw_if_index == sw_if_index)
- goto match;
+
+ if (is_sm_addr_only (rp->flags) && rp->sw_if_index == sw_if_index)
+ {
+ match = 1;
+ break;
+ }
}
- return;
+ if (!match)
+ {
+ return;
+ }
-match:
- init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
- nm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
+ init_nat_k (&kv, *address, is_sm_addr_only (rp->flags) ? 0 : rp->e_port,
+ nm->outside_fib_index,
+ is_sm_addr_only (rp->flags) ? 0 : rp->proto);
if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
m = 0;
else
m = pool_elt_at_index (nm->static_mappings, value.value);
- if (!is_delete)
+ if (is_delete)
{
- /* Don't trip over lease renewal, static config */
- if (m)
+ if (!m)
return;
+ rv = nat44_ei_del_static_mapping_internal (
+ rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto, rp->vrf_id,
+ ~0, rp->flags);
+ if (rv)
+ {
+	  nat_elog_notice_X1 (
+	    nm, "nat44_ei_del_static_mapping_internal returned %d", "i4", rv);
+ }
}
else
{
- if (!m)
+ if (m)
return;
- }
+ rv = nat44_ei_add_static_mapping_internal (
+ rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto, rp->vrf_id,
+ ~0, rp->flags, rp->pool_addr, rp->tag);
- /* Indetity mapping? */
- if (rp->l_addr.as_u32 == 0)
- l_addr.as_u32 = address[0].as_u32;
- else
- l_addr.as_u32 = rp->l_addr.as_u32;
- /* Add the static mapping */
-
- rv = nat44_ei_add_del_static_mapping (
- l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
- ~0 /* sw_if_index */, rp->vrf_id, rp->addr_only, rp->identity_nat, rp->tag,
- !is_delete);
- if (rv)
- nat_elog_notice_X1 (nm, "nat44_ei_add_del_static_mapping returned %d",
- "i4", rv);
+ if (rv)
+ {
+	  nat_elog_notice_X1 (
+	    nm, "nat44_ei_add_static_mapping_internal returned %d", "i4", rv);
+ }
+ }
}
static_always_inline uword
diff --git a/src/plugins/nat/nat44-ei/nat44_ei.h b/src/plugins/nat/nat44-ei/nat44_ei.h
index 055f81c069b..b4aa0f26c0b 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei.h
+++ b/src/plugins/nat/nat44-ei/nat44_ei.h
@@ -36,6 +36,7 @@
#include <nat/lib/lib.h>
#include <nat/lib/inlines.h>
+#include <nat/lib/nat_proto.h>
/* default number of worker handoff frame queue elements */
#define NAT_FQ_NELTS_DEFAULT 64
@@ -62,19 +63,17 @@ typedef enum
#define NAT44_EI_SESSION_FLAG_UNKNOWN_PROTO (1 << 1)
/* Static mapping flags */
-#define NAT44_EI_STATIC_MAPPING_FLAG_ADDR_ONLY (1 << 0)
-#define NAT44_EI_STATIC_MAPPING_FLAG_IDENTITY_NAT (1 << 1)
+#define NAT44_EI_SM_FLAG_ADDR_ONLY (1 << 0)
+#define NAT44_EI_SM_FLAG_IDENTITY_NAT (1 << 1)
+#define NAT44_EI_SM_FLAG_SWITCH_ADDRESS (1 << 2)
typedef struct
{
ip4_address_t addr;
u32 fib_index;
-#define _(N, i, n, s) \
- u32 busy_##n##_ports; \
- u32 *busy_##n##_ports_per_thread; \
- u32 busy_##n##_port_refcounts[0xffff + 1];
- foreach_nat_protocol
-#undef _
+ u32 busy_ports[NAT_N_PROTOCOLS];
+ u32 *busy_ports_per_thread[NAT_N_PROTOCOLS];
+ uword *busy_port_bitmap[NAT_N_PROTOCOLS];
} nat44_ei_address_t;
clib_error_t *nat44_ei_api_hookup (vlib_main_t *vm);
@@ -138,13 +137,9 @@ typedef struct
u32 vrf_id;
u32 flags;
nat_protocol_t proto;
- u8 addr_only;
- u8 identity_nat;
- u8 exact;
u8 *tag;
} nat44_ei_static_map_resolve_t;
-// TODO: cleanup/redo (there is no lb in EI nat)
typedef struct
{
/* backend IP address */
@@ -161,7 +156,7 @@ typedef struct
typedef struct
{
- /* prefered pool address */
+ /* preferred pool address */
ip4_address_t pool_addr;
/* local IP address */
ip4_address_t local_addr;
@@ -339,6 +334,8 @@ typedef struct nat44_ei_main_s
/* Interface pool */
nat44_ei_interface_t *interfaces;
nat44_ei_interface_t *output_feature_interfaces;
+  // dummy pool kept only for backward compatibility with the broken output-feature API
+ nat44_ei_interface_t *output_feature_dummy_interfaces;
/* Is translation memory size calculated or user defined */
u8 translation_memory_size_set;
@@ -470,12 +467,14 @@ typedef struct nat44_ei_main_s
/* nat44 plugin enabled */
u8 enabled;
+ /* hairpinning registration counter */
+ u32 hairpin_reg;
+
nat44_ei_config_t rconfig;
u32 in2out_hairpinning_finish_ip4_lookup_node_fq_index;
u32 in2out_hairpinning_finish_interface_output_node_fq_index;
u32 hairpinning_fq_index;
- u32 hairpin_dst_fq_index;
vnet_main_t *vnet_main;
} nat44_ei_main_t;
@@ -483,9 +482,17 @@ typedef struct nat44_ei_main_s
extern nat44_ei_main_t nat44_ei_main;
int nat44_ei_plugin_enable (nat44_ei_config_t c);
-
int nat44_ei_plugin_disable ();
+int nat44_ei_add_interface (u32 sw_if_index, u8 is_inside);
+int nat44_ei_del_interface (u32 sw_if_index, u8 is_inside);
+int nat44_ei_add_output_interface (u32 sw_if_index);
+int nat44_ei_del_output_interface (u32 sw_if_index);
+int nat44_ei_add_address (ip4_address_t *addr, u32 vrf_id);
+int nat44_ei_del_address (ip4_address_t addr, u8 delete_sm);
+int nat44_ei_add_interface_address (u32 sw_if_index);
+int nat44_ei_del_interface_address (u32 sw_if_index);
+
/**
* @brief Delete specific NAT44 EI user and his sessions
*
@@ -532,29 +539,14 @@ void nat44_ei_set_alloc_mape (u16 psid, u16 psid_offset, u16 psid_length);
*/
void nat44_ei_set_alloc_range (u16 start_port, u16 end_port);
-/**
- * @brief Add/delete NAT44-EI static mapping
- *
- * @param l_addr local IPv4 address
- * @param e_addr external IPv4 address
- * @param l_port local port number
- * @param e_port external port number
- * @param proto L4 protocol
- * @param sw_if_index use interface address as external IPv4 address
- * @param vrf_id local VRF ID
- * @param addr_only 1 = 1:1NAT, 0 = 1:1NAPT
- * @param identity_nat identity NAT
- * @param tag opaque string tag
- * @param is_add 1 = add, 0 = delete
- *
- * @return 0 on success, non-zero value otherwise
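+/**
+ * @brief Add NAT44-EI static mapping
+ *
+ * @param l_addr local IPv4 address
+ * @param e_addr external IPv4 address
+ * @param l_port local port number
+ * @param e_port external port number
+ * @param proto L4 protocol
+ * @param vrf_id local VRF ID
+ * @param sw_if_index use interface address as external IPv4 address
+ * @param flags combination of NAT44_EI_SM_FLAG_* flags
+ * @param pool_addr preferred pool address
+ * @param tag opaque string tag
+ *
+ * @return 0 on success, non-zero value otherwise
+ */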
+int nat44_ei_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag);
- */
-int nat44_ei_add_del_static_mapping (ip4_address_t l_addr,
- ip4_address_t e_addr, u16 l_port,
- u16 e_port, nat_protocol_t proto,
- u32 sw_if_index, u32 vrf_id, u8 addr_only,
- u8 identity_nat, u8 *tag, u8 is_add);
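+/**
+ * @brief Delete NAT44-EI static mapping
+ *
+ * @return 0 on success, non-zero value otherwise
+ */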
+int nat44_ei_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags);
/**
* @brief Delete NAT44-EI session
@@ -619,9 +611,6 @@ int nat44_ei_set_outside_address_and_port (nat44_ei_address_t *addresses,
ip4_address_t addr, u16 port,
nat_protocol_t protocol);
-int nat44_ei_del_address (nat44_ei_main_t *nm, ip4_address_t addr,
- u8 delete_sm);
-
void nat44_ei_free_session_data (nat44_ei_main_t *nm, nat44_ei_session_t *s,
u32 thread_index, u8 is_ha);
@@ -629,55 +618,56 @@ int nat44_ei_set_workers (uword *bitmap);
void nat44_ei_add_del_address_dpo (ip4_address_t addr, u8 is_add);
-int nat44_ei_add_address (nat44_ei_main_t *nm, ip4_address_t *addr,
- u32 vrf_id);
-
void nat44_ei_delete_session (nat44_ei_main_t *nm, nat44_ei_session_t *ses,
u32 thread_index);
-int nat44_ei_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del);
-
-int nat44_ei_interface_add_del_output_feature (u32 sw_if_index, u8 is_inside,
- int is_del);
-
-int nat44_ei_add_interface_address (nat44_ei_main_t *nm, u32 sw_if_index,
- int is_del);
-
/* Call back functions for clib_bihash_add_or_overwrite_stale */
int nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t *kv, void *arg);
int nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t *kv, void *arg);
-int nat44_ei_hairpinning (vlib_main_t *vm, vlib_node_runtime_t *node,
- nat44_ei_main_t *nm, u32 thread_index,
- vlib_buffer_t *b0, ip4_header_t *ip0,
- udp_header_t *udp0, tcp_header_t *tcp0, u32 proto0,
- int do_trace, u32 *required_thread_index);
+int nat44_ei_set_frame_queue_nelts (u32 frame_queue_nelts);
-void nat44_ei_hairpinning_sm_unknown_proto (nat44_ei_main_t *nm,
- vlib_buffer_t *b,
- ip4_header_t *ip);
+always_inline bool
+nat44_ei_is_session_static (nat44_ei_session_t *s)
+{
+ return (s->flags & NAT44_EI_SESSION_FLAG_STATIC_MAPPING);
+}
-u32 nat44_ei_icmp_hairpinning (nat44_ei_main_t *nm, vlib_buffer_t *b0,
- u32 thread_index, ip4_header_t *ip0,
- icmp46_header_t *icmp0,
- u32 *required_thread_index);
+always_inline bool
+nat44_ei_is_unk_proto_session (nat44_ei_session_t *s)
+{
+ return (s->flags & NAT44_EI_SESSION_FLAG_UNKNOWN_PROTO);
+}
-int nat44_ei_set_frame_queue_nelts (u32 frame_queue_nelts);
+always_inline bool
+nat44_ei_interface_is_inside (nat44_ei_interface_t *i)
+{
+ return (i->flags & NAT44_EI_INTERFACE_FLAG_IS_INSIDE);
+}
+
+always_inline bool
+nat44_ei_interface_is_outside (nat44_ei_interface_t *i)
+{
+ return (i->flags & NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE);
+}
-#define nat44_ei_is_session_static(sp) \
- (sp->flags & NAT44_EI_SESSION_FLAG_STATIC_MAPPING)
-#define nat44_ei_is_unk_proto_session(sp) \
- (sp->flags & NAT44_EI_SESSION_FLAG_UNKNOWN_PROTO)
+always_inline bool
+is_sm_addr_only (u32 f)
+{
+ return (f & NAT44_EI_SM_FLAG_ADDR_ONLY);
+}
-#define nat44_ei_interface_is_inside(ip) \
- (ip->flags & NAT44_EI_INTERFACE_FLAG_IS_INSIDE)
-#define nat44_ei_interface_is_outside(ip) \
- (ip->flags & NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE)
+always_inline bool
+is_sm_identity_nat (u32 f)
+{
+ return (f & NAT44_EI_SM_FLAG_IDENTITY_NAT);
+}
-#define nat44_ei_is_addr_only_static_mapping(mp) \
- (mp->flags & NAT44_EI_STATIC_MAPPING_FLAG_ADDR_ONLY)
-#define nat44_ei_is_identity_static_mapping(mp) \
- (mp->flags & NAT44_EI_STATIC_MAPPING_FLAG_IDENTITY_NAT)
+always_inline bool
+is_sm_switch_address (u32 f)
+{
+ return (f & NAT44_EI_SM_FLAG_SWITCH_ADDRESS);
+}
/* logging */
#define nat44_ei_log_err(...) \
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_api.c b/src/plugins/nat/nat44-ei/nat44_ei_api.c
index 427140ffb92..8671a556929 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_api.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_api.c
@@ -173,7 +173,9 @@ vl_api_nat44_ei_plugin_enable_disable_t_handler (
rv = nat44_ei_plugin_enable (c);
}
else
- rv = nat44_ei_plugin_disable ();
+ {
+ rv = nat44_ei_plugin_disable ();
+ }
REPLY_MACRO (VL_API_NAT44_EI_PLUGIN_ENABLE_DISABLE_REPLY);
}
@@ -469,9 +471,9 @@ vl_api_nat44_ei_add_del_address_range_t_handler (
for (i = 0; i < count; i++)
{
if (is_add)
- rv = nat44_ei_add_address (nm, &this_addr, vrf_id);
+ rv = nat44_ei_add_address (&this_addr, vrf_id);
else
- rv = nat44_ei_del_address (nm, this_addr, 0);
+ rv = nat44_ei_del_address (this_addr, 0);
if (rv)
goto send_reply;
@@ -533,18 +535,22 @@ vl_api_nat44_ei_interface_add_del_feature_t_handler (
nat44_ei_main_t *nm = &nat44_ei_main;
vl_api_nat44_ei_interface_add_del_feature_reply_t *rmp;
u32 sw_if_index = ntohl (mp->sw_if_index);
- u8 is_del;
int rv = 0;
- is_del = !mp->is_add;
-
VALIDATE_SW_IF_INDEX (mp);
- rv = nat44_ei_interface_add_del (sw_if_index, mp->flags & NAT44_EI_IF_INSIDE,
- is_del);
+ if (mp->is_add)
+ {
+ rv =
+ nat44_ei_add_interface (sw_if_index, mp->flags & NAT44_EI_IF_INSIDE);
+ }
+ else
+ {
+ rv =
+ nat44_ei_del_interface (sw_if_index, mp->flags & NAT44_EI_IF_INSIDE);
+ }
BAD_SW_IF_INDEX_LABEL;
-
REPLY_MACRO (VL_API_NAT44_EI_INTERFACE_ADD_DEL_FEATURE_REPLY);
}
@@ -588,19 +594,75 @@ vl_api_nat44_ei_interface_dump_t_handler (vl_api_nat44_ei_interface_dump_t *mp)
}
}
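+
+/* Track output-feature interfaces in a separate dummy pool so the legacy
+ * interface_add_del_output_feature API and its dump keep working. */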
+static_always_inline int
+add_del_dummy_output_interface (u32 sw_if_index, u8 is_inside, u8 is_add)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_interface_t *i;
+ int rv = 1;
+
+ pool_foreach (i, nm->output_feature_dummy_interfaces)
+ {
+ if (i->sw_if_index == sw_if_index)
+ {
+ if (!is_add)
+ {
+ pool_put (nm->output_feature_dummy_interfaces, i);
+ rv = 0;
+ }
+ goto done;
+ }
+ }
+
+ if (is_add)
+ {
+ pool_get (nm->output_feature_dummy_interfaces, i);
+ i->sw_if_index = sw_if_index;
+
+ if (is_inside)
+ {
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
+ }
+ else
+ {
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+ }
+
+ rv = 0;
+ }
+
+done:
+ return rv;
+}
+
static void
vl_api_nat44_ei_interface_add_del_output_feature_t_handler (
vl_api_nat44_ei_interface_add_del_output_feature_t *mp)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
vl_api_nat44_ei_interface_add_del_output_feature_reply_t *rmp;
- u32 sw_if_index = ntohl (mp->sw_if_index);
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ u32 sw_if_index;
int rv = 0;
VALIDATE_SW_IF_INDEX (mp);
- rv = nat44_ei_interface_add_del_output_feature (
- sw_if_index, mp->flags & NAT44_EI_IF_INSIDE, !mp->is_add);
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ // register all interfaces in the dummy structure
+ rv = add_del_dummy_output_interface (
+ sw_if_index, mp->flags & NAT44_EI_IF_INSIDE, mp->is_add);
+
+ if (!(mp->flags & NAT44_EI_IF_INSIDE))
+ {
+ if (mp->is_add)
+ {
+ rv = nat44_ei_add_output_interface (sw_if_index);
+ }
+ else
+ {
+ rv = nat44_ei_del_output_interface (sw_if_index);
+ }
+ }
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_NAT44_EI_INTERFACE_ADD_DEL_OUTPUT_FEATURE_REPLY);
@@ -622,7 +684,9 @@ send_nat44_ei_interface_output_feature_details (nat44_ei_interface_t *i,
rmp->context = context;
if (nat44_ei_interface_is_inside (i))
- rmp->flags |= NAT44_EI_IF_INSIDE;
+ {
+ rmp->flags |= NAT44_EI_IF_INSIDE;
+ }
vl_api_send_msg (reg, (u8 *) rmp);
}
@@ -639,49 +703,135 @@ vl_api_nat44_ei_interface_output_feature_dump_t_handler (
if (!reg)
return;
- pool_foreach (i, nm->output_feature_interfaces)
+ pool_foreach (i, nm->output_feature_dummy_interfaces)
{
send_nat44_ei_interface_output_feature_details (i, reg, mp->context);
}
}
static void
+vl_api_nat44_ei_add_del_output_interface_t_handler (
+ vl_api_nat44_ei_add_del_output_interface_t *mp)
+{
+ vl_api_nat44_ei_add_del_output_interface_reply_t *rmp;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ if (mp->is_add)
+ {
+ rv = nat44_ei_add_output_interface (mp->sw_if_index);
+ }
+ else
+ {
+ rv = nat44_ei_del_output_interface (mp->sw_if_index);
+ }
+
+bad_sw_if_index:
+ REPLY_MACRO_END (VL_API_NAT44_EI_ADD_DEL_OUTPUT_INTERFACE_REPLY);
+}
+
+#define vl_endianfun
+#include <nat/nat44-ei/nat44_ei.api.h>
+#undef vl_endianfun
+static void
+send_nat44_ei_output_interface_details (u32 index, vl_api_registration_t *rp,
+ u32 context)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ vl_api_nat44_ei_output_interface_details_t *rmp;
+ nat44_ei_interface_t *i =
+ pool_elt_at_index (nm->output_feature_interfaces, index);
+
+  /* Make sure every field is initialized (or don't skip the clib_memset ()) */
+ REPLY_MACRO_DETAILS4 (
+ VL_API_NAT44_EI_OUTPUT_INTERFACE_DETAILS, rp, context, ({
+ rmp->sw_if_index = i->sw_if_index;
+
+ /* Endian hack until apigen registers _details
+ * endian functions */
+ vl_api_nat44_ei_output_interface_details_t_endian (rmp);
+ rmp->_vl_msg_id = htons (rmp->_vl_msg_id);
+ rmp->context = htonl (rmp->context);
+ }));
+}
+
+static void
+vl_api_nat44_ei_output_interface_get_t_handler (
+ vl_api_nat44_ei_output_interface_get_t *mp)
+{
+ vl_api_nat44_ei_output_interface_get_reply_t *rmp;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ i32 rv = 0;
+
+ if (pool_elts (nm->output_feature_interfaces) == 0)
+ {
+ REPLY_MACRO (VL_API_NAT44_EI_OUTPUT_INTERFACE_GET_REPLY);
+ return;
+ }
+
+ REPLY_AND_DETAILS_MACRO (
+ VL_API_NAT44_EI_OUTPUT_INTERFACE_GET_REPLY, nm->output_feature_interfaces,
+ ({ send_nat44_ei_output_interface_details (cursor, rp, mp->context); }));
+}
+
+static void
vl_api_nat44_ei_add_del_static_mapping_t_handler (
vl_api_nat44_ei_add_del_static_mapping_t *mp)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
vl_api_nat44_ei_add_del_static_mapping_reply_t *rmp;
- ip4_address_t local_addr, external_addr;
- u16 local_port = 0, external_port = 0;
- u32 vrf_id, external_sw_if_index;
+
+ nat44_ei_main_t *nm = &nat44_ei_main;
int rv = 0;
- nat_protocol_t proto;
+
+ ip4_address_t l_addr, e_addr, pool_addr = { 0 };
+ u32 sw_if_index, flags = 0, vrf_id;
+ u16 l_port = 0, e_port = 0;
+ nat_protocol_t proto = 0;
u8 *tag = 0;
- memcpy (&local_addr.as_u8, mp->local_ip_address, 4);
- memcpy (&external_addr.as_u8, mp->external_ip_address, 4);
+ memcpy (&l_addr.as_u8, mp->local_ip_address, 4);
- if (!(mp->flags & NAT44_EI_ADDR_ONLY_MAPPING))
+ if (mp->flags & NAT44_EI_ADDR_ONLY_MAPPING)
{
- local_port = mp->local_port;
- external_port = mp->external_port;
+ flags |= NAT44_EI_SM_FLAG_ADDR_ONLY;
+ }
+ else
+ {
+ l_port = mp->local_port;
+ e_port = mp->external_port;
+ proto = ip_proto_to_nat_proto (mp->protocol);
}
- vrf_id = clib_net_to_host_u32 (mp->vrf_id);
- external_sw_if_index = clib_net_to_host_u32 (mp->external_sw_if_index);
- proto = ip_proto_to_nat_proto (mp->protocol);
-
- mp->tag[sizeof (mp->tag) - 1] = 0;
- tag = format (0, "%s", mp->tag);
- vec_terminate_c_string (tag);
-
- rv = nat44_ei_add_del_static_mapping (
- local_addr, external_addr, local_port, external_port, proto,
- external_sw_if_index, vrf_id, mp->flags & NAT44_EI_ADDR_ONLY_MAPPING, 0,
- tag, mp->is_add);
+ sw_if_index = clib_net_to_host_u32 (mp->external_sw_if_index);
+ if (sw_if_index != ~0)
+ {
+ flags |= NAT44_EI_SM_FLAG_SWITCH_ADDRESS;
+ }
+ else
+ {
+ memcpy (&e_addr.as_u8, mp->external_ip_address, 4);
+ }
- vec_free (tag);
+ vrf_id = clib_net_to_host_u32 (mp->vrf_id);
+ if (mp->is_add)
+ {
+ mp->tag[sizeof (mp->tag) - 1] = 0;
+ tag = format (0, "%s", mp->tag);
+ vec_terminate_c_string (tag);
+
+ rv = nat44_ei_add_static_mapping (l_addr, e_addr, l_port, e_port, proto,
+ vrf_id, sw_if_index, flags, pool_addr,
+ tag);
+ vec_free (tag);
+ }
+ else
+ {
+ rv = nat44_ei_del_static_mapping (l_addr, e_addr, l_port, e_port, proto,
+ vrf_id, sw_if_index, flags);
+ }
REPLY_MACRO (VL_API_NAT44_EI_ADD_DEL_STATIC_MAPPING_REPLY);
}
@@ -704,7 +854,7 @@ send_nat44_ei_static_mapping_details (nat44_ei_static_mapping_t *m,
rmp->vrf_id = htonl (m->vrf_id);
rmp->context = context;
- if (nat44_ei_is_addr_only_static_mapping (m))
+ if (is_sm_addr_only (m->flags))
{
rmp->flags |= NAT44_EI_ADDR_ONLY_MAPPING;
}
@@ -738,7 +888,7 @@ send_nat44_ei_static_map_resolve_details (nat44_ei_static_map_resolve_t *m,
rmp->vrf_id = htonl (m->vrf_id);
rmp->context = context;
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
{
rmp->flags |= NAT44_EI_ADDR_ONLY_MAPPING;
}
@@ -770,14 +920,14 @@ vl_api_nat44_ei_static_mapping_dump_t_handler (
pool_foreach (m, nm->static_mappings)
{
- if (!nat44_ei_is_identity_static_mapping (m))
+ if (!is_sm_identity_nat (m->flags))
send_nat44_ei_static_mapping_details (m, reg, mp->context);
}
for (j = 0; j < vec_len (nm->to_resolve); j++)
{
rp = nm->to_resolve + j;
- if (!rp->identity_nat)
+ if (!is_sm_identity_nat (rp->flags))
send_nat44_ei_static_map_resolve_details (rp, reg, mp->context);
}
}
@@ -786,35 +936,56 @@ static void
vl_api_nat44_ei_add_del_identity_mapping_t_handler (
vl_api_nat44_ei_add_del_identity_mapping_t *mp)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
vl_api_nat44_ei_add_del_identity_mapping_reply_t *rmp;
- ip4_address_t addr;
- u16 port = 0;
- u32 vrf_id, sw_if_index;
+
+ nat44_ei_main_t *nm = &nat44_ei_main;
int rv = 0;
- nat_protocol_t proto = NAT_PROTOCOL_OTHER;
+
+ ip4_address_t addr, pool_addr = { 0 };
+ u32 sw_if_index, flags, vrf_id;
+ nat_protocol_t proto = 0;
+ u16 port = 0;
u8 *tag = 0;
- if (!(mp->flags & NAT44_EI_ADDR_ONLY_MAPPING))
+ flags = NAT44_EI_SM_FLAG_IDENTITY_NAT;
+
+ if (mp->flags & NAT44_EI_ADDR_ONLY_MAPPING)
+ {
+ flags |= NAT44_EI_SM_FLAG_ADDR_ONLY;
+ }
+ else
{
port = mp->port;
proto = ip_proto_to_nat_proto (mp->protocol);
}
- vrf_id = clib_net_to_host_u32 (mp->vrf_id);
+
sw_if_index = clib_net_to_host_u32 (mp->sw_if_index);
if (sw_if_index != ~0)
- addr.as_u32 = 0;
+ {
+ flags |= NAT44_EI_SM_FLAG_SWITCH_ADDRESS;
+ }
else
- memcpy (&addr.as_u8, mp->ip_address, 4);
- mp->tag[sizeof (mp->tag) - 1] = 0;
- tag = format (0, "%s", mp->tag);
- vec_terminate_c_string (tag);
+ {
+ memcpy (&addr.as_u8, mp->ip_address, 4);
+ }
- rv = nat44_ei_add_del_static_mapping (
- addr, addr, port, port, proto, sw_if_index, vrf_id,
- mp->flags & NAT44_EI_ADDR_ONLY_MAPPING, 1, tag, mp->is_add);
+ vrf_id = clib_net_to_host_u32 (mp->vrf_id);
- vec_free (tag);
+ if (mp->is_add)
+ {
+ mp->tag[sizeof (mp->tag) - 1] = 0;
+ tag = format (0, "%s", mp->tag);
+ vec_terminate_c_string (tag);
+
+ rv = nat44_ei_add_static_mapping (addr, addr, port, port, proto, vrf_id,
+ sw_if_index, flags, pool_addr, tag);
+ vec_free (tag);
+ }
+ else
+ {
+ rv = nat44_ei_del_static_mapping (addr, addr, port, port, proto, vrf_id,
+ sw_if_index, flags);
+ }
REPLY_MACRO (VL_API_NAT44_EI_ADD_DEL_IDENTITY_MAPPING_REPLY);
}
@@ -833,7 +1004,7 @@ send_nat44_ei_identity_mapping_details (nat44_ei_static_mapping_t *m,
rmp->_vl_msg_id =
ntohs (VL_API_NAT44_EI_IDENTITY_MAPPING_DETAILS + nm->msg_id_base);
- if (nat44_ei_is_addr_only_static_mapping (m))
+ if (is_sm_addr_only (m->flags))
rmp->flags |= NAT44_EI_ADDR_ONLY_MAPPING;
clib_memcpy (rmp->ip_address, &(m->local_addr), 4);
@@ -860,7 +1031,7 @@ send_nat44_ei_identity_map_resolve_details (nat44_ei_static_map_resolve_t *m,
rmp->_vl_msg_id =
ntohs (VL_API_NAT44_EI_IDENTITY_MAPPING_DETAILS + nm->msg_id_base);
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
rmp->flags = (vl_api_nat44_ei_config_flags_t) NAT44_EI_ADDR_ONLY_MAPPING;
rmp->port = m->l_port;
@@ -890,7 +1061,7 @@ vl_api_nat44_ei_identity_mapping_dump_t_handler (
pool_foreach (m, nm->static_mappings)
{
- if (nat44_ei_is_identity_static_mapping (m))
+ if (is_sm_identity_nat (m->flags))
{
pool_foreach_index (j, m->locals)
{
@@ -902,7 +1073,7 @@ vl_api_nat44_ei_identity_mapping_dump_t_handler (
for (j = 0; j < vec_len (nm->to_resolve); j++)
{
rp = nm->to_resolve + j;
- if (rp->identity_nat)
+ if (is_sm_identity_nat (rp->flags))
send_nat44_ei_identity_map_resolve_details (rp, reg, mp->context);
}
}
@@ -915,13 +1086,17 @@ vl_api_nat44_ei_add_del_interface_addr_t_handler (
vl_api_nat44_ei_add_del_interface_addr_reply_t *rmp;
u32 sw_if_index = ntohl (mp->sw_if_index);
int rv = 0;
- u8 is_del;
-
- is_del = !mp->is_add;
VALIDATE_SW_IF_INDEX (mp);
- rv = nat44_ei_add_interface_address (nm, sw_if_index, is_del);
+ if (mp->is_add)
+ {
+ rv = nat44_ei_add_interface_address (sw_if_index);
+ }
+ else
+ {
+ rv = nat44_ei_del_interface_address (sw_if_index);
+ }
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_NAT44_EI_ADD_DEL_INTERFACE_ADDR_REPLY);
@@ -1044,6 +1219,44 @@ send_nat44_ei_user_session_details (nat44_ei_session_t *s,
}
static void
+send_nat44_ei_user_session_v2_details (nat44_ei_session_t *s,
+ vl_api_registration_t *reg, u32 context)
+{
+ vl_api_nat44_ei_user_session_v2_details_t *rmp;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_EI_USER_SESSION_V2_DETAILS + nm->msg_id_base);
+ clib_memcpy (rmp->outside_ip_address, (&s->out2in.addr), 4);
+ clib_memcpy (rmp->inside_ip_address, (&s->in2out.addr), 4);
+
+ if (nat44_ei_is_session_static (s))
+ rmp->flags |= NAT44_EI_STATIC_MAPPING;
+
+ rmp->last_heard = clib_host_to_net_u64 ((u64) s->last_heard);
+ rmp->time_since_last_heard = clib_host_to_net_u64 (
+ (u64) (vlib_time_now (vlib_get_main ()) - s->last_heard));
+ rmp->total_bytes = clib_host_to_net_u64 (s->total_bytes);
+ rmp->total_pkts = ntohl (s->total_pkts);
+ rmp->context = context;
+ if (nat44_ei_is_unk_proto_session (s))
+ {
+ rmp->outside_port = 0;
+ rmp->inside_port = 0;
+ rmp->protocol = ntohs (s->in2out.port);
+ }
+ else
+ {
+ rmp->outside_port = s->out2in.port;
+ rmp->inside_port = s->in2out.port;
+ rmp->protocol = ntohs (nat_proto_to_ip_proto (s->nat_proto));
+ }
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
vl_api_nat44_ei_user_session_dump_t_handler (
vl_api_nat44_ei_user_session_dump_t *mp)
{
@@ -1097,6 +1310,59 @@ vl_api_nat44_ei_user_session_dump_t_handler (
}
static void
+vl_api_nat44_ei_user_session_v2_dump_t_handler (
+ vl_api_nat44_ei_user_session_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_main_per_thread_data_t *tnm;
+ nat44_ei_session_t *s;
+ clib_bihash_kv_8_8_t key, value;
+ nat44_ei_user_key_t ukey;
+ nat44_ei_user_t *u;
+ u32 session_index, head_index, elt_index;
+ dlist_elt_t *head, *elt;
+ ip4_header_t ip;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ clib_memcpy (&ukey.addr, mp->ip_address, 4);
+ ip.src_address.as_u32 = ukey.addr.as_u32;
+ ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id));
+ key.key = ukey.as_u64;
+ if (nm->num_workers > 1)
+ tnm = vec_elt_at_index (
+ nm->per_thread_data,
+ nat44_ei_get_in2out_worker_index (&ip, ukey.fib_index, 0));
+ else
+ tnm = vec_elt_at_index (nm->per_thread_data, nm->num_workers);
+
+ if (clib_bihash_search_8_8 (&tnm->user_hash, &key, &value))
+ return;
+ u = pool_elt_at_index (tnm->users, value.value);
+ if (!u->nsessions && !u->nstaticsessions)
+ return;
+
+ head_index = u->sessions_per_user_list_head_index;
+ head = pool_elt_at_index (tnm->list_pool, head_index);
+ elt_index = head->next;
+ elt = pool_elt_at_index (tnm->list_pool, elt_index);
+ session_index = elt->value;
+ while (session_index != ~0)
+ {
+ s = pool_elt_at_index (tnm->sessions, session_index);
+
+ send_nat44_ei_user_session_v2_details (s, reg, mp->context);
+
+ elt_index = elt->next;
+ elt = pool_elt_at_index (tnm->list_pool, elt_index);
+ session_index = elt->value;
+ }
+}
+
+static void
vl_api_nat44_ei_del_session_t_handler (vl_api_nat44_ei_del_session_t *mp)
{
nat44_ei_main_t *nm = &nat44_ei_main;
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_cli.c b/src/plugins/nat/nat44-ei/nat44_ei_cli.c
index a009f0292d3..eab50a4bc6c 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_cli.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_cli.c
@@ -112,9 +112,9 @@ format_nat44_ei_static_mapping (u8 *s, va_list *args)
nat44_ei_static_mapping_t *m = va_arg (*args, nat44_ei_static_mapping_t *);
nat44_ei_lb_addr_port_t *local;
- if (nat44_ei_is_identity_static_mapping (m))
+ if (is_sm_identity_nat (m->flags))
{
- if (nat44_ei_is_addr_only_static_mapping (m))
+ if (is_sm_addr_only (m->flags))
s = format (s, "identity mapping %U", format_ip4_address,
&m->local_addr);
else
@@ -130,7 +130,7 @@ format_nat44_ei_static_mapping (u8 *s, va_list *args)
return s;
}
- if (nat44_ei_is_addr_only_static_mapping (m))
+ if (is_sm_addr_only (m->flags))
{
s = format (s, "local %U external %U vrf %d", format_ip4_address,
&m->local_addr, format_ip4_address, &m->external_addr,
@@ -154,7 +154,7 @@ format_nat44_ei_static_map_to_resolve (u8 *s, va_list *args)
va_arg (*args, nat44_ei_static_map_resolve_t *);
vnet_main_t *vnm = vnet_get_main ();
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
s =
format (s, "local %U external %U vrf %d", format_ip4_address, &m->l_addr,
format_vnet_sw_if_index_name, vnm, m->sw_if_index, m->vrf_id);
@@ -312,7 +312,7 @@ done:
}
static clib_error_t *
-nat_show_workers_commnad_fn (vlib_main_t *vm, unformat_input_t *input,
+nat_show_workers_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
nat44_ei_main_t *nm = &nat44_ei_main;
@@ -338,7 +338,7 @@ nat44_ei_set_log_level_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
nat44_ei_main_t *nm = &nat44_ei_main;
- u8 log_level = NAT_LOG_NONE;
+ u32 log_level = NAT_LOG_NONE;
clib_error_t *error = 0;
if (!unformat_user (input, unformat_line_input, line_input))
@@ -790,9 +790,9 @@ add_address_command_fn (vlib_main_t *vm, unformat_input_t *input,
for (i = 0; i < count; i++)
{
if (is_add)
- rv = nat44_ei_add_address (nm, &this_addr, vrf_id);
+ rv = nat44_ei_add_address (&this_addr, vrf_id);
else
- rv = nat44_ei_del_address (nm, this_addr, 0);
+ rv = nat44_ei_del_address (this_addr, 0);
switch (rv)
{
@@ -841,7 +841,7 @@ nat44_ei_show_addresses_command_fn (vlib_main_t *vm, unformat_input_t *input,
else
vlib_cli_output (vm, " tenant VRF independent");
#define _(N, i, n, s) \
- vlib_cli_output (vm, " %d busy %s ports", ap->busy_##n##_ports, s);
+ vlib_cli_output (vm, " %d busy %s ports", ap->busy_ports[i], s);
foreach_nat_protocol
#undef _
}
@@ -859,8 +859,7 @@ nat44_ei_feature_command_fn (vlib_main_t *vm, unformat_input_t *input,
u32 *inside_sw_if_indices = 0;
u32 *outside_sw_if_indices = 0;
u8 is_output_feature = 0;
- int is_del = 0;
- int i;
+ int i, rv, is_del = 0;
sw_if_index = ~0;
@@ -894,8 +893,15 @@ nat44_ei_feature_command_fn (vlib_main_t *vm, unformat_input_t *input,
sw_if_index = inside_sw_if_indices[i];
if (is_output_feature)
{
- if (nat44_ei_interface_add_del_output_feature (sw_if_index, 1,
- is_del))
+ if (is_del)
+ {
+ rv = nat44_ei_del_output_interface (sw_if_index);
+ }
+ else
+ {
+ rv = nat44_ei_add_output_interface (sw_if_index);
+ }
+ if (rv)
{
error = clib_error_return (
0, "%s %U failed", is_del ? "del" : "add",
@@ -905,7 +911,15 @@ nat44_ei_feature_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
else
{
- if (nat44_ei_interface_add_del (sw_if_index, 1, is_del))
+ if (is_del)
+ {
+ rv = nat44_ei_del_interface (sw_if_index, 1);
+ }
+ else
+ {
+ rv = nat44_ei_add_interface (sw_if_index, 1);
+ }
+ if (rv)
{
error = clib_error_return (
0, "%s %U failed", is_del ? "del" : "add",
@@ -923,8 +937,15 @@ nat44_ei_feature_command_fn (vlib_main_t *vm, unformat_input_t *input,
sw_if_index = outside_sw_if_indices[i];
if (is_output_feature)
{
- if (nat44_ei_interface_add_del_output_feature (sw_if_index, 0,
- is_del))
+ if (is_del)
+ {
+ rv = nat44_ei_del_output_interface (sw_if_index);
+ }
+ else
+ {
+ rv = nat44_ei_add_output_interface (sw_if_index);
+ }
+ if (rv)
{
error = clib_error_return (
0, "%s %U failed", is_del ? "del" : "add",
@@ -934,7 +955,15 @@ nat44_ei_feature_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
else
{
- if (nat44_ei_interface_add_del (sw_if_index, 0, is_del))
+ if (is_del)
+ {
+ rv = nat44_ei_del_interface (sw_if_index, 0);
+ }
+ else
+ {
+ rv = nat44_ei_add_interface (sw_if_index, 0);
+ }
+ if (rv)
{
error = clib_error_return (
0, "%s %U failed", is_del ? "del" : "add",
@@ -990,14 +1019,16 @@ add_static_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- clib_error_t *error = 0;
- ip4_address_t l_addr, e_addr;
- u32 l_port = 0, e_port = 0, vrf_id = ~0;
- int is_add = 1, addr_only = 1, rv;
- u32 sw_if_index = ~0;
vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ int rv;
+
nat_protocol_t proto = NAT_PROTOCOL_OTHER;
- u8 proto_set = 0;
+ ip4_address_t l_addr, e_addr, pool_addr = { 0 };
+ u32 l_port = 0, e_port = 0, vrf_id = ~0;
+ u8 l_port_set = 0, e_port_set = 0;
+ u32 sw_if_index = ~0, flags = 0;
+ int is_add = 1;
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_EI_EXPECTED_ARGUMENT);
@@ -1006,29 +1037,37 @@ add_static_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "local %U %u", unformat_ip4_address, &l_addr,
&l_port))
- addr_only = 0;
+ {
+ l_port_set = 1;
+ }
else if (unformat (line_input, "local %U", unformat_ip4_address,
&l_addr))
;
else if (unformat (line_input, "external %U %u", unformat_ip4_address,
&e_addr, &e_port))
- addr_only = 0;
+ {
+ e_port_set = 1;
+ }
else if (unformat (line_input, "external %U", unformat_ip4_address,
&e_addr))
;
else if (unformat (line_input, "external %U %u",
unformat_vnet_sw_interface, vnm, &sw_if_index,
&e_port))
- addr_only = 0;
+ {
+ e_port_set = 1;
+ }
else if (unformat (line_input, "external %U", unformat_vnet_sw_interface,
vnm, &sw_if_index))
;
else if (unformat (line_input, "vrf %u", &vrf_id))
;
else if (unformat (line_input, "%U", unformat_nat_protocol, &proto))
- proto_set = 1;
+ ;
else if (unformat (line_input, "del"))
- is_add = 0;
+ {
+ is_add = 0;
+ }
else
{
error = clib_error_return (0, "unknown input: '%U'",
@@ -1037,25 +1076,38 @@ add_static_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
- if (addr_only)
+ if (l_port_set != e_port_set)
{
- if (proto_set)
- {
- error = clib_error_return (
- 0, "address only mapping doesn't support protocol");
- goto done;
- }
+ error = clib_error_return (0, "Either both ports are set or none.");
+ goto done;
}
- else if (!proto_set)
+
+ if (!l_port_set)
{
- error = clib_error_return (0, "protocol is required");
- goto done;
+ flags |= NAT44_EI_SM_FLAG_ADDR_ONLY;
+ }
+ else
+ {
+ l_port = clib_host_to_net_u16 (l_port);
+ e_port = clib_host_to_net_u16 (e_port);
+ }
+
+ if (sw_if_index != ~0)
+ {
+ flags |= NAT44_EI_SM_FLAG_SWITCH_ADDRESS;
}
- rv = nat44_ei_add_del_static_mapping (
- l_addr, e_addr, clib_host_to_net_u16 (l_port),
- clib_host_to_net_u16 (e_port), proto, sw_if_index, vrf_id, addr_only, 0, 0,
- is_add);
+ if (is_add)
+ {
+ rv =
+ nat44_ei_add_static_mapping (l_addr, e_addr, l_port, e_port, proto,
+ vrf_id, sw_if_index, flags, pool_addr, 0);
+ }
+ else
+ {
+ rv = nat44_ei_del_static_mapping (l_addr, e_addr, l_port, e_port, proto,
+ vrf_id, sw_if_index, flags);
+ }
switch (rv)
{
@@ -1091,17 +1143,15 @@ add_identity_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
clib_error_t *error = 0;
- u32 port = 0, vrf_id = ~0;
+
+ int rv, is_add = 1, port_set = 0;
+ u32 sw_if_index = ~0, port, flags, vrf_id = ~0;
+ nat_protocol_t proto = NAT_PROTOCOL_OTHER;
ip4_address_t addr;
- int is_add = 1;
- int addr_only = 1;
- u32 sw_if_index = ~0;
- vnet_main_t *vnm = vnet_get_main ();
- int rv;
- nat_protocol_t proto;
- addr.as_u32 = 0;
+ flags = NAT44_EI_SM_FLAG_IDENTITY_NAT;
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_EI_EXPECTED_ARGUMENT);
@@ -1117,9 +1167,13 @@ add_identity_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "%U %u", unformat_nat_protocol, &proto,
&port))
- addr_only = 0;
+ {
+ port_set = 1;
+ }
else if (unformat (line_input, "del"))
- is_add = 0;
+ {
+ is_add = 0;
+ }
else
{
error = clib_error_return (0, "unknown input: '%U'",
@@ -1128,9 +1182,31 @@ add_identity_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
- rv = nat44_ei_add_del_static_mapping (
- addr, addr, clib_host_to_net_u16 (port), clib_host_to_net_u16 (port),
- proto, sw_if_index, vrf_id, addr_only, 1, 0, is_add);
+ if (!port_set)
+ {
+ flags |= NAT44_EI_SM_FLAG_ADDR_ONLY;
+ }
+ else
+ {
+ port = clib_host_to_net_u16 (port);
+ }
+
+ if (sw_if_index != ~0)
+ {
+ flags |= NAT44_EI_SM_FLAG_SWITCH_ADDRESS;
+ }
+
+ if (is_add)
+ {
+
+ rv = nat44_ei_add_static_mapping (addr, addr, port, port, proto, vrf_id,
+ sw_if_index, flags, addr, 0);
+ }
+ else
+ {
+ rv = nat44_ei_del_static_mapping (addr, addr, port, port, proto, vrf_id,
+ sw_if_index, flags);
+ }
switch (rv)
{
@@ -1184,12 +1260,11 @@ nat44_ei_add_interface_address_command_fn (vlib_main_t *vm,
unformat_input_t *input,
vlib_cli_command_t *cmd)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index;
- int rv;
- int is_del = 0;
+ nat44_ei_main_t *nm = &nat44_ei_main;
clib_error_t *error = 0;
+ int rv, is_del = 0;
+ u32 sw_if_index;
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_EI_EXPECTED_ARGUMENT);
@@ -1200,7 +1275,9 @@ nat44_ei_add_interface_address_command_fn (vlib_main_t *vm,
nm->vnet_main, &sw_if_index))
;
else if (unformat (line_input, "del"))
- is_del = 1;
+ {
+ is_del = 1;
+ }
else
{
error = clib_error_return (0, "unknown input '%U'",
@@ -1209,17 +1286,21 @@ nat44_ei_add_interface_address_command_fn (vlib_main_t *vm,
}
}
- rv = nat44_ei_add_interface_address (nm, sw_if_index, is_del);
-
- switch (rv)
+ if (!is_del)
{
- case 0:
- break;
-
- default:
- error = clib_error_return (
- 0, "nat44_ei_add_interface_address returned %d", rv);
- goto done;
+ rv = nat44_ei_add_interface_address (sw_if_index);
+ if (rv)
+ {
+ error = clib_error_return (0, "add address returned %d", rv);
+ }
+ }
+ else
+ {
+ rv = nat44_ei_del_interface_address (sw_if_index);
+ if (rv)
+ {
+ error = clib_error_return (0, "del address returned %d", rv);
+ }
}
done:
@@ -1252,6 +1333,8 @@ nat44_ei_show_sessions_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = 0;
+ ip4_address_t saddr;
+ u8 filter_saddr = 0;
nat44_ei_main_per_thread_data_t *tnm;
nat44_ei_main_t *nm = &nat44_ei_main;
@@ -1266,6 +1349,9 @@ nat44_ei_show_sessions_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "detail"))
detail = 1;
+ else if (unformat (line_input, "filter saddr %U", unformat_ip4_address,
+ &saddr))
+ filter_saddr = 1;
else
{
error = clib_error_return (0, "unknown input '%U'",
@@ -1288,6 +1374,8 @@ print:
nat44_ei_user_t *u;
pool_foreach (u, tnm->users)
{
+ if (filter_saddr && saddr.as_u32 != u->addr.as_u32)
+ continue;
vlib_cli_output (vm, " %U", format_nat44_ei_user, tnm, u, detail);
}
}
@@ -1486,7 +1574,6 @@ nat_show_timeouts_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
nat44_ei_main_t *nm = &nat44_ei_main;
- // TODO: make format timeout function
vlib_cli_output (vm, "udp timeout: %dsec", nm->timeouts.udp);
vlib_cli_output (vm, "tcp-established timeout: %dsec",
nm->timeouts.tcp.established);
@@ -1502,23 +1589,23 @@ nat_show_timeouts_command_fn (vlib_main_t *vm, unformat_input_t *input,
* @cliexstart{nat44 ei}
* Enable nat44 ei plugin
* To enable nat44-ei, use:
- * vpp# nat44 ei enable
+ * vpp# nat44 ei plugin enable
* To disable nat44-ei, use:
- * vpp# nat44 ei disable
+ * vpp# nat44 ei plugin disable
* To enable nat44 ei static mapping only, use:
- * vpp# nat44 ei enable static-mapping
+ * vpp# nat44 ei plugin enable static-mapping
* To enable nat44 ei static mapping with connection tracking, use:
- * vpp# nat44 ei enable static-mapping connection-tracking
+ * vpp# nat44 ei plugin enable static-mapping connection-tracking
* To enable nat44 ei out2in dpo, use:
- * vpp# nat44 ei enable out2in-dpo
+ * vpp# nat44 ei plugin enable out2in-dpo
* To set inside-vrf outside-vrf, use:
- * vpp# nat44 ei enable inside-vrf <id> outside-vrf <id>
+ * vpp# nat44 ei plugin enable inside-vrf <id> outside-vrf <id>
* @cliexend
?*/
VLIB_CLI_COMMAND (nat44_ei_enable_disable_command, static) = {
- .path = "nat44 ei",
+ .path = "nat44 ei plugin",
.short_help =
- "nat44 ei <enable [sessions <max-number>] [users <max-number>] "
+ "nat44 ei plugin <enable [sessions <max-number>] [users <max-number>] "
"[static-mappig-only [connection-tracking]|out2in-dpo] [inside-vrf "
"<vrf-id>] [outside-vrf <vrf-id>] [user-sessions <max-number>]>|disable",
.function = nat44_ei_enable_disable_command_fn,
@@ -1550,7 +1637,7 @@ VLIB_CLI_COMMAND (set_workers_command, static) = {
VLIB_CLI_COMMAND (nat_show_workers_command, static) = {
.path = "show nat44 ei workers",
.short_help = "show nat44 ei workers",
- .function = nat_show_workers_commnad_fn,
+ .function = nat_show_workers_command_fn,
};
/*?
@@ -1930,7 +2017,7 @@ VLIB_CLI_COMMAND (nat44_ei_show_interface_address_command, static) = {
?*/
VLIB_CLI_COMMAND (nat44_ei_show_sessions_command, static) = {
.path = "show nat44 ei sessions",
- .short_help = "show nat44 ei sessions [detail]",
+ .short_help = "show nat44 ei sessions [detail] [filter saddr <ip>]",
.function = nat44_ei_show_sessions_command_fn,
};
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_ha.c b/src/plugins/nat/nat44-ei/nat44_ei_ha.c
index 39bce255bd6..9546a595cc2 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_ha.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_ha.c
@@ -926,14 +926,12 @@ nat_ha_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_ha_worker_node) = {
.function = nat_ha_worker_fn,
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
.name = "nat44-ei-ha-worker",
};
-/* *INDENT-ON* */
/* periodically send interrupt to each thread */
static uword
@@ -969,13 +967,11 @@ nat_ha_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_ha_process_node) = {
.function = nat_ha_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "nat44-ei-ha-process",
};
-/* *INDENT-ON* */
void
nat_ha_get_resync_status (u8 * in_resync, u32 * resync_ack_missed)
@@ -1166,7 +1162,6 @@ nat_ha_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_ha_node) = {
.function = nat_ha_node_fn,
.name = "nat44-ei-ha",
@@ -1181,7 +1176,6 @@ VLIB_REGISTER_NODE (nat_ha_node) = {
[NAT_HA_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
typedef struct
{
@@ -1286,7 +1280,6 @@ nat_ha_resync (u32 client_index, u32 pid,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_ha_handoff_node) = {
.function = nat_ha_handoff_node_fn,
.name = "nat44-ei-ha-handoff",
@@ -1300,7 +1293,6 @@ VLIB_REGISTER_NODE (nat_ha_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md b/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md
deleted file mode 100644
index f0ea209e250..00000000000
--- a/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# Active-Passive NAT HA {#nat_ha_doc}
-
-## Introduction
-
-One NAT node actively manages traffic while the other is synchronized and ready to transition to the active state and takes over seamlessly and enforces the same NAT sessions when failure occur. Both nodes share the same configuration settings.
-
-## Configuration
-
-### NAT HA protocol
-Session synchronization traffic is distributed through an IPv4 UDP connection. The active node sends NAT HA protocol events to passive node. To achieve reliable transfer NAT HA protocol uses acknowledgement with re-transmission. This require the passive node to respond with an acknowledgement message as it receives the data. The active node keeps a record of each packet it sends and maintains a timer from when the packet was sent. The active node re-transmits a packet if the timer expires before receiving the acknowledgement.
-
-### Topology
-
-The two NAT nodes have a dedicated link (interface GE0/0/3 on both) to synchronize NAT sessions using NAT HA protocol.
-
-```
- +-----------------------+
- | outside network |
- +-----------------------+
- / \
- / \
- / \
- / \
- / \
-+---------+ +---------+
-| GE0/0/1 | Active Passive | GE0/0/1 |
-| | | |
-| GE0/0/3|-------------------|GE0/0/3 |
-| | sync network | |
-| GE0/0/0 | | GE0/0/0 |
-+---------+ +---------+
- \ /
- \ /
- \ /
- \ /
- \ /
- +-----------------------+
- | inside network |
- +-----------------------+
-```
-
-### Active node configuration
-
-```
-set interface ip address GigabitEthernet0/0/1 10.15.7.101/24
-set interface ip address GigabitEthernet0/0/0 172.16.10.101/24
-set interface ip address GigabitEthernet0/0/3 10.0.0.1/24
-set interface state GigabitEthernet0/0/0 up
-set interface state GigabitEthernet0/0/1 up
-set interface state GigabitEthernet0/0/3 up
-set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1
-nat44 add address 10.15.7.100
-nat ha listener 10.0.0.1:1234
-nat ha failover 10.0.0.2:2345
-```
-
-### Passive node configuration
-
-```
-set interface ip address GigabitEthernet0/0/1 10.15.7.102/24
-set interface ip address GigabitEthernet0/0/0 172.16.10.102/24
-set interface ip address GigabitEthernet0/0/3 10.0.0.2/24
-set interface state GigabitEthernet0/0/0 up
-set interface state GigabitEthernet0/0/1 up
-set interface state GigabitEthernet0/0/3 up
-set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1
-nat44 add address 10.15.7.100
-nat ha listener 10.0.0.2:2345
-```
-
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.rst b/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.rst
new file mode 100644
index 00000000000..46befc52351
--- /dev/null
+++ b/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.rst
@@ -0,0 +1,88 @@
+Active-Passive NAT HA
+=====================
+
+Introduction
+------------
+
+One NAT node actively manages traffic while the other is kept
+synchronized, ready to transition to the active state, take over
+seamlessly, and enforce the same NAT sessions when a failure occurs.
+Both nodes share the same configuration settings.
+
+Configuration
+-------------
+
+NAT HA protocol
+~~~~~~~~~~~~~~~
+
+Session synchronization traffic is carried over an IPv4 UDP
+connection. The active node sends NAT HA protocol events to the
+passive node. To achieve reliable transfer, the NAT HA protocol uses
+acknowledgment with re-transmission: the passive node responds with an
+acknowledgment message as it receives the data, while the active node
+keeps a record of each packet it sends and maintains a timer from when
+the packet was sent. The active node re-transmits a packet if the
+timer expires before receiving the acknowledgment.
+
+Topology
+~~~~~~~~
+
+The two NAT nodes have a dedicated link (interface GE0/0/3 on both) to
+synchronize NAT sessions using the NAT HA protocol.
+
+::
+
+ +-----------------------+
+ | outside network |
+ +-----------------------+
+ / \
+ / \
+ / \
+ / \
+ / \
+ +---------+ +---------+
+ | GE0/0/1 | Active Passive | GE0/0/1 |
+ | | | |
+ | GE0/0/3|-------------------|GE0/0/3 |
+ | | sync network | |
+ | GE0/0/0 | | GE0/0/0 |
+ +---------+ +---------+
+ \ /
+ \ /
+ \ /
+ \ /
+ \ /
+ +-----------------------+
+ | inside network |
+ +-----------------------+
+
+Active node configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ set interface ip address GigabitEthernet0/0/1 10.15.7.101/24
+ set interface ip address GigabitEthernet0/0/0 172.16.10.101/24
+ set interface ip address GigabitEthernet0/0/3 10.0.0.1/24
+ set interface state GigabitEthernet0/0/0 up
+ set interface state GigabitEthernet0/0/1 up
+ set interface state GigabitEthernet0/0/3 up
+ set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1
+ nat44 add address 10.15.7.100
+ nat ha listener 10.0.0.1:1234
+ nat ha failover 10.0.0.2:2345
+
+Passive node configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ set interface ip address GigabitEthernet0/0/1 10.15.7.102/24
+ set interface ip address GigabitEthernet0/0/0 172.16.10.102/24
+ set interface ip address GigabitEthernet0/0/3 10.0.0.2/24
+ set interface state GigabitEthernet0/0/0 up
+ set interface state GigabitEthernet0/0/1 up
+ set interface state GigabitEthernet0/0/3 up
+ set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1
+ nat44 add address 10.15.7.100
+ nat ha listener 10.0.0.2:2345
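The acknowledgment-with-re-transmission scheme described in the new document reduces to two operations on a queue of un-acknowledged datagrams held by the active node. A minimal sketch under that reading, with illustrative type and helper names (the plugin's actual implementation lives in nat44_ei_ha.c):

#include <stdint.h>

#define HA_RETRY_TIMEOUT 2.0 /* seconds before re-sending (illustrative) */

/* one record per un-acknowledged HA datagram (illustrative type) */
typedef struct
{
  uint32_t seq;   /* sequence number carried in the HA header */
  double sent_at; /* time of the last (re)transmission */
  uint8_t *data;  /* copy of the datagram, kept until acked */
  uint32_t len;
} ha_resend_entry_t;

/* active node, periodic timer: re-send anything not yet acknowledged */
static void
ha_scan_resend_queue (ha_resend_entry_t *q, uint32_t n, double now,
                      void (*resend) (uint8_t *data, uint32_t len))
{
  for (uint32_t i = 0; i < n; i++)
    if (now - q[i].sent_at > HA_RETRY_TIMEOUT)
      {
        resend (q[i].data, q[i].len); /* UDP datagram to the passive node */
        q[i].sent_at = now;           /* restart the timer */
      }
}

/* active node, on receiving an ack for seq: stop re-transmitting it */
static int
ha_ack_received (ha_resend_entry_t *q, uint32_t *n, uint32_t seq)
{
  for (uint32_t i = 0; i < *n; i++)
    if (q[i].seq == seq)
      {
        q[i] = q[--*n]; /* swap-with-last removal of the record */
        return 0;
      }
  return -1; /* unknown or duplicate ack */
}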
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.c b/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.c
deleted file mode 100644
index a049e4659a7..00000000000
--- a/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.c
+++ /dev/null
@@ -1,756 +0,0 @@
-/*
- * nat44_ei.c - nat44 endpoint dependent plugin
- * * Copyright (c) 2020 Cisco and/or its affiliates. * Licensed under the
- * Apache License, Version 2.0 (the "License"); you may not use this file
- * except in compliance with the License. You may obtain a copy of the License
- * at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/fib/ip4_fib.h>
-
-#include <nat/nat44-ei/nat44_ei.h>
-#include <nat/nat44-ei/nat44_ei_inlines.h>
-#include <nat/nat44-ei/nat44_ei_hairpinning.h>
-
-/* NAT buffer flags */
-#define NAT44_EI_FLAG_HAIRPINNING (1 << 0)
-
-typedef enum
-{
- NAT44_EI_HAIRPIN_SRC_NEXT_DROP,
- NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
- NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
- NAT44_EI_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
- NAT44_EI_HAIRPIN_SRC_N_NEXT,
-} nat44_ei_hairpin_src_next_t;
-
-typedef enum
-{
- NAT44_EI_HAIRPIN_NEXT_LOOKUP,
- NAT44_EI_HAIRPIN_NEXT_DROP,
- NAT44_EI_HAIRPIN_NEXT_HANDOFF,
- NAT44_EI_HAIRPIN_N_NEXT,
-} nat44_ei_hairpin_next_t;
-
-typedef struct
-{
- ip4_address_t addr;
- u16 port;
- u32 fib_index;
- u32 session_index;
-} nat44_ei_hairpin_trace_t;
-
-static u8 *
-format_nat44_ei_hairpin_trace (u8 *s, va_list *args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- nat44_ei_hairpin_trace_t *t = va_arg (*args, nat44_ei_hairpin_trace_t *);
-
- s = format (s, "new dst addr %U port %u fib-index %u", format_ip4_address,
- &t->addr, clib_net_to_host_u16 (t->port), t->fib_index);
- if (~0 == t->session_index)
- {
- s = format (s, " is-static-mapping");
- }
- else
- {
- s = format (s, " session-index %u", t->session_index);
- }
-
- return s;
-}
-
-extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
-
-static_always_inline int
-nat44_ei_is_hairpinning (nat44_ei_main_t *nm, ip4_address_t *dst_addr)
-{
- nat44_ei_address_t *ap;
- clib_bihash_kv_8_8_t kv, value;
-
- vec_foreach (ap, nm->addresses)
- {
- if (ap->addr.as_u32 == dst_addr->as_u32)
- return 1;
- }
-
- init_nat_k (&kv, *dst_addr, 0, 0, 0);
- if (!clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
- return 1;
-
- return 0;
-}
-
-#ifndef CLIB_MARCH_VARIANT
-void
-nat44_ei_hairpinning_sm_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
- ip4_header_t *ip)
-{
- clib_bihash_kv_8_8_t kv, value;
- nat44_ei_static_mapping_t *m;
- u32 old_addr, new_addr;
- ip_csum_t sum;
-
- init_nat_k (&kv, ip->dst_address, 0, 0, 0);
- if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
- return;
-
- m = pool_elt_at_index (nm->static_mappings, value.value);
-
- old_addr = ip->dst_address.as_u32;
- new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
- sum = ip->checksum;
- sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
- ip->checksum = ip_csum_fold (sum);
-
- if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
- vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index;
-}
-#endif
-
-#ifndef CLIB_MARCH_VARIANT
-int
-nat44_ei_hairpinning (vlib_main_t *vm, vlib_node_runtime_t *node,
- nat44_ei_main_t *nm, u32 thread_index, vlib_buffer_t *b0,
- ip4_header_t *ip0, udp_header_t *udp0,
- tcp_header_t *tcp0, u32 proto0, int do_trace,
- u32 *required_thread_index)
-{
- nat44_ei_session_t *s0 = NULL;
- clib_bihash_kv_8_8_t kv0, value0;
- ip_csum_t sum0;
- u32 new_dst_addr0 = 0, old_dst_addr0, si = ~0;
- u16 new_dst_port0 = ~0, old_dst_port0;
- int rv;
- ip4_address_t sm0_addr;
- u16 sm0_port;
- u32 sm0_fib_index;
- u32 old_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
-
- /* Check if destination is static mappings */
- if (!nat44_ei_static_mapping_match (
- ip0->dst_address, udp0->dst_port, nm->outside_fib_index, proto0,
- &sm0_addr, &sm0_port, &sm0_fib_index, 1 /* by external */, 0, 0))
- {
- new_dst_addr0 = sm0_addr.as_u32;
- new_dst_port0 = sm0_port;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm0_fib_index;
- }
- /* or active session */
- else
- {
- init_nat_k (&kv0, ip0->dst_address, udp0->dst_port,
- nm->outside_fib_index, proto0);
- rv = clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0);
- if (rv)
- {
- rv = 0;
- goto trace;
- }
-
- if (thread_index != nat_value_get_thread_index (&value0))
- {
- *required_thread_index = nat_value_get_thread_index (&value0);
- return 0;
- }
-
- si = nat_value_get_session_index (&value0);
- s0 = pool_elt_at_index (nm->per_thread_data[thread_index].sessions, si);
- new_dst_addr0 = s0->in2out.addr.as_u32;
- new_dst_port0 = s0->in2out.port;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
- }
-
- /* Check if anything has changed and if not, then return 0. This
- helps avoid infinite loop, repeating the three nodes
- nat44-hairpinning-->ip4-lookup-->ip4-local, in case nothing has
- changed. */
- old_dst_addr0 = ip0->dst_address.as_u32;
- old_dst_port0 = tcp0->dst;
- if (new_dst_addr0 == old_dst_addr0 && new_dst_port0 == old_dst_port0 &&
- vnet_buffer (b0)->sw_if_index[VLIB_TX] == old_sw_if_index)
- return 0;
-
- /* Destination is behind the same NAT, use internal address and port */
- if (new_dst_addr0)
- {
- old_dst_addr0 = ip0->dst_address.as_u32;
- ip0->dst_address.as_u32 = new_dst_addr0;
- sum0 = ip0->checksum;
- sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t,
- dst_address);
- ip0->checksum = ip_csum_fold (sum0);
-
- old_dst_port0 = tcp0->dst;
- if (PREDICT_TRUE (new_dst_port0 != old_dst_port0))
- {
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- tcp0->dst = new_dst_port0;
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
- ip4_header_t, dst_address);
- sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
- ip4_header_t /* cheat */, length);
- tcp0->checksum = ip_csum_fold (sum0);
- }
- else
- {
- udp0->dst_port = new_dst_port0;
- udp0->checksum = 0;
- }
- }
- else
- {
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
- ip4_header_t, dst_address);
- tcp0->checksum = ip_csum_fold (sum0);
- }
- }
- rv = 1;
- goto trace;
- }
- rv = 0;
-trace:
- if (do_trace && PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
- (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- nat44_ei_hairpin_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
- t->addr.as_u32 = new_dst_addr0;
- t->port = new_dst_port0;
- t->fib_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
- if (s0)
- {
- t->session_index = si;
- }
- else
- {
- t->session_index = ~0;
- }
- }
- return rv;
-}
-#endif
-
-#ifndef CLIB_MARCH_VARIANT
-u32
-nat44_ei_icmp_hairpinning (nat44_ei_main_t *nm, vlib_buffer_t *b0,
- u32 thread_index, ip4_header_t *ip0,
- icmp46_header_t *icmp0, u32 *required_thread_index)
-{
- clib_bihash_kv_8_8_t kv0, value0;
- u32 old_dst_addr0, new_dst_addr0;
- u32 old_addr0, new_addr0;
- u16 old_port0, new_port0;
- u16 old_checksum0, new_checksum0;
- u32 si, ti = 0;
- ip_csum_t sum0;
- nat44_ei_session_t *s0;
- nat44_ei_static_mapping_t *m0;
-
- if (icmp_type_is_error_message (
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
- {
- ip4_header_t *inner_ip0 = 0;
- tcp_udp_header_t *l4_header = 0;
-
- inner_ip0 = (ip4_header_t *) ((icmp_echo_header_t *) (icmp0 + 1) + 1);
- l4_header = ip4_next_header (inner_ip0);
- u32 protocol = ip_proto_to_nat_proto (inner_ip0->protocol);
-
- if (protocol != NAT_PROTOCOL_TCP && protocol != NAT_PROTOCOL_UDP)
- return 1;
-
- init_nat_k (&kv0, ip0->dst_address, l4_header->src_port,
- nm->outside_fib_index, protocol);
- if (clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0))
- return 1;
- ti = nat_value_get_thread_index (&value0);
- if (ti != thread_index)
- {
- *required_thread_index = ti;
- return 1;
- }
- si = nat_value_get_session_index (&value0);
- s0 = pool_elt_at_index (nm->per_thread_data[ti].sessions, si);
- new_dst_addr0 = s0->in2out.addr.as_u32;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
-
- /* update inner source IP address */
- old_addr0 = inner_ip0->src_address.as_u32;
- inner_ip0->src_address.as_u32 = new_dst_addr0;
- new_addr0 = inner_ip0->src_address.as_u32;
- sum0 = icmp0->checksum;
- sum0 =
- ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address);
- icmp0->checksum = ip_csum_fold (sum0);
-
- /* update inner IP header checksum */
- old_checksum0 = inner_ip0->checksum;
- sum0 = inner_ip0->checksum;
- sum0 =
- ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address);
- inner_ip0->checksum = ip_csum_fold (sum0);
- new_checksum0 = inner_ip0->checksum;
- sum0 = icmp0->checksum;
- sum0 = ip_csum_update (sum0, old_checksum0, new_checksum0, ip4_header_t,
- checksum);
- icmp0->checksum = ip_csum_fold (sum0);
-
- /* update inner source port */
- old_port0 = l4_header->src_port;
- l4_header->src_port = s0->in2out.port;
- new_port0 = l4_header->src_port;
- sum0 = icmp0->checksum;
- sum0 = ip_csum_update (sum0, old_port0, new_port0, tcp_udp_header_t,
- src_port);
- icmp0->checksum = ip_csum_fold (sum0);
- }
- else
- {
- init_nat_k (&kv0, ip0->dst_address, 0, nm->outside_fib_index, 0);
- if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv0,
- &value0))
- {
- icmp_echo_header_t *echo0 = (icmp_echo_header_t *) (icmp0 + 1);
- u16 icmp_id0 = echo0->identifier;
- init_nat_k (&kv0, ip0->dst_address, icmp_id0, nm->outside_fib_index,
- NAT_PROTOCOL_ICMP);
- int rv = clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0);
- if (!rv)
- {
- ti = nat_value_get_thread_index (&value0);
- if (ti != thread_index)
- {
- *required_thread_index = ti;
- return 1;
- }
- si = nat_value_get_session_index (&value0);
- s0 = pool_elt_at_index (nm->per_thread_data[ti].sessions, si);
- new_dst_addr0 = s0->in2out.addr.as_u32;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
- echo0->identifier = s0->in2out.port;
- sum0 = icmp0->checksum;
- sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
- icmp_echo_header_t, identifier);
- icmp0->checksum = ip_csum_fold (sum0);
- goto change_addr;
- }
-
- return 1;
- }
-
- m0 = pool_elt_at_index (nm->static_mappings, value0.value);
-
- new_dst_addr0 = m0->local_addr.as_u32;
- if (vnet_buffer (b0)->sw_if_index[VLIB_TX] == ~0)
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = m0->fib_index;
- }
-change_addr:
- /* Destination is behind the same NAT, use internal address and port */
- if (new_dst_addr0)
- {
- old_dst_addr0 = ip0->dst_address.as_u32;
- ip0->dst_address.as_u32 = new_dst_addr0;
- sum0 = ip0->checksum;
- sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t,
- dst_address);
- ip0->checksum = ip_csum_fold (sum0);
- }
- return 0;
-}
-#endif
-
-void nat44_ei_hairpinning_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
- ip4_header_t *ip);
-
-#ifndef CLIB_MARCH_VARIANT
-void
-nat44_ei_hairpinning_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
- ip4_header_t *ip)
-{
- clib_bihash_kv_8_8_t kv, value;
- nat44_ei_static_mapping_t *m;
- u32 old_addr, new_addr;
- ip_csum_t sum;
-
- init_nat_k (&kv, ip->dst_address, 0, 0, 0);
- if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
- return;
-
- m = pool_elt_at_index (nm->static_mappings, value.value);
-
- old_addr = ip->dst_address.as_u32;
- new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
- sum = ip->checksum;
- sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
- ip->checksum = ip_csum_fold (sum);
-
- if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
- vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index;
-}
-#endif
-
-VLIB_NODE_FN (nat44_ei_hairpin_src_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- u32 n_left_from, *from, *to_next;
- nat44_ei_hairpin_src_next_t next_index;
- nat44_ei_main_t *nm = &nat44_ei_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- nat44_ei_interface_t *i;
- u32 sw_if_index0;
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- pool_foreach (i, nm->output_feature_interfaces)
- {
- /* Only packets from NAT inside interface */
- if ((nat44_ei_interface_is_inside (i)) &&
- (sw_if_index0 == i->sw_if_index))
- {
- if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
- NAT44_EI_FLAG_HAIRPINNING))
- {
- if (PREDICT_TRUE (nm->num_workers > 1))
- {
- next0 = NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
- goto skip_feature_next;
- }
- else
- {
- next0 = NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
- goto skip_feature_next;
- }
- }
- break;
- }
- }
-
- vnet_feature_next (&next0, b0);
- skip_feature_next:
-
- if (next0 != NAT44_EI_HAIRPIN_SRC_NEXT_DROP)
- {
- vlib_increment_simple_counter (
- &nm->counters.hairpinning, vm->thread_index, sw_if_index0, 1);
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (nat44_ei_hairpin_dst_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- u32 n_left_from, *from, *to_next;
- u32 thread_index = vm->thread_index;
- nat44_ei_hairpin_next_t next_index;
- nat44_ei_main_t *nm = &nat44_ei_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- ip4_header_t *ip0;
- u32 proto0;
- u32 sw_if_index0;
- u32 required_thread_index = thread_index;
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- next0 = NAT44_EI_HAIRPIN_NEXT_LOOKUP;
- ip0 = vlib_buffer_get_current (b0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
-
- vnet_buffer (b0)->snat.flags = 0;
- if (PREDICT_FALSE (nat44_ei_is_hairpinning (nm, &ip0->dst_address)))
- {
- if (proto0 == NAT_PROTOCOL_TCP || proto0 == NAT_PROTOCOL_UDP)
- {
- udp_header_t *udp0 = ip4_next_header (ip0);
- tcp_header_t *tcp0 = (tcp_header_t *) udp0;
-
- nat44_ei_hairpinning (vm, node, nm, thread_index, b0, ip0,
- udp0, tcp0, proto0, 1 /* do_trace */,
- &required_thread_index);
- }
- else if (proto0 == NAT_PROTOCOL_ICMP)
- {
- icmp46_header_t *icmp0 = ip4_next_header (ip0);
-
- nat44_ei_icmp_hairpinning (nm, b0, thread_index, ip0, icmp0,
- &required_thread_index);
- }
- else
- {
- nat44_ei_hairpinning_unknown_proto (nm, b0, ip0);
- }
-
- vnet_buffer (b0)->snat.flags = NAT44_EI_FLAG_HAIRPINNING;
- }
-
- if (thread_index != required_thread_index)
- {
- vnet_buffer (b0)->snat.required_thread_index =
- required_thread_index;
- next0 = NAT44_EI_HAIRPIN_NEXT_HANDOFF;
- }
-
- if (next0 != NAT44_EI_HAIRPIN_NEXT_DROP)
- {
- vlib_increment_simple_counter (
- &nm->counters.hairpinning, vm->thread_index, sw_if_index0, 1);
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (nat44_ei_hairpinning_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- u32 n_left_from, *from, *to_next;
- u32 thread_index = vm->thread_index;
- nat44_ei_hairpin_next_t next_index;
- nat44_ei_main_t *nm = &nat44_ei_main;
- vnet_feature_main_t *fm = &feature_main;
- u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
- vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- ip4_header_t *ip0;
- u32 proto0;
- udp_header_t *udp0;
- tcp_header_t *tcp0;
- u32 sw_if_index0;
- u32 required_thread_index = thread_index;
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- ip0 = vlib_buffer_get_current (b0);
- udp0 = ip4_next_header (ip0);
- tcp0 = (tcp_header_t *) udp0;
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
- int next0_resolved = 0;
-
- if (nat44_ei_hairpinning (vm, node, nm, thread_index, b0, ip0, udp0,
- tcp0, proto0, 1 /* do_trace */,
- &required_thread_index))
- {
- next0 = NAT44_EI_HAIRPIN_NEXT_LOOKUP;
- next0_resolved = 1;
- }
-
- if (thread_index != required_thread_index)
- {
- vnet_buffer (b0)->snat.required_thread_index =
- required_thread_index;
- next0 = NAT44_EI_HAIRPIN_NEXT_HANDOFF;
- next0_resolved = 1;
- }
-
- if (!next0_resolved)
- vnet_get_config_data (&cm->config_main, &b0->current_config_index,
- &next0, 0);
-
- if (next0 != NAT44_EI_HAIRPIN_NEXT_DROP)
- {
- vlib_increment_simple_counter (
- &nm->counters.hairpinning, vm->thread_index, sw_if_index0, 1);
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (nat44_ei_hairpinning_dst_handoff_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_hairpinning_handoff_fn_inline (
- vm, node, frame, nat44_ei_main.hairpin_dst_fq_index);
-}
-
-VLIB_NODE_FN (nat44_ei_hairpinning_handoff_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_hairpinning_handoff_fn_inline (
- vm, node, frame, nat44_ei_main.hairpinning_fq_index);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_hairpinning_dst_handoff_node) = {
- .name = "nat44-ei-hairpin-dst-handoff",
- .vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
- .error_strings = nat44_ei_hairpinning_handoff_error_strings,
- .format_trace = format_nat44_ei_hairpinning_handoff_trace,
-
- .n_next_nodes = 1,
-
- .next_nodes = {
- [0] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (nat44_ei_hairpinning_handoff_node) = {
- .name = "nat44-ei-hairpinning-handoff",
- .vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
- .error_strings = nat44_ei_hairpinning_handoff_error_strings,
- .format_trace = format_nat44_ei_hairpinning_handoff_trace,
-
- .n_next_nodes = 1,
-
- .next_nodes = {
- [0] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (nat44_ei_hairpin_src_node) = {
- .name = "nat44-ei-hairpin-src",
- .vector_size = sizeof (u32),
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_next_nodes = NAT44_EI_HAIRPIN_SRC_N_NEXT,
- .next_nodes = {
- [NAT44_EI_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
- [NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-ei-in2out-output",
- [NAT44_EI_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
- [NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-ei-in2out-output-worker-handoff",
- },
-};
-
-VLIB_REGISTER_NODE (nat44_ei_hairpin_dst_node) = {
- .name = "nat44-ei-hairpin-dst",
- .vector_size = sizeof (u32),
- .type = VLIB_NODE_TYPE_INTERNAL,
- .format_trace = format_nat44_ei_hairpin_trace,
- .n_next_nodes = NAT44_EI_HAIRPIN_N_NEXT,
- .next_nodes = {
- [NAT44_EI_HAIRPIN_NEXT_DROP] = "error-drop",
- [NAT44_EI_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_HAIRPIN_NEXT_HANDOFF] = "nat44-ei-hairpin-dst-handoff",
- },
-};
-
-VLIB_REGISTER_NODE (nat44_ei_hairpinning_node) = {
- .name = "nat44-ei-hairpinning",
- .vector_size = sizeof (u32),
- .type = VLIB_NODE_TYPE_INTERNAL,
- .format_trace = format_nat44_ei_hairpin_trace,
- .n_next_nodes = NAT44_EI_HAIRPIN_N_NEXT,
- .next_nodes = {
- [NAT44_EI_HAIRPIN_NEXT_DROP] = "error-drop",
- [NAT44_EI_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_HAIRPIN_NEXT_HANDOFF] = "nat44-ei-hairpinning-handoff",
- },
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
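Every address and port rewrite in the hairpinning code deleted above keeps the IP, TCP, and ICMP checksums valid by updating them incrementally (ip_csum_update followed by ip_csum_fold) rather than recomputing over the whole packet. The arithmetic behind those helpers is RFC 1624's HC' = ~(~HC + ~m + m'); a self-contained sketch of the same identity, with hypothetical helper names:

#include <stdint.h>

/* collapse one's-complement carries into 16 bits */
static uint16_t
csum_fold (uint32_t sum)
{
  while (sum >> 16)
    sum = (sum & 0xffff) + (sum >> 16);
  return (uint16_t) sum;
}

/* RFC 1624: HC' = ~(~HC + ~m + m'), for one changed 16-bit word */
static uint16_t
csum_update16 (uint16_t old_csum, uint16_t old_val, uint16_t new_val)
{
  uint32_t sum = (uint16_t) ~old_csum; /* recover the one's-complement sum */
  sum += (uint16_t) ~old_val;          /* subtract the old value */
  sum += new_val;                      /* add the new value */
  return ~csum_fold (sum);
}

/* a 32-bit field such as an IPv4 address is two 16-bit words */
static uint16_t
csum_update32 (uint16_t csum, uint32_t old_val, uint32_t new_val)
{
  csum = csum_update16 (csum, old_val >> 16, new_val >> 16);
  return csum_update16 (csum, old_val & 0xffff, new_val & 0xffff);
}

This is why the code above can rewrite dst_address and repair three different checksums without ever touching the rest of the packet.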
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.h b/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.h
deleted file mode 100644
index 908e6b2cfc9..00000000000
--- a/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.h
+++ /dev/null
@@ -1,92 +0,0 @@
-#ifndef __included_nat44_ei_hairpinning_h__
-#define __included_nat44_ei_hairpinning_h__
-
-#include <nat/nat44-ei/nat44_ei.h>
-
-#define foreach_nat44_ei_hairpinning_handoff_error \
- _ (CONGESTION_DROP, "congestion drop")
-
-typedef enum
-{
-#define _(sym, str) NAT44_EI_HAIRPINNING_HANDOFF_ERROR_##sym,
- foreach_nat44_ei_hairpinning_handoff_error
-#undef _
- NAT44_EI_HAIRPINNING_HANDOFF_N_ERROR,
-} nat44_ei_hairpinning_handoff_error_t;
-
-static char *nat44_ei_hairpinning_handoff_error_strings[] = {
-#define _(sym, string) string,
- foreach_nat44_ei_hairpinning_handoff_error
-#undef _
-};
-
-typedef struct
-{
- u32 next_worker_index;
-} nat44_ei_hairpinning_handoff_trace_t;
-
-static u8 *
-format_nat44_ei_hairpinning_handoff_trace (u8 *s, va_list *args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- nat44_ei_hairpinning_handoff_trace_t *t =
- va_arg (*args, nat44_ei_hairpinning_handoff_trace_t *);
-
- s = format (s, "nat44-ei-hairpinning-handoff: next-worker %d",
- t->next_worker_index);
-
- return s;
-}
-
-always_inline uword
-nat44_ei_hairpinning_handoff_fn_inline (vlib_main_t *vm,
- vlib_node_runtime_t *node,
- vlib_frame_t *frame, u32 fq_index)
-{
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 n_enq, n_left_from, *from;
- u16 thread_indices[VLIB_FRAME_SIZE], *ti;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- b = bufs;
- ti = thread_indices;
-
- while (n_left_from > 0)
- {
- ti[0] = vnet_buffer (b[0])->snat.required_thread_index;
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
- (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
- {
- nat44_ei_hairpinning_handoff_trace_t *t =
- vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->next_worker_index = ti[0];
- }
-
- n_left_from -= 1;
- ti += 1;
- b += 1;
- }
- n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
- thread_indices, frame->n_vectors, 1);
-
- if (n_enq < frame->n_vectors)
- vlib_node_increment_counter (
- vm, node->node_index, NAT44_EI_HAIRPINNING_HANDOFF_ERROR_CONGESTION_DROP,
- frame->n_vectors - n_enq);
- return frame->n_vectors;
-}
-
-#endif // __included_nat44_ei_hairpinning_h__
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
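The handoff inline removed with this header (and re-created inside nat44_ei_in2out.c later in this patch) follows a fixed pattern: stamp each buffer with the worker that owns its session, bulk-enqueue the frame to the per-worker frame queue, and account for whatever the queue cannot absorb as a CONGESTION_DROP error. Reduced to plain C with illustrative names, the control flow is:

#include <stdint.h>

/* bulk enqueue primitive, supplied by the framework; returns how many
   buffers the per-worker frame queues actually accepted (illustrative
   stand-in for vlib_buffer_enqueue_to_thread) */
typedef uint32_t (*bulk_enqueue_fn) (uint32_t fq_index,
                                     const uint32_t *buffer_indices,
                                     const uint16_t *thread_indices,
                                     uint32_t n);

static uint32_t
handoff_frame (uint32_t fq_index, const uint32_t *buffer_indices,
               const uint16_t *thread_indices, uint32_t n_vectors,
               uint64_t *congestion_drops, bulk_enqueue_fn enqueue)
{
  uint32_t n_enq = enqueue (fq_index, buffer_indices, thread_indices,
                            n_vectors);
  if (n_enq < n_vectors)
    *congestion_drops += n_vectors - n_enq; /* dropped by the queue */
  return n_vectors; /* the node reports the whole frame as processed */
}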
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_handoff.c b/src/plugins/nat/nat44-ei/nat44_ei_handoff.c
index c7a1317026b..f1821d7721f 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_handoff.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_handoff.c
@@ -15,7 +15,6 @@
#include <vlib/vlib.h>
#include <vnet/vnet.h>
-#include <vnet/handoff.h>
#include <vnet/fib/ip4_fib.h>
#include <vppinfra/error.h>
@@ -83,8 +82,6 @@ nat44_ei_worker_handoff_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_get_buffers (vm, from, b, n_left_from);
- // TODO: move to nm
- // TODO: remove callbacks and use inlines that should be moved here
if (is_in2out)
{
fq_index = is_output ? nm->fq_in2out_output_index : nm->fq_in2out_index;
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
index 7ac1a92a61b..01b333a5234 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
@@ -34,50 +34,8 @@
#include <nat/lib/nat_inlines.h>
#include <nat/nat44-ei/nat44_ei_inlines.h>
#include <nat/nat44-ei/nat44_ei.h>
-#include <nat/nat44-ei/nat44_ei_hairpinning.h>
-typedef struct
-{
- u32 sw_if_index;
- u32 next_index;
- u32 session_index;
- u32 is_slow_path;
- u32 is_hairpinning;
-} nat44_ei_in2out_trace_t;
-
-/* packet trace format function */
-static u8 *
-format_nat44_ei_in2out_trace (u8 *s, va_list *args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- nat44_ei_in2out_trace_t *t = va_arg (*args, nat44_ei_in2out_trace_t *);
- char *tag;
-
- tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
-
- s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
- t->sw_if_index, t->next_index, t->session_index);
- if (t->is_hairpinning)
- {
- s = format (s, ", with-hairpinning");
- }
-
- return s;
-}
-
-static u8 *
-format_nat44_ei_in2out_fast_trace (u8 *s, va_list *args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- nat44_ei_in2out_trace_t *t = va_arg (*args, nat44_ei_in2out_trace_t *);
-
- s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
- t->sw_if_index, t->next_index);
-
- return s;
-}
+extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
#define foreach_nat44_ei_in2out_error \
_ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \
@@ -88,6 +46,9 @@ format_nat44_ei_in2out_fast_trace (u8 *s, va_list *args)
_ (MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \
_ (CANNOT_CREATE_USER, "cannot create NAT user")
+#define foreach_nat44_ei_hairpinning_handoff_error \
+ _ (CONGESTION_DROP, "congestion drop")
+
typedef enum
{
#define _(sym, str) NAT44_EI_IN2OUT_ERROR_##sym,
@@ -104,6 +65,20 @@ static char *nat44_ei_in2out_error_strings[] = {
typedef enum
{
+#define _(sym, str) NAT44_EI_HAIRPINNING_HANDOFF_ERROR_##sym,
+ foreach_nat44_ei_hairpinning_handoff_error
+#undef _
+ NAT44_EI_HAIRPINNING_HANDOFF_N_ERROR,
+} nat44_ei_hairpinning_handoff_error_t;
+
+static char *nat44_ei_hairpinning_handoff_error_strings[] = {
+#define _(sym, string) string,
+ foreach_nat44_ei_hairpinning_handoff_error
+#undef _
+};
+
+typedef enum
+{
NAT44_EI_IN2OUT_NEXT_LOOKUP,
NAT44_EI_IN2OUT_NEXT_DROP,
NAT44_EI_IN2OUT_NEXT_ICMP_ERROR,
@@ -119,7 +94,98 @@ typedef enum
NAT44_EI_IN2OUT_HAIRPINNING_FINISH_N_NEXT,
} nat44_ei_in2out_hairpinnig_finish_next_t;
-static inline int
+typedef enum
+{
+ NAT44_EI_HAIRPIN_NEXT_LOOKUP,
+ NAT44_EI_HAIRPIN_NEXT_DROP,
+ NAT44_EI_HAIRPIN_NEXT_HANDOFF,
+ NAT44_EI_HAIRPIN_N_NEXT,
+} nat44_ei_hairpin_next_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_index;
+ u32 session_index;
+ u32 is_slow_path;
+ u32 is_hairpinning;
+} nat44_ei_in2out_trace_t;
+
+typedef struct
+{
+ ip4_address_t addr;
+ u16 port;
+ u32 fib_index;
+ u32 session_index;
+} nat44_ei_hairpin_trace_t;
+
+typedef struct
+{
+ u32 next_worker_index;
+} nat44_ei_hairpinning_handoff_trace_t;
+
+static u8 *
+format_nat44_ei_in2out_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nat44_ei_in2out_trace_t *t = va_arg (*args, nat44_ei_in2out_trace_t *);
+ char *tag;
+ tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
+ s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
+ t->sw_if_index, t->next_index, t->session_index);
+ if (t->is_hairpinning)
+ s = format (s, ", with-hairpinning");
+ return s;
+}
+
+static u8 *
+format_nat44_ei_in2out_fast_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nat44_ei_in2out_trace_t *t = va_arg (*args, nat44_ei_in2out_trace_t *);
+ s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
+ t->sw_if_index, t->next_index);
+ return s;
+}
+
+static u8 *
+format_nat44_ei_hairpin_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nat44_ei_hairpin_trace_t *t = va_arg (*args, nat44_ei_hairpin_trace_t *);
+
+ s = format (s, "new dst addr %U port %u fib-index %u", format_ip4_address,
+ &t->addr, clib_net_to_host_u16 (t->port), t->fib_index);
+ if (~0 == t->session_index)
+ {
+ s = format (s, " is-static-mapping");
+ }
+ else
+ {
+ s = format (s, " session-index %u", t->session_index);
+ }
+
+ return s;
+}
+
+static u8 *
+format_nat44_ei_hairpinning_handoff_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nat44_ei_hairpinning_handoff_trace_t *t =
+ va_arg (*args, nat44_ei_hairpinning_handoff_trace_t *);
+
+ s = format (s, "nat44-ei-hairpinning-handoff: next-worker %d",
+ t->next_worker_index);
+
+ return s;
+}
+
+static_always_inline int
nat44_ei_not_translate_fast (vlib_node_runtime_t *node, u32 sw_if_index0,
ip4_header_t *ip0, u32 proto0, u32 rx_fib_index0)
{
@@ -177,7 +243,7 @@ nat44_ei_not_translate_fast (vlib_node_runtime_t *node, u32 sw_if_index0,
return 1;
}
-static inline int
+static_always_inline int
nat44_ei_not_translate (nat44_ei_main_t *nm, vlib_node_runtime_t *node,
u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
u32 rx_fib_index0, u32 thread_index)
@@ -212,7 +278,7 @@ nat44_ei_not_translate (nat44_ei_main_t *nm, vlib_node_runtime_t *node,
rx_fib_index0);
}
-static inline int
+static_always_inline int
nat44_ei_not_translate_output_feature (nat44_ei_main_t *nm, ip4_header_t *ip0,
u32 proto0, u16 src_port, u16 dst_port,
u32 thread_index, u32 sw_if_index)
@@ -271,13 +337,10 @@ nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg)
if (clib_bihash_add_del_8_8 (&nm->out2in, &s_kv, 0))
nat_elog_warn (nm, "out2in key del failed");
- nat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port,
- s->in2out.fib_index);
+ nat_ipfix_logging_nat44_ses_delete (
+ ctx->thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
&s->in2out.addr, s->in2out.port,
@@ -430,8 +493,9 @@ slow_path (nat44_ei_main_t *nm, vlib_buffer_t *b0, ip4_header_t *ip0,
/* log NAT event */
nat_ipfix_logging_nat44_ses_create (
- thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->nat_proto,
- s->in2out.port, s->out2in.port, s->in2out.fib_index);
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_syslog_nat44_apmadd (s->user_index, s->in2out.fib_index, &s->in2out.addr,
s->in2out.port, &s->out2in.addr, s->out2in.port,
@@ -445,7 +509,6 @@ slow_path (nat44_ei_main_t *nm, vlib_buffer_t *b0, ip4_header_t *ip0,
return next0;
}
-#ifndef CLIB_MARCH_VARIANT
static_always_inline nat44_ei_in2out_error_t
icmp_get_key (vlib_buffer_t *b, ip4_header_t *ip0, ip4_address_t *addr,
u16 *port, nat_protocol_t *nat_proto)
@@ -490,22 +553,7 @@ icmp_get_key (vlib_buffer_t *b, ip4_header_t *ip0, ip4_address_t *addr,
return -1; /* success */
}
-/**
- * Get address and port values to be used for ICMP packet translation
- * and create session if needed
- *
- * @param[in,out] nm NAT main
- * @param[in,out] node NAT node runtime
- * @param[in] thread_index thread index
- * @param[in,out] b0 buffer containing packet to be translated
- * @param[in,out] ip0 ip header
- * @param[out] p_proto protocol used for matching
- * @param[out] p_value address and port after NAT translation
- * @param[out] p_dont_translate if packet should not be translated
- * @param d optional parameter
- * @param e optional parameter
- */
-u32
+static_always_inline u32
nat44_ei_icmp_match_in2out_slow (vlib_node_runtime_t *node, u32 thread_index,
vlib_buffer_t *b0, ip4_header_t *ip0,
ip4_address_t *addr, u16 *port,
@@ -607,10 +655,8 @@ out:
*p_s0 = s0;
return next0;
}
-#endif
-#ifndef CLIB_MARCH_VARIANT
-u32
+static_always_inline u32
nat44_ei_icmp_match_in2out_fast (vlib_node_runtime_t *node, u32 thread_index,
vlib_buffer_t *b0, ip4_header_t *ip0,
ip4_address_t *addr, u16 *port,
@@ -676,16 +722,135 @@ nat44_ei_icmp_match_in2out_fast (vlib_node_runtime_t *node, u32 thread_index,
out:
return next0;
}
-#endif
-u32 nat44_ei_icmp_in2out (vlib_buffer_t *b0, ip4_header_t *ip0,
- icmp46_header_t *icmp0, u32 sw_if_index0,
- u32 rx_fib_index0, vlib_node_runtime_t *node,
- u32 next0, u32 thread_index,
- nat44_ei_session_t **p_s0);
+static_always_inline u32
+nat44_ei_icmp_hairpinning (nat44_ei_main_t *nm, vlib_buffer_t *b0,
+ u32 thread_index, ip4_header_t *ip0,
+ icmp46_header_t *icmp0, u32 *required_thread_index)
+{
+ clib_bihash_kv_8_8_t kv0, value0;
+ u32 old_dst_addr0, new_dst_addr0;
+ u32 old_addr0, new_addr0;
+ u16 old_port0, new_port0;
+ u16 old_checksum0, new_checksum0;
+ u32 si, ti = 0;
+ ip_csum_t sum0;
+ nat44_ei_session_t *s0;
+ nat44_ei_static_mapping_t *m0;
-#ifndef CLIB_MARCH_VARIANT
-u32
+ if (icmp_type_is_error_message (
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
+ {
+ ip4_header_t *inner_ip0 = 0;
+ tcp_udp_header_t *l4_header = 0;
+
+ inner_ip0 = (ip4_header_t *) ((icmp_echo_header_t *) (icmp0 + 1) + 1);
+ l4_header = ip4_next_header (inner_ip0);
+ u32 protocol = ip_proto_to_nat_proto (inner_ip0->protocol);
+
+ if (protocol != NAT_PROTOCOL_TCP && protocol != NAT_PROTOCOL_UDP)
+ return 1;
+
+ init_nat_k (&kv0, ip0->dst_address, l4_header->src_port,
+ nm->outside_fib_index, protocol);
+ if (clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0))
+ return 1;
+ ti = nat_value_get_thread_index (&value0);
+ if (ti != thread_index)
+ {
+ *required_thread_index = ti;
+ return 1;
+ }
+ si = nat_value_get_session_index (&value0);
+ s0 = pool_elt_at_index (nm->per_thread_data[ti].sessions, si);
+ new_dst_addr0 = s0->in2out.addr.as_u32;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+
+ /* update inner source IP address */
+ old_addr0 = inner_ip0->src_address.as_u32;
+ inner_ip0->src_address.as_u32 = new_dst_addr0;
+ new_addr0 = inner_ip0->src_address.as_u32;
+ sum0 = icmp0->checksum;
+ sum0 =
+ ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address);
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ /* update inner IP header checksum */
+ old_checksum0 = inner_ip0->checksum;
+ sum0 = inner_ip0->checksum;
+ sum0 =
+ ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address);
+ inner_ip0->checksum = ip_csum_fold (sum0);
+ new_checksum0 = inner_ip0->checksum;
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_checksum0, new_checksum0, ip4_header_t,
+ checksum);
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ /* update inner source port */
+ old_port0 = l4_header->src_port;
+ l4_header->src_port = s0->in2out.port;
+ new_port0 = l4_header->src_port;
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_port0, new_port0, tcp_udp_header_t,
+ src_port);
+ icmp0->checksum = ip_csum_fold (sum0);
+ }
+ else
+ {
+ init_nat_k (&kv0, ip0->dst_address, 0, nm->outside_fib_index, 0);
+ if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv0,
+ &value0))
+ {
+ icmp_echo_header_t *echo0 = (icmp_echo_header_t *) (icmp0 + 1);
+ u16 icmp_id0 = echo0->identifier;
+ init_nat_k (&kv0, ip0->dst_address, icmp_id0, nm->outside_fib_index,
+ NAT_PROTOCOL_ICMP);
+ int rv = clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0);
+ if (!rv)
+ {
+ ti = nat_value_get_thread_index (&value0);
+ if (ti != thread_index)
+ {
+ *required_thread_index = ti;
+ return 1;
+ }
+ si = nat_value_get_session_index (&value0);
+ s0 = pool_elt_at_index (nm->per_thread_data[ti].sessions, si);
+ new_dst_addr0 = s0->in2out.addr.as_u32;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+ echo0->identifier = s0->in2out.port;
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
+ icmp_echo_header_t, identifier);
+ icmp0->checksum = ip_csum_fold (sum0);
+ goto change_addr;
+ }
+
+ return 1;
+ }
+
+ m0 = pool_elt_at_index (nm->static_mappings, value0.value);
+
+ new_dst_addr0 = m0->local_addr.as_u32;
+ if (vnet_buffer (b0)->sw_if_index[VLIB_TX] == ~0)
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = m0->fib_index;
+ }
+change_addr:
+ /* Destination is behind the same NAT; use the internal address and port */
+ if (new_dst_addr0)
+ {
+ old_dst_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address.as_u32 = new_dst_addr0;
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t,
+ dst_address);
+ ip0->checksum = ip_csum_fold (sum0);
+ }
+ return 0;
+}
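+
+/* Sketch for illustration (header_t, f, old_value and new_value are
+ * placeholders): the rewrites above rely on VPP's incremental checksum
+ * helpers rather than recomputing the full one's-complement sum:
+ *
+ *   ip_csum_t sum = csum;
+ *   sum = ip_csum_update (sum, old_value, new_value, header_t, f);
+ *   csum = ip_csum_fold (sum);
+ *
+ * Since the ICMP checksum also covers the embedded inner IP header, the
+ * inner header's old/new checksum is folded into the outer ICMP checksum
+ * as a second ip_csum_update step. */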
+
+static_always_inline u32
nat44_ei_icmp_in2out (vlib_buffer_t *b0, ip4_header_t *ip0,
icmp46_header_t *icmp0, u32 sw_if_index0,
u32 rx_fib_index0, vlib_node_runtime_t *node, u32 next0,
@@ -856,7 +1021,6 @@ nat44_ei_icmp_in2out (vlib_buffer_t *b0, ip4_header_t *ip0,
out:
return next0;
}
-#endif
static_always_inline u32
nat44_ei_icmp_in2out_slow_path (nat44_ei_main_t *nm, vlib_buffer_t *b0,
@@ -881,6 +1045,31 @@ nat44_ei_icmp_in2out_slow_path (nat44_ei_main_t *nm, vlib_buffer_t *b0,
return next0;
}
+static_always_inline void
+nat44_ei_hairpinning_sm_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
+ ip4_header_t *ip)
+{
+ clib_bihash_kv_8_8_t kv, value;
+ nat44_ei_static_mapping_t *m;
+ u32 old_addr, new_addr;
+ ip_csum_t sum;
+
+ init_nat_k (&kv, ip->dst_address, 0, 0, 0);
+ if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
+ return;
+
+ m = pool_elt_at_index (nm->static_mappings, value.value);
+
+ old_addr = ip->dst_address.as_u32;
+ new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
+ sum = ip->checksum;
+ sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
+ ip->checksum = ip_csum_fold (sum);
+
+ if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index;
+}
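+
+/* Sketch for illustration (external_addr and m are placeholders): for
+ * unknown protocols only the external address selects the mapping, so
+ * the bihash key is built with port, fib index and protocol all zero:
+ *
+ *   clib_bihash_kv_8_8_t kv, value;
+ *   init_nat_k (&kv, external_addr, 0, 0, 0);
+ *   if (!clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv,
+ *                                &value))
+ *     m = pool_elt_at_index (nm->static_mappings, value.value);
+ */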
+
static int
nat_in2out_sm_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
ip4_header_t *ip, u32 rx_fib_index)
@@ -913,7 +1102,174 @@ nat_in2out_sm_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
return 0;
}
-static inline uword
+static_always_inline int
+nat44_ei_hairpinning (vlib_main_t *vm, vlib_node_runtime_t *node,
+ nat44_ei_main_t *nm, u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, udp_header_t *udp0,
+ tcp_header_t *tcp0, u32 proto0, int do_trace,
+ u32 *required_thread_index)
+{
+ nat44_ei_session_t *s0 = NULL;
+ clib_bihash_kv_8_8_t kv0, value0;
+ ip_csum_t sum0;
+ u32 new_dst_addr0 = 0, old_dst_addr0, si = ~0;
+ u16 new_dst_port0 = ~0, old_dst_port0;
+ int rv;
+ ip4_address_t sm0_addr;
+ u16 sm0_port;
+ u32 sm0_fib_index;
+ u32 old_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ /* Check if the destination matches a static mapping */
+ if (!nat44_ei_static_mapping_match (
+ ip0->dst_address, udp0->dst_port, nm->outside_fib_index, proto0,
+ &sm0_addr, &sm0_port, &sm0_fib_index, 1 /* by external */, 0, 0))
+ {
+ new_dst_addr0 = sm0_addr.as_u32;
+ new_dst_port0 = sm0_port;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm0_fib_index;
+ }
+ /* or active session */
+ else
+ {
+ init_nat_k (&kv0, ip0->dst_address, udp0->dst_port,
+ nm->outside_fib_index, proto0);
+ rv = clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0);
+ if (rv)
+ {
+ rv = 0;
+ goto trace;
+ }
+
+ if (thread_index != nat_value_get_thread_index (&value0))
+ {
+ *required_thread_index = nat_value_get_thread_index (&value0);
+ return 0;
+ }
+
+ si = nat_value_get_session_index (&value0);
+ s0 = pool_elt_at_index (nm->per_thread_data[thread_index].sessions, si);
+ new_dst_addr0 = s0->in2out.addr.as_u32;
+ new_dst_port0 = s0->in2out.port;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+ }
+
+ /* Check if anything has changed and if not, return 0. This helps
+    avoid an infinite loop through the three nodes
+    nat44-hairpinning-->ip4-lookup-->ip4-local when nothing has
+    changed. */
+ old_dst_addr0 = ip0->dst_address.as_u32;
+ old_dst_port0 = tcp0->dst;
+ if (new_dst_addr0 == old_dst_addr0 && new_dst_port0 == old_dst_port0 &&
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] == old_sw_if_index)
+ return 0;
+
+ /* Destination is behind the same NAT; use the internal address and port */
+ if (new_dst_addr0)
+ {
+ old_dst_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address.as_u32 = new_dst_addr0;
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t,
+ dst_address);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ old_dst_port0 = tcp0->dst;
+ if (PREDICT_TRUE (new_dst_port0 != old_dst_port0))
+ {
+ if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
+ {
+ tcp0->dst = new_dst_port0;
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+ ip4_header_t, dst_address);
+ sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
+ ip4_header_t /* cheat */, length);
+ tcp0->checksum = ip_csum_fold (sum0);
+ }
+ else
+ {
+ udp0->dst_port = new_dst_port0;
+ udp0->checksum = 0;
+ }
+ }
+ else
+ {
+ if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
+ {
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+ ip4_header_t, dst_address);
+ tcp0->checksum = ip_csum_fold (sum0);
+ }
+ }
+ rv = 1;
+ goto trace;
+ }
+ rv = 0;
+trace:
+ if (do_trace && PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ nat44_ei_hairpin_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->addr.as_u32 = new_dst_addr0;
+ t->port = new_dst_port0;
+ t->fib_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ if (s0)
+ {
+ t->session_index = si;
+ }
+ else
+ {
+ t->session_index = ~0;
+ }
+ }
+ return rv;
+}
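+
+/* Summary of the contract above: returning 1 means the destination was
+ * rewritten and the buffer must revisit ip4-lookup; returning 0 means
+ * nothing matched or nothing changed. A session owned by another worker
+ * sets *required_thread_index and returns 0 so the caller can hand the
+ * buffer off instead of touching foreign per-thread state. */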
+
+static_always_inline uword
+nat44_ei_hairpinning_handoff_fn_inline (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u32 fq_index)
+{
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u32 n_enq, n_left_from, *from;
+ u16 thread_indices[VLIB_FRAME_SIZE], *ti;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+
+ b = bufs;
+ ti = thread_indices;
+
+ while (n_left_from > 0)
+ {
+ ti[0] = vnet_buffer (b[0])->snat.required_thread_index;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ nat44_ei_hairpinning_handoff_trace_t *t =
+ vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->next_worker_index = ti[0];
+ }
+
+ n_left_from -= 1;
+ ti += 1;
+ b += 1;
+ }
+ n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
+ thread_indices, frame->n_vectors, 1);
+
+ if (n_enq < frame->n_vectors)
+ vlib_node_increment_counter (
+ vm, node->node_index, NAT44_EI_HAIRPINNING_HANDOFF_ERROR_CONGESTION_DROP,
+ frame->n_vectors - n_enq);
+ return frame->n_vectors;
+}
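+
+/* Sketch of the pattern: the usual VPP worker handoff - each buffer
+ * carries its target worker in
+ * vnet_buffer (b)->snat.required_thread_index, and
+ * vlib_buffer_enqueue_to_thread () moves the frame onto the per-thread
+ * frame queue selected by fq_index. The trailing argument 1 requests
+ * drop-on-congestion instead of blocking, so anything not enqueued is
+ * counted as a congestion drop. */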
+
+static_always_inline uword
nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, int is_slow_path,
int is_output_feature)
@@ -934,7 +1290,9 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
vlib_buffer_t *b0, *b1;
u32 next0, next1;
- u32 sw_if_index0, sw_if_index1;
+ u32 rx_sw_if_index0, rx_sw_if_index1;
+ u32 tx_sw_if_index0, tx_sw_if_index1;
+ u32 cntr_sw_if_index0, cntr_sw_if_index1;
ip4_header_t *ip0, *ip1;
ip_csum_t sum0, sum1;
u32 new_addr0, old_addr0, new_addr1, old_addr1;
@@ -978,13 +1336,16 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp0 = (tcp_header_t *) udp0;
icmp0 = (icmp46_header_t *) udp0;
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ cntr_sw_if_index0 =
+ is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
rx_fib_index0 =
- vec_elt (nm->ip4_main->fib_index_by_sw_if_index, sw_if_index0);
+ vec_elt (nm->ip4_main->fib_index_by_sw_if_index, rx_sw_if_index0);
next0 = next1 = NAT44_EI_IN2OUT_NEXT_LOOKUP;
- if (PREDICT_FALSE (ip0->ttl == 1))
+ if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
{
vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
@@ -1010,19 +1371,19 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.other :
&nm->counters.fastpath.in2out.other,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
goto trace00;
}
if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
{
next0 = nat44_ei_icmp_in2out_slow_path (
- nm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0,
- now, thread_index, &s0);
+ nm, b0, ip0, icmp0, rx_sw_if_index0, rx_fib_index0, node,
+ next0, now, thread_index, &s0);
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.icmp :
&nm->counters.fastpath.in2out.icmp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
goto trace00;
}
}
@@ -1055,7 +1416,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
nm, ip0, proto0,
vnet_buffer (b0)->ip.reass.l4_src_port,
vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
- sw_if_index0)))
+ rx_sw_if_index0)))
goto trace00;
/*
@@ -1073,7 +1434,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
{
if (PREDICT_FALSE (nat44_ei_not_translate (
- nm, node, sw_if_index0, ip0, proto0, rx_fib_index0,
+ nm, node, rx_sw_if_index0, ip0, proto0, rx_fib_index0,
thread_index)))
goto trace00;
}
@@ -1131,7 +1492,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.tcp :
&nm->counters.fastpath.in2out.tcp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
else
{
@@ -1155,7 +1516,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.udp :
&nm->counters.fastpath.in2out.udp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
/* Accounting */
@@ -1171,7 +1532,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
nat44_ei_in2out_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
t->is_slow_path = is_slow_path;
- t->sw_if_index = sw_if_index0;
+ t->sw_if_index = rx_sw_if_index0;
t->next_index = next0;
t->session_index = ~0;
if (s0)
@@ -1183,7 +1544,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.drops :
&nm->counters.fastpath.in2out.drops,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
if (is_output_feature)
@@ -1196,11 +1557,14 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp1 = (tcp_header_t *) udp1;
icmp1 = (icmp46_header_t *) udp1;
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ rx_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ tx_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ cntr_sw_if_index1 =
+ is_output_feature ? tx_sw_if_index1 : rx_sw_if_index1;
rx_fib_index1 =
- vec_elt (nm->ip4_main->fib_index_by_sw_if_index, sw_if_index1);
+ vec_elt (nm->ip4_main->fib_index_by_sw_if_index, rx_sw_if_index1);
- if (PREDICT_FALSE (ip1->ttl == 1))
+ if (PREDICT_FALSE (!is_output_feature && ip1->ttl == 1))
{
vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
@@ -1226,19 +1590,19 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.other :
&nm->counters.fastpath.in2out.other,
- thread_index, sw_if_index1, 1);
+ thread_index, cntr_sw_if_index1, 1);
goto trace01;
}
if (PREDICT_FALSE (proto1 == NAT_PROTOCOL_ICMP))
{
next1 = nat44_ei_icmp_in2out_slow_path (
- nm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, next1,
- now, thread_index, &s1);
+ nm, b1, ip1, icmp1, rx_sw_if_index1, rx_fib_index1, node,
+ next1, now, thread_index, &s1);
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.icmp :
&nm->counters.fastpath.in2out.icmp,
- thread_index, sw_if_index1, 1);
+ thread_index, cntr_sw_if_index1, 1);
goto trace01;
}
}
@@ -1271,7 +1635,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
nm, ip1, proto1,
vnet_buffer (b1)->ip.reass.l4_src_port,
vnet_buffer (b1)->ip.reass.l4_dst_port, thread_index,
- sw_if_index1)))
+ rx_sw_if_index1)))
goto trace01;
/*
@@ -1289,7 +1653,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
{
if (PREDICT_FALSE (nat44_ei_not_translate (
- nm, node, sw_if_index1, ip1, proto1, rx_fib_index1,
+ nm, node, rx_sw_if_index1, ip1, proto1, rx_fib_index1,
thread_index)))
goto trace01;
}
@@ -1346,7 +1710,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.tcp :
&nm->counters.fastpath.in2out.tcp,
- thread_index, sw_if_index1, 1);
+ thread_index, cntr_sw_if_index1, 1);
}
else
{
@@ -1370,7 +1734,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.udp :
&nm->counters.fastpath.in2out.udp,
- thread_index, sw_if_index1, 1);
+ thread_index, cntr_sw_if_index1, 1);
}
/* Accounting */
@@ -1385,7 +1749,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
nat44_ei_in2out_trace_t *t =
vlib_add_trace (vm, node, b1, sizeof (*t));
- t->sw_if_index = sw_if_index1;
+ t->sw_if_index = rx_sw_if_index1;
t->next_index = next1;
t->session_index = ~0;
if (s1)
@@ -1397,7 +1761,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.drops :
&nm->counters.fastpath.in2out.drops,
- thread_index, sw_if_index1, 1);
+ thread_index, cntr_sw_if_index1, 1);
}
n_left_from -= 2;
@@ -1410,7 +1774,9 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
vlib_buffer_t *b0;
u32 next0;
- u32 sw_if_index0;
+ u32 rx_sw_if_index0;
+ u32 tx_sw_if_index0;
+ u32 cntr_sw_if_index0;
ip4_header_t *ip0;
ip_csum_t sum0;
u32 new_addr0, old_addr0;
@@ -1438,11 +1804,14 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp0 = (tcp_header_t *) udp0;
icmp0 = (icmp46_header_t *) udp0;
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ cntr_sw_if_index0 =
+ is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
rx_fib_index0 =
- vec_elt (nm->ip4_main->fib_index_by_sw_if_index, sw_if_index0);
+ vec_elt (nm->ip4_main->fib_index_by_sw_if_index, rx_sw_if_index0);
- if (PREDICT_FALSE (ip0->ttl == 1))
+ if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
{
vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
@@ -1468,19 +1837,19 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.other :
&nm->counters.fastpath.in2out.other,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
goto trace0;
}
if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
{
next0 = nat44_ei_icmp_in2out_slow_path (
- nm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0,
- now, thread_index, &s0);
+ nm, b0, ip0, icmp0, rx_sw_if_index0, rx_fib_index0, node,
+ next0, now, thread_index, &s0);
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.icmp :
&nm->counters.fastpath.in2out.icmp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
goto trace0;
}
}
@@ -1513,7 +1882,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
nm, ip0, proto0,
vnet_buffer (b0)->ip.reass.l4_src_port,
vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
- sw_if_index0)))
+ rx_sw_if_index0)))
goto trace0;
/*
@@ -1531,7 +1900,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
{
if (PREDICT_FALSE (nat44_ei_not_translate (
- nm, node, sw_if_index0, ip0, proto0, rx_fib_index0,
+ nm, node, rx_sw_if_index0, ip0, proto0, rx_fib_index0,
thread_index)))
goto trace0;
}
@@ -1590,7 +1959,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.tcp :
&nm->counters.fastpath.in2out.tcp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
else
{
@@ -1615,7 +1984,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.udp :
&nm->counters.fastpath.in2out.udp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
/* Accounting */
@@ -1631,7 +2000,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
nat44_ei_in2out_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
t->is_slow_path = is_slow_path;
- t->sw_if_index = sw_if_index0;
+ t->sw_if_index = rx_sw_if_index0;
t->next_index = next0;
t->session_index = ~0;
if (s0)
@@ -1643,7 +2012,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.drops :
&nm->counters.fastpath.in2out.drops,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
n_left_from--;
@@ -1656,128 +2025,10 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
return frame->n_vectors;
}
-VLIB_NODE_FN (nat44_ei_in2out_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */,
- 0);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_node) = {
- .name = "nat44-ei-in2out",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
- .error_strings = nat44_ei_in2out_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
- [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-slowpath",
- [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_output_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */,
- 1);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_output_node) = {
- .name = "nat44-ei-in2out-output",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
- .error_strings = nat44_ei_in2out_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
- [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "interface-output",
- [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-output-slowpath",
- [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-interface-output",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_slowpath_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */,
- 0);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_slowpath_node) = {
- .name = "nat44-ei-in2out-slowpath",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
- .error_strings = nat44_ei_in2out_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
- [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-slowpath",
- [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_output_slowpath_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */,
- 1);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_output_slowpath_node) = {
- .name = "nat44-ei-in2out-output-slowpath",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
- .error_strings = nat44_ei_in2out_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
- [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "interface-output",
- [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-output-slowpath",
- [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-interface-output",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_fast_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+static_always_inline uword
+nat44_ei_in2out_hairpinning_finish_inline (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
{
u32 n_left_from, *from, *to_next;
u32 thread_index = vm->thread_index;
@@ -1802,20 +2053,12 @@ VLIB_NODE_FN (nat44_ei_in2out_fast_node)
u32 next0;
u32 sw_if_index0;
ip4_header_t *ip0;
- ip_csum_t sum0;
- u32 new_addr0, old_addr0;
- u16 old_port0, new_port0;
udp_header_t *udp0;
tcp_header_t *tcp0;
icmp46_header_t *icmp0;
u32 proto0;
- u32 rx_fib_index0;
- ip4_address_t sm0_addr;
- u16 sm0_port;
- u32 sm0_fib_index;
u32 required_thread_index = thread_index;
- /* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
to_next[0] = bi0;
from += 1;
@@ -1824,7 +2067,7 @@ VLIB_NODE_FN (nat44_ei_in2out_fast_node)
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- next0 = NAT44_EI_IN2OUT_NEXT_LOOKUP;
+ next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP;
ip0 = vlib_buffer_get_current (b0);
udp0 = ip4_next_header (ip0);
@@ -1832,117 +2075,36 @@ VLIB_NODE_FN (nat44_ei_in2out_fast_node)
icmp0 = (icmp46_header_t *) udp0;
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- rx_fib_index0 =
- ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
-
- if (PREDICT_FALSE (ip0->ttl == 1))
- {
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
- ICMP4_time_exceeded_ttl_exceeded_in_transit,
- 0);
- next0 = NAT44_EI_IN2OUT_NEXT_ICMP_ERROR;
- goto trace0;
- }
-
proto0 = ip_proto_to_nat_proto (ip0->protocol);
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
- goto trace0;
-
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
- {
- next0 = nat44_ei_icmp_in2out (b0, ip0, icmp0, sw_if_index0,
- rx_fib_index0, node, next0, ~0, 0);
- goto trace0;
- }
-
- if (nat44_ei_static_mapping_match (
- ip0->src_address, udp0->src_port, rx_fib_index0, proto0,
- &sm0_addr, &sm0_port, &sm0_fib_index, 0, 0, 0))
- {
- b0->error = node->errors[NAT44_EI_IN2OUT_ERROR_NO_TRANSLATION];
- next0 = NAT44_EI_IN2OUT_NEXT_DROP;
- goto trace0;
- }
-
- new_addr0 = sm0_addr.as_u32;
- new_port0 = sm0_port;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm0_fib_index;
- old_addr0 = ip0->src_address.as_u32;
- ip0->src_address.as_u32 = new_addr0;
-
- sum0 = ip0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- src_address /* changed member */ );
- ip0->checksum = ip_csum_fold (sum0);
-
- if (PREDICT_FALSE (new_port0 != udp0->dst_port))
- {
- old_port0 = udp0->src_port;
- udp0->src_port = new_port0;
-
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- sum0 = ip_csum_update (sum0, old_port0, new_port0,
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- mss_clamping (nm->mss_clamping, tcp0, &sum0);
- tcp0->checksum = ip_csum_fold (sum0);
- }
- else if (udp0->checksum)
- {
- sum0 = udp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- sum0 = ip_csum_update (sum0, old_port0, new_port0,
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- udp0->checksum = ip_csum_fold (sum0);
- }
- }
- else
+ switch (proto0)
{
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- mss_clamping (nm->mss_clamping, tcp0, &sum0);
- tcp0->checksum = ip_csum_fold (sum0);
- }
- else if (udp0->checksum)
- {
- sum0 = udp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- udp0->checksum = ip_csum_fold (sum0);
- }
+ case NAT_PROTOCOL_TCP:
+ // fallthrough
+ case NAT_PROTOCOL_UDP:
+ is_hairpinning = nat44_ei_hairpinning (
+ vm, node, nm, thread_index, b0, ip0, udp0, tcp0, proto0,
+ 0 /* do_trace */, &required_thread_index);
+ break;
+ case NAT_PROTOCOL_ICMP:
+ is_hairpinning = (0 == nat44_ei_icmp_hairpinning (
+ nm, b0, thread_index, ip0, icmp0,
+ &required_thread_index));
+ break;
+ case NAT_PROTOCOL_OTHER:
+ // this should never happen
+ next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP;
+ break;
}
- /* Hairpinning */
- is_hairpinning = nat44_ei_hairpinning (
- vm, node, nm, thread_index, b0, ip0, udp0, tcp0, proto0,
- 0 /* do_trace */, &required_thread_index);
-
if (thread_index != required_thread_index)
{
- vnet_buffer (b0)->snat.required_thread_index =
- required_thread_index;
- next0 = NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF;
+ // this node runs after the handoff, so a second handoff would loop; drop
+ next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP;
}
- trace0:
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
{
nat44_ei_in2out_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
@@ -1951,18 +2113,15 @@ VLIB_NODE_FN (nat44_ei_in2out_fast_node)
t->is_hairpinning = is_hairpinning;
}
- if (next0 != NAT44_EI_IN2OUT_NEXT_DROP)
+ if (next0 != NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP)
{
-
vlib_increment_simple_counter (
&nm->counters.fastpath.in2out.other, vm->thread_index,
sw_if_index0, 1);
}
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -1971,83 +2130,16 @@ VLIB_NODE_FN (nat44_ei_in2out_fast_node)
return frame->n_vectors;
}
-VLIB_REGISTER_NODE (nat44_ei_in2out_fast_node) = {
- .name = "nat44-ei-in2out-fast",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_fast_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
- .error_strings = nat44_ei_in2out_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
- [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-slowpath",
- [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_hairpinning_handoff_ip4_lookup_node)
+VLIB_NODE_FN (nat44_ei_hairpinning_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return nat44_ei_hairpinning_handoff_fn_inline (
- vm, node, frame,
- nat44_ei_main.in2out_hairpinning_finish_ip4_lookup_node_fq_index);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_handoff_ip4_lookup_node) = {
- .name = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
- .vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
- .error_strings = nat44_ei_hairpinning_handoff_error_strings,
- .format_trace = format_nat44_ei_hairpinning_handoff_trace,
-
- .n_next_nodes = 1,
-
- .next_nodes = {
- [0] = "error-drop",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_hairpinning_handoff_interface_output_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_hairpinning_handoff_fn_inline (
- vm, node, frame,
- nat44_ei_main.in2out_hairpinning_finish_interface_output_node_fq_index);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_handoff_interface_output_node) = {
- .name = "nat44-ei-in2out-hairpinning-handoff-interface-output",
- .vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
- .error_strings = nat44_ei_hairpinning_handoff_error_strings,
- .format_trace = format_nat44_ei_hairpinning_handoff_trace,
-
- .n_next_nodes = 1,
-
- .next_nodes = {
- [0] = "error-drop",
- },
-};
-
-static_always_inline int
-nat44_ei_in2out_hairpinning_finish_inline (vlib_main_t *vm,
- vlib_node_runtime_t *node,
- vlib_frame_t *frame)
-{
u32 n_left_from, *from, *to_next;
u32 thread_index = vm->thread_index;
- nat44_ei_in2out_next_t next_index;
+ nat44_ei_hairpin_next_t next_index;
nat44_ei_main_t *nm = &nat44_ei_main;
- int is_hairpinning = 0;
+ vnet_feature_main_t *fm = &feature_main;
+ u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
+ vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -2064,15 +2156,13 @@ nat44_ei_in2out_hairpinning_finish_inline (vlib_main_t *vm,
u32 bi0;
vlib_buffer_t *b0;
u32 next0;
- u32 sw_if_index0;
ip4_header_t *ip0;
+ u32 proto0;
udp_header_t *udp0;
tcp_header_t *tcp0;
- icmp46_header_t *icmp0;
- u32 proto0;
+ u32 sw_if_index0;
u32 required_thread_index = thread_index;
- /* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
to_next[0] = bi0;
from += 1;
@@ -2081,60 +2171,39 @@ nat44_ei_in2out_hairpinning_finish_inline (vlib_main_t *vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP;
-
ip0 = vlib_buffer_get_current (b0);
udp0 = ip4_next_header (ip0);
tcp0 = (tcp_header_t *) udp0;
- icmp0 = (icmp46_header_t *) udp0;
-
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
proto0 = ip_proto_to_nat_proto (ip0->protocol);
+ int next0_resolved = 0;
- switch (proto0)
+ if (nat44_ei_hairpinning (vm, node, nm, thread_index, b0, ip0, udp0,
+ tcp0, proto0, 1, &required_thread_index))
{
- case NAT_PROTOCOL_TCP:
- // fallthrough
- case NAT_PROTOCOL_UDP:
- is_hairpinning = nat44_ei_hairpinning (
- vm, node, nm, thread_index, b0, ip0, udp0, tcp0, proto0,
- 0 /* do_trace */, &required_thread_index);
- break;
- case NAT_PROTOCOL_ICMP:
- is_hairpinning = (0 == nat44_ei_icmp_hairpinning (
- nm, b0, thread_index, ip0, icmp0,
- &required_thread_index));
- break;
- case NAT_PROTOCOL_OTHER:
- // this should never happen
- next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP;
- break;
+ next0 = NAT44_EI_HAIRPIN_NEXT_LOOKUP;
+ next0_resolved = 1;
}
if (thread_index != required_thread_index)
{
- // but we already did a handoff ...
- next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP;
+ vnet_buffer (b0)->snat.required_thread_index =
+ required_thread_index;
+ next0 = NAT44_EI_HAIRPIN_NEXT_HANDOFF;
+ next0_resolved = 1;
}
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
- (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- nat44_ei_in2out_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sw_if_index = sw_if_index0;
- t->next_index = next0;
- t->is_hairpinning = is_hairpinning;
- }
+ if (!next0_resolved)
+ vnet_get_config_data (&cm->config_main, &b0->current_config_index,
+ &next0, 0);
- if (next0 != NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP)
+ if (next0 != NAT44_EI_HAIRPIN_NEXT_DROP)
{
vlib_increment_simple_counter (
- &nm->counters.fastpath.in2out.other, sw_if_index0,
- vm->thread_index, 1);
+ &nm->counters.hairpinning, vm->thread_index, sw_if_index0, 1);
}
- /* verify speculative enqueue, maybe switch current next frame */
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}
@@ -2145,58 +2214,216 @@ nat44_ei_in2out_hairpinning_finish_inline (vlib_main_t *vm,
return frame->n_vectors;
}
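+
+/* Note on the fallthrough above: when neither a rewrite nor a handoff
+ * resolves next0, the next node comes from the ip4-local feature arc the
+ * node is installed on:
+ *
+ *   vnet_get_config_data (&cm->config_main, &b0->current_config_index,
+ *                         &next0, 0);
+ *
+ * i.e. the packet continues to the next feature after
+ * nat44-ei-hairpinning instead of being recirculated via ip4-lookup. */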
+VLIB_NODE_FN (nat44_ei_in2out_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_in2out_node_fn_inline (vm, node, frame, 0, 0);
+}
+
+VLIB_NODE_FN (nat44_ei_in2out_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_in2out_node_fn_inline (vm, node, frame, 0, 1);
+}
+
+VLIB_NODE_FN (nat44_ei_in2out_slowpath_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_in2out_node_fn_inline (vm, node, frame, 1, 0);
+}
+
+VLIB_NODE_FN (nat44_ei_in2out_output_slowpath_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_in2out_node_fn_inline (vm, node, frame, 1, 1);
+}
+
+VLIB_NODE_FN (nat44_ei_in2out_hairpinning_handoff_ip4_lookup_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_hairpinning_handoff_fn_inline (
+ vm, node, frame,
+ nat44_ei_main.in2out_hairpinning_finish_ip4_lookup_node_fq_index);
+}
+
+VLIB_NODE_FN (nat44_ei_in2out_hairpinning_handoff_interface_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_hairpinning_handoff_fn_inline (
+ vm, node, frame,
+ nat44_ei_main.in2out_hairpinning_finish_interface_output_node_fq_index);
+}
+
VLIB_NODE_FN (nat44_ei_in2out_hairpinning_finish_ip4_lookup_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return nat44_ei_in2out_hairpinning_finish_inline (vm, node, frame);
}
-VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_finish_ip4_lookup_node) = {
- .name = "nat44-ei-in2out-hairpinning-finish-ip4-lookup",
+VLIB_NODE_FN (nat44_ei_in2out_hairpinning_finish_interface_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_in2out_hairpinning_finish_inline (vm, node, frame);
+}
+
+VLIB_NODE_FN (nat44_ei_hairpinning_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_hairpinning_handoff_fn_inline (
+ vm, node, frame, nat44_ei_main.hairpinning_fq_index);
+}
+
+VLIB_REGISTER_NODE (nat44_ei_in2out_node) = {
+ .name = "nat44-ei-in2out",
.vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_fast_trace,
+ .format_trace = format_nat44_ei_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
+ .error_strings = nat44_ei_in2out_error_strings,
+ .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
+ .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
+ .next_nodes = {
+ [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
+ [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-slowpath",
+ [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
+ },
+};
+
+VLIB_REGISTER_NODE (nat44_ei_in2out_output_node) = {
+ .name = "nat44-ei-in2out-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nat44_ei_in2out_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
+ .error_strings = nat44_ei_in2out_error_strings,
+ .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
+ .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
+ .next_nodes = {
+ [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
+ [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "interface-output",
+ [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-output-slowpath",
+ [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-interface-output",
+ },
+};
+VLIB_REGISTER_NODE (nat44_ei_in2out_slowpath_node) = {
+ .name = "nat44-ei-in2out-slowpath",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nat44_ei_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
.error_strings = nat44_ei_in2out_error_strings,
+ .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
+ .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
+ .next_nodes = {
+ [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
+ [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-slowpath",
+ [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
+ },
+};
+VLIB_REGISTER_NODE (nat44_ei_in2out_output_slowpath_node) = {
+ .name = "nat44-ei-in2out-output-slowpath",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nat44_ei_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
+ .error_strings = nat44_ei_in2out_error_strings,
.runtime_data_bytes = sizeof (nat44_ei_runtime_t),
+ .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
+ .next_nodes = {
+ [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
+ [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "interface-output",
+ [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-output-slowpath",
+ [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-interface-output",
+ },
+};
- .n_next_nodes = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_N_NEXT,
+VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_handoff_ip4_lookup_node) = {
+ .name = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
+ .error_strings = nat44_ei_hairpinning_handoff_error_strings,
+ .format_trace = format_nat44_ei_hairpinning_handoff_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
- /* edit / add dispositions here */
+VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_handoff_interface_output_node) = {
+ .name = "nat44-ei-in2out-hairpinning-handoff-interface-output",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
+ .error_strings = nat44_ei_hairpinning_handoff_error_strings,
+ .format_trace = format_nat44_ei_hairpinning_handoff_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_finish_ip4_lookup_node) = {
+ .name = "nat44-ei-in2out-hairpinning-finish-ip4-lookup",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nat44_ei_in2out_fast_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
+ .error_strings = nat44_ei_in2out_error_strings,
+ .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
+ .n_next_nodes = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_N_NEXT,
.next_nodes = {
[NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP] = "error-drop",
[NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP] = "ip4-lookup",
},
};
-VLIB_NODE_FN (nat44_ei_in2out_hairpinning_finish_interface_output_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_in2out_hairpinning_finish_inline (vm, node, frame);
-}
-
VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_finish_interface_output_node) = {
.name = "nat44-ei-in2out-hairpinning-finish-interface-output",
.vector_size = sizeof (u32),
.format_trace = format_nat44_ei_in2out_fast_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
-
.n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
.error_strings = nat44_ei_in2out_error_strings,
-
.runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
.n_next_nodes = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_N_NEXT,
-
- /* edit / add dispositions here */
.next_nodes = {
[NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP] = "error-drop",
[NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP] = "interface-output",
},
};
+VLIB_REGISTER_NODE (nat44_ei_hairpinning_handoff_node) = {
+ .name = "nat44-ei-hairpinning-handoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
+ .error_strings = nat44_ei_hairpinning_handoff_error_strings,
+ .format_trace = format_nat44_ei_hairpinning_handoff_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (nat44_ei_hairpinning_node) = {
+ .name = "nat44-ei-hairpinning",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .format_trace = format_nat44_ei_hairpin_trace,
+ .n_next_nodes = NAT44_EI_HAIRPIN_N_NEXT,
+ .next_nodes = {
+ [NAT44_EI_HAIRPIN_NEXT_DROP] = "error-drop",
+ [NAT44_EI_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup",
+ [NAT44_EI_HAIRPIN_NEXT_HANDOFF] = "nat44-ei-hairpinning-handoff",
+ },
+};
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_inlines.h b/src/plugins/nat/nat44-ei/nat44_ei_inlines.h
index 672927256d1..399486c77dc 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_inlines.h
+++ b/src/plugins/nat/nat44-ei/nat44_ei_inlines.h
@@ -20,6 +20,7 @@
#include <nat/nat44-ei/nat44_ei.h>
#include <nat/nat44-ei/nat44_ei_ha.h>
+#include <nat/lib/nat_proto.h>
always_inline u64
calc_nat_key (ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
@@ -220,6 +221,29 @@ nat44_ei_session_update_counters (nat44_ei_session_t *s, f64 now, uword bytes,
&s->ha_last_refreshed, now);
}
+static_always_inline u32
+nat_session_get_timeout (nat_timeouts_t *timeouts, nat_protocol_t proto,
+ u8 state)
+{
+ switch (proto)
+ {
+ case NAT_PROTOCOL_ICMP:
+ return timeouts->icmp;
+ case NAT_PROTOCOL_UDP:
+ return timeouts->udp;
+ case NAT_PROTOCOL_TCP:
+ {
+ if (state)
+ return timeouts->tcp.transitory;
+ else
+ return timeouts->tcp.established;
+ }
+ default:
+ return timeouts->udp;
+ }
+ return 0;
+}
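+
+/* Usage sketch (assumes the main struct exposes a nat_timeouts_t field
+ * named timeouts): a transitory TCP session (state != 0) expires faster
+ * than an established one:
+ *
+ *   u32 to = nat_session_get_timeout (&nm->timeouts, NAT_PROTOCOL_TCP, 1);
+ *
+ * Unknown protocols deliberately fall back to the UDP timeout. */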
+
#endif /* __included_nat44_ei_inlines_h__ */
/*
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_out2in.c b/src/plugins/nat/nat44-ei/nat44_ei_out2in.c
index 7796b11cfd7..5d91cb04f7c 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_out2in.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_out2in.c
@@ -56,18 +56,6 @@ format_nat44_ei_out2in_trace (u8 *s, va_list *args)
return s;
}
-static u8 *
-format_nat44_ei_out2in_fast_trace (u8 *s, va_list *args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- nat44_ei_out2in_trace_t *t = va_arg (*args, nat44_ei_out2in_trace_t *);
-
- s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
- t->sw_if_index, t->next_index);
- return s;
-}
-
#define foreach_nat44_ei_out2in_error \
_ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \
_ (OUT_OF_PORTS, "out of ports") \
@@ -124,13 +112,10 @@ nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg)
if (clib_bihash_add_del_8_8 (&nm->in2out, &s_kv, 0))
nat_elog_warn (nm, "out2in key del failed");
- nat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port,
- s->in2out.fib_index);
+ nat_ipfix_logging_nat44_ses_delete (
+ ctx->thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
&s->in2out.addr, s->in2out.port,
@@ -233,12 +218,10 @@ create_session_for_static_mapping (
nat_elog_notice (nm, "out2in key add failed");
/* log NAT event */
- nat_ipfix_logging_nat44_ses_create (thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port, s->in2out.fib_index);
+ nat_ipfix_logging_nat44_ses_create (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_syslog_nat44_apmadd (s->user_index, s->in2out.fib_index,
&s->in2out.addr, s->in2out.port, &s->out2in.addr,
@@ -1341,7 +1324,6 @@ VLIB_NODE_FN (nat44_ei_out2in_node)
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat44_ei_out2in_node) = {
.name = "nat44-ei-out2in",
.vector_size = sizeof (u32),
@@ -1362,190 +1344,6 @@ VLIB_REGISTER_NODE (nat44_ei_out2in_node) = {
[NAT44_EI_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
},
};
-/* *INDENT-ON* */
-
-VLIB_NODE_FN (nat44_ei_out2in_fast_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- u32 n_left_from, *from;
- nat44_ei_main_t *nm = &nat44_ei_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
-
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
- vlib_get_buffers (vm, from, b, n_left_from);
- while (n_left_from > 0)
- {
- vlib_buffer_t *b0;
- u32 next0 = NAT44_EI_OUT2IN_NEXT_DROP;
- u32 sw_if_index0;
- ip4_header_t *ip0;
- ip_csum_t sum0;
- u32 new_addr0, old_addr0;
- u16 new_port0, old_port0;
- udp_header_t *udp0;
- tcp_header_t *tcp0;
- icmp46_header_t *icmp0;
- u32 proto0;
- u32 rx_fib_index0;
- ip4_address_t sm_addr0;
- u16 sm_port0;
- u32 sm_fib_index0;
-
- b0 = *b;
- b++;
-
- ip0 = vlib_buffer_get_current (b0);
- udp0 = ip4_next_header (ip0);
- tcp0 = (tcp_header_t *) udp0;
- icmp0 = (icmp46_header_t *) udp0;
-
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
-
- vnet_feature_next (&next0, b0);
-
- if (PREDICT_FALSE (ip0->ttl == 1))
- {
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
- ICMP4_time_exceeded_ttl_exceeded_in_transit,
- 0);
- next0 = NAT44_EI_OUT2IN_NEXT_ICMP_ERROR;
- goto trace00;
- }
-
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
-
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
- goto trace00;
-
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
- {
- next0 = nat44_ei_icmp_out2in (b0, ip0, icmp0, sw_if_index0,
- rx_fib_index0, node, next0, ~0, 0);
- goto trace00;
- }
-
- if (nat44_ei_static_mapping_match (ip0->dst_address, udp0->dst_port,
- rx_fib_index0, proto0, &sm_addr0,
- &sm_port0, &sm_fib_index0, 1, 0, 0))
- {
- b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_NO_TRANSLATION];
- goto trace00;
- }
-
- new_addr0 = sm_addr0.as_u32;
- new_port0 = sm_port0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm_fib_index0;
- old_addr0 = ip0->dst_address.as_u32;
- ip0->dst_address.as_u32 = new_addr0;
-
- sum0 = ip0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t, dst_address /* changed member */ );
- ip0->checksum = ip_csum_fold (sum0);
-
- if (PREDICT_FALSE (new_port0 != udp0->dst_port))
- {
- old_port0 = udp0->dst_port;
- udp0->dst_port = new_port0;
-
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- sum0 = ip_csum_update (sum0, old_port0, new_port0,
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- tcp0->checksum = ip_csum_fold (sum0);
- }
- else if (udp0->checksum)
- {
- sum0 = udp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- sum0 = ip_csum_update (sum0, old_port0, new_port0,
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- udp0->checksum = ip_csum_fold (sum0);
- }
- }
- else
- {
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- tcp0->checksum = ip_csum_fold (sum0);
- }
- else if (udp0->checksum)
- {
- sum0 = udp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- udp0->checksum = ip_csum_fold (sum0);
- }
- }
-
- trace00:
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- nat44_ei_out2in_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sw_if_index = sw_if_index0;
- t->next_index = next0;
- }
-
- if (next0 == NAT44_EI_OUT2IN_NEXT_DROP)
- {
- vlib_increment_simple_counter (&nm->counters.fastpath.out2in.drops,
- vm->thread_index, sw_if_index0, 1);
- }
-
- n_left_from--;
- next[0] = next0;
- next++;
- }
-
- vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
- frame->n_vectors);
-
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (nat44_ei_out2in_fast_node) = {
- .name = "nat44-ei-out2in-fast",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_out2in_fast_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_out2in_error_strings),
- .error_strings = nat44_ei_out2in_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_OUT2IN_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_OUT2IN_NEXT_DROP] = "error-drop",
- [NAT44_EI_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- },
-};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat64/nat64.c b/src/plugins/nat/nat64/nat64.c
index 1c1cdfba3fb..950eea60e5e 100644
--- a/src/plugins/nat/nat64/nat64.c
+++ b/src/plugins/nat/nat64/nat64.c
@@ -26,7 +26,6 @@
nat64_main_t nat64_main;
-/* *INDENT-OFF* */
/* Hook up input features */
VNET_FEATURE_INIT (nat64_in2out, static) = {
.arc_name = "ip6-unicast",
@@ -62,7 +61,6 @@ static u8 well_known_prefix[] = {
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00
};
-/* *INDENT-ON* */
#define nat_elog_str(_str) \
do \
@@ -135,6 +133,20 @@ nat64_get_worker_in2out (ip6_address_t * addr)
return next_worker_index;
}
+static u32
+get_thread_idx_by_port (u16 e_port)
+{
+ nat64_main_t *nm = &nat64_main;
+ u32 thread_idx = nm->num_workers;
+ if (nm->num_workers > 1)
+ {
+ thread_idx = nm->first_worker_index +
+ nm->workers[(e_port - 1024) / nm->port_per_thread %
+ _vec_len (nm->workers)];
+ }
+ return thread_idx;
+}
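+
+/* Worked example with hypothetical numbers: with first_worker_index = 1,
+ * port_per_thread = 32256 and two workers, e_port = 40000 gives
+ * (40000 - 1024) / 32256 = 1 and 1 % 2 = 1, so the result is
+ * first_worker_index + workers[1]. With no workers the initial value
+ * nm->num_workers is returned and the packet stays on the main thread. */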
+
u32
nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip)
{
@@ -157,14 +169,12 @@ nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip)
clib_memset (&daddr, 0, sizeof (daddr));
daddr.ip4.as_u32 = ip->dst_address.as_u32;
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
bibe = nat64_db_bib_entry_find (db, &daddr, 0, ip->protocol, 0, 0);
if (bibe)
return (u32) (db - nm->db);
}
- /* *INDENT-ON* */
return vlib_get_thread_index ();
}
@@ -202,7 +212,7 @@ nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip)
/* worker by outside port (TCP/UDP) */
port = clib_net_to_host_u16 (port);
if (port > 1024)
- return nm->first_worker_index + ((port - 1024) / nm->port_per_thread);
+ return get_thread_idx_by_port (port);
return vlib_get_thread_index ();
}
@@ -282,12 +292,10 @@ nat64_init (vlib_main_t * vm)
for (i = 0; i < nm->num_workers; i++)
bitmap = clib_bitmap_set (bitmap, i, 1);
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, bitmap)
{
vec_add1(nm->workers, i);
}
- /* *INDENT-ON* */
clib_bitmap_free (bitmap);
@@ -323,7 +331,6 @@ nat64_init_hash (nat64_config_t c)
vec_validate (nm->db, tm->n_vlib_mains - 1);
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
if (nat64_db_init (db, c, nat64_free_out_addr_and_port))
@@ -332,7 +339,6 @@ nat64_init_hash (nat64_config_t c)
rv = 1;
}
}
- /* *INDENT-ON* */
return rv;
}
@@ -344,7 +350,6 @@ nat64_free_hash ()
nat64_db_t *db;
int rv = 0;
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
if (nat64_db_free (db))
@@ -353,7 +358,6 @@ nat64_free_hash ()
rv = 1;
}
}
- /* *INDENT-ON* */
vec_free (nm->db);
@@ -408,7 +412,6 @@ nat64_add_del_pool_addr (u32 thread_index,
if (a->fib_index != ~0)
fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, nm->fib_src_hi);
/* Delete sessions using address */
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
nat64_db_free_out_addr (thread_index, db, &a->addr);
@@ -417,12 +420,10 @@ nat64_add_del_pool_addr (u32 thread_index,
vlib_set_simple_counter (&nm->total_sessions, db - nm->db, 0,
db->st.st_entries_num);
}
- /* *INDENT-ON* */
vec_del1 (nm->addr_pool, i);
}
/* Add/del external address to FIB */
- /* *INDENT-OFF* */
pool_foreach (interface, nm->interfaces)
{
if (nat64_interface_is_inside(interface))
@@ -431,7 +432,6 @@ nat64_add_del_pool_addr (u32 thread_index,
nat64_add_del_addr_to_fib (addr, 32, interface->sw_if_index, is_add);
break;
}
- /* *INDENT-ON* */
return 0;
}
@@ -442,13 +442,11 @@ nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx)
nat64_main_t *nm = &nat64_main;
nat64_address_t *a = 0;
- /* *INDENT-OFF* */
vec_foreach (a, nm->addr_pool)
{
if (fn (a, ctx))
break;
};
- /* *INDENT-ON* */
}
int
@@ -546,7 +544,6 @@ nat64_interface_add_del (u32 sw_if_index, u8 is_inside, u8 is_add)
// TODO: is enabled? we can't signal if it is not
/* Check if interface already exists */
- /* *INDENT-OFF* */
pool_foreach (i, nm->interfaces)
{
if (i->sw_if_index == sw_if_index)
@@ -555,7 +552,6 @@ nat64_interface_add_del (u32 sw_if_index, u8 is_inside, u8 is_add)
break;
}
}
- /* *INDENT-ON* */
if (is_add)
{
@@ -596,10 +592,8 @@ nat64_interface_add_del (u32 sw_if_index, u8 is_inside, u8 is_add)
if (!is_inside)
{
- /* *INDENT-OFF* */
vec_foreach (ap, nm->addr_pool)
nat64_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, is_add);
- /* *INDENT-ON* */
}
if (nm->num_workers > 1)
@@ -641,13 +635,11 @@ nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx)
nat64_main_t *nm = &nat64_main;
nat64_interface_t *i = 0;
- /* *INDENT-OFF* */
pool_foreach (i, nm->interfaces)
{
if (fn (i, ctx))
break;
}
- /* *INDENT-ON* */
}
// TODO: plugin independent
@@ -822,7 +814,6 @@ nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
nat64_db_bib_entry_t *bibe;
ip46_address_t addr;
- /* *INDENT-OFF* */
pool_foreach (static_bib, nm->static_bibs)
{
if ((static_bib->thread_index != thread_index) || (static_bib->done))
@@ -859,21 +850,18 @@ nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
static_bib->done = 1;
}
- /* *INDENT-ON* */
return 0;
}
static vlib_node_registration_t nat64_static_bib_worker_node;
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_static_bib_worker_node, static) = {
.function = nat64_static_bib_worker_fn,
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
.name = "nat64-static-bib-worker",
};
-/* *INDENT-ON* */
int
nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
@@ -916,7 +904,7 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
/* outside port must be assigned to the same thread as the internal address */
if ((out_port > 1024) && (nm->num_workers > 1))
{
- if (thread_index != ((out_port - 1024) / nm->port_per_thread))
+ if (thread_index != get_thread_idx_by_port (out_port))
return VNET_API_ERROR_INVALID_VALUE_2;
}
@@ -977,7 +965,6 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
if (nm->num_workers)
{
- /* *INDENT-OFF* */
pool_foreach (static_bib, nm->static_bibs)
{
if (static_bib->done)
@@ -985,7 +972,6 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
}
vec_foreach (index, to_be_free)
pool_put_index (nm->static_bibs, index[0]);
- /* *INDENT-ON* */
vec_free (to_be_free);
pool_get (nm->static_bibs, static_bib);
static_bib->in_addr.as_u64[0] = in_addr->as_u64[0];
@@ -1258,13 +1244,11 @@ nat64_prefix_walk (nat64_prefix_walk_fn_t fn, void *ctx)
nat64_main_t *nm = &nat64_main;
nat64_prefix_t *p = 0;
- /* *INDENT-OFF* */
vec_foreach (p, nm->pref64)
{
if (fn (p, ctx))
break;
};
- /* *INDENT-ON* */
}
void
@@ -1273,7 +1257,6 @@ nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
nat64_main_t *nm = &nat64_main;
nat64_prefix_t *p, *gp = 0, *prefix = 0;
- /* *INDENT-OFF* */
vec_foreach (p, nm->pref64)
{
if (p->fib_index == fib_index)
@@ -1285,7 +1268,6 @@ nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
if (p->fib_index == 0)
gp = p;
};
- /* *INDENT-ON* */
if (!prefix)
prefix = gp;
@@ -1344,7 +1326,6 @@ nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
nat64_prefix_t *p, *gp = 0;
u8 plen = 0;
- /* *INDENT-OFF* */
vec_foreach (p, nm->pref64)
{
if (p->fib_index == fib_index)
@@ -1356,7 +1337,6 @@ nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
if (p->vrf_id == 0)
gp = p;
};
- /* *INDENT-ON* */
if (!plen)
{
@@ -1431,14 +1411,12 @@ nat64_expire_worker_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_expire_worker_walk_node, static) = {
.function = nat64_expire_worker_walk_fn,
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
.name = "nat64-expire-worker-walk",
};
-/* *INDENT-ON* */
/**
* @brief Centralized process to drive per worker expire walk.
@@ -1566,12 +1544,10 @@ nat64_plugin_disable ()
}
nm->enabled = 0;
- /* *INDENT-OFF* */
pool_foreach (i, nm->interfaces)
{
vec_add1 (interfaces, *i);
}
- /* *INDENT-ON* */
vec_foreach (i, interfaces)
{
rv = nat64_interface_add_del (i->sw_if_index, i->flags, 0);
diff --git a/src/plugins/nat/nat64/nat64.h b/src/plugins/nat/nat64/nat64.h
index 1180f9df778..9eb8d915390 100644
--- a/src/plugins/nat/nat64/nat64.h
+++ b/src/plugins/nat/nat64/nat64.h
@@ -93,14 +93,12 @@ typedef struct
{
ip4_address_t addr;
u32 fib_index;
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
u16 busy_##n##_ports; \
u16 * busy_##n##_ports_per_thread; \
u32 busy_##n##_port_refcounts[65535];
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
} nat64_address_t;
typedef struct
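The busy-ports fields in the struct above are stamped out by the foreach_nat_protocol X-macro that the new nat_proto.h include provides. A self-contained illustration of the pattern, using a simplified two-argument stand-in list (the real list carries four arguments per protocol):

```c
#include <stdio.h>

/* Stand-in for foreach_nat_protocol; the real list lives in
 * nat/lib/nat_proto.h and has the form _(N, i, n, s). */
#define foreach_demo_proto _ (UDP, udp) _ (TCP, tcp) _ (ICMP, icmp)

typedef struct
{
#define _(N, n) unsigned busy_##n##_ports;
  foreach_demo_proto
#undef _
} demo_address_t;

int
main (void)
{
  demo_address_t a = { 0 };
  a.busy_udp_ports = 3;
#define _(N, n) printf (#N ": %u\n", a.busy_##n##_ports);
  foreach_demo_proto
#undef _
  return 0;
}
```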
diff --git a/src/plugins/nat/nat64/nat64_api.c b/src/plugins/nat/nat64/nat64_api.c
index e64b6434fd2..87cca01b59b 100644
--- a/src/plugins/nat/nat64/nat64_api.c
+++ b/src/plugins/nat/nat64/nat64_api.c
@@ -69,7 +69,6 @@ vl_api_nat64_get_timeouts_t_handler (vl_api_nat64_get_timeouts_t * mp)
vl_api_nat64_get_timeouts_reply_t *rmp;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_NAT64_GET_TIMEOUTS_REPLY,
({
rmp->udp = htonl (nm->udp_timeout);
@@ -77,7 +76,6 @@ vl_api_nat64_get_timeouts_t_handler (vl_api_nat64_get_timeouts_t * mp)
rmp->tcp_transitory = htonl (nm->tcp_trans_timeout);
rmp->icmp = htonl (nm->icmp_timeout);
}))
- /* *INDENT-ON* */
}
static void
@@ -298,10 +296,8 @@ vl_api_nat64_bib_dump_t_handler (vl_api_nat64_bib_dump_t * mp)
.context = mp->context,
};
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
nat64_db_bib_walk (db, mp->proto, nat64_api_bib_walk, &ctx);
- /* *INDENT-ON* */
}
static int
@@ -356,13 +352,11 @@ vl_api_nat64_st_dump_t_handler (vl_api_nat64_st_dump_t * mp)
.context = mp->context,
};
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
ctx.db = db;
nat64_db_st_walk (db, mp->proto, nat64_api_st_walk, &ctx);
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/plugins/nat/nat64/nat64_cli.c b/src/plugins/nat/nat64/nat64_cli.c
index 3af715c2457..2cef71080f9 100644
--- a/src/plugins/nat/nat64/nat64_cli.c
+++ b/src/plugins/nat/nat64/nat64_cli.c
@@ -484,10 +484,8 @@ nat64_show_bib_command_fn (vlib_main_t * vm,
else
vlib_cli_output (vm, "NAT64 %U BIB entries:", format_nat_protocol, proto);
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
nat64_db_bib_walk (db, p, nat64_cli_bib_walk, vm);
- /* *INDENT-ON* */
done:
unformat_free (line_input);
@@ -586,13 +584,11 @@ nat64_show_st_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "NAT64 sessions:");
else
vlib_cli_output (vm, "NAT64 %U sessions:", format_nat_protocol, proto);
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
ctx.db = db;
nat64_db_st_walk (db, p, nat64_cli_st_walk, &ctx);
}
- /* *INDENT-ON* */
done:
unformat_free (line_input);
@@ -775,7 +771,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
/*?
* @cliexpar
* @cliexstart{nat64 plugin}
@@ -983,7 +978,6 @@ VLIB_CLI_COMMAND (nat64_add_interface_address_command, static) = {
.short_help = "nat64 add interface address <interface> [del]",
.function = nat64_add_interface_address_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat64/nat64_db.c b/src/plugins/nat/nat64/nat64_db.c
index 82ef70de5cf..e4e9febcb12 100644
--- a/src/plugins/nat/nat64/nat64_db.c
+++ b/src/plugins/nat/nat64/nat64_db.c
@@ -54,13 +54,11 @@ nat64_db_free (nat64_db_t * db)
clib_bihash_free_48_8 (&db->st.in2out);
clib_bihash_free_48_8 (&db->st.out2in);
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
pool_free (db->bib._##n##_bib); \
pool_free (db->st._##n##_st);
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
pool_free (db->bib._unk_proto_bib);
pool_free (db->st._unk_proto_st);
@@ -82,14 +80,13 @@ nat64_db_bib_entry_create (u32 thread_index, nat64_db_t * db,
if (db->bib.bib_entries_num >= db->bib.limit)
{
db->free_addr_port_cb (db, out_addr, out_port, proto);
- //nat_ipfix_logging_max_bibs (thread_index, db->bib.limit);
+ nat_ipfix_logging_max_bibs (thread_index, db->bib.limit);
return 0;
}
/* create pool entry */
switch (ip_proto_to_nat_proto (proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
pool_get (db->bib._##n##_bib, bibe); \
@@ -97,7 +94,6 @@ nat64_db_bib_entry_create (u32 thread_index, nat64_db_t * db,
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
pool_get (db->bib._unk_proto_bib, bibe);
kv.value = bibe - db->bib._unk_proto_bib;
@@ -155,7 +151,6 @@ nat64_db_bib_entry_free (u32 thread_index, nat64_db_t * db,
switch (ip_proto_to_nat_proto (bibe->proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
bib = db->bib._##n##_bib; \
@@ -163,7 +158,6 @@ nat64_db_bib_entry_free (u32 thread_index, nat64_db_t * db,
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
bib = db->bib._unk_proto_bib;
st = db->st._unk_proto_st;
@@ -232,14 +226,12 @@ nat64_db_bib_entry_find (nat64_db_t * db, ip46_address_t * addr, u16 port,
switch (ip_proto_to_nat_proto (proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
bib = db->bib._##n##_bib; \
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
bib = db->bib._unk_proto_bib;
break;
@@ -271,7 +263,6 @@ nat64_db_bib_walk (nat64_db_t * db, u8 proto,
if (proto == 255)
{
- /* *INDENT-OFF* */
#define _(N, i, n, s) \
bib = db->bib._##n##_bib; \
pool_foreach (bibe, bib) { \
@@ -285,32 +276,27 @@ nat64_db_bib_walk (nat64_db_t * db, u8 proto,
if (fn (bibe, ctx))
return;
}
- /* *INDENT-ON* */
}
else
{
switch (ip_proto_to_nat_proto (proto))
{
- /* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
bib = db->bib._##n##_bib; \
break;
foreach_nat_protocol
#undef _
- /* *INDENT-ON* */
default:
bib = db->bib._unk_proto_bib;
break;
}
- /* *INDENT-OFF* */
pool_foreach (bibe, bib)
{
if (fn (bibe, ctx))
return;
}
- /* *INDENT-ON* */
}
}
@@ -321,14 +307,12 @@ nat64_db_bib_entry_by_index (nat64_db_t * db, u8 proto, u32 bibe_index)
switch (ip_proto_to_nat_proto (proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
bib = db->bib._##n##_bib; \
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
bib = db->bib._unk_proto_bib;
break;
@@ -345,7 +329,6 @@ nat64_db_st_walk (nat64_db_t * db, u8 proto,
if (proto == 255)
{
- /* *INDENT-OFF* */
#define _(N, i, n, s) \
st = db->st._##n##_st; \
pool_foreach (ste, st) { \
@@ -359,32 +342,27 @@ nat64_db_st_walk (nat64_db_t * db, u8 proto,
if (fn (ste, ctx))
return;
}
- /* *INDENT-ON* */
}
else
{
switch (ip_proto_to_nat_proto (proto))
{
- /* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
st = db->st._##n##_st; \
break;
foreach_nat_protocol
#undef _
- /* *INDENT-ON* */
default:
st = db->st._unk_proto_st;
break;
}
- /* *INDENT-OFF* */
pool_foreach (ste, st)
{
if (fn (ste, ctx))
return;
}
- /* *INDENT-ON* */
}
}
@@ -401,14 +379,13 @@ nat64_db_st_entry_create (u32 thread_index, nat64_db_t * db,
if (db->st.st_entries_num >= db->st.limit)
{
- //nat_ipfix_logging_max_sessions (thread_index, db->st.limit);
+ nat_ipfix_logging_max_sessions (thread_index, db->st.limit);
return 0;
}
/* create pool entry */
switch (ip_proto_to_nat_proto (bibe->proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
pool_get (db->st._##n##_st, ste); \
@@ -417,7 +394,6 @@ nat64_db_st_entry_create (u32 thread_index, nat64_db_t * db,
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
pool_get (db->st._unk_proto_st, ste);
kv.value = ste - db->st._unk_proto_st;
@@ -494,7 +470,6 @@ nat64_db_st_entry_free (u32 thread_index,
switch (ip_proto_to_nat_proto (ste->proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
st = db->st._##n##_st; \
@@ -502,7 +477,6 @@ nat64_db_st_entry_free (u32 thread_index,
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
st = db->st._unk_proto_st;
bib = db->bib._unk_proto_bib;
@@ -579,14 +553,12 @@ nat64_db_st_entry_find (nat64_db_t * db, ip46_address_t * l_addr,
switch (ip_proto_to_nat_proto (proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
st = db->st._##n##_st; \
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
st = db->st._unk_proto_st;
break;
@@ -622,14 +594,12 @@ nat64_db_st_entry_get_index (nat64_db_t * db, nat64_db_st_entry_t * ste)
switch (ip_proto_to_nat_proto (ste->proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
st = db->st._##n##_st; \
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
st = db->st._unk_proto_st;
return (u32) ~ 0;
@@ -645,14 +615,12 @@ nat64_db_st_entry_by_index (nat64_db_t * db, u8 proto, u32 ste_index)
switch (ip_proto_to_nat_proto (proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
st = db->st._##n##_st; \
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
st = db->st._unk_proto_st;
break;
@@ -667,7 +635,6 @@ nad64_db_st_free_expired (u32 thread_index, nat64_db_t * db, u32 now)
u32 *ste_to_be_free = 0, *ste_index;
nat64_db_st_entry_t *st, *ste;
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
st = db->st._##n##_st; \
pool_foreach (ste, st) {\
@@ -692,7 +659,6 @@ nad64_db_st_free_expired (u32 thread_index, nat64_db_t * db, u32 now)
nat64_db_st_entry_free (thread_index, db,
pool_elt_at_index(st, ste_index[0]));
vec_free (ste_to_be_free);
-/* *INDENT-ON* */
}
void
@@ -704,7 +670,6 @@ nat64_db_free_out_addr (u32 thread_index,
nat64_db_bib_entry_t *bibe;
db->addr_free = 1;
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
st = db->st._##n##_st; \
pool_foreach (ste, st) { \
@@ -730,7 +695,6 @@ nat64_db_free_out_addr (u32 thread_index,
pool_elt_at_index(st, ste_index[0]));
vec_free (ste_to_be_free);
db->addr_free = 0;
-/* *INDENT-ON* */
}
/*
diff --git a/src/plugins/nat/nat64/nat64_db.h b/src/plugins/nat/nat64/nat64_db.h
index 711b6bf6b03..a7d433fb8ea 100644
--- a/src/plugins/nat/nat64/nat64_db.h
+++ b/src/plugins/nat/nat64/nat64_db.h
@@ -18,6 +18,7 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vnet/fib/fib_source.h>
+#include <nat/lib/nat_proto.h>
#include <vppinfra/bihash_24_8.h>
#include <vppinfra/bihash_48_8.h>
@@ -46,7 +47,6 @@ typedef struct
};
} nat64_db_bib_entry_key_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct
{
ip6_address_t in_addr;
@@ -58,17 +58,14 @@ typedef CLIB_PACKED(struct
u8 proto;
u8 is_static;
}) nat64_db_bib_entry_t;
-/* *INDENT-ON* */
typedef struct
{
/* BIBs */
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
nat64_db_bib_entry_t *_##n##_bib;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
nat64_db_bib_entry_t *_unk_proto_bib;
/* BIB lookup */
@@ -97,7 +94,6 @@ typedef struct
};
} nat64_db_st_entry_key_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct
{
ip6_address_t in_r_addr;
@@ -108,17 +104,14 @@ typedef CLIB_PACKED(struct
u8 proto;
u8 tcp_state;
}) nat64_db_st_entry_t;
-/* *INDENT-ON* */
typedef struct
{
/* session tables */
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
nat64_db_st_entry_t *_##n##_st;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
nat64_db_st_entry_t *_unk_proto_st;
/* session lookup */
diff --git a/src/plugins/nat/nat64/nat64_doc.md b/src/plugins/nat/nat64/nat64_doc.md
deleted file mode 100644
index f65b46338b0..00000000000
--- a/src/plugins/nat/nat64/nat64_doc.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Stateful NAT64: Network Address and Protocol Translation from IPv6 Clients to IPv4 Servers {#nat64_doc}
-
-## Introduction
-
-Stateful NAT64 in VPP allows IPv6-only clients to contact IPv4 servers using unicast UDP, TCP, or ICMP based on RFC 6146.
-
-## Configuration
-
-### Enable/disable NAT64 feature on the interface
-
-> set interface nat64 in|out <intfc> [del]
-
-in: inside/local/IPv6 network
-out: outside/external/IPv4 network
-intfc: interface name
-
-### Add/delete NAT64 pool address
-
-One or more public IPv4 addresses assigned to a NAT64 are shared among several IPv6-only clients.
-
-> nat64 add pool address <ip4-range-start> [- <ip4-range-end>] [tenant-vrf <tenant-vrf-id>] [del]
-
-ip4-range-start: First IPv4 address of the range
-ip4-range-end: Last IPv4 address of the range (optional, not used for single address)
-tenant-vrf-id: VRF id of the tenant associated with the pool address (optional, if not set pool address is global)
-
-### Add/delete static BIB entry
-
-Stateful NAT64 also supports IPv4-initiated communications to a subset of the IPv6 hosts through staticaly configured bindings.
-
-> nat64 add static bib <ip6-addr> <in-port> <ip4-addr> <out-port> tcp|udp|icmp [vfr <table-id>] [del]
-
-ip6-addr: inside IPv6 address of the host
-in-port: inside port or ICMPv6 identifier
-ip4-addr: outside IPv4 address of the host
-out-port: outside port or ICMPv4 identifier
-table-id: VRF id of the tenant associated with the BIB entry (optional, default use global VRF)
-
-### Set NAT64 session timeouts
-
-Session is deleted when timer expires. If all sessions corresponding to a dynamically create BIB entry are deleted, then the BIB entry is also deleted. When packets are flowing sessiom timer is refreshed to keep the session alive.
-
-> set nat64 timeouts udp <sec> icmp <sec> tcp-trans <sec> tcp-est <sec> tcp-incoming-syn <sec> | reset
-
-udp: UDP session timeout value (default 300sec)
-icmp: ICMP session timeout value (default 60sec)
-tcp-trans: transitory TCP session timeout value (default 240sec)
-tcp-est: established TCP session timeout value (default 7440sec)
-tcp-incoming-syn: incoming SYN TCP session timeout value (default 6sec)
-reset: reset timers to default values
-
-### Set NAT64 prefix
-
-Stateful NAT64 support the algorithm for generating IPv6 representations of IPv4 addresses defined in RFC 6052. If no prefix is configured, Well-Known Prefix (64:ff9b::/96) is used.
-
-> nat64 add prefix <ip6-prefix>/<plen> [tenant-vrf <vrf-id>] [del]
-
-ip6-prefix: IPv6 prefix
-plen: prefix length (valid values: 32, 40, 48, 56, 64, or 96)
-tenant-vrf: VRF id of the tenant associated with the prefix
-
-### Show commands
-
-> show nat64 pool
-> show nat64 interfaces
-> show nat64 bib tcp|udp|icmp
-> show nat64 session table tcp|udp|icmp
-> show nat64 timeouts
-> show nat64 prefix
-
-## Notes
-
-Multi thread is not supported yet (CLI/API commands are disabled when VPP runs with multiple threads).
diff --git a/src/plugins/nat/nat64/nat64_doc.rst b/src/plugins/nat/nat64/nat64_doc.rst
new file mode 100644
index 00000000000..f375fba68bd
--- /dev/null
+++ b/src/plugins/nat/nat64/nat64_doc.rst
@@ -0,0 +1,91 @@
+Stateful NAT64
+==============
+
+This document describes stateful NAT64: Network Address and Protocol
+Translation from IPv6 clients to IPv4 servers.
+
+Introduction
+------------
+
+Stateful NAT64 in VPP allows IPv6-only clients to contact IPv4 servers
+using unicast UDP, TCP, or ICMP based on RFC 6146.
+
+Configuration
+-------------
+
+Enable/disable NAT64 feature on the interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+   set interface nat64 in|out <intfc> [del]
+
+- in: inside/local/IPv6 network
+- out: outside/external/IPv4 network
+- intfc: interface name
+
+Add/delete NAT64 pool address
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One or more public IPv4 addresses assigned to a NAT64 are shared among
+several IPv6-only clients.
+
+   nat64 add pool address <ip4-range-start> [- <ip4-range-end>] [tenant-vrf <tenant-vrf-id>] [del]
+
+- ip4-range-start: first IPv4 address of the range
+- ip4-range-end: last IPv4 address of the range (optional, not used for a
+  single address)
+- tenant-vrf-id: VRF id of the tenant associated with the pool address
+  (optional; if not set, the pool address is global)
+
+Add/delete static BIB entry
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Stateful NAT64 also supports IPv4-initiated communications to a subset
+of the IPv6 hosts through statically configured bindings.
+
+   nat64 add static bib <ip6-addr> <in-port> <ip4-addr> <out-port> tcp|udp|icmp [vrf <table-id>] [del]
+
+- ip6-addr: inside IPv6 address of the host
+- in-port: inside port or ICMPv6 identifier
+- ip4-addr: outside IPv4 address of the host
+- out-port: outside port or ICMPv4 identifier
+- table-id: VRF id of the tenant associated with the BIB entry (optional;
+  the global VRF is used by default)
+
+Set NAT64 session timeouts
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A session is deleted when its timer expires. If all sessions corresponding
+to a dynamically created BIB entry are deleted, the BIB entry is also
+deleted. While packets are flowing, the session timer is refreshed to keep
+the session alive.
+
+   set nat64 timeouts udp <sec> icmp <sec> tcp-trans <sec> tcp-est <sec> tcp-incoming-syn <sec> | reset
+
+- udp: UDP session timeout value (default 300sec)
+- icmp: ICMP session timeout value (default 60sec)
+- tcp-trans: transitory TCP session timeout value (default 240sec)
+- tcp-est: established TCP session timeout value (default 7440sec)
+- tcp-incoming-syn: incoming SYN TCP session timeout value (default 6sec)
+- reset: reset timers to default values
+
+Set NAT64 prefix
+~~~~~~~~~~~~~~~~
+
+Stateful NAT64 supports the algorithm for generating IPv6 representations
+of IPv4 addresses defined in RFC 6052. If no prefix is configured, the
+Well-Known Prefix (64:ff9b::/96) is used.
+
+   nat64 add prefix <ip6-prefix>/<plen> [tenant-vrf <vrf-id>] [del]
+
+- ip6-prefix: IPv6 prefix
+- plen: prefix length (valid values: 32, 40, 48, 56, 64, or 96)
+- tenant-vrf: VRF id of the tenant associated with the prefix
+
+Show commands
+~~~~~~~~~~~~~
+
+   show nat64 pool
+   show nat64 interfaces
+   show nat64 bib tcp|udp|icmp
+   show nat64 session table tcp|udp|icmp
+   show nat64 timeouts
+   show nat64 prefix
+
+Notes
+-----
+
+Multi-threading is not supported yet (CLI/API commands are disabled when
+VPP runs with multiple threads).
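A minimal end-to-end session assembled from the commands documented above (interface names and addresses are illustrative placeholders, and the `nat64 plugin enable` path is assumed from the CLI hunks earlier in this change):

```
vpp# nat64 plugin enable
vpp# set interface nat64 in GigabitEthernet0/8/0
vpp# set interface nat64 out GigabitEthernet0/9/0
vpp# nat64 add pool address 203.0.113.10
vpp# nat64 add prefix 64:ff9b::/96
vpp# show nat64 pool
```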
diff --git a/src/plugins/nat/nat64/nat64_in2out.c b/src/plugins/nat/nat64/nat64_in2out.c
index 52d3c7f0a3b..63eab72a7fb 100644
--- a/src/plugins/nat/nat64/nat64_in2out.c
+++ b/src/plugins/nat/nat64/nat64_in2out.c
@@ -86,14 +86,12 @@ nat64_not_translate (u32 sw_if_index, ip6_address_t ip6_addr)
ip_lookup_main_t *lm6 = &im6->lookup_main;
ip_interface_address_t *ia = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
({
addr = ip_interface_address_get_address (lm6, ia);
if (0 == ip6_address_compare (addr, &ip6_addr))
return 1;
}));
- /* *INDENT-ON* */
return 0;
}
@@ -744,7 +742,6 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
bibe = 0;
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);
@@ -752,7 +749,6 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
if (bibe)
break;
}
- /* *INDENT-ON* */
if (!bibe)
return -1;
@@ -851,7 +847,6 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
daddr.ip4.as_u32 = bibe->out_addr.as_u32;
ste = 0;
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
@@ -860,7 +855,6 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
if (ste)
break;
}
- /* *INDENT-ON* */
if (!ste)
return -1;
@@ -1006,7 +1000,6 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
bibe = 0;
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);
@@ -1014,7 +1007,6 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
if (bibe)
break;
}
- /* *INDENT-ON* */
if (!bibe)
return -1;
@@ -1226,7 +1218,6 @@ VLIB_NODE_FN (nat64_in2out_node) (vlib_main_t * vm,
return nat64_in2out_node_fn_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_in2out_node) = {
.name = "nat64-in2out",
.vector_size = sizeof (u32),
@@ -1243,7 +1234,6 @@ VLIB_REGISTER_NODE (nat64_in2out_node) = {
[NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (nat64_in2out_slowpath_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1252,7 +1242,6 @@ VLIB_NODE_FN (nat64_in2out_slowpath_node) (vlib_main_t * vm,
return nat64_in2out_node_fn_inline (vm, node, frame, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
.name = "nat64-in2out-slowpath",
.vector_size = sizeof (u32),
@@ -1269,7 +1258,6 @@ VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
[NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
},
};
-/* *INDENT-ON* */
typedef struct nat64_in2out_frag_set_ctx_t_
{
@@ -1384,7 +1372,6 @@ VLIB_NODE_FN (nat64_in2out_handoff_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
.name = "nat64-in2out-handoff",
.vector_size = sizeof (u32),
@@ -1399,7 +1386,6 @@ VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat64/nat64_out2in.c b/src/plugins/nat/nat64/nat64_out2in.c
index 7cc2d3d3b70..a8faead4470 100644
--- a/src/plugins/nat/nat64/nat64_out2in.c
+++ b/src/plugins/nat/nat64/nat64_out2in.c
@@ -639,7 +639,6 @@ VLIB_NODE_FN (nat64_out2in_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_node) = {
.name = "nat64-out2in",
.vector_size = sizeof (u32),
@@ -655,7 +654,6 @@ VLIB_REGISTER_NODE (nat64_out2in_node) = {
[NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
},
};
-/* *INDENT-ON* */
typedef struct nat64_out2in_frag_set_ctx_t_
{
@@ -769,7 +767,6 @@ VLIB_NODE_FN (nat64_out2in_handoff_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = {
.name = "nat64-out2in-handoff",
.vector_size = sizeof (u32),
@@ -784,7 +781,6 @@ VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat66/nat66_cli.c b/src/plugins/nat/nat66/nat66_cli.c
index 0ca40875584..9ef8d056685 100644
--- a/src/plugins/nat/nat66/nat66_cli.c
+++ b/src/plugins/nat/nat66/nat66_cli.c
@@ -330,16 +330,16 @@ nat66_show_static_mappings_command_fn (vlib_main_t * vm,
* @cliexpar
* @cliexstart{nat66}
* To enable NAT66 plugin
- * vpp# nat66 enable
+ * vpp# nat66 plugin enable
* To disable NAT66 plugin
- * vpp# nat66 disable
+ * vpp# nat66 plugin disable
* To enable NAT66 plugin with outside-vrf id 10
- * vpp# nat66 enable outside-vrf 10
+ * vpp# nat66 plugin enable outside-vrf 10
* @cliexend
?*/
VLIB_CLI_COMMAND (nat66_enable_disable_command, static) = {
- .path = "nat66",
- .short_help = "nat66 <enable [outside-vrf <vrf-id>]>|disable",
+ .path = "nat66 plugin",
+ .short_help = "nat66 plugin <enable [outside-vrf <vrf-id>]>|disable",
.function = nat66_enable_disable_command_fn,
};
diff --git a/src/plugins/nat/nat66/nat66_in2out.c b/src/plugins/nat/nat66/nat66_in2out.c
index 356100f89ef..aa2229e1997 100644
--- a/src/plugins/nat/nat66/nat66_in2out.c
+++ b/src/plugins/nat/nat66/nat66_in2out.c
@@ -94,14 +94,12 @@ nat66_not_translate (u32 rx_fib_index, ip6_address_t ip6_addr)
sw_if_index = fib_entry_get_resolving_interface (fei);
}
- /* *INDENT-OFF* */
pool_foreach (i, nm->interfaces)
{
/* NAT packet aimed at outside interface */
if (nat66_interface_is_outside (i) && sw_if_index == i->sw_if_index)
return 0;
}
- /* *INDENT-ON* */
return 1;
}
@@ -235,7 +233,6 @@ VLIB_NODE_FN (nat66_in2out_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat66_in2out_node) = {
.name = "nat66-in2out",
.vector_size = sizeof (u32),
@@ -250,7 +247,6 @@ VLIB_REGISTER_NODE (nat66_in2out_node) = {
[NAT66_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat66/nat66_out2in.c b/src/plugins/nat/nat66/nat66_out2in.c
index 9d44b4880eb..820e0c79033 100644
--- a/src/plugins/nat/nat66/nat66_out2in.c
+++ b/src/plugins/nat/nat66/nat66_out2in.c
@@ -193,7 +193,6 @@ VLIB_NODE_FN (nat66_out2in_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat66_out2in_node) = {
.name = "nat66-out2in",
.vector_size = sizeof (u32),
@@ -208,7 +207,6 @@ VLIB_REGISTER_NODE (nat66_out2in_node) = {
[NAT66_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/pnat/pnat.api b/src/plugins/nat/pnat/pnat.api
index b6632159d7c..de555c41412 100644
--- a/src/plugins/nat/pnat/pnat.api
+++ b/src/plugins/nat/pnat/pnat.api
@@ -26,6 +26,7 @@ enum pnat_mask
PNAT_DPORT = 0x8,
PNAT_COPY_BYTE = 0x10,
PNAT_CLEAR_BYTE = 0x20,
+ PNAT_PROTO = 0x40,
};
enum pnat_attachment_point
@@ -65,6 +66,7 @@ autoendian define pnat_binding_add
vl_api_pnat_rewrite_tuple_t rewrite;
};
+
autoendian define pnat_binding_add_reply
{
u32 context;
@@ -72,6 +74,22 @@ autoendian define pnat_binding_add_reply
u32 binding_index;
};
+autoendian define pnat_binding_add_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_pnat_match_tuple_t match;
+ vl_api_pnat_rewrite_tuple_t rewrite;
+};
+
+
+autoendian define pnat_binding_add_v2_reply
+{
+ u32 context;
+ i32 retval;
+ u32 binding_index;
+};
+
autoendian autoreply define pnat_binding_del
{
u32 client_index;
diff --git a/src/plugins/nat/pnat/pnat.c b/src/plugins/nat/pnat/pnat.c
index 547b063f286..2b4a6b49e96 100644
--- a/src/plugins/nat/pnat/pnat.c
+++ b/src/plugins/nat/pnat/pnat.c
@@ -56,7 +56,9 @@ static pnat_mask_fast_t pnat_mask2fast(pnat_mask_t lookup_mask) {
m.as_u64[0] = 0xffffffff00000000;
if (lookup_mask & PNAT_DA)
m.as_u64[0] |= 0x00000000ffffffff;
- m.as_u64[1] = 0xffffffff00000000;
+ m.as_u64[1] = 0x00ffffff00000000;
+ if (lookup_mask & PNAT_PROTO)
+ m.as_u64[1] |= 0xff00000000000000;
if (lookup_mask & PNAT_SPORT)
m.as_u64[1] |= 0x00000000ffff0000;
if (lookup_mask & PNAT_DPORT)
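The fix above stops unconditionally matching the whole upper half of key word 1: the protocol byte is now compared only when PNAT_PROTO is set. A self-contained sketch of the implied word-1 layout; the PNAT_DPORT constant's assignment falls outside the hunk, so that value is inferred from the pattern:

```c
#include <stdint.h>
#include <stdio.h>

/* Inferred layout of mask word 1: protocol in the top byte, 24 bits that
 * are always compared, then 16-bit source and destination ports. */
int
main (void)
{
  uint64_t m1 = 0x00ffffff00000000ULL; /* always-compared middle bits */
  int has_proto = 1, has_sport = 0, has_dport = 1;
  if (has_proto)
    m1 |= 0xff00000000000000ULL; /* PNAT_PROTO selects the proto byte */
  if (has_sport)
    m1 |= 0x00000000ffff0000ULL; /* PNAT_SPORT */
  if (has_dport)
    m1 |= 0x000000000000ffffULL; /* PNAT_DPORT (inferred) */
  printf ("mask word1 = 0x%016llx\n", (unsigned long long) m1);
  return 0;
}
```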
diff --git a/src/plugins/nat/pnat/pnat.md b/src/plugins/nat/pnat/pnat.md
deleted file mode 100644
index 1e6bc130848..00000000000
--- a/src/plugins/nat/pnat/pnat.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# PNAT: 1:1 match and rewrite programmable NAT {#pnat_doc}
-
-PNAT is a stateless statically configured, match and rewrite plugin.
-It uses a set of match and rewrite rules that are applied on the IP
-input and output feature paths. A PNAT rule is unidirectional.
-
-The match is done using up to a 6-tuple; IP source and destination address,
-IP protocol, transport layer source and destination ports, and FIB table / interface index.
-
-While multiple match/rewrite rules can be applied to an interface (per direction), the match
-pattern must be the same across all rules on that interface/direction.
-
-If required in the future, matching could be done using the general classifier, allowing matching
-on any protocol field, as well having an ordered set of match patterns.
-
-If the packet does not match, it will by default be passed to the next graph node in the feature chain.
-If desired a different miss behaviour could be implemented, e.g. similarly to dynamic NAT, the packet punted to a slow path.
-
-## Rewrite instructions
-
-``` c
-typedef enum {
- PNAT_INSTR_NONE = 1 << 0,
- PNAT_INSTR_SOURCE_ADDRESS = 1 << 1,
- PNAT_INSTR_SOURCE_PORT = 1 << 2,
- PNAT_INSTR_DESTINATION_ADDRESS = 1 << 3,
- PNAT_INSTR_DESTINATION_PORT = 1 << 4,
-} pnat_instructions_t;
-```
-
-These are the supported rewrite instructions.
-The IP checksum and the TCP/UDP checksum are incrementally updated as required.
-
-There are only a few "sanity checks" on the rewrites. For example, the rewrite in the outbound direction
-is applied on the ip-output feature chain. If one were to rewrite the IP destination address, the routing
-decision and determination of the next-hop has already been done, and the packet would still be forwarded
-to the original next-hop.
diff --git a/src/plugins/nat/pnat/pnat.rst b/src/plugins/nat/pnat/pnat.rst
new file mode 100644
index 00000000000..5cac047a236
--- /dev/null
+++ b/src/plugins/nat/pnat/pnat.rst
@@ -0,0 +1,45 @@
+PNAT 1:1 match & rewrite NAT
+============================
+
+PNAT is a stateless, statically configured match-and-rewrite plugin. It
+uses a set of match and rewrite rules that are applied on the IP input
+and output feature paths. A PNAT rule is unidirectional.
+
+The match is done using up to a 6-tuple: IP source and destination
+address, IP protocol, transport layer source and destination ports, and
+FIB table / interface index.
+
+While multiple match/rewrite rules can be applied to an interface (per
+direction), the match pattern must be the same across all rules on that
+interface/direction.
+
+If required in the future, matching could be done using the general
+classifier, allowing matching on any protocol field, as well as having
+an ordered set of match patterns.
+
+If the packet does not match, it will by default be passed to the next
+graph node in the feature chain. If desired, a different miss behaviour
+could be implemented; e.g., similarly to dynamic NAT, the packet could
+be punted to a slow path.
+
+Rewrite instructions
+--------------------
+
+.. code:: c
+
+ typedef enum {
+ PNAT_INSTR_NONE = 1 << 0,
+ PNAT_INSTR_SOURCE_ADDRESS = 1 << 1,
+ PNAT_INSTR_SOURCE_PORT = 1 << 2,
+ PNAT_INSTR_DESTINATION_ADDRESS = 1 << 3,
+ PNAT_INSTR_DESTINATION_PORT = 1 << 4,
+ } pnat_instructions_t;
+
+These are the supported rewrite instructions. The IP checksum and the
+TCP/UDP checksum are incrementally updated as required.
+
+There are only a few “sanity checks” on the rewrites. For example, the
+rewrite in the outbound direction is applied on the ip-output feature
+chain. If one were to rewrite the IP destination address, the routing
+decision and determination of the next-hop has already been done, and
+the packet would still be forwarded to the original next-hop.
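The incremental checksum update mentioned above can be shown with a small standalone example following RFC 1624 (HC' = ~(~HC + ~m + m')); VPP's ip_csum_update/ip_csum_fold helpers do the equivalent on wider accumulators, so this is a sketch of the arithmetic rather than the plugin's code:

```c
#include <stdint.h>
#include <stdio.h>

/* Incrementally patch a 16-bit one's-complement checksum when one
 * 16-bit word of the covered data changes from old_word to new_word. */
static uint16_t
csum_update16 (uint16_t hc, uint16_t old_word, uint16_t new_word)
{
  uint32_t sum = (uint16_t) ~hc;
  sum += (uint16_t) ~old_word;
  sum += new_word;
  while (sum >> 16) /* fold carries back in */
    sum = (sum & 0xffff) + (sum >> 16);
  return (uint16_t) ~sum;
}

int
main (void)
{
  /* made-up values: rewrite one address word and patch the checksum */
  uint16_t updated = csum_update16 (0xb1e6, 0x0a01, 0xc0a8);
  printf ("0x%04x\n", updated);
  return 0;
}
```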
diff --git a/src/plugins/nat/pnat/pnat_api.c b/src/plugins/nat/pnat/pnat_api.c
index eaefbaf4738..02e61219d1e 100644
--- a/src/plugins/nat/pnat/pnat_api.c
+++ b/src/plugins/nat/pnat/pnat_api.c
@@ -13,6 +13,7 @@
* limitations under the License.
*/
#include "pnat.h"
+#include <vnet/vnet.h>
#include <pnat/pnat.api_enum.h>
#include <pnat/pnat.api_types.h>
#include <vlibmemory/api.h>
@@ -22,6 +23,7 @@
#include <vnet/ip/reass/ip4_sv_reass.h>
#include <vnet/ip/reass/ip6_full_reass.h>
#include <vnet/ip/reass/ip6_sv_reass.h>
+#include <vpp/app/version.h>
/*
* This file contains the API handlers for the pnat.api
@@ -34,22 +36,33 @@ static void vl_api_pnat_binding_add_t_handler(vl_api_pnat_binding_add_t *mp) {
pnat_main_t *pm = &pnat_main;
vl_api_pnat_binding_add_reply_t *rmp;
u32 binding_index;
+
+ // for backward compatibility
+ if (mp->match.proto == 0)
+ mp->match.mask |= PNAT_PROTO;
+
int rv = pnat_binding_add(&mp->match, &mp->rewrite, &binding_index);
REPLY_MACRO2_END(VL_API_PNAT_BINDING_ADD_REPLY,
({ rmp->binding_index = binding_index; }));
}
static void
+vl_api_pnat_binding_add_v2_t_handler(vl_api_pnat_binding_add_v2_t *mp) {
+    pnat_main_t *pm = &pnat_main;
+    vl_api_pnat_binding_add_v2_reply_t *rmp;
+ u32 binding_index;
+ int rv = pnat_binding_add(&mp->match, &mp->rewrite, &binding_index);
+ REPLY_MACRO2_END(VL_API_PNAT_BINDING_ADD_V2_REPLY,
+ ({ rmp->binding_index = binding_index; }));
+}
+
+static void
vl_api_pnat_binding_attach_t_handler(vl_api_pnat_binding_attach_t *mp) {
pnat_main_t *pm = &pnat_main;
vl_api_pnat_binding_attach_reply_t *rmp;
int rv;
- /* Ensure that the interface exists */
- if (!vnet_sw_if_index_is_api_valid(mp->sw_if_index)) {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
- }
+ VALIDATE_SW_IF_INDEX_END(mp);
rv =
pnat_binding_attach(mp->sw_if_index, mp->attachment, mp->binding_index);
@@ -64,11 +77,7 @@ vl_api_pnat_binding_detach_t_handler(vl_api_pnat_binding_detach_t *mp) {
vl_api_pnat_binding_detach_reply_t *rmp;
int rv;
- /* Ensure that the interface exists */
- if (!vnet_sw_if_index_is_api_valid(mp->sw_if_index)) {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
- }
+ VALIDATE_SW_IF_INDEX_END(mp);
rv =
pnat_binding_detach(mp->sw_if_index, mp->attachment, mp->binding_index);
@@ -193,7 +202,7 @@ clib_error_t *pnat_plugin_api_hookup(vlib_main_t *vm) {
*/
#include <vnet/plugin/plugin.h>
VLIB_PLUGIN_REGISTER() = {
- .version = "0.0.1",
+ .version = VPP_BUILD_VER,
.description = "Policy 1:1 NAT",
};
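The VALIDATE_SW_IF_INDEX_END calls introduced above replace the removed open-coded check verbatim. A hedged sketch of the shape such a macro expands to, reconstructed from the code it replaces and the bad_sw_if_index label the handlers carry; the exact vlibapi definition differs in details:

```c
/* Illustrative sketch only, not the real vlibapi macro. It mirrors the
 * open-coded validation removed in the hunks above and jumps to the
 * handler's bad_sw_if_index label on failure. */
#define VALIDATE_SW_IF_INDEX_END_SKETCH(mp)                                   \
  do                                                                          \
    {                                                                         \
      if (!vnet_sw_if_index_is_api_valid ((mp)->sw_if_index))                 \
        {                                                                     \
          rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;                            \
          goto bad_sw_if_index;                                               \
        }                                                                     \
    }                                                                         \
  while (0)
```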
diff --git a/src/plugins/nat/pnat/pnat_cli.c b/src/plugins/nat/pnat/pnat_cli.c
index 082f0778acb..ce9beee540d 100644
--- a/src/plugins/nat/pnat/pnat_cli.c
+++ b/src/plugins/nat/pnat/pnat_cli.c
@@ -122,6 +122,8 @@ uword unformat_pnat_match_tuple(unformat_input_t *input, va_list *args) {
t->mask |= PNAT_SA;
else if (unformat(input, "dst %U", unformat_ip4_address, &t->dst))
t->mask |= PNAT_DA;
+ else if (unformat(input, "proto %U", unformat_ip_protocol, &t->proto))
+ t->mask |= PNAT_PROTO;
else if (unformat(input, "sport %d", &sport)) {
if (sport == 0 || sport > 65535)
return 0;
@@ -132,9 +134,7 @@ uword unformat_pnat_match_tuple(unformat_input_t *input, va_list *args) {
return 0;
t->mask |= PNAT_DPORT;
t->dport = dport;
- } else if (unformat(input, "proto %U", unformat_ip_protocol, &t->proto))
- ;
- else
+ } else
break;
}
return 1;
diff --git a/src/plugins/nat/pnat/pnat_node.h b/src/plugins/nat/pnat/pnat_node.h
index 595189c2efb..f1afb450934 100644
--- a/src/plugins/nat/pnat/pnat_node.h
+++ b/src/plugins/nat/pnat/pnat_node.h
@@ -20,6 +20,7 @@
#include <pnat/pnat.api_enum.h>
#include <vnet/feature/feature.h>
#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
#include <vnet/ip/format.h>
/* PNAT next-nodes */
diff --git a/src/plugins/nat/pnat/tests/pnat_test.c b/src/plugins/nat/pnat/tests/pnat_test.c
index e7d946941b9..f515dd6d376 100644
--- a/src/plugins/nat/pnat/tests/pnat_test.c
+++ b/src/plugins/nat/pnat/tests/pnat_test.c
@@ -25,6 +25,11 @@
#include <vnet/fib/ip4_fib.h>
#include "../pnat.h"
#include <pnat/pnat.api_enum.h> /* For error counters */
+#ifdef __FreeBSD__
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#endif /* __FreeBSD__ */
#include <arpa/inet.h>
#include "pnat_test_stubs.h"
@@ -569,7 +574,8 @@ int main(int argc, char **argv) {
ip_checksum_init(vm);
- u32 node_index = vlib_register_node(vm, &pnat_input_node);
+ u32 node_index =
+ vlib_register_node(vm, &pnat_input_node, "%s", pnat_input_node.name);
node = vlib_node_get_runtime(vm, node_index);
assert(node);
diff --git a/src/plugins/nat/pnat/tests/pnat_test_stubs.h b/src/plugins/nat/pnat/tests/pnat_test_stubs.h
index bfe1838ffa4..2dc59ac8586 100644
--- a/src/plugins/nat/pnat/tests/pnat_test_stubs.h
+++ b/src/plugins/nat/pnat/tests/pnat_test_stubs.h
@@ -19,12 +19,6 @@
void os_panic(void) {}
void os_exit(int code) {}
u32 ip4_fib_table_get_index_for_sw_if_index(u32 sw_if_index) { return 0; }
-#include <vpp/stats/stat_segment.h>
-clib_error_t *stat_segment_register_gauge(u8 *names,
- stat_segment_update_fn update_fn,
- u32 index) {
- return 0;
-};
#include <vnet/feature/feature.h>
vnet_feature_main_t feature_main;
void classify_get_trace_chain(void){};
diff --git a/src/plugins/nat/pnat/tests/test_genpackets.py b/src/plugins/nat/pnat/tests/test_genpackets.py
index 9d32d3e3656..40867317078 100755
--- a/src/plugins/nat/pnat/tests/test_genpackets.py
+++ b/src/plugins/nat/pnat/tests/test_genpackets.py
@@ -6,33 +6,35 @@ from importlib.machinery import SourceFileLoader
from scapy.all import *
from scapy.contrib.geneve import GENEVE
+
def hexstring(p):
s = bytes(p.__class__(p))
return ",".join("0x{:02x}".format(c) for c in s)
+
def output_test(filename, tests):
(name, ext) = os.path.basename(filename).split(".")
- print('/* DO NOT EDIT: automatically generated by test_genpackets.py */')
- print('/* clang-format off */')
- print('test_t tests_{}[] = {{'.format(name))
+ print("/* DO NOT EDIT: automatically generated by test_genpackets.py */")
+ print("/* clang-format off */")
+ print("test_t tests_{}[] = {{".format(name))
for t in tests:
- print(' {')
+ print(" {")
print(' .name = "{}",'.format(t[0]))
- print(' .nsend = {},'.format(len(t[1])))
- print(' .send = (char []){{{}}},'.format(hexstring(t[1])))
- print(' .nexpect = {},'.format(len(t[2])))
- print(' .expect = (char []){{{}}},'.format(hexstring(t[2])))
- print(' .expect_next_index = {}'.format(t[3]))
- print(' },')
- print('};')
- print('/* clang-format on */')
+ print(" .nsend = {},".format(len(t[1])))
+ print(" .send = (char []){{{}}},".format(hexstring(t[1])))
+ print(" .nexpect = {},".format(len(t[2])))
+ print(" .expect = (char []){{{}}},".format(hexstring(t[2])))
+ print(" .expect_next_index = {}".format(t[3]))
+ print(" },")
+ print("};")
+ print("/* clang-format on */")
+
# Read tests from file
for filename in sys.argv[1:]:
with open(filename) as f:
- content = f.read().replace('\n', '')
+ content = f.read().replace("\n", "")
tests = eval(content)
output_test(filename, tests)
-
diff --git a/src/plugins/npt66/CMakeLists.txt b/src/plugins/npt66/CMakeLists.txt
new file mode 100644
index 00000000000..aee784d96f0
--- /dev/null
+++ b/src/plugins/npt66/CMakeLists.txt
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2023 Cisco Systems, Inc.
+
+add_vpp_plugin(npt66
+ SOURCES
+ npt66.c
+ npt66_api.c
+ npt66_cli.c
+ npt66_node.c
+
+
+ MULTIARCH_SOURCES
+ npt66_node.c
+
+ API_FILES
+ npt66.api
+)
diff --git a/src/plugins/npt66/FEATURE.yaml b/src/plugins/npt66/FEATURE.yaml
new file mode 100644
index 00000000000..8874ae22017
--- /dev/null
+++ b/src/plugins/npt66/FEATURE.yaml
@@ -0,0 +1,16 @@
+---
+name: NPTv6
+maintainer: Ole Troan <otroan@employees.org>
+features:
+ - NPTv6
+
+description: "This plugin implements NPTv6 as described in RFC6296.
+ It supports arbitrary prefix lengths. And performs an
+ algorithmic mapping between internal and external IPv6 prefixes.
+ The mapping is checksum neutral.
+ The implementation is currently limited to a single statically configured binding
+ per interface.
+ A typical IPv6 CE use case, the external prefix would be learnt via DHCP PD
+ "
+state: development
+properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/npt66/npt66.api b/src/plugins/npt66/npt66.api
new file mode 100644
index 00000000000..63640ac2097
--- /dev/null
+++ b/src/plugins/npt66/npt66.api
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+option version = "0.0.1";
+
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
+
+autoendian autoreply define npt66_binding_add_del
+{
+ u32 client_index;
+ u32 context;
+
+ bool is_add;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_ip6_prefix_t internal;
+ vl_api_ip6_prefix_t external;
+};
+
+counters npt66 {
+ rx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "packets translated from external to internal";
+ };
+ tx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "packets translated from internal to external";
+ };
+ translation {
+ severity error;
+ type counter64;
+ units "packets";
+ description "packet translation failed";
+ };
+
+}; \ No newline at end of file
diff --git a/src/plugins/npt66/npt66.c b/src/plugins/npt66/npt66.c
new file mode 100644
index 00000000000..277fce496fc
--- /dev/null
+++ b/src/plugins/npt66/npt66.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+/*
+ * npt66.c: NPT66 plugin
+ * An implementation of Network Prefix Translation for IPv6-to-IPv6 (NPTv6) as
+ * specified in RFC6296.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <vlib/vlib.h>
+#include <vnet/feature/feature.h>
+#include <vppinfra/pool.h>
+#include "npt66.h"
+
+static int
+npt66_feature_enable_disable (u32 sw_if_index, bool is_add)
+{
+ if (vnet_feature_enable_disable ("ip6-unicast", "npt66-input", sw_if_index,
+ is_add, 0, 0) != 0)
+ return -1;
+ if (vnet_feature_enable_disable ("ip6-output", "npt66-output", sw_if_index,
+ is_add, 0, 0) != 0)
+ return -1;
+ return 0;
+}
+
+static void
+ipv6_prefix_zero (ip6_address_t *address, int prefix_len)
+{
+ int byte_index = prefix_len / 8;
+ int bit_offset = prefix_len % 8;
+  /* mask covers the host bits of the boundary byte; clear them while
+   * keeping the prefix bits in the high-order end (for bit_offset == 0
+   * the whole byte is host bits and is cleared) */
+  uint8_t mask = (1 << (8 - bit_offset)) - 1;
+  if (byte_index < 16)
+    {
+      address->as_u8[byte_index] &= ~mask;
+ for (int i = byte_index + 1; i < 16; i++)
+ {
+ address->as_u8[i] = 0;
+ }
+ }
+}
+
+int
+npt66_binding_add_del (u32 sw_if_index, ip6_address_t *internal,
+ int internal_plen, ip6_address_t *external,
+ int external_plen, bool is_add)
+{
+ npt66_main_t *nm = &npt66_main;
+ int rv = 0;
+
+ /* Currently limited to a single binding per interface */
+ npt66_binding_t *b = npt66_interface_by_sw_if_index (sw_if_index);
+
+ if (is_add)
+ {
+ bool configure_feature = false;
+ /* Ensure prefix lengths are less than or equal to a /64 */
+ if (internal_plen > 64 || external_plen > 64)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ /* Create a binding entry (or update existing) */
+ if (!b)
+ {
+ pool_get_zero (nm->bindings, b);
+ configure_feature = true;
+ }
+ b->internal = *internal;
+ b->internal_plen = internal_plen;
+ b->external = *external;
+ b->external_plen = external_plen;
+ b->sw_if_index = sw_if_index;
+
+ ipv6_prefix_zero (&b->internal, internal_plen);
+ ipv6_prefix_zero (&b->external, external_plen);
+ vec_validate_init_empty (nm->interface_by_sw_if_index, sw_if_index, ~0);
+ nm->interface_by_sw_if_index[sw_if_index] = b - nm->bindings;
+
+ uword delta = 0;
+ delta = ip_csum_add_even (delta, b->external.as_u64[0]);
+ delta = ip_csum_add_even (delta, b->external.as_u64[1]);
+ delta = ip_csum_sub_even (delta, b->internal.as_u64[0]);
+ delta = ip_csum_sub_even (delta, b->internal.as_u64[1]);
+ delta = ip_csum_fold (delta);
+ b->delta = delta;
+
+ if (configure_feature)
+ rv = npt66_feature_enable_disable (sw_if_index, is_add);
+ }
+ else
+ {
+ /* Delete a binding entry */
+ npt66_binding_t *b = npt66_interface_by_sw_if_index (sw_if_index);
+ if (!b)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ nm->interface_by_sw_if_index[sw_if_index] = ~0;
+ pool_put (nm->bindings, b);
+ rv = npt66_feature_enable_disable (sw_if_index, is_add);
+ }
+
+ return rv;
+}
+
+/*
+ * Do a lookup in the interface vector (interface_by_sw_if_index)
+ * and return pool entry.
+ */
+npt66_binding_t *
+npt66_interface_by_sw_if_index (u32 sw_if_index)
+{
+ npt66_main_t *nm = &npt66_main;
+
+ if (!nm->interface_by_sw_if_index ||
+ sw_if_index > (vec_len (nm->interface_by_sw_if_index) - 1))
+ return 0;
+ u32 index = nm->interface_by_sw_if_index[sw_if_index];
+ if (index == ~0)
+ return 0;
+ if (pool_is_free_index (nm->bindings, index))
+ return 0;
+ return pool_elt_at_index (nm->bindings, index);
+}
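The delta stored on the binding above is what makes the prefix rewrite checksum-neutral (RFC 6296): whatever the external prefix adds to the one's-complement sum relative to the internal prefix is later cancelled by adjusting one 16-bit word of the interface identifier in npt66_adjust_checksum. A standalone sketch of the arithmetic with made-up prefix sums; VPP's ip_csum_* helpers do the same on wider accumulators:

```c
#include <stdint.h>
#include <stdio.h>

static uint32_t
csum_fold (uint32_t sum)
{
  while (sum >> 16)
    sum = (sum & 0xffff) + (sum >> 16);
  return sum;
}

int
main (void)
{
  /* illustrative one's-complement sums of the internal/external prefixes */
  uint32_t internal_sum = 0x2345, external_sum = 0xabcd;
  /* delta = external - internal in one's-complement arithmetic */
  uint32_t delta = csum_fold (external_sum + (~internal_sum & 0xffff));
  /* on internal->external rewrite, subtract delta from one IID word so
     the pseudo-header sum (and thus TCP/UDP checksums) is unchanged */
  uint16_t word = 0x1111;
  uint16_t adjusted = (uint16_t) csum_fold (word + (~delta & 0xffff));
  printf ("delta=0x%04x adjusted=0x%04x\n", delta, adjusted);
  return 0;
}
```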
diff --git a/src/plugins/npt66/npt66.h b/src/plugins/npt66/npt66.h
new file mode 100644
index 00000000000..428dadb1672
--- /dev/null
+++ b/src/plugins/npt66/npt66.h
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip6_packet.h>
+
+typedef struct
+{
+ u32 sw_if_index;
+ ip6_address_t internal;
+ ip6_address_t external;
+ u8 internal_plen;
+ u8 external_plen;
+ uword delta;
+} npt66_binding_t;
+typedef struct
+{
+ u32 *interface_by_sw_if_index;
+ npt66_binding_t *bindings;
+ u16 msg_id_base;
+} npt66_main_t;
+
+extern npt66_main_t npt66_main;
+
+int npt66_binding_add_del (u32 sw_if_index, ip6_address_t *internal,
+ int internal_plen, ip6_address_t *external,
+ int external_plen, bool is_add);
+npt66_binding_t *npt66_interface_by_sw_if_index (u32 sw_if_index);
diff --git a/src/plugins/npt66/npt66_api.c b/src/plugins/npt66/npt66_api.c
new file mode 100644
index 00000000000..ab27cec616e
--- /dev/null
+++ b/src/plugins/npt66/npt66_api.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+#include <stdbool.h>
+#include <npt66/npt66.h>
+#include <vnet/vnet.h>
+#include <npt66/npt66.api_enum.h>
+#include <npt66/npt66.api_types.h>
+#include <vlibmemory/api.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_types_api.h>
+#include <vpp/app/version.h>
+
+npt66_main_t npt66_main;
+
+/*
+ * This file contains the API handlers for the npt66.api
+ */
+
+#define REPLY_MSG_ID_BASE npt66_main.msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static void
+vl_api_npt66_binding_add_del_t_handler (vl_api_npt66_binding_add_del_t *mp)
+{
+ vl_api_npt66_binding_add_del_reply_t *rmp;
+ int rv;
+ clib_warning ("Interface index: %d", mp->sw_if_index);
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ rv = npt66_binding_add_del (
+ mp->sw_if_index, (ip6_address_t *) &mp->internal.address, mp->internal.len,
+ (ip6_address_t *) &mp->external.address, mp->external.len, mp->is_add);
+
+bad_sw_if_index:
+ REPLY_MACRO_END (VL_API_NPT66_BINDING_ADD_DEL_REPLY);
+}
+
+/* API definitions */
+#include <vnet/format_fns.h>
+#include <npt66/npt66.api.c>
+
+/* Set up the API message handling tables */
+clib_error_t *
+npt66_plugin_api_hookup (vlib_main_t *vm)
+{
+ npt66_main_t *nm = &npt66_main;
+
+ nm->msg_id_base = setup_message_id_table ();
+ return 0;
+}
+
+/*
+ * Register the plugin and hook up the API
+ */
+#include <vnet/plugin/plugin.h>
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "NPTv6",
+ .default_disabled = 1,
+};
+
+clib_error_t *
+npt66_init (vlib_main_t *vm)
+{
+ npt66_main_t *nm = &npt66_main;
+ memset (nm, 0, sizeof (*nm));
+
+ return npt66_plugin_api_hookup (vm);
+}
+
+VLIB_INIT_FUNCTION (npt66_init);
diff --git a/src/plugins/npt66/npt66_cli.c b/src/plugins/npt66/npt66_cli.c
new file mode 100644
index 00000000000..b875eb924c6
--- /dev/null
+++ b/src/plugins/npt66/npt66_cli.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+#include <stdbool.h>
+#include <vlib/vlib.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/clib_error.h>
+#include "npt66.h"
+
+static clib_error_t *
+set_npt66_binding_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ bool internal_set = false, external_set = false;
+ bool add = true;
+ u32 sw_if_index = ~0;
+ ip6_address_t internal, external;
+ int internal_plen = 0, external_plen = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "internal %U/%d", unformat_ip6_address,
+ &internal, &internal_plen))
+ internal_set = true;
+ else if (unformat (line_input, "external %U/%d", unformat_ip6_address,
+ &external, &external_plen))
+ external_set = true;
+ else if (unformat (line_input, "interface %U",
+ unformat_vnet_sw_interface, vnet_get_main (),
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "del"))
+ {
+ add = false;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+ if (sw_if_index == ~0)
+ {
+ error = clib_error_return (0, "interface is required `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ if (!internal_set)
+ {
+ error = clib_error_return (0, "missing parameter: internal `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ if (!external_set)
+ {
+ error = clib_error_return (0, "missing parameter: external `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ int rv = npt66_binding_add_del (sw_if_index, &internal, internal_plen,
+ &external, external_plen, add);
+ if (rv)
+ {
+ error = clib_error_return (0, "Adding binding failed %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (set_npt66_binding_command, static) = {
+ .path = "set npt66 binding",
+ .short_help = "set npt66 binding interface <name> internal <pfx> "
+ "external <pfx> [del]",
+ .function = set_npt66_binding_command_fn,
+};
+
+static u8 *
+format_npt66_binding (u8 *s, va_list *args)
+{
+ u32 index = va_arg (*args, u32);
+ npt66_binding_t *b = va_arg (*args, npt66_binding_t *);
+ s = format (s, "[%d] internal: %U/%d external: %U/%d", index,
+ format_ip6_address, &b->internal, b->internal_plen,
+ format_ip6_address, &b->external, b->external_plen);
+ return s;
+}
+
+static clib_error_t *
+show_npt66_bindings_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ npt66_main_t *nm = &npt66_main;
+ npt66_binding_t *b;
+ clib_error_t *error = 0;
+
+  /* Walk the binding pool and print each configured binding */
+ pool_foreach (b, nm->bindings)
+ {
+ vlib_cli_output (vm, "%U", format_npt66_binding, b - nm->bindings, b);
+ }
+ return error;
+}
+
+VLIB_CLI_COMMAND (show_npt66_bindings_command, static) = {
+ .path = "show npt66 bindings",
+ .short_help = "show npt66 bindings",
+ .function = show_npt66_bindings_command_fn,
+};
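A minimal, illustrative use of the two commands registered above (the interface name and prefixes are placeholders; the show output follows format_npt66_binding):

```
vpp# set npt66 binding interface GigabitEthernet0/8/0 internal fd00:1::/48 external 2001:db8:1::/48
vpp# show npt66 bindings
[0] internal: fd00:1::/48 external: 2001:db8:1::/48
```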
diff --git a/src/plugins/npt66/npt66_node.c b/src/plugins/npt66/npt66_node.c
new file mode 100644
index 00000000000..f74f9143998
--- /dev/null
+++ b/src/plugins/npt66/npt66_node.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+// This file contains the implementation of the NPT66 node.
+// RFC6296: IPv6-to-IPv6 Network Prefix Translation (NPTv6)
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_packet.h>
+
+#include <npt66/npt66.h>
+#include <npt66/npt66.api_enum.h>
+
+typedef struct
+{
+ u32 pool_index;
+ ip6_address_t internal;
+ ip6_address_t external;
+} npt66_trace_t;
+
+static inline u8 *
+format_npt66_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ npt66_trace_t *t = va_arg (*args, npt66_trace_t *);
+
+ if (t->pool_index != ~0)
+ s = format (s, "npt66: index %d internal: %U external: %U\n",
+ t->pool_index, format_ip6_address, &t->internal,
+ format_ip6_address, &t->external);
+ else
+ s = format (s, "npt66: index %d (binding not found)\n", t->pool_index);
+ return s;
+}
+
+/* NPT66 next-nodes */
+typedef enum
+{
+ NPT66_NEXT_DROP,
+ NPT66_N_NEXT
+} npt66_next_t;
+
+static ip6_address_t
+ip6_prefix_copy (ip6_address_t dest, ip6_address_t src, int plen)
+{
+ int bytes_to_copy = plen / 8;
+ int residual_bits = plen % 8;
+
+ // Copy full bytes
+ for (int i = 0; i < bytes_to_copy; i++)
+ {
+ dest.as_u8[i] = src.as_u8[i];
+ }
+
+ // Handle the residual bits, if any
+ if (residual_bits)
+ {
+ uint8_t mask = 0xFF << (8 - residual_bits);
+ dest.as_u8[bytes_to_copy] = (dest.as_u8[bytes_to_copy] & ~mask) |
+ (src.as_u8[bytes_to_copy] & mask);
+ }
+ return dest;
+}
+static int
+ip6_prefix_cmp (ip6_address_t a, ip6_address_t b, int plen)
+{
+ int bytes_to_compare = plen / 8;
+ int residual_bits = plen % 8;
+
+ // Compare full bytes
+ for (int i = 0; i < bytes_to_compare; i++)
+ {
+ if (a.as_u8[i] != b.as_u8[i])
+ {
+ return 0; // prefixes are not identical
+ }
+ }
+
+ // Compare the residual bits, if any
+ if (residual_bits)
+ {
+ uint8_t mask = 0xFF << (8 - residual_bits);
+ if ((a.as_u8[bytes_to_compare] & mask) !=
+ (b.as_u8[bytes_to_compare] & mask))
+ {
+ return 0; // prefixes are not identical
+ }
+ }
+ return 1; // prefixes are identical
+}
+
+static int
+npt66_adjust_checksum (int plen, bool add, ip_csum_t delta,
+ ip6_address_t *address)
+{
+ if (plen <= 48)
+ {
+      /* a 0xffff word must not absorb the adjustment (RFC 6296) */
+ if (address->as_u16[3] == 0xffff)
+ return -1;
+ address->as_u16[3] = add ? ip_csum_add_even (address->as_u16[3], delta) :
+ ip_csum_sub_even (address->as_u16[3], delta);
+ }
+ else
+ {
+ /* For prefixes longer than 48 find a 16-bit word in the interface id */
+ for (int i = 4; i < 8; i++)
+ {
+ if (address->as_u16[i] == 0xffff)
+ continue;
+ address->as_u16[i] = add ?
+ ip_csum_add_even (address->as_u16[i], delta) :
+ ip_csum_sub_even (address->as_u16[i], delta);
+ break;
+ }
+ }
+ return 0;
+}
+
+static int
+npt66_translate (ip6_header_t *ip, npt66_binding_t *binding, int dir)
+{
+ int rv = 0;
+ if (dir == VLIB_TX)
+ {
+ if (!ip6_prefix_cmp (ip->src_address, binding->internal,
+ binding->internal_plen))
+ {
+ clib_warning (
+ "npt66_translate: src address is not internal (%U -> %U)",
+ format_ip6_address, &ip->src_address, format_ip6_address,
+ &ip->dst_address);
+ goto done;
+ }
+ ip->src_address = ip6_prefix_copy (ip->src_address, binding->external,
+ binding->external_plen);
+ /* Checksum neutrality */
+ rv = npt66_adjust_checksum (binding->internal_plen, false,
+ binding->delta, &ip->src_address);
+ }
+ else
+ {
+ if (!ip6_prefix_cmp (ip->dst_address, binding->external,
+ binding->external_plen))
+ {
+ clib_warning (
+ "npt66_translate: dst address is not external (%U -> %U)",
+ format_ip6_address, &ip->src_address, format_ip6_address,
+ &ip->dst_address);
+ goto done;
+ }
+ ip->dst_address = ip6_prefix_copy (ip->dst_address, binding->internal,
+ binding->internal_plen);
+ rv = npt66_adjust_checksum (binding->internal_plen, true, binding->delta,
+ &ip->dst_address);
+ }
+done:
+ return rv;
+}
+
+static int
+npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip,
+ icmp46_header_t *icmp, npt66_binding_t *binding,
+ int dir)
+{
+ ip6_header_t *ip = (ip6_header_t *) (icmp + 2);
+ int rv = 0;
+ vlib_main_t *vm = vlib_get_main ();
+
+ if (clib_net_to_host_u16 (outer_ip->payload_length) <
+ sizeof (icmp46_header_t) + 4 + sizeof (ip6_header_t))
+ {
+ clib_warning ("ICMP6 payload too short");
+ return -1;
+ }
+
+ // Validate checksums
+ int bogus_length;
+ u16 sum16;
+ sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, b, outer_ip, &bogus_length);
+ if (sum16 != 0 && sum16 != 0xffff)
+ {
+ clib_warning ("ICMP6 checksum failed");
+ return -1;
+ }
+ if (dir == VLIB_RX)
+ {
+ if (!ip6_prefix_cmp (ip->src_address, binding->external,
+ binding->external_plen))
+ {
+ clib_warning (
+ "npt66_icmp6_translate: src address is not internal (%U -> %U)",
+ format_ip6_address, &ip->src_address, format_ip6_address,
+ &ip->dst_address);
+ goto done;
+ }
+ ip->src_address = ip6_prefix_copy (ip->src_address, binding->internal,
+ binding->internal_plen);
+ /* Checksum neutrality */
+ rv = npt66_adjust_checksum (binding->internal_plen, true, binding->delta,
+ &ip->src_address);
+ }
+ else
+ {
+ if (!ip6_prefix_cmp (ip->dst_address, binding->external,
+ binding->external_plen))
+ {
+ clib_warning (
+ "npt66_icmp6_translate: dst address is not external (%U -> %U)",
+ format_ip6_address, &ip->src_address, format_ip6_address,
+ &ip->dst_address);
+ goto done;
+ }
+ ip->dst_address = ip6_prefix_copy (ip->dst_address, binding->internal,
+ binding->internal_plen);
+ rv = npt66_adjust_checksum (binding->internal_plen, false,
+ binding->delta, &ip->dst_address);
+ }
+done:
+
+ return rv;
+}
+
+/*
+ * Look up the NPTv6 binding for the packet's receive (input) or transmit
+ * (output) interface and rewrite the source or destination prefix as
+ * specified by RFC 6296. Packets are handed to the next node in the
+ * feature chain; packets that fail translation are dropped.
+ */
+static_always_inline uword
+npt66_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int dir)
+{
+ npt66_main_t *nm = &npt66_main;
+ u32 n_left_from, *from;
+ u16 nexts[VLIB_FRAME_SIZE] = { 0 }, *next = nexts;
+ u32 pool_indicies[VLIB_FRAME_SIZE], *pi = pool_indicies;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ ip6_header_t *ip;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ vlib_get_buffers (vm, from, b, n_left_from);
+ npt66_binding_t *binding;
+
+  /* Translate each packet and choose the next feature node */
+ while (n_left_from > 0)
+ {
+ u32 sw_if_index = vnet_buffer (b[0])->sw_if_index[dir];
+ u32 iph_offset =
+ dir == VLIB_TX ? vnet_buffer (b[0])->ip.save_rewrite_length : 0;
+ ip = (ip6_header_t *) (vlib_buffer_get_current (b[0]) + iph_offset);
+ binding = npt66_interface_by_sw_if_index (sw_if_index);
+ ASSERT (binding);
+ *pi = binding - nm->bindings;
+
+ /* By default pass packet to next node in the feature chain */
+ vnet_feature_next_u16 (next, b[0]);
+ int rv;
+ icmp46_header_t *icmp = (icmp46_header_t *) (ip + 1);
+ if (ip->protocol == IP_PROTOCOL_ICMP6 && icmp->type < 128)
+ {
+ rv = npt66_icmp6_translate (b[0], ip, icmp, binding, dir);
+ if (rv < 0)
+ {
+ clib_warning ("ICMP6 npt66_translate failed");
+ *next = NPT66_NEXT_DROP;
+ goto next;
+ }
+ }
+ rv = npt66_translate (ip, binding, dir);
+
+ if (rv < 0)
+ {
+ vlib_node_increment_counter (vm, node->node_index,
+ NPT66_ERROR_TRANSLATION, 1);
+ *next = NPT66_NEXT_DROP;
+ goto next;
+ }
+ else if (dir == VLIB_TX)
+ vlib_node_increment_counter (vm, node->node_index, NPT66_ERROR_TX, 1);
+ else
+ vlib_node_increment_counter (vm, node->node_index, NPT66_ERROR_RX, 1);
+
+ next:
+ next += 1;
+ n_left_from -= 1;
+ b += 1;
+ pi += 1;
+ }
+
+ /* Packet trace */
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ u32 i;
+ b = bufs;
+ pi = pool_indices;
+
+ for (i = 0; i < frame->n_vectors; i++)
+ {
+ if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ npt66_trace_t *t = vlib_add_trace (vm, node, b[0], sizeof (*t));
+ if (*pi != ~0)
+ {
+ if (!pool_is_free_index (nm->bindings, *pi))
+ {
+ npt66_binding_t *tr =
+ pool_elt_at_index (nm->bindings, *pi);
+ t->internal = tr->internal;
+ t->external = tr->external;
+ }
+ }
+ t->pool_index = *pi;
+
+ b += 1;
+ pi += 1;
+ }
+ else
+ break;
+ }
+ }
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (npt66_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return npt66_node_inline (vm, node, frame, VLIB_RX);
+}
+VLIB_NODE_FN (npt66_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return npt66_node_inline (vm, node, frame, VLIB_TX);
+}
+
+VLIB_REGISTER_NODE (npt66_input_node) = {
+ .name = "npt66-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_npt66_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = NPT66_N_ERROR,
+ .error_counters = npt66_error_counters,
+ .n_next_nodes = NPT66_N_NEXT,
+ .next_nodes =
+ {
+ [NPT66_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (npt66_output_node) = {
+ .name = "npt66-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_npt66_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = NPT66_N_ERROR,
+ .error_counters = npt66_error_counters,
+ .sibling_of = "npt66-input",
+};
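+/* as a sibling of npt66-input, the output node shares its next-node table,
+ so NPT66_NEXT_DROP resolves to the same slot in both nodes */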
+
+/* Hook up features */
+VNET_FEATURE_INIT (npt66_input, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "npt66-input",
+};
+VNET_FEATURE_INIT (npt66_output, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "npt66-output",
+};
diff --git a/src/plugins/nsh/FEATURE.yaml b/src/plugins/nsh/FEATURE.yaml
index a6ef3749952..986008e41a5 100644
--- a/src/plugins/nsh/FEATURE.yaml
+++ b/src/plugins/nsh/FEATURE.yaml
@@ -8,6 +8,7 @@ features:
- NSH Proxy
- NSH OAM
- NSH Metadata
+ - Requires vxlan_plugin.so and gre_plugin.so to run
description: "NSH for SFC"
state: production
diff --git a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export.c b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export.c
index cb2bb2107c9..d2b59ababa9 100644
--- a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export.c
+++ b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export.c
@@ -136,14 +136,12 @@ set_nsh_md2_ioam_export_ipfix_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_nsh_md2_ioam_ipfix_command, static) =
{
.path = "set nsh-md2-ioam export ipfix",
.short_help = "set nsh-md2-ioam export ipfix collector <ip4-address> src <ip4-address>",
.function = set_nsh_md2_ioam_export_ipfix_command_fn,
};
-/* *INDENT-ON* */
#define IPFIX_NSH_MD2_IOAM_EXPORT_ID 274 // TODO: Move this to ioam/ioam_export.h
diff --git a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export_thread.c b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export_thread.c
index 54dc01bc021..40ff8c087dd 100644
--- a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export_thread.c
+++ b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export_thread.c
@@ -33,14 +33,12 @@ nsh_md2_ioam_export_process (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nsh_md2_ioam_export_process_node, static) =
{
.function = nsh_md2_ioam_export_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "nsh-md2-ioam-export-process",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_node.c b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_node.c
index 06874f64a73..b3bf8c59b96 100644
--- a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_node.c
+++ b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_node.c
@@ -129,7 +129,6 @@ nsh_md2_ioam_export_node_fn (vlib_main_t * vm,
/*
* Node for iOAM export
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nsh_md2_ioam_export_node) =
{
.function = nsh_md2_ioam_export_node_fn,
@@ -144,7 +143,6 @@ VLIB_REGISTER_NODE (nsh_md2_ioam_export_node) =
.next_nodes =
{[EXPORT_NEXT_NSH_MD2_IOAM_INPUT] = "nsh-pop"},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsh/nsh-md2-ioam/md2_ioam_transit.c b/src/plugins/nsh/nsh-md2-ioam/md2_ioam_transit.c
index 876e00324e3..6c372a5bd4e 100644
--- a/src/plugins/nsh/nsh-md2-ioam/md2_ioam_transit.c
+++ b/src/plugins/nsh/nsh-md2-ioam/md2_ioam_transit.c
@@ -53,13 +53,11 @@ typedef enum
} nsh_md2_ioam_encap_transit_next_t;
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (nsh_md2_ioam_encap_transit, static) =
{
.arc_name = "ip4-output",
.node_name = "nsh-md2-ioam-encap-transit",
};
-/* *INDENT-ON* */
static uword
@@ -162,7 +160,6 @@ nsh_md2_ioam_encap_transit (vlib_main_t * vm,
}
extern u8 *format_nsh_node_map_trace (u8 * s, va_list * args);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nsh_md2_ioam_encap_transit_node) = {
.function = nsh_md2_ioam_encap_transit,
.name = "nsh-md2-ioam-encap-transit",
@@ -181,7 +178,6 @@ VLIB_REGISTER_NODE (nsh_md2_ioam_encap_transit_node) = {
},
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam.c b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam.c
index 63e6a98fe27..2aac6760546 100644
--- a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam.c
+++ b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam.c
@@ -449,7 +449,6 @@ static clib_error_t *nsh_md2_ioam_set_transit_rewrite_command_fn
return rv;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (nsh_md2_ioam_set_transit_rewrite_cmd, static) = {
.path = "set nsh-md2-ioam-transit",
.short_help = "set nsh-ioam-lisp-gpe-transit dst-ip <dst_ip> [outer-fib-index <outer_fib_index>] [disable]",
@@ -500,7 +499,7 @@ void
nsh_md2_ioam_interface_init (void)
{
nsh_md2_ioam_main_t *hm = &nsh_md2_ioam_main;
- hm->fib_entry_type = fib_node_register_new_type (&nsh_md2_ioam_vft);
+ hm->fib_entry_type = fib_node_register_new_type ("nsh", &nsh_md2_ioam_vft);
return;
}
diff --git a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_api.c b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_api.c
index 9ed835bd98f..36c221619e5 100644
--- a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_api.c
+++ b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_api.c
@@ -38,7 +38,6 @@
#undef vl_endianfun
/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_printfun
#include <nsh/nsh.api.h>
#undef vl_printfun
diff --git a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_trace.c b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_trace.c
index aad3cffb1db..2c553b39e45 100644
--- a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_trace.c
+++ b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_trace.c
@@ -38,7 +38,6 @@ typedef union
} time_u64_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
u16 class;
u8 type;
@@ -48,7 +47,6 @@ typedef CLIB_PACKED(struct {
u8 reserve;
u32 elts[0]; /* Variable type. So keep it generic */
}) nsh_md2_ioam_trace_option_t;
-/* *INDENT-ON* */
#define foreach_nsh_md2_ioam_trace_stats \
@@ -333,13 +331,11 @@ nsh_md2_ioam_show_ioam_trace_cmd_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (nsh_md2_ioam_show_ioam_trace_cmd, static) = {
.path = "show ioam nsh-lisp-gpe trace",
.short_help = "iOAM trace statistics",
.function = nsh_md2_ioam_show_ioam_trace_cmd_fn,
};
-/* *INDENT-ON* */
int
@@ -376,12 +372,10 @@ nsh_md2_ioam_trace_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (nsh_md2_ioam_trace_init) =
{
.runs_after = VLIB_INITS ("nsh_init", "nsh_md2_ioam_init"),
};
-/* *INDENT-ON* */
int
nsh_md2_ioam_trace_profile_cleanup (void)
diff --git a/src/plugins/nsh/nsh.c b/src/plugins/nsh/nsh.c
index ea084e4a553..a2c24e27b26 100644
--- a/src/plugins/nsh/nsh.c
+++ b/src/plugins/nsh/nsh.c
@@ -18,8 +18,8 @@
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
#include <nsh/nsh.h>
-#include <vnet/gre/gre.h>
-#include <vnet/vxlan/vxlan.h>
+#include <gre/gre.h>
+#include <vxlan/vxlan.h>
#include <vnet/vxlan-gpe/vxlan_gpe.h>
#include <vnet/l2/l2_classify.h>
#include <vnet/adj/adj.h>
@@ -164,14 +164,12 @@ format_nsh_tunnel_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (nsh_hw_class) = {
.name = "NSH",
.format_header = format_nsh_tunnel_with_length,
.build_rewrite = default_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
void
nsh_md2_set_next_ioam_export_override (uword next)
@@ -184,10 +182,11 @@ nsh_md2_set_next_ioam_export_override (uword next)
clib_error_t *
nsh_init (vlib_main_t * vm)
{
- vlib_node_t *node;
+ vlib_node_t *node, *gre4_input, *gre6_input;
nsh_main_t *nm = &nsh_main;
clib_error_t *error = 0;
uword next_node;
+ vlib_node_registration_t *vxlan4_input, *vxlan6_input;
/* Init the main structures from VPP */
nm->vlib_main = vm;
@@ -239,19 +238,32 @@ nsh_init (vlib_main_t * vm)
vlib_node_add_next (vm, vxlan6_gpe_input_node.index,
nsh_aware_vnf_proxy_node.index);
- vlib_node_add_next (vm, gre4_input_node.index, nm->nsh_input_node_index);
- vlib_node_add_next (vm, gre4_input_node.index, nm->nsh_proxy_node_index);
- vlib_node_add_next (vm, gre4_input_node.index,
- nsh_aware_vnf_proxy_node.index);
-
- vlib_node_add_next (vm, gre6_input_node.index, nm->nsh_input_node_index);
- vlib_node_add_next (vm, gre6_input_node.index, nm->nsh_proxy_node_index);
- vlib_node_add_next (vm, gre6_input_node.index,
- nsh_aware_vnf_proxy_node.index);
+ gre4_input = vlib_get_node_by_name (vm, (u8 *) "gre4-input");
+ gre6_input = vlib_get_node_by_name (vm, (u8 *) "gre6-input");
+ if (gre4_input == 0 || gre6_input == 0)
+ {
+ error = clib_error_return (0, "gre_plugin.so is not loaded");
+ return error;
+ }
+ vlib_node_add_next (vm, gre4_input->index, nm->nsh_input_node_index);
+ vlib_node_add_next (vm, gre4_input->index, nm->nsh_proxy_node_index);
+ vlib_node_add_next (vm, gre4_input->index, nsh_aware_vnf_proxy_node.index);
+ vlib_node_add_next (vm, gre6_input->index, nm->nsh_input_node_index);
+ vlib_node_add_next (vm, gre6_input->index, nm->nsh_proxy_node_index);
+ vlib_node_add_next (vm, gre6_input->index, nsh_aware_vnf_proxy_node.index);
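+
+ /*
+ * gre and vxlan now live in plugins rather than in vnet, so their input
+ * nodes are resolved at runtime (by node name, or by plugin symbol below)
+ * instead of at link time, and nsh_init fails cleanly when either plugin
+ * is absent.
+ */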
/* Add NSH-Proxy support */
- vlib_node_add_next (vm, vxlan4_input_node.index, nm->nsh_proxy_node_index);
- vlib_node_add_next (vm, vxlan6_input_node.index, nm->nsh_proxy_node_index);
+ vxlan4_input =
+ vlib_get_plugin_symbol ("vxlan_plugin.so", "vxlan4_input_node");
+ vxlan6_input =
+ vlib_get_plugin_symbol ("vxlan_plugin.so", "vxlan6_input_node");
+ if (vxlan4_input == 0 || vxlan6_input == 0)
+ {
+ error = clib_error_return (0, "vxlan_plugin.so is not loaded");
+ return error;
+ }
+ vlib_node_add_next (vm, vxlan4_input->index, nm->nsh_proxy_node_index);
+ vlib_node_add_next (vm, vxlan6_input->index, nm->nsh_proxy_node_index);
/* Add NSH-Classifier support */
vlib_node_add_next (vm, ip4_classify_node.index,
@@ -270,12 +282,10 @@ nsh_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (nsh_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Network Service Header (NSH)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsh/nsh_api.c b/src/plugins/nsh/nsh_api.c
index 1faaea36c50..c7a686c8b83 100644
--- a/src/plugins/nsh/nsh_api.c
+++ b/src/plugins/nsh/nsh_api.c
@@ -64,13 +64,11 @@ format_nsh_name (u8 * s, va_list * args)
return format (s, "nsh_tunnel%d", dev_instance);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (nsh_device_class, static) = {
.name = "NSH",
.format_device_name = format_nsh_name,
.admin_up_down_function = nsh_interface_admin_up_down,
};
-/* *INDENT-ON* */
static void send_nsh_entry_details
(nsh_entry_t * t, vl_api_registration_t * rp, u32 context)
@@ -169,7 +167,7 @@ vl_api_nsh_add_del_map_t_handler (vl_api_nsh_add_del_map_t * mp)
{
vl_api_nsh_add_del_map_reply_t *rmp;
int rv;
- nsh_add_del_map_args_t _a, *a = &_a;
+ nsh_add_del_map_args_t _a = { 0 }, *a = &_a;
u32 map_index = ~0;
a->is_add = mp->is_add;
@@ -346,7 +344,7 @@ nsh_add_del_map (nsh_add_del_map_args_t * a, u32 * map_indexp)
{
nsh_hw_if = nm->free_nsh_tunnel_hw_if_indices
[vec_len (nm->free_nsh_tunnel_hw_if_indices) - 1];
- _vec_len (nm->free_nsh_tunnel_hw_if_indices) -= 1;
+ vec_dec_len (nm->free_nsh_tunnel_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, nsh_hw_if);
hi->dev_instance = map_index;
@@ -550,7 +548,7 @@ static void vl_api_nsh_add_del_entry_t_handler
{
vl_api_nsh_add_del_entry_reply_t *rmp;
int rv;
- nsh_add_del_entry_args_t _a, *a = &_a;
+ nsh_add_del_entry_args_t _a = { 0 }, *a = &_a;
u32 entry_index = ~0;
u8 tlvs_len = 0;
u8 *data = 0;
diff --git a/src/plugins/nsh/nsh_cli.c b/src/plugins/nsh/nsh_cli.c
index 7bcaf1c51ad..4288a9654c5 100644
--- a/src/plugins/nsh/nsh_cli.c
+++ b/src/plugins/nsh/nsh_cli.c
@@ -140,7 +140,6 @@ nsh_get_adj_by_sw_if_index (u32 sw_if_index)
{
adj_index_t ai = ~0;
- /* *INDENT-OFF* */
pool_foreach_index (ai, adj_pool)
{
if (sw_if_index == adj_get_sw_if_index(ai))
@@ -148,7 +147,6 @@ nsh_get_adj_by_sw_if_index (u32 sw_if_index)
return ai;
}
}
- /* *INDENT-ON* */
return ~0;
}
@@ -290,7 +288,6 @@ nsh_add_del_map_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_nsh_map_command, static) = {
.path = "create nsh map",
.short_help =
@@ -299,7 +296,6 @@ VLIB_CLI_COMMAND (create_nsh_map_command, static) = {
" encap-vxlan4-intf <nn> | encap-vxlan6-intf <nn>| encap-eth-intf <nn> | encap-none]\n",
.function = nsh_add_del_map_command_fn,
};
-/* *INDENT-ON* */
/**
* CLI command for showing the mapping between NSH entries
@@ -322,12 +318,10 @@ show_nsh_map_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_nsh_map_command, static) = {
.path = "show nsh map",
.function = show_nsh_map_command_fn,
};
-/* *INDENT-ON* */
/**
* CLI command for adding NSH entry
@@ -494,7 +488,6 @@ nsh_add_del_entry_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_nsh_entry_command, static) = {
.path = "create nsh entry",
.short_help =
@@ -502,7 +495,6 @@ VLIB_CLI_COMMAND (create_nsh_entry_command, static) = {
" [c1 <nn> c2 <nn> c3 <nn> c4 <nn>] [tlv-ioam-trace] [del]\n",
.function = nsh_add_del_entry_command_fn,
};
-/* *INDENT-ON* */
/* format from network order */
u8 *
@@ -621,12 +613,10 @@ show_nsh_entry_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_nsh_entry_command, static) = {
.path = "show nsh entry",
.function = show_nsh_entry_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsh/nsh_node.c b/src/plugins/nsh/nsh_node.c
index a467d2c34c2..5c084985948 100644
--- a/src/plugins/nsh/nsh_node.c
+++ b/src/plugins/nsh/nsh_node.c
@@ -906,7 +906,6 @@ static char *nsh_node_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
/* register nsh-input node */
VLIB_REGISTER_NODE (nsh_input_node) = {
@@ -978,7 +977,6 @@ VLIB_REGISTER_NODE (nsh_aware_vnf_proxy_node) =
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsh/nsh_output.c b/src/plugins/nsh/nsh_output.c
index 3a3da02f764..9b7014a0ed0 100644
--- a/src/plugins/nsh/nsh_output.c
+++ b/src/plugins/nsh/nsh_output.c
@@ -366,7 +366,6 @@ VNET_FEATURE_INIT (nsh_interface_output, static) = {
};
/* Built-in ip4 tx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (nsh_eth_output, static) =
{
.arc_name = "nsh-eth-output",
@@ -379,7 +378,6 @@ VNET_FEATURE_INIT (nsh_eth_tx_drop, static) =
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
/**
* @brief Next index values from the NSH incomplete adj node
*/
diff --git a/src/plugins/nsh/nsh_pop.c b/src/plugins/nsh/nsh_pop.c
index 90b8a73b5fb..8de319e158b 100644
--- a/src/plugins/nsh/nsh_pop.c
+++ b/src/plugins/nsh/nsh_pop.c
@@ -19,7 +19,6 @@
#include <vnet/plugin/plugin.h>
#include <nsh/nsh.h>
#include <vnet/gre/packet.h>
-#include <vnet/vxlan/vxlan.h>
#include <vnet/vxlan-gpe/vxlan_gpe.h>
#include <vnet/l2/l2_classify.h>
diff --git a/src/plugins/nsim/node.c b/src/plugins/nsim/node.c
index 159db4a1cc7..a8ba909ab07 100644
--- a/src/plugins/nsim/node.c
+++ b/src/plugins/nsim/node.c
@@ -299,7 +299,6 @@ VLIB_NODE_FN (nsim_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
0 /* is_trace */ , 1 /* is_cross_connect */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (nsim_node) =
{
@@ -319,7 +318,6 @@ VLIB_REGISTER_NODE (nsim_node) =
},
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
VLIB_NODE_FN (nsim_feature_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -333,7 +331,6 @@ VLIB_NODE_FN (nsim_feature_node) (vlib_main_t * vm,
0 /* is_trace */ , 0 /* is_cross_connect */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (nsim_feature_node) =
{
@@ -353,7 +350,6 @@ VLIB_REGISTER_NODE (nsim_feature_node) =
},
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsim/nsim.c b/src/plugins/nsim/nsim.c
index fc8ffebd4a2..1c5b26bbaaa 100644
--- a/src/plugins/nsim/nsim.c
+++ b/src/plugins/nsim/nsim.c
@@ -127,20 +127,20 @@ nsim_output_feature_enable_disable (nsim_main_t * nsm, u32 sw_if_index,
}
static nsim_wheel_t *
-nsim_wheel_alloc (nsim_main_t * nsm, u32 wheel_slots)
+nsim_wheel_alloc (nsim_main_t *nsm)
{
u32 pagesize = getpagesize ();
nsim_wheel_t *wp;
- nsm->mmap_size = sizeof (nsim_wheel_t)
- + wheel_slots * sizeof (nsim_wheel_entry_t);
+ nsm->mmap_size = sizeof (nsim_wheel_t) +
+ nsm->wheel_slots_per_wrk * sizeof (nsim_wheel_entry_t);
nsm->mmap_size += pagesize - 1;
nsm->mmap_size &= ~(pagesize - 1);
wp = clib_mem_vm_alloc (nsm->mmap_size);
ASSERT (wp != 0);
- wp->wheel_size = wheel_slots;
+ wp->wheel_size = nsm->wheel_slots_per_wrk;
wp->cursize = 0;
wp->head = 0;
wp->tail = 0;
@@ -150,7 +150,7 @@ nsim_wheel_alloc (nsim_main_t * nsm, u32 wheel_slots)
}
static int
-nsim_configure (nsim_main_t * nsm, f64 bandwidth, f64 delay, f64 packet_size,
+nsim_configure (nsim_main_t *nsm, f64 bandwidth, f64 delay, u32 packet_size,
f64 drop_fraction, f64 reorder_fraction)
{
u64 total_buffer_size_in_bytes, per_worker_buffer_size, wheel_slots_per_wrk;
@@ -163,7 +163,7 @@ nsim_configure (nsim_main_t * nsm, f64 bandwidth, f64 delay, f64 packet_size,
if (delay == 0.0)
return VNET_API_ERROR_INVALID_VALUE_2;
- if (packet_size < 64.0 || packet_size > 9000.0)
+ if (packet_size < 64 || packet_size > 9000)
return VNET_API_ERROR_INVALID_VALUE_3;
if (reorder_fraction > 0.0 && delay == 0.0)
@@ -201,13 +201,14 @@ nsim_configure (nsim_main_t * nsm, f64 bandwidth, f64 delay, f64 packet_size,
/* Save these for the show command */
nsm->bandwidth = bandwidth;
nsm->packet_size = packet_size;
+ nsm->wheel_slots_per_wrk = wheel_slots_per_wrk;
vec_validate (nsm->wheel_by_thread, num_workers);
/* Initialize the output scheduler wheels */
i = (!nsm->poll_main_thread && num_workers) ? 1 : 0;
for (; i < num_workers + 1; i++)
- nsm->wheel_by_thread[i] = nsim_wheel_alloc (nsm, wheel_slots_per_wrk);
+ nsm->wheel_by_thread[i] = nsim_wheel_alloc (nsm);
vlib_worker_thread_barrier_sync (vm);
@@ -333,7 +334,6 @@ VLIB_CONFIG_FUNCTION (nsim_config, "nsim");
* @cliend
* @cliexcmd{nsim enable-disable <intfc> <intfc> [disable]}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (nsim_enable_disable_command, static) =
{
.path = "nsim cross-connect enable-disable",
@@ -342,7 +342,6 @@ VLIB_CLI_COMMAND (nsim_enable_disable_command, static) =
"<interface-name-2> [disable]",
.function = nsim_cross_connect_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_nsim_cross_connect_enable_disable_t_handler
@@ -522,7 +521,6 @@ nsim_output_feature_enable_disable_command_fn (vlib_main_t * vm,
* @cliend
* @cliexcmd{nsim output-feature enable-disable <intfc> [disable]}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (nsim_output_feature_enable_disable_command, static) =
{
.path = "nsim output-feature enable-disable",
@@ -530,7 +528,6 @@ VLIB_CLI_COMMAND (nsim_output_feature_enable_disable_command, static) =
"nsim output-feature enable-disable <interface-name> [disable]",
.function = nsim_output_feature_enable_disable_command_fn,
};
-/* *INDENT-ON* */
#include <nsim/nsim.api.c>
static clib_error_t *
@@ -549,30 +546,24 @@ nsim_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (nsim_init);
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (nsim, static) =
{
.arc_name = "device-input",
.node_name = "nsim",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (nsim_feature, static) = {
.arc_name = "interface-output",
.node_name = "nsim-output-feature",
.runs_before = VNET_FEATURES ("interface-output-arc-end"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Network Delay Simulator",
};
-/* *INDENT-ON* */
static uword
unformat_delay (unformat_input_t * input, va_list * args)
@@ -602,19 +593,59 @@ unformat_bandwidth (unformat_input_t * input, va_list * args)
*result = tmp * 1e9;
else if (unformat (input, "%f gbyte", &tmp))
*result = tmp * 8e9;
+ else if (unformat (input, "%f gbps", &tmp))
+ *result = tmp * 1e9;
+ else if (unformat (input, "%f mbps", &tmp))
+ *result = tmp * 1e6;
+ else if (unformat (input, "%f kbps", &tmp))
+ *result = tmp * 1e3;
+ else if (unformat (input, "%f bps", &tmp))
+ *result = tmp;
else
return 0;
return 1;
}
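+
+/*
+ * With the added suffixes, bandwidth may be given directly in bits/sec,
+ * e.g. (illustrative): set nsim delay <nn> bandwidth 1 gbps packet-size 128
+ */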
static u8 *
+format_delay (u8 *s, va_list *args)
+{
+ f64 delay = va_arg (*args, f64);
+
+ if (delay < 1e-3)
+ s = format (s, "%.1f us", delay * 1e6);
+ else if (delay < 1)
+ s = format (s, "%.1f ms", delay * 1e3);
+ else
+ s = format (s, "%f sec", delay);
+
+ return s;
+}
+
+static u8 *
+format_bandwidth (u8 *s, va_list *args)
+{
+ f64 bandwidth = va_arg (*args, f64);
+
+ if (bandwidth >= 1e9)
+ s = format (s, "%.1f gbps", bandwidth / 1e9);
+ else if (bandwidth >= 1e6)
+ s = format (s, "%.1f mbps", bandwidth / 1e6);
+ else if (bandwidth >= 1e3)
+ s = format (s, "%.1f kbps", bandwidth / 1e3);
+ else
+ s = format (s, "%f bps", bandwidth);
+
+ return s;
+}
+
+static u8 *
format_nsim_config (u8 * s, va_list * args)
{
int verbose = va_arg (*args, int);
nsim_main_t *nsm = &nsim_main;
s = format (s, "configuration\n");
- s = format (s, " delay (ms): %.2f\n", nsm->delay * 1e3);
+ s = format (s, " delay: %U\n", format_delay, nsm->delay);
if (nsm->drop_fraction)
s = format (s, " drop fraction: %.5f\n", nsm->drop_fraction);
else
@@ -624,7 +655,8 @@ format_nsim_config (u8 * s, va_list * args)
else
s = format (s, " reorder fraction: 0\n");
s = format (s, " packet size: %u\n", nsm->packet_size);
- s = format (s, " throughput (Gbps): %.2f\n", nsm->bandwidth / 1e9);
+ s = format (s, " worker wheel size: %u\n", nsm->wheel_slots_per_wrk);
+ s = format (s, " throughput: %U\n", format_bandwidth, nsm->bandwidth);
if (verbose)
{
@@ -666,9 +698,8 @@ static clib_error_t *
set_nsim_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
- f64 drop_fraction = 0.0, reorder_fraction = 0.0;
- f64 delay, bandwidth, packet_size = 1500.0;
- u32 packets_per_drop, packets_per_reorder;
+ f64 drop_fraction = 0.0, reorder_fraction = 0.0, delay, bandwidth;
+ u32 packets_per_drop, packets_per_reorder, packet_size = 1500;
nsim_main_t *nsm = &nsim_main;
int rv;
@@ -679,7 +710,7 @@ set_nsim_command_fn (vlib_main_t * vm,
else if (unformat (input, "bandwidth %U", unformat_bandwidth,
&bandwidth))
;
- else if (unformat (input, "packet-size %f", &packet_size))
+ else if (unformat (input, "packet-size %u", &packet_size))
;
else if (unformat (input, "packets-per-drop %d", &packets_per_drop))
{
@@ -756,7 +787,6 @@ set_nsim_command_fn (vlib_main_t * vm,
* @cliend
* @cliexcmd{set nsim delay <nn> bandwidth <bb> packet-size <nn>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_nsim_command, static) =
{
.path = "set nsim",
@@ -764,7 +794,6 @@ VLIB_CLI_COMMAND (set_nsim_command, static) =
" [packets-per-drop <nn>][drop-fraction <f64: 0.0 - 1.0>]",
.function = set_nsim_command_fn,
};
-/* *INDENT-ON*/
static clib_error_t *
@@ -801,14 +830,12 @@ show_nsim_command_fn (vlib_main_t * vm,
* @cliexcmd{show nsim}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_nsim_command, static) =
{
.path = "show nsim",
.short_help = "Display network delay simulator configuration",
.function = show_nsim_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsim/nsim.h b/src/plugins/nsim/nsim.h
index d7d32b9d234..b35a1c685de 100644
--- a/src/plugins/nsim/nsim.h
+++ b/src/plugins/nsim/nsim.h
@@ -101,9 +101,10 @@ typedef struct
/* Config parameters */
f64 delay;
f64 bandwidth;
- f64 packet_size;
f64 drop_fraction;
f64 reorder_fraction;
+ u32 packet_size;
+ u32 wheel_slots_per_wrk;
u32 poll_main_thread;
u64 mmap_size;
diff --git a/src/plugins/nsim/nsim_input.c b/src/plugins/nsim/nsim_input.c
index 65a7ae11251..889d9c9841f 100644
--- a/src/plugins/nsim/nsim_input.c
+++ b/src/plugins/nsim/nsim_input.c
@@ -126,7 +126,6 @@ VLIB_NODE_FN (nsim_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (nsim_input_node) =
{
@@ -142,7 +141,6 @@ VLIB_REGISTER_NODE (nsim_input_node) =
.error_strings = nsim_tx_error_strings,
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/oddbuf/CMakeLists.txt b/src/plugins/oddbuf/CMakeLists.txt
index 9720e927baf..5ce2b3115dc 100644
--- a/src/plugins/oddbuf/CMakeLists.txt
+++ b/src/plugins/oddbuf/CMakeLists.txt
@@ -26,4 +26,7 @@ add_vpp_plugin(oddbuf
API_TEST_SOURCES
oddbuf_test.c
+
+ COMPONENT
+ vpp-plugin-devtools
)
diff --git a/src/plugins/oddbuf/node.c b/src/plugins/oddbuf/node.c
index a1de52a7e8c..6652baaaec5 100644
--- a/src/plugins/oddbuf/node.c
+++ b/src/plugins/oddbuf/node.c
@@ -183,7 +183,6 @@ VLIB_NODE_FN (oddbuf_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
0 /* is_trace */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (oddbuf_node) =
{
@@ -203,7 +202,6 @@ VLIB_REGISTER_NODE (oddbuf_node) =
},
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/oddbuf/oddbuf.c b/src/plugins/oddbuf/oddbuf.c
index 8277836aeb8..44cdd11eaae 100644
--- a/src/plugins/oddbuf/oddbuf.c
+++ b/src/plugins/oddbuf/oddbuf.c
@@ -106,7 +106,6 @@ oddbuf_enable_disable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (oddbuf_enable_disable_command, static) =
{
.path = "oddbuf enable-disable",
@@ -114,7 +113,6 @@ VLIB_CLI_COMMAND (oddbuf_enable_disable_command, static) =
"oddbuf enable-disable <interface-name> [disable]",
.function = oddbuf_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_oddbuf_enable_disable_t_handler
@@ -157,23 +155,19 @@ oddbuf_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (oddbuf_init);
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (oddbuf, static) =
{
.arc_name = "device-input",
.node_name = "oddbuf",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Awkward chained buffer geometry generator",
.default_disabled = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -205,7 +199,6 @@ oddbuf_config_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (oddbuf_config_command, static) =
{
.path = "oddbuf configure",
@@ -213,7 +206,6 @@ VLIB_CLI_COMMAND (oddbuf_config_command, static) =
"oddbuf configure n_to_copy <nn> offset <nn> first_offset <nn>",
.function = oddbuf_config_command_fn,
};
-/* *INDENT-ON* */
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt
index af0bd3c3f51..d820eb2f37e 100644
--- a/src/plugins/perfmon/CMakeLists.txt
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -11,24 +11,57 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*|aarch64.*")
return()
endif()
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+ list(APPEND ARCH_PMU_SOURCES
+ intel/dispatch_wrapper.c
+ intel/core.c
+ intel/uncore.c
+ intel/bundle/backend_bound_core.c
+ intel/bundle/backend_bound_mem.c
+ intel/bundle/branch_mispred.c
+ intel/bundle/cache_hit_miss.c
+ intel/bundle/frontend_bound_bw_src.c
+ intel/bundle/frontend_bound_bw_uops.c
+ intel/bundle/frontend_bound_lat.c
+ intel/bundle/iio_bw.c
+ intel/bundle/inst_and_clock.c
+ intel/bundle/load_blocks.c
+ intel/bundle/mem_bw.c
+ intel/bundle/power_license.c
+ intel/bundle/topdown_icelake.c
+ intel/bundle/topdown_metrics.c
+ intel/bundle/topdown_tremont.c
+ )
+endif()
+
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64.*")
+ list(APPEND ARCH_PMU_SOURCES
+ arm/dispatch_wrapper.c
+ arm/events.c
+ arm/bundle/inst_clock.c
+ arm/bundle/cache_data.c
+ arm/bundle/cache_inst.c
+ arm/bundle/cache_data_tlb.c
+ arm/bundle/cache_inst_tlb.c
+ arm/bundle/mem_access.c
+ arm/bundle/branch_pred.c
+ arm/bundle/stall.c
+ )
+endif()
+
add_vpp_plugin(perfmon
SOURCES
cli.c
- dispatch_wrapper.c
linux.c
perfmon.c
- table.c
- intel/core.c
- intel/uncore.c
- intel/bundle/inst_and_clock.c
- intel/bundle/load_blocks.c
- intel/bundle/mem_bw.c
- intel/bundle/cache_hit_miss.c
- intel/bundle/branch_mispred.c
- intel/bundle/power_license.c
- intel/bundle/topdown_metrics.c
+ ${ARCH_PMU_SOURCES}
+
+ COMPONENT
+ vpp-plugin-devtools
+
+ SUPPORTED_OS_LIST Linux
)
diff --git a/src/plugins/perfmon/arm/bundle/branch_pred.c b/src/plugins/perfmon/arm/bundle/branch_pred.c
new file mode 100644
index 00000000000..7ab656f2758
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/branch_pred.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ BR_RETIRED,
+ BR_MIS_PRED_RETIRED,
+ BR_PRED,
+ BR_MIS_PRED
+};
+
+static u8 *
+format_arm_branch_pred (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[BR_RETIRED] / ns->n_calls);
+ break;
+
+ case 1:
+ s = format (s, "%.2f", (f64) ns->value[BR_RETIRED] / ns->n_packets);
+ break;
+
+ case 2:
+ s =
+ format (s, "%.2f", (f64) ns->value[BR_MIS_PRED_RETIRED] / ns->n_calls);
+ break;
+
+ case 3:
+ s = format (s, "%.2f",
+ (f64) ns->value[BR_MIS_PRED_RETIRED] / ns->n_packets);
+ break;
+
+ case 4:
+ s =
+ format (s, "%.2f%%",
+ (ns->value[BR_RETIRED] ? (f64) ns->value[BR_MIS_PRED_RETIRED] /
+ ns->value[BR_RETIRED] * 100 :
+ 0));
+ break;
+
+ case 5:
+ s = format (s, "%.2f", (f64) ns->value[BR_PRED] / ns->n_calls);
+ break;
+
+ case 6:
+ s = format (s, "%.2f", (f64) ns->value[BR_PRED] / ns->n_packets);
+ break;
+
+ case 7:
+ s = format (s, "%.2f", (f64) ns->value[BR_MIS_PRED] / ns->n_calls);
+ break;
+
+ case 8:
+ s = format (s, "%.2f", (f64) ns->value[BR_MIS_PRED] / ns->n_packets);
+ break;
+
+ case 9:
+ s = format (s, "%.2f%%",
+ (ns->value[BR_PRED] ?
+ (f64) ns->value[BR_MIS_PRED] / ns->value[BR_PRED] * 100 :
+ 0));
+ break;
+
+ case 10:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_branch_pred) = {
+ .name = "branch-pred",
+ .description = "Branch (mis)predictions per call/packet",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_BR_RETIRED,
+ .events[1] = ARMV8_PMUV3_BR_MIS_PRED_RETIRED,
+ .events[2] = ARMV8_PMUV3_BR_PRED,
+ .events[3] = ARMV8_PMUV3_BR_MIS_PRED,
+ .n_events = 4,
+ .n_columns = 11,
+ .format_fn = format_arm_branch_pred,
+ .column_headers = PERFMON_STRINGS ("[1.1]", "[1.2]", "[1.3]", "[1.4]", "\%",
+ "[2.1]", "[2.2]", "[2.3]", "[2.4]", "\%",
+ "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (
+ SET_BIT (BR_RETIRED), SET_BIT (BR_RETIRED), SET_BIT (BR_MIS_PRED_RETIRED),
+ SET_BIT (BR_MIS_PRED_RETIRED),
+ SET_BIT (BR_RETIRED) | SET_BIT (BR_MIS_PRED_RETIRED), SET_BIT (BR_PRED),
+ SET_BIT (BR_PRED), SET_BIT (BR_MIS_PRED), SET_BIT (BR_MIS_PRED),
+ SET_BIT (BR_PRED) | SET_BIT (BR_MIS_PRED), 0),
+ .footer =
+ "An instruction that has been executed and retired is defined to\n"
+ "be architecturally executed. When a PE can perform speculative\n"
+ "execution, an instruction is not architecturally executed if the\n"
+ "PE discards the results of the speculative execution.\n\n"
+ "Per node statistics:\n"
+ "[1] Branch instruction architecturally executed\n"
+ " [1.1] Branches/call\n"
+ " [1.2] Branches/pkt\n"
+ " [1.3] Mispredicted/call \n"
+ " [1.4] Mispredicted/pkt\n"
+ " [\%] Percentage of branches mispredicted\n"
+ "[2] Predictable branch speculatively executed\n"
+ " [2.1] Branches/call\n"
+ " [2.2] Branches/pkt\n"
+ " [2.3] Mispredicted/call \n"
+ " [2.4] Mispredicted/pkt\n"
+ " [\%] Percentage of branches mispredicted\n\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
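+
+/*
+ * Typical use of a bundle via the perfmon CLI (assuming the plugin's
+ * standard commands):
+ *   perfmon start bundle branch-pred
+ *   ... run traffic ...
+ *   perfmon stop
+ *   show perfmon statistics
+ */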
diff --git a/src/plugins/perfmon/arm/bundle/cache_data.c b/src/plugins/perfmon/arm/bundle/cache_data.c
new file mode 100644
index 00000000000..d7587700a8c
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/cache_data.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ L1D_CACHE,
+ L1D_CACHE_REFILL,
+ L2D_CACHE,
+ L2D_CACHE_REFILL,
+ L3D_CACHE,
+ L3D_CACHE_REFILL
+};
+
+static u8 *
+format_arm_cache_data (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[L1D_CACHE] / ns->n_packets);
+ break;
+
+ case 1:
+ s =
+ format (s, "%.2f", (f64) ns->value[L1D_CACHE_REFILL] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%.2f%%",
+ (ns->value[L1D_CACHE] ? (f64) ns->value[L1D_CACHE_REFILL] /
+ ns->value[L1D_CACHE] * 100 :
+ 0));
+ break;
+
+ case 3:
+ s = format (s, "%.2f", (f64) ns->value[L2D_CACHE] / ns->n_packets);
+ break;
+
+ case 4:
+ s =
+ format (s, "%.2f", (f64) ns->value[L2D_CACHE_REFILL] / ns->n_packets);
+ break;
+
+ case 5:
+ s = format (s, "%.2f%%",
+ (ns->value[L2D_CACHE] ? (f64) ns->value[L2D_CACHE_REFILL] /
+ ns->value[L2D_CACHE] * 100 :
+ 0));
+ break;
+
+ case 6:
+ s = format (s, "%.2f", (f64) ns->value[L3D_CACHE] / ns->n_packets);
+ break;
+
+ case 7:
+ s =
+ format (s, "%.2f", (f64) ns->value[L3D_CACHE_REFILL] / ns->n_packets);
+ break;
+
+ case 8:
+ s = format (s, "%.2f%%",
+ (ns->value[L3D_CACHE] ? (f64) ns->value[L3D_CACHE_REFILL] /
+ ns->value[L3D_CACHE] * 100 :
+ 0));
+ break;
+
+ case 9:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_cache_data) = {
+ .name = "cache-data",
+ .description = "L1D/L2D/L3D data cache accesses and refills per packet",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_L1D_CACHE,
+ .events[1] = ARMV8_PMUV3_L1D_CACHE_REFILL,
+ .events[2] = ARMV8_PMUV3_L2D_CACHE,
+ .events[3] = ARMV8_PMUV3_L2D_CACHE_REFILL,
+ .events[4] = ARMV8_PMUV3_L3D_CACHE,
+ .events[5] = ARMV8_PMUV3_L3D_CACHE_REFILL,
+ .n_events = 6,
+ .n_columns = 10,
+ .format_fn = format_arm_cache_data,
+ .column_headers = PERFMON_STRINGS ("L1D: access", "refill", "\%*",
+ "L2D: access", "refill", "\%*",
+ "L3D: access", "refill", "\%*", "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (
+ SET_BIT (L1D_CACHE), SET_BIT (L1D_CACHE_REFILL),
+ SET_BIT (L1D_CACHE) | SET_BIT (L1D_CACHE_REFILL), SET_BIT (L2D_CACHE),
+ SET_BIT (L2D_CACHE_REFILL),
+ SET_BIT (L2D_CACHE) | SET_BIT (L2D_CACHE_REFILL), SET_BIT (L3D_CACHE),
+ SET_BIT (L3D_CACHE_REFILL),
+ SET_BIT (L3D_CACHE) | SET_BIT (L3D_CACHE_REFILL), 0),
+ .footer = "all stats are per packet except refill rate (\%)\n"
+ "*\% percentage shown is total refills/accesses\n\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/cache_data_tlb.c b/src/plugins/perfmon/arm/bundle/cache_data_tlb.c
new file mode 100644
index 00000000000..9adb2bc18b2
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/cache_data_tlb.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ L1D_TLB,
+ L1D_TLB_REFILL,
+ L2D_TLB,
+ L2D_TLB_REFILL
+};
+
+static u8 *
+format_arm_cache_data_tlb (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[L1D_TLB] / ns->n_packets);
+ break;
+
+ case 1:
+ s = format (s, "%.2f", (f64) ns->value[L1D_TLB_REFILL] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%.2f%%",
+ (ns->value[L1D_TLB] ? (f64) ns->value[L1D_TLB_REFILL] /
+ ns->value[L1D_TLB] * 100 :
+ 0));
+ break;
+
+ case 3:
+ s = format (s, "%.2f", (f64) ns->value[L2D_TLB] / ns->n_packets);
+ break;
+
+ case 4:
+ s = format (s, "%.2f", (f64) ns->value[L2D_TLB_REFILL] / ns->n_packets);
+ break;
+
+ case 5:
+ s = format (s, "%.2f%%",
+ (ns->value[L2D_TLB] ? (f64) ns->value[L2D_TLB_REFILL] /
+ ns->value[L2D_TLB] * 100 :
+ 0));
+ break;
+
+ case 6:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_cache_data_tlb) = {
+ .name = "cache-data-tlb",
+ .description = "L1/L2 data TLB cache accesses, refills, walks per packet",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_L1D_TLB,
+ .events[1] = ARMV8_PMUV3_L1D_TLB_REFILL,
+ .events[2] = ARMV8_PMUV3_L2D_TLB,
+ .events[3] = ARMV8_PMUV3_L2D_TLB_REFILL,
+ .n_events = 4,
+ .n_columns = 7,
+ .format_fn = format_arm_cache_data_tlb,
+ .column_headers = PERFMON_STRINGS ("L1D-TLB: access", "refill", "\%*",
+ "L2D-TLB: access", "refill", "\%*",
+ "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (
+ SET_BIT (L1D_TLB), SET_BIT (L1D_TLB_REFILL),
+ SET_BIT (L1D_TLB) | SET_BIT (L1D_TLB_REFILL), SET_BIT (L2D_TLB),
+ SET_BIT (L2D_TLB_REFILL), SET_BIT (L2D_TLB) | SET_BIT (L2D_TLB_REFILL), 0),
+ .footer =
+ "all stats are per packet except refill rates (\%)\n"
+ "*\% percentage shown is total refills/accesses\n\n"
+ "TLB: Memory-read operation or Memory-write operation that"
+ " causes a TLB access to at least the Level 1/2 data or unified TLB.\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/cache_inst.c b/src/plugins/perfmon/arm/bundle/cache_inst.c
new file mode 100644
index 00000000000..b9d49c09e12
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/cache_inst.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ L1I_CACHE,
+ L1I_CACHE_REFILL,
+ L2I_CACHE,
+ L2I_CACHE_REFILL
+};
+
+static u8 *
+format_arm_cache_inst (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[L1I_CACHE] / ns->n_packets);
+ break;
+
+ case 1:
+ s =
+ format (s, "%.2f", (f64) ns->value[L1I_CACHE_REFILL] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%.2f%%",
+ (ns->value[L1I_CACHE] ? (f64) ns->value[L1I_CACHE_REFILL] /
+ ns->value[L1I_CACHE] * 100 :
+ 0));
+ break;
+
+ case 3:
+ s = format (s, "%.2f", (f64) ns->value[L2I_CACHE] / ns->n_packets);
+ break;
+
+ case 4:
+ s =
+ format (s, "%.2f", (f64) ns->value[L2I_CACHE_REFILL] / ns->n_packets);
+ break;
+
+ case 5:
+ s = format (s, "%.2f%%",
+ (ns->value[L2I_CACHE] ? (f64) ns->value[L2I_CACHE_REFILL] /
+ ns->value[L2I_CACHE] * 100 :
+ 0));
+ break;
+
+ case 6:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_cache_inst) = {
+ .name = "cache-inst",
+ .description = "L1I/L2I instruction cache accesses and refills per packet",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_L1I_CACHE,
+ .events[1] = ARMV8_PMUV3_L1I_CACHE_REFILL,
+ .events[2] = ARMV8_PMUV3_L2I_CACHE,
+ .events[3] = ARMV8_PMUV3_L2I_CACHE_REFILL,
+ .n_events = 4,
+ .n_columns = 7,
+ .format_fn = format_arm_cache_inst,
+ .column_headers = PERFMON_STRINGS ("L1I: access", "refill", "\%*",
+ "L2I: access", "refill", "\%*", "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (
+ SET_BIT (L1I_CACHE), SET_BIT (L1I_CACHE_REFILL),
+ SET_BIT (L1I_CACHE) | SET_BIT (L1I_CACHE_REFILL), SET_BIT (L2I_CACHE),
+ SET_BIT (L2I_CACHE_REFILL),
+ SET_BIT (L2I_CACHE) | SET_BIT (L2I_CACHE_REFILL), 0),
+ .footer = "all stats are per packet except refill rate (\%)\n"
+ "*\% percentage shown is total refills/accesses\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/cache_inst_tlb.c b/src/plugins/perfmon/arm/bundle/cache_inst_tlb.c
new file mode 100644
index 00000000000..7366be2fc16
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/cache_inst_tlb.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+enum /* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+{
+ L1I_TLB,
+ L1I_TLB_REFILL,
+ L2I_TLB,
+ L2I_TLB_REFILL,
+};
+
+static u8 *
+format_arm_cache_inst_tlb (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[L1I_TLB] / ns->n_packets);
+ break;
+
+ case 1:
+ s = format (s, "%.2f", (f64) ns->value[L1I_TLB_REFILL] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%.2f%%",
+ (ns->value[L1I_TLB] ? (f64) ns->value[L1I_TLB_REFILL] /
+ ns->value[L1I_TLB] * 100 :
+ 0));
+ break;
+
+ case 3:
+ s = format (s, "%.2f", (f64) ns->value[L2I_TLB] / ns->n_packets);
+ break;
+
+ case 4:
+ s = format (s, "%.2f", (f64) ns->value[L2I_TLB_REFILL] / ns->n_packets);
+ break;
+
+ case 5:
+ s = format (s, "%.2f%%",
+ (ns->value[L2I_TLB] ? (f64) ns->value[L2I_TLB_REFILL] /
+ ns->value[L2I_TLB] * 100 :
+ 0));
+ break;
+
+ case 6:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_cache_inst_tlb) = {
+ .name = "cache-inst-tlb",
+ .description =
+ "L1/L2 instruction TLB cache accesses, refills, walks per packet",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_L1I_TLB,
+ .events[1] = ARMV8_PMUV3_L1I_TLB_REFILL,
+ .events[2] = ARMV8_PMUV3_L2I_TLB,
+ .events[3] = ARMV8_PMUV3_L2I_TLB_REFILL,
+ .n_events = 4,
+ .n_columns = 7,
+ .format_fn = format_arm_cache_inst_tlb,
+ .column_headers = PERFMON_STRINGS ("L1I-TLB: access", "refill", "\%*",
+ "L2I-TLB: access", "refill", "\%*",
+ "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (
+ SET_BIT (L1I_TLB), SET_BIT (L1I_TLB_REFILL),
+ SET_BIT (L1I_TLB) | SET_BIT (L1I_TLB_REFILL), SET_BIT (L2I_TLB),
+ SET_BIT (L2I_TLB_REFILL), SET_BIT (L2I_TLB) | SET_BIT (L2I_TLB_REFILL), 0),
+ .footer = "all stats are per packet except refill rate (\%)\n"
+ "*\% percentage shown is total refills/accesses\n\n"
+ "TLB: Instruction memory access that causes a TLB access to at "
+ "least the Level 1/2 instruction TLB.\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/inst_clock.c b/src/plugins/perfmon/arm/bundle/inst_clock.c
new file mode 100644
index 00000000000..272e524cffc
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/inst_clock.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ CPU_CYCLES,
+ INST_RETIRED
+};
+
+static u8 *
+format_arm_inst_clock (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+
+ case 1:
+ s = format (s, "%llu", ns->n_calls);
+ break;
+
+ case 2:
+ s = format (s, "%llu", ns->value[CPU_CYCLES]);
+ break;
+
+ case 3:
+ s = format (s, "%llu", ns->value[INST_RETIRED]);
+ break;
+
+ case 4:
+ s = format (s, "%.2f",
+ (f64) ns->n_packets / ns->n_calls); /* Packets/Call */
+ break;
+
+ case 5:
+ s = format (s, "%.2f",
+ (f64) ns->value[CPU_CYCLES] / ns->n_packets); /* Cycles/Packet */
+ break;
+
+ case 6:
+ s = format (s, "%.2f",
+ (f64) ns->value[INST_RETIRED] / ns->n_packets); /* Instructions/Packet */
+ break;
+
+ case 7:
+ s = format (s, "%.2f",
+ (f64) ns->value[INST_RETIRED] / ns->value[CPU_CYCLES]); /* IPC */
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_inst_clock) = {
+ .name = "inst-and-clock",
+ .description =
+ "CPU cycles, instructions, instructions/packet, cycles/packet and IPC",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_CPU_CYCLES,
+ .events[1] = ARMV8_PMUV3_INST_RETIRED,
+ .n_events = 2,
+ .n_columns = 8,
+ .format_fn = format_arm_inst_clock,
+ .column_headers = PERFMON_STRINGS ("Packets", "Calls", "CPU Cycles", "Inst*",
+ "Pkts/Call", "Cycles/Pkt", "Inst/Pkt",
+ "IPC"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events =
+ PERFMON_COLUMN_EVENTS (0, 0, SET_BIT (CPU_CYCLES), SET_BIT (INST_RETIRED),
+ 0, SET_BIT (CPU_CYCLES), SET_BIT (INST_RETIRED),
+ SET_BIT (CPU_CYCLES) | SET_BIT (INST_RETIRED)),
+ .footer = "* Instructions retired: the counter increments for every "
+ "architecturally executed instruction\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/mem_access.c b/src/plugins/perfmon/arm/bundle/mem_access.c
new file mode 100644
index 00000000000..cfe8f4dc425
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/mem_access.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ MEM_ACCESS,
+ BUS_ACCESS,
+ MEMORY_ERROR
+};
+
+static u8 *
+format_arm_memory_access (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[MEM_ACCESS] / ns->n_packets);
+ break;
+
+ case 1:
+ s = format (s, "%.3f", (f64) ns->value[BUS_ACCESS] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%llu", ns->value[MEMORY_ERROR]);
+ break;
+
+ case 3:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_memory_access) = {
+ .name = "memory-access",
+ .description = "Memory/bus accesses per pkt + total memory errors",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_MEM_ACCESS,
+ .events[1] = ARMV8_PMUV3_BUS_ACCESS,
+ .events[2] = ARMV8_PMUV3_MEMORY_ERROR,
+ .n_events = 3,
+ .n_columns = 4,
+ .format_fn = format_arm_memory_access,
+ .column_headers = PERFMON_STRINGS ("Mem-access/pkt", "Bus-access/pkt",
+ "Total-mem-errors", "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (SET_BIT (MEM_ACCESS),
+ SET_BIT (BUS_ACCESS),
+ SET_BIT (MEMORY_ERROR), 0),
+ .footer =
+ "Mem-access: The counter counts Memory-read operations and Memory-write"
+ " operations that the PE made\n"
+ "Bus-access: The counter counts Memory-read operations and Memory-write"
+ " operations that access outside of the boundary of the PE and its "
+ "closely-coupled caches\n"
+ "Mem-error: Memory error refers to a physical error in memory closely "
+ "coupled to this PE, and detected by the hardware, such as a parity or"
+ " ECC error\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/stall.c b/src/plugins/perfmon/arm/bundle/stall.c
new file mode 100644
index 00000000000..deef9045516
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/stall.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ STALL_BACKEND,
+ STALL_FRONTEND
+};
+
+static u8 *
+format_arm_stall (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
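+ /* note: these per-packet/per-call ratios use integer division, unlike
+ the f64 arithmetic in the other arm bundles */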
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%llu", ns->value[STALL_BACKEND] / ns->n_packets);
+ break;
+
+ case 1:
+ s = format (s, "%llu", ns->value[STALL_FRONTEND] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%llu", ns->value[STALL_BACKEND] / ns->n_calls);
+ break;
+
+ case 3:
+ s = format (s, "%llu", ns->value[STALL_FRONTEND] / ns->n_calls);
+ break;
+
+ case 4:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+
+ case 5:
+ s = format (s, "%llu", ns->n_calls);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_stall) = {
+ .name = "stall",
+ .description = "PE cycle stalls per pkt/call",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_STALL_BACKEND,
+ .events[1] = ARMV8_PMUV3_STALL_FRONTEND,
+ .n_events = 2,
+ .n_columns = 6,
+ .format_fn = format_arm_stall,
+ .column_headers = PERFMON_STRINGS ("Backend/pkt", "Frontend/pkt",
+ "Backend/call", "Frontend/call",
+ "packets", "calls"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (SET_BIT (STALL_BACKEND),
+ SET_BIT (STALL_FRONTEND),
+ SET_BIT (STALL_BACKEND),
+ SET_BIT (STALL_FRONTEND), 0, 0),
+ .footer =
+ "The stall counter counts every Attributable cycle on which no\n"
+ "Attributable instruction or operation was sent for execution\n"
+ "on this PE.\n\n"
+ " Stall backend: No operation issued due to the backend\n"
+ " Stall frontend: No operation issued due to the frontend\n"
+ "The division between frontend and backend is IMPLEMENTATION DEFINED\n\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/dispatch_wrapper.c b/src/plugins/perfmon/arm/dispatch_wrapper.c
new file mode 100644
index 00000000000..df79bcd8631
--- /dev/null
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+#include <sys/ioctl.h>
+
+#include <perfmon/perfmon.h>
+
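+/* data memory barrier, inner-shareable domain - orders the counter and
+   seqlock reads in read_pmc_from_mmap () below */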
+#define barrier() asm volatile("dmb ish" : : : "memory");
+
+typedef int64_t s64;
+
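+/* read a PMU counter directly from userspace: index 31 selects the fixed
+   cycle counter (pmccntr_el0); other counters are selected via pmselr_el0
+   and read through pmxevcntr_el0 - requires kernel/perf_user_access=1 */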
+static_always_inline u64
+get_pmc_register (u32 pmc_idx)
+{
+ u64 value = 0;
+ if (pmc_idx == 31)
+ /* i.e. CPU Cycle event code 0x11 - need to read via pmccntr_el0 */
+ asm volatile("mrs %x0, pmccntr_el0" : "=r"(value));
+ else
+ {
+ /* set event register 0x0-0x1F */
+ asm volatile("msr pmselr_el0, %x0" : : "r"((pmc_idx)));
+ /* get register value */
+ asm volatile("mrs %x0, pmxevcntr_el0" : "=r"(value));
+ }
+ asm volatile("isb" : : : "memory");
+ return value;
+}
+
+static_always_inline u64
+read_pmc_from_mmap (struct perf_event_mmap_page *pc)
+{
+ u32 seq, idx, width;
+ u64 offset = 0;
+ s64 pmc = 0;
+
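+  /* standard perf_event_mmap_page seqlock read: retry until pc->lock is
+     unchanged across the read; pc->offset accumulates counts from periods
+     when the event was not scheduled on this PE */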
+ do
+ {
+ seq = pc->lock;
+ barrier ();
+ idx = pc->index;
+ offset = pc->offset;
+ if (pc->cap_user_rdpmc && idx)
+ {
+ width = pc->pmc_width;
+ pmc = get_pmc_register (idx - 1);
+	  /* counters may be narrower than 64 bits: shift left then
+	     arithmetic-shift right by (64 - width) to sign-extend the
+	     valid bits (pmc is signed) */
+ pmc <<= 64 - width;
+ pmc >>= 64 - width;
+ }
+ barrier ();
+ }
+ while (pc->lock != seq);
+
+ return pmc + offset;
+}
+
+static_always_inline void
+perfmon_read_pmcs (u64 *counters, perfmon_thread_runtime_t *rt, u8 n_counters)
+{
+ switch (n_counters)
+ {
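+    /* intentional fallthrough - each case reads one more counter,
+       starting from the highest index */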
+ default:
+ case 7:
+ counters[6] = read_pmc_from_mmap (rt->mmap_pages[6]);
+ case 6:
+ counters[5] = read_pmc_from_mmap (rt->mmap_pages[5]);
+ case 5:
+ counters[4] = read_pmc_from_mmap (rt->mmap_pages[4]);
+ case 4:
+ counters[3] = read_pmc_from_mmap (rt->mmap_pages[3]);
+ case 3:
+ counters[2] = read_pmc_from_mmap (rt->mmap_pages[2]);
+ case 2:
+ counters[1] = read_pmc_from_mmap (rt->mmap_pages[1]);
+ case 1:
+ counters[0] = read_pmc_from_mmap (rt->mmap_pages[0]);
+ break;
+ }
+}
+
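+/* wrap a graph node dispatch function: sample the enabled PMU counters
+   immediately before and after the node runs, then attribute the deltas
+   to the node's calls and packets */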
+uword
+perfmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ perfmon_main_t *pm = &perfmon_main;
+ perfmon_thread_runtime_t *rt =
+ vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
+ perfmon_node_stats_t *s =
+ vec_elt_at_index (rt->node_stats, node->node_index);
+ u8 n_events = rt->n_events;
+ u64 before[n_events];
+ u64 after[n_events];
+
+ uword rv;
+
+ clib_prefetch_load (s);
+
+ perfmon_read_pmcs (before, rt, n_events);
+ rv = node->function (vm, node, frame);
+ perfmon_read_pmcs (after, rt, n_events);
+
+ if (rv == 0)
+ return rv;
+
+ s->n_calls += 1;
+ s->n_packets += rv;
+
+ for (int i = 0; i < n_events; i++)
+ {
+ s->value[i] += after[i] - before[i];
+ }
+
+ return rv;
+}
+
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+ vlib_node_function_t **dispatch_wrapper)
+{
+ (*dispatch_wrapper) = perfmon_dispatch_wrapper;
+ return 0;
+}
diff --git a/src/plugins/perfmon/arm/dispatch_wrapper.h b/src/plugins/perfmon/arm/dispatch_wrapper.h
new file mode 100644
index 00000000000..903971f8b5e
--- /dev/null
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+ vlib_node_function_t **dispatch_wrapper);
diff --git a/src/plugins/perfmon/arm/events.c b/src/plugins/perfmon/arm/events.c
new file mode 100644
index 00000000000..f2406b3ea05
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+#include <perfmon/arm/dispatch_wrapper.h>
+#include <linux/perf_event.h>
+#include <dirent.h>
+
+VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
+ .class_name = "perfmon",
+};
+
+#define log_debug(fmt, ...) \
+ vlib_log_debug (if_default_log.class, fmt, __VA_ARGS__)
+#define log_warn(fmt, ...) \
+ vlib_log_warn (if_default_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...) vlib_log_err (if_default_log.class, fmt, __VA_ARGS__)
+
+/*
+ * config1 = 2 : user access enabled and always 32-bit
+ * config1 = 3 : user access enabled and always 64-bit
+ *
+ * Since there is no way to discover whether 64-bit counters are
+ * supported, first attempt to request 64-bit counters, then fall back
+ * to 32-bit if perf_event_open returns EOPNOTSUPP
+ */
+static perfmon_event_t events[] = {
+#define _(event, n, desc) \
+ [ARMV8_PMUV3_##n] = { \
+ .type = PERF_TYPE_RAW, \
+ .config = event, \
+ .config1 = 3, \
+ .name = #n, \
+ .description = desc, \
+ .exclude_kernel = 1, \
+ },
+ foreach_perf_arm_event
+#undef _
+};
+
+u8 *
+format_arm_config (u8 *s, va_list *args)
+{
+ u64 config = va_arg (*args, u64);
+
+ s = format (s, "event=0x%02x", config & 0xff);
+
+ return s;
+}
+
+static clib_error_t *
+arm_init (vlib_main_t *vm, perfmon_source_t *src)
+{
+ clib_error_t *err;
+
+  /*
+    read the /proc/sys/kernel/perf_user_access flag to determine whether
+    userspace access to perf counters is enabled (disabled by default)
+    - if this file doesn't exist, we are on an unsupported kernel version
+    - if the file exists and reads 0, user access must be granted with
+      'sudo sysctl kernel/perf_user_access=1'
+  */
+ u32 perf_user_access_enabled;
+ char *path = "/proc/sys/kernel/perf_user_access";
+ err = clib_sysfs_read (path, "%u", &perf_user_access_enabled);
+ if (err)
+ {
+ if (err->code == ENOENT) /* No such file or directory */
+ {
+ return clib_error_create (
+ "linux kernel version is unsupported, please upgrade to v5.17+ "
+ "- user access to perf counters is not possible");
+ }
+ return clib_error_return_unix (0, "failed to read: %s", path);
+ }
+
+ if (perf_user_access_enabled == 1)
+ log_debug ("user access to perf counters is enabled in %s", path);
+ else
+ {
+ return clib_error_create (
+ "user access to perf counters is not enabled: run"
+ " \'sudo sysctl kernel/perf_user_access=1\'");
+ }
+
+  /*
+    perfmon/arm/events.h supports up to 256 (0x00-0xFF) PMUv3 event
+    codes - create a bitmap recording whether each event is implemented
+  */
+ uword *bitmap = NULL;
+ clib_bitmap_alloc (bitmap, 256);
+
+ struct dirent *dir_entry;
+ const char *event_path =
+ "/sys/bus/event_source/devices/armv8_pmuv3_0/events";
+ DIR *event_dir = opendir (event_path);
+
+ if (event_dir == NULL)
+ {
+ err =
+ clib_error_return_unix (0, "error listing directory: %s", event_path);
+ log_err ("%U", format_clib_error, err);
+ return err;
+ }
+
+ while ((dir_entry = readdir (event_dir)) != NULL)
+ {
+ if (dir_entry->d_name[0] != '.')
+ {
+ u8 *s = NULL;
+ u8 *tmpstr = NULL;
+ unformat_input_t input;
+ u32 config;
+
+ s = format (s, "%s/%s%c", event_path, dir_entry->d_name, 0);
+ err = clib_sysfs_read ((char *) s, "%s", &tmpstr);
+ if (err)
+ {
+ log_err ("%U", format_clib_error, err);
+ continue;
+ }
+ unformat_init_vector (&input, tmpstr);
+ if (unformat (&input, "event=0x%x", &config))
+ {
+	      /* event codes can extend up to 0xFFFF, but perfmon only
+		 supports codes below 0xFF */
+	      if (config < 0xFF)
+ {
+ clib_bitmap_set (bitmap, config, 1);
+ }
+ log_debug ("found supported event in sysfs: %s \'%s\' 0x%x",
+ dir_entry->d_name, tmpstr, config);
+ }
+ else
+ {
+ err = clib_error_create ("error parsing event: %s %s",
+ dir_entry->d_name, tmpstr);
+ log_err ("%U", format_clib_error, err);
+ continue;
+ }
+ }
+ }
+ closedir (event_dir);
+
+ for (int i = 0; i < ARRAY_LEN (events); i++)
+ {
+ if (clib_bitmap_get (bitmap, events[i].config))
+ events[i].implemented = 1;
+ }
+ clib_bitmap_free (bitmap);
+
+ return 0;
+}
+
+u8
+arm_bundle_supported (perfmon_bundle_t *b)
+{
+ clib_bitmap_alloc (b->event_disabled, b->n_events);
+ for (u32 i = 0; i < b->n_events; i++)
+ {
+ perfmon_event_t *e = b->src->events + b->events[i];
+ if (!e->implemented)
+ {
+ log_debug (
+ "bundle \'%s\': perf event %s is not implemented on this CPU",
+ b->name, e->name);
+ clib_bitmap_set (b->event_disabled, i, 1);
+ }
+ }
+
+ /* if no events are implemented, fail and do not register bundle */
+ if (clib_bitmap_count_set_bits (b->event_disabled) == b->n_events)
+ {
+ return 0;
+ }
+
+ /* disable columns that use unimplemented events */
+ clib_bitmap_alloc (b->column_disabled, b->n_columns);
+ if (b->column_events)
+ {
+ u32 disabled_event;
+ /* iterate through set bits */
+ clib_bitmap_foreach (disabled_event, b->event_disabled)
+ {
+ for (u32 j = 0; j < b->n_columns; j++)
+ {
+ if (clib_bitmap_get (b->column_disabled, j))
+ continue;
+ if (GET_BIT (b->column_events[j], disabled_event))
+ {
+ clib_bitmap_set (b->column_disabled, j, 1);
+ log_debug (
+ "bundle \'%s\': disabling column %d as event unsupported",
+ b->name, j);
+ }
+ }
+ }
+ }
+
+ return 1;
+}
+
+PERFMON_REGISTER_SOURCE (arm) = {
+ .name = "arm",
+ .description = "Arm PMU events",
+ .events = events,
+ .n_events = ARRAY_LEN (events),
+ .init_fn = arm_init,
+ .format_config = format_arm_config,
+ .bundle_support = arm_bundle_supported,
+ .config_dispatch_wrapper = arm_config_dispatch_wrapper,
+};
diff --git a/src/plugins/perfmon/arm/events.h b/src/plugins/perfmon/arm/events.h
new file mode 100644
index 00000000000..5b7c49801d0
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __perfmon_arm_h
+#define __perfmon_arm_h
+
+/*
+ * Events from the Armv8 PMUv3 - See "Arm Architecture Reference Manual Armv8,
+ * for Armv8-A architecture profile" D7.10 PMU events and event numbers:
+ * https://developer.arm.com/documentation/ddi0487/latest/
+ * EventCode, name, description
+ */
+#define foreach_perf_arm_event \
+ _ (0x0D, BR_IMMED_RETIRED, "Immediate branch architecturally executed") \
+ _ (0x10, BR_MIS_PRED, \
+ "Mispredicted or not predicted branch Speculatively executed") \
+ _ (0x22, BR_MIS_PRED_RETIRED, \
+ "Instruction architecturally executed, mispredicted branch") \
+ _ (0x12, BR_PRED, "Predictable branch Speculatively executed") \
+ _ (0x21, BR_RETIRED, "Branch instruction architecturally executed") \
+ _ (0x0E, BR_RETURN_RETIRED, \
+ "Function return instruction architecturally executed and the " \
+ "condition code check pass") \
+ _ (0x19, BUS_ACCESS, "Attributable Bus access") \
+ _ (0x1D, BUS_CYCLES, "Bus cycle") \
+ _ (0x1E, CHAIN, \
+     "For an odd numbered counter, increment when an overflow occurs on "    \
+ "the preceding even-numbered counter on the same PE") \
+ _ (0x0B, CID_WRITE_RETIRED, \
+ "Instruction architecturally executed, Condition code check pass, " \
+ "write to CONTEXTIDR") \
+ _ (0x11, CPU_CYCLES, "Cycle counter") \
+ _ (0x34, DTLB_WALK, \
+ "Access to data or unified TLB causes a translation table walk") \
+ _ (0x0A, EXC_RETURN, \
+ "Exception return instruction architecturally executed and the " \
+ "condition code check pass") \
+ _ (0x09, EXC_TAKEN, "Exception entry") \
+ _ (0x08, INST_RETIRED, "Instruction architecturally executed") \
+ _ (0x1B, INST_SPEC, "Operation Speculatively executed") \
+ _ (0x35, ITLB_WALK, \
+ "Access to instruction TLB that causes a translation table walk") \
+ _ (0x04, L1D_CACHE, "Level 1 data cache access") \
+ _ (0x1F, L1D_CACHE_ALLOCATE, \
+ "Level 1 data cache allocation without refill") \
+ _ (0x39, L1D_CACHE_LMISS_RD, "Level 1 data cache long-latency read miss") \
+ _ (0x03, L1D_CACHE_REFILL, "Level 1 data cache refill") \
+ _ (0x15, L1D_CACHE_WB, "Attributable Level 1 data cache write-back") \
+ _ (0x25, L1D_TLB, "Level 1 data or unified TLB access") \
+ _ (0x05, L1D_TLB_REFILL, "Level 1 data or unified TLB refill") \
+ _ (0x14, L1I_CACHE, "Level 1 instruction cache access") \
+ _ (0x01, L1I_CACHE_REFILL, "Level 1 instruction cache refill") \
+ _ (0x26, L1I_TLB, "Level 1 instruction TLB access") \
+ _ (0x02, L1I_TLB_REFILL, "Level 1 instruction TLB refill") \
+ _ (0x16, L2D_CACHE, "Level 2 data cache access") \
+ _ (0x20, L2D_CACHE_ALLOCATE, \
+ "Level 2 data cache allocation without refill") \
+ _ (0x17, L2D_CACHE_REFILL, "Level 2 data cache refill") \
+ _ (0x18, L2D_CACHE_WB, "Attributable Level 2 data cache write-back") \
+ _ (0x2F, L2D_TLB, "Level 2 data or unified TLB access") \
+ _ (0x2D, L2D_TLB_REFILL, "Level 2 data or unified TLB refill") \
+ _ (0x27, L2I_CACHE, "Level 2 instruction cache access") \
+ _ (0x28, L2I_CACHE_REFILL, "Attributable Level 2 instruction cache refill") \
+ _ (0x30, L2I_TLB, "Level 2 instruction TLB access") \
+ _ (0x2E, L2I_TLB_REFILL, "Level 2 instruction TLB refill") \
+ _ (0x2B, L3D_CACHE, "Level 3 data cache access") \
+ _ (0x29, L3D_CACHE_ALLOCATE, \
+ "Level 3 data cache allocation without refill") \
+ _ (0x2A, L3D_CACHE_REFILL, "Attributable Level 3 data cache refill") \
+ _ (0x2C, L3D_CACHE_WB, "Attributable Level 3 data cache write-back") \
+ _ (0x06, LD_RETIRED, \
+ "Memory-reading instruction architecturally executed and condition" \
+ " code check pass") \
+ _ (0x32, LL_CACHE, "Last Level cache access") \
+ _ (0x33, LL_CACHE_MISS, "Last Level cache miss") \
+ _ (0x37, LL_CACHE_MISS_RD, "Last level cache miss, read") \
+ _ (0x36, LL_CACHE_RD, "Last level data cache access, read") \
+ _ (0x1A, MEMORY_ERROR, "Local memory error") \
+ _ (0x13, MEM_ACCESS, "Data memory access") \
+ _ (0x3A, OP_RETIRED, "Micro-operation architecturally executed") \
+ _ (0x3B, OP_SPEC, "Micro-operation Speculatively executed") \
+ _ (0x0C, PC_WRITE_RETIRED, \
+ "Software change to the Program Counter (PC). Instruction is " \
+ "architecturally executed and condition code check pass") \
+ _ (0x31, REMOTE_ACCESS, \
+ "Access to another socket in a multi-socket system") \
+ _ (0x38, REMOTE_ACCESS_RD, \
+ "Access to another socket in a multi-socket system, read") \
+ _ (0x3C, STALL, "No operation sent for execution") \
+ _ (0x24, STALL_BACKEND, "No operation issued due to the backend") \
+ _ (0x23, STALL_FRONTEND, "No operation issued due to the frontend") \
+ _ (0x3F, STALL_SLOT, "No operation sent for execution on a Slot") \
+ _ (0x3D, STALL_SLOT_BACKEND, \
+ "No operation sent for execution on a Slot due to the backend") \
+ _ (0x3E, STALL_SLOT_FRONTEND, \
+ "No operation sent for execution on a Slot due to the frontend") \
+ _ (0x07, ST_RETIRED, \
+ "Memory-writing instruction architecturally executed and condition" \
+ " code check pass") \
+ _ (0x00, SW_INCR, \
+ "Instruction architecturally executed, Condition code check pass, " \
+ "software increment") \
+ _ (0x1C, TTBR_WRITE_RETIRED, \
+ "Instruction architecturally executed, Condition code check pass, " \
+ "write to TTBR") \
+ _ (0x0F, UNALIGNED_LDST_RETIRED, \
+     "Unaligned memory-reading or memory-writing instruction "               \
+ "architecturally executed and condition code check pass")
+
+typedef enum
+{
+#define _(event, n, desc) ARMV8_PMUV3_##n,
+ foreach_perf_arm_event
+#undef _
+ ARM_N_EVENTS,
+} perf_arm_event_t;
+
+#endif
diff --git a/src/plugins/perfmon/cli.c b/src/plugins/perfmon/cli.c
index 3305480031d..2c59ddd478a 100644
--- a/src/plugins/perfmon/cli.c
+++ b/src/plugins/perfmon/cli.c
@@ -15,7 +15,7 @@
#include <vnet/vnet.h>
#include <perfmon/perfmon.h>
-#include <perfmon/table.h>
+#include <vppinfra/format_table.h>
uword
unformat_perfmon_bundle_name (unformat_input_t *input, va_list *args)
@@ -38,6 +38,40 @@ unformat_perfmon_bundle_name (unformat_input_t *input, va_list *args)
}
uword
+unformat_perfmon_active_type (unformat_input_t *input, va_list *args)
+{
+ perfmon_bundle_t *b = va_arg (*args, perfmon_bundle_t *);
+ perfmon_bundle_type_t *bundle_type = va_arg (*args, perfmon_bundle_type_t *);
+ char *str = 0;
+
+ char *_str_types[PERFMON_BUNDLE_TYPE_MAX];
+
+#define _(type, pstr) _str_types[type] = (char *) pstr;
+
+ foreach_perfmon_bundle_type
+#undef _
+
+  if (!b)
+    return 0;
+
+ if (unformat (input, "%s", &str) == 0)
+ return 0;
+
+ for (int i = PERFMON_BUNDLE_TYPE_NODE; i < PERFMON_BUNDLE_TYPE_MAX; i++)
+ {
+ /* match the name and confirm it is available on this cpu */
+ if (strncmp (str, _str_types[i], strlen (_str_types[i])) == 0 &&
+ (b->type_flags & 1 << i))
+ {
+ *bundle_type = i;
+ break;
+ }
+ }
+
+ vec_free (str);
+ return 1;
+}
+
+uword
unformat_perfmon_source_name (unformat_input_t *input, va_list *args)
{
perfmon_main_t *pm = &perfmon_main;
@@ -57,23 +91,33 @@ unformat_perfmon_source_name (unformat_input_t *input, va_list *args)
return p ? 1 : 0;
}
+typedef enum
+{
+ FORMAT_PERFMON_BUNDLE_NONE = 0,
+ FORMAT_PERFMON_BUNDLE_VERBOSE = 1,
+ FORMAT_PERFMON_BUNDLE_SHOW_CONFIG = 2
+} format_perfmon_bundle_args_t;
+
u8 *
format_perfmon_bundle (u8 *s, va_list *args)
{
perfmon_bundle_t *b = va_arg (*args, perfmon_bundle_t *);
- int verbose = va_arg (*args, int);
+ format_perfmon_bundle_args_t cfg =
+ va_arg (*args, format_perfmon_bundle_args_t);
- const char *bundle_type[] = {
- [PERFMON_BUNDLE_TYPE_NODE] = "node",
- [PERFMON_BUNDLE_TYPE_THREAD] = "thread",
- [PERFMON_BUNDLE_TYPE_SYSTEM] = "system",
- };
+ int vl = 0;
- if (b == 0)
- return format (s, "%-20s%-10s%-20s%s", "Name", "Type", "Source",
- "Description");
+ u8 *_bundle_type = 0;
+ const char *bundle_type[PERFMON_BUNDLE_TYPE_MAX];
+#define _(type, pstr) bundle_type[type] = (const char *) pstr;
- if (verbose)
+ foreach_perfmon_bundle_type
+#undef _
+
+  if (b == 0)
+    return format (s, "%-20s%-20s%-20s%s", "Name", "Type(s)", "Source",
+		   "Description");
+
+ if (cfg != FORMAT_PERFMON_BUNDLE_NONE)
{
s = format (s, "name: %s\n", b->name);
s = format (s, "description: %s\n", b->description);
@@ -81,16 +125,47 @@ format_perfmon_bundle (u8 *s, va_list *args)
for (int i = 0; i < b->n_events; i++)
{
perfmon_event_t *e = b->src->events + b->events[i];
- s = format (s, "event %u: %s\n", i, e->name);
+ s = format (s, "event %u: %s", i, e->name);
+
+ format_function_t *format_config = b->src->format_config;
+
+ if (format_config && cfg == FORMAT_PERFMON_BUNDLE_SHOW_CONFIG)
+ s = format (s, " (%U)", format_config, e->config);
+
+ s = format (s, "\n");
}
}
else
- s = format (s, "%-20s%-10s%-20s%s", b->name, bundle_type[b->type],
- b->src->name, b->description);
+ {
+ s = format (s, "%-20s", b->name);
+ for (int i = PERFMON_BUNDLE_TYPE_NODE; i < PERFMON_BUNDLE_TYPE_MAX; i++)
+ {
+	  /* check the type is available on this uarch */
+ if (b->type_flags & 1 << i)
+ _bundle_type = format (_bundle_type, "%s,", bundle_type[i]);
+ }
+      /* trim the trailing comma */
+ if ((vl = vec_len (_bundle_type)))
+ _bundle_type[vl - 1] = 0;
+
+ s =
+ format (s, "%-20s%-20s%s", _bundle_type, b->src->name, b->description);
+ }
+
+ vec_free (_bundle_type);
return s;
}
+static int
+bundle_name_sort_cmp (void *a1, void *a2)
+{
+ perfmon_bundle_t **n1 = a1;
+ perfmon_bundle_t **n2 = a2;
+
+ return clib_strcmp ((char *) (*n1)->name, (char *) (*n2)->name);
+}
+
static clib_error_t *
show_perfmon_bundle_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
@@ -99,6 +174,7 @@ show_perfmon_bundle_command_fn (vlib_main_t *vm, unformat_input_t *input,
unformat_input_t _line_input, *line_input = &_line_input;
perfmon_bundle_t *b = 0, **vb = 0;
int verbose = 0;
+ format_perfmon_bundle_args_t cfg = FORMAT_PERFMON_BUNDLE_NONE;
if (unformat_user (input, unformat_line_input, line_input))
{
@@ -116,20 +192,31 @@ show_perfmon_bundle_command_fn (vlib_main_t *vm, unformat_input_t *input,
unformat_free (line_input);
}
- if (vb == 0)
+ if (verbose) /* if verbose is specified */
+ cfg = FORMAT_PERFMON_BUNDLE_VERBOSE;
+
+ if (vb)
+ {
+ if (verbose) /* if verbose is specified with a bundle */
+ cfg = FORMAT_PERFMON_BUNDLE_SHOW_CONFIG;
+ else
+ cfg = FORMAT_PERFMON_BUNDLE_VERBOSE;
+ }
+ else
{
char *key;
hash_foreach_mem (key, b, pm->bundle_by_name, vec_add (vb, &b, 1););
}
- else
- verbose = 1;
- if (verbose == 0)
- vlib_cli_output (vm, "%U\n", format_perfmon_bundle, 0, 0);
+ if (cfg == FORMAT_PERFMON_BUNDLE_NONE)
+ vlib_cli_output (vm, "%U\n", format_perfmon_bundle, 0, cfg);
+
+ vec_sort_with_function (vb, bundle_name_sort_cmp);
for (int i = 0; i < vec_len (vb); i++)
- if (!vb[i]->cpu_supports || vb[i]->cpu_supports ())
- vlib_cli_output (vm, "%U\n", format_perfmon_bundle, vb[i], verbose);
+ /* bundle type will be unknown if no cpu_supports matched */
+ if (vb[i]->type_flags)
+ vlib_cli_output (vm, "%U\n", format_perfmon_bundle, vb[i], cfg);
vec_free (vb);
return 0;
@@ -283,7 +370,8 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
vec_validate (readings, n_instances - 1);
/*Only perform read() for THREAD or SYSTEM bundles*/
- for (int i = 0; i < n_instances && b->type != PERFMON_BUNDLE_TYPE_NODE; i++)
+ for (int i = 0;
+ i < n_instances && b->active_type != PERFMON_BUNDLE_TYPE_NODE; i++)
{
in = vec_elt_at_index (it->instances, i);
r = vec_elt_at_index (readings, i);
@@ -312,8 +400,8 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
in = vec_elt_at_index (it->instances, i);
r = vec_elt_at_index (readings, i);
- table_format_cell (t, col, -1, "%s", in->name);
- if (b->type == PERFMON_BUNDLE_TYPE_NODE)
+ table_format_cell (t, col, -1, "%s", in->name, b->active_type);
+ if (b->active_type == PERFMON_BUNDLE_TYPE_NODE)
{
perfmon_thread_runtime_t *tr;
tr = vec_elt_at_index (pm->thread_runtimes, i);
@@ -322,19 +410,80 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
perfmon_node_stats_t ns;
table_format_cell (t, ++col, -1, "%U", format_vlib_node_name,
- vm, j);
+ vm, j, b->active_type);
table_set_cell_align (t, col, -1, TTAA_RIGHT);
table_set_cell_fg_color (t, col, -1, TTAC_CYAN);
- clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
+
+ if (PREDICT_TRUE (clib_bitmap_is_zero (b->event_disabled)))
+ clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
+ /* if some events are not implemented, we need to realign these
+ to display under the correct column headers */
+ else
+ {
+ perfmon_node_stats_t *tr_ns = tr->node_stats + j;
+ ns.n_calls = tr_ns->n_calls;
+ ns.n_packets = tr_ns->n_packets;
+	      /* loop through all events in the bundle and copy each value
+		 into its correct place, until all implemented values have
+		 been read */
+ int num_enabled_events =
+ b->n_events -
+ clib_bitmap_count_set_bits (b->event_disabled);
+ for (int i = 0, k = 0; k < num_enabled_events; i++)
+ {
+ if (!clib_bitmap_get (b->event_disabled, i))
+ {
+ ns.value[i] = tr_ns->value[k];
+ k++;
+ }
+ }
+ }
for (int j = 0; j < n_row; j++)
- table_format_cell (t, col, j, "%U", b->format_fn, &ns, j);
+ {
+ if (clib_bitmap_get (b->column_disabled, j))
+ table_format_cell (t, col, j, "-");
+ else
+ table_format_cell (t, col, j, "%U", b->format_fn, &ns, j,
+ b->active_type);
+ }
}
}
- else
+      else /* b->active_type != PERFMON_BUNDLE_TYPE_NODE */
{
- for (int j = 0; j < n_row; j++)
- table_format_cell (t, i, j, "%U", b->format_fn, r, j);
+ if (PREDICT_TRUE (clib_bitmap_is_zero (b->event_disabled)))
+ {
+ for (int j = 0; j < n_row; j++)
+ table_format_cell (t, i, j, "%U", b->format_fn, r, j,
+ b->active_type);
+ }
+ /* similarly for THREAD/SYSTEM bundles, if some events are not
+ implemented, we need to realign readings under column headings */
+ else
+ {
+ perfmon_reading_t aligned_r[b->n_events];
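+	      /* perfmon_reading_t ends in a flexible value[] array, so a
+		 VLA of b->n_events elements reserves enough space for the
+		 realigned values stored through aligned_r[0] */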
+ aligned_r->nr = r->nr;
+ aligned_r->time_enabled = r->time_enabled;
+ aligned_r->time_running = r->time_running;
+ int num_enabled_events =
+ b->n_events - clib_bitmap_count_set_bits (b->event_disabled);
+ for (int i = 0, k = 0; k < num_enabled_events; i++)
+ {
+ if (!clib_bitmap_get (b->event_disabled, i))
+ {
+ aligned_r->value[i] = r->value[k];
+ k++;
+ }
+ }
+ for (int j = 0; j < n_row; j++)
+ {
+ if (clib_bitmap_get (b->column_disabled, j))
+ table_format_cell (t, col, j, "-");
+ else
+ table_format_cell (t, i, j, "%U", b->format_fn, aligned_r,
+ j, b->active_type);
+ }
+ }
}
col++;
}
@@ -380,6 +529,7 @@ perfmon_start_command_fn (vlib_main_t *vm, unformat_input_t *input,
perfmon_main_t *pm = &perfmon_main;
unformat_input_t _line_input, *line_input = &_line_input;
perfmon_bundle_t *b = 0;
+ perfmon_bundle_type_t bundle_type = PERFMON_BUNDLE_TYPE_UNKNOWN;
if (pm->is_running)
return clib_error_return (0, "please stop first");
@@ -391,6 +541,9 @@ perfmon_start_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "bundle %U", unformat_perfmon_bundle_name, &b))
;
+ else if (unformat (line_input, "type %U", unformat_perfmon_active_type,
+ b, &bundle_type))
+ ;
else
return clib_error_return (0, "unknown input '%U'",
format_unformat_error, line_input);
@@ -400,12 +553,28 @@ perfmon_start_command_fn (vlib_main_t *vm, unformat_input_t *input,
if (b == 0)
return clib_error_return (0, "please specify bundle name");
+ /* if there is more than one valid mode */
+ if (count_set_bits (b->type_flags) > 1)
+ {
+ /* what did the user indicate */
+      /* the user must indicate which type to use */
+ return clib_error_return (0, "please specify a valid type");
+ }
+ /* otherwise just use the default */
+ else if (!bundle_type)
+ {
+ bundle_type =
+ (perfmon_bundle_type_t) count_trailing_zeros (b->type_flags);
+ }
+
+ b->active_type = bundle_type;
+
return perfmon_start (vm, b);
}
VLIB_CLI_COMMAND (perfmon_start_command, static) = {
.path = "perfmon start",
- .short_help = "perfmon start bundle [<bundle-name>]",
+ .short_help = "perfmon start bundle [<bundle-name>] type [<node|thread>]",
.function = perfmon_start_command_fn,
.is_mp_safe = 1,
};
diff --git a/src/plugins/perfmon/dispatch_wrapper.c b/src/plugins/perfmon/dispatch_wrapper.c
deleted file mode 100644
index fe0a449df99..00000000000
--- a/src/plugins/perfmon/dispatch_wrapper.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright (c) 2020 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "vppinfra/string.h"
-#include <vnet/vnet.h>
-
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-#include <vnet/plugin/plugin.h>
-#include <vpp/app/version.h>
-#include <linux/limits.h>
-#include <sys/ioctl.h>
-
-#include <perfmon/perfmon.h>
-
-static_always_inline void
-perfmon_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters)
-{
- switch (n_counters)
- {
- default:
- case 7:
- counters[6] = _rdpmc (pmc_index[6]);
- case 6:
- counters[5] = _rdpmc (pmc_index[5]);
- case 5:
- counters[4] = _rdpmc (pmc_index[4]);
- case 4:
- counters[3] = _rdpmc (pmc_index[3]);
- case 3:
- counters[2] = _rdpmc (pmc_index[2]);
- case 2:
- counters[1] = _rdpmc (pmc_index[1]);
- case 1:
- counters[0] = _rdpmc (pmc_index[0]);
- break;
- }
-}
-
-static_always_inline int
-perfmon_calc_mmap_offset (perfmon_thread_runtime_t *tr, u8 i)
-{
- return (int) (tr->mmap_pages[i]->index + tr->mmap_pages[i]->offset);
-}
-
-static_always_inline int
-perfmon_metric_index (perfmon_bundle_t *b, u8 i)
-{
- return (int) (b->metrics[i]);
-}
-
-uword
-perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame)
-{
- perfmon_main_t *pm = &perfmon_main;
- perfmon_thread_runtime_t *rt =
- vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
- perfmon_node_stats_t *s =
- vec_elt_at_index (rt->node_stats, node->node_index);
-
- u8 n_events = rt->n_events;
-
- u64 before[PERF_MAX_EVENTS];
- u64 after[PERF_MAX_EVENTS];
- int pmc_index[PERF_MAX_EVENTS];
- uword rv;
-
- clib_prefetch_load (s);
-
- switch (n_events)
- {
- default:
- case 7:
- pmc_index[6] = perfmon_calc_mmap_offset (rt, 6);
- case 6:
- pmc_index[5] = perfmon_calc_mmap_offset (rt, 5);
- case 5:
- pmc_index[4] = perfmon_calc_mmap_offset (rt, 4);
- case 4:
- pmc_index[3] = perfmon_calc_mmap_offset (rt, 3);
- case 3:
- pmc_index[2] = perfmon_calc_mmap_offset (rt, 2);
- case 2:
- pmc_index[1] = perfmon_calc_mmap_offset (rt, 1);
- case 1:
- pmc_index[0] = perfmon_calc_mmap_offset (rt, 0);
- break;
- }
-
- perfmon_read_pmcs (&before[0], pmc_index, n_events);
- rv = node->function (vm, node, frame);
- perfmon_read_pmcs (&after[0], pmc_index, n_events);
-
- if (rv == 0)
- return rv;
-
- s->n_calls += 1;
- s->n_packets += rv;
-
- for (int i = 0; i < n_events; i++)
- s->value[i] += after[i] - before[i];
-
- return rv;
-}
-
-uword
-perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame)
-{
- perfmon_main_t *pm = &perfmon_main;
- perfmon_thread_runtime_t *rt =
- vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
- perfmon_node_stats_t *s =
- vec_elt_at_index (rt->node_stats, node->node_index);
-
- u8 n_events = rt->n_events;
-
- u64 before[PERF_MAX_EVENTS];
- int pmc_index[PERF_MAX_EVENTS];
- uword rv;
-
- clib_prefetch_load (s);
-
- switch (n_events)
- {
- default:
- case 7:
- pmc_index[6] = perfmon_metric_index (rt->bundle, 6);
- case 6:
- pmc_index[5] = perfmon_metric_index (rt->bundle, 5);
- case 5:
- pmc_index[4] = perfmon_metric_index (rt->bundle, 4);
- case 4:
- pmc_index[3] = perfmon_metric_index (rt->bundle, 3);
- case 3:
- pmc_index[2] = perfmon_metric_index (rt->bundle, 2);
- case 2:
- pmc_index[1] = perfmon_metric_index (rt->bundle, 1);
- case 1:
- pmc_index[0] = perfmon_metric_index (rt->bundle, 0);
- break;
- }
-
- perfmon_read_pmcs (&before[0], pmc_index, n_events);
- rv = node->function (vm, node, frame);
-
- clib_memcpy_fast (&s->t[0].value[0], &before, sizeof (before));
- perfmon_read_pmcs (&s->t[1].value[0], pmc_index, n_events);
-
- if (rv == 0)
- return rv;
-
- s->n_calls += 1;
- s->n_packets += rv;
-
- return rv;
-}
diff --git a/src/plugins/perfmon/intel/bundle/backend_bound_core.c b/src/plugins/perfmon/intel/bundle/backend_bound_core.c
new file mode 100644
index 00000000000..16905235119
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/backend_bound_core.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+enum
+{
+ PORT0 = 0,
+ PORT1 = 1,
+ PORT5 = 2,
+ PORT6 = 3,
+ PORT2_3 = 4,
+ PORT4_9 = 5,
+ PORT7_8 = 6,
+ DISTRIBUTED = 7,
+};
+
+static u8 *
+format_intel_backend_bound_core (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+ f64 sv = 0;
+
+ if (!ss->n_packets)
+ return s;
+
+ if (0 == row)
+ {
+ sv = ss->value[DISTRIBUTED] / ss->n_packets;
+
+ s = format (s, "%.0f", sv);
+ return s;
+ }
+
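+  /* UOPS_DISPATCHED.PORT_2_3 covers two load ports, and PORT_4_9 plus
+     PORT_7_8 together cover four store ports - hence the 2x and 4x
+     scaling of DISTRIBUTED cycles below */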
+ switch (row)
+ {
+ case 1:
+ sv = ss->value[PORT0] / (f64) ss->value[DISTRIBUTED];
+ break;
+ case 2:
+ sv = ss->value[PORT1] / (f64) ss->value[DISTRIBUTED];
+ break;
+ case 3:
+ sv = ss->value[PORT5] / (f64) ss->value[DISTRIBUTED];
+ break;
+ case 4:
+ sv = ss->value[PORT6] / (f64) ss->value[DISTRIBUTED];
+ break;
+ case 5:
+ sv = (ss->value[PORT2_3]) / (f64) (2 * ss->value[DISTRIBUTED]);
+ break;
+ case 6:
+ sv = (ss->value[PORT4_9] + ss->value[PORT7_8]) /
+ (f64) (4 * ss->value[DISTRIBUTED]);
+ break;
+ }
+
+ sv = clib_max (sv * 100, 0);
+ s = format (s, "%04.1f", sv);
+
+ return s;
+}
+
+static perfmon_cpu_supports_t backend_bound_core_cpu_supports[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_backend_bound_core) = {
+ .name = "td-backend-core",
+ .description = "Topdown BackEnd-bound Core - % cycles core resources busy",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_0, /* 0xFF */
+ .events[1] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_1, /* 0xFF */
+ .events[2] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_5, /* 0xFF */
+ .events[3] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_6, /* 0xFF */
+ .events[4] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_2_3, /* 0xFF */
+ .events[5] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_4_9, /* 0xFF */
+ .events[6] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_7_8, /* 0xFF */
+ .events[7] = INTEL_CORE_E_CPU_CLK_UNHALTED_DISTRIBUTED, /* 0xFF */
+ .n_events = 8,
+ .format_fn = format_intel_backend_bound_core,
+ .cpu_supports = backend_bound_core_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (backend_bound_core_cpu_supports),
+ .column_headers = PERFMON_STRINGS ("Clocks/Packet", "%Port0", "%Port1",
+ "%Port5", "%Port6", "%Load", "%Store"),
+};
diff --git a/src/plugins/perfmon/intel/bundle/backend_bound_mem.c b/src/plugins/perfmon/intel/bundle/backend_bound_mem.c
new file mode 100644
index 00000000000..ccf1ed12153
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/backend_bound_mem.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+enum
+{
+ STALLS_L1D_MISS = 0,
+ STALLS_L2_MISS = 1,
+ STALLS_L3_MISS = 2,
+ STALLS_MEM_ANY = 3,
+ STALLS_TOTAL = 4,
+ BOUND_ON_STORES = 5,
+ FB_FULL = 6,
+ THREAD = 7,
+};
+
+static u8 *
+format_intel_backend_bound_mem (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+ f64 sv = 0;
+
+ if (!ss->n_packets)
+ return s;
+
+ if (0 == row)
+ {
+ sv = ss->value[THREAD] / ss->n_packets;
+
+ s = format (s, "%.0f", sv);
+ return s;
+ }
+
+ switch (row)
+ {
+ case 1:
+ sv = ss->value[BOUND_ON_STORES];
+ break;
+ case 2:
+ sv = ss->value[STALLS_MEM_ANY] - ss->value[STALLS_L1D_MISS];
+ break;
+ case 3:
+ sv = ss->value[FB_FULL];
+ break;
+ case 4:
+ sv = ss->value[STALLS_L1D_MISS] - ss->value[STALLS_L2_MISS];
+ break;
+ case 5:
+ sv = ss->value[STALLS_L2_MISS] - ss->value[STALLS_L3_MISS];
+ break;
+ case 6:
+ sv = ss->value[STALLS_L3_MISS];
+ break;
+ }
+
+ sv = clib_max ((sv / ss->value[THREAD]) * 100, 0);
+
+ s = format (s, "%04.1f", sv);
+
+ return s;
+}
+
+static perfmon_cpu_supports_t backend_bound_mem_cpu_supports[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_backend_bound_mem) = {
+ .name = "td-backend-mem",
+ .description = "Topdown BackEnd-bound Memory - % cycles not retiring "
+ "instructions due to memory stalls",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L1D_MISS, /* 0x0F */
+ .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L2_MISS, /* 0x0F */
+ .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L3_MISS, /* 0x0F */
+ .events[3] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY, /* 0xFF */
+ .events[4] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL, /* 0xFF */
+ .events[5] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES, /* 0xFF */
+ .events[6] = INTEL_CORE_E_L1D_PEND_MISS_FB_FULL, /* 0x0F */
+ .events[7] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* 0xFF */
+ .n_events = 8,
+ .format_fn = format_intel_backend_bound_mem,
+ .cpu_supports = backend_bound_mem_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (backend_bound_mem_cpu_supports),
+ .column_headers = PERFMON_STRINGS ("Clocks/Packet", "%Store Bound",
+ "%L1 Bound", "%FB Full", "%L2 Bound",
+ "%L3 Bound", "%DRAM Bound"),
+};
diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c
new file mode 100644
index 00000000000..3db4ca9c0f3
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+enum
+{
+ DSB_UOPS,
+ MS_UOPS,
+ MITE_UOPS,
+ LSD_UOPS,
+};
+
+static u8 *
+format_intel_frontend_bound_bw_src (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+ f64 sv = 0;
+ f64 uops = ss->value[DSB_UOPS] + ss->value[MS_UOPS] + ss->value[MITE_UOPS] +
+ ss->value[LSD_UOPS];
+
+ if (!ss->n_packets)
+ return s;
+
+ if (row == 0)
+ {
+ sv = uops / ss->n_packets;
+ s = format (s, "%.0f", sv);
+
+ return s;
+ }
+
+ switch (row)
+ {
+ case 1:
+ sv = (ss->value[DSB_UOPS] / uops) * 100;
+ break;
+ case 2:
+ sv = (ss->value[MS_UOPS] / uops) * 100;
+ break;
+ case 3:
+ sv = (ss->value[MITE_UOPS] / uops) * 100;
+ break;
+ case 4:
+ sv = (ss->value[LSD_UOPS] / uops) * 100;
+ break;
+ }
+
+ s = format (s, "%04.1f", sv);
+
+ return s;
+}
+
+static perfmon_cpu_supports_t frontend_bound_bw_cpu_supports_src[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw_src) = {
+ .name = "td-frontend-bw-src",
+ .description =
+ "Topdown FrontEnd-bound BandWidth - % uops from each uop fetch source",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_IDQ_DSB_UOPS, /* 0x0F */
+ .events[1] = INTEL_CORE_E_IDQ_MS_UOPS, /* 0x0F */
+ .events[2] = INTEL_CORE_E_IDQ_MITE_UOPS, /* 0x0F */
+ .events[3] = INTEL_CORE_E_LSD_UOPS, /* 0x0F */
+ .n_events = 4,
+ .format_fn = format_intel_frontend_bound_bw_src,
+ .cpu_supports = frontend_bound_bw_cpu_supports_src,
+ .n_cpu_supports = ARRAY_LEN (frontend_bound_bw_cpu_supports_src),
+ .column_headers = PERFMON_STRINGS ("UOPs/PKT", "% DSB UOPS", "% MS UOPS",
+ "% MITE UOPS", "% LSD UOPS"),
+ .footer =
+ "For more information, see the Intel(R) 64 and IA-32 Architectures\n"
+ "Optimization Reference Manual section on the Front End.",
+};
diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c
new file mode 100644
index 00000000000..6bf08af8154
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+enum
+{
+ THREAD_P,
+ THREE_UOP,
+ TWO_UOP,
+ ONE_UOP,
+ NO_UOP,
+ FOUR_UOP,
+};
+
+static u8 *
+format_intel_frontend_bound_bw_uops (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+ f64 sv = 0;
+ f64 cycles = ss->value[THREAD_P];
+
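+  /* the IDQ_UOPS_NOT_DELIVERED.CYCLES_x_UOP_DELIV counters are
+     cumulative, so cycles with exactly N uops delivered are obtained by
+     differencing adjacent counters; FE_WAS_OK supplies the 4-uop case */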
+ switch (row)
+ {
+ case 0:
+ sv = (ss->value[FOUR_UOP] / cycles) * 100;
+ break;
+ case 1:
+ sv = ((ss->value[THREE_UOP] - ss->value[TWO_UOP]) / cycles) * 100;
+ break;
+ case 2:
+ sv = ((ss->value[TWO_UOP] - ss->value[ONE_UOP]) / cycles) * 100;
+ break;
+ case 3:
+ sv = ((ss->value[ONE_UOP] - ss->value[NO_UOP]) / cycles) * 100;
+ break;
+ case 4:
+ sv = (ss->value[NO_UOP] / cycles) * 100;
+ break;
+ }
+
+ s = format (s, "%04.1f", sv);
+
+ return s;
+}
+
+static perfmon_cpu_supports_t frontend_bound_bw_cpu_supports_uops[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw_uops) = {
+ .name = "td-frontend-bw-uops",
+ .description = "Topdown FrontEnd-bound BandWidth - distribution of "
+ "uops delivered to frontend",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* 0x0F */
+ .events[1] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_3_UOP_DELIV_CORE, /* 0xFF */
+ .events[2] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_2_UOP_DELIV_CORE, /* 0xFF */
+ .events[3] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_1_UOP_DELIV_CORE, /* 0xFF */
+ .events[4] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOP_DELIV_CORE, /* 0xFF */
+ .events[5] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_FE_WAS_OK, /* 0xFF */
+ .n_events = 6,
+ .format_fn = format_intel_frontend_bound_bw_uops,
+ .cpu_supports = frontend_bound_bw_cpu_supports_uops,
+ .n_cpu_supports = ARRAY_LEN (frontend_bound_bw_cpu_supports_uops),
+ .column_headers = PERFMON_STRINGS ("% 4 UOPS", "% 3 UOPS", "% 2 UOPS",
+ "% 1 UOPS", "% 0 UOPS"),
+ .footer =
+ "For more information, see the Intel(R) 64 and IA-32 Architectures\n"
+ "Optimization Reference Manual section on the Front End.",
+};
diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c b/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c
new file mode 100644
index 00000000000..aea2149663f
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
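+/* estimated penalties, in core cycles, charged per MS switch and per
+   BACLEARS event, as used by Intel's Top-down Microarchitecture Analysis
+   (TMA) methodology */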
+static const int MS_Switches_Cost = 3;
+static const int BA_Clear_Cost = 10;
+
+enum
+{
+ ICACHE_MISS,
+ DSB_SWITCHES,
+ RESTEER,
+ MS_SWITCHES,
+ BACLEARS,
+ THREAD,
+};
+
+static u8 *
+format_intel_frontend_bound_lat (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+ f64 sv = 0;
+ f64 cycles = ss->value[THREAD];
+
+ if (!ss->n_packets)
+ return s;
+
+ if (!row)
+ {
+ sv = ss->value[THREAD] / ss->n_packets;
+
+ s = format (s, "%.0f", sv);
+
+ return s;
+ }
+
+ switch (row)
+ {
+ case 1:
+ sv = ss->value[ICACHE_MISS] / cycles;
+ break;
+ case 2:
+ sv = ss->value[DSB_SWITCHES] / cycles;
+ break;
+ case 3:
+ sv =
+ (ss->value[RESTEER] + (ss->value[BACLEARS] * BA_Clear_Cost)) / cycles;
+ break;
+ case 4:
+ sv = (ss->value[MS_SWITCHES] * MS_Switches_Cost) / cycles;
+ break;
+ }
+
+ s = format (s, "%04.1f", sv * 100);
+
+ return s;
+}
+
+static perfmon_cpu_supports_t frontend_bound_lat_cpu_supports[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_lat) = {
+ .name = "td-frontend-lat",
+ .description = "Topdown FrontEnd-bound Latency - % cycles not retiring uops "
+ "due to frontend latency",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_ICACHE_16B_IFDATA_STALL, /* 0x0F */
+ .events[1] = INTEL_CORE_E_DSB2MITE_SWITCHES_PENALTY_CYCLES, /* 0x0F */
+ .events[2] = INTEL_CORE_E_INT_MISC_CLEAR_RESTEER_CYCLES, /* 0xFF */
+ .events[3] = INTEL_CORE_E_IDQ_MS_SWITCHES, /* 0x0F */
+ .events[4] = INTEL_CORE_E_BACLEARS_ANY, /* 0x0F */
+ .events[5] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* FIXED */
+ .n_events = 6,
+ .format_fn = format_intel_frontend_bound_lat,
+ .cpu_supports = frontend_bound_lat_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (frontend_bound_lat_cpu_supports),
+ .column_headers = PERFMON_STRINGS ("Clocks/Packet", "% iCache Miss",
+ "% DSB Switch", "% Branch Resteer",
+ "% MS Switch"),
+ .footer =
+ "For more information, see the Intel(R) 64 and IA-32 Architectures\n"
+ "Optimization Reference Manual on the Front End.",
+};
diff --git a/src/plugins/perfmon/intel/bundle/iio_bw.c b/src/plugins/perfmon/intel/bundle/iio_bw.c
new file mode 100644
index 00000000000..f746ca2c906
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/iio_bw.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/uncore.h>
+#include <vlib/pci/pci.h>
+#include <vppinfra/format.h>
+#include <linux/limits.h>
+#include <fcntl.h>
+#include <math.h>
+
+typedef struct
+{
+ u8 socket_id;
+ u8 sad_id;
+ u8 iio_unit_id;
+} iio_uncore_sad_t;
+typedef u32 index_t;
+
+static const char *procfs_pci_path = "/proc/bus/pci";
+
+#define PCM_INTEL_PCI_VENDOR_ID 0x8086
+#define SNR_ICX_SAD_CONTROL_CFG_OFFSET 0x3F4
+#define SNR_ICX_MESH2IIO_MMAP_DID 0x09A2
+
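+/* translate the SAD id read from a stack's SAD_CONTROL_CFG register into
+   the corresponding IIO PMU unit id on Icelake-SP */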
+static const u8 icx_sad_to_pmu_id_mapping[] = { 5, 0, 1, 2, 3, 4 };
+
+static const char *iio_bw_footer_message =
+ "* this bundle currently only measures x8 and x16 PCIe devices on Port #0\n"
+ "or Port #2. Please see the \"Intel® Xeon® Processor Scalable Memory\n"
+ "Family Uncore Performance Monitoring Reference Manual(336274)\"\n"
+ "Section 2.4 for more information.";
+
+static u32
+get_sad_ctrl_cfg (vlib_pci_addr_t *addr)
+{
+ int fd = 0;
+ u32 value;
+ u8 *dev_node_name = format (0, "%s/%02x/%02x.%x", procfs_pci_path, addr->bus,
+ addr->slot, addr->function);
+
+ fd = open ((char *) dev_node_name, O_RDWR);
+ if (fd < 0)
+ return -1;
+
+  /* pread returns -1 on error, so test for anything other than a full
+     read rather than using a signed/unsigned '<' comparison */
+  if (pread (fd, &value, sizeof (u32), SNR_ICX_SAD_CONTROL_CFG_OFFSET) !=
+      sizeof (u32))
+    value = -1;
+
+ close (fd);
+
+ return value;
+}
+
+static u64
+get_bus_to_sad_mappings (vlib_main_t *vm, index_t **ph, iio_uncore_sad_t **pp)
+{
+ index_t *h = 0;
+ iio_uncore_sad_t *p = 0, *e = 0;
+ vlib_pci_addr_t *addr = 0, *addrs;
+
+ addrs = vlib_pci_get_all_dev_addrs ();
+
+ vec_foreach (addr, addrs)
+ {
+ vlib_pci_device_info_t *d;
+ d = vlib_pci_get_device_info (vm, addr, 0);
+
+ if (!d)
+ continue;
+
+ if (d->vendor_id == PCM_INTEL_PCI_VENDOR_ID &&
+ d->device_id == SNR_ICX_MESH2IIO_MMAP_DID)
+ {
+
+ u32 sad_ctrl_cfg = get_sad_ctrl_cfg (addr);
+ if (sad_ctrl_cfg == 0xFFFFFFFF)
+ {
+ vlib_pci_free_device_info (d);
+ continue;
+ }
+
+ pool_get_zero (p, e);
+
+ e->socket_id = (sad_ctrl_cfg & 0xf);
+ e->sad_id = (sad_ctrl_cfg >> 4) & 0x7;
+ e->iio_unit_id = icx_sad_to_pmu_id_mapping[e->sad_id];
+
+ hash_set (h, addr->bus, e - p);
+ }
+
+ vlib_pci_free_device_info (d);
+ }
+
+ vec_free (addrs);
+
+ *ph = h;
+ *pp = p;
+
+ return 0;
+}
+
+u8 *
+format_stack_socket (u8 *s, va_list *va)
+{
+ iio_uncore_sad_t *e, *p = va_arg (*va, iio_uncore_sad_t *);
+ index_t *h = va_arg (*va, index_t *);
+ vlib_pci_addr_t root_bus, *addr = va_arg (*va, vlib_pci_addr_t *);
+ clib_error_t *err = vlib_pci_get_device_root_bus (addr, &root_bus);
+ if (err)
+ {
+ clib_error_free (err);
+ return s;
+ }
+
+ uword *pu = hash_get (h, root_bus.bus);
+ if (pu)
+ {
+ e = pool_elt_at_index (p, (index_t) pu[0]);
+
+ s = format (s, "IIO%u/%u", e->socket_id, e->iio_unit_id);
+ }
+ else
+ {
+ s = format (s, "[ERR: hash lookup for bus '%u' failed]", root_bus.bus);
+ }
+ return s;
+}
+
+static clib_error_t *
+init_intel_uncore_iio_bw (vlib_main_t *vm, struct perfmon_bundle *b)
+{
+ index_t *h = 0;
+ iio_uncore_sad_t *p = 0;
+ vlib_pci_addr_t *addr = 0, *addrs;
+ u8 *s = 0;
+
+ get_bus_to_sad_mappings (vm, &h, &p);
+
+ s = format (0, "%-10s%-5s%-13s%-12s%-14s%-16s%s\n", "Stack", "Port",
+ "Address", "VID:PID", "Link Speed", "Driver", "Product Name");
+
+ addrs = vlib_pci_get_all_dev_addrs ();
+
+ vec_foreach (addr, addrs)
+ {
+ vlib_pci_device_info_t *d;
+ d = vlib_pci_get_device_info (vm, addr, 0);
+
+ if (!d)
+ continue;
+
+ if (d->device_class != PCI_CLASS_NETWORK_ETHERNET)
+ continue;
+
+ s = format (
+ s, "%-10U%-5U%-13U%04x:%04x %-14U%-16s%v\n", format_stack_socket, p,
+ h, addr, format_vlib_pci_link_port, &d->config, format_vlib_pci_addr,
+ addr, d->vendor_id, d->device_id, format_vlib_pci_link_speed, d,
+ d->driver_name ? (char *) d->driver_name : "", d->product_name);
+
+ vlib_pci_free_device_info (d);
+ }
+
+ b->footer = (char *) format (s, "\n%s", iio_bw_footer_message);
+
+ vec_free (addrs);
+ pool_free (p);
+ hash_free (h);
+
+ return 0;
+}
+
+static u8 *
+format_intel_uncore_iio_bw (u8 *s, va_list *args)
+{
+ perfmon_reading_t *r = va_arg (*args, perfmon_reading_t *);
+ int col = va_arg (*args, int);
+ f64 tr = r->time_running * 1e-9;
+ f64 value = 0;
+
+ switch (col)
+ {
+ case 0:
+ s = format (s, "%9.2f", tr);
+ break;
+ default:
+ if (r->time_running)
+ {
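+	  /* the IIO DATA_REQ counters count 4-byte transactions, so scale
+	     by 4 to report bytes per second */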
+ value = r->value[col - 1] * 4 / tr;
+
+ if (value > 1.0e6)
+ s = format (s, "%9.0fM", value * 1e-6);
+ else if (value > 1.0e3)
+ s = format (s, "%9.0fK", value * 1e-3);
+ else
+ s = format (s, "%9.0f ", value);
+ }
+
+ break;
+ }
+
+ return s;
+}
+
+/*
+ * This bundle is currently only supported and tested on Intel Icelake.
+ */
+static int
+is_icelake ()
+{
+ return clib_cpu_supports_avx512_bitalg () && !clib_cpu_supports_movdir64b ();
+}
+
+static perfmon_cpu_supports_t iio_bw_cpu_supports[] = {
+ { is_icelake, PERFMON_BUNDLE_TYPE_SYSTEM }
+};
+
+PERFMON_REGISTER_BUNDLE (intel_uncore_iio_bw_pci) = {
+ .name = "iio-bandwidth-pci",
+ .description = "pci iio memory reads and writes per iio stack *",
+ .source = "intel-uncore",
+ .events[0] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_OF_CPU_PART0_RD,
+ .events[1] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART0_WR,
+ .events[2] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_RD,
+ .events[3] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_WR,
+ .n_events = 4,
+ .cpu_supports = iio_bw_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (iio_bw_cpu_supports),
+ .format_fn = format_intel_uncore_iio_bw,
+ .init_fn = init_intel_uncore_iio_bw,
+ .column_headers = PERFMON_STRINGS ("RunTime", "PCIe Rd/P0", "PCIe Wr/P0",
+ "PCIe Rd/P2", "PCIe Wr/P2")
+};
+
+PERFMON_REGISTER_BUNDLE (intel_uncore_iio_bw_cpu) = {
+ .name = "iio-bandwidth-cpu",
+ .description = "cpu iio memory reads and writes per iio stack *",
+ .source = "intel-uncore",
+ .events[0] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART0_RD,
+ .events[1] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART0_WR,
+ .events[2] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_RD,
+ .events[3] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_WR,
+ .n_events = 4,
+ .cpu_supports = iio_bw_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (iio_bw_cpu_supports),
+ .format_fn = format_intel_uncore_iio_bw,
+ .init_fn = init_intel_uncore_iio_bw,
+ .column_headers = PERFMON_STRINGS ("RunTime", "CPU Rd/P0", "CPU Wr/P0",
+ "CPU Rd/P2", "CPU Wr/P2")
+};
diff --git a/src/plugins/perfmon/intel/bundle/topdown_icelake.c b/src/plugins/perfmon/intel/bundle/topdown_icelake.c
new file mode 100644
index 00000000000..a3392e52f0a
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/topdown_icelake.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
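+/* heuristic Icelake detection via CPUID features: AVX512-BITALG implies
+   Icelake or newer, while MOVDIR64B is absent on Icelake but present on
+   later generations */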
+static int
+is_icelake ()
+{
+ return clib_cpu_supports_avx512_bitalg () && !clib_cpu_supports_movdir64b ();
+}
+
+static perfmon_cpu_supports_t topdown_lvl2_cpu_supports_icx[] = {
+ { is_icelake, PERFMON_BUNDLE_TYPE_THREAD }
+};
+
+#define GET_METRIC(m, i) (f64) (((m) >> (i * 8)) & 0xff)
+
+enum
+{
+ TD_SLOTS = 0,
+ STALLS_MEM_ANY,
+ STALLS_TOTAL,
+ BOUND_ON_STORES,
+ RECOVERY_CYCLES,
+ UOP_DROPPING,
+ UOP_NOT_DELIVERED,
+ TD_RETIRING,
+ TD_BAD_SPEC,
+ TD_FE_BOUND,
+ TD_BE_BOUND,
+};
+
+static_always_inline f64
+memory_bound_fraction (perfmon_reading_t *ss)
+{
+ return (ss->value[STALLS_MEM_ANY] + ss->value[BOUND_ON_STORES]) /
+ (f64) (ss->value[STALLS_TOTAL] + ss->value[BOUND_ON_STORES]);
+}
+
+static_always_inline f64
+perf_metrics_sum (perfmon_reading_t *ss)
+{
+ return ss->value[TD_RETIRING] + ss->value[TD_BAD_SPEC] +
+ ss->value[TD_FE_BOUND] + ss->value[TD_BE_BOUND];
+}
+
+static_always_inline f64
+retiring (perfmon_reading_t *ss)
+{
+ return ss->value[TD_RETIRING] / perf_metrics_sum (ss);
+}
+
+static_always_inline f64
+bad_speculation (perfmon_reading_t *ss)
+{
+ return ss->value[TD_BAD_SPEC] / perf_metrics_sum (ss);
+}
+
+static_always_inline f64
+frontend_bound (perfmon_reading_t *ss)
+{
+ return (ss->value[TD_FE_BOUND] / perf_metrics_sum (ss)) -
+ (ss->value[UOP_DROPPING] / perf_metrics_sum (ss));
+}
+
+static_always_inline f64
+backend_bound (perfmon_reading_t *ss)
+{
+ return (ss->value[TD_BE_BOUND] / perf_metrics_sum (ss)) +
+ ((5 * ss->value[RECOVERY_CYCLES]) / perf_metrics_sum (ss));
+}
+
+static_always_inline f64
+fetch_latency (perfmon_reading_t *ss)
+{
+ f64 r = ((5 * ss->value[UOP_NOT_DELIVERED] - ss->value[UOP_DROPPING]) /
+ (f64) ss->value[TD_SLOTS]);
+ return r;
+}
+
+static_always_inline f64
+fetch_bandwidth (perfmon_reading_t *ss)
+{
+ return clib_max (0, frontend_bound (ss) - fetch_latency (ss));
+}
+
+static_always_inline f64
+memory_bound (perfmon_reading_t *ss)
+{
+ return backend_bound (ss) * memory_bound_fraction (ss);
+}
+
+static_always_inline f64
+core_bound (perfmon_reading_t *ss)
+{
+ return backend_bound (ss) - memory_bound (ss);
+}
+
+static u8 *
+format_topdown_lvl2_icx (u8 *s, va_list *args)
+{
+ perfmon_reading_t *ss = va_arg (*args, perfmon_reading_t *);
+ u64 idx = va_arg (*args, int);
+ f64 sv = 0;
+
+ switch (idx)
+ {
+ case 0:
+ sv = retiring (ss);
+ break;
+ case 1:
+ sv = bad_speculation (ss);
+ break;
+ case 2:
+ sv = frontend_bound (ss);
+ break;
+ case 3:
+ sv = backend_bound (ss);
+ break;
+ case 4:
+ sv = fetch_latency (ss);
+ break;
+ case 5:
+ sv = fetch_bandwidth (ss);
+ break;
+ case 6:
+ sv = memory_bound (ss);
+ break;
+ case 7:
+ sv = core_bound (ss);
+ break;
+ }
+
+ s = format (s, "%f", sv * 100);
+
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric_icx) = {
+ .name = "topdown",
+ .description = "Top-down Microarchitecture Analysis Level 1 & 2",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
+ .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY,
+ .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL,
+ .events[3] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES,
+ .events[4] = INTEL_CORE_E_INT_MISC_RECOVERY_CYCLES,
+ .events[5] = INTEL_CORE_E_INT_MISC_UOP_DROPPING,
+ .events[6] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CORE,
+ .events[7] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
+ .events[8] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
+ .events[9] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
+ .events[10] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
+ .n_events = 11,
+ .cpu_supports = topdown_lvl2_cpu_supports_icx,
+ .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports_icx),
+ .format_fn = format_topdown_lvl2_icx,
+ .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% FE.FL",
+ "% FE.FB", "% BE.MB", "% BE.CB"),
+ .footer = "Retiring (RT), Bad Speculation (BS),\n"
+ " FrontEnd bound (FE), BackEnd bound (BE),\n"
+ " Fetch Latency (FL), Fetch Bandwidth (FB),\n"
+ " Memory Bound (MB), Core Bound (CB)",
+};
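
Note: the helpers above derive TMA level 2 from level 1 shares: fetch bandwidth is the frontend-bound share not explained by fetch latency, and core bound is the backend-bound share not explained by memory stalls. A sketch with hypothetical level 1 fractions:

    /* sketch: the L2 decomposition used by the Icelake bundle above;
     * all fractions are hypothetical */
    double fe_bound = 0.30, fetch_lat = 0.18;
    double be_bound = 0.40, mem_frac = 0.60;
    double fetch_bw = fe_bound - fetch_lat;	/* 0.12 */
    double mem_bound = be_bound * mem_frac;	/* 0.24 */
    double core_bound = be_bound - mem_bound;	/* 0.16 */
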
diff --git a/src/plugins/perfmon/intel/bundle/topdown_metrics.c b/src/plugins/perfmon/intel/bundle/topdown_metrics.c
index 386f3843bc3..a464dfe1c88 100644
--- a/src/plugins/perfmon/intel/bundle/topdown_metrics.c
+++ b/src/plugins/perfmon/intel/bundle/topdown_metrics.c
@@ -14,102 +14,202 @@
*/
#include <vnet/vnet.h>
+#include <vppinfra/math.h>
#include <perfmon/perfmon.h>
#include <perfmon/intel/core.h>
#define GET_METRIC(m, i) (((m) >> (i * 8)) & 0xff)
#define GET_RATIO(m, i) (((m) >> (i * 32)) & 0xffffffff)
-#define RDPMC_FIXED_SLOTS (1 << 30) /* fixed slots */
-#define RDPMC_L1_METRICS (1 << 29) /* l1 metric counters */
+#define RDPMC_SLOTS (1 << 30) /* fixed slots */
+#define RDPMC_METRICS (1 << 29) /* l1 & l2 metric counters */
#define FIXED_COUNTER_SLOTS 3
-#define METRIC_COUNTER_TOPDOWN_L1 0
+#define METRIC_COUNTER_TOPDOWN_L1_L2 0
typedef enum
{
- TOPDOWN_E_METRIC_RETIRING = 0,
- TOPDOWN_E_METRIC_BAD_SPEC,
- TOPDOWN_E_METRIC_FE_BOUND,
- TOPDOWN_E_METRIC_BE_BOUND,
-} topdown_lvl1_counters_t;
+ TOPDOWN_E_RETIRING = 0,
+ TOPDOWN_E_BAD_SPEC,
+ TOPDOWN_E_FE_BOUND,
+ TOPDOWN_E_BE_BOUND,
+ TOPDOWN_E_HEAVYOPS,
+ TOPDOWN_E_LIGHTOPS,
+ TOPDOWN_E_BMISPRED,
+ TOPDOWN_E_MCHCLEAR,
+ TOPDOWN_E_FETCHLAT,
+ TOPDOWN_E_FETCH_BW,
+ TOPDOWN_E_MEMBOUND,
+ TOPDOWN_E_CORBOUND,
+ TOPDOWN_E_MAX,
+} topdown_e_t;
enum
{
- TOPDOWN_SLOTS = 0,
- TOPDOWN_METRICS,
-} topdown_lvl1_metrics_t;
+ TOPDOWN_E_RDPMC_SLOTS = 0,
+ TOPDOWN_E_RDPMC_METRICS,
+};
+
+typedef f64 (topdown_lvl1_parse_fn_t) (void *, topdown_e_t);
-static_always_inline f32
-topdown_lvl1_parse_row (perfmon_node_stats_t *ns, topdown_lvl1_counters_t e)
+/* Parse thread level stats from perfmon_reading */
+static_always_inline f64
+topdown_lvl1_perf_reading (void *ps, topdown_e_t e)
{
+ perfmon_reading_t *ss = (perfmon_reading_t *) ps;
+
+ /* slots are at value[0], everything else follows at +1 */
+ return ((f64) ss->value[e + 1] / ss->value[0]) * 100;
+}
+
+static_always_inline f64
+topdown_lvl1_rdpmc_metric (void *ps, topdown_e_t e)
+{
+ perfmon_node_stats_t *ss = (perfmon_node_stats_t *) ps;
f64 slots_t0 =
- ns->t[0].value[TOPDOWN_SLOTS] *
- ((f64) GET_METRIC (ns->t[0].value[TOPDOWN_METRICS], e) / 0xff);
+ ss->t[0].value[TOPDOWN_E_RDPMC_SLOTS] *
+ ((f64) GET_METRIC (ss->t[0].value[TOPDOWN_E_RDPMC_METRICS], e) / 0xff);
f64 slots_t1 =
- ns->t[1].value[TOPDOWN_SLOTS] *
- ((f64) GET_METRIC (ns->t[1].value[TOPDOWN_METRICS], e) / 0xff);
- u64 slots_delta =
- ns->t[1].value[TOPDOWN_SLOTS] - ns->t[0].value[TOPDOWN_SLOTS];
+ ss->t[1].value[TOPDOWN_E_RDPMC_SLOTS] *
+ ((f64) GET_METRIC (ss->t[1].value[TOPDOWN_E_RDPMC_METRICS], e) / 0xff);
+ u64 slots_delta = ss->t[1].value[TOPDOWN_E_RDPMC_SLOTS] -
+ ss->t[0].value[TOPDOWN_E_RDPMC_SLOTS];
slots_t1 = slots_t1 - slots_t0;
return (slots_t1 / slots_delta) * 100;
}
-static u8 *
-format_topdown_lvl1 (u8 *s, va_list *args)
+/* Convert the TopDown enum to the perf reading index */
+#define TO_LVL2_PERF_IDX(e) \
+ ({ \
+ u8 to_idx[TOPDOWN_E_MAX] = { 0, 0, 0, 0, 5, 5, 6, 6, 7, 7, 8, 8 }; \
+ to_idx[e]; \
+ })
+
+/* Parse thread level stats from perfmon_reading */
+static_always_inline f64
+topdown_lvl2_perf_reading (void *ps, topdown_e_t e)
+{
+ perfmon_reading_t *ss = (perfmon_reading_t *) ps;
+ u64 value = ss->value[TO_LVL2_PERF_IDX (e)];
+
+ /* If it is an L1 metric, call L1 format */
+ if (TOPDOWN_E_BE_BOUND >= e)
+ {
+ return topdown_lvl1_perf_reading (ps, e);
+ }
+
+ /* all the odd metrics are inferred from the even and L1 metrics */
+ if (e & 0x1)
+ {
+ topdown_e_t e1 = TO_LVL2_PERF_IDX (e) - 4;
+ value = ss->value[e1] - value;
+ }
+
+ return (f64) value / ss->value[0] * 100;
+}
+
+/* Convert the TopDown enum to the rdpmc metric byte position */
+#define TO_LVL2_METRIC_BYTE(e) \
+ ({ \
+ u8 to_metric[TOPDOWN_E_MAX] = { 0, 0, 0, 0, 4, 4, 5, 5, 6, 6, 7, 7 }; \
+ to_metric[e]; \
+ })
+
+/* Convert the TopDown L2 enum to the reference TopDown L1 enum */
+#define TO_LVL1_REF(e) \
+ ({ \
+ u8 to_lvl1[TOPDOWN_E_MAX] = { -1, \
+ -1, \
+ -1, \
+ -1, \
+ TOPDOWN_E_RETIRING, \
+ TOPDOWN_E_RETIRING, \
+ TOPDOWN_E_BAD_SPEC, \
+ TOPDOWN_E_BAD_SPEC, \
+ TOPDOWN_E_FE_BOUND, \
+ TOPDOWN_E_FE_BOUND, \
+ TOPDOWN_E_BE_BOUND, \
+ TOPDOWN_E_BE_BOUND }; \
+ to_lvl1[e]; \
+ })
+
+static_always_inline f64
+topdown_lvl2_rdpmc_metric (void *ps, topdown_e_t e)
{
- perfmon_node_stats_t *st = va_arg (*args, perfmon_node_stats_t *);
- u64 row = va_arg (*args, int);
+ f64 r, l1_value = 0;
- switch (row)
+ /* If it is an L1 metric, call L1 format */
+ if (TOPDOWN_E_BE_BOUND >= e)
{
- case 0:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BAD_SPEC) +
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_RETIRING));
- break;
- case 1:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BE_BOUND) +
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_FE_BOUND));
- break;
- case 2:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_RETIRING));
- break;
- case 3:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BAD_SPEC));
- break;
- case 4:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_FE_BOUND));
- break;
- case 5:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BE_BOUND));
- break;
+ return topdown_lvl1_rdpmc_metric (ps, e);
}
+
+ /* all the odd metrics are inferred from the even and L1 metrics */
+ if (e & 0x1)
+ {
+ /* get the L1 reference metric */
+ l1_value = topdown_lvl1_rdpmc_metric (ps, TO_LVL1_REF (e));
+ }
+
+ /* calculate the l2 metric */
+ r =
+ fabs (l1_value - topdown_lvl1_rdpmc_metric (ps, TO_LVL2_METRIC_BYTE (e)));
+ return r;
+}
+
+static u8 *
+format_topdown_lvl2 (u8 *s, va_list *args)
+{
+ void *ps = va_arg (*args, void *);
+ u64 idx = va_arg (*args, int);
+ perfmon_bundle_type_t type = va_arg (*args, perfmon_bundle_type_t);
+ f64 sv = 0;
+
+ topdown_lvl1_parse_fn_t *parse_fn,
+ *parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl2_rdpmc_metric,
+ topdown_lvl2_perf_reading, 0 };
+
+ parse_fn = parse_fns[type];
+ ASSERT (parse_fn);
+
+ sv = parse_fn (ps, (topdown_e_t) idx);
+ s = format (s, "%f", sv);
+
return s;
}
-PERFMON_REGISTER_BUNDLE (topdown_lvl1) = {
- .name = "topdown-level1",
- .description = "Top-down Microarchitecture Analysis Level 1",
+static perfmon_cpu_supports_t topdown_lvl2_cpu_supports[] = {
+ /* Intel SPR supports papi/thread or rdpmc/node */
+ { clib_cpu_supports_avx512_fp16, PERFMON_BUNDLE_TYPE_NODE_OR_THREAD }
+};
+
+PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric) = {
+ .name = "topdown",
+ .description = "Top-down Microarchitecture Analysis Level 1 & 2",
.source = "intel-core",
- .type = PERFMON_BUNDLE_TYPE_NODE,
- .offset_type = PERFMON_OFFSET_TYPE_METRICS,
.events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
- .events[1] = INTEL_CORE_E_TOPDOWN_L1_METRICS,
- .metrics[0] = RDPMC_FIXED_SLOTS | FIXED_COUNTER_SLOTS,
- .metrics[1] = RDPMC_L1_METRICS | METRIC_COUNTER_TOPDOWN_L1,
- .n_events = 2,
- .cpu_supports = clib_cpu_supports_avx512_bitalg,
- .format_fn = format_topdown_lvl1,
- .column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS",
- "% ST.FE", "% ST.BE"),
- .footer = "Not Stalled (NS),STalled (ST),\n"
- " Retiring (RT), Bad Speculation (BS),\n"
- " FrontEnd bound (FE), BackEnd bound (BE)",
+ .events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
+ .events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
+ .events[3] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
+ .events[4] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
+ .events[5] = INTEL_CORE_E_TOPDOWN_L2_HEAVYOPS_METRIC,
+ .events[6] = INTEL_CORE_E_TOPDOWN_L2_BMISPRED_METRIC,
+ .events[7] = INTEL_CORE_E_TOPDOWN_L2_FETCHLAT_METRIC,
+ .events[8] = INTEL_CORE_E_TOPDOWN_L2_MEMBOUND_METRIC,
+ .n_events = 9,
+ .preserve_samples = 0x1FF,
+ .cpu_supports = topdown_lvl2_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports),
+ .format_fn = format_topdown_lvl2,
+ .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% RT.HO",
+ "% RT.LO", "% BS.BM", "% BS.MC",
+ "% FE.FL", "% FE.FB", "% BE.MB",
+ "% BE.CB"),
+ .footer = "Retiring (RT), Bad Speculation (BS),\n"
+ " FrontEnd bound (1FE), BackEnd bound (BE),\n"
+ " Light Operations (LO), Heavy Operations (HO),\n"
+ " Branch Misprediction (BM), Machine Clears (MC),\n"
+ " Fetch Latency (FL), Fetch Bandwidth (FB),\n"
+ " Memory Bound (MB), Core Bound (CB)",
};
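
Note: on the PERF_METRICS path, every level 1/2 metric arrives as one byte of a packed 64-bit register; GET_METRIC above extracts byte i, which callers scale by 0xff. A worked sketch with a hypothetical register snapshot:

    /* sketch: decode one packed PERF_METRICS byte, as GET_METRIC above
     * does; the register snapshot is hypothetical */
    #define GET_METRIC(m, i) (((m) >> (i * 8)) & 0xff)

    unsigned long long metrics = 0x0a; /* byte 0 = retiring share */
    double retiring_pct = (double) GET_METRIC (metrics, 0) / 0xff * 100;
    /* 0x0a / 0xff -> ~3.9% of slots retiring */
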
diff --git a/src/plugins/perfmon/intel/bundle/topdown_tremont.c b/src/plugins/perfmon/intel/bundle/topdown_tremont.c
new file mode 100644
index 00000000000..b2626eb0480
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/topdown_tremont.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+typedef enum
+{
+ TOPDOWN_E_RETIRING = 0,
+ TOPDOWN_E_BAD_SPEC,
+ TOPDOWN_E_FE_BOUND,
+ TOPDOWN_E_BE_BOUND,
+ TOPDOWN_E_MAX,
+} topdown_lvl1_t;
+
+static u8 *
+format_topdown_lvl1 (u8 *s, va_list *args)
+{
+ perfmon_reading_t *ss = va_arg (*args, perfmon_reading_t *);
+ u64 idx = va_arg (*args, int);
+ f64 sv = 0;
+ u64 total = 0;
+
+ for (int i = 0; i < TOPDOWN_E_MAX; i++)
+ total += ss->value[i];
+
+ switch (idx)
+ {
+ case 0:
+ sv = (f64) ss->value[TOPDOWN_E_RETIRING] + ss->value[TOPDOWN_E_BAD_SPEC];
+ break;
+ case 1:
+ sv = (f64) ss->value[TOPDOWN_E_FE_BOUND] + ss->value[TOPDOWN_E_BE_BOUND];
+ break;
+ default:
+ sv = (f64) ss->value[idx - 2];
+ break;
+ }
+
+ sv = (sv / total) * 100;
+ s = format (s, "%f", sv);
+ return s;
+}
+
+static int
+is_tremont ()
+{
+ return clib_cpu_supports_movdir64b () && !clib_cpu_supports_avx2 ();
+}
+
+static perfmon_cpu_supports_t topdown_lvl1_cpu_supports[] = {
+ { is_tremont, PERFMON_BUNDLE_TYPE_THREAD }
+};
+
+PERFMON_REGISTER_BUNDLE (topdown_lvl1_tremont) = {
+ .name = "topdown-level1",
+ .description = "Top-down Microarchitecture Analysis Level 1",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_TREMONT,
+ .events[1] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_TREMONT,
+ .events[2] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_TREMONT,
+ .events[3] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_TREMONT,
+ .n_events = 4,
+ .cpu_supports = topdown_lvl1_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (topdown_lvl1_cpu_supports),
+ .format_fn = format_topdown_lvl1,
+ .column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS",
+ "% ST.FE", "% ST.BE"),
+ .footer = "Not Stalled (NS),STalled (ST),\n"
+ " Retiring (RT), Bad Speculation (BS),\n"
+ " FrontEnd bound (FE), BackEnd bound (BE)",
+};
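
Note: the columns above follow format_topdown_lvl1: every value is a share of total slots, with NS = RT + BS and ST = FE + BE. A sketch with hypothetical counter values:

    /* sketch: Tremont topdown L1 column math; counts are hypothetical */
    unsigned long long rt = 50, bs = 10, fe = 15, be = 25;
    double total = rt + bs + fe + be;		  /* 100 slots */
    double ns = (double) (rt + bs) / total * 100; /* %NS = 60 */
    double st = (double) (fe + be) / total * 100; /* %ST = 40 */
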
diff --git a/src/plugins/perfmon/intel/core.c b/src/plugins/perfmon/intel/core.c
index cef6f32d7e6..d6a16b2125e 100644
--- a/src/plugins/perfmon/intel/core.c
+++ b/src/plugins/perfmon/intel/core.c
@@ -16,6 +16,7 @@
#include <vnet/vnet.h>
#include <perfmon/perfmon.h>
#include <perfmon/intel/core.h>
+#include <perfmon/intel/dispatch_wrapper.h>
#include <linux/perf_event.h>
static perfmon_event_t events[] = {
@@ -25,9 +26,12 @@ static perfmon_event_t events[] = {
event, umask, edge, any, inv, cmask), \
.name = #n "." #suffix, \
.description = desc, \
+ .implemented = 1, \
.exclude_kernel = 1 },
- foreach_perf_intel_core_event
+ foreach_perf_intel_core_event foreach_perf_intel_peusdo_event
+ foreach_perf_intel_tremont_event
+
#undef _
};
@@ -55,6 +59,10 @@ format_intel_core_config (u8 *s, va_list *args)
if ((v = (config >> 24) & 0xff))
s = format (s, ", cmask=0x%02x", v);
+ /* show the raw config, for convenience sake */
+ if (!((config >> 16) & 0xffff))
+ s = format (s, ", raw=r%x", config & 0xffff);
+
return s;
}
@@ -71,11 +79,79 @@ intel_core_init (vlib_main_t *vm, perfmon_source_t *src)
return 0;
}
+perfmon_event_type_t
+intel_core_get_event_type (u32 event)
+{
+ u64 config = events[event].config;
+ u8 eventcode = (config & 0xFF);
+ u8 umask = ((config >> 8) & 0xFF);
+
+ if (!eventcode) /* is fixed or pseudo */
+ {
+ if (umask >= 0x80) /* is pseudo */
+ return PERFMON_EVENT_TYPE_PSEUDO;
+ else /* is fixed */
+ return PERFMON_EVENT_TYPE_FIXED;
+ }
+ else
+ return PERFMON_EVENT_TYPE_GENERAL;
+}
+
+static u8
+is_enough_counters (perfmon_bundle_t *b)
+{
+ u8 bl[PERFMON_EVENT_TYPE_MAX];
+ u8 cpu[PERFMON_EVENT_TYPE_MAX];
+
+ clib_memset (&bl, 0, sizeof (bl));
+ clib_memset (&cpu, 0, sizeof (cpu));
+
+ /* how many does this uarch support */
+ if (!clib_get_pmu_counter_count (&cpu[PERFMON_EVENT_TYPE_FIXED],
+ &cpu[PERFMON_EVENT_TYPE_GENERAL]))
+ return 0;
+
+ /* how many does the bundle require */
+ for (u16 i = 0; i < b->n_events; i++)
+ {
+ /* use the source's event classification if available; otherwise
+ assume general */
+ if (b->src->get_event_type)
+ bl[b->src->get_event_type (b->events[i])]++;
+ else
+ bl[PERFMON_EVENT_TYPE_GENERAL]++;
+ }
+
+ /* consciously ignoring pseudo events here */
+ return cpu[PERFMON_EVENT_TYPE_GENERAL] >= bl[PERFMON_EVENT_TYPE_GENERAL] &&
+ cpu[PERFMON_EVENT_TYPE_FIXED] >= bl[PERFMON_EVENT_TYPE_FIXED];
+}
+
+u8
+intel_bundle_supported (perfmon_bundle_t *b)
+{
+ perfmon_cpu_supports_t *supports = b->cpu_supports;
+
+ if (!is_enough_counters (b))
+ return 0;
+
+ if (!b->cpu_supports)
+ return 1;
+
+ for (int i = 0; i < b->n_cpu_supports; ++i)
+ if (supports[i].cpu_supports ())
+ return 1;
+
+ return 0;
+}
+
PERFMON_REGISTER_SOURCE (intel_core) = {
.name = "intel-core",
.description = "intel arch core events",
.events = events,
.n_events = ARRAY_LEN (events),
.init_fn = intel_core_init,
+ .get_event_type = intel_core_get_event_type,
.format_config = format_intel_core_config,
+ .bundle_support = intel_bundle_supported,
+ .config_dispatch_wrapper = intel_config_dispatch_wrapper,
};
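
Note: intel_core_get_event_type above classifies by config word alone: eventcode 0 with umask >= 0x80 is pseudo, eventcode 0 with a smaller umask is fixed, anything else is general; is_enough_counters then checks the per-type demand against what clib_get_pmu_counter_count reports. A sketch of the classification:

    /* sketch: classifying a config word the way intel_core_get_event_type
     * above does; TOPDOWN.SLOTS shown (event 0x00, umask 0x04) */
    unsigned long long config = 0x0400;
    unsigned char eventcode = config & 0xff;	/* 0x00: fixed or pseudo */
    unsigned char umask = (config >> 8) & 0xff; /* 0x04 < 0x80: fixed */
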
diff --git a/src/plugins/perfmon/intel/core.h b/src/plugins/perfmon/intel/core.h
index cd5c31ba260..b2b0434acb3 100644
--- a/src/plugins/perfmon/intel/core.h
+++ b/src/plugins/perfmon/intel/core.h
@@ -16,12 +16,46 @@
#ifndef __perfmon_intel_h
#define __perfmon_intel_h
+u8 intel_bundle_supported (perfmon_bundle_t *b);
+
#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask) \
((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 | \
(cmask) << 24)
/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
* counter_unit, name, suffix, description */
+#define foreach_perf_intel_peusdo_event \
+ _ (0x00, 0x80, 0, 0, 0, 0x00, TOPDOWN, L1_RETIRING_METRIC, \
+ "TMA retiring slots for an unhalted logical processor.") \
+ _ (0x00, 0x81, 0, 0, 0, 0x00, TOPDOWN, L1_BAD_SPEC_METRIC, \
+ "TMA bad spec slots or an unhalted logical processor.") \
+ _ (0x00, 0x82, 0, 0, 0, 0x00, TOPDOWN, L1_FE_BOUND_METRIC, \
+ "TMA fe bound slots for an unhalted logical processor.") \
+ _ (0x00, 0x83, 0, 0, 0, 0x00, TOPDOWN, L1_BE_BOUND_METRIC, \
+ "TMA be bound slots for an unhalted logical processor.") \
+ _ (0x00, 0x84, 0, 0, 0, 0x00, TOPDOWN, L2_HEAVYOPS_METRIC, \
+ "TMA heavy operations for an unhalted logical processor.") \
+ _ (0x00, 0x85, 0, 0, 0, 0x00, TOPDOWN, L2_BMISPRED_METRIC, \
+ "TMA branch misprediction slots or an unhalted logical processor.") \
+ _ (0x00, 0x86, 0, 0, 0, 0x00, TOPDOWN, L2_FETCHLAT_METRIC, \
+ "TMA fetch latency slots for an unhalted logical processor.") \
+ _ (0x00, 0x87, 0, 0, 0, 0x00, TOPDOWN, L2_MEMBOUND_METRIC, \
+ "TMA mem bound slots for an unhalted logical processor.")
+
+/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
+ * counter_unit, name, suffix, description */
+#define foreach_perf_intel_tremont_event \
+ _ (0xc2, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_RETIRING_TREMONT, \
+ "TMA retiring slots for an unhalted logical processor.") \
+ _ (0x71, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_FE_BOUND_TREMONT, \
+ "TMA fe bound slots for an unhalted logical processor.") \
+ _ (0x73, 0x06, 0, 0, 0, 0x00, TOPDOWN, L1_BAD_SPEC_TREMONT, \
+ "TMA bad spec slots or an unhalted logical processor.") \
+ _ (0x74, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_BE_BOUND_TREMONT, \
+ "TMA be bound slots for an unhalted logical processor.")
+
+/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
+ * counter_unit, name, suffix, description */
#define foreach_perf_intel_core_event \
_ (0x00, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, THREAD, \
"Core cycles when the thread is not in halt state") \
@@ -29,8 +63,6 @@
"Reference cycles when the core is not in halt state.") \
_ (0x00, 0x04, 0, 0, 0, 0x00, TOPDOWN, SLOTS, \
"TMA slots available for an unhalted logical processor.") \
- _ (0x00, 0x80, 0, 0, 0, 0x00, TOPDOWN, L1_METRICS, \
- "TMA slots metrics for an unhalted logical processor.") \
_ (0x03, 0x02, 0, 0, 0, 0x00, LD_BLOCKS, STORE_FORWARD, \
"Loads blocked due to overlapping with a preceding store that cannot be" \
" forwarded.") \
@@ -59,6 +91,12 @@
_ (0x0D, 0x01, 0, 0, 0, 0x00, INT_MISC, RECOVERY_CYCLES, \
"Core cycles the allocator was stalled due to recovery from earlier " \
"clear event for this thread (e.g. misprediction or memory nuke)") \
+ _ (0x0D, 0x10, 0, 0, 0, 0x00, INT_MISC, UOP_DROPPING, \
+ "Estimated number of Top-down Microarchitecture Analysis slots that got" \
+ " due to non front-end reasons") \
+ _ (0x0D, 0x80, 0, 0, 0, 0x00, INT_MISC, CLEAR_RESTEER_CYCLES, \
+ "Counts cycles after recovery from a branch misprediction or machine" \
+ "clear till the first uop is issued from the resteered path.") \
_ (0x0E, 0x01, 0, 0, 0, 0x00, UOPS_ISSUED, ANY, \
"Uops that Resource Allocation Table (RAT) issues to Reservation " \
"Station (RS)") \
@@ -93,12 +131,80 @@
_ (0x51, 0x01, 0, 0, 0, 0x00, L1D, REPLACEMENT, \
"L1D data line replacements") \
_ (0x51, 0x04, 0, 0, 0, 0x00, L1D, M_EVICT, "L1D data line evictions") \
+ _ (0x79, 0x04, 0, 0, 0, 0x00, IDQ, MITE_UOPS, \
+ "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
+ "from the MITE path.") \
+ _ (0x79, 0x08, 0, 0, 0, 0x00, IDQ, DSB_UOPS, \
+ "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
+ "from the Decode Stream Buffer (DSB) path.") \
+ _ (0x79, 0x30, 0, 0, 0, 0x00, IDQ, MS_UOPS, \
+ "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
+ "from the Microcode Sequencer (MS) path.") \
+ _ (0x79, 0x30, 1, 0, 0, 0x01, IDQ, MS_SWITCHES, \
+ "Number of switches from DSB or MITE to the MS") \
+ _ ( \
+ 0x80, 0x04, 0, 0, 0, 0x00, ICACHE_16B, IFDATA_STALL, \
+ "Cycles where a code fetch is stalled due to L1 instruction cache miss.") \
+ _ (0x83, 0x04, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_STALL, \
+ "Cycles where a code fetch is stalled due to L1 instruction cache tag " \
+ "miss.") \
_ (0x83, 0x02, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_MISS, \
"Instruction fetch tag lookups that miss in the instruction cache " \
"(L1I). Counts at 64-byte cache-line granularity.") \
- _ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE, \
+ _ (0x9C, 0x01, 0, 0, 0, 0x05, IDQ_UOPS_NOT_DELIVERED, CORE, \
"Uops not delivered to Resource Allocation Table (RAT) per thread when " \
"backend of the machine is not stalled") \
+ _ (0x9C, 0x01, 0, 0, 1, 0x01, IDQ_UOPS_NOT_DELIVERED, CYCLES_FE_WAS_OK, \
+ "Cycles with 4 uops delivered by the front end or Resource Allocation " \
+ "Table (RAT) was stalling FE.x") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x01, IDQ_UOPS_NOT_DELIVERED_CYCLES_3_UOP_DELIV, \
+ CORE, "Cycles with 3 uops delivered by the front end.") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x02, IDQ_UOPS_NOT_DELIVERED_CYCLES_2_UOP_DELIV, \
+ CORE, "Cycles with 2 uops delivered by the front end.") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x03, IDQ_UOPS_NOT_DELIVERED_CYCLES_1_UOP_DELIV, \
+ CORE, "Cycles with 1 uops delivered by the front end.") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x04, IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOP_DELIV, \
+ CORE, "Cycles with 0 uops delivered by the front end.") \
+ _ (0xA1, 0x01, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_0, \
+ "Number of uops executed on port 0") \
+ _ (0xA1, 0x02, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_1, \
+ "Number of uops executed on port 1") \
+ _ (0xA1, 0x04, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_2_3, \
+ "Number of uops executed on port 2 and 3") \
+ _ (0xA1, 0x10, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_4_9, \
+ "Number of uops executed on port 4 and 9") \
+ _ (0xA1, 0x20, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_5, \
+ "Number of uops executed on port 5") \
+ _ (0xA1, 0x40, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_6, \
+ "Number of uops executed on port 6") \
+ _ (0xA1, 0x80, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_7_8, \
+ "Number of uops executed on port 7 and 8") \
+ _ (0xA2, 0x08, 0, 0, 0, 0x00, RESOURCE_STALLS, SB, \
+ "Counts allocation stall cycles caused by the store buffer (SB) being " \
+ "full. This counts cycles that the pipeline back-end blocked uop " \
+ "delivery" \
+ "from the front-end.") \
+ _ (0xA3, 0x04, 0, 0, 0, 0x04, CYCLE_ACTIVITY, STALLS_TOTAL, \
+ "Total execution stalls.") \
+ _ (0xA3, 0x05, 0, 0, 0, 0x05, CYCLE_ACTIVITY, STALLS_L2_MISS, \
+ "Execution stalls while L2 cache miss demand load is outstanding") \
+ _ (0xA3, 0x06, 0, 0, 0, 0x06, CYCLE_ACTIVITY, STALLS_L3_MISS, \
+ "Execution stalls while L3 cache miss demand load is outstanding") \
+ _ (0xA3, 0x0C, 0, 0, 0, 0x0C, CYCLE_ACTIVITY, STALLS_L1D_MISS, \
+ "Execution stalls while L1 cache miss demand load is outstanding") \
+ _ (0xA3, 0x14, 0, 0, 0, 0x14, CYCLE_ACTIVITY, STALLS_MEM_ANY, \
+ "Execution stalls while memory subsystem has an outstanding load.") \
+ _ (0xA6, 0x40, 0, 0, 0, 0x02, EXE_ACTIVITY, BOUND_ON_STORES, \
+ "Cycles where the Store Buffer was full and no loads caused an " \
+ "execution stall.") \
+ _ (0xA8, 0x01, 0, 0, 0, 0x00, LSD, UOPS, \
+ "Counts the number of uops delivered to the back-end by the LSD" \
+ "(Loop Stream Detector)") \
+ _ (0xAB, 0x02, 0, 0, 0, 0x00, DSB2MITE_SWITCHES, PENALTY_CYCLES, \
+ "This event counts fetch penalty cycles when a transition occurs from" \
+ "DSB to MITE.") \
+ _ (0xB1, 0x01, 0, 0, 0, 0x00, UOPS_EXECUTED, THREAD, \
+ "Counts the number of uops to be executed per-thread each cycle.") \
_ (0xC0, 0x00, 0, 0, 0, 0x00, INST_RETIRED, ANY_P, \
"Number of instructions retired. General Counter - architectural event") \
_ (0xC2, 0x02, 0, 0, 0, 0x00, UOPS_RETIRED, RETIRE_SLOTS, \
@@ -109,8 +215,6 @@
"All mispredicted macro branch instructions retired.") \
_ (0xC4, 0x20, 0, 0, 0, 0x00, BR_INST_RETIRED, NEAR_TAKEN, \
"Taken branch instructions retired.") \
- _ (0xD0, 0x81, 0, 0, 0, 0x00, MEM_INST_RETIRED, ALL_LOADS, \
- "All retired load instructions.") \
_ (0xD0, 0x82, 0, 0, 0, 0x00, MEM_INST_RETIRED, ALL_STORES, \
"All retired store instructions.") \
_ (0xD1, 0x01, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L1_HIT, \
@@ -152,10 +256,20 @@
_ (0xD3, 0x08, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_FWD, \
"Retired load instructions whose data sources was forwarded from a " \
"remote cache") \
+ _ (0xE6, 0x01, 0, 0, 0, 0x00, BACLEARS, ANY, \
+ "Counts the total number when the front end is resteered, mainly when " \
+ "the BPU cannot provide a correct prediction and this is corrected by " \
+ "other branch handling mechanisms at the front end.") \
+ _ (0xEC, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, DISTRIBUTED, \
+ "Cycle counts are evenly distributed between active threads in the " \
+ " Core") \
_ (0xF0, 0x40, 0, 0, 0, 0x00, L2_TRANS, L2_WB, \
"L2 writebacks that access L2 cache") \
_ (0xF1, 0x1F, 0, 0, 0, 0x00, L2_LINES_IN, ALL, \
"L2 cache lines filling L2") \
+ _ (0xF4, 0x04, 0, 0, 0, 0x00, SQ_MISC, SQ_FULL, \
+ "Counts the cycles for which the thread is active and the superQ cannot" \
+ "take any more entries.") \
_ (0xFE, 0x02, 0, 0, 0, 0x00, IDI_MISC, WB_UPGRADE, \
"Counts number of cache lines that are allocated and written back to L3" \
" with the intention that they are more likely to be reused shortly") \
@@ -167,9 +281,10 @@ typedef enum
{
#define _(event, umask, edge, any, inv, cmask, name, suffix, desc) \
INTEL_CORE_E_##name##_##suffix,
- foreach_perf_intel_core_event
+ foreach_perf_intel_core_event foreach_perf_intel_peusdo_event
+ foreach_perf_intel_tremont_event
#undef _
- INTEL_CORE_N_EVENTS,
+ INTEL_CORE_N_EVENTS,
} perf_intel_core_event_t;
#endif
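
Note: PERF_INTEL_CODE packs eventcode, umask, edge detect, any-thread, invert and cmask into one perf config word. For the retuned IDQ_UOPS_NOT_DELIVERED.CORE entry above (event 0x9C, umask 0x01, cmask 0x05):

    /* sketch: expanding PERF_INTEL_CODE for event 0x9C, umask 0x01,
     * cmask 0x05 */
    unsigned long long cfg =
      0x9C | (0x01 << 8) | (0x05 << 24); /* = 0x0500019c */
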
diff --git a/src/plugins/perfmon/intel/dispatch_wrapper.c b/src/plugins/perfmon/intel/dispatch_wrapper.c
new file mode 100644
index 00000000000..d424b54b85f
--- /dev/null
+++ b/src/plugins/perfmon/intel/dispatch_wrapper.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "vppinfra/string.h"
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+#include <sys/ioctl.h>
+
+#include <perfmon/perfmon.h>
+
+vlib_node_function_t *perfmon_dispatch_wrappers[PERF_MAX_EVENTS + 1];
+
+static_always_inline void
+perfmon_read_pmcs (u64 *counters, u32 *indexes, u8 n_counters)
+{
+ for (int i = 0; i < n_counters; i++)
+ counters[i] = _rdpmc (indexes[i] - 1);
+}
+
+static_always_inline uword
+perfmon_dispatch_wrapper_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u8 n_events)
+{
+ perfmon_main_t *pm = &perfmon_main;
+ perfmon_thread_runtime_t *rt =
+ vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
+ perfmon_node_stats_t *s =
+ vec_elt_at_index (rt->node_stats, node->node_index);
+
+ struct
+ {
+ u64 t[2][PERF_MAX_EVENTS];
+ } samples;
+ uword rv;
+
+ clib_prefetch_load (s);
+
+ perfmon_read_pmcs (&samples.t[0][0], &rt->indexes[0], n_events);
+ rv = node->function (vm, node, frame);
+ perfmon_read_pmcs (&samples.t[1][0], &rt->indexes[0], n_events);
+
+ if (rv == 0)
+ return rv;
+
+ s->n_calls += 1;
+ s->n_packets += rv;
+
+ for (int i = 0; i < n_events; i++)
+ {
+ if (!(rt->preserve_samples & 1 << i))
+ {
+ s->value[i] += samples.t[1][i] - samples.t[0][i];
+ }
+ else
+ {
+ s->t[0].value[i] = samples.t[0][i];
+ s->t[1].value[i] = samples.t[1][i];
+ }
+ }
+
+ return rv;
+}
+
+static_always_inline u32
+perfmon_mmap_read_index (const struct perf_event_mmap_page *mmap_page)
+{
+ u32 idx;
+ u32 seq;
+
+ /* See the documentation in /usr/include/linux/perf_event.h for details,
+ * but the two important points are:
+ * 1) if seq != mmap_page->lock, it means the kernel is currently updating
+ * the user page and we need to read it again
+ * 2) if idx == 0, it means the perf event is currently turned off and we
+ * just need to read the kernel-updated 'offset', otherwise we must also
+ * add the current hw value (hence rdpmc) */
+ do
+ {
+ seq = mmap_page->lock;
+ CLIB_COMPILER_BARRIER ();
+
+ idx = mmap_page->index;
+
+ CLIB_COMPILER_BARRIER ();
+ }
+ while (mmap_page->lock != seq);
+
+ return idx;
+}
+
+static_always_inline clib_error_t *
+read_mmap_indexes (perfmon_bundle_t *b)
+{
+ perfmon_main_t *pm = &perfmon_main;
+ for (int i = 0; i < vec_len (pm->thread_runtimes); i++)
+ {
+ perfmon_thread_runtime_t *tr;
+ tr = vec_elt_at_index (pm->thread_runtimes, i);
+
+ for (int j = 0; j < b->n_events; j++)
+ {
+ tr->indexes[j] = perfmon_mmap_read_index (tr->mmap_pages[j]);
+
+ /* if a zero index is returned, generate an error */
+ if (!tr->indexes[j])
+ {
+ return clib_error_return (0, "invalid rdpmc index");
+ }
+ }
+ }
+ return 0;
+}
+
+clib_error_t *
+intel_config_dispatch_wrapper (perfmon_bundle_t *b,
+ vlib_node_function_t **dispatch_wrapper)
+{
+ clib_error_t *err = 0;
+ if ((err = read_mmap_indexes (b)) != 0)
+ return err;
+
+ (*dispatch_wrapper) = perfmon_dispatch_wrappers[b->n_events];
+ return 0;
+}
+
+#define foreach_n_events \
+ _ (1) _ (2) _ (3) _ (4) _ (5) _ (6) _ (7) _ (8) _ (9) _ (10) _ (11) _ (12)
+
+#define _(x) \
+ static uword perfmon_dispatch_wrapper##x ( \
+ vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) \
+ { \
+ return perfmon_dispatch_wrapper_inline (vm, node, frame, x); \
+ }
+
+foreach_n_events
+#undef _
+
+ vlib_node_function_t *perfmon_dispatch_wrappers[PERF_MAX_EVENTS + 1] = {
+#define _(x) [x] = &perfmon_dispatch_wrapper##x,
+ foreach_n_events
+#undef _
+ };
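
Note: the wrapper above brackets each node dispatch with rdpmc reads and charges the counter delta to that node. The same bracketing pattern, reduced to one counter around an arbitrary function (the idx value is assumed to come from the perf mmap page, already decremented as in perfmon_read_pmcs):

    /* sketch: rdpmc bracketing as in perfmon_dispatch_wrapper_inline;
     * idx is hypothetical and must be the mmap-provided index - 1 */
    #include <x86intrin.h>

    static unsigned long long
    measure (int idx, void (*fn) (void))
    {
      unsigned long long t0 = _rdpmc (idx);
      fn ();
      return _rdpmc (idx) - t0; /* counter delta across the call */
    }
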
diff --git a/src/plugins/perfmon/intel/dispatch_wrapper.h b/src/plugins/perfmon/intel/dispatch_wrapper.h
new file mode 100644
index 00000000000..bcf4885d54d
--- /dev/null
+++ b/src/plugins/perfmon/intel/dispatch_wrapper.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+clib_error_t *
+intel_config_dispatch_wrapper (perfmon_bundle_t *b,
+ vlib_node_function_t **dispatch_wrapper);
diff --git a/src/plugins/perfmon/intel/uncore.c b/src/plugins/perfmon/intel/uncore.c
index e8939cb67c9..316ebb13571 100644
--- a/src/plugins/perfmon/intel/uncore.c
+++ b/src/plugins/perfmon/intel/uncore.c
@@ -15,6 +15,8 @@
#include <vnet/vnet.h>
#include <vppinfra/linux/sysfs.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/unix.h>
#include <perfmon/perfmon.h>
#include <perfmon/intel/core.h>
#include <perfmon/intel/uncore.h>
@@ -35,14 +37,21 @@ VLIB_REGISTER_LOG_CLASS (if_intel_uncore_log, static) = {
((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 | \
(cmask) << 24)
+static intel_uncore_unit_type_names_t uncore_unit_names[] = {
+ { INTEL_UNCORE_UNIT_IIO,
+ PERFMON_STRINGS ("PCIe0", "PCIe1", "MCP", "PCIe2", "PCIe3", "CBDMA/DMI") }
+};
+
static perfmon_event_t intel_uncore_events[] = {
-#define _(unit, event, umask, n, suffix, desc) \
+#define _(unit, event, umask, ch_mask, fc_mask, n, suffix, desc) \
[INTEL_UNCORE_E_##unit##_##n##_##suffix] = { \
- .config = (event) | (umask) << 8, \
+ .config = \
+ (event) | (umask) << 8 | (u64) (ch_mask) << 36 | (u64) (fc_mask) << 48, \
.name = #n "." #suffix, \
.description = desc, \
.type_from_instance = 1, \
.instance_type = INTEL_UNCORE_UNIT_##unit, \
+ .implemented = 1, \
},
foreach_intel_uncore_event
@@ -57,6 +66,32 @@ intel_uncore_instance_name_cmp (void *v1, void *v2)
return strcmp (i1->name, i2->name);
}
+static u8 *
+format_instance_name (intel_uncore_unit_type_t u, char *unit_fmt, u8 socket_id,
+ u8 ubox)
+{
+ u8 *s = 0;
+
+ /* uncore ubox may have specific names */
+ for (u8 i = 0; i < ARRAY_LEN (uncore_unit_names); i++)
+ {
+ intel_uncore_unit_type_names_t *n = &uncore_unit_names[i];
+
+ if (n->unit_type == u)
+ {
+ u8 *fmt = 0;
+
+ fmt = format (0, "%s (%s)%c", unit_fmt, (n->unit_names[ubox]), 0);
+ s = format (0, (char *) fmt, socket_id, ubox);
+ vec_free (fmt);
+
+ return s;
+ }
+ }
+
+ return format (0, unit_fmt, socket_id, ubox);
+}
+
static void
intel_uncore_add_unit (perfmon_source_t *src, intel_uncore_unit_type_t u,
char *name, char *type_str, char *fmt,
@@ -94,7 +129,8 @@ intel_uncore_add_unit (perfmon_source_t *src, intel_uncore_unit_type_t u,
in->type = perf_type;
in->cpu = j;
in->pid = -1;
- in->name = (char *) format (0, fmt, socket_by_cpu_id[j], i);
+ in->name =
+ (char *) format_instance_name (u, fmt, socket_by_cpu_id[j], i);
vec_terminate_c_string (in->name);
log_debug ("found %s %s", type_str, in->name);
}
@@ -114,12 +150,9 @@ intel_uncore_init (vlib_main_t *vm, perfmon_source_t *src)
u32 i, j;
u8 *s = 0;
- if ((err = clib_sysfs_read ("/sys/devices/system/node/has_cpu", "%U",
- unformat_bitmap_list, &node_bitmap)))
- {
- clib_error_free (err);
- return clib_error_return (0, "failed to discover numa topology");
- }
+ node_bitmap = os_get_online_cpu_node_bitmap ();
+ if (!node_bitmap)
+ return clib_error_return (0, "failed to discover numa topology");
clib_bitmap_foreach (i, node_bitmap)
{
@@ -132,6 +165,14 @@ intel_uncore_init (vlib_main_t *vm, perfmon_source_t *src)
goto done;
}
+ if (!cpumask)
+ {
+ clib_error_free (err);
+ err = clib_error_return (
+ 0, "while discovering numa topology: cpumask unexpectedly NULL");
+ goto done;
+ }
+
clib_bitmap_foreach (j, cpumask)
{
vec_validate_init_empty (numa_by_cpu_id, j, -1);
@@ -179,4 +220,5 @@ PERFMON_REGISTER_SOURCE (intel_uncore) = {
.n_events = INTEL_UNCORE_N_EVENTS,
.init_fn = intel_uncore_init,
.format_config = format_intel_core_config,
+ .bundle_support = intel_bundle_supported,
};
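
Note: with the unit name table above, IIO instances gain a human-readable suffix; on socket 0 the six uboxes should come out roughly as follows (a sketch of expected output, not taken from the source):

    /* sketch: expected IIO instance names on socket 0, per
     * format_instance_name and uncore_unit_names above */
    const char *expected[] = { "IIO0/0 (PCIe0)", "IIO0/1 (PCIe1)",
			       "IIO0/2 (MCP)",	 "IIO0/3 (PCIe2)",
			       "IIO0/4 (PCIe3)", "IIO0/5 (CBDMA/DMI)" };
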
diff --git a/src/plugins/perfmon/intel/uncore.h b/src/plugins/perfmon/intel/uncore.h
index 03227d6069c..4afbffce858 100644
--- a/src/plugins/perfmon/intel/uncore.h
+++ b/src/plugins/perfmon/intel/uncore.h
@@ -18,7 +18,8 @@
#define foreach_intel_uncore_unit_type \
_ (IMC, "imc", "integrated Memory Controller (iMC)", "iMC%u/%u") \
- _ (UPI, "upi", "Ultra Path Interconnect (UPI)", "UPI%u/%u")
+ _ (UPI, "upi", "Ultra Path Interconnect (UPI)", "UPI%u/%u") \
+ _ (IIO, "iio", "Internal IO (IIO)", "IIO%u/%u")
typedef enum
{
@@ -28,21 +29,60 @@ typedef enum
INTEL_UNCORE_N_UNITS,
} intel_uncore_unit_type_t;
+typedef struct
+{
+ intel_uncore_unit_type_t unit_type;
+ char **unit_names;
+} intel_uncore_unit_type_names_t;
+
#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask) \
((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 | \
(cmask) << 24)
-/* Type, EventCode, UMask, name, suffix, description */
+/* Type, EventCode, UMask, ch_mask, fc_mask, name, suffix, description */
#define foreach_intel_uncore_event \
- _ (IMC, 0x04, 0x03, UNC_M_CAS_COUNT, RD, \
+ _ (IMC, 0x04, 0x03, 0, 0, UNC_M_CAS_COUNT, RD, \
"All DRAM Read CAS Commands issued (including underfills)") \
- _ (IMC, 0x04, 0x0c, UNC_M_CAS_COUNT, WR, \
+ _ (IMC, 0x04, 0x0c, 0, 0, UNC_M_CAS_COUNT, WR, \
"All DRAM Write CAS commands issued") \
- _ (IMC, 0x04, 0x0f, UNC_M_CAS_COUNT, ALL, "All DRAM CAS commands issued")
+ _ (IMC, 0x04, 0x0f, 0, 0, UNC_M_CAS_COUNT, ALL, \
+ "All DRAM CAS commands issued") \
+ _ (IIO, 0x83, 0x01, 0x1, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART0, WR, \
+ "Four byte data request of the CPU : Card writing to DRAM") \
+ _ (IIO, 0x83, 0x01, 0x2, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART1, WR, \
+ "Four byte data request of the CPU : Card writing to DRAM") \
+ _ (IIO, 0x83, 0x01, 0x4, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART2, WR, \
+ "Four byte data request of the CPU : Card writing to DRAM") \
+ _ (IIO, 0x83, 0x01, 0x8, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART3, WR, \
+ "Four byte data request of the CPU : Card writing to DRAM") \
+ _ (IIO, 0x83, 0x04, 0x1, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART0, RD, \
+ "Four byte data request of the CPU : Card reading from DRAM") \
+ _ (IIO, 0x83, 0x04, 0x2, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART1, RD, \
+ "Four byte data request of the CPU : Card reading from DRAM") \
+ _ (IIO, 0x83, 0x04, 0x4, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART2, RD, \
+ "Four byte data request of the CPU : Card reading from DRAM") \
+ _ (IIO, 0x83, 0x04, 0x8, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART3, RD, \
+ "Four byte data request of the CPU : Card reading from DRAM") \
+ _ (IIO, 0xC0, 0x01, 0x1, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART0, WR, \
+ "Data requested by the CPU : Core writing to Card's MMIO space") \
+ _ (IIO, 0xC0, 0x01, 0x2, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART1, WR, \
+ "Data requested by the CPU : Core writing to Card's MMIO space") \
+ _ (IIO, 0xC0, 0x01, 0x4, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART2, WR, \
+ "Data requested by the CPU : Core writing to Card's MMIO space") \
+ _ (IIO, 0xC0, 0x01, 0x8, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART3, WR, \
+ "Data requested by the CPU : Core writing to Card's MMIO space") \
+ _ (IIO, 0x83, 0x80, 0x1, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART0, RD, \
+ "Data requested by the CPU : Core reading from Card's MMIO space") \
+ _ (IIO, 0x83, 0x80, 0x2, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART1, RD, \
+ "Data requested by the CPU : Core reading from Card's MMIO space") \
+ _ (IIO, 0x83, 0x80, 0x4, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART2, RD, \
+ "Data requested by the CPU : Core reading from Card's MMIO space") \
+ _ (IIO, 0x83, 0x80, 0x8, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART3, RD, \
+ "Data requested by the CPU : Core reading from Card's MMIO space")
typedef enum
{
-#define _(unit, event, umask, name, suffix, desc) \
+#define _(unit, event, umask, ch_mask, fc_mask, name, suffix, desc) \
INTEL_UNCORE_E_##unit##_##name##_##suffix,
foreach_intel_uncore_event
#undef _
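
Note: the new ch_mask and fc_mask columns land at bits 36 and 48 of the config word (see the .config expression in uncore.c above). For the PART0 read event (event 0x83, umask 0x04, ch_mask 0x1, fc_mask 0x7):

    /* sketch: IIO config encoding with ch_mask/fc_mask */
    unsigned long long cfg = 0x83 | (0x04ULL << 8) | (0x1ULL << 36) |
			     (0x7ULL << 48); /* = 0x7001000000483 */
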
diff --git a/src/plugins/perfmon/linux.c b/src/plugins/perfmon/linux.c
index 3715267266a..ef21f2d72fd 100644
--- a/src/plugins/perfmon/linux.c
+++ b/src/plugins/perfmon/linux.c
@@ -39,7 +39,12 @@ typedef enum
static perfmon_event_t events[] = {
#define _(n, s) \
- [n] = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##n, .name = s },
+ [n] = { \
+ .type = PERF_TYPE_SOFTWARE, \
+ .config = PERF_COUNT_SW_##n, \
+ .name = s, \
+ .implemented = 1, \
+ },
foreach_perf_sw_counter
#undef _
};
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index 46c8cf9ca04..e618f9b314a 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -70,7 +70,7 @@ perfmon_reset (vlib_main_t *vm)
vec_free (tr->node_stats);
for (int j = 0; j < PERF_MAX_EVENTS; j++)
if (tr->mmap_pages[j])
- munmap (tr->mmap_pages, page_size);
+ munmap (tr->mmap_pages[j], page_size);
}
vec_free (pm->thread_runtimes);
@@ -97,7 +97,7 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
s = b->src;
ASSERT (b->n_events);
- if (b->type == PERFMON_BUNDLE_TYPE_NODE)
+ if (b->active_type == PERFMON_BUNDLE_TYPE_NODE)
is_node = 1;
if (s->instances_by_type == 0)
@@ -141,15 +141,19 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
vec_validate (pm->group_fds, i);
pm->group_fds[i] = -1;
+ u8 n_events_opened = 0;
for (int j = 0; j < b->n_events; j++)
{
int fd;
perfmon_event_t *e = s->events + b->events[j];
+ if (!e->implemented)
+ continue;
struct perf_event_attr pe = {
.size = sizeof (struct perf_event_attr),
.type = e->type_from_instance ? in->type : e->type,
.config = e->config,
+ .config1 = e->config1,
.exclude_kernel = e->exclude_kernel,
.read_format =
(PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -157,6 +161,7 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
.disabled = 1,
};
+ perf_event_open:
log_debug ("perf_event_open pe.type=%u pe.config=0x%x pid=%d "
"cpu=%d group_fd=%d",
pe.type, pe.config, in->pid, in->cpu, pm->group_fds[i]);
@@ -165,8 +170,17 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
if (fd == -1)
{
- err = clib_error_return_unix (0, "perf_event_open");
- goto error;
+ if (errno ==
+ EOPNOTSUPP) /* 64b counters not supported on aarch64 */
+ {
+ pe.config1 = 2; /* retry with 32b counter width */
+ goto perf_event_open;
+ }
+ else
+ {
+ err = clib_error_return_unix (0, "perf_event_open");
+ goto error;
+ }
}
vec_add1 (pm->fds_to_close, fd);
@@ -178,24 +192,26 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
{
perfmon_thread_runtime_t *tr;
tr = vec_elt_at_index (pm->thread_runtimes, i);
- tr->mmap_pages[j] =
+ tr->mmap_pages[n_events_opened] =
mmap (0, page_size, PROT_READ, MAP_SHARED, fd, 0);
- if (tr->mmap_pages[j] == MAP_FAILED)
+ if (tr->mmap_pages[n_events_opened] == MAP_FAILED)
{
err = clib_error_return_unix (0, "mmap");
goto error;
}
}
+ n_events_opened++;
}
- if (is_node)
+ if (is_node && n_events_opened)
{
perfmon_thread_runtime_t *rt;
rt = vec_elt_at_index (pm->thread_runtimes, i);
rt->bundle = b;
- rt->n_events = b->n_events;
+ rt->n_events = n_events_opened;
rt->n_nodes = n_nodes;
+ rt->preserve_samples = b->preserve_samples;
vec_validate_aligned (rt->node_stats, n_nodes - 1,
CLIB_CACHE_LINE_BYTES);
}
@@ -236,22 +252,20 @@ perfmon_start (vlib_main_t *vm, perfmon_bundle_t *b)
return clib_error_return_unix (0, "ioctl(PERF_EVENT_IOC_ENABLE)");
}
}
- if (b->type == PERFMON_BUNDLE_TYPE_NODE)
+ if (b->active_type == PERFMON_BUNDLE_TYPE_NODE)
{
-
- vlib_node_function_t *funcs[PERFMON_OFFSET_TYPE_MAX];
-#define _(type, pfunc) funcs[type] = pfunc;
-
- foreach_permon_offset_type
-#undef _
-
- ASSERT (funcs[b->offset_type]);
+ vlib_node_function_t *dispatch_wrapper = NULL;
+ err = b->src->config_dispatch_wrapper (b, &dispatch_wrapper);
+ if (err || !dispatch_wrapper)
+ {
+ perfmon_reset (vm);
+ return err;
+ }
for (int i = 0; i < vlib_get_n_threads (); i++)
vlib_node_set_dispatch_wrapper (vlib_get_main_by_index (i),
- funcs[b->offset_type]);
+ dispatch_wrapper);
}
-
pm->sample_time = vlib_time_now (vm);
pm->is_running = 1;
@@ -267,7 +281,7 @@ perfmon_stop (vlib_main_t *vm)
if (pm->is_running != 1)
return clib_error_return (0, "not running");
- if (pm->active_bundle->type == PERFMON_BUNDLE_TYPE_NODE)
+ if (pm->active_bundle->active_type == PERFMON_BUNDLE_TYPE_NODE)
{
for (int i = 0; i < vlib_get_n_threads (); i++)
vlib_node_set_dispatch_wrapper (vlib_get_main_by_index (i), 0);
@@ -311,7 +325,7 @@ perfmon_init (vlib_main_t *vm)
}
hash_set_mem (pm->source_by_name, s->name, s);
- log_debug ("source '%s' regisrtered", s->name);
+ log_debug ("source '%s' registered", s->name);
s = s->next;
}
@@ -320,8 +334,6 @@ perfmon_init (vlib_main_t *vm)
{
clib_error_t *err;
uword *p;
- if (hash_get_mem (pm->bundle_by_name, b->name) != 0)
- clib_panic ("duplicate bundle name '%s'", b->name);
if ((p = hash_get_mem (pm->source_by_name, b->source)) == 0)
{
@@ -332,6 +344,13 @@ perfmon_init (vlib_main_t *vm)
}
b->src = (perfmon_source_t *) p[0];
+ if (b->src->bundle_support && !b->src->bundle_support (b))
+ {
+ log_debug ("skipping bundle '%s' - not supported", b->name);
+ b = b->next;
+ continue;
+ }
+
if (b->init_fn && ((err = (b->init_fn) (vm, b))))
{
log_warn ("skipping bundle '%s' - %U", b->name, format_clib_error,
@@ -341,8 +360,11 @@ perfmon_init (vlib_main_t *vm)
continue;
}
+ if (hash_get_mem (pm->bundle_by_name, b->name) != 0)
+ clib_panic ("duplicate bundle name '%s'", b->name);
+
hash_set_mem (pm->bundle_by_name, b->name, b);
- log_debug ("bundle '%s' regisrtered", b->name);
+ log_debug ("bundle '%s' registered", b->name);
b = b->next;
}
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
index bba22cf6b1d..b76cf4b2138 100644
--- a/src/plugins/perfmon/perfmon.h
+++ b/src/plugins/perfmon/perfmon.h
@@ -23,7 +23,19 @@
#include <vppinfra/cpu.h>
#include <vlib/vlib.h>
-#define PERF_MAX_EVENTS 7 /* 3 fixed and 4 programmable */
+#if defined(__x86_64__)
+#define PERF_MAX_EVENTS 12 /* 4 fixed and 8 programmable on ICX */
+#elif defined(__aarch64__)
+#define PERF_MAX_EVENTS 7 /* 6 events + 1 CPU cycle counter */
+#endif
+
+typedef enum
+{
+ PERFMON_EVENT_TYPE_GENERAL,
+ PERFMON_EVENT_TYPE_FIXED,
+ PERFMON_EVENT_TYPE_PSEUDO,
+ PERFMON_EVENT_TYPE_MAX,
+} perfmon_event_type_t;
typedef enum
{
@@ -31,19 +43,30 @@ typedef enum
PERFMON_BUNDLE_TYPE_NODE,
PERFMON_BUNDLE_TYPE_THREAD,
PERFMON_BUNDLE_TYPE_SYSTEM,
+ PERFMON_BUNDLE_TYPE_MAX,
+ PERFMON_BUNDLE_TYPE_NODE_OR_THREAD,
} perfmon_bundle_type_t;
+#define foreach_perfmon_bundle_type \
+ _ (PERFMON_BUNDLE_TYPE_UNKNOWN, "not supported") \
+ _ (PERFMON_BUNDLE_TYPE_NODE, "node") \
+ _ (PERFMON_BUNDLE_TYPE_THREAD, "thread") \
+ _ (PERFMON_BUNDLE_TYPE_SYSTEM, "system")
+
typedef enum
{
- PERFMON_OFFSET_TYPE_MMAP,
- PERFMON_OFFSET_TYPE_METRICS,
- PERFMON_OFFSET_TYPE_MAX,
-} perfmon_offset_type_t;
+#define _(e, str) e##_FLAG = 1 << e,
+ foreach_perfmon_bundle_type
+#undef _
+
+} perfmon_bundle_type_flag_t;
typedef struct
{
u32 type_from_instance : 1;
u32 exclude_kernel : 1;
+ u32 config1 : 2;
+ u32 implemented : 1;
union
{
u32 type;
@@ -69,15 +92,15 @@ typedef struct
} perfmon_instance_type_t;
struct perfmon_source;
-vlib_node_function_t perfmon_dispatch_wrapper_mmap;
-vlib_node_function_t perfmon_dispatch_wrapper_metrics;
-
-#define foreach_permon_offset_type \
- _ (PERFMON_OFFSET_TYPE_MMAP, perfmon_dispatch_wrapper_mmap) \
- _ (PERFMON_OFFSET_TYPE_METRICS, perfmon_dispatch_wrapper_metrics)
+typedef struct perfmon_bundle perfmon_bundle_t;
typedef clib_error_t *(perfmon_source_init_fn_t) (vlib_main_t *vm,
struct perfmon_source *);
+typedef perfmon_event_type_t (perfmon_source_get_event_type) (u32 event);
+typedef u8 (perfmon_source_bundle_support_t) (perfmon_bundle_t *);
+typedef clib_error_t *(perfmon_source_config_dispatch_wrapper_t) (
+ perfmon_bundle_t *b, vlib_node_function_t **dispatch_wrapper);
+
typedef struct perfmon_source
{
char *name;
@@ -87,31 +110,52 @@ typedef struct perfmon_source
u32 n_events;
perfmon_instance_type_t *instances_by_type;
format_function_t *format_config;
+ perfmon_source_get_event_type *get_event_type;
perfmon_source_init_fn_t *init_fn;
+ perfmon_source_bundle_support_t *bundle_support;
+ perfmon_source_config_dispatch_wrapper_t *config_dispatch_wrapper;
} perfmon_source_t;
-struct perfmon_bundle;
-
typedef clib_error_t *(perfmon_bundle_init_fn_t) (vlib_main_t *vm,
struct perfmon_bundle *);
+typedef struct
+{
+ clib_cpu_supports_func_t cpu_supports;
+ perfmon_bundle_type_t bundle_type;
+} perfmon_cpu_supports_t;
+
typedef struct perfmon_bundle
{
char *name;
char *description;
char *source;
char *footer;
- perfmon_bundle_type_t type;
- perfmon_offset_type_t offset_type;
+
+ union
+ {
+ perfmon_bundle_type_flag_t type_flags;
+ perfmon_bundle_type_t type;
+ };
+ perfmon_bundle_type_t active_type;
+
u32 events[PERF_MAX_EVENTS];
- u32 metrics[PERF_MAX_EVENTS];
u32 n_events;
+ u32 n_columns;
+
+ uword *event_disabled;
+ uword *column_disabled;
+ u8 *column_events;
+
+ u16 preserve_samples;
+
+ perfmon_cpu_supports_t *cpu_supports;
+ u32 n_cpu_supports;
perfmon_bundle_init_fn_t *init_fn;
char **column_headers;
format_function_t *format_fn;
- clib_cpu_supports_func_t cpu_supports;
/* do not set manually */
perfmon_source_t *src;
@@ -147,6 +191,8 @@ typedef struct
u16 n_nodes;
perfmon_node_stats_t *node_stats;
perfmon_bundle_t *bundle;
+ u32 indexes[PERF_MAX_EVENTS];
+ u16 preserve_samples;
struct perf_event_mmap_page *mmap_pages[PERF_MAX_EVENTS];
} perfmon_thread_runtime_t;
@@ -168,6 +214,41 @@ typedef struct
extern perfmon_main_t perfmon_main;
+#define PERFMON_BUNDLE_TYPE_TO_FLAGS(type) \
+ ({ \
+ uword rtype = 0; \
+ if (type == PERFMON_BUNDLE_TYPE_NODE_OR_THREAD) \
+ rtype = \
+ 1 << PERFMON_BUNDLE_TYPE_THREAD | 1 << PERFMON_BUNDLE_TYPE_NODE; \
+ else \
+ rtype = 1 << type; \
+ rtype; \
+ })
+
+always_inline uword
+perfmon_cpu_update_bundle_type (perfmon_bundle_t *b)
+{
+ perfmon_cpu_supports_t *supports = b->cpu_supports;
+ uword type = 0;
+
+ /* either supports or b->type should be set, but not both */
+ ASSERT (!!supports ^ !!b->type);
+
+ /* if nothing specific for this bundle, go with the defaults */
+ if (!supports)
+ type = PERFMON_BUNDLE_TYPE_TO_FLAGS (b->type);
+ else
+ {
+ /* more than one type may be supported by a given bundle */
+ for (int i = 0; i < b->n_cpu_supports; ++i)
+ if (supports[i].cpu_supports ())
+ type |= PERFMON_BUNDLE_TYPE_TO_FLAGS (supports[i].bundle_type);
+ }
+
+ return type;
+}
+#undef PERFMON_BUNDLE_TYPE_TO_FLAGS
+
#define PERFMON_REGISTER_SOURCE(x) \
perfmon_source_t __perfmon_source_##x; \
static void __clib_constructor __perfmon_source_registration_##x (void) \
@@ -184,6 +265,8 @@ extern perfmon_main_t perfmon_main;
{ \
perfmon_main_t *pm = &perfmon_main; \
__perfmon_bundle_##x.next = pm->bundles; \
+ __perfmon_bundle_##x.type_flags = \
+ perfmon_cpu_update_bundle_type (&__perfmon_bundle_##x); \
pm->bundles = &__perfmon_bundle_##x; \
} \
perfmon_bundle_t __perfmon_bundle_##x
@@ -195,4 +278,7 @@ clib_error_t *perfmon_stop (vlib_main_t *vm);
#define PERFMON_STRINGS(...) \
(char *[]) { __VA_ARGS__, 0 }
+#define PERFMON_COLUMN_EVENTS(...) \
+ (u8[]) { __VA_ARGS__ }
+
#endif
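
Note: PERFMON_BUNDLE_TYPE_TO_FLAGS widens a scalar bundle type into a bitmask so one bundle can advertise several run modes; NODE_OR_THREAD expands to both bits:

    /* sketch: flag expansion done at bundle registration time;
     * NODE = 1, THREAD = 2 in the enum above */
    unsigned long flags = (1 << 1) | (1 << 2); /* NODE_OR_THREAD -> 0x6 */
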
diff --git a/src/plugins/perfmon/table.c b/src/plugins/perfmon/table.c
deleted file mode 100644
index e3fc0982bb0..00000000000
--- a/src/plugins/perfmon/table.c
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- Copyright (c) 2020 Damjan Marion
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#include <vppinfra/format.h>
-#include "table.h"
-
-static table_text_attr_t default_title = {
- .flags = TTAF_FG_COLOR_SET | TTAF_BOLD,
- .fg_color = TTAC_YELLOW,
- .align = TTAA_CENTER,
-};
-
-static table_text_attr_t default_body = {
- .align = TTAA_RIGHT,
-};
-
-static table_text_attr_t default_header_col = {
- .flags = TTAF_FG_COLOR_SET,
- .fg_color = TTAC_YELLOW,
- .align = TTAA_CENTER,
-};
-
-static table_text_attr_t default_header_row = {
- .flags = TTAF_FG_COLOR_SET | TTAF_BOLD,
- .fg_color = TTAC_GREEN,
- .align = TTAA_LEFT,
-};
-
-u8 *
-format_text_cell (table_t *t, u8 *s, table_cell_t *c, table_text_attr_t *def,
- int size)
-{
- table_text_attr_t _a = {}, *a = &_a;
-
- if (a == 0)
- return format (s, t->no_ansi ? "" : "\x1b[0m");
-
- clib_memcpy (a, def, sizeof (table_text_attr_t));
-
- if (t->no_ansi == 0)
- {
- int *codes = 0;
- if (c->attr.flags & TTAF_FG_COLOR_SET)
- {
- a->fg_color = c->attr.fg_color;
- a->flags |= TTAF_FG_COLOR_SET;
- }
-
- if (c->attr.flags & TTAF_BG_COLOR_SET)
- {
- a->bg_color = c->attr.bg_color;
- a->flags |= TTAF_BG_COLOR_SET;
- }
-
- if (a->flags & TTAF_RESET)
- vec_add1 (codes, 0);
-
- if (a->flags & TTAF_BOLD)
- vec_add1 (codes, 1);
-
- if (a->flags & TTAF_DIM)
- vec_add1 (codes, 2);
-
- if (a->flags & TTAF_UNDERLINE)
- vec_add1 (codes, 4);
-
- if (a->flags & TTAF_FG_COLOR_SET)
- vec_add1 (codes,
- (a->flags & TTAF_FG_COLOR_BRIGHT ? 90 : 30) + a->fg_color);
-
- if (a->flags & TTAF_BG_COLOR_SET)
- vec_add1 (codes,
- (a->flags & TTAF_BG_COLOR_BRIGHT ? 100 : 40) + a->bg_color);
-
- if (codes)
- {
- s = format (s, "\x1b[");
- for (int i = 0; i < vec_len (codes); i++)
- s = format (s, "%s%u", i ? ";" : "", codes[i]);
- s = format (s, "m");
- vec_free (codes);
- }
- }
-
- u8 *fmt = 0;
- table_text_attr_align_t align = c->attr.align;
- if (align == TTAA_DEFAULT)
- align = a->align;
- if (align == TTAA_LEFT)
- fmt = format (fmt, "%%-%uv%c", size, 0);
- else if (align == TTAA_CENTER)
- fmt = format (fmt, "%%=%uv%c", size, 0);
- else
- fmt = format (fmt, "%%%uv%c", size, 0);
- s = format (s, (char *) fmt, c->text);
- vec_free (fmt);
- return format (s, t->no_ansi ? "" : "\x1b[0m");
-}
-
-u8 *
-format_table (u8 *s, va_list *args)
-{
- table_t *t = va_arg (*args, table_t *);
- table_cell_t title_cell = { .text = t->title };
- int table_width = 0;
- for (int i = 0; i < vec_len (t->row_sizes); i++)
- table_width += t->row_sizes[i];
-
- s = format_text_cell (t, s, &title_cell, &default_title, table_width);
- s = format (s, "\n");
-
- for (int c = 0; c < vec_len (t->cells); c++)
- {
- table_text_attr_t *col_default;
-
- if (c < t->n_header_cols)
- col_default = &default_header_col;
- else
- col_default = &default_body;
-
- for (int r = 0; r < vec_len (t->cells[c]); r++)
- {
- table_text_attr_t *row_default = col_default;
- if (r)
- s = format (s, " ");
- if (r < t->n_header_rows && c >= t->n_header_cols)
- row_default = &default_header_row;
- s = format_text_cell (t, s, &t->cells[c][r], row_default,
- t->row_sizes[r]);
- }
- s = format (s, "\n");
- }
-
- return s;
-}
-
-void
-table_format_title (table_t *t, char *fmt, ...)
-{
- va_list va;
-
- va_start (va, fmt);
- t->title = va_format (t->title, fmt, &va);
- va_end (va);
-}
-
-static table_cell_t *
-table_get_cell (table_t *t, int c, int r)
-{
- c += t->n_header_cols;
- r += t->n_header_rows;
-
- /* grow table if needed */
- vec_validate (t->cells, c);
- for (int i = 0; i < vec_len (t->cells); i++)
- vec_validate (t->cells[i], r);
- return &t->cells[c][r];
-}
-
-void
-table_format_cell (table_t *t, int c, int r, char *fmt, ...)
-{
- table_cell_t *cell = table_get_cell (t, c, r);
- va_list va;
-
- c += t->n_header_cols;
- r += t->n_header_rows;
-
- va_start (va, fmt);
- cell->text = va_format (t->cells[c][r].text, fmt, &va);
- va_end (va);
-
- vec_validate (t->row_sizes, r);
- t->row_sizes[r] = clib_max (t->row_sizes[r], vec_len (t->cells[c][r].text));
-}
-
-void
-table_set_cell_align (table_t *t, int c, int r, table_text_attr_align_t a)
-{
- table_cell_t *cell = table_get_cell (t, c, r);
- cell->attr.align = a;
-}
-
-void
-table_set_cell_fg_color (table_t *t, int c, int r, table_text_attr_color_t v)
-{
- table_cell_t *cell = table_get_cell (t, c, r);
- cell->attr.fg_color = v;
- cell->attr.flags |= TTAF_FG_COLOR_SET;
-}
-
-void
-table_set_cell_bg_color (table_t *t, int c, int r, table_text_attr_color_t v)
-{
- table_cell_t *cell = table_get_cell (t, c, r);
- cell->attr.bg_color = v;
- cell->attr.flags |= TTAF_BG_COLOR_SET;
-}
-
-void
-table_free (table_t *t)
-{
- for (int c = 0; c < vec_len (t->cells); c++)
- {
- for (int r = 0; r < vec_len (t->cells[c]); r++)
- vec_free (t->cells[c][r].text);
- vec_free (t->cells[c]);
- }
- vec_free (t->cells);
- vec_free (t->row_sizes);
- vec_free (t->title);
- clib_memset (t, 0, sizeof (table_t));
-}
-
-void
-table_add_header_col (table_t *t, int n_strings, ...)
-{
- va_list arg;
- int r, c = t->n_header_cols++;
- int n_rows;
-
- vec_insert (t->cells, 1, c);
- n_rows = clib_max (n_strings, 1);
- n_rows = clib_max (vec_len (t->row_sizes), n_rows);
- vec_validate (t->cells[c], n_rows - 1);
-
- va_start (arg, n_strings);
- for (r = 0; r < n_rows; r++)
- {
- if (n_strings-- > 0)
- table_format_cell (t, -1, r - t->n_header_rows, "%s",
- va_arg (arg, char *));
- }
- va_end (arg);
-}
-
-void
-table_add_header_row (table_t *t, int n_strings, ...)
-{
- va_list arg;
- int c, r = t->n_header_rows++;
-
- vec_validate (t->cells, n_strings + t->n_header_cols - 1);
-
- va_start (arg, n_strings);
- for (c = t->n_header_cols; c < vec_len (t->cells); c++)
- {
- vec_insert (t->cells[c + t->n_header_cols], 1, r);
- if (n_strings-- > 0)
- table_format_cell (t, c, -1, "%s", va_arg (arg, char *));
- }
- va_end (arg);
-}
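
The format_text_cell logic removed above composes ANSI SGR escape sequences by collecting numeric attribute codes (0 reset, 1 bold, 2 dim, 4 underline, 30-37/90-97 foreground, 40-47/100-107 background) and joining them with ';' before a final 'm'. A minimal standalone sketch of the same composition in plain C, without vppinfra vectors; emit_sgr() is a hypothetical helper, not part of the removed file:

#include <stdio.h>

/* Compose one SGR sequence, e.g. "\x1b[1;92m" for bold bright green. */
static void
emit_sgr (int bold, int fg_bright, int fg_color) /* fg_color: 0..7 */
{
  int codes[3], n = 0;
  if (bold)
    codes[n++] = 1;                              /* SGR bold */
  codes[n++] = (fg_bright ? 90 : 30) + fg_color; /* same math as above */
  printf ("\x1b[");
  for (int i = 0; i < n; i++)
    printf ("%s%d", i ? ";" : "", codes[i]);
  printf ("m");
}

int
main (void)
{
  emit_sgr (1, 1, 2);          /* bold + bright green, cf. TTAC_GREEN */
  printf ("hello\x1b[0m\n");   /* trailing reset, as format_text_cell emits */
  return 0;
}
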
diff --git a/src/plugins/perfmon/table.h b/src/plugins/perfmon/table.h
deleted file mode 100644
index 93102a033f0..00000000000
--- a/src/plugins/perfmon/table.h
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- Copyright (c) 2020 Damjan Marion
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#ifndef __table_h__
-#define __table_h__
-
-typedef enum
-{
- TTAF_RESET = (1 << 0),
- TTAF_BOLD = (1 << 1),
- TTAF_DIM = (1 << 2),
- TTAF_UNDERLINE = (1 << 3),
- TTAF_FG_COLOR_SET = (1 << 4),
- TTAF_BG_COLOR_SET = (1 << 5),
- TTAF_FG_COLOR_BRIGHT = (1 << 6),
- TTAF_BG_COLOR_BRIGHT = (1 << 7),
-} table_text_attr_flags_t;
-
-typedef enum
-{
- TTAC_BLACK = 0,
- TTAC_RED = 1,
- TTAC_GREEN = 2,
- TTAC_YELLOW = 3,
- TTAC_BLUE = 4,
- TTAC_MAGENTA = 5,
- TTAC_CYAN = 6,
- TTAC_WHITE = 7,
-} table_text_attr_color_t;
-
-typedef enum
-{
- TTAA_DEFAULT = 0,
- TTAA_LEFT = 1,
- TTAA_RIGHT = 2,
- TTAA_CENTER = 3,
-} table_text_attr_align_t;
-
-typedef struct
-{
- table_text_attr_flags_t flags : 16;
- table_text_attr_color_t fg_color : 4;
- table_text_attr_color_t bg_color : 4;
- table_text_attr_align_t align : 4;
-} table_text_attr_t;
-
-typedef struct
-{
- table_text_attr_t attr;
- u8 *text;
-} table_cell_t;
-
-typedef struct
-{
- u8 no_ansi : 1;
- u8 *title;
- table_cell_t **cells;
- int *row_sizes;
- int n_header_cols;
- int n_header_rows;
- int n_footer_cols;
-} table_t;
-
-format_function_t format_table;
-
-void table_format_title (table_t *t, char *fmt, ...);
-void table_format_cell (table_t *t, int c, int r, char *fmt, ...);
-void table_set_cell_align (table_t *t, int c, int r,
- table_text_attr_align_t a);
-void table_set_cell_fg_color (table_t *t, int c, int r,
- table_text_attr_color_t v);
-void table_set_cell_bg_color (table_t *t, int c, int r,
- table_text_attr_color_t v);
-void table_free (table_t *t);
-void table_add_header_col (table_t *t, int n_strings, ...);
-void table_add_header_row (table_t *t, int n_strings, ...);
-
-#endif
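
Taken together, the declarations above describe a small builder-style API. A hedged reconstruction of a typical call sequence (not a verbatim perfmon call site; assumes a CLI handler with vlib_main_t *vm in scope):

table_t t = {};
table_format_title (&t, "packets per node");
table_add_header_row (&t, 2, "Node", "Packets");
table_format_cell (&t, 0, 0, "%s", "ip4-lookup");
table_format_cell (&t, 1, 0, "%u", 1234);
table_set_cell_align (&t, 1, 0, TTAA_RIGHT);
vlib_cli_output (vm, "%U", format_table, &t);
table_free (&t);
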
diff --git a/src/plugins/ping/CMakeLists.txt b/src/plugins/ping/CMakeLists.txt
index 2828f769fcc..d0040ff373a 100644
--- a/src/plugins/ping/CMakeLists.txt
+++ b/src/plugins/ping/CMakeLists.txt
@@ -14,4 +14,9 @@
add_vpp_plugin(ping
SOURCES
ping.c
+ ping.h
+ ping_api.c
+
+ API_FILES
+ ping.api
)
diff --git a/src/plugins/l2e/l2e.api b/src/plugins/ping/ping.api
index 586e2bae5ca..4cf043f5c31 100644
--- a/src/plugins/l2e/l2e.api
+++ b/src/plugins/ping/ping.api
@@ -1,6 +1,6 @@
/* Hey Emacs use -*- mode: C -*- */
/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2023 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -14,22 +14,29 @@
* limitations under the License.
*/
-option version = "1.0.0";
+option version = "0.1.0";
import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
-/** \brief L2 emulation at L3
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param sw_if_index - interface the operation is applied to
- @param enable - Turn the service on or off
-*/
-autoreply define l2_emulation
+autoreply define want_ping_finished_events
{
- option status="in_progress";
u32 client_index;
u32 context;
- vl_api_interface_index_t sw_if_index;
- bool enable;
+ vl_api_address_t address;
+ u32 repeat [default=1];
+ f64 interval [default=1.0];
+};
+
+define ping_finished_event
+{
+ u32 client_index;
+ u32 request_count;
+ u32 reply_count;
+};
+
+service {
+ rpc want_ping_finished_events returns want_ping_finished_events_reply
+ events ping_finished_event;
};
/*
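
vppapigen compiles the definition above into C message structs and handler stubs; under the usual conventions the request message comes out shaped roughly like the sketch below (field layout inferred from the .api text, not copied from generated output; generated structs are packed on the wire):

typedef struct
{
  u16 _vl_msg_id;
  u32 client_index;
  u32 context;
  vl_api_address_t address;
  u32 repeat;   /* default 1 */
  f64 interval; /* default 1.0; sent in network byte order */
} vl_api_want_ping_finished_events_t;
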
diff --git a/src/plugins/ping/ping.c b/src/plugins/ping/ping.c
index d09babd0be2..40e4495aaf2 100644
--- a/src/plugins/ping/ping.c
+++ b/src/plugins/ping/ping.c
@@ -19,8 +19,9 @@
#include <vlib/unix/unix.h>
#include <vnet/fib/ip6_fib.h>
#include <vnet/fib/ip4_fib.h>
-#include <vnet/fib/fib_sas.h>
+#include <vnet/ip/ip_sas.h>
#include <vnet/ip/ip6_link.h>
+#include <vnet/ip/ip6_ll_table.h>
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
@@ -98,70 +99,6 @@ format_ip46_ping_result (u8 * s, va_list * args)
*
*/
-
-static_always_inline uword
-get_cli_process_id_by_icmp_id_mt (vlib_main_t * vm, u16 icmp_id)
-{
- ping_main_t *pm = &ping_main;
- uword cli_process_id = PING_CLI_UNKNOWN_NODE;
- ping_run_t *pr;
-
- clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
- vec_foreach (pr, pm->active_ping_runs)
- {
- if (pr->icmp_id == icmp_id)
- {
- cli_process_id = pr->cli_process_id;
- break;
- }
- }
- clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
- return cli_process_id;
-}
-
-
-static_always_inline void
-set_cli_process_id_by_icmp_id_mt (vlib_main_t * vm, u16 icmp_id,
- uword cli_process_id)
-{
- ping_main_t *pm = &ping_main;
- ping_run_t *pr;
-
- clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
- vec_foreach (pr, pm->active_ping_runs)
- {
- if (pr->icmp_id == icmp_id)
- {
- pr->cli_process_id = cli_process_id;
- goto have_found_and_set;
- }
- }
- /* no such key yet - add a new one */
- ping_run_t new_pr = {.icmp_id = icmp_id,.cli_process_id = cli_process_id };
- vec_add1 (pm->active_ping_runs, new_pr);
-have_found_and_set:
- clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
-}
-
-
-static_always_inline void
-clear_cli_process_id_by_icmp_id_mt (vlib_main_t * vm, u16 icmp_id)
-{
- ping_main_t *pm = &ping_main;
- ping_run_t *pr;
-
- clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
- vec_foreach (pr, pm->active_ping_runs)
- {
- if (pr->icmp_id == icmp_id)
- {
- vec_del1 (pm->active_ping_runs, pm->active_ping_runs - pr);
- break;
- }
- }
- clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
-}
-
static_always_inline int
ip46_get_icmp_id_and_seq (vlib_main_t * vm, vlib_buffer_t * b0,
u16 * out_icmp_id, u16 * out_icmp_seq, int is_ip6)
@@ -338,7 +275,6 @@ ip6_icmp_echo_reply_node_fn (vlib_main_t * vm,
1 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_echo_reply_node, static) =
{
.function = ip6_icmp_echo_reply_node_fn,
@@ -364,7 +300,6 @@ VLIB_REGISTER_NODE (ip4_icmp_echo_reply_node, static) =
[ICMP46_ECHO_REPLY_NEXT_PUNT] = "ip4-punt",
},
};
-/* *INDENT-ON* */
static uword
ip4_icmp_echo_request (vlib_main_t * vm,
@@ -559,7 +494,6 @@ format_icmp_input_trace (u8 * s, va_list * va)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
.function = ip4_icmp_echo_request,
.name = "ip4-icmp-echo-request",
@@ -573,7 +507,200 @@ VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
[0] = "ip4-load-balance",
},
};
-/* *INDENT-ON* */
+
+typedef enum
+{
+ ICMP6_ECHO_REQUEST_NEXT_LOOKUP,
+ ICMP6_ECHO_REQUEST_NEXT_OUTPUT,
+ ICMP6_ECHO_REQUEST_N_NEXT,
+} icmp6_echo_request_next_t;
+
+static uword
+ip6_icmp_echo_request (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ ip6_main_t *im = &ip6_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 2 && n_left_to_next > 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ icmp46_header_t *icmp0, *icmp1;
+ ip6_address_t tmp0, tmp1;
+ ip_csum_t sum0, sum1;
+ u32 bi0, bi1;
+ u32 fib_index0, fib_index1;
+ u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+ u32 next1 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ p1 = vlib_get_buffer (vm, bi1);
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ icmp0 = ip6_next_header (ip0);
+ icmp1 = ip6_next_header (ip1);
+
+	  /* Change icmp type to echo reply and update icmp checksum. */
+ sum0 = icmp0->checksum;
+ sum1 = icmp1->checksum;
+
+ ASSERT (icmp0->type == ICMP6_echo_request);
+ ASSERT (icmp1->type == ICMP6_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+ sum1 = ip_csum_update (sum1, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+
+ icmp0->checksum = ip_csum_fold (sum0);
+ icmp1->checksum = ip_csum_fold (sum1);
+
+ icmp0->type = ICMP6_echo_reply;
+ icmp1->type = ICMP6_echo_reply;
+
+ /* Swap source and destination address. */
+ tmp0 = ip0->src_address;
+ tmp1 = ip1->src_address;
+
+ ip0->src_address = ip0->dst_address;
+ ip1->src_address = ip1->dst_address;
+
+ ip0->dst_address = tmp0;
+ ip1->dst_address = tmp1;
+
+ /* New hop count. */
+ ip0->hop_limit = im->host_config.ttl;
+ ip1->hop_limit = im->host_config.ttl;
+
+ if (ip6_address_is_link_local_unicast (&ip0->src_address) &&
+ !ip6_address_is_link_local_unicast (&ip0->dst_address))
+ {
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ }
+ if (ip6_address_is_link_local_unicast (&ip1->src_address) &&
+ !ip6_address_is_link_local_unicast (&ip1->dst_address))
+ {
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = fib_index1;
+ }
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ p1->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done
+ */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ icmp46_header_t *icmp0;
+ u32 bi0;
+ ip6_address_t tmp0;
+ ip_csum_t sum0;
+ u32 fib_index0;
+ u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip6_next_header (ip0);
+
+	  /* Change icmp type to echo reply and update icmp checksum. */
+ sum0 = icmp0->checksum;
+
+ ASSERT (icmp0->type == ICMP6_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ icmp0->type = ICMP6_echo_reply;
+
+ /* Swap source and destination address. */
+ tmp0 = ip0->src_address;
+ ip0->src_address = ip0->dst_address;
+ ip0->dst_address = tmp0;
+
+ ip0->hop_limit = im->host_config.ttl;
+
+ if (ip6_address_is_link_local_unicast (&ip0->src_address) &&
+ !ip6_address_is_link_local_unicast (&ip0->dst_address))
+ {
+	      /* if the original packet was sent to a link-local address,
+	       * the fib index is that of the LL table; we can't use that
+	       * to forward the response if the new destination is global,
+	       * so reset to the fib index of the link. Otherwise, the fib
+	       * index we need has already been written to the buffer. */
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ }
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_error_count (vm, ip6_icmp_input_node.index,
+ ICMP6_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = {
+ .function = ip6_icmp_echo_request,
+ .name = "ip6-icmp-echo-request",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT,
+ .next_nodes = {
+ [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = "ip6-lookup",
+ [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output",
+ },
+};
/*
* A swarm of address-family agnostic helper functions
@@ -682,13 +809,16 @@ ip46_get_resolving_interface (u32 fib_index, ip46_address_t * pa46,
}
static u32
-ip46_fib_table_get_index_for_sw_if_index (u32 sw_if_index, int is_ip6)
+ip46_fib_table_get_index_for_sw_if_index (u32 sw_if_index, int is_ip6,
+ ip46_address_t *pa46)
{
- u32 fib_table_index = is_ip6 ?
- ip6_fib_table_get_index_for_sw_if_index (sw_if_index) :
- ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
- return fib_table_index;
-
+ if (is_ip6)
+ {
+ if (ip6_address_is_link_local_unicast (&pa46->ip6))
+ return ip6_ll_fib_get (sw_if_index);
+ return ip6_fib_table_get_index_for_sw_if_index (sw_if_index);
+ }
+ return ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
}
@@ -735,13 +865,15 @@ ip46_set_src_address (u32 sw_if_index, vlib_buffer_t * b0, int is_ip6)
{
ip6_header_t *ip6 = vlib_buffer_get_current (b0);
- res = fib_sas6_get (sw_if_index, &ip6->dst_address, &ip6->src_address);
+ res = ip6_sas_by_sw_if_index (sw_if_index, &ip6->dst_address,
+ &ip6->src_address);
}
else
{
ip4_header_t *ip4 = vlib_buffer_get_current (b0);
- res = fib_sas4_get (sw_if_index, &ip4->dst_address, &ip4->src_address);
+ res = ip4_sas_by_sw_if_index (sw_if_index, &ip4->dst_address,
+ &ip4->src_address);
}
return res;
}
@@ -870,12 +1002,10 @@ at_most_a_frame (u32 count)
}
static int
-ip46_enqueue_packet (vlib_main_t * vm, vlib_buffer_t * b0, u32 burst,
- int is_ip6)
+ip46_enqueue_packet (vlib_main_t *vm, vlib_buffer_t *b0, u32 burst,
+ u32 lookup_node_index)
{
vlib_frame_t *f = 0;
- u32 lookup_node_index =
- is_ip6 ? ip6_lookup_node.index : ip4_lookup_node.index;
int n_sent = 0;
u16 n_to_send;
@@ -978,7 +1108,7 @@ send_ip46_ping (vlib_main_t * vm,
}
else
fib_index =
- ip46_fib_table_get_index_for_sw_if_index (sw_if_index, is_ip6);
+ ip46_fib_table_get_index_for_sw_if_index (sw_if_index, is_ip6, pa46);
if (~0 == fib_index)
ERROR_OUT (SEND_PING_NO_TABLE);
@@ -1002,7 +1132,23 @@ send_ip46_ping (vlib_main_t * vm,
ip46_fix_len_and_csum (vm, l4_header_offset, data_len, b0, is_ip6);
- int n_sent = ip46_enqueue_packet (vm, b0, burst, is_ip6);
+ u32 node_index = ip6_lookup_node.index;
+ if (is_ip6)
+ {
+ if (pa46->ip6.as_u32[0] == clib_host_to_net_u32 (0xff020000))
+ {
+ node_index = ip6_rewrite_mcast_node.index;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ ip6_link_get_mcast_adj (sw_if_index);
+ }
+ }
+ else
+ {
+ node_index = ip4_lookup_node.index;
+ }
+ int n_sent = ip46_enqueue_packet (vm, b0, burst, node_index);
if (n_sent < burst)
err = SEND_PING_NO_BUFFERS;
@@ -1015,9 +1161,8 @@ done:
return err;
}
-static send_ip46_ping_result_t
-send_ip6_ping (vlib_main_t * vm,
- u32 table_id, ip6_address_t * pa6,
+send_ip46_ping_result_t
+send_ip6_ping (vlib_main_t *vm, u32 table_id, ip6_address_t *pa6,
u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len,
u32 burst, u8 verbose)
{
@@ -1027,9 +1172,8 @@ send_ip6_ping (vlib_main_t * vm,
id_host, data_len, burst, verbose, 1 /* is_ip6 */ );
}
-static send_ip46_ping_result_t
-send_ip4_ping (vlib_main_t * vm,
- u32 table_id, ip4_address_t * pa4,
+send_ip46_ping_result_t
+send_ip4_ping (vlib_main_t *vm, u32 table_id, ip4_address_t *pa4,
u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len,
u32 burst, u8 verbose)
{
@@ -1432,7 +1576,6 @@ done:
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ping_command, static) =
{
.path = "ping",
@@ -1443,7 +1586,6 @@ VLIB_CLI_COMMAND (ping_command, static) =
" [burst <count:1>] [verbose]",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
ping_cli_init (vlib_main_t * vm)
@@ -1461,18 +1603,20 @@ ping_cli_init (vlib_main_t * vm)
ip4_icmp_register_type (vm, ICMP4_echo_request,
ip4_icmp_echo_request_node.index);
+ icmp6_register_type (vm, ICMP6_echo_request,
+ ip6_icmp_echo_request_node.index);
+
+ ping_plugin_api_hookup (vm);
return 0;
}
VLIB_INIT_FUNCTION (ping_cli_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Ping (ping)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
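
The ip_csum_update() calls in the new ip6-icmp-echo-request node rely on incremental one's-complement checksum adjustment (RFC 1624): only the delta between the old and new 16-bit field is folded in, so the type can flip from echo request (128) to echo reply (129) without re-summing the payload. A self-contained illustration of the same arithmetic:

#include <stdint.h>

/* RFC 1624 eqn. 3: HC' = ~(~HC + ~m + m'), for one updated 16-bit word. */
static uint16_t
csum_update16 (uint16_t hc, uint16_t m_old, uint16_t m_new)
{
  uint32_t sum = (uint16_t) ~hc;
  sum += (uint16_t) ~m_old;
  sum += m_new;
  sum = (sum & 0xffff) + (sum >> 16); /* fold carries back in */
  sum = (sum & 0xffff) + (sum >> 16);
  return (uint16_t) ~sum;
}

/* e.g. flipping the big-endian type/code word when type 128 -> 129:
 * new_csum = csum_update16 (old_csum, 128 << 8, 129 << 8); */
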
diff --git a/src/plugins/ping/ping.h b/src/plugins/ping/ping.h
index 7826945ea8b..fdccd07b57d 100644
--- a/src/plugins/ping/ping.h
+++ b/src/plugins/ping/ping.h
@@ -52,6 +52,9 @@ typedef struct ping_run_t
typedef struct ping_main_t
{
+ /* API message ID base */
+ u16 msg_id_base;
+
ip6_main_t *ip6_main;
ip4_main_t *ip4_main;
/* a vector of current ping runs. */
@@ -69,7 +72,6 @@ extern ping_main_t ping_main;
#define PING_CLI_UNKNOWN_NODE (~0)
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u16 id;
@@ -78,7 +80,6 @@ typedef CLIB_PACKED (struct {
u8 data[0];
}) icmp46_echo_request_t;
-/* *INDENT-ON* */
typedef enum
@@ -88,4 +89,74 @@ typedef enum
ICMP46_ECHO_REPLY_N_NEXT,
} icmp46_echo_reply_next_t;
+static_always_inline uword
+get_cli_process_id_by_icmp_id_mt (vlib_main_t *vm, u16 icmp_id)
+{
+ ping_main_t *pm = &ping_main;
+ uword cli_process_id = PING_CLI_UNKNOWN_NODE;
+ ping_run_t *pr;
+
+ clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
+ vec_foreach (pr, pm->active_ping_runs)
+ {
+ if (pr->icmp_id == icmp_id)
+ {
+ cli_process_id = pr->cli_process_id;
+ break;
+ }
+ }
+ clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
+ return cli_process_id;
+}
+
+static_always_inline void
+set_cli_process_id_by_icmp_id_mt (vlib_main_t *vm, u16 icmp_id,
+ uword cli_process_id)
+{
+ ping_main_t *pm = &ping_main;
+ ping_run_t *pr;
+
+ clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
+ vec_foreach (pr, pm->active_ping_runs)
+ {
+ if (pr->icmp_id == icmp_id)
+ {
+ pr->cli_process_id = cli_process_id;
+ goto have_found_and_set;
+ }
+ }
+ /* no such key yet - add a new one */
+ ping_run_t new_pr = { .icmp_id = icmp_id, .cli_process_id = cli_process_id };
+ vec_add1 (pm->active_ping_runs, new_pr);
+have_found_and_set:
+ clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
+}
+
+static_always_inline void
+clear_cli_process_id_by_icmp_id_mt (vlib_main_t *vm, u16 icmp_id)
+{
+ ping_main_t *pm = &ping_main;
+ ping_run_t *pr;
+
+ clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
+ vec_foreach (pr, pm->active_ping_runs)
+ {
+ if (pr->icmp_id == icmp_id)
+ {
+ vec_del1 (pm->active_ping_runs, pr - pm->active_ping_runs);
+ break;
+ }
+ }
+ clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
+}
+clib_error_t *ping_plugin_api_hookup (vlib_main_t *vm);
+send_ip46_ping_result_t send_ip4_ping (vlib_main_t *vm, u32 table_id,
+ ip4_address_t *pa4, u32 sw_if_index,
+ u16 seq_host, u16 id_host, u16 data_len,
+ u32 burst, u8 verbose);
+send_ip46_ping_result_t send_ip6_ping (vlib_main_t *vm, u32 table_id,
+ ip6_address_t *pa6, u32 sw_if_index,
+ u16 seq_host, u16 id_host, u16 data_len,
+ u32 burst, u8 verbose);
+
#endif /* included_ping_ping_h */
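
The spinlock-protected helpers moved into this header form a small registry mapping an in-flight ICMP id to the waiting VPP process. On the reply path the consumer side looks roughly like this (a sketch; the event code and signalling call mirror what ping.c's reply nodes do):

uword pid = get_cli_process_id_by_icmp_id_mt (vm, icmp_id);
if (pid != PING_CLI_UNKNOWN_NODE)
  /* wake the waiting CLI/API process, handing it the buffer index */
  vlib_process_signal_event_mt (vm, pid, PING_RESPONSE_IP4, bi0);
else
  next0 = ICMP46_ECHO_REPLY_NEXT_PUNT; /* nobody registered this id */
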
diff --git a/src/plugins/ping/ping_api.c b/src/plugins/ping/ping_api.c
new file mode 100644
index 00000000000..5578fa560f2
--- /dev/null
+++ b/src/plugins/ping/ping_api.c
@@ -0,0 +1,155 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/format_fns.h>
+#include <vnet/ip/ip_types_api.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#include <ping/ping.h>
+
+/* define message IDs */
+#include <ping/ping.api_enum.h>
+#include <ping/ping.api_types.h>
+
+#define REPLY_MSG_ID_BASE pm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static void
+ping_api_send_ping_event (vl_api_want_ping_finished_events_t *mp,
+ u32 request_count, u32 reply_count)
+{
+ ping_main_t *pm = &ping_main;
+
+ vl_api_registration_t *rp;
+ rp = vl_api_client_index_to_registration (mp->client_index);
+
+ vl_api_ping_finished_event_t *e = vl_msg_api_alloc (sizeof (*e));
+ clib_memset (e, 0, sizeof (*e));
+
+ e->_vl_msg_id = htons (VL_API_PING_FINISHED_EVENT + pm->msg_id_base);
+ e->request_count = htonl (request_count);
+ e->reply_count = htonl (reply_count);
+
+ vl_api_send_msg (rp, (u8 *) e);
+}
+
+void
+vl_api_want_ping_finished_events_t_handler (
+ vl_api_want_ping_finished_events_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ ping_main_t *pm = &ping_main;
+ vl_api_want_ping_finished_events_reply_t *rmp;
+
+ uword curr_proc = vlib_current_process (vm);
+
+ u16 icmp_id;
+ static u32 rand_seed = 0;
+
+ if (PREDICT_FALSE (!rand_seed))
+ rand_seed = random_default_seed ();
+
+ icmp_id = random_u32 (&rand_seed) & 0xffff;
+
+ while (~0 != get_cli_process_id_by_icmp_id_mt (vm, icmp_id))
+ icmp_id++;
+
+ set_cli_process_id_by_icmp_id_mt (vm, icmp_id, curr_proc);
+
+ int rv = 0;
+ u32 request_count = 0;
+ u32 reply_count = 0;
+
+ u32 table_id = 0;
+ ip_address_t dst_addr = { 0 };
+ u32 sw_if_index = ~0;
+ f64 ping_interval = clib_net_to_host_f64 (mp->interval);
+ u32 ping_repeat = ntohl (mp->repeat);
+ u32 data_len = PING_DEFAULT_DATA_LEN;
+ u32 ping_burst = 1;
+ u32 verbose = 0;
+ ip_address_decode2 (&mp->address, &dst_addr);
+
+ vl_api_registration_t *rp;
+ rp = vl_api_client_index_to_registration (mp->client_index);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id =
+ htons ((VL_API_WANT_PING_FINISHED_EVENTS_REPLY) + (REPLY_MSG_ID_BASE));
+ rmp->context = mp->context;
+ rmp->retval = ntohl (rv);
+ vl_api_send_msg (rp, (u8 *) rmp);
+
+ int i;
+ send_ip46_ping_result_t res = SEND_PING_OK;
+ for (i = 1; i <= ping_repeat; i++)
+ {
+ f64 sleep_interval;
+ f64 time_ping_sent = vlib_time_now (vm);
+
+ if (dst_addr.version == AF_IP4)
+ res = send_ip4_ping (vm, table_id, &dst_addr.ip.ip4, sw_if_index, i,
+ icmp_id, data_len, ping_burst, verbose);
+ else
+ res = send_ip6_ping (vm, table_id, &dst_addr.ip.ip6, sw_if_index, i,
+ icmp_id, data_len, ping_burst, verbose);
+
+ if (SEND_PING_OK == res)
+ request_count += 1;
+ else
+ continue;
+
+ while ((sleep_interval =
+ time_ping_sent + ping_interval - vlib_time_now (vm)) > 0.0)
+ {
+ uword event_type;
+ vlib_process_wait_for_event_or_clock (vm, sleep_interval);
+ event_type = vlib_process_get_events (vm, 0);
+
+ if (event_type == ~0)
+ break;
+
+ if (event_type == PING_RESPONSE_IP4 ||
+ event_type == PING_RESPONSE_IP6)
+ reply_count += 1;
+ }
+ }
+
+ ping_api_send_ping_event (mp, request_count, reply_count);
+
+ clear_cli_process_id_by_icmp_id_mt (vm, icmp_id);
+}
+
+/* set up the API message handling tables */
+#include <ping/ping.api.c>
+
+clib_error_t *
+ping_plugin_api_hookup (vlib_main_t *vm)
+{
+ ping_main_t *pm = &ping_main;
+
+ /* ask for a correctly-sized block of API message decode slots */
+ pm->msg_id_base = setup_message_id_table ();
+
+ return 0;
+}
\ No newline at end of file
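
To make the handler's timing concrete: with repeat=3 and interval=1.0, VPP sends want_ping_finished_events_reply immediately, then transmits up to three echo requests one second apart while the handler blocks in vlib_process_wait_for_event_or_clock(); roughly three seconds later the client receives a single ping_finished_event whose request_count is at most 3 and whose reply_count tallies the PING_RESPONSE_IP4/IP6 events observed, so reply_count can fall short of request_count under loss.
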
diff --git a/src/plugins/pppoe/pppoe.c b/src/plugins/pppoe/pppoe.c
index 1589725eddd..0d5f9c1aeac 100644
--- a/src/plugins/pppoe/pppoe.c
+++ b/src/plugins/pppoe/pppoe.c
@@ -77,13 +77,11 @@ pppoe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
return /* no error */ 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (pppoe_device_class,static) = {
.name = "PPPoE",
.format_device_name = format_pppoe_name,
.admin_up_down_function = pppoe_interface_admin_up_down,
};
-/* *INDENT-ON* */
static u8 *
format_pppoe_header_with_length (u8 * s, va_list * args)
@@ -256,7 +254,6 @@ pppoe_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
dpo_reset (&dpo);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (pppoe_hw_class) =
{
.name = "PPPoE",
@@ -265,7 +262,6 @@ VNET_HW_INTERFACE_CLASS (pppoe_hw_class) =
.update_adjacency = pppoe_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
#define foreach_copy_field \
_(session_id) \
@@ -353,7 +349,7 @@ int vnet_pppoe_add_del_session
pool_get_aligned (pem->sessions, t, CLIB_CACHE_LINE_BYTES);
clib_memset (t, 0, sizeof (*t));
- clib_memcpy (t->local_mac, hi->hw_address, 6);
+ clib_memcpy (t->local_mac, hi->hw_address, vec_len (hi->hw_address));
/* copy from arg structure */
#define _(x) t->x = a->x;
@@ -374,7 +370,7 @@ int vnet_pppoe_add_del_session
vnet_interface_main_t *im = &vnm->interface_main;
hw_if_index = pem->free_pppoe_session_hw_if_indices
[vec_len (pem->free_pppoe_session_hw_if_indices) - 1];
- _vec_len (pem->free_pppoe_session_hw_if_indices) -= 1;
+ vec_dec_len (pem->free_pppoe_session_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, hw_if_index);
hi->dev_instance = t - pem->sessions;
@@ -413,6 +409,8 @@ int vnet_pppoe_add_del_session
si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
vnet_sw_interface_set_flags (vnm, sw_if_index,
VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ vnet_set_interface_l3_output_node (vnm->vlib_main, sw_if_index,
+ (u8 *) "tunnel-output");
/* add reverse route for client ip */
fib_table_entry_path_add (a->decap_fib_index, &pfx,
@@ -431,6 +429,7 @@ int vnet_pppoe_add_del_session
t = pool_elt_at_index (pem->sessions, result.fields.session_index);
sw_if_index = t->sw_if_index;
+ vnet_reset_interface_l3_output_node (vnm->vlib_main, sw_if_index);
vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */ );
vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, t->sw_if_index);
si->flags |= VNET_SW_INTERFACE_FLAG_HIDDEN;
@@ -610,7 +609,6 @@ done:
* @cliexcmd{create pppoe session client-ip 10.0.3.1 session-id 13
* client-mac 00:01:02:03:04:05 del }
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_pppoe_session_command, static) = {
.path = "create pppoe session",
.short_help =
@@ -618,9 +616,7 @@ VLIB_CLI_COMMAND (create_pppoe_session_command, static) = {
" client-mac <client-mac> [decap-vrf-id <nn>] [del]",
.function = pppoe_add_del_session_command_fn,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
static clib_error_t *
show_pppoe_session_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -639,7 +635,6 @@ show_pppoe_session_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-ON* */
/*?
* Display all the PPPoE Session entries.
@@ -651,13 +646,11 @@ show_pppoe_session_command_fn (vlib_main_t * vm,
* local-mac a0:b0:c0:d0:e0:f0 client-mac 00:01:02:03:04:05
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_pppoe_session_command, static) = {
.path = "show pppoe session",
.short_help = "show pppoe session",
.function = show_pppoe_session_command_fn,
};
-/* *INDENT-ON* */
typedef struct pppoe_show_walk_ctx_t_
{
@@ -721,7 +714,7 @@ show_pppoe_fib_command_fn (vlib_main_t * vm,
}
/*?
- * This command dispays the MAC Address entries of the PPPoE FIB table.
+ * This command displays the MAC Address entries of the PPPoE FIB table.
* Output can be filtered to just get the number of MAC Addresses or display
* each MAC Address.
*
@@ -729,18 +722,16 @@ show_pppoe_fib_command_fn (vlib_main_t * vm,
* Example of how to display the number of MAC Address entries in the PPPoE
* FIB table:
* @cliexstart{show pppoe fib}
- * Mac Address session_id Interface sw_if_index session_index
- * 52:54:00:53:18:33 1 GigabitEthernet0/8/0 2 0
- * 52:54:00:53:18:55 2 GigabitEthernet0/8/1 3 1
+ * Mac Address session_id Interface sw_if_index session_index
+ * 52:54:00:53:18:33 1 GigabitEthernet0/8/0 2 0
+ * 52:54:00:53:18:55 2 GigabitEthernet0/8/1 3 1
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_pppoe_fib_command, static) = {
.path = "show pppoe fib",
.short_help = "show pppoe fib",
.function = show_pppoe_fib_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
pppoe_init (vlib_main_t * vm)
@@ -772,12 +763,10 @@ pppoe_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (pppoe_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "PPP over Ethernet (PPPoE)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/pppoe/pppoe.h b/src/plugins/pppoe/pppoe.h
index a72b7349328..444de42f4a5 100644
--- a/src/plugins/pppoe/pppoe.h
+++ b/src/plugins/pppoe/pppoe.h
@@ -106,7 +106,6 @@ extern char *pppoe_error_strings[];
#define PPPOE_NUM_BUCKETS (64 * 1024)
#define PPPOE_MEMORY_SIZE (8<<20)
-/* *INDENT-OFF* */
/*
* The PPPoE key is the mac address and session ID
*/
@@ -127,9 +126,7 @@ typedef struct
u64 raw;
};
} pppoe_entry_key_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
/*
* The PPPoE entry results
*/
@@ -147,7 +144,6 @@ typedef struct
u64 raw;
};
} pppoe_entry_result_t;
-/* *INDENT-ON* */
typedef struct
{
diff --git a/src/plugins/pppoe/pppoe_api.c b/src/plugins/pppoe/pppoe_api.c
index 6705fb6acea..c7099a3491f 100644
--- a/src/plugins/pppoe/pppoe_api.c
+++ b/src/plugins/pppoe/pppoe_api.c
@@ -64,12 +64,10 @@ static void vl_api_pppoe_add_del_session_t_handler
rv = vnet_pppoe_add_del_session (&a, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_PPPOE_ADD_DEL_SESSION_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void send_pppoe_session_details
@@ -120,12 +118,10 @@ vl_api_pppoe_session_dump_t_handler (vl_api_pppoe_session_dump_t * mp)
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (t, pem->sessions)
{
send_pppoe_session_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -148,9 +144,7 @@ vl_api_pppoe_add_del_cp_t_handler (vl_api_pppoe_add_del_cp_t * mp)
rv = pppoe_add_del_cp (ntohl (mp->sw_if_index), mp->is_add);
- /* *INDENT-OFF* */
REPLY_MACRO(VL_API_PPPOE_ADD_DEL_CP_REPLY);
- /* *INDENT-ON* */
}
#include <pppoe/pppoe.api.c>
diff --git a/src/plugins/pppoe/pppoe_cp.c b/src/plugins/pppoe/pppoe_cp.c
index 6c6ba249fcc..82891d5b654 100644
--- a/src/plugins/pppoe/pppoe_cp.c
+++ b/src/plugins/pppoe/pppoe_cp.c
@@ -97,14 +97,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_pppoe_cp_cmd, static) =
{
.path = "create pppoe cp",
.short_help = "create pppoe cp-if-index <intfc> [del]",
.function = pppoe_add_del_cp_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/pppoe/pppoe_decap.c b/src/plugins/pppoe/pppoe_decap.c
index 71b9874081e..7c456a7a9cc 100644
--- a/src/plugins/pppoe/pppoe_decap.c
+++ b/src/plugins/pppoe/pppoe_decap.c
@@ -133,8 +133,10 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
pppoe0 = (pppoe_header_t*)(vlan0+1);
if( type0 != ETHERNET_TYPE_PPPOE_DISCOVERY && type0 != ETHERNET_TYPE_PPPOE_SESSION ) {
error0 = PPPOE_ERROR_BAD_VER_TYPE;
- next0 = PPPOE_INPUT_NEXT_DROP;
- goto trace0;
+ result0.fields.session_index =
+ ~0; // avoid tracing random data
+ next0 = PPPOE_INPUT_NEXT_DROP;
+ goto trace0;
}
} else {
pppoe0 = (pppoe_header_t*)(h0+1);
@@ -152,6 +154,7 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
vlib_buffer_advance(b0, sizeof(*h0)+sizeof(*vlan0));
error0 = PPPOE_ERROR_CONTROL_PLANE;
next0 = PPPOE_INPUT_NEXT_CP_INPUT;
+ result0.fields.session_index = ~0;
goto trace0;
}
@@ -228,8 +231,10 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
pppoe1 = (pppoe_header_t*)(vlan1+1);
if( type1 != ETHERNET_TYPE_PPPOE_DISCOVERY && type1 != ETHERNET_TYPE_PPPOE_SESSION ) {
error1 = PPPOE_ERROR_BAD_VER_TYPE;
- next1 = PPPOE_INPUT_NEXT_DROP;
- goto trace1;
+ result1.fields.session_index =
+ ~0; // avoid tracing random data
+ next1 = PPPOE_INPUT_NEXT_DROP;
+ goto trace1;
}
} else {
pppoe1 = (pppoe_header_t*)(h1+1);
@@ -247,6 +252,7 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
vlib_buffer_advance(b1, sizeof(*h1)+sizeof(*vlan1));
error1 = PPPOE_ERROR_CONTROL_PLANE;
next1 = PPPOE_INPUT_NEXT_CP_INPUT;
+ result1.fields.session_index = ~0;
goto trace1;
}
@@ -354,8 +360,10 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
pppoe0 = (pppoe_header_t*)(vlan0+1);
if( type0 != ETHERNET_TYPE_PPPOE_DISCOVERY && type0 != ETHERNET_TYPE_PPPOE_SESSION ) {
error0 = PPPOE_ERROR_BAD_VER_TYPE;
- next0 = PPPOE_INPUT_NEXT_DROP;
- goto trace00;
+ result0.fields.session_index =
+ ~0; // avoid tracing random data
+ next0 = PPPOE_INPUT_NEXT_DROP;
+ goto trace00;
}
} else {
pppoe0 = (pppoe_header_t*)(h0+1);
@@ -372,6 +380,7 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
vlib_buffer_advance(b0, sizeof(*h0)+sizeof(*vlan0));
error0 = PPPOE_ERROR_CONTROL_PLANE;
next0 = PPPOE_INPUT_NEXT_CP_INPUT;
+ result0.fields.session_index = ~0;
goto trace00;
}
@@ -485,11 +494,9 @@ VLIB_REGISTER_NODE (pppoe_input_node) = {
.format_trace = format_pppoe_rx_trace,
};
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (pppoe_input_node, static) =
{
.arc_name = "device-input",
.node_name = "pppoe-input",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON */
diff --git a/src/plugins/prom/CMakeLists.txt b/src/plugins/prom/CMakeLists.txt
new file mode 100644
index 00000000000..6c1976c74f3
--- /dev/null
+++ b/src/plugins/prom/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(prom
+ SOURCES
+ prom.c
+ prom_cli.c
+
+ LINK_LIBRARIES
+ vppapiclient
+)
diff --git a/src/plugins/prom/FEATURE.yaml b/src/plugins/prom/FEATURE.yaml
new file mode 100644
index 00000000000..65fefa7f177
--- /dev/null
+++ b/src/plugins/prom/FEATURE.yaml
@@ -0,0 +1,10 @@
+---
+name: Prom (Prometheus Exporter)
+maintainer: Florin Coras <fcoras@cisco.com>
+features:
+ - Stats scraper
+ - Prometheus exporter
+description: "HTTP static server url handler that scrapes stats and exports
+ them in Prometheus format"
+state: experimental
+properties: [MULTITHREAD]
diff --git a/src/plugins/prom/prom.c b/src/plugins/prom/prom.c
new file mode 100644
index 00000000000..934e8480d3c
--- /dev/null
+++ b/src/plugins/prom/prom.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+#include <prom/prom.h>
+#include <vpp-api/client/stat_client.h>
+#include <vlib/stats/stats.h>
+#include <ctype.h>
+
+static prom_main_t prom_main;
+
+static u8 *
+make_stat_name (char *name)
+{
+ prom_main_t *pm = &prom_main;
+ char *p = name;
+
+ while (*p)
+ {
+ if (!isalnum (*p))
+ *p = '_';
+ p++;
+ }
+
+ /* Reuse vector, instead of always allocating, when building a name. */
+ vec_reset_length (pm->name_scratch_pad);
+ pm->name_scratch_pad =
+ format (pm->name_scratch_pad, "%v%s", pm->stat_name_prefix, name);
+ return pm->name_scratch_pad;
+}
+
+static u8 *
+dump_counter_vector_simple (stat_segment_data_t *res, u8 *s, u8 used_only)
+{
+ u8 need_header = 1;
+ int j, k;
+ u8 *name;
+
+ name = make_stat_name (res->name);
+
+ for (k = 0; k < vec_len (res->simple_counter_vec); k++)
+ for (j = 0; j < vec_len (res->simple_counter_vec[k]); j++)
+ {
+ if (used_only && !res->simple_counter_vec[k][j])
+ continue;
+ if (need_header)
+ {
+ s = format (s, "# TYPE %v counter\n", name);
+ need_header = 0;
+ }
+ s = format (s, "%v{thread=\"%d\",interface=\"%d\"} %lld\n", name, k, j,
+ res->simple_counter_vec[k][j]);
+ }
+
+ return s;
+}
+
+static u8 *
+dump_counter_vector_combined (stat_segment_data_t *res, u8 *s, u8 used_only)
+{
+ u8 need_header = 1;
+ int j, k;
+ u8 *name;
+
+ name = make_stat_name (res->name);
+
+  for (k = 0; k < vec_len (res->combined_counter_vec); k++)
+ for (j = 0; j < vec_len (res->combined_counter_vec[k]); j++)
+ {
+ if (used_only && !res->combined_counter_vec[k][j].packets)
+ continue;
+ if (need_header)
+ {
+ s = format (s, "# TYPE %v_packets counter\n", name);
+ s = format (s, "# TYPE %v_bytes counter\n", name);
+ need_header = 0;
+ }
+ s = format (s, "%v_packets{thread=\"%d\",interface=\"%d\"} %lld\n",
+ name, k, j, res->combined_counter_vec[k][j].packets);
+ s = format (s, "%v_bytes{thread=\"%d\",interface=\"%d\"} %lld\n", name,
+ k, j, res->combined_counter_vec[k][j].bytes);
+ }
+
+ return s;
+}
+
+static u8 *
+dump_scalar_index (stat_segment_data_t *res, u8 *s, u8 used_only)
+{
+ u8 *name;
+
+ if (used_only && !res->scalar_value)
+ return s;
+
+ name = make_stat_name (res->name);
+
+ s = format (s, "# TYPE %v counter\n", name);
+ s = format (s, "%v %.2f\n", name, res->scalar_value);
+
+ return s;
+}
+
+static u8 *
+dump_name_vector (stat_segment_data_t *res, u8 *s, u8 used_only)
+{
+ u8 *name;
+ int k;
+
+ name = make_stat_name (res->name);
+
+ s = format (s, "# TYPE %v_info gauge\n", name);
+ for (k = 0; k < vec_len (res->name_vector); k++)
+ s = format (s, "%v_info{index=\"%d\",name=\"%s\"} 1\n", name, k,
+ res->name_vector[k]);
+
+ return s;
+}
+
+static u8 *
+scrape_stats_segment (u8 *s, u8 **patterns, u8 used_only)
+{
+ stat_segment_data_t *res;
+ static u32 *stats = 0;
+ int i;
+
+ stats = stat_segment_ls (patterns);
+
+retry:
+ res = stat_segment_dump (stats);
+ if (res == 0)
+ { /* Memory layout has changed */
+ if (stats)
+ vec_free (stats);
+ stats = stat_segment_ls (patterns);
+ goto retry;
+ }
+
+ for (i = 0; i < vec_len (res); i++)
+ {
+ switch (res[i].type)
+ {
+ case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE:
+ s = dump_counter_vector_simple (&res[i], s, used_only);
+ break;
+
+ case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED:
+ s = dump_counter_vector_combined (&res[i], s, used_only);
+ break;
+
+ case STAT_DIR_TYPE_SCALAR_INDEX:
+ s = dump_scalar_index (&res[i], s, used_only);
+ break;
+
+ case STAT_DIR_TYPE_NAME_VECTOR:
+ s = dump_name_vector (&res[i], s, used_only);
+ break;
+
+ case STAT_DIR_TYPE_EMPTY:
+ break;
+
+ default:
+ clib_warning ("Unknown value %d\n", res[i].type);
+ ;
+ }
+ }
+ stat_segment_data_free (res);
+ vec_free (stats);
+
+ return s;
+}
+
+static void
+send_data_to_hss (hss_session_handle_t sh)
+{
+ hss_url_handler_args_t args = {};
+ prom_main_t *pm = &prom_main;
+
+ args.sh = sh;
+ args.data = vec_dup (pm->stats);
+ args.data_len = vec_len (pm->stats);
+ args.sc = HTTP_STATUS_OK;
+ args.free_vec_data = 1;
+
+ pm->send_data (&args);
+}
+
+static void
+send_data_to_hss_rpc (void *rpc_args)
+{
+ send_data_to_hss (*(hss_session_handle_t *) rpc_args);
+}
+
+static uword
+prom_scraper_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ vlib_frame_t *f)
+{
+ uword *event_data = 0, event_type;
+ prom_main_t *pm = &prom_main;
+ hss_session_handle_t sh;
+ f64 timeout = 10000.0;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ event_type = vlib_process_get_events (vm, (uword **) &event_data);
+ switch (event_type)
+ {
+ case ~0:
+ /* timeout, do nothing */
+ break;
+ case PROM_SCRAPER_EVT_RUN:
+ sh.as_u64 = event_data[0];
+ vec_reset_length (pm->stats);
+ pm->stats = scrape_stats_segment (pm->stats, pm->stats_patterns,
+ pm->used_only);
+ session_send_rpc_evt_to_thread_force (sh.thread_index,
+ send_data_to_hss_rpc, &sh);
+ pm->last_scrape = vlib_time_now (vm);
+ break;
+ default:
+ clib_warning ("unexpected event %u", event_type);
+ break;
+ }
+
+ vec_reset_length (event_data);
+ }
+ return 0;
+}
+
+VLIB_REGISTER_NODE (prom_scraper_process_node) = {
+ .function = prom_scraper_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "prom-scraper-process",
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+static void
+prom_scraper_process_enable (vlib_main_t *vm)
+{
+ prom_main_t *pm = &prom_main;
+ vlib_node_t *n;
+
+ vlib_node_set_state (vm, prom_scraper_process_node.index,
+ VLIB_NODE_STATE_POLLING);
+ n = vlib_get_node (vm, prom_scraper_process_node.index);
+ vlib_start_process (vm, n->runtime_index);
+
+ pm->scraper_node_index = n->index;
+}
+
+static void
+signal_run_to_scraper (uword *args)
+{
+ prom_main_t *pm = &prom_main;
+ ASSERT (vlib_get_thread_index () == 0);
+ vlib_process_signal_event (pm->vm, pm->scraper_node_index,
+ PROM_SCRAPER_EVT_RUN, *args);
+}
+
+hss_url_handler_rc_t
+prom_stats_dump (hss_url_handler_args_t *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ f64 now = vlib_time_now (vm);
+ prom_main_t *pm = &prom_main;
+
+ /* If we've recently scraped stats, return data */
+ if ((now - pm->last_scrape) < pm->min_scrape_interval)
+ {
+ send_data_to_hss (args->sh);
+ return HSS_URL_HANDLER_ASYNC;
+ }
+
+ if (vm->thread_index != 0)
+ vl_api_rpc_call_main_thread (signal_run_to_scraper, (u8 *) &args->sh,
+ sizeof (args->sh));
+ else
+ signal_run_to_scraper (&args->sh.as_u64);
+
+ return HSS_URL_HANDLER_ASYNC;
+}
+
+void
+prom_stat_patterns_add (u8 **patterns)
+{
+ prom_main_t *pm = &prom_main;
+
+ u8 **pattern, **existing;
+ u8 found;
+ u32 len;
+
+ vec_foreach (pattern, patterns)
+ {
+ found = 0;
+ len = vec_len (*pattern);
+ if (len == 0)
+ continue;
+ vec_foreach (existing, pm->stats_patterns)
+ {
+ if (vec_len (*existing) != len)
+ continue;
+ if (!memcmp (*existing, *pattern, len - 1))
+ {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ vec_add1 (pm->stats_patterns, *pattern);
+ }
+}
+
+void
+prom_stat_patterns_free (void)
+{
+ prom_main_t *pm = &prom_main;
+ u8 **pattern;
+
+ vec_foreach (pattern, pm->stats_patterns)
+ vec_free (*pattern);
+ vec_free (pm->stats_patterns);
+}
+
+void
+prom_stat_patterns_set (u8 **patterns)
+{
+ prom_stat_patterns_free ();
+ prom_stat_patterns_add (patterns);
+}
+
+u8 **
+prom_stat_patterns_get (void)
+{
+ return prom_main.stats_patterns;
+}
+
+void
+prom_stat_name_prefix_set (u8 *prefix)
+{
+ prom_main_t *pm = &prom_main;
+
+ vec_free (pm->stat_name_prefix);
+ pm->stat_name_prefix = prefix;
+}
+
+void
+prom_report_used_only (u8 used_only)
+{
+ prom_main_t *pm = &prom_main;
+
+ pm->used_only = used_only;
+}
+
+static void
+prom_stat_segment_client_init (void)
+{
+ stat_client_main_t *scm = &stat_client_main;
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ uword size;
+
+ size = sm->memory_size ? sm->memory_size : STAT_SEGMENT_DEFAULT_SIZE;
+ scm->memory_size = size;
+ scm->shared_header = sm->shared_header;
+ scm->directory_vector =
+ stat_segment_adjust (scm, (void *) scm->shared_header->directory_vector);
+}
+
+void
+prom_enable (vlib_main_t *vm)
+{
+ prom_main_t *pm = &prom_main;
+
+ pm->register_url = vlib_get_plugin_symbol ("http_static_plugin.so",
+ "hss_register_url_handler");
+ pm->send_data =
+ vlib_get_plugin_symbol ("http_static_plugin.so", "hss_session_send_data");
+ pm->register_url (prom_stats_dump, "stats.prom", HTTP_REQ_GET);
+
+ pm->is_enabled = 1;
+ pm->vm = vm;
+ if (!pm->stat_name_prefix)
+ pm->stat_name_prefix = format (0, "vpp");
+
+ prom_scraper_process_enable (vm);
+ prom_stat_segment_client_init ();
+}
+
+static clib_error_t *
+prom_init (vlib_main_t *vm)
+{
+ prom_main_t *pm = &prom_main;
+
+ pm->is_enabled = 0;
+ pm->min_scrape_interval = 1;
+ pm->used_only = 0;
+ pm->stat_name_prefix = 0;
+
+ return 0;
+}
+
+prom_main_t *
+prom_get_main (void)
+{
+ return &prom_main;
+}
+
+VLIB_INIT_FUNCTION (prom_init) = {
+ .runs_after = VLIB_INITS ("hss_main_init"),
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Prometheus Stats Exporter",
+ .default_disabled = 0,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
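
Given the format strings above, a scrape of the registered stats.prom URL produces Prometheus exposition text shaped like this (illustrative values, assuming the default "vpp" stat-name prefix and an /if/rx combined counter after make_stat_name's non-alphanumeric-to-underscore rewrite):

# TYPE vpp_if_rx_packets counter
# TYPE vpp_if_rx_bytes counter
vpp_if_rx_packets{thread="0",interface="1"} 1234
vpp_if_rx_bytes{thread="0",interface="1"} 98765
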
diff --git a/src/plugins/prom/prom.h b/src/plugins/prom/prom.h
new file mode 100644
index 00000000000..898e4c209d1
--- /dev/null
+++ b/src/plugins/prom/prom.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_PROM_PROM_H_
+#define SRC_PLUGINS_PROM_PROM_H_
+
+#include <vnet/session/session.h>
+#include <http_static/http_static.h>
+
+typedef struct prom_main_
+{
+ u8 *stats;
+ f64 last_scrape;
+ hss_register_url_fn register_url;
+ hss_session_send_fn send_data;
+ u32 scraper_node_index;
+ u8 is_enabled;
+ u8 *name_scratch_pad;
+ vlib_main_t *vm;
+
+ /*
+ * Configs
+ */
+ u8 **stats_patterns;
+ u8 *stat_name_prefix;
+ f64 min_scrape_interval;
+ u8 used_only;
+} prom_main_t;
+
+typedef enum prom_process_evt_codes_
+{
+ PROM_SCRAPER_EVT_RUN,
+} prom_process_evt_codes_t;
+
+void prom_enable (vlib_main_t *vm);
+prom_main_t *prom_get_main (void);
+
+void prom_stat_patterns_set (u8 **patterns);
+void prom_stat_patterns_add (u8 **patterns);
+u8 **prom_stat_patterns_get (void);
+void prom_stat_patterns_free (void);
+
+void prom_stat_name_prefix_set (u8 *prefix);
+void prom_report_used_only (u8 used_only);
+
+#endif /* SRC_PLUGINS_PROM_PROM_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/prom/prom_cli.c b/src/plugins/prom/prom_cli.c
new file mode 100644
index 00000000000..705e54ac1b8
--- /dev/null
+++ b/src/plugins/prom/prom_cli.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <prom/prom.h>
+
+static uword
+unformat_stats_patterns (unformat_input_t *input, va_list *args)
+{
+ u8 ***patterns = va_arg (*args, u8 ***);
+ u8 *pattern;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%s", &pattern))
+ vec_add1 (*patterns, pattern);
+ else
+ return 0;
+ }
+ return 1;
+}
+
+static clib_error_t *
+prom_patterns_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_clear = 0, is_show = 0, **pattern = 0;
+ clib_error_t *error = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "show"))
+ is_show = 1;
+ else if (unformat (line_input, "clear"))
+ is_clear = 1;
+ else if (unformat (line_input, "add %U", unformat_stats_patterns,
+ &pattern))
+ {
+ prom_stat_patterns_add (pattern);
+ vec_free (pattern);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+ unformat_free (line_input);
+
+ if (error)
+ return error;
+
+ if (is_clear)
+ prom_stat_patterns_free ();
+
+ if (is_show)
+ {
+ u8 **patterns = prom_stat_patterns_get ();
+ vec_foreach (pattern, patterns)
+ vlib_cli_output (vm, " %v\n", *pattern);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (prom_patterns_command, static) = {
+ .path = "prom patterns",
+ .short_help = "prom patterns [show] [clear] [add <patterns>...]",
+ .function = prom_patterns_command_fn,
+};
+
+static clib_error_t *
+prom_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 **patterns = 0, *stat_name_prefix = 0;
+ prom_main_t *pm = prom_get_main ();
+ clib_error_t *error = 0;
+ u8 is_enable = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ goto no_input;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "enable"))
+ is_enable = 1;
+ else if (unformat (line_input, "min-scrape-interval %f",
+ &pm->min_scrape_interval))
+ ;
+ else if (unformat (line_input, "used-only"))
+ prom_report_used_only (1 /* used only */);
+ else if (unformat (line_input, "all-stats"))
+ prom_report_used_only (0 /* used only */);
+ else if (unformat (line_input, "stat-name-prefix %_%v%_",
+ &stat_name_prefix))
+ prom_stat_name_prefix_set (stat_name_prefix);
+ else if (unformat (line_input, "stat-patterns %U",
+ unformat_stats_patterns, &patterns))
+ prom_stat_patterns_set (patterns);
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (error)
+ return error;
+
+no_input:
+
+ if (is_enable && !pm->is_enabled)
+ prom_enable (vm);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (prom_enable_command, static) = {
+ .path = "prom",
+ .short_help = "prom [enable] [min-scrape-interval <n>] [used-only] "
+ "[all-stats] [stat-name-prefix <prefix>] "
+ "[stat-patterns <patterns>...]",
+ .function = prom_command_fn,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
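
Putting the two commands above together, a typical configuration session looks like the following (values illustrative; pattern matching is delegated to stat_segment_ls, which treats patterns as regular expressions over stat paths):

vpp# prom enable min-scrape-interval 5 used-only stat-patterns ^/if ^/err
vpp# prom patterns add ^/sys
vpp# prom patterns show
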
diff --git a/src/plugins/quic/CMakeLists.txt b/src/plugins/quic/CMakeLists.txt
index dfed91f51d9..65bdc32a239 100644
--- a/src/plugins/quic/CMakeLists.txt
+++ b/src/plugins/quic/CMakeLists.txt
@@ -12,8 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+if(NOT OPENSSL_FOUND)
+ message(WARNING "OpenSSL not found - quic plugin disabled")
+ return()
+endif()
+
unset(QUIC_LINK_LIBRARIES)
-set(EXPECTED_QUICLY_VERSION "0.1.3-vpp")
+set(EXPECTED_QUICLY_VERSION "0.1.4-vpp")
vpp_find_path(QUICLY_INCLUDE_DIR NAMES quicly.h)
vpp_find_path(PICOTLS_INCLUDE_DIR NAMES picotls.h)
diff --git a/src/plugins/quic/quic.c b/src/plugins/quic/quic.c
index 26f2216a3d8..60d4ac21c19 100644
--- a/src/plugins/quic/quic.c
+++ b/src/plugins/quic/quic.c
@@ -14,6 +14,9 @@
*/
#include <sys/socket.h>
+#include <sys/syscall.h>
+
+#include <openssl/rand.h>
#include <vnet/session/application.h>
#include <vnet/session/transport.h>
@@ -103,7 +106,6 @@ quic_app_cert_key_pair_delete_callback (app_cert_key_pair_t * ckpair)
for (i = 0; i < num_threads; i++)
{
- /* *INDENT-OFF* */
pool_foreach (crctx, qm->wrk_ctx[i].crypto_ctx_pool) {
if (crctx->ckpair_index == ckpair->cert_key_index)
{
@@ -111,7 +113,6 @@ quic_app_cert_key_pair_delete_callback (app_cert_key_pair_t * ckpair)
clib_bihash_add_del_24_8 (&qm->wrk_ctx[i].crypto_context_hash, &kv, 0 /* is_add */ );
}
}
- /* *INDENT-ON* */
}
return 0;
}
@@ -151,11 +152,9 @@ quic_list_crypto_context_command_fn (vlib_main_t * vm,
int i, num_threads = 1 /* main thread */ + vtm->n_threads;
for (i = 0; i < num_threads; i++)
{
- /* *INDENT-OFF* */
pool_foreach (crctx, qm->wrk_ctx[i].crypto_ctx_pool) {
vlib_cli_output (vm, "[%d][Q]%U", i, format_crypto_context, crctx);
}
- /* *INDENT-ON* */
}
return 0;
}
@@ -388,7 +387,8 @@ quic_ctx_alloc (u32 thread_index)
quic_main_t *qm = &quic_main;
quic_ctx_t *ctx;
- pool_get (qm->ctx_pool[thread_index], ctx);
+ pool_get_aligned_safe (qm->ctx_pool[thread_index], ctx,
+ CLIB_CACHE_LINE_BYTES);
clib_memset (ctx, 0, sizeof (quic_ctx_t));
ctx->c_thread_index = thread_index;
@@ -675,6 +675,7 @@ quic_send_datagram (session_t *udp_session, struct iovec *packet,
hdr.is_ip4 = tc->is_ip4;
clib_memcpy (&hdr.lcl_ip, &tc->lcl_ip, sizeof (ip46_address_t));
hdr.lcl_port = tc->lcl_port;
+ hdr.gso_size = 0;
/* Read dest address from quicly-provided sockaddr */
if (hdr.is_ip4)
@@ -782,12 +783,10 @@ quic_on_stream_destroy (quicly_stream_t * stream, int err)
quic_stream_data_t *stream_data = (quic_stream_data_t *) stream->data;
quic_ctx_t *sctx = quic_ctx_get (stream_data->ctx_id,
stream_data->thread_index);
- session_t *stream_session = session_get (sctx->c_s_index,
- sctx->c_thread_index);
QUIC_DBG (2, "DESTROYED_STREAM: session 0x%lx (%U)",
session_handle (stream_session), quic_format_err, err);
- stream_session->session_state = SESSION_STATE_CLOSED;
+ session_transport_closing_notify (&sctx->connection);
session_transport_delete_notify (&sctx->connection);
quic_increment_counter (QUIC_ERROR_CLOSED_STREAM, 1);
@@ -830,12 +829,13 @@ quic_on_receive (quicly_stream_t * stream, size_t off, const void *src,
size_t len)
{
QUIC_DBG (3, "received data: %lu bytes, offset %lu", len, off);
- u32 max_enq, rlen, rv;
+ u32 max_enq;
quic_ctx_t *sctx;
session_t *stream_session;
app_worker_t *app_wrk;
svm_fifo_t *f;
quic_stream_data_t *stream_data;
+ int rlen;
if (!len)
return;
@@ -876,6 +876,14 @@ quic_on_receive (quicly_stream_t * stream, size_t off, const void *src,
{
/* Streams live on the same thread so (f, stream_data) should stay consistent */
rlen = svm_fifo_enqueue (f, len, (u8 *) src);
+ if (PREDICT_FALSE (rlen < 0))
+ {
+	  /* drop: the fifo is full or is being resized */
+ return;
+ }
QUIC_DBG (3, "Session [idx %u, app_wrk %u, ti %u, rx-fifo 0x%llx]: "
"Enqueuing %u (rlen %u) at off %u in %u space, ",
stream_session->session_index,
@@ -886,10 +894,7 @@ quic_on_receive (quicly_stream_t * stream, size_t off, const void *src,
app_wrk = app_worker_get_if_valid (stream_session->app_wrk_index);
if (PREDICT_TRUE (app_wrk != 0))
{
- rv = app_worker_lock_and_send_event (app_wrk, stream_session,
- SESSION_IO_EVT_RX);
- if (rv)
- QUIC_ERR ("Failed to ping app for RX");
+ app_worker_rx_notify (app_wrk, stream_session);
}
quic_ack_rx_data (stream_session);
}
@@ -898,6 +903,14 @@ quic_on_receive (quicly_stream_t * stream, size_t off, const void *src,
rlen = svm_fifo_enqueue_with_offset (f,
off - stream_data->app_rx_data_len,
len, (u8 *) src);
+ if (PREDICT_FALSE (rlen < 0))
+ {
+ /*
+ * drop: the fifo is either full or being resized
+ */
+ return;
+ }
QUIC_ASSERT (rlen == 0);
}
return;
@@ -1031,6 +1044,8 @@ quic_on_stream_open (quicly_stream_open_t * self, quicly_stream_t * stream)
stream_session->session_type =
session_type_from_proto_and_ip (TRANSPORT_PROTO_QUIC, qctx->udp_is_ip4);
quic_session = session_get (qctx->c_s_index, qctx->c_thread_index);
+ /* Make sure quic session is in listening state */
+ quic_session->session_state = SESSION_STATE_LISTENING;
stream_session->listener_handle = listen_session_get_handle (quic_session);
app_wrk = app_worker_get (stream_session->app_wrk_index);
@@ -1044,6 +1059,7 @@ quic_on_stream_open (quicly_stream_open_t * self, quicly_stream_t * stream)
SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL |
SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY);
+ stream_session->session_state = SESSION_STATE_ACCEPTING;
if ((rv = app_worker_accept_notify (app_wrk, stream_session)))
{
QUIC_ERR ("failed to notify accept worker app");
@@ -1139,9 +1155,8 @@ quic_update_timer (quic_ctx_t * ctx)
quic_session = session_get (ctx->c_s_index, ctx->c_thread_index);
if (svm_fifo_set_event (quic_session->tx_fifo))
{
- rv = session_send_io_evt_to_thread_custom (quic_session,
- quic_session->thread_index,
- SESSION_IO_EVT_BUILTIN_TX);
+ rv = session_send_io_evt_to_thread_custom (
+ quic_session, quic_session->thread_index, SESSION_IO_EVT_TX);
if (PREDICT_FALSE (rv))
QUIC_ERR ("Failed to enqueue builtin_tx %d", rv);
}
@@ -1277,6 +1292,7 @@ quic_connect_stream (session_t * quic_session, session_endpoint_cfg_t * sep)
stream_data->app_rx_data_len = 0;
stream_data->app_tx_data_len = 0;
stream_session->session_state = SESSION_STATE_READY;
+ stream_session->opaque = sep->opaque;
/* For now we only reset streams. Cleanup will be triggered by timers */
if ((rv = app_worker_init_connected (app_wrk, stream_session)))
@@ -1441,7 +1457,8 @@ quic_proto_on_close (u32 ctx_index, u32 thread_index)
}
static u32
-quic_start_listen (u32 quic_listen_session_index, transport_endpoint_t * tep)
+quic_start_listen (u32 quic_listen_session_index,
+ transport_endpoint_cfg_t *tep)
{
vnet_listen_args_t _bargs, *args = &_bargs;
transport_endpt_crypto_cfg_t *ccfg;
@@ -1552,7 +1569,7 @@ format_quic_ctx (u8 * s, va_list * args)
if (!ctx)
return s;
- str = format (str, "[#%d][Q] ", ctx->c_thread_index);
+ str = format (str, "[%d:%d][Q] ", ctx->c_thread_index, ctx->c_s_index);
if (quic_ctx_is_listener (ctx))
str = format (str, "Listener, UDP %ld", ctx->udp_session_handle);
@@ -1670,15 +1687,6 @@ quic_on_quic_session_connected (quic_ctx_t * ctx)
quic_proto_on_close (ctx_id, thread_index);
return;
}
-
- /* If the app opens a stream in its callback it may invalidate ctx */
- ctx = quic_ctx_get (ctx_id, thread_index);
- /*
- * app_worker_connect_notify() might have reallocated pool, reload
- * quic_session pointer
- */
- quic_session = session_get (ctx->c_s_index, thread_index);
- quic_session->session_state = SESSION_STATE_LISTENING;
}
static void
@@ -2105,7 +2113,6 @@ quic_accept_connection (quic_rx_packet_ctx_t * pctx)
quic_session = session_alloc (ctx->c_thread_index);
QUIC_DBG (2, "Allocated quic_session, 0x%lx ctx %u",
session_handle (quic_session), ctx->c_c_index);
- quic_session->session_state = SESSION_STATE_LISTENING;
ctx->c_s_index = quic_session->session_index;
lctx = quic_ctx_get (ctx->listener_ctx_id, 0);
@@ -2131,6 +2138,7 @@ quic_accept_connection (quic_rx_packet_ctx_t * pctx)
}
app_wrk = app_worker_get (quic_session->app_wrk_index);
+ quic_session->session_state = SESSION_STATE_ACCEPTING;
if ((rv = app_worker_accept_notify (app_wrk, quic_session)))
{
QUIC_ERR ("failed to notify accept worker app");
@@ -2416,7 +2424,6 @@ quic_get_transport_endpoint (u32 ctx_index, u32 thread_index,
quic_common_get_transport_endpoint (ctx, tep, is_lcl);
}
-/* *INDENT-OFF* */
static session_cb_vft_t quic_app_cb_vft = {
.session_accept_callback = quic_udp_session_accepted_callback,
.session_disconnect_callback = quic_udp_session_disconnect_callback,
@@ -2452,7 +2459,6 @@ static const transport_proto_vft_t quic_proto = {
.service_type = TRANSPORT_SERVICE_APP,
},
};
-/* *INDENT-ON* */
static quicly_stream_open_t on_stream_open = { quic_on_stream_open };
static quicly_closed_by_remote_t on_closed_by_remote = {
@@ -2498,6 +2504,11 @@ quic_init (vlib_main_t * vm)
u64 options[APP_OPTIONS_N_OPTIONS];
quic_main_t *qm = &quic_main;
u32 num_threads, i;
+ u8 seed[32];
+
+ if (syscall (SYS_getrandom, &seed, sizeof (seed), 0) != sizeof (seed))
+ return clib_error_return_unix (0, "getrandom() failed");
+ RAND_seed (seed, sizeof (seed));
num_threads = 1 /* main thread */ + vtm->n_threads;
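The init path now seeds OpenSSL's RNG before quicly can ask it for
randomness. A minimal standalone sketch of the same pattern, using the raw
syscall as the patch does rather than glibc's getrandom() wrapper:

    #include <sys/syscall.h>
    #include <unistd.h>
    #include <openssl/rand.h>

    static int
    seed_openssl_rng (void)
    {
      unsigned char seed[32];

      /* flags = 0: block until the kernel entropy pool is initialized */
      if (syscall (SYS_getrandom, seed, sizeof (seed), 0) != sizeof (seed))
        return -1;

      RAND_seed (seed, sizeof (seed));
      return 0;
    }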
@@ -2550,6 +2561,7 @@ quic_init (vlib_main_t * vm)
transport_register_protocol (TRANSPORT_PROTO_QUIC, &quic_proto,
FIB_PROTOCOL_IP6, ~0);
+ quic_load_openssl3_legacy_provider ();
clib_bitmap_alloc (qm->available_crypto_engines,
app_crypto_engine_n_types ());
quic_register_cipher_suite (CRYPTO_ENGINE_PICOTLS,
@@ -2563,14 +2575,19 @@ quic_init (vlib_main_t * vm)
qm->vnet_crypto_enabled = 1;
if (qm->vnet_crypto_enabled == 1)
{
+ u8 empty_key[32] = {};
quic_register_cipher_suite (CRYPTO_ENGINE_VPP,
quic_crypto_cipher_suites);
qm->default_crypto_engine = CRYPTO_ENGINE_VPP;
+ vec_validate (qm->per_thread_crypto_key_indices, num_threads);
+ for (i = 0; i < num_threads; i++)
+ {
+ qm->per_thread_crypto_key_indices[i] = vnet_crypto_key_add (
+ vm, VNET_CRYPTO_ALG_AES_256_CTR, empty_key, 32);
+ }
}
qm->max_packets_per_key = DEFAULT_MAX_PACKETS_PER_KEY;
- clib_rwlock_init (&qm->crypto_keys_quic_rw_lock);
-
qm->default_quic_cc = QUIC_CC_RENO;
vec_free (a->name);
@@ -2651,7 +2668,6 @@ quic_get_counter_value (u32 event_code)
u32 code, i;
u64 c, sum = 0;
- int index = 0;
vm = vlib_get_main ();
em = &vm->error_main;
@@ -2666,7 +2682,6 @@ quic_get_counter_value (u32 event_code)
if (i < vec_len (em->counters_last_clear))
c -= em->counters_last_clear[i];
sum += c;
- index++;
}
return sum;
}
@@ -2683,7 +2698,6 @@ quic_show_aggregated_stats (vlib_main_t * vm)
clib_memset (&agg_stats, 0, sizeof (agg_stats));
for (i = 0; i < num_workers + 1; i++)
{
- /* *INDENT-OFF* */
pool_foreach (ctx, qm->ctx_pool[i])
{
if (quic_ctx_is_conn (ctx) && ctx->conn)
@@ -2703,7 +2717,6 @@ quic_show_aggregated_stats (vlib_main_t * vm)
else if (quic_ctx_is_stream (ctx))
nstream++;
}
- /* *INDENT-ON* */
}
vlib_cli_output (vm, "-------- Connections --------");
vlib_cli_output (vm, "Current: %u", nconn);
@@ -2878,7 +2891,6 @@ quic_show_connections_command_fn (vlib_main_t * vm,
for (int i = 0; i < num_workers + 1; i++)
{
- /* *INDENT-OFF* */
pool_foreach (ctx, qm->ctx_pool[i])
{
if (quic_ctx_is_stream (ctx) && show_stream)
@@ -2888,7 +2900,6 @@ quic_show_connections_command_fn (vlib_main_t * vm,
else if (quic_ctx_is_conn (ctx) && show_conn)
vlib_cli_output (vm, "%U", quic_format_connection_ctx, ctx);
}
- /* *INDENT-ON* */
}
done:
@@ -2896,7 +2907,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (quic_plugin_crypto_command, static) = {
.path = "quic set crypto api",
.short_help = "quic set crypto api [picotls|vpp]",
@@ -2937,7 +2947,6 @@ VLIB_PLUGIN_REGISTER () =
.description = "Quic transport protocol",
.default_disabled = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
quic_config_fn (vlib_main_t * vm, unformat_input_t * input)
@@ -2957,7 +2966,7 @@ quic_config_fn (vlib_main_t * vm, unformat_input_t * input)
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "fifo-size %U", unformat_memory_size, &tmp))
+ if (unformat (line_input, "fifo-size %U", unformat_memory_size, &tmp))
{
if (tmp >= 0x100000000ULL)
{
@@ -2968,9 +2977,9 @@ quic_config_fn (vlib_main_t * vm, unformat_input_t * input)
}
qm->udp_fifo_size = tmp;
}
- else if (unformat (input, "conn-timeout %u", &i))
+ else if (unformat (line_input, "conn-timeout %u", &i))
qm->connection_timeout = i;
- else if (unformat (input, "fifo-prealloc %u", &i))
+ else if (unformat (line_input, "fifo-prealloc %u", &i))
qm->udp_fifo_prealloc = i;
else
{
@@ -2993,7 +3002,6 @@ quic_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (quic_input_node) =
{
.function = quic_node_fn,
@@ -3003,7 +3011,6 @@ VLIB_REGISTER_NODE (quic_input_node) =
.n_errors = ARRAY_LEN (quic_error_strings),
.error_strings = quic_error_strings,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/quic/quic.h b/src/plugins/quic/quic.h
index 901bdbc39b2..2c5a21c01a4 100644
--- a/src/plugins/quic/quic.h
+++ b/src/plugins/quic/quic.h
@@ -263,8 +263,7 @@ typedef struct quic_main_
u32 connection_timeout;
u8 vnet_crypto_enabled;
-
- clib_rwlock_t crypto_keys_quic_rw_lock;
+ u32 *per_thread_crypto_key_indices;
} quic_main_t;
#endif /* __included_quic_h__ */
diff --git a/src/plugins/quic/quic_crypto.c b/src/plugins/quic/quic_crypto.c
index 602b3f8570c..c5cc5a4a714 100644
--- a/src/plugins/quic/quic_crypto.c
+++ b/src/plugins/quic/quic_crypto.c
@@ -15,22 +15,31 @@
#include <quic/quic.h>
#include <quic/quic_crypto.h>
+#include <vnet/session/session.h>
#include <quicly.h>
#include <picotls/openssl.h>
+#include <pthread.h>
#define QUICLY_EPOCH_1RTT 3
extern quic_main_t quic_main;
-extern quic_ctx_t *quic_get_conn_ctx (quicly_conn_t * conn);
+extern quic_ctx_t *quic_get_conn_ctx (quicly_conn_t *conn);
vnet_crypto_main_t *cm = &crypto_main;
+typedef struct crypto_key_
+{
+ vnet_crypto_alg_t algo;
+ u8 key[32];
+ u16 key_len;
+} crypto_key_t;
+
struct cipher_context_t
{
ptls_cipher_context_t super;
vnet_crypto_op_t op;
vnet_crypto_op_id_t id;
- u32 key_index;
+ crypto_key_t key;
};
struct aead_crypto_context_t
@@ -39,7 +48,8 @@ struct aead_crypto_context_t
EVP_CIPHER_CTX *evp_ctx;
uint8_t static_iv[PTLS_MAX_IV_SIZE];
vnet_crypto_op_t op;
- u32 key_index;
+ crypto_key_t key;
+
vnet_crypto_op_id_t id;
uint8_t iv[PTLS_MAX_IV_SIZE];
};
@@ -114,6 +124,29 @@ Exit:
return ret;
}
+static u32
+quic_crypto_set_key (crypto_key_t *key)
+{
+ u8 thread_index = vlib_get_thread_index ();
+ u32 key_id = quic_main.per_thread_crypto_key_indices[thread_index];
+ vnet_crypto_key_t *vnet_key = vnet_crypto_get_key (key_id);
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_crypto_engine_t *engine;
+
+ vec_foreach (engine, cm->engines)
+ if (engine->key_op_handler)
+ engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_DEL, key_id);
+
+ vnet_key->alg = key->algo;
+ clib_memcpy (vnet_key->data, key->key, key->key_len);
+
+ vec_foreach (engine, cm->engines)
+ if (engine->key_op_handler)
+ engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_ADD, key_id);
+
+ return key_id;
+}
+
static size_t
quic_crypto_aead_decrypt (quic_ctx_t *qctx, ptls_aead_context_t *_ctx,
void *_output, const void *input, size_t inlen,
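The global rwlock around key setup is gone: at init each thread gets one
placeholder vnet crypto key slot, and quic_crypto_set_key() above rewrites
that slot in place before every cipher or AEAD op. This relies on a QUIC
crypto context being used exclusively by its owning thread; under that
assumption the data path needs no lock at all. Condensed, the two halves of
the pattern as they appear in the patch:

    /* init: one placeholder slot per thread (from quic_init) */
    u8 empty_key[32] = {};
    vec_validate (qm->per_thread_crypto_key_indices, num_threads);
    for (i = 0; i < num_threads; i++)
      qm->per_thread_crypto_key_indices[i] = vnet_crypto_key_add (
        vm, VNET_CRYPTO_ALG_AES_256_CTR, empty_key, 32);

    /* per op: load this context's key material into the thread's slot */
    ctx->op.key_index = quic_crypto_set_key (&ctx->key);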
@@ -132,7 +165,7 @@ quic_crypto_aead_decrypt (quic_ctx_t *qctx, ptls_aead_context_t *_ctx,
decrypted_pn);
ctx->op.src = (u8 *) input;
ctx->op.dst = _output;
- ctx->op.key_index = ctx->key_index;
+ ctx->op.key_index = quic_crypto_set_key (&ctx->key);
ctx->op.len = inlen - ctx->super.algo->tag_size;
ctx->op.tag_len = ctx->super.algo->tag_size;
ctx->op.tag = ctx->op.src + ctx->op.len;
@@ -143,7 +176,7 @@ quic_crypto_aead_decrypt (quic_ctx_t *qctx, ptls_aead_context_t *_ctx,
}
void
-quic_crypto_decrypt_packet (quic_ctx_t * qctx, quic_rx_packet_ctx_t * pctx)
+quic_crypto_decrypt_packet (quic_ctx_t *qctx, quic_rx_packet_ctx_t *pctx)
{
ptls_cipher_context_t *header_protection = NULL;
ptls_aead_context_t *aead = NULL;
@@ -172,28 +205,26 @@ quic_crypto_decrypt_packet (quic_ctx_t * qctx, quic_rx_packet_ctx_t * pctx)
/* decipher the header protection, as well as obtaining pnbits, pnlen */
if (encrypted_len < header_protection->algo->iv_size + QUICLY_MAX_PN_SIZE)
return;
- ptls_cipher_init (header_protection,
- pctx->packet.octets.base + pctx->packet.encrypted_off +
- QUICLY_MAX_PN_SIZE);
+ ptls_cipher_init (header_protection, pctx->packet.octets.base +
+ pctx->packet.encrypted_off +
+ QUICLY_MAX_PN_SIZE);
ptls_cipher_encrypt (header_protection, hpmask, hpmask, sizeof (hpmask));
pctx->packet.octets.base[0] ^=
- hpmask[0] & (QUICLY_PACKET_IS_LONG_HEADER (pctx->packet.octets.base[0]) ?
- 0xf : 0x1f);
+ hpmask[0] &
+ (QUICLY_PACKET_IS_LONG_HEADER (pctx->packet.octets.base[0]) ? 0xf : 0x1f);
pnlen = (pctx->packet.octets.base[0] & 0x3) + 1;
for (i = 0; i != pnlen; ++i)
{
pctx->packet.octets.base[pctx->packet.encrypted_off + i] ^=
hpmask[i + 1];
- pnbits =
- (pnbits << 8) | pctx->packet.octets.base[pctx->packet.encrypted_off +
- i];
+ pnbits = (pnbits << 8) |
+ pctx->packet.octets.base[pctx->packet.encrypted_off + i];
}
size_t aead_off = pctx->packet.encrypted_off + pnlen;
- pn =
- quicly_determine_packet_number (pnbits, pnlen * 8,
- next_expected_packet_number);
+ pn = quicly_determine_packet_number (pnbits, pnlen * 8,
+ next_expected_packet_number);
int key_phase_bit =
(pctx->packet.octets.base[0] & QUICLY_KEY_PHASE_BIT) != 0;
@@ -203,7 +234,7 @@ quic_crypto_decrypt_packet (quic_ctx_t * qctx, quic_rx_packet_ctx_t * pctx)
pctx->packet.octets.base[0] ^=
hpmask[0] &
(QUICLY_PACKET_IS_LONG_HEADER (pctx->packet.octets.base[0]) ? 0xf :
- 0x1f);
+ 0x1f);
for (i = 0; i != pnlen; ++i)
{
pctx->packet.octets.base[pctx->packet.encrypted_off + i] ^=
@@ -218,8 +249,8 @@ quic_crypto_decrypt_packet (quic_ctx_t * qctx, quic_rx_packet_ctx_t * pctx)
pctx->packet.octets.len - aead_off, pn, pctx->packet.octets.base,
aead_off)) == SIZE_MAX)
{
- fprintf (stderr,
- "%s: aead decryption failure (pn: %d)\n", __FUNCTION__, pn);
+ fprintf (stderr, "%s: aead decryption failure (pn: %d)\n", __FUNCTION__,
+ pn);
return;
}
@@ -260,7 +291,7 @@ quic_crypto_encrypt_packet (struct st_quicly_crypto_engine_t *engine,
aead_ctx->op.iv = aead_ctx->iv;
ptls_aead__build_iv (aead_ctx->super.algo, aead_ctx->op.iv,
aead_ctx->static_iv, packet_number);
- aead_ctx->op.key_index = aead_ctx->key_index;
+ aead_ctx->op.key_index = quic_crypto_set_key (&aead_ctx->key);
aead_ctx->op.src = (u8 *) input;
aead_ctx->op.dst = output;
aead_ctx->op.len = inlen;
@@ -280,7 +311,8 @@ quic_crypto_encrypt_packet (struct st_quicly_crypto_engine_t *engine,
vnet_crypto_op_init (&hp_ctx->op, hp_ctx->id);
memset (supp.output, 0, sizeof (supp.output));
hp_ctx->op.iv = (u8 *) supp.input;
- hp_ctx->op.key_index = hp_ctx->key_index;
+ hp_ctx->op.key_index = quic_crypto_set_key (&hp_ctx->key);
hp_ctx->op.src = (u8 *) supp.output;
hp_ctx->op.dst = (u8 *) supp.output;
hp_ctx->op.len = sizeof (supp.output);
@@ -301,7 +333,6 @@ quic_crypto_cipher_setup_crypto (ptls_cipher_context_t *_ctx, int is_enc,
{
struct cipher_context_t *ctx = (struct cipher_context_t *) _ctx;
- vlib_main_t *vm = vlib_get_main ();
vnet_crypto_alg_t algo;
if (!strcmp (ctx->super.algo->name, "AES128-CTR"))
{
@@ -326,24 +357,26 @@ quic_crypto_cipher_setup_crypto (ptls_cipher_context_t *_ctx, int is_enc,
if (quic_main.vnet_crypto_enabled)
{
- clib_rwlock_writer_lock (&quic_main.crypto_keys_quic_rw_lock);
- ctx->key_index =
- vnet_crypto_key_add (vm, algo, (u8 *) key, _ctx->algo->key_size);
- clib_rwlock_writer_unlock (&quic_main.crypto_keys_quic_rw_lock);
+ ctx->key.algo = algo;
+ ctx->key.key_len = _ctx->algo->key_size;
+ assert (ctx->key.key_len <= 32);
+ clib_memcpy (&ctx->key.key, key, ctx->key.key_len);
}
return 0;
}
static int
-quic_crypto_aes128ctr_setup_crypto (ptls_cipher_context_t * ctx, int is_enc,
+quic_crypto_aes128ctr_setup_crypto (ptls_cipher_context_t *ctx, int is_enc,
const void *key)
{
return quic_crypto_cipher_setup_crypto (ctx, 1, key, EVP_aes_128_ctr ());
}
static int
-quic_crypto_aes256ctr_setup_crypto (ptls_cipher_context_t * ctx, int is_enc,
+quic_crypto_aes256ctr_setup_crypto (ptls_cipher_context_t *ctx, int is_enc,
const void *key)
{
return quic_crypto_cipher_setup_crypto (ctx, 1, key, EVP_aes_256_ctr ());
@@ -354,7 +387,6 @@ quic_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc,
const void *key, const void *iv,
const EVP_CIPHER *cipher)
{
- vlib_main_t *vm = vlib_get_main ();
struct aead_crypto_context_t *ctx = (struct aead_crypto_context_t *) _ctx;
vnet_crypto_alg_t algo;
@@ -382,11 +414,12 @@ quic_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc,
if (quic_main.vnet_crypto_enabled)
{
clib_memcpy (ctx->static_iv, iv, ctx->super.algo->iv_size);
-
- clib_rwlock_writer_lock (&quic_main.crypto_keys_quic_rw_lock);
- ctx->key_index = vnet_crypto_key_add (vm, algo,
- (u8 *) key, _ctx->algo->key_size);
- clib_rwlock_writer_unlock (&quic_main.crypto_keys_quic_rw_lock);
+ ctx->key.algo = algo;
+ ctx->key.key_len = _ctx->algo->key_size;
+ assert (ctx->key.key_len <= 32);
+ clib_memcpy (&ctx->key.key, key, ctx->key.key_len);
}
return 0;
@@ -469,6 +502,7 @@ ptls_cipher_algorithm_t quic_crypto_aes256ctr = {
quic_crypto_aes256ctr_setup_crypto
};
+#define PTLS_X86_CACHE_LINE_ALIGN_BITS 6
ptls_aead_algorithm_t quic_crypto_aes128gcm = {
"AES128-GCM",
PTLS_AESGCM_CONFIDENTIALITY_LIMIT,
@@ -478,6 +512,9 @@ ptls_aead_algorithm_t quic_crypto_aes128gcm = {
PTLS_AES128_KEY_SIZE,
PTLS_AESGCM_IV_SIZE,
PTLS_AESGCM_TAG_SIZE,
+ { PTLS_TLS12_AESGCM_FIXED_IV_SIZE, PTLS_TLS12_AESGCM_RECORD_IV_SIZE },
+ 1,
+ PTLS_X86_CACHE_LINE_ALIGN_BITS,
sizeof (struct aead_crypto_context_t),
quic_crypto_aead_aes128gcm_setup_crypto
};
@@ -491,18 +528,21 @@ ptls_aead_algorithm_t quic_crypto_aes256gcm = {
PTLS_AES256_KEY_SIZE,
PTLS_AESGCM_IV_SIZE,
PTLS_AESGCM_TAG_SIZE,
+ { PTLS_TLS12_AESGCM_FIXED_IV_SIZE, PTLS_TLS12_AESGCM_RECORD_IV_SIZE },
+ 1,
+ PTLS_X86_CACHE_LINE_ALIGN_BITS,
sizeof (struct aead_crypto_context_t),
quic_crypto_aead_aes256gcm_setup_crypto
};
ptls_cipher_suite_t quic_crypto_aes128gcmsha256 = {
- PTLS_CIPHER_SUITE_AES_128_GCM_SHA256,
- &quic_crypto_aes128gcm, &ptls_openssl_sha256
+ PTLS_CIPHER_SUITE_AES_128_GCM_SHA256, &quic_crypto_aes128gcm,
+ &ptls_openssl_sha256
};
ptls_cipher_suite_t quic_crypto_aes256gcmsha384 = {
- PTLS_CIPHER_SUITE_AES_256_GCM_SHA384,
- &quic_crypto_aes256gcm, &ptls_openssl_sha384
+ PTLS_CIPHER_SUITE_AES_256_GCM_SHA384, &quic_crypto_aes256gcm,
+ &ptls_openssl_sha384
};
ptls_cipher_suite_t *quic_crypto_cipher_suites[] = {
diff --git a/src/plugins/quic/quic_crypto.h b/src/plugins/quic/quic_crypto.h
index 2adb20237a3..7299b613053 100644
--- a/src/plugins/quic/quic_crypto.h
+++ b/src/plugins/quic/quic_crypto.h
@@ -18,6 +18,19 @@
#include <quicly.h>
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+#include <openssl/provider.h>
+
+#define quic_load_openssl3_legacy_provider() \
+ do \
+ { \
+ (void) OSSL_PROVIDER_load (NULL, "legacy"); \
+ } \
+ while (0)
+#else
+#define quic_load_openssl3_legacy_provider()
+#endif
+
struct quic_ctx_t;
extern ptls_cipher_suite_t *quic_crypto_cipher_suites[];
diff --git a/src/plugins/rdma/CMakeLists.txt b/src/plugins/rdma/CMakeLists.txt
index f598ff8c701..ef8bc90c6dd 100644
--- a/src/plugins/rdma/CMakeLists.txt
+++ b/src/plugins/rdma/CMakeLists.txt
@@ -19,17 +19,16 @@ if (NOT IBVERBS_INCLUDE_DIR)
endif()
vpp_plugin_find_library(rdma IBVERBS_LIB libibverbs.a)
-vpp_plugin_find_library(rdma RDMA_UTIL_LIB librdma_util.a)
vpp_plugin_find_library(rdma MLX5_LIB libmlx5.a)
-if (NOT IBVERBS_LIB OR NOT RDMA_UTIL_LIB OR NOT MLX5_LIB)
+if (NOT IBVERBS_LIB OR NOT MLX5_LIB)
message(WARNING "rdma plugin - ibverbs not found - rdma plugin disabled")
return()
endif()
-string_append(RDMA_LINK_FLAGS "-Wl,--whole-archive,${MLX5_LIB},--no-whole-archive")
+string_append(RDMA_LINK_FLAGS "-Wl,--whole-archive,${MLX5_LIB},--no-whole-archive -Wl,--exclude-libs,ALL")
-set(CMAKE_REQUIRED_FLAGS "-fPIC -shared -pthread -Wno-unused-command-line-argument ${RDMA_LINK_FLAGS} ${IBVERBS_LIB} ${RDMA_UTIL_LIB}")
+set(CMAKE_REQUIRED_FLAGS "-fPIC -shared -pthread -Wno-unused-command-line-argument ${RDMA_LINK_FLAGS} ${IBVERBS_LIB}")
set(CMAKE_REQUIRED_INCLUDES "${IBVERBS_INCLUDE_DIR}")
set(CMAKE_REQUIRED_LIBRARIES "c") # force linkage by including libc explicitely
CHECK_C_SOURCE_COMPILES("
@@ -73,5 +72,4 @@ add_vpp_plugin(rdma
LINK_LIBRARIES
${IBVERBS_LIB}
- ${RDMA_UTIL_LIB}
)
diff --git a/src/plugins/rdma/api.c b/src/plugins/rdma/api.c
index 7fe77105596..3fb17ff6ee0 100644
--- a/src/plugins/rdma/api.c
+++ b/src/plugins/rdma/api.c
@@ -27,6 +27,7 @@
#include <rdma/rdma.api_enum.h>
#include <rdma/rdma.api_types.h>
+#define REPLY_MSG_ID_BASE (rm->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static rdma_mode_t
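Defining REPLY_MSG_ID_BASE before including api_helper_macros.h lets the
REPLY_MACRO family add the plugin's message-id base itself, which is why the
handlers below drop the explicit "+ rm->msg_id_base". Roughly:

    #define REPLY_MSG_ID_BASE (rm->msg_id_base)
    #include <vlibapi/api_helper_macros.h>

    /* before: REPLY_MACRO (VL_API_RDMA_DELETE_REPLY + rm->msg_id_base);
       after:  REPLY_MACRO (VL_API_RDMA_DELETE_REPLY);  base added by macro */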
@@ -41,6 +42,8 @@ rdma_api_mode (vl_api_rdma_mode_t mode)
case RDMA_API_MODE_DV:
return RDMA_MODE_DV;
}
+ /* Fail in debug builds; useful for investigating endian issues. */
+ ASSERT (0);
return RDMA_MODE_AUTO;
}
@@ -79,6 +82,35 @@ rdma_api_rss6 (const vl_api_rdma_rss6_t rss6)
}
static void
+vl_api_rdma_create_v4_t_handler (vl_api_rdma_create_v4_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ rdma_main_t *rm = &rdma_main;
+ vl_api_rdma_create_v4_reply_t *rmp;
+ rdma_create_if_args_t args;
+ int rv;
+
+ clib_memset (&args, 0, sizeof (rdma_create_if_args_t));
+
+ args.ifname = mp->host_if;
+ args.name = mp->name;
+ args.rxq_num = mp->rxq_num;
+ args.rxq_size = mp->rxq_size;
+ args.txq_size = mp->txq_size;
+ args.mode = rdma_api_mode (mp->mode);
+ args.disable_striding_rq = 0;
+ args.no_multi_seg = mp->no_multi_seg;
+ args.max_pktlen = mp->max_pktlen;
+ args.rss4 = rdma_api_rss4 (mp->rss4);
+ args.rss6 = rdma_api_rss6 (mp->rss6);
+ rdma_create_if (vm, &args);
+ rv = args.rv;
+
+ REPLY_MACRO2_END (VL_API_RDMA_CREATE_V4_REPLY,
+ ({ rmp->sw_if_index = args.sw_if_index; }));
+}
+
+static void
vl_api_rdma_create_v3_t_handler (vl_api_rdma_create_v3_t *mp)
{
vlib_main_t *vm = vlib_get_main ();
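Because rdma_create_v4 is declared autoendian in rdma.api (further below),
byte swapping is done by generated code: the v4 handler reads mp-> fields
natively and replies through REPLY_MACRO2_END, which performs the reply-side
swap. Hence no ntohl() in the v4 handler, in contrast with v3:

    /* v3, manual endian:     rmp->sw_if_index = ntohl (args.sw_if_index); */
    /* v4, autoendian + _END: rmp->sw_if_index = args.sw_if_index;         */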
@@ -103,7 +135,7 @@ vl_api_rdma_create_v3_t_handler (vl_api_rdma_create_v3_t *mp)
rdma_create_if (vm, &args);
rv = args.rv;
- REPLY_MACRO2 (VL_API_RDMA_CREATE_V3_REPLY + rm->msg_id_base,
+ REPLY_MACRO2 (VL_API_RDMA_CREATE_V3_REPLY,
({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
@@ -130,12 +162,8 @@ vl_api_rdma_create_v2_t_handler (vl_api_rdma_create_v2_t * mp)
rdma_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_RDMA_CREATE_V2_REPLY + rm->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_RDMA_CREATE_V2_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -162,12 +190,8 @@ vl_api_rdma_create_t_handler (vl_api_rdma_create_t * mp)
rdma_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_RDMA_CREATE_REPLY + rm->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_RDMA_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -195,7 +219,7 @@ vl_api_rdma_delete_t_handler (vl_api_rdma_delete_t * mp)
rdma_delete_if (vm, rd);
reply:
- REPLY_MACRO (VL_API_RDMA_DELETE_REPLY + rm->msg_id_base);
+ REPLY_MACRO (VL_API_RDMA_DELETE_REPLY);
}
/* set tup the API message handling tables */
diff --git a/src/plugins/rdma/cli.c b/src/plugins/rdma/cli.c
index 8f191e34b63..bcedd625220 100644
--- a/src/plugins/rdma/cli.c
+++ b/src/plugins/rdma/cli.c
@@ -44,17 +44,15 @@ rdma_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (rdma_create_command, static) = {
.path = "create interface rdma",
.short_help = "create interface rdma <host-if ifname> [name <name>]"
- " [rx-queue-size <size>] [tx-queue-size <size>]"
- " [num-rx-queues <size>] [mode <auto|ibv|dv]"
- " [no-multi-seg] [no-striding]"
- " [max-pktlen <size>]",
+ " [rx-queue-size <size>] [tx-queue-size <size>]"
+ " [num-rx-queues <size>] [mode <auto|ibv|dv>]"
+ " [no-multi-seg] [no-striding]"
+ " [max-pktlen <size>]",
.function = rdma_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
rdma_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -99,14 +97,12 @@ rdma_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (rdma_delete_command, static) = {
.path = "delete interface rdma",
.short_help = "delete interface rdma "
"{<interface> | sw_if_index <sw_idx>}",
.function = rdma_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_rdma_dump_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -162,13 +158,11 @@ test_rdma_dump_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_rdma_mlx5dv_dump_command, static) = {
.path = "test rdma dump",
.short_help = "test rdma dump {<interface> | sw_if_index <sw_idx>}",
.function = test_rdma_dump_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
rdma_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/rdma/device.c b/src/plugins/rdma/device.c
index 1198d99b14e..8aeb586a42d 100644
--- a/src/plugins/rdma/device.c
+++ b/src/plugins/rdma/device.c
@@ -183,11 +183,11 @@ rdma_mac_change (vnet_hw_interface_t * hw, const u8 * old, const u8 * new)
return 0;
}
-static u32
-rdma_dev_change_mtu (rdma_device_t * rd)
+static clib_error_t *
+rdma_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+ u32 frame_size)
{
- rdma_log__ (VLIB_LOG_LEVEL_ERR, rd, "MTU change not supported");
- return ~0;
+ return vnet_error (VNET_ERR_UNSUPPORTED, 0);
}
static u32
@@ -202,8 +202,6 @@ rdma_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
return rdma_dev_set_ucast (rd);
case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
return rdma_dev_set_promisc (rd);
- case ETHERNET_INTERFACE_FLAG_MTU:
- return rdma_dev_change_mtu (rd);
}
rdma_log__ (VLIB_LOG_LEVEL_ERR, rd, "unknown flag %x requested", flags);
@@ -355,18 +353,20 @@ rdma_async_event_cleanup (rdma_device_t * rd)
static clib_error_t *
rdma_register_interface (vnet_main_t * vnm, rdma_device_t * rd)
{
- clib_error_t *err =
- ethernet_register_interface (vnm, rdma_device_class.index,
- rd->dev_instance, rd->hwaddr.bytes,
- &rd->hw_if_index, rdma_flag_change);
-
+ vnet_eth_interface_registration_t eir = {};
+
+ eir.dev_class_index = rdma_device_class.index;
+ eir.dev_instance = rd->dev_instance;
+ eir.address = rd->hwaddr.bytes;
+ eir.cb.flag_change = rdma_flag_change;
+ eir.cb.set_max_frame_size = rdma_set_max_frame_size;
+ rd->hw_if_index = vnet_eth_register_interface (vnm, &eir);
/* Indicate ability to support L3 DMAC filtering and
* initialize interface to L3 non-promisc mode */
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rd->hw_if_index);
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER;
+ vnet_hw_if_set_caps (vnm, rd->hw_if_index, VNET_HW_IF_CAP_MAC_FILTER);
ethernet_set_flags (vnm, rd->hw_if_index,
ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
- return err;
+ return 0;
}
static void
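ethernet_register_interface() gives way to the descriptor-style
vnet_eth_register_interface(): callbacks and device identity travel in a
vnet_eth_interface_registration_t, and MTU handling moves from an
ETHERNET_INTERFACE_FLAG_MTU case into a dedicated set_max_frame_size
callback. The minimal shape, with hypothetical callback and variable names:

    vnet_eth_interface_registration_t eir = {};

    eir.dev_class_index = my_device_class.index; /* hypothetical class */
    eir.dev_instance = dev_instance;
    eir.address = hwaddr;                        /* 6-byte MAC */
    eir.cb.flag_change = my_flag_change;
    eir.cb.set_max_frame_size = my_set_max_frame_size;
    hw_if_index = vnet_eth_register_interface (vnm, &eir);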
@@ -445,9 +445,10 @@ rdma_rxq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc,
if (is_mlx5dv)
{
struct mlx5dv_cq_init_attr dvcq = { };
- dvcq.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
+ dvcq.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE |
+ MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE;
dvcq.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
-
+ dvcq.cqe_size = 64;
if ((cqex = mlx5dv_create_cq (rd->ctx, &cqa, &dvcq)) == 0)
return clib_error_return_unix (0, "Create mlx5dv rx CQ Failed");
}
@@ -717,15 +718,30 @@ rdma_txq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc)
struct ibv_qp_init_attr qpia;
struct ibv_qp_attr qpa;
int qp_flags;
+ int is_mlx5dv = !!(rd->flags & RDMA_DEVICE_F_MLX5DV);
vec_validate_aligned (rd->txqs, qid, CLIB_CACHE_LINE_BYTES);
txq = vec_elt_at_index (rd->txqs, qid);
ASSERT (is_pow2 (n_desc));
txq->bufs_log2sz = min_log2 (n_desc);
vec_validate_aligned (txq->bufs, n_desc - 1, CLIB_CACHE_LINE_BYTES);
-
- if ((txq->cq = ibv_create_cq (rd->ctx, n_desc, NULL, NULL, 0)) == 0)
- return clib_error_return_unix (0, "Create CQ Failed");
+ if (is_mlx5dv)
+ {
+ struct ibv_cq_init_attr_ex cqa = {};
+ struct ibv_cq_ex *cqex;
+ struct mlx5dv_cq_init_attr dvcq = {};
+ dvcq.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE;
+ dvcq.cqe_size = 64;
+ cqa.cqe = n_desc;
+ if ((cqex = mlx5dv_create_cq (rd->ctx, &cqa, &dvcq)) == 0)
+ return clib_error_return_unix (0, "Create mlx5dv tx CQ Failed");
+ txq->cq = ibv_cq_ex_to_cq (cqex);
+ }
+ else
+ {
+ if ((txq->cq = ibv_create_cq (rd->ctx, n_desc, NULL, NULL, 0)) == 0)
+ return clib_error_return_unix (0, "Create CQ Failed");
+ }
memset (&qpia, 0, sizeof (qpia));
qpia.send_cq = txq->cq;
@@ -866,7 +882,7 @@ sysfs_path_to_pci_addr (char *path, vlib_pci_addr_t * addr)
unformat_input_t in;
u8 *s;
- s = clib_sysfs_link_to_name (path);
+ s = clib_file_get_resolved_basename (path);
if (!s)
return 0;
@@ -1022,7 +1038,7 @@ are explicitly disabled, and if the interface supports it.*/
/*
* FIXME: add support for interrupt mode
* vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, rd->hw_if_index);
- * hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ * hw->caps |= VNET_HW_IF_CAP_INT_MODE;
*/
vnet_hw_if_set_input_node (vnm, rd->hw_if_index, rdma_input_node.index);
@@ -1136,15 +1152,4 @@ rdma_init (vlib_main_t * vm)
return 0;
}
-VLIB_INIT_FUNCTION (rdma_init) =
-{
- .runs_after = VLIB_INITS ("pci_bus_init"),
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+VLIB_INIT_FUNCTION (rdma_init);
diff --git a/src/plugins/rdma/format.c b/src/plugins/rdma/format.c
index aada52a1ec3..a999460bd55 100644
--- a/src/plugins/rdma/format.c
+++ b/src/plugins/rdma/format.c
@@ -58,13 +58,13 @@ format_rdma_bit_flag (u8 * s, va_list * args)
while (flags)
{
- if ((flags & (1 << i)))
+ if ((flags & ((u64) 1 << i)))
{
if (i < n_strs && strs[i] != 0)
s = format (s, " %s", strs[i]);
else
s = format (s, " unknown(%u)", i);
- flags ^= 1 << i;
+ flags ^= (u64) 1 << i;
}
i++;
}
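The casts matter: flags is 64 bits wide, but "1 << i" is computed as a
32-bit int, which is undefined behavior once i reaches 31 and silently wrong
past it. Promoting the constant keeps all 64 flag bits reachable:

    u64 flags = 1ULL << 40;

    /* (1 << 40) would shift a 32-bit int: undefined behavior */
    if (flags & ((u64) 1 << 40))
      ; /* matches, since the operand is widened before the shift */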
@@ -122,8 +122,8 @@ format_rdma_device (u8 * s, va_list * args)
format_vlib_pci_addr, &rd->pci->addr);
if ((d = vlib_pci_get_device_info (vm, &rd->pci->addr, 0)))
{
- s = format (s, "%Uproduct name: %s\n", format_white_space, indent,
- d->product_name ? (char *) d->product_name : "");
+ s = format (s, "%Uproduct name: %v\n", format_white_space, indent,
+ d->product_name);
s = format (s, "%Upart number: %U\n", format_white_space, indent,
format_vlib_pci_vpd, d->vpd_r, "PN");
s = format (s, "%Urevision: %U\n", format_white_space, indent,
@@ -281,7 +281,7 @@ format_rdma_rxq (u8 * s, va_list * args)
if (rd->flags & RDMA_DEVICE_F_MLX5DV)
{
- u32 next_cqe_index = rxq->cq_ci & (rxq->size - 1);
+ u32 next_cqe_index = rxq->cq_ci & ((1 << rxq->log2_cq_size) - 1);
s = format (s, "\n%Uwq: stride %u wqe-cnt %u",
format_white_space, indent + 2, rxq->wq_stride,
rxq->wqe_cnt);
@@ -292,9 +292,8 @@ format_rdma_rxq (u8 * s, va_list * args)
next_cqe_index);
s = format (s, "\n%U%U", format_white_space, indent + 6,
format_mlx5_cqe_rx, rxq->cqes + next_cqe_index);
- s = format (s, "\n%U%U", format_white_space, indent + 6,
- format_hexdump, rxq->cqes + next_cqe_index,
- sizeof (mlx5dv_cqe_t));
+ s = format (s, "\n%U%U", format_white_space, indent + 6, format_hexdump,
+ rxq->cqes + next_cqe_index, (u32) sizeof (mlx5dv_cqe_t));
}
return s;
diff --git a/src/plugins/rdma/input.c b/src/plugins/rdma/input.c
index f1c508affa2..a7d41a1684d 100644
--- a/src/plugins/rdma/input.c
+++ b/src/plugins/rdma/input.c
@@ -228,7 +228,6 @@ rdma_device_input_refill (vlib_main_t * vm, rdma_device_t * rd,
about what RDMA core does (CYCLIC_RQ or LINKED_LIST_RQ). In cyclic
mode, the SRQ header is ignored anyways... */
-/* *INDENT-OFF* */
if (is_striding && !(current_data_seg & (wqe_sz - 1)))
*(mlx5dv_wqe_srq_next_t *) wqe = (mlx5dv_wqe_srq_next_t)
{
@@ -237,7 +236,6 @@ rdma_device_input_refill (vlib_main_t * vm, rdma_device_t * rd,
.signature = 0,
.rsvd1 = {0}
};
-/* *INDENT-ON* */
/* TODO: when log_skip_wqe > 2, hw_prefetcher doesn't work, lots of LLC store
misses occur for wqes, to be fixed... */
@@ -609,6 +607,7 @@ rdma_device_poll_cq_mlx5dv (rdma_device_t * rd, rdma_rxq_t * rxq,
n_rx_packets++;
cq_ci++;
byte_cnt++;
+ cqe_flags++;
continue;
}
@@ -670,46 +669,77 @@ rdma_device_mlx5dv_l3_validate_and_swap_bc (rdma_per_thread_data_t
* ptd, int n_rx_packets, u32 * bc)
{
u16 mask = CQE_FLAG_L3_HDR_TYPE_MASK | CQE_FLAG_L3_OK;
- u16 match = CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT;
+ u16 match =
+ CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT | CQE_FLAG_L3_OK;
+
+ /* convert mask/match to big endian for subsequent comparison */
+ mask = clib_host_to_net_u16 (mask);
+ match = clib_host_to_net_u16 (match);
/* verify that all ip4 packets have l3_ok flag set and convert packet
length from network to host byte order */
int skip_ip4_cksum = 1;
+ int n_left = n_rx_packets;
+ u16 *cqe_flags = ptd->cqe_flags;
#if defined CLIB_HAVE_VEC256
- u16x16 mask16 = u16x16_splat (mask);
- u16x16 match16 = u16x16_splat (match);
- u16x16 r = { };
+ if (n_left >= 16)
+ {
+ u16x16 mask16 = u16x16_splat (mask);
+ u16x16 match16 = u16x16_splat (match);
+ u16x16 r16 = {};
+
+ while (n_left >= 16)
+ {
+ r16 |= (*(u16x16 *) cqe_flags & mask16) != match16;
- for (int i = 0; i * 16 < n_rx_packets; i++)
- r |= (ptd->cqe_flags16[i] & mask16) != match16;
+ *(u32x8 *) bc = u32x8_byte_swap (*(u32x8 *) bc);
+ *(u32x8 *) (bc + 8) = u32x8_byte_swap (*(u32x8 *) (bc + 8));
- if (!u16x16_is_all_zero (r))
- skip_ip4_cksum = 0;
+ cqe_flags += 16;
+ bc += 16;
+ n_left -= 16;
+ }
- for (int i = 0; i < n_rx_packets; i += 8)
- *(u32x8 *) (bc + i) = u32x8_byte_swap (*(u32x8 *) (bc + i));
+ if (!u16x16_is_all_zero (r16))
+ skip_ip4_cksum = 0;
+ }
#elif defined CLIB_HAVE_VEC128
- u16x8 mask8 = u16x8_splat (mask);
- u16x8 match8 = u16x8_splat (match);
- u16x8 r = { };
+ if (n_left >= 8)
+ {
+ u16x8 mask8 = u16x8_splat (mask);
+ u16x8 match8 = u16x8_splat (match);
+ u16x8 r8 = {};
- for (int i = 0; i * 8 < n_rx_packets; i++)
- r |= (ptd->cqe_flags8[i] & mask8) != match8;
+ while (n_left >= 8)
+ {
+ r8 |= (*(u16x8 *) cqe_flags & mask8) != match8;
- if (!u16x8_is_all_zero (r))
- skip_ip4_cksum = 0;
+ *(u32x4 *) bc = u32x4_byte_swap (*(u32x4 *) bc);
+ *(u32x4 *) (bc + 4) = u32x4_byte_swap (*(u32x4 *) (bc + 4));
- for (int i = 0; i < n_rx_packets; i += 4)
- *(u32x4 *) (bc + i) = u32x4_byte_swap (*(u32x4 *) (bc + i));
-#else
- for (int i = 0; i < n_rx_packets; i++)
- if ((ptd->cqe_flags[i] & mask) != match)
- skip_ip4_cksum = 0;
+ cqe_flags += 8;
+ bc += 8;
+ n_left -= 8;
+ }
- for (int i = 0; i < n_rx_packets; i++)
- bc[i] = clib_net_to_host_u32 (bc[i]);
+ if (!u16x8_is_all_zero (r8))
+ skip_ip4_cksum = 0;
+ }
#endif
+
+ while (n_left >= 1)
+ {
+ if ((cqe_flags[0] & mask) != match)
+ skip_ip4_cksum = 0;
+
+ bc[0] = clib_net_to_host_u32 (bc[0]);
+
+ cqe_flags += 1;
+ bc += 1;
+ n_left -= 1;
+ }
+
return skip_ip4_cksum;
}
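The rewrite replaces the old whole-array vector loops, which tested and
byte-swapped lanes past n_rx_packets (stale flag data could spuriously clear
skip_ip4_cksum), with the standard chunked pattern: consume 16 or 8 elements
while enough remain, then finish element by element. It also adds
CQE_FLAG_L3_OK to the match and pre-swaps mask/match to big endian so lane
compares run on raw CQE data. Reduced to its skeleton:

    int n_left = n_rx_packets;

    while (n_left >= 8)
      {
        /* vector compare + byte swap over 8 lanes */
        cqe_flags += 8;
        bc += 8;
        n_left -= 8;
      }

    while (n_left > 0) /* scalar tail, never reads past the end */
      {
        if ((cqe_flags[0] & mask) != match)
          skip_ip4_cksum = 0;
        bc[0] = clib_net_to_host_u32 (bc[0]);
        cqe_flags++;
        bc++;
        n_left--;
      }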
@@ -945,7 +975,7 @@ rdma_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* update buffer template for input feature arcs if any */
next_index = rd->per_interface_next_index;
if (PREDICT_FALSE (vnet_device_input_have_features (rd->sw_if_index)))
- vnet_feature_start_device_input_x1 (rd->sw_if_index, &next_index, &bt);
+ vnet_feature_start_device_input (rd->sw_if_index, &next_index, &bt);
vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -1028,7 +1058,7 @@ VLIB_NODE_FN (rdma_input_node) (vlib_main_t * vm,
if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_ADMIN_UP) == 0)
continue;
- if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_ERROR))
+ if (PREDICT_FALSE (rd->flags & RDMA_DEVICE_F_ERROR))
continue;
if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
@@ -1041,7 +1071,6 @@ VLIB_NODE_FN (rdma_input_node) (vlib_main_t * vm,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (rdma_input_node) = {
.name = "rdma-input",
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -1053,7 +1082,6 @@ VLIB_REGISTER_NODE (rdma_input_node) = {
.error_strings = rdma_input_error_strings,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/rdma/output.c b/src/plugins/rdma/output.c
index 3cc3ab74437..8574ac32e61 100644
--- a/src/plugins/rdma/output.c
+++ b/src/plugins/rdma/output.c
@@ -480,20 +480,20 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm,
* common tx/free functions
*/
-static_always_inline void
-rdma_device_output_free (vlib_main_t * vm, const vlib_node_runtime_t * node,
- rdma_txq_t * txq, int is_mlx5dv)
+static void
+rdma_device_output_free (vlib_main_t *vm, const vlib_node_runtime_t *node,
+ const rdma_device_t *rd, rdma_txq_t *txq)
{
- if (is_mlx5dv)
+ if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
rdma_device_output_free_mlx5 (vm, node, txq);
else
rdma_device_output_free_ibverb (vm, node, txq);
}
-static_always_inline u32
-rdma_device_output_tx_try (vlib_main_t * vm, const vlib_node_runtime_t * node,
- const rdma_device_t * rd, rdma_txq_t * txq,
- u32 n_left_from, u32 * bi, int is_mlx5dv)
+static u32
+rdma_device_output_tx_try (vlib_main_t *vm, const vlib_node_runtime_t *node,
+ const rdma_device_t *rd, rdma_txq_t *txq,
+ u32 n_left_from, u32 *bi)
{
vlib_buffer_t *b[VLIB_FRAME_SIZE];
const u32 mask = pow2_mask (txq->bufs_log2sz);
@@ -511,30 +511,28 @@ rdma_device_output_tx_try (vlib_main_t * vm, const vlib_node_runtime_t * node,
vlib_get_buffers (vm, bi, b, n_left_from);
- n_left_from = is_mlx5dv ?
- rdma_device_output_tx_mlx5 (vm, node, rd, txq, n_left_from, bi,
- b) : rdma_device_output_tx_ibverb (vm, node,
- rd, txq,
- n_left_from,
- bi, b);
+ if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
+ n_left_from =
+ rdma_device_output_tx_mlx5 (vm, node, rd, txq, n_left_from, bi, b);
+ else
+ n_left_from =
+ rdma_device_output_tx_ibverb (vm, node, rd, txq, n_left_from, bi, b);
return n_left_from;
}
-static_always_inline uword
+static uword
rdma_device_output_tx (vlib_main_t *vm, vlib_node_runtime_t *node,
rdma_device_t *rd, rdma_txq_t *txq, u32 *from,
- u32 n_left_from, int is_mlx5dv)
+ u32 n_left_from)
{
int i;
for (i = 0; i < RDMA_TX_RETRIES && n_left_from > 0; i++)
{
u32 n_enq;
- rdma_device_output_free (vm, node, txq, is_mlx5dv);
- n_enq = rdma_device_output_tx_try (vm, node, rd, txq, n_left_from, from,
- is_mlx5dv);
-
+ rdma_device_output_free (vm, node, rd, txq);
+ n_enq = rdma_device_output_tx_try (vm, node, rd, txq, n_left_from, from);
n_left_from -= n_enq;
from += n_enq;
}
@@ -560,12 +558,7 @@ VNET_DEVICE_CLASS_TX_FN (rdma_device_class) (vlib_main_t * vm,
clib_spinlock_lock_if_init (&txq->lock);
- if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
- n_left = rdma_device_output_tx (vm, node, rd, txq, from, n_buffers,
- 1 /* is_mlx5dv */);
- else
- n_left = rdma_device_output_tx (vm, node, rd, txq, from, n_buffers,
- 0 /* is_mlx5dv */);
+ n_left = rdma_device_output_tx (vm, node, rd, txq, from, n_buffers);
clib_spinlock_unlock_if_init (&txq->lock);
diff --git a/src/plugins/rdma/plugin.c b/src/plugins/rdma/plugin.c
index b0dddee42b6..0d2cccc96f8 100644
--- a/src/plugins/rdma/plugin.c
+++ b/src/plugins/rdma/plugin.c
@@ -19,12 +19,10 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "RDMA IBverbs Device Driver",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/rdma/rdma.api b/src/plugins/rdma/rdma.api
index f2c70c7e514..4c06d8c6658 100644
--- a/src/plugins/rdma/rdma.api
+++ b/src/plugins/rdma/rdma.api
@@ -98,6 +98,8 @@ enum rdma_rss6
};
/** \brief
+ Same as v4, just not an autoendian (expect buggy handling of flag values).
+
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param host_if - Linux netdev interface name
@@ -114,6 +116,9 @@ enum rdma_rss6
define rdma_create_v3
{
+ option deprecated;
+ option replaced_by="rdma_create_v4";
+
u32 client_index;
u32 context;
@@ -130,6 +135,38 @@ define rdma_create_v3
option vat_help = "<host-if ifname> [name <name>] [rx-queue-size <size>] [tx-queue-size <size>] [num-rx-queues <size>] [mode <auto|ibv|dv>] [no-multi-seg] [max-pktlen <size>] [rss <ipv4|ipv4-udp|ipv4-tcp>] [rss <ipv6|ipv6-udp|ipv6-tcp>]";
};
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param host_if - Linux netdev interface name
+ @param name - new rdma interface name
+ @param rxq_num - number of receive queues (optional)
+ @param rxq_size - receive queue size (optional)
+ @param txq_size - transmit queue size (optional)
+ @param mode - operation mode (optional)
+ @param no_multi_seg (optional) - disable chained buffer RX
+ @param max_pktlen (optional) - maximal RX packet size.
+ @param rss4 (optional) - IPv4 RSS
+ @param rss6 (optional) - IPv6 RSS
+*/
+
+autoendian define rdma_create_v4
+{
+ u32 client_index;
+ u32 context;
+
+ string host_if[64];
+ string name[64];
+ u16 rxq_num [default=1];
+ u16 rxq_size [default=1024];
+ u16 txq_size [default=1024];
+ vl_api_rdma_mode_t mode [default=0];
+ bool no_multi_seg [default=0];
+ u16 max_pktlen [default=0];
+ vl_api_rdma_rss4_t rss4 [default=0];
+ vl_api_rdma_rss6_t rss6 [default=0];
+ option vat_help = "<host-if ifname> [name <name>] [rx-queue-size <size>] [tx-queue-size <size>] [num-rx-queues <size>] [mode <auto|ibv|dv>] [no-multi-seg] [max-pktlen <size>] [rss <ipv4|ipv4-udp|ipv4-tcp>] [rss <ipv6|ipv6-udp|ipv6-tcp>]";
+};
/** \brief
@param context - sender context, to match reply w/ request
@@ -139,6 +176,8 @@ define rdma_create_v3
define rdma_create_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
@@ -152,6 +191,8 @@ define rdma_create_reply
define rdma_create_v2_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
@@ -176,6 +217,19 @@ define rdma_create_v3_reply
@param sw_if_index - interface index
*/
+autoendian define rdma_create_v4_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index
+*/
+
autoreply define rdma_delete
{
u32 client_index;
diff --git a/src/plugins/rdma/rdma_doc.md b/src/plugins/rdma/rdma_doc.md
deleted file mode 100644
index 3fed5b6fc49..00000000000
--- a/src/plugins/rdma/rdma_doc.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# RDMA (ibverb) Ethernet driver {#rdma_doc}
-
-This driver relies on Linux rdma-core (libibverb) userspace poll-mode driver
-to rx/tx Ethernet packets. Despite using the RDMA APIs, this is **not** about
-RDMA (no Infiniband, no RoCE, no iWARP), just pure traditional Ethernet
-packets.
-
-## Maturity level
-Under development: it should work, but has not been thoroughly tested.
-
-## Supported Hardware
- - Mellanox ConnectX-4
- - Mellanox ConnectX-5
-
-## Features
- - bifurcation: MAC based flow steering for transparent sharing of a single
-physical port between multiple virtual interfaces including Linux netdev
- - multiqueue
-
-## Security considerations
-When creating a rdma interface, it will receive all packets to the MAC address
-attributed to the interface plus a copy of all broadcast and multicast
-traffic.
-The MAC address is under the control of VPP: **the user controlling VPP can
-divert all traffic of any MAC address to the VPP process, including the Linux
-netdev MAC address as long as it can create a rdma interface**.
-The rights to create a rdma interface are controlled by the access rights of
-the `/dev/infiniband/uverbs[0-9]+`device nodes.
-
-## Quickstart
-1. Make sure the `ib_uverbs` module is loaded:
-```
-~# modprobe ib_uverbs
-```
-2. In VPP, create a new rdma virtual interface tied to the Linux netdev of the
-physical port you want to use (`enp94s0f0` in this example):
-```
-vpp# create int rdma host-if enp94s0f0 name rdma-0
-```
-3. Use the interface as usual, eg.:
-```
-vpp# set int ip addr rdma-0 1.1.1.1/24
-vpp# set int st rdma-0 up
-vpp# ping 1.1.1.100`
-```
-
-## Containers support
-It should work in containers as long as:
- - the `ib_uverbs` module is loaded
- - the device nodes `/dev/infiniband/uverbs[0-9]+` are usable from the
- container (but see [security considerations](#Security considerations))
-
-## SR-IOV VFs support
-It should work on SR-IOV VFs the same way it does with PFs. Because of VFs
-security containment features, make sure the MAC address of the rdma VPP
-interface matches the MAC address assigned to the underlying VF.
-For example:
-```
-host# echo 1 > /sys/class/infiniband/mlx5_0/device/sriov_numvfs
-host# ip l set dev enp94s0f0 vf 0 mac 92:5d:f5:df:b1:6f spoof on trust off
-host# ip l set dev enp94s0f2 up
-vpp# create int rdma host-if enp94s0f2 name rdma-0
-vpp# set int mac address rdma-0 92:5d:f5:df:b1:6f
-```
-If you plan to use L2 features such as switching, make sure the underlying
-VF is configured in trusted mode and spoof-checking is disabled (of course, be
-aware of the [security considerations](#Security considerations)):
-```
-host# ip l set dev enp94s0f0 vf 0 spoof off trust on
-```
-
-## Direct Verb mode
-Direct Verb allows the driver to access the NIC HW RX/TX rings directly
-instead of having to go through libibverb and suffering associated overhead.
-It will be automatically selected if the adapter supports it.
diff --git a/src/plugins/rdma/rdma_doc.rst b/src/plugins/rdma/rdma_doc.rst
new file mode 100644
index 00000000000..c22ea550a75
--- /dev/null
+++ b/src/plugins/rdma/rdma_doc.rst
@@ -0,0 +1,102 @@
+RDMA (ibverb) device driver
+===========================
+
+This driver relies on Linux rdma-core (libibverb) userspace poll-mode
+driver to rx/tx Ethernet packets. Despite using the RDMA APIs, this is
+**not** about RDMA (no Infiniband, no RoCE, no iWARP), just pure
+traditional Ethernet packets.
+
+Maturity level
+--------------
+
+Under development: it should work, but has not been thoroughly tested.
+
+Supported Hardware
+------------------
+
+- Mellanox ConnectX-4
+- Mellanox ConnectX-5
+
+Features
+--------
+
+- bifurcation: MAC based flow steering for transparent sharing of a
+ single physical port between multiple virtual interfaces including
+ Linux netdev
+- multiqueue
+
+Security considerations
+-----------------------
+
+A newly created rdma interface receives all packets sent to the MAC
+address attributed to the interface, plus a copy of all broadcast and
+multicast traffic. The MAC address is under the control of VPP: **the
+user controlling VPP can divert all traffic of any MAC address to the
+VPP process, including the Linux netdev MAC address, as long as it can
+create a rdma interface**. The rights to create a rdma interface are
+controlled by the access rights of the
+``/dev/infiniband/uverbs[0-9]+`` device nodes.
+
+Quickstart
+----------
+
+1. Make sure the ``ib_uverbs`` module is loaded:
+
+::
+
+ ~# modprobe ib_uverbs
+
+2. In VPP, create a new rdma virtual interface tied to the Linux netdev
+ of the physical port you want to use (``enp94s0f0`` in this example):
+
+::
+
+ vpp# create int rdma host-if enp94s0f0 name rdma-0
+
+3. Use the interface as usual, e.g.:
+
+::
+
+ vpp# set int ip addr rdma-0 1.1.1.1/24
+ vpp# set int st rdma-0 up
+ vpp# ping 1.1.1.100
+
+Containers support
+------------------
+
+It should work in containers as long as:
+
+- the ``ib_uverbs`` module is loaded
+- the device nodes ``/dev/infiniband/uverbs[0-9]+`` are usable from the
+  container (but see `security considerations <#Security%20considerations>`__)
+
+SR-IOV VFs support
+------------------
+
+It should work on SR-IOV VFs the same way it does with PFs. Because of
+VF security containment features, make sure the MAC address of the rdma
+VPP interface matches the MAC address assigned to the underlying VF. For
+example:
+
+::
+
+ host# echo 1 > /sys/class/infiniband/mlx5_0/device/sriov_numvfs
+ host# ip l set dev enp94s0f0 vf 0 mac 92:5d:f5:df:b1:6f spoof on trust off
+ host# ip l set dev enp94s0f2 up
+ vpp# create int rdma host-if enp94s0f2 name rdma-0
+ vpp# set int mac address rdma-0 92:5d:f5:df:b1:6f
+
+If you plan to use L2 features such as switching, make sure the
+underlying VF is configured in trusted mode and spoof-checking is
+disabled (of course, be aware of the `security
+considerations <#Security%20considerations>`__):
+
+::
+
+ host# ip l set dev enp94s0f0 vf 0 spoof off trust on
+
+Direct Verb mode
+----------------
+
+Direct Verb allows the driver to access the NIC HW RX/TX rings directly
+instead of having to go through libibverb and suffering associated
+overhead. It will be automatically selected if the adapter supports it.
diff --git a/src/plugins/rdma/rdma_mlx5dv.h b/src/plugins/rdma/rdma_mlx5dv.h
index efcefe7fbf7..bf01a3a37d6 100644
--- a/src/plugins/rdma/rdma_mlx5dv.h
+++ b/src/plugins/rdma/rdma_mlx5dv.h
@@ -24,16 +24,16 @@
#include <vppinfra/types.h>
#include <vppinfra/error.h>
/* CQE flags - bits 16-31 of qword at offset 0x1c */
-#define CQE_FLAG_L4_OK 10
-#define CQE_FLAG_L3_OK 9
-#define CQE_FLAG_L2_OK 8
-#define CQE_FLAG_IP_FRAG 7
+#define CQE_FLAG_L4_OK (1 << 10)
+#define CQE_FLAG_L3_OK (1 << 9)
+#define CQE_FLAG_L2_OK (1 << 8)
+#define CQE_FLAG_IP_FRAG (1 << 7)
#define CQE_FLAG_L4_HDR_TYPE(f) (((f) >> 4) & 7)
#define CQE_FLAG_L3_HDR_TYPE_SHIFT (2)
#define CQE_FLAG_L3_HDR_TYPE_MASK (3 << CQE_FLAG_L3_HDR_TYPE_SHIFT)
#define CQE_FLAG_L3_HDR_TYPE(f) (((f) & CQE_FLAG_L3_HDR_TYPE_MASK) >> CQE_FLAG_L3_HDR_TYPE_SHIFT)
-#define CQE_FLAG_L3_HDR_TYPE_IP4 1
-#define CQE_FLAG_L3_HDR_TYPE_IP6 2
+#define CQE_FLAG_L3_HDR_TYPE_IP4 2
+#define CQE_FLAG_L3_HDR_TYPE_IP6 1
#define CQE_FLAG_IP_EXT_OPTS 1
/* CQE byte count (Striding RQ) */
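These constants are now masks rather than bit positions, so call sites test
them with a plain bitwise AND; the IPv4/IPv6 header-type codes are also
swapped relative to the old (incorrect) values. Typical use, assuming byte
order has already been handled:

    u16 f = CQE_FLAG_L3_OK |
            (CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT);

    if ((f & CQE_FLAG_L3_OK) &&
        CQE_FLAG_L3_HDR_TYPE (f) == CQE_FLAG_L3_HDR_TYPE_IP4)
      ; /* IPv4 packet with a valid L3 checksum */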
diff --git a/src/plugins/rdma/test_api.c b/src/plugins/rdma/test_api.c
index e9d5fcaad98..4ec4d3bf345 100644
--- a/src/plugins/rdma/test_api.c
+++ b/src/plugins/rdma/test_api.c
@@ -189,6 +189,41 @@ api_rdma_create_v3 (vat_main_t *vam)
return ret;
}
+static int
+api_rdma_create_v4 (vat_main_t *vam)
+{
+ vl_api_rdma_create_v4_t *mp;
+ rdma_create_if_args_t args;
+ int ret;
+
+ if (!unformat_user (vam->input, unformat_rdma_create_if_args, &args))
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error, vam->input);
+ return -99;
+ }
+
+ M (RDMA_CREATE_V4, mp);
+
+ snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s", args.ifname);
+ if (args.name)
+ snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name);
+ else
+ mp->name[0] = 0;
+ mp->rxq_num = args.rxq_num;
+ mp->rxq_size = args.rxq_size;
+ mp->txq_size = args.txq_size;
+ mp->mode = api_rdma_mode (args.mode);
+ mp->no_multi_seg = args.no_multi_seg;
+ mp->max_pktlen = args.max_pktlen;
+ mp->rss4 = api_rdma_rss4 (args.rss4);
+ mp->rss6 = api_rdma_rss6 (args.rss6);
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
/* rdma-create reply handler */
static void
vl_api_rdma_create_reply_t_handler (vl_api_rdma_create_reply_t * mp)
@@ -243,6 +278,24 @@ vl_api_rdma_create_v3_reply_t_handler (vl_api_rdma_create_v3_reply_t *mp)
vam->regenerate_interface_table = 1;
}
+/* rdma-create reply handler v4 */
+static void
+vl_api_rdma_create_v4_reply_t_handler (vl_api_rdma_create_v4_reply_t *mp)
+{
+ vat_main_t *vam = rdma_test_main.vat_main;
+ i32 retval = mp->retval;
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created rdma with sw_if_index %d\n",
+ ntohl (mp->sw_if_index));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vam->regenerate_interface_table = 1;
+}
+
/* rdma delete API */
static int
api_rdma_delete (vat_main_t * vam)
diff --git a/src/plugins/snort/cli.c b/src/plugins/snort/cli.c
index cbb33c7abe8..08740f41b37 100644
--- a/src/plugins/snort/cli.c
+++ b/src/plugins/snort/cli.c
@@ -85,6 +85,7 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input,
clib_error_t *err = 0;
u8 *name = 0;
u32 sw_if_index = ~0;
+ snort_attach_dir_t dir = SNORT_INOUT;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -97,6 +98,12 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "instance %s", &name))
;
+ else if (unformat (line_input, "input"))
+ dir = SNORT_INPUT;
+ else if (unformat (line_input, "output"))
+ dir = SNORT_OUTPUT;
+ else if (unformat (line_input, "inout"))
+ dir = SNORT_INOUT;
else
{
err = clib_error_return (0, "unknown input `%U'",
@@ -117,7 +124,8 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
}
- err = snort_interface_enable_disable (vm, (char *) name, sw_if_index, 1);
+ err =
+ snort_interface_enable_disable (vm, (char *) name, sw_if_index, 1, dir);
done:
vec_free (name);
@@ -127,7 +135,8 @@ done:
VLIB_CLI_COMMAND (snort_attach_command, static) = {
.path = "snort attach",
- .short_help = "snort attach instance <name> interface <if-name>",
+ .short_help = "snort attach instance <name> interface <if-name> "
+ "[input|ouput|inout]",
.function = snort_attach_command_fn,
};
@@ -163,7 +172,7 @@ snort_detach_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
}
- err = snort_interface_enable_disable (vm, 0, sw_if_index, 0);
+ err = snort_interface_enable_disable (vm, 0, sw_if_index, 0, SNORT_INOUT);
done:
unformat_free (line_input);
diff --git a/src/plugins/snort/daq_vpp.c b/src/plugins/snort/daq_vpp.c
index 090b28af6f4..386092a0382 100644
--- a/src/plugins/snort/daq_vpp.c
+++ b/src/plugins/snort/daq_vpp.c
@@ -113,6 +113,7 @@ typedef struct _vpp_context
daq_vpp_input_mode_t input_mode;
const char *socket_name;
+ volatile bool interrupted;
} VPP_Context_t;
static VPP_Context_t *global_vpp_ctx = 0;
@@ -480,6 +481,16 @@ vpp_daq_start (void *handle)
}
static int
+vpp_daq_interrupt (void *handle)
+{
+ VPP_Context_t *vc = (VPP_Context_t *) handle;
+
+ vc->interrupted = true;
+
+ return DAQ_SUCCESS;
+}
+
+static int
vpp_daq_get_stats (void *handle, DAQ_Stats_t *stats)
{
memset (stats, 0, sizeof (DAQ_Stats_t));
@@ -532,6 +543,7 @@ vpp_daq_msg_receive_one (VPP_Context_t *vc, VPPQueuePair *qp,
dd->pkthdr.pktlen = d->length;
dd->pkthdr.address_space_id = d->address_space_id;
dd->msg.data = vc->bpools[d->buffer_pool].base + d->offset;
+ dd->msg.data_len = d->length;
next = next + 1;
msgs[0] = &dd->msg;
@@ -550,7 +562,16 @@ vpp_daq_msg_receive (void *handle, const unsigned max_recv,
{
VPP_Context_t *vc = (VPP_Context_t *) handle;
uint32_t n_qpairs_left = vc->num_qpairs;
- uint32_t n, n_events, n_recv = 0;
+ uint32_t n, n_recv = 0;
+ int32_t n_events;
+
+ /* If the receive has been interrupted, return immediately. */
+ if (vc->interrupted)
+ {
+ vc->interrupted = false;
+ *rstat = DAQ_RSTAT_INTERRUPTED;
+ return 0;
+ }
/* first, we visit all qpairs. If we find any work there then we can give
 * it back immediately. To avoid bias towards qpair 0 we remember what
@@ -586,9 +607,14 @@ vpp_daq_msg_receive (void *handle, const unsigned max_recv,
n_events = epoll_wait (vc->epoll_fd, vc->epoll_events, vc->num_qpairs, 1000);
- if (n_events < 1)
+ if (n_events == 0)
{
- *rstat = n_events == -1 ? DAQ_RSTAT_ERROR : DAQ_RSTAT_TIMEOUT;
+ *rstat = DAQ_RSTAT_TIMEOUT;
+ return 0;
+ }
+ if (n_events < 0)
+ {
+ *rstat = errno == EINTR ? DAQ_RSTAT_TIMEOUT : DAQ_RSTAT_ERROR;
return 0;
}
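epoll_wait()'s three outcomes are now kept apart: 0 means timeout, -1 with
EINTR means a signal arrived (treated as a timeout, not a failure), and any
other -1 is a genuine error. The canonical triage, as a standalone sketch:

    #include <errno.h>
    #include <sys/epoll.h>

    /* returns 1 if ready, 0 on timeout/signal, -1 on error */
    static int
    wait_one (int epfd, struct epoll_event *ev, int timeout_ms)
    {
      int n = epoll_wait (epfd, ev, 1, timeout_ms);

      if (n > 0)
        return 1;
      if (n == 0 || errno == EINTR)
        return 0;
      return -1;
    }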
@@ -602,8 +628,7 @@ vpp_daq_msg_receive (void *handle, const unsigned max_recv,
msgs += n;
n_recv += n;
}
-
- (void) read (qp->enq_fd, &ctr, sizeof (ctr));
+ ssize_t __clib_unused size = read (qp->enq_fd, &ctr, sizeof (ctr));
}
*rstat = DAQ_RSTAT_OK;
@@ -676,7 +701,7 @@ const DAQ_ModuleAPI_t DAQ_MODULE_DATA = {
/* .start = */ vpp_daq_start,
/* .inject = */ NULL,
/* .inject_relative = */ NULL,
- /* .interrupt = */ NULL,
+ /* .interrupt = */ vpp_daq_interrupt,
/* .stop = */ NULL,
/* .ioctl = */ NULL,
/* .get_stats = */ vpp_daq_get_stats,
diff --git a/src/plugins/snort/daq_vpp.h b/src/plugins/snort/daq_vpp.h
index 3b875aa15ad..ebec55435f3 100644
--- a/src/plugins/snort/daq_vpp.h
+++ b/src/plugins/snort/daq_vpp.h
@@ -67,7 +67,7 @@ typedef enum
typedef struct
{
- uint32_t offset;
+ uint64_t offset;
uint16_t length;
uint16_t address_space_id;
uint8_t buffer_pool;
diff --git a/src/plugins/snort/dequeue.c b/src/plugins/snort/dequeue.c
index d597b88f7a8..31745de404c 100644
--- a/src/plugins/snort/dequeue.c
+++ b/src/plugins/snort/dequeue.c
@@ -187,9 +187,9 @@ snort_deq_node_interrupt (vlib_main_t *vm, vlib_node_runtime_t *node,
snort_instance_t *si;
int inst = -1;
- while ((inst = clib_interrupt_get_next (ptd->interrupts, inst)) != -1)
+ while ((inst = clib_interrupt_get_next_and_clear (ptd->interrupts, inst)) !=
+ -1)
{
- clib_interrupt_clear (ptd->interrupts, inst);
si = vec_elt_at_index (sm->instances, inst);
qp = vec_elt_at_index (si->qpairs, vm->thread_index);
u32 ready = __atomic_load_n (&qp->ready, __ATOMIC_ACQUIRE);
diff --git a/src/plugins/snort/enqueue.c b/src/plugins/snort/enqueue.c
index 3f44e8013fd..409c0e49078 100644
--- a/src/plugins/snort/enqueue.c
+++ b/src/plugins/snort/enqueue.c
@@ -75,9 +75,16 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
while (n_left)
{
+ u64 fa_data;
u32 instance_index, next_index, n;
- instance_index =
- *(u32 *) vnet_feature_next_with_data (&next_index, b[0], sizeof (u32));
+ u32 l3_offset;
+
+ fa_data =
+ *(u64 *) vnet_feature_next_with_data (&next_index, b[0], sizeof (u64));
+
+ instance_index = (u32) (fa_data & 0xffffffff);
+ l3_offset =
+ (fa_data >> 32) ? vnet_buffer (b[0])->ip.save_rewrite_length : 0;
si = vec_elt_at_index (sm->instances, instance_index);
/* if client isn't connected skip enqueue and take default action */
@@ -108,7 +115,7 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
/* fill descriptor */
d->buffer_pool = b[0]->buffer_pool_index;
d->length = b[0]->current_length;
- d->offset = (u8 *) b[0]->data + b[0]->current_data -
+ d->offset = (u8 *) b[0]->data + b[0]->current_data + l3_offset -
sm->buffer_pool_base_addrs[d->buffer_pool];
d->address_space_id = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
}
@@ -190,7 +197,7 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
__atomic_store_n (qp->enq_head, head, __ATOMIC_RELEASE);
- _vec_len (qp->freelist) = freelist_len;
+ vec_set_len (qp->freelist, freelist_len);
if (sm->input_mode == VLIB_NODE_STATE_INTERRUPT)
{
if (write (qp->enq_fd, &ctr, sizeof (ctr)) < 0)
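In interrupt mode the enqueue node wakes the consumer by writing a counter to an eventfd, the same descriptor the DAQ reader later drains with read() in daq_vpp.c above. A self-contained sketch of that signalling mechanism, assuming standard Linux eventfd semantics:

```
/* eventfd wake-up: producer bumps the counter, consumer drains it. */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/eventfd.h>

int
main (void)
{
  uint64_t ctr = 1, val;
  int efd = eventfd (0, 0);

  if (efd < 0)
    return 1;

  /* producer: bump the counter; this makes the fd readable and wakes
   * any epoll_wait () on the other side */
  if (write (efd, &ctr, sizeof (ctr)) < 0)
    perror ("eventfd write");

  /* consumer: reading returns the accumulated counter and resets it */
  if (read (efd, &val, sizeof (val)) == sizeof (val))
    printf ("woke with counter %llu\n", (unsigned long long) val);

  close (efd);
  return 0;
}
```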
diff --git a/src/plugins/snort/main.c b/src/plugins/snort/main.c
index 37b517215bc..2430fcdc5c2 100644
--- a/src/plugins/snort/main.c
+++ b/src/plugins/snort/main.c
@@ -13,7 +13,6 @@ snort_main_t snort_main;
VLIB_REGISTER_LOG_CLASS (snort_log, static) = {
.class_name = "snort",
- .default_syslog_level = VLIB_LOG_LEVEL_DEBUG,
};
#define log_debug(fmt, ...) vlib_log_debug (snort_log.class, fmt, __VA_ARGS__)
@@ -196,9 +195,18 @@ snort_deq_ready (clib_file_t *uf)
snort_per_thread_data_t *ptd =
vec_elt_at_index (sm->per_thread_data, vm->thread_index);
u64 counter;
+ ssize_t bytes_read;
- if (read (uf->file_descriptor, &counter, sizeof (counter)) < 0)
- return clib_error_return (0, "client closed socket");
+ bytes_read = read (uf->file_descriptor, &counter, sizeof (counter));
+ if (bytes_read < 0)
+ {
+ return clib_error_return (0, "client closed socket");
+ }
+
+ if (bytes_read < sizeof (counter))
+ {
+ return clib_error_return (0, "unexpected truncated read");
+ }
clib_interrupt_set (ptd->interrupts, uf->private_data);
vlib_node_set_interrupt_pending (vm, snort_deq_node.index);
@@ -251,8 +259,10 @@ snort_listener_init (vlib_main_t *vm)
s = clib_mem_alloc (sizeof (clib_socket_t));
clib_memset (s, 0, sizeof (clib_socket_t));
s->config = (char *) sm->socket_name;
- s->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_ALLOW_GROUP_WRITE |
- CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_PASSCRED;
+ s->is_server = 1;
+ s->allow_group_write = 1;
+ s->is_seqpacket = 1;
+ s->passcred = 1;
if ((err = clib_socket_init (s)))
{
@@ -299,8 +309,8 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz,
/* enq and deq head pointer */
qpair_mem_sz += 2 * round_pow2 (sizeof (u32), align);
- size =
- round_pow2 (tm->n_vlib_mains * qpair_mem_sz, clib_mem_get_page_size ());
+ size = round_pow2 ((uword) tm->n_vlib_mains * qpair_mem_sz,
+ clib_mem_get_page_size ());
fd = clib_mem_vm_create_fd (CLIB_MEM_PAGE_SZ_DEFAULT, "snort instance %s",
name);
@@ -386,7 +396,7 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz,
for (i = 0; i < vlib_get_n_threads (); i++)
vlib_node_set_state (vlib_get_main_by_index (i), snort_deq_node.index,
- VLIB_NODE_STATE_INTERRUPT);
+ sm->input_mode);
done:
if (err)
@@ -401,12 +411,14 @@ done:
clib_error_t *
snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
- u32 sw_if_index, int is_enable)
+ u32 sw_if_index, int is_enable,
+ snort_attach_dir_t snort_dir)
{
snort_main_t *sm = &snort_main;
vnet_main_t *vnm = vnet_get_main ();
snort_instance_t *si;
clib_error_t *err = 0;
+ u64 fa_data;
u32 index;
if (is_enable)
@@ -432,8 +444,18 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
}
index = sm->instance_by_sw_if_index[sw_if_index] = si->index;
- vnet_feature_enable_disable ("ip4-unicast", "snort-enq", sw_if_index, 1,
- &index, sizeof (index));
+ if (snort_dir & SNORT_INPUT)
+ {
+ fa_data = (u64) index;
+ vnet_feature_enable_disable ("ip4-unicast", "snort-enq", sw_if_index,
+ 1, &fa_data, sizeof (fa_data));
+ }
+ if (snort_dir & SNORT_OUTPUT)
+ {
+ fa_data = (1LL << 32 | index);
+ vnet_feature_enable_disable ("ip4-output", "snort-enq", sw_if_index,
+ 1, &fa_data, sizeof (fa_data));
+ }
}
else
{
@@ -451,8 +473,18 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
si = vec_elt_at_index (sm->instances, index);
sm->instance_by_sw_if_index[sw_if_index] = ~0;
- vnet_feature_enable_disable ("ip4-unicast", "snort-enq", sw_if_index, 0,
- &index, sizeof (index));
+ if (snort_dir & SNORT_INPUT)
+ {
+ fa_data = (u64) index;
+ vnet_feature_enable_disable ("ip4-unicast", "snort-enq", sw_if_index,
+ 0, &fa_data, sizeof (fa_data));
+ }
+ if (snort_dir & SNORT_OUTPUT)
+ {
+ fa_data = (1LL << 32 | index);
+ vnet_feature_enable_disable ("ip4-output", "snort-enq", sw_if_index,
+ 0, &fa_data, sizeof (fa_data));
+ }
}
done:
@@ -490,6 +522,7 @@ static clib_error_t *
snort_init (vlib_main_t *vm)
{
snort_main_t *sm = &snort_main;
+ sm->input_mode = VLIB_NODE_STATE_INTERRUPT;
sm->instance_by_name = hash_create_string (0, sizeof (uword));
vlib_buffer_pool_t *bp;
@@ -518,3 +551,9 @@ VNET_FEATURE_INIT (snort_enq, static) = {
.node_name = "snort-enq",
.runs_before = VNET_FEATURES ("ip4-lookup"),
};
+
+VNET_FEATURE_INIT (snort_enq_out, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "snort-enq",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
diff --git a/src/plugins/snort/snort.h b/src/plugins/snort/snort.h
index d069fa01661..79299aa6d91 100644
--- a/src/plugins/snort/snort.h
+++ b/src/plugins/snort/snort.h
@@ -90,6 +90,13 @@ typedef enum
SNORT_ENQ_N_NEXT_NODES,
} snort_enq_next_t;
+typedef enum
+{
+ SNORT_INPUT = 1,
+ SNORT_OUTPUT = 2,
+ SNORT_INOUT = 3
+} snort_attach_dir_t;
+
#define SNORT_ENQ_NEXT_NODES \
{ \
[SNORT_ENQ_NEXT_DROP] = "error-drop", \
@@ -100,7 +107,8 @@ clib_error_t *snort_instance_create (vlib_main_t *vm, char *name,
u8 log2_queue_sz, u8 drop_on_disconnect);
clib_error_t *snort_interface_enable_disable (vlib_main_t *vm,
char *instance_name,
- u32 sw_if_index, int is_enable);
+ u32 sw_if_index, int is_enable,
+ snort_attach_dir_t dir);
clib_error_t *snort_set_node_mode (vlib_main_t *vm, u32 mode);
always_inline void
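With the new snort_attach_dir_t, the data passed on the feature arc grows from a bare u32 instance index to a u64: the low 32 bits still carry the instance index, while bit 32 marks the ip4-output direction, which the enqueue node uses to decide whether to apply the saved L3 rewrite offset. A minimal sketch of that encoding (helper names are illustrative, not part of the plugin):

```
/* fa_data layout: bit 32 = output direction, bits 0..31 = instance. */
#include <stdint.h>
#include <stdio.h>

static uint64_t
fa_data_encode (uint32_t instance_index, int is_output)
{
  return ((uint64_t) !!is_output << 32) | instance_index;
}

static void
fa_data_decode (uint64_t fa_data, uint32_t *instance_index, int *is_output)
{
  *instance_index = (uint32_t) (fa_data & 0xffffffff);
  *is_output = fa_data >> 32 ? 1 : 0;
}

int
main (void)
{
  uint32_t index;
  int is_output;

  fa_data_decode (fa_data_encode (7, 1), &index, &is_output);
  printf ("instance %u, output %d\n", index, is_output); /* 7, 1 */
  return 0;
}
```

SNORT_INOUT is simply SNORT_INPUT | SNORT_OUTPUT, so enabling "in and out" attaches the feature on both the ip4-unicast and ip4-output arcs, each with its own fa_data encoding.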
diff --git a/src/plugins/srtp/srtp.c b/src/plugins/srtp/srtp.c
index 58a35c31606..bb54e672918 100644
--- a/src/plugins/srtp/srtp.c
+++ b/src/plugins/srtp/srtp.c
@@ -26,7 +26,9 @@ static inline u32
srtp_ctx_alloc_w_thread (u32 thread_index)
{
srtp_tc_t *ctx;
- pool_get_zero (srtp_main.ctx_pool[thread_index], ctx);
+ pool_get_aligned_safe (srtp_main.ctx_pool[thread_index], ctx,
+ CLIB_CACHE_LINE_BYTES);
+ clib_memset (ctx, 0, sizeof (*ctx));
ctx->c_thread_index = thread_index;
ctx->srtp_ctx_handle = ctx - srtp_main.ctx_pool[thread_index];
ctx->app_session_handle = SESSION_INVALID_HANDLE;
@@ -50,6 +52,7 @@ srtp_init_policy (srtp_tc_t *ctx, transport_endpt_cfg_srtp_t *cfg)
{
sp = &ctx->srtp_policy[i];
sp_cfg = &cfg->policies[i];
+ clib_memset (sp, 0, sizeof (*sp));
srtp_crypto_policy_set_rtp_default (&sp->rtp);
srtp_crypto_policy_set_rtcp_default (&sp->rtcp);
@@ -57,7 +60,6 @@ srtp_init_policy (srtp_tc_t *ctx, transport_endpt_cfg_srtp_t *cfg)
sp->ssrc.value = sp_cfg->ssrc_value;
sp->key = clib_mem_alloc (sp_cfg->key_len);
clib_memcpy (sp->key, sp_cfg->key, sp_cfg->key_len);
- sp->ekt = 0;
sp->next = i < 1 ? &ctx->srtp_policy[i + 1] : 0;
sp->window_size = sp_cfg->window_size;
sp->allow_repeat_tx = sp_cfg->allow_repeat_tx;
@@ -84,7 +86,8 @@ srtp_ctx_attach (u32 thread_index, void *ctx_ptr)
{
srtp_tc_t *ctx;
- pool_get (srtp_main.ctx_pool[thread_index], ctx);
+ pool_get_aligned_safe (srtp_main.ctx_pool[thread_index], ctx,
+ CLIB_CACHE_LINE_BYTES);
clib_memcpy (ctx, ctx_ptr, sizeof (*ctx));
ctx->c_thread_index = thread_index;
@@ -151,6 +154,7 @@ srtp_ctx_init_client (srtp_tc_t *ctx)
app_session = session_get (ctx->c_s_index, ctx->c_thread_index);
app_session->app_wrk_index = ctx->parent_app_wrk_index;
app_session->connection_index = ctx->srtp_ctx_handle;
+ app_session->opaque = ctx->parent_app_api_context;
app_session->session_type =
session_type_from_proto_and_ip (TRANSPORT_PROTO_SRTP, ctx->udp_is_ip4);
@@ -227,7 +231,7 @@ srtp_ctx_write (srtp_tc_t *ctx, session_t *app_session,
{
u32 n_wrote = 0, to_deq, dgram_sz;
session_dgram_pre_hdr_t hdr;
- app_session_transport_t at;
+ app_session_transport_t at = {};
svm_msg_q_t *mq;
session_t *us;
u8 buf[2000];
@@ -238,12 +242,13 @@ srtp_ctx_write (srtp_tc_t *ctx, session_t *app_session,
us = session_get_from_handle (ctx->srtp_session_handle);
to_deq = svm_fifo_max_dequeue_cons (app_session->tx_fifo);
mq = session_main_get_vpp_event_queue (us->thread_index);
+ sp->bytes_dequeued = to_deq;
while (to_deq > 0)
{
/* Peeking only pre-header dgram because the session is connected */
rv = svm_fifo_peek (app_session->tx_fifo, 0, sizeof (hdr), (u8 *) &hdr);
- ASSERT (rv == sizeof (hdr) && hdr.data_length < vec_len (buf));
+ ASSERT (rv == sizeof (hdr) && hdr.data_length < 2000);
ASSERT (to_deq >= hdr.data_length + SESSION_CONN_HDR_LEN);
dgram_sz = hdr.data_length + SESSION_CONN_HDR_LEN;
@@ -296,14 +301,16 @@ done:
session_transport_closed_notify (&ctx->connection);
}
+ ASSERT (sp->bytes_dequeued >= to_deq);
+ sp->bytes_dequeued -= to_deq;
+
return n_wrote > 0 ? clib_max (n_wrote / TRANSPORT_PACER_MIN_MSS, 1) : 0;
}
int
srtp_add_vpp_q_builtin_rx_evt (session_t *s)
{
- if (svm_fifo_set_event (s->rx_fifo))
- session_send_io_evt_to_thread (s->rx_fifo, SESSION_IO_EVT_BUILTIN_RX);
+ session_enqueue_notify (s);
return 0;
}
@@ -313,7 +320,7 @@ srtp_notify_app_enqueue (srtp_tc_t *ctx, session_t *app_session)
app_worker_t *app_wrk;
app_wrk = app_worker_get_if_valid (app_session->app_wrk_index);
if (PREDICT_TRUE (app_wrk != 0))
- app_worker_lock_and_send_event (app_wrk, app_session, SESSION_IO_EVT_RX);
+ app_worker_rx_notify (app_wrk, app_session);
}
static inline int
@@ -649,6 +656,7 @@ srtp_connect (transport_endpoint_cfg_t *tep)
ctx->parent_app_api_context = sep->opaque;
ctx->udp_is_ip4 = sep->is_ip4;
ctx->srtp_ctx_handle = ctx_index;
+ ctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
srtp_init_policy (ctx, (transport_endpt_cfg_srtp_t *) sep->ext_cfg->data);
@@ -702,7 +710,7 @@ srtp_disconnect (u32 ctx_handle, u32 thread_index)
}
static u32
-srtp_start_listen (u32 app_listener_index, transport_endpoint_t *tep)
+srtp_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
{
vnet_listen_args_t _bargs, *args = &_bargs;
session_handle_t udp_al_handle;
@@ -745,6 +753,8 @@ srtp_start_listen (u32 app_listener_index, transport_endpoint_t *tep)
lctx->srtp_session_handle = udp_al_handle;
lctx->app_session_handle = listen_session_get_handle (app_listener);
lctx->udp_is_ip4 = sep->is_ip4;
+ lctx->c_s_index = app_listener_index;
+ lctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
srtp_init_policy (lctx, (transport_endpt_cfg_srtp_t *) sep->ext_cfg->data);
@@ -812,7 +822,6 @@ srtp_custom_tx_callback (void *session, transport_send_params_t *sp)
SESSION_STATE_TRANSPORT_CLOSED))
return 0;
- sp->flags = 0;
ctx = srtp_ctx_get_w_thread (app_session->connection_index,
app_session->thread_index);
if (PREDICT_FALSE (ctx->is_migrated))
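Unlike pool_get_zero, pool_get_aligned_safe may hand back a recycled, non-zeroed element, hence the explicit clib_memset added above; the context handle remains the element's index in the per-thread pool. A reduced sketch of the pattern, assuming a VPP build environment (ctx_t stands in for srtp_tc_t):

```
/* Aligned, thread-safe pool allocation with explicit zeroing. */
#include <vppinfra/pool.h>
#include <vppinfra/string.h>

typedef struct
{
  u32 thread_index;
  u32 handle;
} ctx_t;

static ctx_t *ctx_pool;

static u32
ctx_alloc (u32 thread_index)
{
  ctx_t *ctx;

  pool_get_aligned_safe (ctx_pool, ctx, CLIB_CACHE_LINE_BYTES);
  clib_memset (ctx, 0, sizeof (*ctx)); /* recycled slots are not zeroed */
  ctx->thread_index = thread_index;
  ctx->handle = ctx - ctx_pool; /* handle == pool index */
  return ctx->handle;
}
```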
diff --git a/src/plugins/srtp/srtp_plugin.md b/src/plugins/srtp/srtp_plugin.md
deleted file mode 100644
index 81185864dbe..00000000000
--- a/src/plugins/srtp/srtp_plugin.md
+++ /dev/null
@@ -1,72 +0,0 @@
-# SRTP (Secure Real-time Transport Protocol) {#srtp_doc}
-
-libsrtp2 based SRTP transport protocol implementation.
-
-## Maturity level
-Experimental
-
-## Quickstart
-
-1. Install libsrtp2-dev. On debian based OS:
-
-```
-sudo apt get install libsrtp2-dev
-```
-
-2. Build vpp
-
-```
-make build
-```
-
-3. Test protocol using vcl test server and client. On server side, start vpp and server app:
-
-```
-export VT_PATH=$WS/build-root/build-vpp_debug-native/vpp/bin
-$VT_PATH/vcl_test_server 1234 -p srtp
-```
-
-On client side:
-
-```
-export VT_PATH=$WS/build-root/build-vpp_debug-native/vpp/bin
-$VT_PATH/vcl_test_client <server-ip> 1234 -U -X -S -N 10000 -T 128 -p srtp
-```
-
-## Custom libsrtp2 build
-
-1. Create `build/external/packages/srtp.mk` with following example contents:
-
-```
-srtp_version := 2.3.0
-srtp_tarball := srtp_$(srtp_version).tar.gz
-srtp_tarball_md5sum := da38ee5d9c31be212a12964c22d7f795
-srtp_tarball_strip_dirs := 1
-srtp_url := https://github.com/cisco/libsrtp/archive/v$(srtp_version).tar.gz
-
-define srtp_build_cmds
- @cd $(srtp_build_dir) && \
- $(CMAKE) -DCMAKE_INSTALL_PREFIX:PATH=$(srtp_install_dir) \
- -DCMAKE_C_FLAGS='-fPIC -fvisibility=hidden' $(srtp_src_dir) > $(srtp_build_log)
- @$(MAKE) $(MAKE_ARGS) -C $(srtp_build_dir) > $(srtp_build_log)
-endef
-
-define srtp_config_cmds
- @true
-endef
-
-define srtp_install_cmds
- @$(MAKE) $(MAKE_ARGS) -C $(srtp_build_dir) install > $(srtp_install_log)
-endef
-
-
-$(eval $(call package,srtp))
-```
-
-2. Include `srtp.mk` in `build/external/Makefile` and add to install target.
-
-3. Rebuild external dependencies:
-
-```
-make install-ext-deps
-```
diff --git a/src/plugins/srtp/srtp_plugin.rst b/src/plugins/srtp/srtp_plugin.rst
new file mode 100644
index 00000000000..568ebb66f01
--- /dev/null
+++ b/src/plugins/srtp/srtp_plugin.rst
@@ -0,0 +1,82 @@
+SRTP Protocol
+=============
+
+This document describes the VPP SRTP (Secure Real-time Transport
+Protocol) implementation, which is based on libsrtp2.
+
+Maturity level
+--------------
+
+Experimental
+
+Quickstart
+----------
+
+1. Install libsrtp2-dev. On Debian-based OS:
+
+::
+
+ sudo apt-get install libsrtp2-dev
+
+2. Build vpp
+
+::
+
+ make build
+
+3. Test protocol using vcl test server and client. On server side, start
+ vpp and server app:
+
+::
+
+ export VT_PATH=$WS/build-root/build-vpp_debug-native/vpp/bin
+ $VT_PATH/vcl_test_server 1234 -p srtp
+
+On client side:
+
+::
+
+ export VT_PATH=$WS/build-root/build-vpp_debug-native/vpp/bin
+ $VT_PATH/vcl_test_client <server-ip> 1234 -U -X -S -N 10000 -T 128 -p srtp
+
+Custom libsrtp2 build
+---------------------
+
+1. Create ``build/external/packages/srtp.mk`` with the following example
+ contents:
+
+::
+
+ srtp_version := 2.3.0
+ srtp_tarball := srtp_$(srtp_version).tar.gz
+ srtp_tarball_md5sum := da38ee5d9c31be212a12964c22d7f795
+ srtp_tarball_strip_dirs := 1
+ srtp_url := https://github.com/cisco/libsrtp/archive/v$(srtp_version).tar.gz
+
+ define srtp_build_cmds
+ @cd $(srtp_build_dir) && \
+ $(CMAKE) -DCMAKE_INSTALL_PREFIX:PATH=$(srtp_install_dir) \
+ -DCMAKE_C_FLAGS='-fPIC -fvisibility=hidden' $(srtp_src_dir) > $(srtp_build_log)
+ @$(MAKE) $(MAKE_ARGS) -C $(srtp_build_dir) > $(srtp_build_log)
+ endef
+
+ define srtp_config_cmds
+ @true
+ endef
+
+ define srtp_install_cmds
+ @$(MAKE) $(MAKE_ARGS) -C $(srtp_build_dir) install > $(srtp_install_log)
+ endef
+
+
+ $(eval $(call package,srtp))
+
+2. Include ``srtp.mk`` in ``build/external/Makefile`` and add it to the
+   install target.
+
+3. Rebuild external dependencies:
+
+::
+
+ make install-ext-deps
diff --git a/src/plugins/srv6-ad-flow/ad-flow.c b/src/plugins/srv6-ad-flow/ad-flow.c
index fd9706dabe1..d13a1c95969 100644
--- a/src/plugins/srv6-ad-flow/ad-flow.c
+++ b/src/plugins/srv6-ad-flow/ad-flow.c
@@ -94,14 +94,6 @@ srv6_ad_flow_localsid_creation_fn (ip6_sr_localsid_t *localsid)
return SID_CREATE_INVALID_IFACE_INDEX;
}
- vnet_sw_interface_t *sw =
- vnet_get_sw_interface (sm->vnet_main, ls_mem->sw_if_index_in);
- if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
- {
- adj_unlock (ls_mem->nh_adj);
- clib_mem_free (ls_mem);
- return SID_CREATE_INVALID_IFACE_TYPE;
- }
if (ls_mem->inner_type == AD_TYPE_IP4)
{
@@ -366,7 +358,7 @@ unformat_srv6_ad_flow_localsid (unformat_input_t *input, va_list *args)
}
/* Allocate and initialize memory block for local SID parameters */
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
clib_memset (ls_mem, 0, sizeof *ls_mem);
*plugin_mem_p = ls_mem;
diff --git a/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.md b/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.md
deleted file mode 100644
index 1f58fc2b663..00000000000
--- a/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# SRv6 endpoint to SR-unaware appliance via per-flow dynamic proxy {#srv6_ad_flow_plugin_doc}
-
-## Overview
-
-TBD
-
-## CLI configuration
-
-The following command instantiates a new End.AD.Flow segment that sends the inner
-packets on interface `IFACE-OUT` towards an appliance at address `S-ADDR` and
-restores the encapsulation headers of the packets coming back on interface
-`IFACE-IN`.
-
-```
-sr localsid address SID behavior end.ad.flow nh S-ADDR oif IFACE-OUT iif IFACE-IN
-```
-
-For example, the below command configures the SID `1::A1` with an End.AD.Flow
-function for sending traffic on interface `GigabitEthernet0/8/0` to the
-appliance at address `A1::`, and receiving it back on interface
-`GigabitEthernet0/9/0`.
-
-```
-sr localsid address 1::A1 behavior end.ad.flow nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
-```
diff --git a/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.rst b/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.rst
new file mode 100644
index 00000000000..7e628742f84
--- /dev/null
+++ b/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.rst
@@ -0,0 +1,31 @@
+SRv6 per-flow dynamic proxy
+===========================
+
+This document describes the SRv6 endpoint to SR-unaware appliance via
+per-flow dynamic proxy (End.AD.Flow).
+
+Overview
+--------
+
+TBD
+
+CLI configuration
+-----------------
+
+The following command instantiates a new End.AD.Flow segment that sends
+the inner packets on interface ``IFACE-OUT`` towards an appliance at
+address ``S-ADDR`` and restores the encapsulation headers of the packets
+coming back on interface ``IFACE-IN``.
+
+::
+
+ sr localsid address SID behavior end.ad.flow nh S-ADDR oif IFACE-OUT iif IFACE-IN
+
+For example, the below command configures the SID ``1::A1`` with an
+End.AD.Flow function for sending traffic on interface
+``GigabitEthernet0/8/0`` to the appliance at address ``A1::``, and
+receiving it back on interface ``GigabitEthernet0/9/0``.
+
+::
+
+ sr localsid address 1::A1 behavior end.ad.flow nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
diff --git a/src/plugins/srv6-ad/ad.c b/src/plugins/srv6-ad/ad.c
index 045ddeb466d..fc8527d0f82 100644
--- a/src/plugins/srv6-ad/ad.c
+++ b/src/plugins/srv6-ad/ad.c
@@ -362,7 +362,7 @@ unformat_srv6_ad_localsid (unformat_input_t * input, va_list * args)
}
/* Allocate and initialize memory block for local SID parameters */
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
clib_memset (ls_mem, 0, sizeof *ls_mem);
*plugin_mem_p = ls_mem;
@@ -447,7 +447,6 @@ srv6_ad_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_ad2_rewrite, static) =
{
.arc_name = "device-input",
@@ -475,7 +474,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Dynamic Segment Routing for IPv6 (SRv6) Proxy",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-ad/ad_plugin_doc.md b/src/plugins/srv6-ad/ad_plugin_doc.md
deleted file mode 100644
index 993eeb63589..00000000000
--- a/src/plugins/srv6-ad/ad_plugin_doc.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# SRv6 endpoint to SR-unaware appliance via dynamic proxy (End.AD) {#srv6_ad_plugin_doc}
-
-## Overview
-
-The dynamic proxy is an improvement over the static proxy (@ref srv6_as_plugin_doc)
-that dynamically learns the SR information before removing it from the incoming
-traffic. The same information can then be re-attached to the traffic returning
-from the SF. As opposed to the static SR proxy, no CACHE information needs to be
-configured. Instead, the dynamic SR proxy relies on a local caching mechanism on
-the node instantiating this segment. Therefore, a dynamic proxy segment cannot
-be the last segment in an SR SC policy. A different SR behavior should thus be
-used if the SF is meant to be the final destination of an SR SC policy.
-
-Upon receiving a packet whose active segment matches a dynamic SR proxy
-function, the proxy node pops the top MPLS label or applies the SRv6 End
-behavior, then compares the updated SR information with the cache entry for the
-current segment. If the cache is empty or different, it is updated with the new
-SR information. The SR information is then removed and the inner packet is sent
-towards the SF.
-
-The cache entry is not mapped to any particular packet, but instead to an SR SC
-policy identified by the receiving interface (IFACE-IN). Any non-link-local IP
-packet or non-local Ethernet frame received on that interface will be
-re-encapsulated with the cached headers as described in @ref srv6_as_plugin_doc. The
-SF may thus drop, modify or generate new packets without affecting the proxy.
-
-For more information, please see
-[draft-xuclad-spring-sr-service-chaining](https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/).
-
-## CLI configuration
-
-The following command instantiates a new End.AD segment that sends the inner
-packets on interface `IFACE-OUT` towards an appliance at address `S-ADDR` and
-restores the encapsulation headers of the packets coming back on interface
-`IFACE-IN`.
-
-```
-sr localsid address SID behavior end.ad nh S-ADDR oif IFACE-OUT iif IFACE-IN
-```
-
-For example, the below command configures the SID `1::A1` with an End.AD
-function for sending traffic on interface `GigabitEthernet0/8/0` to the
-appliance at address `A1::`, and receiving it back on interface
-`GigabitEthernet0/9/0`.
-
-```
-sr localsid address 1::A1 behavior end.ad nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
-```
-
-## Pseudocode
-
-The dynamic proxy SRv6 pseudocode is obtained by inserting the following
-instructions between lines 1 and 2 of the static proxy SRv6 pseudocode.
-
-```
-IF NH=SRH & SL > 0 THEN
- Decrement SL and update the IPv6 DA with SRH[SL]
- IF C(IFACE-IN) different from IPv6 encaps THEN ;; Ref1
- Copy the IPv6 encaps into C(IFACE-IN) ;; Ref2
-ELSE
- Drop the packet
-```
-
-**Ref1:** "IPv6 encaps" represents the IPv6 header and any attached extension
-header.
-
-**Ref2:** C(IFACE-IN) represents the cache entry associated to the dynamic SR proxy
-segment. It is identified with IFACE-IN in order to efficiently retrieve the
-right SR information when a packet arrives on this interface.
-
-In addition, the inbound policy should check that C(IFACE-IN) has been defined
-before attempting to restore the IPv6 encapsulation, and drop the packet
-otherwise.
diff --git a/src/plugins/srv6-ad/ad_plugin_doc.rst b/src/plugins/srv6-ad/ad_plugin_doc.rst
new file mode 100644
index 00000000000..cfb6cea7a15
--- /dev/null
+++ b/src/plugins/srv6-ad/ad_plugin_doc.rst
@@ -0,0 +1,86 @@
+.. _srv6_ad_plugin_doc:
+
+SRv6 dynamic proxy
+==================
+
+SRv6 endpoint to SR-unaware appliance via dynamic proxy (End.AD)
+----------------------------------------------------------------
+
+Overview
+~~~~~~~~
+
+The dynamic proxy is an improvement over the static proxy
+(:ref:`srv6_as_plugin_doc`) that dynamically learns the SR information
+before removing it from the incoming traffic. The same information can then be
+re-attached to the traffic returning from the SF. As opposed to the
+static SR proxy, no CACHE information needs to be configured. Instead,
+the dynamic SR proxy relies on a local caching mechanism on the node
+instantiating this segment. Therefore, a dynamic proxy segment cannot be
+the last segment in an SR SC policy. A different SR behavior should thus
+be used if the SF is meant to be the final destination of an SR SC
+policy.
+
+Upon receiving a packet whose active segment matches a dynamic SR proxy
+function, the proxy node pops the top MPLS label or applies the SRv6 End
+behavior, then compares the updated SR information with the cache entry
+for the current segment. If the cache is empty or different, it is
+updated with the new SR information. The SR information is then removed
+and the inner packet is sent towards the SF.
+
+The cache entry is not mapped to any particular packet, but instead to
+an SR SC policy identified by the receiving interface (IFACE-IN). Any
+non-link-local IP packet or non-local Ethernet frame received on that
+interface will be re-encapsulated with the cached headers as described
+in :ref:`srv6_as_plugin_doc`. The SF may thus drop, modify or generate new
+packets without affecting the proxy.
+
+For more information, please see
+`draft-xuclad-spring-sr-service-chaining <https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/>`__.
+
+CLI configuration
+~~~~~~~~~~~~~~~~~
+
+The following command instantiates a new End.AD segment that sends the
+inner packets on interface ``IFACE-OUT`` towards an appliance at address
+``S-ADDR`` and restores the encapsulation headers of the packets coming
+back on interface ``IFACE-IN``.
+
+::
+
+ sr localsid address SID behavior end.ad nh S-ADDR oif IFACE-OUT iif IFACE-IN
+
+For example, the below command configures the SID ``1::A1`` with an
+End.AD function for sending traffic on interface
+``GigabitEthernet0/8/0`` to the appliance at address ``A1::``, and
+receiving it back on interface ``GigabitEthernet0/9/0``.
+
+::
+
+ sr localsid address 1::A1 behavior end.ad nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
+
+Pseudocode
+~~~~~~~~~~
+
+The dynamic proxy SRv6 pseudocode is obtained by inserting the following
+instructions between lines 1 and 2 of the static proxy SRv6 pseudocode.
+
+::
+
+ IF NH=SRH & SL > 0 THEN
+ Decrement SL and update the IPv6 DA with SRH[SL]
+ IF C(IFACE-IN) different from IPv6 encaps THEN ;; Ref1
+ Copy the IPv6 encaps into C(IFACE-IN) ;; Ref2
+ ELSE
+ Drop the packet
+
+**Ref1:** “IPv6 encaps” represents the IPv6 header and any attached
+extension header.
+
+**Ref2:** C(IFACE-IN) represents the cache entry associated to the
+dynamic SR proxy segment. It is identified with IFACE-IN in order to
+efficiently retrieve the right SR information when a packet arrives on
+this interface.
+
+In addition, the inbound policy should check that C(IFACE-IN) has been
+defined before attempting to restore the IPv6 encapsulation, and drop
+the packet otherwise.
diff --git a/src/plugins/srv6-ad/node.c b/src/plugins/srv6-ad/node.c
index 9d4ea44e19b..2b1d56b6570 100644
--- a/src/plugins/srv6-ad/node.c
+++ b/src/plugins/srv6-ad/node.c
@@ -203,7 +203,6 @@ srv6_ad_localsid_fn (vlib_main_t * vm,
{
ip6_sr_main_t *sm = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
- u32 cnt_packets = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -264,7 +263,6 @@ srv6_ad_localsid_fn (vlib_main_t * vm,
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
- cnt_packets++;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -273,7 +271,6 @@ srv6_ad_localsid_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_ad_localsid_node) = {
.function = srv6_ad_localsid_fn,
.name = "srv6-ad-localsid",
@@ -288,7 +285,6 @@ VLIB_REGISTER_NODE (srv6_ad_localsid_node) = {
[SRV6_AD_LOCALSID_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/******************************* Rewriting node *******************************/
@@ -409,7 +405,6 @@ srv6_ad2_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_ad2_rewrite_node) = {
.function = srv6_ad2_rewrite_fn,
.name = "srv6-ad2-rewrite",
@@ -424,7 +419,6 @@ VLIB_REGISTER_NODE (srv6_ad2_rewrite_node) = {
[SRV6_AD_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/**
@@ -552,7 +546,6 @@ srv6_ad4_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_ad4_rewrite_node) = {
.function = srv6_ad4_rewrite_fn,
.name = "srv6-ad4-rewrite",
@@ -567,7 +560,6 @@ VLIB_REGISTER_NODE (srv6_ad4_rewrite_node) = {
[SRV6_AD_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/**
@@ -690,7 +682,6 @@ srv6_ad6_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_ad6_rewrite_node) = {
.function = srv6_ad6_rewrite_fn,
.name = "srv6-ad6-rewrite",
@@ -705,7 +696,6 @@ VLIB_REGISTER_NODE (srv6_ad6_rewrite_node) = {
[SRV6_AD_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-am/am.c b/src/plugins/srv6-am/am.c
index 1408ebc470b..f308b5167d9 100644
--- a/src/plugins/srv6-am/am.c
+++ b/src/plugins/srv6-am/am.c
@@ -139,7 +139,7 @@ unformat_srv6_am_localsid (unformat_input_t * input, va_list * args)
unformat_vnet_sw_interface, vnm, &sw_if_index_in))
{
/* Allocate a portion of memory */
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
/* Set to zero the memory */
clib_memset (ls_mem, 0, sizeof *ls_mem);
@@ -226,7 +226,6 @@ srv6_am_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_am_rewrite, static) =
{
.arc_name = "ip6-unicast",
@@ -240,7 +239,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Masquerading Segment Routing for IPv6 (SRv6) Proxy",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-am/am_plugin_doc.md b/src/plugins/srv6-am/am_plugin_doc.md
deleted file mode 100644
index 11aad855408..00000000000
--- a/src/plugins/srv6-am/am_plugin_doc.md
+++ /dev/null
@@ -1,100 +0,0 @@
-# SRv6 endpoint to SR-unaware appliance via masquerading (End.AM) {#srv6_am_plugin_doc}
-
-The masquerading proxy is an SR endpoint behavior for processing SRv6 traffic on
-behalf of an SR-unaware SF. This proxy thus receives SR traffic that is formed
-of an IPv6 header and an SRH on top of an inner payload. The masquerading
-behavior is independent from the inner payload type. Hence, the inner payload
-can be of any type but it is usually expected to be a transport layer packet,
-such as TCP or UDP.
-
-A masquerading SR proxy segment is associated with the following mandatory
-parameters:
-
-- S-ADDR: Ethernet or IPv6 address of the SF
-- IFACE-OUT: Local interface for sending traffic towards the SF
-- IFACE-IN: Local interface receiving the traffic coming back from the SF
-
-A masquerading SR proxy segment is thus defined for a specific SF and bound to a
-pair of directed interfaces or sub-interfaces on the proxy. As opposed to the
-static and dynamic SR proxies, a masquerading segment can be present at the same
-time in any number of SR SC policies and the same interfaces can be bound to
-multiple masquerading proxy segments. The only restriction is that a
-masquerading proxy segment cannot be the last segment in an SR SC policy.
-
-The first part of the masquerading behavior is triggered when the proxy node
-receives an IPv6 packet whose Destination Address matches a masquerading proxy
-segment. The proxy inspects the IPv6 extension headers and substitutes the
-Destination Address with the last segment in the SRH attached to the IPv6
-header, which represents the final destination of the IPv6 packet. The packet is
-then sent out towards the SF.
-
-The SF receives an IPv6 packet whose source and destination addresses are
-respectively the original source and final destination. It does not attempt to
-inspect the SRH, as RFC8200 specifies that routing extension headers are not
-examined or processed by transit nodes. Instead, the SF simply forwards the
-packet based on its current Destination Address. In this scenario, we assume
-that the SF can only inspect, drop or perform limited changes to the packets.
-For example, Intrusion Detection Systems, Deep Packet Inspectors and non-NAT
-Firewalls are among the SFs that can be supported by a masquerading SR proxy.
-
-The second part of the masquerading behavior, also called de- masquerading, is
-an inbound policy attached to the proxy interface receiving the traffic
-returning from the SF, IFACE-IN. This policy inspects the incoming traffic and
-triggers a regular SRv6 endpoint processing (End) on any IPv6 packet that
-contains an SRH. This processing occurs before any lookup on the packet
-Destination Address is performed and it is sufficient to restore the right
-active segment as the Destination Address of the IPv6 packet.
-
-For more information, please see
-[draft-xuclad-spring-sr-service-chaining](https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/).
-
-## CLI configuration
-
-The following command instantiates a new End.AM segment that sends masqueraded
-traffic on interface `IFACE-OUT` towards an appliance at address `S-ADDR` and
-restores the active segment in the IPv6 header of the packets coming back on
-interface `IFACE-IN`.
-
-```
-sr localsid address SID behavior end.am nh S-ADDR oif IFACE-OUT iif IFACE-IN
-```
-
-For example, the below command configures the SID `1::A1` with an End.AM
-function for sending traffic on interface `GigabitEthernet0/8/0` to the
-appliance at address `A1::`, and receiving it back on interface
-`GigabitEthernet0/9/0`.
-
-```
-sr localsid address 1::A1 behavior end.am nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
-```
-
-## Pseudocode
-
-### Masquerading
-
-Upon receiving a packet destined for S, where S is an IPv6 masquerading proxy
-segment, a node N processes it as follows.
-
-```
-IF NH=SRH & SL > 0 THEN
- Update the IPv6 DA with SRH[0]
- Forward the packet on IFACE-OUT
-ELSE
- Drop the packet
-```
-
-### De-masquerading
-
-Upon receiving a non-link-local IPv6 packet on IFACE-IN, a node N processes it
-as follows.
-
-```
-IF NH=SRH & SL > 0 THEN
- Decrement SL
- Update the IPv6 DA with SRH[SL] ;; Ref1
- Lookup DA in appropriate table and proceed accordingly
-```
-
-**Ref1:** This pseudocode can be augmented to support the Penultimate Segment
-Popping (PSP) endpoint flavor. The exact pseudocode modification are provided in
-[draft-filsfils-spring-srv6-network-programming](https://datatracker.ietf.org/doc/draft-filsfils-spring-srv6-network-programming/).
diff --git a/src/plugins/srv6-am/am_plugin_doc.rst b/src/plugins/srv6-am/am_plugin_doc.rst
new file mode 100644
index 00000000000..576379868fd
--- /dev/null
+++ b/src/plugins/srv6-am/am_plugin_doc.rst
@@ -0,0 +1,116 @@
+.. _srv6_am_plugin_doc:
+
+SRv6 masquerading
+=================
+
+SRv6 endpoint to SR-unaware appliance via masquerading (End.AM)
+---------------------------------------------------------------
+
+The masquerading proxy is an SR endpoint behavior for processing SRv6
+traffic on behalf of an SR-unaware SF. This proxy thus receives SR
+traffic that is formed of an IPv6 header and an SRH on top of an inner
+payload. The masquerading behavior is independent from the inner payload
+type. Hence, the inner payload can be of any type but it is usually
+expected to be a transport layer packet, such as TCP or UDP.
+
+A masquerading SR proxy segment is associated with the following
+mandatory parameters:
+
+- S-ADDR: Ethernet or IPv6 address of the SF
+- IFACE-OUT: Local interface for sending traffic towards the SF
+- IFACE-IN: Local interface receiving the traffic coming back from the
+ SF
+
+A masquerading SR proxy segment is thus defined for a specific SF and
+bound to a pair of directed interfaces or sub-interfaces on the proxy.
+As opposed to the static and dynamic SR proxies, a masquerading segment
+can be present at the same time in any number of SR SC policies and the
+same interfaces can be bound to multiple masquerading proxy segments.
+The only restriction is that a masquerading proxy segment cannot be the
+last segment in an SR SC policy.
+
+The first part of the masquerading behavior is triggered when the proxy
+node receives an IPv6 packet whose Destination Address matches a
+masquerading proxy segment. The proxy inspects the IPv6 extension
+headers and substitutes the Destination Address with the last segment in
+the SRH attached to the IPv6 header, which represents the final
+destination of the IPv6 packet. The packet is then sent out towards the
+SF.
+
+The SF receives an IPv6 packet whose source and destination addresses
+are respectively the original source and final destination. It does not
+attempt to inspect the SRH, as RFC8200 specifies that routing extension
+headers are not examined or processed by transit nodes. Instead, the SF
+simply forwards the packet based on its current Destination Address. In
+this scenario, we assume that the SF can only inspect, drop or perform
+limited changes to the packets. For example, Intrusion Detection
+Systems, Deep Packet Inspectors and non-NAT Firewalls are among the SFs
+that can be supported by a masquerading SR proxy.
+
+The second part of the masquerading behavior, also called
+de-masquerading, is an inbound policy attached to the proxy interface
+receiving the traffic returning from the SF, IFACE-IN. This policy
+inspects the incoming traffic and triggers a regular SRv6 endpoint
+processing (End) on any IPv6 packet that contains an SRH. This
+processing occurs before any lookup on the packet Destination Address is
+performed and it is sufficient to restore the right active segment as
+the Destination Address of the IPv6 packet.
+
+For more information, please see
+`draft-xuclad-spring-sr-service-chaining <https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/>`__.
+
+CLI configuration
+~~~~~~~~~~~~~~~~~
+
+The following command instantiates a new End.AM segment that sends
+masqueraded traffic on interface ``IFACE-OUT`` towards an appliance at
+address ``S-ADDR`` and restores the active segment in the IPv6 header of
+the packets coming back on interface ``IFACE-IN``.
+
+::
+
+ sr localsid address SID behavior end.am nh S-ADDR oif IFACE-OUT iif IFACE-IN
+
+For example, the below command configures the SID ``1::A1`` with an
+End.AM function for sending traffic on interface
+``GigabitEthernet0/8/0`` to the appliance at address ``A1::``, and
+receiving it back on interface ``GigabitEthernet0/9/0``.
+
+::
+
+ sr localsid address 1::A1 behavior end.am nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
+
+Pseudocode
+~~~~~~~~~~
+
+Masquerading
+^^^^^^^^^^^^
+
+Upon receiving a packet destined for S, where S is an IPv6 masquerading
+proxy segment, a node N processes it as follows.
+
+::
+
+ IF NH=SRH & SL > 0 THEN
+ Update the IPv6 DA with SRH[0]
+ Forward the packet on IFACE-OUT
+ ELSE
+ Drop the packet
+
+De-masquerading
+^^^^^^^^^^^^^^^
+
+Upon receiving a non-link-local IPv6 packet on IFACE-IN, a node N
+processes it as follows.
+
+::
+
+ IF NH=SRH & SL > 0 THEN
+ Decrement SL
+ Update the IPv6 DA with SRH[SL] ;; Ref1
+ Lookup DA in appropriate table and proceed accordingly
+
+**Ref1:** This pseudocode can be augmented to support the Penultimate
+Segment Popping (PSP) endpoint flavor. The exact pseudocode
+modifications are provided in
+`draft-filsfils-spring-srv6-network-programming <https://datatracker.ietf.org/doc/draft-filsfils-spring-srv6-network-programming/>`__.
diff --git a/src/plugins/srv6-am/node.c b/src/plugins/srv6-am/node.c
index dd71e60d5a2..beef6a30910 100644
--- a/src/plugins/srv6-am/node.c
+++ b/src/plugins/srv6-am/node.c
@@ -142,7 +142,6 @@ srv6_am_localsid_fn (vlib_main_t * vm,
{
ip6_sr_main_t *sm = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
- u32 cnt_packets = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -203,8 +202,6 @@ srv6_am_localsid_fn (vlib_main_t * vm,
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
-
- cnt_packets++;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -213,7 +210,6 @@ srv6_am_localsid_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_am_localsid_node) = {
.function = srv6_am_localsid_fn,
.name = "srv6-am-localsid",
@@ -226,7 +222,6 @@ VLIB_REGISTER_NODE (srv6_am_localsid_node) = {
[SRV6_AM_LOCALSID_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/******************************* Rewriting node *******************************/
@@ -329,7 +324,6 @@ srv6_am_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_am_rewrite_node) = {
.function = srv6_am_rewrite_fn,
.name = "srv6-am-rewrite",
@@ -344,7 +338,6 @@ VLIB_REGISTER_NODE (srv6_am_rewrite_node) = {
[SRV6_AM_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-as/as.c b/src/plugins/srv6-as/as.c
index d9dbd8aa608..bdf17527ae8 100644
--- a/src/plugins/srv6-as/as.c
+++ b/src/plugins/srv6-as/as.c
@@ -470,7 +470,7 @@ unformat_srv6_as_localsid (unformat_input_t * input, va_list * args)
}
/* Allocate and initialize memory block for local SID parameters */
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
clib_memset (ls_mem, 0, sizeof *ls_mem);
*plugin_mem_p = ls_mem;
@@ -557,7 +557,6 @@ srv6_as_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_as2_rewrite, static) =
{
.arc_name = "device-input",
@@ -585,7 +584,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Static Segment Routing for IPv6 (SRv6) Proxy",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-as/as_plugin_doc.md b/src/plugins/srv6-as/as_plugin_doc.md
deleted file mode 100644
index 7cda08b60d9..00000000000
--- a/src/plugins/srv6-as/as_plugin_doc.md
+++ /dev/null
@@ -1,152 +0,0 @@
-# SRv6 endpoint to SR-unaware appliance via static proxy (End.AS) {#srv6_as_plugin_doc}
-
-## Overview
-
-The static proxy is an SR endpoint behavior for processing SR-MPLS or SRv6
-encapsulated traffic on behalf of an SR-unaware SF. This proxy thus receives SR
-traffic that is formed of an MPLS label stack or an IPv6 header on top of an
-inner packet, which can be Ethernet, IPv4 or IPv6.
-
-A static SR proxy segment is associated with the following mandatory parameters:
-
-- INNER-TYPE: Inner packet type
-- S-ADDR: Ethernet or IP address of the SF (only for inner type IPv4 and IPv6)
-- IFACE-OUT: Local interface for sending traffic towards the SF
-- IFACE-IN: Local interface receiving the traffic coming back from the SF
-- CACHE: SR information to be attached on the traffic coming back from the SF,
-including at least
- * CACHE.SA: IPv6 source address (SRv6 only)
- * CACHE.LIST: Segment list expressed as MPLS labels or IPv6 address
-
-A static SR proxy segment is thus defined for a specific SF, inner packet type
-and cached SR information. It is also bound to a pair of directed interfaces on
-the proxy. These may be both directions of a single interface, or opposite
-directions of two different interfaces. The latter is recommended in case the SF
-is to be used as part of a bi-directional SR SC policy. If the proxy and the SF
-both support 802.1Q, IFACE-OUT and IFACE-IN can also represent sub-interfaces.
-
-The first part of this behavior is triggered when the proxy node receives a
-packet whose active segment matches a segment associated with the static proxy
-behavior. It removes the SR information from the packet then sends it on a
-specific interface towards the associated SF. This SR information corresponds to
-the full label stack for SR-MPLS or to the encapsulation IPv6 header with any
-attached extension header in the case of SRv6.
-
-The second part is an inbound policy attached to the proxy interface receiving
-the traffic returning from the SF, IFACE-IN. This policy attaches to the
-incoming traffic the cached SR information associated with the SR proxy segment.
-If the proxy segment uses the SR-MPLS data plane, CACHE contains a stack of
-labels to be pushed on top the packets. With the SRv6 data plane, CACHE is
-defined as a source address, an active segment and an optional SRH (tag,
-segments left, segment list and metadata). The proxy encapsulates the packets
-with an IPv6 header that has the source address, the active segment as
-destination address and the SRH as a routing extension header. After the SR
-information has been attached, the packets are forwarded according to the active
-segment, which is represented by the top MPLS label or the IPv6 Destination
-Address.
-
-In this scenario, there are no restrictions on the operations that can be
-performed by the SF on the stream of packets. It may operate at all protocol
-layers, terminate transport layer connections, generate new packets and initiate
-transport layer connections. This behavior may also be used to integrate an
-IPv4-only SF into an SRv6 policy. However, a static SR proxy segment can be used
-in only one service chain at a time. As opposed to most other segment types, a
-static SR proxy segment is bound to a unique list of segments, which represents
-a directed SR SC policy. This is due to the cached SR information being defined
-in the segment configuration. This limitation only prevents multiple segment
-lists from using the same static SR proxy segment at the same time, but a single
-segment list can be shared by any number of traffic flows. Besides, since the
-returning traffic from the SF is re-classified based on the incoming interface,
-an interface can be used as receiving interface (IFACE-IN) only for a single SR
-proxy segment at a time. In the case of a bi-directional SR SC policy, a
-different SR proxy segment and receiving interface are required for the return
-direction.
-
-For more information, please see
-[draft-xuclad-spring-sr-service-chaining](https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/).
-
-## CLI configuration
-
-The following command instantiates a new End.AS segment that sends the inner
-packets on interface `IFACE-OUT` towards an appliance at address `S-ADDR` and
-restores the segment list ``<S1, S2, S3>`` with a source address `SRC-ADDR` on
-the packets coming back on interface `IFACE-IN`.
-
-```
-sr localsid address SID behavior end.ad nh S-ADDR oif IFACE-OUT iif IFACE-IN src SRC-ADDR next S1 next S2 next S3
-```
-
-For example, the below command configures the SID `1::A1` with an End.AS
-function for sending traffic on interface `GigabitEthernet0/8/0` to the
-appliance at address `A1::`, and receiving it back on interface
-`GigabitEthernet0/9/0`.
-
-```
-sr localsid address 1::A1 behavior end.ad nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0 src 1:: next 2::20 next 3::30 next 4::40
-```
-
-## Pseudocode
-
-### Static proxy for inner type IPv4
-
-Upon receiving an IPv6 packet destined for S, where S is an IPv6 static proxy
-segment for IPv4 traffic, a node N does:
-
-```
-IF ENH == 4 THEN ;; Ref1
- Remove the (outer) IPv6 header and its extension headers
- Forward the exposed packet on IFACE-OUT towards S-ADDR
-ELSE
- Drop the packet
-```
-
-**Ref1:** 4 refers to IPv4 encapsulation as defined by IANA allocation for Internet
-Protocol Numbers.
-
-Upon receiving a non link-local IPv4 packet on IFACE-IN, a node N does:
-
-```
-Decrement TTL and update checksum
-IF CACHE.SRH THEN ;; Ref2
- Push CACHE.SRH on top of the existing IPv4 header
- Set NH value of the pushed SRH to 4
-Push outer IPv6 header with SA, DA and traffic class from CACHE
-Set outer payload length and flow label
-Set NH value to 43 if an SRH was added, or 4 otherwise
-Lookup outer DA in appropriate table and proceed accordingly
-```
-
-**Ref2:** CACHE.SRH represents the SRH defined in CACHE, if any, for the static SR
-proxy segment associated with IFACE-IN.
-
-### Static proxy for inner type IPv6
-
-Upon receiving an IPv6 packet destined for S, where S is an IPv6 static proxy
-segment for IPv6 traffic, a node N does:
-
-```
-IF ENH == 41 THEN ;; Ref1
- Remove the (outer) IPv6 header and its extension headers
- Forward the exposed packet on IFACE-OUT towards S-ADDR
-ELSE
- Drop the packet
-```
-
-**Ref1:** 41 refers to IPv6 encapsulation as defined by IANA allocation for Internet
-Protocol Numbers.
-
-Upon receiving a non-link-local IPv6 packet on IFACE-IN, a node N does:
-
-```
-Decrement Hop Limit
-IF CACHE.SRH THEN ;; Ref2
- Push CACHE.SRH on top of the existing IPv6 header
- Set NH value of the pushed SRH to 41
-Push outer IPv6 header with SA, DA and traffic class from CACHE
-Set outer payload length and flow label
-Set NH value to 43 if an SRH was added, or 41 otherwise
-Lookup outer DA in appropriate table and proceed accordingly
-```
-
-**Ref2:** CACHE.SRH represents the SRH defined in CACHE, if any, for the static SR
-proxy segment associated with IFACE-IN.
diff --git a/src/plugins/srv6-as/as_plugin_doc.rst b/src/plugins/srv6-as/as_plugin_doc.rst
new file mode 100644
index 00000000000..9fa7f8fc19e
--- /dev/null
+++ b/src/plugins/srv6-as/as_plugin_doc.rst
@@ -0,0 +1,172 @@
+.. _srv6_as_plugin_doc:
+
+SRv6 static proxy
+=================
+
+This document describes the SRv6 endpoint to SR-unaware appliance via
+static proxy (End.AS).
+
+Overview
+--------
+
+The static proxy is an SR endpoint behavior for processing SR-MPLS or
+SRv6 encapsulated traffic on behalf of an SR-unaware SF. This proxy thus
+receives SR traffic that is formed of an MPLS label stack or an IPv6
+header on top of an inner packet, which can be Ethernet, IPv4 or IPv6.
+
+A static SR proxy segment is associated with the following mandatory
+parameters:
+
+- INNER-TYPE: Inner packet type
+- S-ADDR: Ethernet or IP address of the SF (only for inner type IPv4
+ and IPv6)
+- IFACE-OUT: Local interface for sending traffic towards the SF
+- IFACE-IN: Local interface receiving the traffic coming back from the
+ SF
+- CACHE: SR information to be attached on the traffic coming back from
+ the SF, including at least
+
+ - CACHE.SA: IPv6 source address (SRv6 only)
+ - CACHE.LIST: Segment list expressed as MPLS labels or IPv6 address
+
+A static SR proxy segment is thus defined for a specific SF, inner
+packet type and cached SR information. It is also bound to a pair of
+directed interfaces on the proxy. These may be both directions of a
+single interface, or opposite directions of two different interfaces.
+The latter is recommended in case the SF is to be used as part of a
+bi-directional SR SC policy. If the proxy and the SF both support
+802.1Q, IFACE-OUT and IFACE-IN can also represent sub-interfaces.
+
+The first part of this behavior is triggered when the proxy node
+receives a packet whose active segment matches a segment associated with
+the static proxy behavior. It removes the SR information from the packet
+then sends it on a specific interface towards the associated SF. This SR
+information corresponds to the full label stack for SR-MPLS or to the
+encapsulation IPv6 header with any attached extension header in the case
+of SRv6.
+
+The second part is an inbound policy attached to the proxy interface
+receiving the traffic returning from the SF, IFACE-IN. This policy
+attaches to the incoming traffic the cached SR information associated
+with the SR proxy segment. If the proxy segment uses the SR-MPLS data
+plane, CACHE contains a stack of labels to be pushed on top of the packets.
+With the SRv6 data plane, CACHE is defined as a source address, an
+active segment and an optional SRH (tag, segments left, segment list and
+metadata). The proxy encapsulates the packets with an IPv6 header that
+has the source address, the active segment as destination address and
+the SRH as a routing extension header. After the SR information has been
+attached, the packets are forwarded according to the active segment,
+which is represented by the top MPLS label or the IPv6 Destination
+Address.
+
+In this scenario, there are no restrictions on the operations that can
+be performed by the SF on the stream of packets. It may operate at all
+protocol layers, terminate transport layer connections, generate new
+packets and initiate transport layer connections. This behavior may also
+be used to integrate an IPv4-only SF into an SRv6 policy. However, a
+static SR proxy segment can be used in only one service chain at a time.
+As opposed to most other segment types, a static SR proxy segment is
+bound to a unique list of segments, which represents a directed SR SC
+policy. This is due to the cached SR information being defined in the
+segment configuration. This limitation only prevents multiple segment
+lists from using the same static SR proxy segment at the same time, but
+a single segment list can be shared by any number of traffic flows.
+Besides, since the returning traffic from the SF is re-classified based
+on the incoming interface, an interface can be used as receiving
+interface (IFACE-IN) only for a single SR proxy segment at a time. In
+the case of a bi-directional SR SC policy, a different SR proxy segment
+and receiving interface are required for the return direction.
+
+For more information, please see
+`draft-xuclad-spring-sr-service-chaining <https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/>`__.
+
+CLI configuration
+-----------------
+
+The following command instantiates a new End.AS segment that sends the
+inner packets on interface ``IFACE-OUT`` towards an appliance at address
+``S-ADDR`` and restores the segment list ``<S1, S2, S3>`` with a source
+address ``SRC-ADDR`` on the packets coming back on interface
+``IFACE-IN``.
+
+::
+
+ sr localsid address SID behavior end.as nh S-ADDR oif IFACE-OUT iif IFACE-IN src SRC-ADDR next S1 next S2 next S3
+
+For example, the below command configures the SID ``1::A1`` with an
+End.AS function for sending traffic on interface
+``GigabitEthernet0/8/0`` to the appliance at address ``A1::``, and
+receiving it back on interface ``GigabitEthernet0/9/0``.
+
+::
+
+ sr localsid address 1::A1 behavior end.as nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0 src 1:: next 2::20 next 3::30 next 4::40
+
+Pseudocode
+----------
+
+Static proxy for inner type IPv4
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Upon receiving an IPv6 packet destined for S, where S is an IPv6 static
+proxy segment for IPv4 traffic, a node N does:
+
+::
+
+ IF ENH == 4 THEN ;; Ref1
+ Remove the (outer) IPv6 header and its extension headers
+ Forward the exposed packet on IFACE-OUT towards S-ADDR
+ ELSE
+ Drop the packet
+
+**Ref1:** 4 refers to IPv4 encapsulation as defined by IANA allocation
+for Internet Protocol Numbers.
+
+Upon receiving a non link-local IPv4 packet on IFACE-IN, a node N does:
+
+::
+
+ Decrement TTL and update checksum
+ IF CACHE.SRH THEN ;; Ref2
+ Push CACHE.SRH on top of the existing IPv4 header
+ Set NH value of the pushed SRH to 4
+ Push outer IPv6 header with SA, DA and traffic class from CACHE
+ Set outer payload length and flow label
+ Set NH value to 43 if an SRH was added, or 4 otherwise
+ Lookup outer DA in appropriate table and proceed accordingly
+
+**Ref2:** CACHE.SRH represents the SRH defined in CACHE, if any, for the
+static SR proxy segment associated with IFACE-IN.
+
+Static proxy for inner type IPv6
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Upon receiving an IPv6 packet destined for S, where S is an IPv6 static
+proxy segment for IPv6 traffic, a node N does:
+
+::
+
+ IF ENH == 41 THEN ;; Ref1
+ Remove the (outer) IPv6 header and its extension headers
+ Forward the exposed packet on IFACE-OUT towards S-ADDR
+ ELSE
+ Drop the packet
+
+**Ref1:** 41 refers to IPv6 encapsulation as defined by IANA allocation
+for Internet Protocol Numbers.
+
+Upon receiving a non-link-local IPv6 packet on IFACE-IN, a node N does:
+
+::
+
+ Decrement Hop Limit
+ IF CACHE.SRH THEN ;; Ref2
+ Push CACHE.SRH on top of the existing IPv6 header
+ Set NH value of the pushed SRH to 41
+ Push outer IPv6 header with SA, DA and traffic class from CACHE
+ Set outer payload length and flow label
+ Set NH value to 43 if an SRH was added, or 41 otherwise
+ Lookup outer DA in appropriate table and proceed accordingly
+
+**Ref2:** CACHE.SRH represents the SRH defined in CACHE, if any, for the
+static SR proxy segment associated with IFACE-IN.
diff --git a/src/plugins/srv6-as/node.c b/src/plugins/srv6-as/node.c
index 9e84a9848e5..e81881982af 100644
--- a/src/plugins/srv6-as/node.c
+++ b/src/plugins/srv6-as/node.c
@@ -169,7 +169,6 @@ srv6_as_localsid_fn (vlib_main_t * vm,
{
ip6_sr_main_t *sm = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
- u32 cnt_packets = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -227,8 +226,6 @@ srv6_as_localsid_fn (vlib_main_t * vm,
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
-
- cnt_packets++;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -237,7 +234,6 @@ srv6_as_localsid_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_as_localsid_node) = {
.function = srv6_as_localsid_fn,
.name = "srv6-as-localsid",
@@ -252,7 +248,6 @@ VLIB_REGISTER_NODE (srv6_as_localsid_node) = {
[SRV6_AS_LOCALSID_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/******************************* Rewriting node *******************************/
@@ -373,7 +368,6 @@ srv6_as2_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_as2_rewrite_node) = {
.function = srv6_as2_rewrite_fn,
.name = "srv6-as2-rewrite",
@@ -388,7 +382,6 @@ VLIB_REGISTER_NODE (srv6_as2_rewrite_node) = {
[SRV6_AS_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/**
@@ -517,7 +510,6 @@ srv6_as4_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_as4_rewrite_node) = {
.function = srv6_as4_rewrite_fn,
.name = "srv6-as4-rewrite",
@@ -532,7 +524,6 @@ VLIB_REGISTER_NODE (srv6_as4_rewrite_node) = {
[SRV6_AS_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/**
@@ -656,7 +647,6 @@ srv6_as6_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_as6_rewrite_node) = {
.function = srv6_as6_rewrite_fn,
.name = "srv6-as6-rewrite",
@@ -671,7 +661,6 @@ VLIB_REGISTER_NODE (srv6_as6_rewrite_node) = {
[SRV6_AS_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/CMakeLists.txt b/src/plugins/srv6-mobile/CMakeLists.txt
index 5a9945c2e4f..a917c8ded82 100644
--- a/src/plugins/srv6-mobile/CMakeLists.txt
+++ b/src/plugins/srv6-mobile/CMakeLists.txt
@@ -21,6 +21,11 @@ add_vpp_plugin(srv6mobile
gtp6_d_di.c
gtp6_dt.c
node.c
+ sr_mobile_api.c
+
+ API_FILES
+ sr_mobile.api
+ sr_mobile_types.api
INSTALL_HEADERS
mobile.h
diff --git a/src/plugins/srv6-mobile/FEATURE.yaml b/src/plugins/srv6-mobile/FEATURE.yaml
index 3289b890506..45fb7da0201 100644
--- a/src/plugins/srv6-mobile/FEATURE.yaml
+++ b/src/plugins/srv6-mobile/FEATURE.yaml
@@ -1,5 +1,5 @@
---
-name: SRv6 Mobuile
+name: SRv6 Mobile
maintainer: Tetsuya Murakami <tetsuya.mrk@gmail.com>
features:
- GTP4.D
diff --git a/src/plugins/srv6-mobile/extra/Dockerfile.j2 b/src/plugins/srv6-mobile/extra/Dockerfile.j2
index 8e42af09d56..e8120bb736b 100644
--- a/src/plugins/srv6-mobile/extra/Dockerfile.j2
+++ b/src/plugins/srv6-mobile/extra/Dockerfile.j2
@@ -12,6 +12,7 @@ RUN set -eux; \
net-tools \
iproute2 \
tcpdump \
+ python3-cffi \
asciidoc \
xmlto \
libssl-dev \
diff --git a/src/plugins/srv6-mobile/extra/Dockerfile.j2.release b/src/plugins/srv6-mobile/extra/Dockerfile.j2.release
index aec520bfb85..7507f50403b 100644
--- a/src/plugins/srv6-mobile/extra/Dockerfile.j2.release
+++ b/src/plugins/srv6-mobile/extra/Dockerfile.j2.release
@@ -15,6 +15,7 @@ RUN set -eux; \
net-tools \
iproute2 \
tcpdump \
+ python3-cffi \
python2.7 \
libssl-dev \
netcat; \
diff --git a/src/plugins/srv6-mobile/extra/runner.py b/src/plugins/srv6-mobile/extra/runner.py
index b18fdc32766..31201935120 100755
--- a/src/plugins/srv6-mobile/extra/runner.py
+++ b/src/plugins/srv6-mobile/extra/runner.py
@@ -1,10 +1,8 @@
#!/usr/bin/env python3
-from os.path import dirname, realpath, split,\
- join, isdir, exists
+from os.path import dirname, realpath, split, join, isdir, exists
from os import remove, system, mkdir
-from logging import getLogger, basicConfig,\
- DEBUG, INFO, ERROR
+from logging import getLogger, basicConfig, DEBUG, INFO, ERROR
from argparse import ArgumentParser
from atexit import register
from shutil import rmtree
@@ -18,10 +16,7 @@ from scapy.contrib.gtp import *
from scapy.all import *
-verbose_levels = {
- 'error': ERROR,
- 'debug': DEBUG,
- 'info': INFO}
+verbose_levels = {"error": ERROR, "debug": DEBUG, "info": INFO}
class ContainerStartupError(Exception):
@@ -29,7 +24,6 @@ class ContainerStartupError(Exception):
class Container(object):
-
tmp = "/tmp"
cmd = "vppctl -s 0:5002"
cmd_bash = "/bin/bash"
@@ -74,7 +68,6 @@ class Container(object):
@classmethod
def new(cls, client, image, name):
-
temp = join(cls.tmp, name)
if isdir(temp):
rmtree(temp)
@@ -87,10 +80,8 @@ class Container(object):
image=image,
name=name,
privileged=True,
- volumes={
- temp: {
- 'bind': '/mnt',
- 'mode': 'rw'}})
+ volumes={temp: {"bind": "/mnt", "mode": "rw"}},
+ )
obj = cls.get(client, name)
if not obj:
@@ -119,7 +110,7 @@ class Container(object):
def vppctl_exec(self, cmd):
ec, resp = self._ref.exec_run(cmd="{} {}".format(self.cmd, cmd))
- assert(ec == 0)
+ assert ec == 0
return resp
def setup_host_interface(self, name, ip):
@@ -134,8 +125,7 @@ class Container(object):
self.vppctl_exec("create packet-generator interface pg0")
self.vppctl_exec("set int mac address pg0 {}".format(local_mac))
self.vppctl_exec("set int ip addr pg0 {}".format(local_ip))
- self.vppctl_exec(
- "set ip neighbor pg0 {} {}".format(remote_ip, remote_mac))
+ self.vppctl_exec("set ip neighbor pg0 {} {}".format(remote_ip, remote_mac))
self.vppctl_exec("set int state pg0 up")
def pg_create_interface4(self, local_ip, remote_ip, local_mac, remote_mac):
@@ -158,24 +148,32 @@ class Container(object):
self.vppctl_exec("set ip neighbor pg0 {} {}".format(remote_ip, remote_mac))
self.vppctl_exec("set int state pg0 up")
- def pg_create_interface4_name(self, ifname, local_ip, remote_ip, local_mac, remote_mac):
+ def pg_create_interface4_name(
+ self, ifname, local_ip, remote_ip, local_mac, remote_mac
+ ):
         # remote_ip can't have a subnet mask
time.sleep(2)
self.vppctl_exec("create packet-generator interface {}".format(ifname))
self.vppctl_exec("set int mac address {} {}".format(ifname, local_mac))
self.vppctl_exec("set int ip addr {} {}".format(ifname, local_ip))
- self.vppctl_exec("set ip neighbor {} {} {}".format(ifname, remote_ip, remote_mac))
+ self.vppctl_exec(
+ "set ip neighbor {} {} {}".format(ifname, remote_ip, remote_mac)
+ )
self.vppctl_exec("set int state {} up".format(ifname))
- def pg_create_interface6_name(self, ifname, local_ip, remote_ip, local_mac, remote_mac):
+ def pg_create_interface6_name(
+ self, ifname, local_ip, remote_ip, local_mac, remote_mac
+ ):
         # remote_ip can't have a subnet mask
time.sleep(2)
self.vppctl_exec("create packet-generator interface {}".format(ifname))
self.vppctl_exec("set int mac address {} {}".format(ifname, local_mac))
self.vppctl_exec("set int ip addr {} {}".format(ifname, local_ip))
- self.vppctl_exec("set ip neighbor {} {} {}".format(ifname, remote_ip, remote_mac))
+ self.vppctl_exec(
+ "set ip neighbor {} {} {}".format(ifname, remote_ip, remote_mac)
+ )
self.vppctl_exec("set int state {} up".format(ifname))
def pg_enable(self):
@@ -186,98 +184,106 @@ class Container(object):
wrpcap(self.pg_input_file, stream)
self.vppctl_exec(
"packet-generator new name pg-stream "
- "node ethernet-input pcap {}".format(
- self.pg_input_file_in))
+ "node ethernet-input pcap {}".format(self.pg_input_file_in)
+ )
def pg_start_capture(self):
if exists(self.pg_output_file):
remove(self.pg_output_file)
self.vppctl_exec(
- "packet-generator capture pg0 pcap {}".format(
- self.pg_output_file_in))
+ "packet-generator capture pg0 pcap {}".format(self.pg_output_file_in)
+ )
def pg_start_capture_name(self, ifname):
if exists(self.pg_output_file):
remove(self.pg_output_file)
self.vppctl_exec(
- "packet-generator capture {} pcap {}".format(
- ifname, self.pg_output_file_in))
+ "packet-generator capture {} pcap {}".format(ifname, self.pg_output_file_in)
+ )
def pg_read_packets(self):
return rdpcap(self.pg_output_file)
def set_ipv6_route(self, out_if_name, next_hop_ip, subnet):
self.vppctl_exec(
- "ip route add {} via host-{} {}".format(
- subnet, out_if_name, next_hop_ip))
+ "ip route add {} via host-{} {}".format(subnet, out_if_name, next_hop_ip)
+ )
def set_ipv6_route2(self, out_if_name, next_hop_ip, subnet):
self.vppctl_exec(
- "ip route add {} via {} {}".format(
- subnet, out_if_name, next_hop_ip))
+ "ip route add {} via {} {}".format(subnet, out_if_name, next_hop_ip)
+ )
def set_ip_pgroute(self, out_if_name, next_hop_ip, subnet):
- self.vppctl_exec("ip route add {} via {} {}".format(
- subnet, out_if_name, next_hop_ip))
+ self.vppctl_exec(
+ "ip route add {} via {} {}".format(subnet, out_if_name, next_hop_ip)
+ )
def set_ipv6_pgroute(self, out_if_name, next_hop_ip, subnet):
- self.vppctl_exec("ip route add {} via {} {}".format(
- subnet, out_if_name, next_hop_ip))
+ self.vppctl_exec(
+ "ip route add {} via {} {}".format(subnet, out_if_name, next_hop_ip)
+ )
def set_ipv6_default_route(self, out_if_name, next_hop_ip):
self.vppctl_exec(
- "ip route add ::/0 via host-{} {}".format(
- out_if_name, next_hop_ip))
+ "ip route add ::/0 via host-{} {}".format(out_if_name, next_hop_ip)
+ )
def enable_trace(self, count):
self.vppctl_exec("trace add af-packet-input {}".format(count))
class Containers(object):
-
def __init__(self, client, image):
self.client = client
self.image = image
def tmp_render(self, path, template, kwargs):
-
with open(path, "w") as fo:
fo.write(template.render(**kwargs))
register(lambda: remove(path))
def build(self, path, vpp_path):
- env = Environment(loader=FileSystemLoader(path),
- autoescape=True,
- trim_blocks=True)
-
- self.tmp_render(join(vpp_path, "Dockerfile"),
- env.get_template("Dockerfile.j2"),
- {'vpp_path': vpp_path})
-
- self.tmp_render(join(vpp_path, "startup.conf"),
- env.get_template("startup.conf.j2"),
- {'vpp_path': vpp_path})
-
- ref, _ = self.client.images.build(path=vpp_path,
- tag=self.image, rm=True)
+ env = Environment(
+ loader=FileSystemLoader(path), autoescape=True, trim_blocks=True
+ )
+
+ self.tmp_render(
+ join(vpp_path, "Dockerfile"),
+ env.get_template("Dockerfile.j2"),
+ {"vpp_path": vpp_path},
+ )
+
+ self.tmp_render(
+ join(vpp_path, "startup.conf"),
+ env.get_template("startup.conf.j2"),
+ {"vpp_path": vpp_path},
+ )
+
+ ref, _ = self.client.images.build(path=vpp_path, tag=self.image, rm=True)
return ref
def release(self, path, vpp_path):
- env = Environment(loader=FileSystemLoader(path),
- autoescape=True,
- trim_blocks=True)
-
- self.tmp_render(join(vpp_path, "Dockerfile"),
- env.get_template("Dockerfile.j2.release"),
- {'vpp_path': vpp_path})
-
- self.tmp_render(join(vpp_path, "startup.conf"),
- env.get_template("startup.conf.j2"),
- {'vpp_path': vpp_path})
-
- ref, _ = self.client.images.build(path=vpp_path,
- tag="srv6m-release-image", rm=True)
+ env = Environment(
+ loader=FileSystemLoader(path), autoescape=True, trim_blocks=True
+ )
+
+ self.tmp_render(
+ join(vpp_path, "Dockerfile"),
+ env.get_template("Dockerfile.j2.release"),
+ {"vpp_path": vpp_path},
+ )
+
+ self.tmp_render(
+ join(vpp_path, "startup.conf"),
+ env.get_template("startup.conf.j2"),
+ {"vpp_path": vpp_path},
+ )
+
+ ref, _ = self.client.images.build(
+ path=vpp_path, tag="srv6m-release-image", rm=True
+ )
return ref
def new(self, name):
@@ -299,7 +305,6 @@ class Containers(object):
class Network(object):
-
def __init__(self, ref, name):
self._name = name
self._ref = ref
@@ -310,8 +315,7 @@ class Network(object):
@classmethod
def new(cls, client, name):
- ref = client.networks.create(name, driver="bridge",
- check_duplicate=True)
+ ref = client.networks.create(name, driver="bridge", check_duplicate=True)
return cls(ref, name)
@classmethod
@@ -331,7 +335,6 @@ class Network(object):
class Networks(object):
-
def __init__(self, client):
self.client = client
@@ -343,7 +346,6 @@ class Networks(object):
class Program(object):
-
image = "srv6m-image"
name_prefix = "hck"
@@ -352,14 +354,9 @@ class Program(object):
     # for example what the vpp is supposed to be
     # in our topology overview
- instance_names = ["vpp-1",
- "vpp-2",
- "vpp-3",
- "vpp-4"]
+ instance_names = ["vpp-1", "vpp-2", "vpp-3", "vpp-4"]
- network_names = ["net-1",
- "net-2",
- "net-3"]
+ network_names = ["net-1", "net-2", "net-3"]
def __init__(self, image=None, prefix=None):
self.path = dirname(realpath(__file__))
@@ -385,7 +382,6 @@ class Program(object):
return "{}-{}".format(self.name_prefix, name)
def stop_containers(self):
-
for name in self.instance_names:
instance = self.containers.get(self.get_name(name))
if instance:
@@ -397,7 +393,6 @@ class Program(object):
network.rem()
def start_containers(self):
-
self.stop_containers()
networks = list()
@@ -469,15 +464,20 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="B::2") / ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="B::2")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -516,16 +516,17 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr D1::")
- c1.vppctl_exec(
- "sr policy add bsid D1::999:1 next D2:: next D3:: next D4::")
+ c1.vppctl_exec("sr policy add bsid D1::999:1 next D2:: next D3:: next D4::")
c1.vppctl_exec("sr steer l3 B::/120 via bsid D1::999:1")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -539,8 +540,11 @@ class Program(object):
c3.set_ipv6_route("eth2", "A3::2", "D4::/128")
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="B::2") / ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="B::2")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -561,7 +565,7 @@ class Program(object):
for p in c4.pg_read_packets():
p.show2()
- ''' T.Map is obsolete
+ """ T.Map is obsolete
def test_tmap(self):
# TESTS:
# trace add af-packet-input 10
@@ -844,7 +848,7 @@ class Program(object):
print("Receiving packet on {}:".format(c4.name))
for p in c4.pg_read_packets():
p.show2()
- '''
+ """
def test_gtp4(self):
# TESTS:
@@ -863,16 +867,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -880,8 +888,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -889,17 +897,19 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.99.0.1", dst="172.99.0.2") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.99.0.1", dst="172.99.0.2")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
- time.sleep(10)
+ time.sleep(10)
c1.enable_trace(10)
c4.enable_trace(10)
@@ -933,16 +943,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:1111:aaaa:bbbb::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid prefix D2:1111:aaaa::/48 behavior end usid 16")
@@ -950,8 +964,8 @@ class Program(object):
c3.vppctl_exec("sr localsid prefix D2:1111:bbbb::/48 behavior end usid 16")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D2:1111:bbbb::/48")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -959,17 +973,19 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.99.0.1", dst="172.99.0.2") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.99.0.1", dst="172.99.0.2")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
- time.sleep(10)
+ time.sleep(10)
c1.enable_trace(10)
c4.enable_trace(10)
@@ -1003,16 +1019,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1020,8 +1040,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1029,13 +1049,15 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(type=1, R=1, QFI=3) /
- IP(src="172.99.0.1", dst="172.99.0.2") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(type=1, R=1, QFI=3)
+ / IP(src="172.99.0.1", dst="172.99.0.2")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1073,16 +1095,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1090,8 +1116,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1099,10 +1125,12 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="echo_request", S=1, teid=200, seq=200))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="echo_request", S=1, teid=200, seq=200)
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1140,16 +1168,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1157,8 +1189,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1166,10 +1198,12 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="echo_response", S=1, teid=200, seq=200))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="echo_response", S=1, teid=200, seq=200)
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1207,16 +1241,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1224,8 +1262,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1233,12 +1271,15 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="error_indication", S=1, teid=200, seq=200)/
- IE_TEIDI(TEIDI=65535)/IE_GSNAddress(address="1.1.1.1")/
- IE_PrivateExtension(extention_value="z"))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="error_indication", S=1, teid=200, seq=200)
+ / IE_TEIDI(TEIDI=65535)
+ / IE_GSNAddress(address="1.1.1.1")
+ / IE_PrivateExtension(extention_value="z")
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1276,16 +1317,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1293,8 +1338,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1302,12 +1347,14 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1345,16 +1392,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1362,8 +1413,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1371,13 +1422,15 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(R=1, QFI=3) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(R=1, QFI=3)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1419,18 +1472,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1447,12 +1501,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.99.0.1", dst="172.99.0.2") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.99.0.1", dst="172.99.0.2")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1494,18 +1550,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1522,13 +1579,15 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(type=1, R=1, QFI=3) /
- IP(src="172.99.0.1", dst="172.99.0.2") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(type=1, R=1, QFI=3)
+ / IP(src="172.99.0.1", dst="172.99.0.2")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1570,18 +1629,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1598,10 +1658,12 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="echo_request", S=1, teid=200, seq=300))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="echo_request", S=1, teid=200, seq=300)
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1643,18 +1705,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1671,10 +1734,12 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="echo_response", S=1, teid=200, seq=300))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="echo_response", S=1, teid=200, seq=300)
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1716,18 +1781,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1744,12 +1810,15 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="error_indication", S=1, teid=200, seq=300)/
- IE_TEIDI(TEIDI=65535)/IE_GSNAddress(address="1.1.1.1")/
- IE_PrivateExtension(extention_value="z"))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="error_indication", S=1, teid=200, seq=300)
+ / IE_TEIDI(TEIDI=65535)
+ / IE_GSNAddress(address="1.1.1.1")
+ / IE_PrivateExtension(extention_value="z")
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1791,18 +1860,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1819,12 +1889,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1866,18 +1938,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1894,13 +1967,15 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(R=1, QFI=3) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(R=1, QFI=3)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1942,18 +2017,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1971,12 +2047,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.100.0.1", dst="172.200.0.1") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.100.0.1", dst="172.200.0.1")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2018,18 +2096,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -2047,13 +2126,15 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(R=1, QFI=3) /
- IP(src="172.100.0.1", dst="172.200.0.1") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(R=1, QFI=3)
+ / IP(src="172.100.0.1", dst="172.200.0.1")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2095,18 +2176,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -2124,12 +2206,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2171,18 +2255,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -2200,13 +2285,15 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(R=1, QFI=3) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(R=1, QFI=3)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2246,19 +2333,22 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c1.pg_create_interface4_name(
ifname="pg1",
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.dt46 fib-table 0 local-fib-table 0")
+ "sr localsid prefix D::/64 behavior end.m.gtp6.dt46 fib-table 0 local-fib-table 0"
+ )
c1.vppctl_exec("set ip neighbor pg1 1.0.0.1 aa:bb:cc:dd:ee:22")
c1.set_ip_pgroute("pg1", "1.0.0.1", "172.200.0.1/32")
@@ -2266,12 +2356,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.100.0.1", dst="172.200.0.1") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.100.0.1", dst="172.200.0.1")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2310,14 +2402,16 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c1.pg_create_interface4_name(
ifname="pg1",
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.dt4 fib-table 0")
@@ -2329,12 +2423,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.100.0.1", dst="172.200.0.1") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.100.0.1", dst="172.200.0.1")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2355,22 +2451,25 @@ class Program(object):
p.show2()
def status_containers(self):
-
print("Instances:")
for i, name in enumerate(self.instance_names):
name = self.get_name(name)
- print("\t[{}] {} - {}".format(
- i, name,
- "running" if self.containers.get(name) else "missing"))
+ print(
+ "\t[{}] {} - {}".format(
+ i, name, "running" if self.containers.get(name) else "missing"
+ )
+ )
print("Networks:")
for i, name in enumerate(self.network_names):
name = self.get_name(name)
- print("\t[{}] {} - {}".format(
- i, name,
- "running" if self.networks.get(name) else "missing"))
+ print(
+ "\t[{}] {} - {}".format(
+ i, name, "running" if self.networks.get(name) else "missing"
+ )
+ )
def build_image(self):
print("VPP Path (build): {}".format(self.vpp_path))
@@ -2382,7 +2481,9 @@ class Program(object):
system(
"docker cp release-build:{}/vpp-package.tgz {}/".format(
- self.vpp_path, self.vpp_path))
+ self.vpp_path, self.vpp_path
+ )
+ )
instance.rem()
@@ -2408,39 +2509,30 @@ class Program(object):
def get_args():
parser = ArgumentParser()
- parser.add_argument("--verbose", choices=['error', 'debug', 'info'])
+ parser.add_argument("--verbose", choices=["error", "debug", "info"])
- parser.add_argument('--image', choices=['debug', 'release'])
+ parser.add_argument("--image", choices=["debug", "release"])
subparsers = parser.add_subparsers()
- p1 = subparsers.add_parser(
- "infra", help="Infrastructure related commands.")
+ p1 = subparsers.add_parser("infra", help="Infrastructure related commands.")
p1.add_argument(
- "op",
- choices=[
- 'stop',
- 'start',
- 'status',
- 'restart',
- 'build',
- 'release'])
+ "op", choices=["stop", "start", "status", "restart", "build", "release"]
+ )
p1.add_argument("--prefix")
p1.add_argument("--image")
p2 = subparsers.add_parser("cmd", help="Instance related commands.")
- p2.add_argument("op", choices=['vppctl', 'bash'])
+ p2.add_argument("op", choices=["vppctl", "bash"])
p2.add_argument(
- "index",
- type=int,
- help="Container instance index. (./runner.py infra status)")
+ "index", type=int, help="Container instance index. (./runner.py infra status)"
+ )
- p2.add_argument(
- "--command", help="Only vppctl supports this optional argument.")
+ p2.add_argument("--command", help="Only vppctl supports this optional argument.")
p3 = subparsers.add_parser("test", help="Test related commands.")
@@ -2473,7 +2565,9 @@ def get_args():
"gtp6_ipv6",
"gtp6_ipv6_5g",
"gtp6_dt",
- "gtp4_dt"])
+ "gtp4_dt",
+ ],
+ )
args = parser.parse_args()
if not hasattr(args, "op") or not args.op:
@@ -2483,15 +2577,13 @@ def get_args():
return vars(args)
-def main(op=None, prefix=None, verbose=None,
- image=None, index=None, command=None):
-
+def main(op=None, prefix=None, verbose=None, image=None, index=None, command=None):
if verbose:
basicConfig(level=verbose_levels[verbose])
- if image == 'release':
+ if image == "release":
image = "srv6m-release-image"
- elif image == 'debug':
+ elif image == "debug":
image = "srv6m-image"
else:
image = "srv6m-image"
@@ -2501,23 +2593,23 @@ def main(op=None, prefix=None, verbose=None,
program = Program(image, prefix)
try:
- if op == 'build':
+ if op == "build":
program.build_image()
- elif op == 'release':
+ elif op == "release":
program.release_image()
- elif op == 'stop':
+ elif op == "stop":
program.stop_containers()
- elif op == 'start':
+ elif op == "start":
program.start_containers()
- elif op == 'status':
+ elif op == "status":
program.status_containers()
- elif op == 'vppctl':
+ elif op == "vppctl":
program.vppctl(index, command)
- elif op == 'bash':
+ elif op == "bash":
program.bash(index)
- elif op == 'ping':
+ elif op == "ping":
program.test_ping()
- elif op == 'srv6':
+ elif op == "srv6":
program.test_srv6()
# elif op == 'tmap':
# program.test_tmap()
@@ -2527,47 +2619,47 @@ def main(op=None, prefix=None, verbose=None,
# program.test_tmap_ipv6()
# elif op == 'tmap_ipv6_5g':
# program.test_tmap_ipv6_5g()
- elif op == 'gtp4':
+ elif op == "gtp4":
program.test_gtp4()
- elif op == 'gtp4_usid':
+ elif op == "gtp4_usid":
program.test_gtp4_usid()
- elif op == 'gtp4_5g':
+ elif op == "gtp4_5g":
program.test_gtp4_5g()
- elif op == 'gtp4_echo':
+ elif op == "gtp4_echo":
program.test_gtp4_echo()
- elif op == 'gtp4_reply':
+ elif op == "gtp4_reply":
program.test_gtp4_reply()
- elif op == 'gtp4_error':
+ elif op == "gtp4_error":
program.test_gtp4_error()
- elif op == 'gtp4_ipv6':
+ elif op == "gtp4_ipv6":
program.test_gtp4_ipv6()
- elif op == 'gtp4_ipv6_5g':
+ elif op == "gtp4_ipv6_5g":
program.test_gtp4_ipv6_5g()
- elif op == 'gtp6_drop_in':
+ elif op == "gtp6_drop_in":
program.test_gtp6_drop_in()
- elif op == 'gtp6_drop_in_5g':
+ elif op == "gtp6_drop_in_5g":
program.test_gtp6_drop_in_5g()
- elif op == 'gtp6_drop_in_echo':
+ elif op == "gtp6_drop_in_echo":
program.test_gtp6_drop_in_echo()
- elif op == 'gtp6_drop_in_reply':
+ elif op == "gtp6_drop_in_reply":
program.test_gtp6_drop_in_reply()
- elif op == 'gtp6_drop_in_error':
+ elif op == "gtp6_drop_in_error":
program.test_gtp6_drop_in_error()
- elif op == 'gtp6_drop_in_ipv6':
+ elif op == "gtp6_drop_in_ipv6":
program.test_gtp6_drop_in_ipv6()
- elif op == 'gtp6_drop_in_ipv6_5g':
+ elif op == "gtp6_drop_in_ipv6_5g":
program.test_gtp6_drop_in_ipv6_5g()
- elif op == 'gtp6':
+ elif op == "gtp6":
program.test_gtp6()
- elif op == 'gtp6_5g':
+ elif op == "gtp6_5g":
program.test_gtp6_5g()
- elif op == 'gtp6_ipv6':
+ elif op == "gtp6_ipv6":
program.test_gtp6_ipv6()
- elif op == 'gtp6_ipv6_5g':
+ elif op == "gtp6_ipv6_5g":
program.test_gtp6_ipv6_5g()
- elif op == 'gtp6_dt':
+ elif op == "gtp6_dt":
program.test_gtp6_dt()
- elif op == 'gtp4_dt':
+ elif op == "gtp4_dt":
program.test_gtp4_dt()
except Exception:
diff --git a/src/plugins/srv6-mobile/extra/runner_doc.md b/src/plugins/srv6-mobile/extra/runner_doc.md
deleted file mode 100644
index 64f06d77299..00000000000
--- a/src/plugins/srv6-mobile/extra/runner_doc.md
+++ /dev/null
@@ -1,105 +0,0 @@
-# What's `runner.py` doing? {#srv6_mobile_runner_doc}
-
-## Common configurations
-
-### VPP1
-```
-create host-interface name eth1
-set int ip addr host-eth1 A1::1/120
-set int state host-eth1 up
-ip route add ::/0 via host-eth1 A1::2
-```
-
-
-### VPP2
-
-```
-create host-interface name eth1
-set int ip addr host-eth1 A1::2/120
-create host-interface name eth2
-set int ip addr host-eth2 A2::1/120
-set int state host-eth1 up
-set int state host-eth2 up
-ip route add ::/0 via host-eth2 A2::2
-```
-
-
-### VPP3
-
-```
-create host-interface name eth1
-set int ip addr host-eth1 A2::2/120
-create host-interface name eth2
-set int ip addr host-eth2 A3::1/120
-set int state host-eth1 up
-set int state host-eth2 up
-ip route add ::/0 via host-eth1 A2::1
-```
-
-### VPP4
-
-```
-create host-interface name eth1
-set int ip addr host-eth1 A3::2/120
-set int state host-eth1 up
-ip route add ::/0 via host-eth1 A3::1
-```
-
-
-## Drop-in for GTP-U over IPv4
-
-Drop-in mode is handy to test both GTP-U-to-SRv6 and SRv6-to-GTP-U functions at same time. Let's see what's happened when you run `test gtp4`:
-
- $ ./runner.py test gtp4
-
-
-Setting up a virtual interface of packet generator:
-
-#### VPP1
-
-```
-create packet-generator interface pg0
-set int mac address pg0 aa:bb:cc:dd:ee:01
-set int ip addr pg0 172.16.0.1/30
-set ip arp pg0 172.16.0.2/30 aa:bb:cc:dd:ee:02
-```
-
-#### VPP4
-
-```
-create packet-generator interface pg0
-set int mac address pg0 aa:bb:cc:dd:ee:11
-set int ip addr pg0 1.0.0.2/30
-set ip arp pg0 1.0.0.1 aa:bb:cc:dd:ee:22
-```
-
-SRv6 and IP routing settings:
-
-#### VPP1
-
-```
-sr policy add bsid D4:: next D2:: next D3::
-sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4
-sr steer l3 172.20.0.1/32 via bsid D5::
-```
-
-#### VPP2
-
-```
-sr localsid address D2:: behavior end
-ip route add D3::/128 via host-eth2 A2::2
-```
-
-#### VPP3
-
-```
-sr localsid address D3:: behavior end
-ip route add D4::/32 via host-eth2 A3::2
-```
-
-#### VPP4
-
-```
-sr localsid prefix D4::/32 behavior end.m.gtp4.e v4src_position 64
-ip route add 172.20.0.1/32 via pg0 1.0.0.1
-```
diff --git a/src/plugins/srv6-mobile/extra/runner_doc.rst b/src/plugins/srv6-mobile/extra/runner_doc.rst
new file mode 100644
index 00000000000..b5be91cbfc8
--- /dev/null
+++ b/src/plugins/srv6-mobile/extra/runner_doc.rst
@@ -0,0 +1,135 @@
+.. _srv6_mobile_runner_doc:
+
+SRv6 Mobile Runner
+==================
+
+What’s ``runner.py`` doing?
+
+Common configurations
+---------------------
+
+VPP1
+~~~~
+
+::
+
+ create host-interface name eth1
+ set int ip addr host-eth1 A1::1/120
+ set int state host-eth1 up
+ ip route add ::/0 via host-eth1 A1::2
+
+VPP2
+~~~~
+
+::
+
+ create host-interface name eth1
+ set int ip addr host-eth1 A1::2/120
+ create host-interface name eth2
+ set int ip addr host-eth2 A2::1/120
+ set int state host-eth1 up
+ set int state host-eth2 up
+ ip route add ::/0 via host-eth2 A2::2
+
+VPP3
+~~~~
+
+::
+
+ create host-interface name eth1
+ set int ip addr host-eth1 A2::2/120
+ create host-interface name eth2
+ set int ip addr host-eth2 A3::1/120
+ set int state host-eth1 up
+ set int state host-eth2 up
+ ip route add ::/0 via host-eth1 A2::1
+
+VPP4
+~~~~
+
+::
+
+ create host-interface name eth1
+ set int ip addr host-eth1 A3::2/120
+ set int state host-eth1 up
+ ip route add ::/0 via host-eth1 A3::1
+
+Drop-in for GTP-U over IPv4
+---------------------------
+
+Drop-in mode is handy for testing both the GTP-U-to-SRv6 and
+SRv6-to-GTP-U functions at the same time. Let’s see what happens when
+you run ``test gtp4``:
+
+::
+
+ $ ./runner.py test gtp4
+
+Setting up the packet generator interfaces:
+
+.. _vpp1-1:
+
+VPP1
+~~~~
+
+::
+
+ create packet-generator interface pg0
+ set int mac address pg0 aa:bb:cc:dd:ee:01
+ set int ip addr pg0 172.16.0.1/30
+ set ip arp pg0 172.16.0.2/30 aa:bb:cc:dd:ee:02
+
+.. _vpp4-1:
+
+VPP4
+~~~~
+
+::
+
+ create packet-generator interface pg0
+ set int mac address pg0 aa:bb:cc:dd:ee:11
+ set int ip addr pg0 1.0.0.2/30
+ set ip arp pg0 1.0.0.1 aa:bb:cc:dd:ee:22
+
+SRv6 and IP routing settings:
+
+.. _vpp1-2:
+
+VPP1
+~~~~
+
+::
+
+ sr policy add bsid D4:: next D2:: next D3::
+ sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4
+ sr steer l3 172.20.0.1/32 via bsid D5::
+
+.. _vpp2-1:
+
+VPP2
+~~~~
+
+::
+
+ sr localsid address D2:: behavior end
+ ip route add D3::/128 via host-eth2 A2::2
+
+.. _vpp3-1:
+
+VPP3
+~~~~
+
+::
+
+ sr localsid address D3:: behavior end
+ ip route add D4::/32 via host-eth2 A3::2
+
+.. _vpp4-2:
+
+VPP4
+~~~~
+
+::
+
+ sr localsid prefix D4::/32 behavior end.m.gtp4.e v4src_position 64
+ ip route add 172.20.0.1/32 via pg0 1.0.0.1
diff --git a/src/plugins/srv6-mobile/gtp4_d.c b/src/plugins/srv6-mobile/gtp4_d.c
index 7bafa560810..f519b4840cc 100644
--- a/src/plugins/srv6-mobile/gtp4_d.c
+++ b/src/plugins/srv6-mobile/gtp4_d.c
@@ -68,12 +68,13 @@ static u8 keyword_str[] = "t.m.gtp4.d";
static u8 def_str[] =
"Transit function with decapsulation for IPv4/GTP tunnel";
static u8 param_str[] =
- "<sr-prefix>/<sr-prefixlen> v6src_prefix <v6src_prefix>/<prefixlen> [nhtype <nhtype>]";
+ "<sr-prefix>/<sr-prefixlen> v6src_prefix <v6src_prefix>/<prefixlen> [nhtype "
+ "<nhtype>] fib-table <id>";
static u8 *
clb_format_srv6_t_m_gtp4_d (u8 * s, va_list * args)
{
- srv6_end_gtp4_param_t *ls_mem = va_arg (*args, void *);
+ srv6_end_gtp4_d_param_t *ls_mem = va_arg (*args, void *);
s = format (s, "SRv6 T.M.GTP4.D\n\t");
@@ -88,75 +89,114 @@ clb_format_srv6_t_m_gtp4_d (u8 * s, va_list * args)
if (ls_mem->nhtype != SRV6_NHTYPE_NONE)
{
if (ls_mem->nhtype == SRV6_NHTYPE_IPV4)
- s = format (s, ", NHType IPv4\n");
+ s = format (s, ", NHType IPv4");
else if (ls_mem->nhtype == SRV6_NHTYPE_IPV6)
- s = format (s, ", NHType IPv6\n");
+ s = format (s, ", NHType IPv6");
else if (ls_mem->nhtype == SRV6_NHTYPE_NON_IP)
- s = format (s, ", NHType Non-IP\n");
+ s = format (s, ", NHType Non-IP");
else
- s = format (s, ", NHType Unknow(%d)\n", ls_mem->nhtype);
+ s = format (s, ", NHType Unknow(%d)", ls_mem->nhtype);
}
- else
- s = format (s, "\n");
+
+ s = format (s, ", FIB table %d", ls_mem->fib_table);
+
+ s = format (s, ", Drop In %d\n", ls_mem->drop_in);
return s;
}
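+/*
+ * Allocate and populate the T.M.GTP4.D per-policy parameter block and
+ * resolve the configured FIB table id to IPv4/IPv6 FIB indices. Kept
+ * non-static so callers outside the CLI unformat path can reuse it.
+ */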
+void
+alloc_param_srv6_t_m_gtp4_d (void **plugin_mem_p, const void *v6src_prefix,
+ const u32 v6src_prefixlen, const void *sr_prefix,
+ const u32 sr_prefixlen, const u32 fib_index,
+ const u8 nhtype, const bool drop_in)
+{
+ srv6_end_gtp4_d_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->v6src_prefixlen = v6src_prefixlen;
+ memcpy (&ls_mem->v6src_prefix, v6src_prefix, sizeof (ip6_address_t));
+ ls_mem->sr_prefixlen = sr_prefixlen;
+ memcpy (&ls_mem->sr_prefix, sr_prefix, sizeof (ip6_address_t));
+
+ ls_mem->nhtype = nhtype;
+ ls_mem->drop_in = drop_in;
+ ls_mem->fib_table = fib_index;
+ ls_mem->fib4_index = ip4_fib_index_from_table_id (fib_index);
+ ls_mem->fib6_index = ip6_fib_index_from_table_id (fib_index);
+}
+
static uword
clb_unformat_srv6_t_m_gtp4_d (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_end_gtp4_param_t *ls_mem;
ip6_address_t sr_prefix;
u32 sr_prefixlen;
ip6_address_t v6src_prefix;
u32 v6src_prefixlen;
- u8 nhtype;
+ u32 fib_table = 0;
+ bool drop_in = false;
+ u8 nhtype = SRV6_NHTYPE_NONE;
+ bool config = false;
- if (unformat (input, "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype ipv4",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen,
- unformat_ip6_address, &v6src_prefix, &v6src_prefixlen))
- {
- nhtype = SRV6_NHTYPE_IPV4;
- }
- else
- if (unformat
- (input, "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype ipv6",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen,
- unformat_ip6_address, &v6src_prefix, &v6src_prefixlen))
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- nhtype = SRV6_NHTYPE_IPV6;
- }
- else
- if (unformat
- (input, "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype non-ip",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen,
- unformat_ip6_address, &v6src_prefix, &v6src_prefixlen))
- {
- nhtype = SRV6_NHTYPE_NON_IP;
- }
- else if (unformat (input, "t.m.gtp4.d %U/%d v6src_prefix %U/%d",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen,
- unformat_ip6_address, &v6src_prefix, &v6src_prefixlen))
- {
- nhtype = SRV6_NHTYPE_NONE;
+ if (unformat (
+ input,
+ "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype ipv4 fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ unformat_ip6_address, &v6src_prefix, &v6src_prefixlen, &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_IPV4;
+ }
+ else if (unformat (input,
+ "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype ipv6 "
+ "fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ unformat_ip6_address, &v6src_prefix, &v6src_prefixlen,
+ &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_IPV6;
+ }
+ else if (unformat (
+ input, "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype non-ip",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ unformat_ip6_address, &v6src_prefix, &v6src_prefixlen))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_NON_IP;
+ }
+ else if (unformat (input,
+ "t.m.gtp4.d %U/%d v6src_prefix %U/%d fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ unformat_ip6_address, &v6src_prefix, &v6src_prefixlen,
+ &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_NONE;
+ }
+ else if (unformat (input, "drop-in"))
+ {
+ drop_in = true;
+ }
+ else
+ {
+ return 0;
+ }
}
- else
+
+ if (!config)
{
return 0;
}
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->sr_prefix = sr_prefix;
- ls_mem->sr_prefixlen = sr_prefixlen;
-
- ls_mem->v6src_prefix = v6src_prefix;
- ls_mem->v6src_prefixlen = v6src_prefixlen;
-
- ls_mem->nhtype = nhtype;
+ alloc_param_srv6_t_m_gtp4_d (plugin_mem_p, &v6src_prefix, v6src_prefixlen,
+ &sr_prefix, sr_prefixlen, fib_table, nhtype,
+ drop_in);
return 1;
}
@@ -170,9 +210,9 @@ clb_creation_srv6_t_m_gtp4_d (ip6_sr_policy_t * sr_policy)
static int
clb_removal_srv6_t_m_gtp4_d (ip6_sr_policy_t * sr_policy)
{
- srv6_end_gtp4_param_t *ls_mem;
+ srv6_end_gtp4_d_param_t *ls_mem;
- ls_mem = (srv6_end_gtp4_param_t *) sr_policy->plugin_mem;
+ ls_mem = (srv6_end_gtp4_d_param_t *) sr_policy->plugin_mem;
clib_mem_free (ls_mem);
@@ -220,7 +260,6 @@ srv6_t_m_gtp4_d_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_t_m_gtp4_d, static) =
{
.arc_name = "ip4-unicast",
@@ -229,7 +268,6 @@ VNET_FEATURE_INIT (srv6_t_m_gtp4_d, static) =
};
VLIB_INIT_FUNCTION (srv6_t_m_gtp4_d_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp4_dt.c b/src/plugins/srv6-mobile/gtp4_dt.c
index 44a4af34c2f..10cea640036 100644
--- a/src/plugins/srv6-mobile/gtp4_dt.c
+++ b/src/plugins/srv6-mobile/gtp4_dt.c
@@ -90,11 +90,31 @@ clb_format_srv6_t_m_gtp4_dt (u8 * s, va_list * args)
return s;
}
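+/*
+ * Allocate and populate the T.M.GTP4.DT* parameter block; resolves the
+ * FIB ids and, for DT6/DT46, the local FIB used for the outer lookup.
+ */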
+void
+alloc_param_srv6_t_m_gtp4_dt (void **plugin_mem_p, const u32 fib_index,
+ const u32 local_fib_index, const u8 type)
+{
+ srv6_t_gtp4_dt_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->fib4_index = fib_table_find (FIB_PROTOCOL_IP4, fib_index);
+ ls_mem->fib6_index = fib_table_find (FIB_PROTOCOL_IP6, fib_index);
+
+ if (type == SRV6_GTP4_DT6 || type == SRV6_GTP4_DT46)
+ {
+ ls_mem->local_fib_index =
+ fib_table_find (FIB_PROTOCOL_IP6, local_fib_index);
+ }
+
+ ls_mem->type = type;
+}
+
static uword
clb_unformat_srv6_t_m_gtp4_dt (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_t_gtp4_dt_param_t *ls_mem;
u32 fib_index = 0;
u32 local_fib_index = 0;
u32 type;
@@ -118,20 +138,8 @@ clb_unformat_srv6_t_m_gtp4_dt (unformat_input_t * input, va_list * args)
return 0;
}
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->fib4_index = fib_table_find (FIB_PROTOCOL_IP4, fib_index);
- ls_mem->fib6_index = fib_table_find (FIB_PROTOCOL_IP6, fib_index);
-
- if (type == SRV6_GTP4_DT6 || type == SRV6_GTP4_DT46)
- {
- ls_mem->local_fib_index =
- fib_table_find (FIB_PROTOCOL_IP6, local_fib_index);
- }
-
- ls_mem->type = type;
+ alloc_param_srv6_t_m_gtp4_dt (plugin_mem_p, fib_index, local_fib_index,
+ type);
return 1;
}
@@ -185,7 +193,6 @@ srv6_t_m_gtp4_dt_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_t_m_gtp4_dt, static) =
{
.arc_name = "ip4-unicast",
@@ -194,7 +201,6 @@ VNET_FEATURE_INIT (srv6_t_m_gtp4_dt, static) =
};
VLIB_INIT_FUNCTION (srv6_t_m_gtp4_dt_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp4_e.c b/src/plugins/srv6-mobile/gtp4_e.c
index 211e95d11de..52f1615aa85 100644
--- a/src/plugins/srv6-mobile/gtp4_e.c
+++ b/src/plugins/srv6-mobile/gtp4_e.c
@@ -66,30 +66,70 @@ static u8 param_str[] = "";
static u8 *
clb_format_srv6_end_m_gtp4_e (u8 * s, va_list * args)
{
- srv6_end_gtp4_param_t *ls_mem = va_arg (*args, void *);
+ srv6_end_gtp4_e_param_t *ls_mem = va_arg (*args, void *);
- s = format (s, "SRv6 End gtp4.e\n\t");
+ s = format (s, "SRv6 End gtp4.e\n");
- s = format (s, "IPv4 address position: %d\n", ls_mem->v4src_position);
+ s = format (s, "\tIPv4 address position: %d\n", ls_mem->v4src_position);
+
+ s = format (s, "\tIPv4 source address: %U\n", format_ip4_address,
+ &ls_mem->v4src_addr);
+
+ s = format (s, "\tFib Table %d\n", ls_mem->fib_table);
return s;
}
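+/*
+ * Allocate and populate the End.M.GTP4.E parameter block: the IPv4
+ * source address (or its bit position in the SID) and the FIB table id
+ * resolved to IPv4/IPv6 FIB indices.
+ */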
+void
+alloc_param_srv6_end_m_gtp4_e (void **plugin_mem_p, const void *v4src_addr,
+ const u32 v4src_position, const u32 fib_table)
+{
+ srv6_end_gtp4_e_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+ ls_mem->v4src_position = v4src_position;
+ memcpy (&ls_mem->v4src_addr, v4src_addr, sizeof (ip4_address_t));
+
+ ls_mem->fib_table = fib_table;
+ ls_mem->fib4_index = ip4_fib_index_from_table_id (fib_table);
+ ls_mem->fib6_index = ip6_fib_index_from_table_id (fib_table);
+}
+
static uword
clb_unformat_srv6_end_m_gtp4_e (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_end_gtp4_param_t *ls_mem;
- u32 v4src_position;
-
- if (!unformat (input, "end.m.gtp4.e v4src_position %d", &v4src_position))
+ ip4_address_t v4src_addr;
+ u32 v4src_position = 0;
+ u32 fib_table;
+ bool config = false;
+
+ memset (&v4src_addr, 0, sizeof (ip4_address_t));
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "end.m.gtp4.e v4src_position %d fib-table %d",
+ &v4src_position, &fib_table))
+ {
+ config = true;
+ }
+ else if (unformat (input, "end.m.gtp4.e v4src_addr %U fib-table %d",
+ unformat_ip4_address, &v4src_addr, &fib_table))
+ {
+ config = true;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ if (!config)
return 0;
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->v4src_position = v4src_position;
+ alloc_param_srv6_end_m_gtp4_e (plugin_mem_p, &v4src_addr, v4src_position,
+ fib_table);
return 1;
}
@@ -103,7 +143,7 @@ clb_creation_srv6_end_m_gtp4_e (ip6_sr_localsid_t * localsid)
static int
clb_removal_srv6_end_m_gtp4_e (ip6_sr_localsid_t * localsid)
{
- srv6_end_gtp4_param_t *ls_mem;
+ srv6_end_gtp4_e_param_t *ls_mem;
ls_mem = localsid->plugin_mem;
@@ -163,7 +203,6 @@ srv6_end_m_gtp4_e_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_end_m_gtp4_e, static) =
{
.arc_name = "ip6-unicast",
@@ -177,7 +216,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "SRv6 GTP Endpoint Functions",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp6_d.c b/src/plugins/srv6-mobile/gtp6_d.c
index c62320b33fd..ef831ba750c 100644
--- a/src/plugins/srv6-mobile/gtp6_d.c
+++ b/src/plugins/srv6-mobile/gtp6_d.c
@@ -61,12 +61,13 @@ static u8 fn_name[] = "SRv6-End.M.GTP6.D-plugin";
static u8 keyword_str[] = "end.m.gtp6.d";
static u8 def_str[] =
"Endpoint function with dencapsulation for IPv6/GTP tunnel";
-static u8 param_str[] = "<sr-prefix>/<sr-prefixlen> [nhtype <nhtype>]";
+static u8 param_str[] =
+ "<sr-prefix>/<sr-prefixlen> [nhtype <nhtype>] fib-table <id>";
static u8 *
clb_format_srv6_end_m_gtp6_d (u8 * s, va_list * args)
{
- srv6_end_gtp6_param_t *ls_mem = va_arg (*args, void *);
+ srv6_end_gtp6_d_param_t *ls_mem = va_arg (*args, void *);
s = format (s, "SRv6 End gtp6.d\n\t");
@@ -77,62 +78,98 @@ clb_format_srv6_end_m_gtp6_d (u8 * s, va_list * args)
if (ls_mem->nhtype != SRV6_NHTYPE_NONE)
{
if (ls_mem->nhtype == SRV6_NHTYPE_IPV4)
- s = format (s, ", NHType IPv4\n");
+ s = format (s, ", NHType IPv4");
else if (ls_mem->nhtype == SRV6_NHTYPE_IPV6)
- s = format (s, ", NHType IPv6\n");
+ s = format (s, ", NHType IPv6");
else if (ls_mem->nhtype == SRV6_NHTYPE_NON_IP)
- s = format (s, ", NHType Non-IP\n");
+ s = format (s, ", NHType Non-IP");
else
- s = format (s, ", NHType Unknow(%d)\n", ls_mem->nhtype);
+ s = format (s, ", NHType Unknow(%d)", ls_mem->nhtype);
}
- else
- s = format (s, "\n");
+
+ s = format (s, " FIB table %d", ls_mem->fib_table);
+
+ s = format (s, " Drop In %d", ls_mem->drop_in);
return s;
}
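+/*
+ * Allocate and populate the End.M.GTP6.D parameter block: SR prefix,
+ * next-header type, drop-in flag, and the FIB table id resolved to
+ * IPv4/IPv6 FIB indices.
+ */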
+void
+alloc_param_srv6_end_m_gtp6_d (void **plugin_mem_p, const void *sr_prefix,
+ const u32 sr_prefixlen, const u8 nhtype,
+ const bool drop_in, const u32 fib_table)
+{
+ srv6_end_gtp6_d_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->sr_prefixlen = sr_prefixlen;
+ memcpy (&ls_mem->sr_prefix, sr_prefix, sizeof (ip6_address_t));
+ ls_mem->nhtype = nhtype;
+ ls_mem->drop_in = drop_in;
+ ls_mem->fib_table = fib_table;
+ ls_mem->fib4_index = ip4_fib_index_from_table_id (fib_table);
+ ls_mem->fib6_index = ip6_fib_index_from_table_id (fib_table);
+}
+
static uword
clb_unformat_srv6_end_m_gtp6_d (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_end_gtp6_param_t *ls_mem;
ip6_address_t sr_prefix;
u32 sr_prefixlen;
- u8 nhtype;
+ u8 nhtype = SRV6_NHTYPE_NONE;
+ bool drop_in = false;
+ bool config = false;
+ u32 fib_table = 0;
- if (unformat (input, "end.m.gtp6.d %U/%d nh-type ipv4",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen))
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- nhtype = SRV6_NHTYPE_IPV4;
- }
- else if (unformat (input, "end.m.gtp6.d %U/%d nh-type ipv6",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen))
- {
- nhtype = SRV6_NHTYPE_IPV6;
- }
- else if (unformat (input, "end.m.gtp6.d %U/%d nh-type none",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen))
- {
- nhtype = SRV6_NHTYPE_NON_IP;
- }
- else if (unformat (input, "end.m.gtp6.d %U/%d",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen))
- {
- nhtype = SRV6_NHTYPE_NONE;
+ if (unformat (input, "end.m.gtp6.d %U/%d nh-type ipv4 fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_IPV4;
+ }
+ else if (unformat (input, "end.m.gtp6.d %U/%d nh-type ipv6 fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_IPV6;
+ }
+ else if (unformat (input, "end.m.gtp6.d %U/%d nh-type none",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_NON_IP;
+ }
+ else if (unformat (input, "end.m.gtp6.d %U/%d fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_NONE;
+ }
+ else if (unformat (input, "drop-in"))
+ {
+ drop_in = true;
+ }
+ else
+ {
+ return 0;
+ }
}
- else
+
+ if (!config)
{
return 0;
}
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->sr_prefix = sr_prefix;
- ls_mem->sr_prefixlen = sr_prefixlen;
-
- ls_mem->nhtype = nhtype;
+ alloc_param_srv6_end_m_gtp6_d (plugin_mem_p, &sr_prefix, sr_prefixlen,
+ nhtype, drop_in, fib_table);
return 1;
}
@@ -144,9 +181,15 @@ clb_creation_srv6_end_m_gtp6_d (ip6_sr_localsid_t * localsid)
}
static int
+clb_creation_srv6_end_m_gtp6_d_2 (ip6_sr_policy_t *sr_policy)
+{
+ return 0;
+}
+
+static int
clb_removal_srv6_end_m_gtp6_d (ip6_sr_localsid_t * localsid)
{
- srv6_end_gtp6_param_t *ls_mem;
+ srv6_end_gtp6_d_param_t *ls_mem;
ls_mem = localsid->plugin_mem;
@@ -155,6 +198,18 @@ clb_removal_srv6_end_m_gtp6_d (ip6_sr_localsid_t * localsid)
return 0;
}
+static int
+clb_removal_srv6_end_m_gtp6_d_2 (ip6_sr_policy_t *sr_policy)
+{
+ srv6_end_gtp6_d_param_t *ls_mem;
+
+ ls_mem = sr_policy->plugin_mem;
+
+ clib_mem_free (ls_mem);
+
+ return 0;
+}
+
static clib_error_t *
srv6_end_m_gtp6_d_init (vlib_main_t * vm)
{
@@ -193,10 +248,18 @@ srv6_end_m_gtp6_d_init (vlib_main_t * vm)
if (rc < 0)
clib_error_return (0, "SRv6 Endpoint GTP6.D LocalSID function"
"couldn't be registered");
+
+ rc = sr_policy_register_function (
+ vm, fn_name, keyword_str, def_str, param_str, 128, // prefix len
+ &dpo_type, clb_format_srv6_end_m_gtp6_d, clb_unformat_srv6_end_m_gtp6_d,
+ clb_creation_srv6_end_m_gtp6_d_2, clb_removal_srv6_end_m_gtp6_d_2);
+ if (rc < 0)
+ clib_error_return (0, "SRv6 GTP6.D Steering function"
+ "couldn't be registered");
+
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_end_m_gtp6_d, static) =
{
.arc_name = "ip6-unicast",
@@ -205,7 +268,6 @@ VNET_FEATURE_INIT (srv6_end_m_gtp6_d, static) =
};
VLIB_INIT_FUNCTION (srv6_end_m_gtp6_d_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp6_d_di.c b/src/plugins/srv6-mobile/gtp6_d_di.c
index 14318562e84..94bc684161d 100644
--- a/src/plugins/srv6-mobile/gtp6_d_di.c
+++ b/src/plugins/srv6-mobile/gtp6_d_di.c
@@ -66,7 +66,7 @@ static u8 param_str[] = "<sr-prefix>/<sr-prefixlen> [nhtype <nhtype>]";
static u8 *
clb_format_srv6_end_m_gtp6_d_di (u8 * s, va_list * args)
{
- srv6_end_gtp6_param_t *ls_mem = va_arg (*args, void *);
+ srv6_end_gtp6_d_param_t *ls_mem = va_arg (*args, void *);
s = format (s, "SRv6 End gtp6.d Drop-in\n\t");
@@ -91,11 +91,24 @@ clb_format_srv6_end_m_gtp6_d_di (u8 * s, va_list * args)
return s;
}
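+/*
+ * Allocate and populate the End.M.GTP6.D.Di parameter block (SR prefix
+ * and next-header type only; the drop-in variant takes no FIB
+ * parameters).
+ */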
+void
+alloc_param_srv6_end_m_gtp6_di (void **plugin_mem_p, const void *sr_prefix,
+ const u32 sr_prefixlen, const u8 nhtype)
+{
+ srv6_end_gtp6_d_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->sr_prefixlen = sr_prefixlen;
+ memcpy (&ls_mem->sr_prefix, sr_prefix, sizeof (ip6_address_t));
+ ls_mem->nhtype = nhtype;
+}
+
static uword
clb_unformat_srv6_end_m_gtp6_d_di (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_end_gtp6_param_t *ls_mem;
ip6_address_t sr_prefix;
u32 sr_prefixlen = 0;
u8 nhtype;
@@ -125,13 +138,8 @@ clb_unformat_srv6_end_m_gtp6_d_di (unformat_input_t * input, va_list * args)
return 0;
}
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->sr_prefix = sr_prefix;
- ls_mem->sr_prefixlen = sr_prefixlen;
- ls_mem->nhtype = nhtype;
+ alloc_param_srv6_end_m_gtp6_di (plugin_mem_p, &sr_prefix, sr_prefixlen,
+ nhtype);
return 1;
}
@@ -145,7 +153,7 @@ clb_creation_srv6_end_m_gtp6_d_di (ip6_sr_localsid_t * localsid)
static int
clb_removal_srv6_end_m_gtp6_d_di (ip6_sr_localsid_t * localsid)
{
- srv6_end_gtp6_param_t *ls_mem;
+ srv6_end_gtp6_d_param_t *ls_mem;
ls_mem = localsid->plugin_mem;
@@ -198,7 +206,6 @@ srv6_end_m_gtp6_d_di_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_end_m_gtp6_d_di, static) =
{
.arc_name = "ip6-unicast",
@@ -207,7 +214,6 @@ VNET_FEATURE_INIT (srv6_end_m_gtp6_d_di, static) =
};
VLIB_INIT_FUNCTION (srv6_end_m_gtp6_d_di_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp6_dt.c b/src/plugins/srv6-mobile/gtp6_dt.c
index cbd2327cc47..c4d4175e321 100644
--- a/src/plugins/srv6-mobile/gtp6_dt.c
+++ b/src/plugins/srv6-mobile/gtp6_dt.c
@@ -84,11 +84,31 @@ clb_format_srv6_end_m_gtp6_dt (u8 * s, va_list * args)
return s;
}
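+/*
+ * Allocate and populate the End.M.GTP6.DT* parameter block; resolves
+ * the FIB ids and, for DT6/DT46, the local FIB used for the outer
+ * lookup.
+ */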
+void
+alloc_param_srv6_end_m_gtp6_dt (void **plugin_mem_p, const u32 fib_index,
+ const u32 local_fib_index, const u32 type)
+{
+ srv6_end_gtp6_dt_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->fib4_index = fib_table_find (FIB_PROTOCOL_IP4, fib_index);
+ ls_mem->fib6_index = fib_table_find (FIB_PROTOCOL_IP6, fib_index);
+
+ if (type == SRV6_GTP6_DT6 || type == SRV6_GTP6_DT46)
+ {
+ ls_mem->local_fib_index =
+ fib_table_find (FIB_PROTOCOL_IP6, local_fib_index);
+ }
+
+ ls_mem->type = type;
+}
+
static uword
clb_unformat_srv6_end_m_gtp6_dt (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_end_gtp6_dt_param_t *ls_mem;
u32 fib_index = 0;
u32 local_fib_index = 0;
u32 type;
@@ -111,22 +131,8 @@ clb_unformat_srv6_end_m_gtp6_dt (unformat_input_t * input, va_list * args)
{
return 0;
}
-
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->fib4_index = fib_table_find (FIB_PROTOCOL_IP4, fib_index);
- ls_mem->fib6_index = fib_table_find (FIB_PROTOCOL_IP6, fib_index);
-
- if (type == SRV6_GTP6_DT6 || type == SRV6_GTP6_DT46)
- {
- ls_mem->local_fib_index =
- fib_table_find (FIB_PROTOCOL_IP6, local_fib_index);
- }
-
- ls_mem->type = type;
-
+ alloc_param_srv6_end_m_gtp6_dt (plugin_mem_p, fib_index, local_fib_index,
+ type);
return 1;
}
@@ -179,7 +185,6 @@ srv6_end_m_gtp6_dt_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_end_m_gtp6_dt, static) =
{
.arc_name = "ip6-unicast",
@@ -188,7 +193,6 @@ VNET_FEATURE_INIT (srv6_end_m_gtp6_dt, static) =
};
VLIB_INIT_FUNCTION (srv6_end_m_gtp6_dt_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp6_e.c b/src/plugins/srv6-mobile/gtp6_e.c
index d139a649409..dd3a889928b 100644
--- a/src/plugins/srv6-mobile/gtp6_e.c
+++ b/src/plugins/srv6-mobile/gtp6_e.c
@@ -66,15 +66,40 @@ static u8 param_str[] = "";
static u8 *
clb_format_srv6_end_m_gtp6_e (u8 * s, va_list * args)
{
- s = format (s, "SRv6 End format function unsupported.");
+ srv6_end_gtp6_e_param_t *ls_mem = va_arg (*args, void *);
+
+  s = format (s, "SRv6 End.M.GTP6.E function.\n");
+
+ s = format (s, "\tFib Table %d\n", ls_mem->fib_table);
+
return s;
}
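+/*
+ * Allocate and populate the End.M.GTP6.E parameter block: just the FIB
+ * table id resolved to IPv4/IPv6 FIB indices.
+ */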
+void
+alloc_param_srv6_end_m_gtp6_e (void **plugin_mem_p, const u32 fib_table)
+{
+ srv6_end_gtp6_e_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->fib_table = fib_table;
+ ls_mem->fib4_index = ip4_fib_index_from_table_id (fib_table);
+ ls_mem->fib6_index = ip6_fib_index_from_table_id (fib_table);
+}
+
static uword
-clb_unformat_srv6_end_m_gtp6_e (unformat_input_t * input, va_list * args)
+clb_unformat_srv6_end_m_gtp6_e (unformat_input_t *input, va_list *args)
{
- if (!unformat (input, "end.m.gtp6.e"))
+ void **plugin_mem_p = va_arg (*args, void **);
+ u32 fib_table;
+
+ if (!unformat (input, "end.m.gtp6.e fib-table %d", &fib_table))
return 0;
+
+ alloc_param_srv6_end_m_gtp6_e (plugin_mem_p, fib_table);
+
return 1;
}
@@ -87,6 +112,12 @@ clb_creation_srv6_end_m_gtp6_e (ip6_sr_localsid_t * localsid)
static int
clb_removal_srv6_end_m_gtp6_e (ip6_sr_localsid_t * localsid)
{
+ srv6_end_gtp6_e_param_t *ls_mem;
+
+ ls_mem = localsid->plugin_mem;
+
+ clib_mem_free (ls_mem);
+
return 0;
}
@@ -137,7 +168,6 @@ srv6_end_m_gtp6_e_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_end_m_gtp6_e, static) =
{
.arc_name = "ip6-unicast",
@@ -146,7 +176,6 @@ VNET_FEATURE_INIT (srv6_end_m_gtp6_e, static) =
};
VLIB_INIT_FUNCTION (srv6_end_m_gtp6_e_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/mobile.h b/src/plugins/srv6-mobile/mobile.h
index 517e7c8f84c..a305a25b811 100644
--- a/src/plugins/srv6-mobile/mobile.h
+++ b/src/plugins/srv6-mobile/mobile.h
@@ -20,6 +20,8 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
#include <vnet/srv6/sr.h>
#include <vnet/srv6/sr_packet.h>
@@ -69,24 +71,41 @@
#define GTPU_IE_MAX_SIZ 256
#define SRH_TLV_USER_PLANE_CONTAINER 0x0a /* tentative */
-/* *INDENT-OFF* */
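+/* SR policy (transit) functions provided by this plugin. */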
+typedef enum mobile_policy_function_list
+{
+ SRV6_MOBILE_POLICY_UNKNOWN_FUNCTION = 0,
+ SRV6_MOBILE_POLICY_T_M_GTP4_D,
+ SRV6_MOBILE_POLICY_T_M_GTP4_DT4,
+ SRV6_MOBILE_POLICY_T_M_GTP4_DT6,
+ SRV6_MOBILE_POLICY_T_M_GTP4_DT46,
+ SRV6_MOBILE_POLICY_END_M_GTP6_D,
+} mobile_policy_function_list_t;
+
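+/* SR localsid (endpoint) functions provided by this plugin. */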
+typedef enum mobile_localsid_function_list
+{
+ SRV6_MOBILE_LOCALSID_UNKNOWN_FUNCTION = 0,
+ SRV6_MOBILE_LOCALSID_END_M_GTP4_E,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_E,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_D,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DI,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT4,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT6,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT46,
+} mobile_localsid_function_list_t;
+
typedef struct
{
u8 type;
u8 restart_counter;
} __attribute__ ((packed)) gtpu_recovery_ie;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef struct
{
u16 seq;
u8 npdu_num;
u8 nextexthdr;
} __attribute__ ((packed)) gtpu_exthdr_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef struct
{
u8 ver_flags;
@@ -95,7 +114,6 @@ typedef struct
u32 teid;
gtpu_exthdr_t ext[0];
} __attribute__ ((packed)) gtpu_header_t;
-/* *INDENT-ON* */
#define GTPU_TYPE_ECHO_REQUEST 1
#define GTPU_TYPE_ECHO_REPLY 2
@@ -103,7 +121,6 @@ typedef struct
#define GTPU_TYPE_END_MARKER 254
#define GTPU_TYPE_GTPU 255
-/* *INDENT-OFF* */
typedef struct
{
BITALIGN2 (u8 ppi:3,
@@ -111,9 +128,7 @@ typedef struct
u8 padding[3];
} __attribute__ ((packed)) gtpu_paging_policy_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef struct
{
u8 exthdrlen;
@@ -131,7 +146,6 @@ typedef struct
gtpu_paging_policy_t paging[0];
u8 nextexthdr;
} __attribute__ ((packed)) gtpu_pdu_session_t;
-/* *INDENT-ON* */
#define GTPU_PDU_SESSION_P_BIT_MASK 0x80
#define GTPU_PDU_SESSION_R_BIT_MASK 0x40
@@ -141,47 +155,51 @@ typedef struct
#define SRV6_PDU_SESSION_R_BIT_MASK 0x02
#define SRV6_PDU_SESSION_QFI_MASK 0xfC
-/* *INDENT-OFF* */
typedef struct
{
ip4_header_t ip4; /* 20 bytes */
udp_header_t udp; /* 8 bytes */
gtpu_header_t gtpu; /* 8 bytes */
} __attribute__ ((packed)) ip4_gtpu_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef struct
{
ip6_header_t ip6; /* 40 bytes */
udp_header_t udp; /* 8 bytes */
gtpu_header_t gtpu; /* 8 bytes */
} __attribute__ ((packed)) ip6_gtpu_header_t;
-/* *INDENT-ON* */
#define GTPU_V1_VER (1<<5)
#define GTPU_PT_GTP (1<<4)
-/* *INDENT-OFF* */
typedef struct
{
u8 type;
u8 length;
u8 value[0];
} __attribute__ ((packed)) user_plane_sub_tlv_t;
-/* *INDENT-ON* */
#define USER_PLANE_SUB_TLV_IE 0x01
-typedef struct srv6_end_gtp6_param_s
+/* SRv6 mobile Plugin Params */
+
+/* GTP6.D, GTP6.Di */
+typedef struct srv6_end_gtp6_d_param_s
{
u8 nhtype;
ip6_address_t sr_prefix;
u32 sr_prefixlen;
-} srv6_end_gtp6_param_t;
+ bool drop_in;
+
+ u32 fib_table;
+ u32 fib4_index;
+ u32 fib6_index;
+} srv6_end_gtp6_d_param_t;
+
+/* GTP6.DT */
typedef struct srv6_end_gtp6_dt_param_s
{
u8 type;
@@ -191,6 +209,15 @@ typedef struct srv6_end_gtp6_dt_param_s
u32 local_fib_index;
} srv6_end_gtp6_dt_param_t;
+/* GTP6.E */
+typedef struct srv6_end_gtp6_e_param_s
+{
+ u32 fib_table;
+ u32 fib4_index;
+ u32 fib6_index;
+} srv6_end_gtp6_e_param_t;
+
+/* GTP4.DT */
typedef struct srv6_t_gtp4_dt_param_s
{
u8 type;
@@ -200,7 +227,19 @@ typedef struct srv6_t_gtp4_dt_param_s
u32 local_fib_index;
} srv6_t_gtp4_dt_param_t;
-typedef struct srv6_end_gtp4_param_s
+/* GTP4.E */
+typedef struct srv6_end_gtp4_e_param_s
+{
+ u32 v4src_position;
+ ip4_address_t v4src_addr;
+
+ u32 fib_table;
+ u32 fib4_index;
+ u32 fib6_index;
+} srv6_end_gtp4_e_param_t;
+
+/* GTP4.D */
+typedef struct srv6_end_gtp4_d_param_s
{
u8 nhtype;
@@ -210,8 +249,12 @@ typedef struct srv6_end_gtp4_param_s
ip6_address_t v6src_prefix;
u32 v6src_prefixlen;
- u32 v4src_position;
-} srv6_end_gtp4_param_t;
+ bool drop_in;
+
+ u32 fib_table;
+ u32 fib4_index;
+ u32 fib6_index;
+} srv6_end_gtp4_d_param_t;
typedef struct srv6_end_main_v4_s
{
diff --git a/src/plugins/srv6-mobile/mobile_plugin_doc.md b/src/plugins/srv6-mobile/mobile_plugin_doc.md
deleted file mode 100644
index 3a44e795838..00000000000
--- a/src/plugins/srv6-mobile/mobile_plugin_doc.md
+++ /dev/null
@@ -1,201 +0,0 @@
-SRv6 Mobile User Plane Plugins {#srv6_mobile_plugin_doc}
-========================
-
-# Introduction
-
-This plugin module can provide the stateless mobile user plane protocols translation between GTP-U and SRv6. The plugin also provides FIB table lookup for an IPv4/IPv6 packet encapsulated in GTP-U. These plugin functions take advantage of SRv6 network programmability.
-
-[SRv6 Mobile User Plane](https://tools.ietf.org/html/draft-ietf-dmm-srv6-mobile-uplane) defines the user plane protocol using SRv6
-including following stateless translation functions:
-
-- **T.M.GTP4.D:**
- GTP-U over UDP/IPv4 -> SRv6
-- **End.M.GTP4.E:**
- SRv6 -> GTP-U over UDP/IPv4
-- **End.M.GTP6.D:**
- GTP-U over UDP/IPv6 -> SRv6
-- **End.M.GTP6.E:**
- SRv6 -> GTP-U over UDP/IPv6
-
-These functions benefit user plane(overlay) to be able to utilize data plane(underlay) networks properly. And also it benefits data plane to be able to handle user plane in routing paradigm.
-
-In addition to the above functions, the plugin supports following functions:
-
-- **T.M.GTP4.DT{4|6|46}:**
- FIB table lookup for IPv4/IP6 encapsulated in GTP-U over UDP/IPv4
-- **End.M.GTP6.DT{4|6|46}:**
- FIB table lookup for IPv4/IP6 encapsulated in GTP-U over UDP/IPv6
-
-Noted that the prefix of function names follow naming convention of SRv6 network programming. "T" means transit function, "End" means end function, "M" means Mobility specific function. The suffix "D" and "E" mean that "decapsulation" and "encapsulation" respectively.
-
-
-# Implementation
-
-All SRv6 mobile functions are implemented as VPP plugin modules. The plugin modules leverage the sr_policy and sr_localsid mechanisms.
-
-# Configurations
-
-## GTP-U to SRv6
-
-The GTP-U tunnel and flow identifiers of a receiving packet are mapped to a Segment Identifier(SID) of sending SRv6 packets.
-
-### IPv4 infrastructure case
-
-In case that **IPv4** networks are the infrastructure of GTP-U, T.M.GTP4.D function translates the receiving GTP-U packets to SRv6 packets.
-
-A T.M.GTP4.D function is associated with the following mandatory parameters:
-
-- SID: A SRv6 SID to represents the function
-- DST-PREFIX: Prefix of remote SRv6 segment. The destination address or last SID of out packets consists of the prefix followed by dst IPv4 address, QFI and TEID of the receiving packets.
-- SRC-PREFIX: Prefix for src address of sending packets. The src IPv6 address consists of the prefix followed by the src IPv4 address of the receiving packets.
-
-The following command instantiates a new T.M.GTP4.D function.
-
-```
-sr policy add bsid SID behavior t.m.gtp4.d DST-PREFIX v6src_prefix SRC-PREFIX [nhtype {ipv4|ipv6|non-ip}]
-```
-
-For example, the below command configures the SID 2001:db8::1 with `t.m.gtp4.d` behavior for translating receiving GTP-U over IPv4 packets to SRv6 packets with next-header type is IPv4.
-
-```
-sr policy add bsid 2001:db8::1 behavior t.m.gtp4.d D1::/32 v6src_prefix A1::/64 nhtype ipv4
-```
-
-It should be interesting how a SRv6 BSID works to decapsulate the receiving GTP-U packets over IPv4 header. To utilize ```t.m.gtp4.d``` function, you need to configure some SR steering policy like:
-
-```
-sr steer l3 172.20.0.1/32 via bsid 2001:db8::1
-```
-
-The above steering policy with the BSID of `t.m.gtp4.d` would work properly for the GTP-U packets destined to 172.20.0.1.
-
-If you have a SID(s) list of SR policy which the configured gtp4.d function to be applied, the SR Policy can be configured as following:
-
-```
-sr policy add bsid D1:: next A1:: next B1:: next C1::
-```
-
-### IPv6 infrastructure case
-
-In case that GTP-U is deployed over **IPv6** infrastructure, you don't need to configure T.M.GTP4.D function and associated SR steering policy. Instead of that, you just need to configure a localsid of End.M.GTP6.D segment.
-
-An End.M.GTP6.D segment is associated with the following mandatory parameters:
-
-- SID-PREFIX: SRv6 SID prefix to represent the function. In this function, it should be the dst address of receiving GTP-U packets.
-- DST-PREFIX: Prefix of remote SRv6 Segment. The destination address or last SID of output packets consists of the prefix followed by QFI and TEID of the receiving packets.
-
-The following command instantiates a new End.M.GTP6.D function.
-
-```
-sr localsid prefix SID-PREFIX behavior end.m.gtp6.d DST-PREFIX [nhtype {ipv4|ipv6|non-ip}]
-```
-For example, the below command configures the SID prefix 2001:db8::/64 with `end.m.gtp6.d` behavior for translating receiving GTP-U over IPv6 packets which have IPv6 destination addresses within 2001:db8::/64 to SRv6 packets. The dst IPv6 address of the outgoing packets consists of D4::/64 followed by QFI and TEID.
-
-```
-sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.d D4::/64
-```
-
-In another case, the translated packets from GTP-U over IPv6 to SRv6 will be re-translated back to GTP-U, which is so called 'Drop-In' mode.
-
-In Drop-In mode, an additional IPv6 specific end segment is required, named End.M.GTP6.D.Di. It is because that unlike `end.m.gtp6.d`, it needs to preserve original IPv6 dst address as the last SID in the SRH.
-
-Regardless of that difference exists, the required configuration parameters are same as `end.m.gtp6.d`.
-
-The following command instantiates a new End.M.GTP6.D.Di function.
-
-```
-sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.d.di D4::/64
-```
-
-
-## SRv6 to GTP-U
-
-The SRv6 Mobile functions on SRv6 to GTP-U direction are End.M.GTP4.E and End.M.GTP6.D.
-
-In this direction with GTP-U over IPv4 infrastructure, an End.M.GTP4.E segment is associated with the following mandatory parameters:
-
-- SID-PREFIX: SRv6 SID prefix to represent the function.
-- V4SRC-ADDR-POSITION: Integer number indicates bit position where IPv4 src address embedded.
-
-The following command instantiates a new End.M.GTP4.E function.
-
-```
-sr localsid prefix SID-PREFIX behavior end.m.gtp4.e v4src_position V4SRC-ADDR-POSITION
-```
-
-For example, the below command configures the SID prefix 2001:db8::/32 with `end.m.gtp4.e` behavior for translating the receiving SRv6 packets to GTP-U packets encapsulated with UDP/IPv4 header. All the GTP-U tunnel and flow identifiers are extracted from the active SID in the receiving packets. The src IPv4 address of sending GTP-U packets is extracted from the configured bit position in the src IPv6 address.
-
-```
-sr localsid prefix 2001:db8::/32 behavior end.m.gtp4.e v4src_position 64
-```
-
-In IPv6 infrastructure case, an End.M.GTP6.E segment is associated with the following mandatory parameters:
-
-- SID-PREFIX: SRv6 SID prefix to represent the function.
-
-The following command instantiates a new End.M.GTP6.E function.
-
-```
-sr localsid prefix SID-PREFIX behavior end.m.gtp6.e
-```
-
-For example, the below command configures the SID prefix 2001:db8::/64 with `end.m.gtp6.e` behavior for translating the receiving SRv6 packets to GTP-U packets encapsulated with UDP/IPv6 header. While the last SID indicates GTP-U dst IPv6 address, 32-bits GTP-U TEID and 6-bits QFI are extracted from the active SID in the receiving packets.
-
-```
-sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.e
-```
-
-## FIB Table Lookup for Inner IPv4/IPv6 packet
-
-SRv6 Mobile functions of `t.m.gtp4.dt*` and `end.m.gtp6.dt*` support decapsulating outer IP/UDP/GTP-U headers and forwarding inner IP packet based on specific fib table.
-
-In case of the both outer and inner IP address families are IPv4, `t.m.gtp4.dt4` function supports GTP-U decapsulation and fib lookup for inner IPv4 with an associated steering policy and the following parameters:
-
-- SID: A SRv6 SID to represents the function
-- FIB: fib-table number for inner IPv4 packet lookup and forwarding
-
-The following command instantiates a new T.M.GTP4.DT4 function.
-
-```
-sr policy add bsid SID behavior t.m.gtp4.dt4 fib-table FIB
-```
-
-For example, the below commands configure D5:: as the SID instantiates `t.m.gtp4.dt4` function. A steering policy for packets destine to 172.20.0.1 binds to the SID.
-
-```
-sr steer l3 172.20.0.1/32 via bsid D5::
-sr policy add bsid D5:: behavior t.m.gtp4.dt4 fib-table 0
-```
-
-In addition, inner IPv6, or mix of IPv4 and IPv6 inner packet cases require the function to be configured with local-fib table.
-
-- LOCAL-FIB: fib-table number for lookup and forward GTP-U packet based on outer IP destination address
-
-This is inner IPv6 case specific. The reason is that GTP-U encapsulates link local IPv6 packet for NDP (Neighber Discovery Protocol). Outer GTP-U header should be kept until the packets reach to the node responsible for NDP handling. It is typically UPF(User Plane Function) node.
-
-The following command instantiate a new T.M.GTP4.DT6 function.
-
-```
-sr policy add bsid D5:: behavior t.m.gtp4.dt6 fib-table 0 local-fib-table LOCAL-FIB
-```
-
-Following example configures fib 0 for inner packet and fib 1 for outer GTP-U packet forwarding:
-
-```
-sr policy add bsid D5:: behavior t.m.gtp4.dt6 fib-table 0 local-fib-table 1
-```
-
-If you need to suport both IPv4 and IPv6 inner packet lookup with just one SID, you can configure `t.m.gtp4.dt46` function:
-
-```
-sr policy add bsid D5:: behavior t.m.gtp4.dt46 fib-table 0 local-fib-table 1
-```
-
-In case of GTP-U over IPv6 case, `end.m.gtp6.dt4`, `end.m.gtp6.dt6` and `end.m.gtp6.dt46` functions support inner IPv4, IPv6 and IPv4/IPv6 lookup and forwarding respectively. Specifiyng fib table for inner IP packet forwarding is required as same as GTP-U over IPv4 case, and local-fib table for inner IPv6 and IPv4/IPv6 cases as well.
-
-```
-sr localsid prefix D::/64 behavior end.m.gtp6.dt46 fib-table 0 local-fib-table 0
-```
-
-To run some demo setup please refer to: @subpage srv6_mobile_runner_doc
-
diff --git a/src/plugins/srv6-mobile/mobile_plugin_doc.rst b/src/plugins/srv6-mobile/mobile_plugin_doc.rst
new file mode 100644
index 00000000000..1aca3aaf229
--- /dev/null
+++ b/src/plugins/srv6-mobile/mobile_plugin_doc.rst
@@ -0,0 +1,278 @@
+.. _srv6_mobile_plugin_doc:
+
+SRv6 Mobile User Plane
+======================
+
+Introduction
+------------
+
+This plugin module provides stateless translation of mobile user plane
+protocols between GTP-U and SRv6. The plugin also provides FIB table
+lookup for an IPv4/IPv6 packet encapsulated in GTP-U. These plugin
+functions take advantage of SRv6 network programmability.
+
+`SRv6 Mobile User
+Plane <https://tools.ietf.org/html/draft-ietf-dmm-srv6-mobile-uplane>`__
+defines the user plane protocol using SRv6, including the following
+stateless translation functions:
+
+- **T.M.GTP4.D:** GTP-U over UDP/IPv4 -> SRv6
+- **End.M.GTP4.E:** SRv6 -> GTP-U over UDP/IPv4
+- **End.M.GTP6.D:** GTP-U over UDP/IPv6 -> SRv6
+- **End.M.GTP6.E:** SRv6 -> GTP-U over UDP/IPv6
+
+These functions let the user plane (overlay) utilize data plane
+(underlay) networks properly, and let the data plane handle the user
+plane in a routing paradigm.
+
+In addition to the above functions, the plugin supports the following
+functions:
+
+- **T.M.GTP4.DT{4|6|46}:** FIB table lookup for IPv4/IPv6 encapsulated
+  in GTP-U over UDP/IPv4
+- **End.M.GTP6.DT{4|6|46}:** FIB table lookup for IPv4/IPv6
+  encapsulated in GTP-U over UDP/IPv6
+
+Note that the function name prefixes follow the naming convention of
+SRv6 network programming: “T” means a transit function, “End” an end
+function, and “M” a mobility-specific function. The suffixes “D” and
+“E” mean “decapsulation” and “encapsulation” respectively.
+
+Implementation
+--------------
+
+All SRv6 mobile functions are implemented as VPP plugin modules. The
+plugin modules leverage the sr_policy and sr_localsid mechanisms.
+
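+For example, a policy behavior is registered with the SR infrastructure
+through ``sr_policy_register_function`` (a sketch of the call shape this
+plugin uses; the ``clb_*`` callback names are placeholders):
+
+::
+
+   rc = sr_policy_register_function (
+     vm, fn_name, keyword_str, def_str, param_str, 128, /* prefix len */
+     &dpo_type, clb_format, clb_unformat, clb_creation, clb_removal);
+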
+Configurations
+--------------
+
+GTP-U to SRv6
+~~~~~~~~~~~~~
+
+The GTP-U tunnel and flow identifiers of a received packet are mapped
+to a Segment Identifier (SID) of the outgoing SRv6 packets.
+
+IPv4 infrastructure case
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+When **IPv4** networks are the infrastructure of GTP-U, the T.M.GTP4.D
+function translates received GTP-U packets to SRv6 packets.
+
+A T.M.GTP4.D function is associated with the following mandatory
+parameters:
+
+- SID: An SRv6 SID that represents the function
+- DST-PREFIX: Prefix of the remote SRv6 segment. The destination
+  address or last SID of outgoing packets consists of the prefix
+  followed by the dst IPv4 address, QFI and TEID of the received
+  packets.
+- SRC-PREFIX: Prefix for the src address of outgoing packets. The src
+  IPv6 address consists of the prefix followed by the src IPv4 address
+  of the received packets.
+
+The following command instantiates a new T.M.GTP4.D function.
+
+::
+
+ sr policy add bsid SID behavior t.m.gtp4.d DST-PREFIX v6src_prefix SRC-PREFIX [nhtype {ipv4|ipv6|non-ip}]
+
+For example, the command below configures the SID 2001:db8::1 with
+``t.m.gtp4.d`` behavior for translating received GTP-U over IPv4
+packets to SRv6 packets whose next-header type is IPv4.
+
+::
+
+ sr policy add bsid 2001:db8::1 behavior t.m.gtp4.d D1::/32 v6src_prefix A1::/64 nhtype ipv4
+
+Note how an SRv6 BSID works to decapsulate received GTP-U over IPv4
+packets. To utilize the ``t.m.gtp4.d`` function, you need to configure
+an SR steering policy like:
+
+::
+
+ sr steer l3 172.20.0.1/32 via bsid 2001:db8::1
+
+The above steering policy with the BSID of ``t.m.gtp4.d`` would work
+properly for the GTP-U packets destined to 172.20.0.1.
+
+If you have a SID list for the SR policy to which the configured gtp4.d
+function applies, the SR policy can be configured as follows:
+
+::
+
+ sr policy add bsid D1:: next A1:: next B1:: next C1::
+
+IPv6 infrastructure case
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+When GTP-U is deployed over **IPv6** infrastructure, you don’t need to
+configure a T.M.GTP4.D function and an associated SR steering policy.
+Instead, you just need to configure a localsid for an End.M.GTP6.D
+segment.
+
+An End.M.GTP6.D segment is associated with the following mandatory
+parameters:
+
+- SID-PREFIX: SRv6 SID prefix to represent the function. In this
+  function, it should be the dst address of received GTP-U packets.
+- DST-PREFIX: Prefix of the remote SRv6 segment. The destination
+  address or last SID of output packets consists of the prefix followed
+  by the QFI and TEID of the received packets.
+
+The following command instantiates a new End.M.GTP6.D function.
+
+::
+
+ sr localsid prefix SID-PREFIX behavior end.m.gtp6.d DST-PREFIX [nhtype {ipv4|ipv6|non-ip}]
+
+For example, the command below configures the SID prefix 2001:db8::/64
+with ``end.m.gtp6.d`` behavior for translating received GTP-U over IPv6
+packets whose IPv6 destination addresses fall within 2001:db8::/64 into
+SRv6 packets. The dst IPv6 address of the outgoing packets consists of
+D4::/64 followed by the QFI and TEID.
+
+::
+
+ sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.d D4::/64
+
+In another case, the packets translated from GTP-U over IPv6 to SRv6
+will be re-translated back to GTP-U; this is the so-called ‘Drop-In’
+mode.
+
+In Drop-In mode, an additional IPv6-specific end segment is required,
+named End.M.GTP6.D.Di, because unlike ``end.m.gtp6.d`` it needs to
+preserve the original IPv6 dst address as the last SID in the SRH.
+
+Apart from that difference, the required configuration parameters are
+the same as for ``end.m.gtp6.d``.
+
+The following command instantiates a new End.M.GTP6.D.Di function.
+
+::
+
+ sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.d.di D4::/64
+
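+The decapsulation behaviors in this plugin also accept a
+``fib-table <id>`` parameter, selecting the FIB used for the inner
+packet lookup, and a ``drop-in`` flag that enables Drop-In mode. A
+sketch based on the parser in ``gtp6_d.c`` (exact syntax follows the
+plugin’s help string):
+
+::
+
+   sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.d D4::/64 fib-table 0 drop-in
+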
+SRv6 to GTP-U
+~~~~~~~~~~~~~
+
+The SRv6 Mobile functions in the SRv6 to GTP-U direction are
+End.M.GTP4.E and End.M.GTP6.E.
+
+In this direction with GTP-U over IPv4 infrastructure, an End.M.GTP4.E
+segment is associated with the following mandatory parameters:
+
+- SID-PREFIX: SRv6 SID prefix to represent the function.
+- V4SRC-ADDR-POSITION: An integer indicating the bit position where the
+  IPv4 src address is embedded.
+
+The following command instantiates a new End.M.GTP4.E function.
+
+::
+
+ sr localsid prefix SID-PREFIX behavior end.m.gtp4.e v4src_position V4SRC-ADDR-POSITION
+
+For example, the command below configures the SID prefix 2001:db8::/32
+with ``end.m.gtp4.e`` behavior for translating received SRv6 packets to
+GTP-U packets encapsulated with a UDP/IPv4 header. All the GTP-U tunnel
+and flow identifiers are extracted from the active SID in the received
+packets. The src IPv4 address of outgoing GTP-U packets is extracted
+from the configured bit position in the src IPv6 address.
+
+::
+
+ sr localsid prefix 2001:db8::/32 behavior end.m.gtp4.e v4src_position 64
+
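+This plugin also accepts a fixed IPv4 source address in place of a bit
+position, together with a ``fib-table`` parameter (a sketch based on
+the parser in ``gtp4_e.c``; 192.0.2.1 is a placeholder address):
+
+::
+
+   sr localsid prefix 2001:db8::/32 behavior end.m.gtp4.e v4src_addr 192.0.2.1 fib-table 0
+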
+In the IPv6 infrastructure case, an End.M.GTP6.E segment is associated
+with the following mandatory parameters:
+
+- SID-PREFIX: SRv6 SID prefix to represent the function.
+
+The following command instantiates a new End.M.GTP6.E function.
+
+::
+
+ sr localsid prefix SID-PREFIX behavior end.m.gtp6.e
+
+For example, the command below configures the SID prefix 2001:db8::/64
+with ``end.m.gtp6.e`` behavior for translating received SRv6 packets
+to GTP-U packets encapsulated with a UDP/IPv6 header. While the last
+SID indicates the GTP-U dst IPv6 address, the 32-bit GTP-U TEID and
+6-bit QFI are extracted from the active SID in the received packets.
+
+::
+
+ sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.e
+
+FIB Table Lookup for Inner IPv4/IPv6 packet
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The SRv6 Mobile functions ``t.m.gtp4.dt*`` and ``end.m.gtp6.dt*``
+support decapsulating the outer IP/UDP/GTP-U headers and forwarding the
+inner IP packet based on a specific fib table.
+
+When both the outer and inner IP address families are IPv4, the
+``t.m.gtp4.dt4`` function supports GTP-U decapsulation and fib lookup
+for the inner IPv4 packet, with an associated steering policy and the
+following parameters:
+
+- SID: An SRv6 SID that represents the function
+- FIB: fib-table number for inner IPv4 packet lookup and forwarding
+
+The following command instantiates a new T.M.GTP4.DT4 function.
+
+::
+
+ sr policy add bsid SID behavior t.m.gtp4.dt4 fib-table FIB
+
+For example, the commands below configure D5:: as the SID that
+instantiates the ``t.m.gtp4.dt4`` function. A steering policy for
+packets destined to 172.20.0.1 binds to the SID.
+
+::
+
+ sr steer l3 172.20.0.1/32 via bsid D5::
+ sr policy add bsid D5:: behavior t.m.gtp4.dt4 fib-table 0
+
+In addition, the inner IPv6 case, and the mixed IPv4/IPv6 inner packet
+case, require the function to be configured with a local-fib table.
+
+- LOCAL-FIB: fib-table number for lookup and forward GTP-U packet based
+ on outer IP destination address
+
+This is specific to the inner IPv6 case. The reason is that GTP-U
+encapsulates link-local IPv6 packets for NDP (Neighbor Discovery
+Protocol). The outer GTP-U header should be kept until the packets
+reach the node responsible for NDP handling, typically a UPF (User
+Plane Function) node.
+
+The following command instantiates a new T.M.GTP4.DT6 function.
+
+::
+
+ sr policy add bsid D5:: behavior t.m.gtp4.dt6 fib-table 0 local-fib-table LOCAL-FIB
+
+The following example configures fib 0 for the inner packet and fib 1
+for outer GTP-U packet forwarding:
+
+::
+
+ sr policy add bsid D5:: behavior t.m.gtp4.dt6 fib-table 0 local-fib-table 1
+
+If you need to support both IPv4 and IPv6 inner packet lookup with just
+one SID, you can configure the ``t.m.gtp4.dt46`` function:
+
+::
+
+ sr policy add bsid D5:: behavior t.m.gtp4.dt46 fib-table 0 local-fib-table 1
+
+In the GTP-U over IPv6 case, the ``end.m.gtp6.dt4``, ``end.m.gtp6.dt6``
+and ``end.m.gtp6.dt46`` functions support inner IPv4, IPv6 and
+IPv4/IPv6 lookup and forwarding respectively. Specifying a fib table
+for inner IP packet forwarding is required, as in the GTP-U over IPv4
+case, and a local-fib table for the inner IPv6 and IPv4/IPv6 cases as
+well.
+
+::
+
+ sr localsid prefix D::/64 behavior end.m.gtp6.dt46 fib-table 0 local-fib-table 0
+
+To run a demo setup, please refer to: :ref:`srv6_mobile_runner_doc`
diff --git a/src/plugins/srv6-mobile/node.c b/src/plugins/srv6-mobile/node.c
index 448d6332b15..ed0697a8009 100644
--- a/src/plugins/srv6-mobile/node.c
+++ b/src/plugins/srv6-mobile/node.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arrcus Inc and/or its affiliates.
+ * Copyright (c) 2019 Arrcus Inc and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -102,11 +102,11 @@ format_srv6_end_rewrite_trace6 (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
srv6_end_rewrite_trace_t *t = va_arg (*args, srv6_end_rewrite_trace_t *);
- return format (s,
- "SRv6-END-rewrite: src %U dst %U\n\tTEID: 0x%x\n\tsr_prefix: %U/%d",
- format_ip6_address, &t->src, format_ip6_address, &t->dst,
- clib_net_to_host_u32 (t->teid), format_ip6_address,
- &t->sr_prefix, t->sr_prefixlen);
+ return format (
+ s, "SRv6-END-rewrite: src %U dst %U\n\tTEID: 0x%x\n\tsr_prefix: %U/%d",
+ format_ip6_address, &t->src, format_ip6_address, &t->dst,
+ clib_net_to_host_u32 (t->teid), format_ip6_address, &t->sr_prefix,
+ t->sr_prefixlen);
}
#define foreach_srv6_end_v4_error \
@@ -245,7 +245,8 @@ typedef enum
typedef enum
{
SRV6_T_M_GTP4_D_NEXT_DROP,
- SRV6_T_M_GTP4_D_NEXT_LOOKUP,
+ SRV6_T_M_GTP4_D_NEXT_LOOKUP4,
+ SRV6_T_M_GTP4_D_NEXT_LOOKUP6,
SRV6_T_M_GTP4_D_N_NEXT,
} srv6_T_m_gtp4_d_next_t;
@@ -259,7 +260,8 @@ typedef enum
typedef enum
{
SRV6_END_M_GTP6_D_NEXT_DROP,
- SRV6_END_M_GTP6_D_NEXT_LOOKUP,
+ SRV6_END_M_GTP6_D_NEXT_LOOKUP4,
+ SRV6_END_M_GTP6_D_NEXT_LOOKUP6,
SRV6_END_M_GTP6_D_N_NEXT,
} srv6_end_m_gtp6_d_next_t;
@@ -317,9 +319,8 @@ gtpu_type_get (u16 tag)
}
// Function for SRv6 GTP4.E function.
-VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (srv6_end_m_gtp4_e)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
srv6_end_main_v4_t *sm = &srv6_end_main_v4;
ip6_sr_main_t *sm2 = &sr_main;
@@ -343,7 +344,7 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
u32 bi0;
vlib_buffer_t *b0;
ip6_sr_localsid_t *ls0;
- srv6_end_gtp4_param_t *ls_param;
+ srv6_end_gtp4_e_param_t *ls_param;
ip6srv_combo_header_t *ip6srv0;
ip6_address_t src0, dst0;
@@ -362,11 +363,10 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->localsids,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
- ls_param = (srv6_end_gtp4_param_t *) ls0->plugin_mem;
+ ls_param = (srv6_end_gtp4_e_param_t *) ls0->plugin_mem;
ip6srv0 = vlib_buffer_get_current (b0);
src0 = ip6srv0->ip.src_address;
@@ -374,10 +374,10 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
len0 = vlib_buffer_length_in_chain (vm, b0);
- if ((ip6srv0->ip.protocol == IPPROTO_IPV6_ROUTE
- && len0 <
- sizeof (ip6srv_combo_header_t) + ip6srv0->sr.length * 8)
- || (len0 < sizeof (ip6_header_t)))
+ if ((ip6srv0->ip.protocol == IPPROTO_IPV6_ROUTE &&
+ len0 <
+ sizeof (ip6srv_combo_header_t) + ip6srv0->sr.length * 8) ||
+ (len0 < sizeof (ip6_header_t)))
{
next0 = SRV6_END_M_GTP4_E_NEXT_DROP;
@@ -388,7 +388,7 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
u8 gtpu_type = 0;
u16 tag = 0;
u32 teid = 0;
- u8 *teid8p = (u8 *) & teid;
+ u8 *teid8p = (u8 *) &teid;
u8 qfi = 0;
u16 seq = 0;
u32 index;
@@ -418,9 +418,9 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
qfi = dst0.as_u8[offset + 4];
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
clib_memcpy_fast (&seq, &dst0.as_u8[offset + 5], 2);
}
@@ -443,11 +443,11 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
qfi |= dst0.as_u8[offset + 4] << shift;
qfi |= dst0.as_u8[offset + 5] >> (8 - shift);
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
- sp = (u8 *) & seq;
+ sp = (u8 *) &seq;
for (index = 0; index < 2; index++)
{
sp[index] = dst0.as_u8[offset + 5 + index] << shift;
@@ -472,9 +472,9 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
hdrlen =
sizeof (gtpu_exthdr_t) + sizeof (gtpu_pdu_session_t);
}
- else if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ else if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
hdrlen = sizeof (gtpu_exthdr_t);
}
@@ -494,11 +494,10 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
if (ext_len >
sizeof (ip6_address_t) * (ip6srv0->sr.last_entry + 1))
{
- tlv =
- (ip6_sr_tlv_t *) ((u8 *) & ip6srv0->sr +
- sizeof (ip6_sr_header_t) +
- sizeof (ip6_address_t) *
- (ip6srv0->sr.last_entry + 1));
+ tlv = (ip6_sr_tlv_t *) ((u8 *) &ip6srv0->sr +
+ sizeof (ip6_sr_header_t) +
+ sizeof (ip6_address_t) *
+ (ip6srv0->sr.last_entry + 1));
if (tlv->type == SRH_TLV_USER_PLANE_CONTAINER)
{
@@ -518,7 +517,7 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
{
vlib_buffer_advance (b0,
(word) sizeof (ip6srv_combo_header_t) +
- ip6srv0->sr.length * 8);
+ ip6srv0->sr.length * 8);
}
else
{
@@ -549,38 +548,9 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
hdr0->gtpu.type = gtpu_type;
- if (qfi)
- {
- u8 type = 0;
- gtpu_pdu_session_t *sess;
-
- hdr0->gtpu.ver_flags |= GTPU_EXTHDR_FLAG;
-
- hdr0->gtpu.ext->seq = 0;
-
- hdr0->gtpu.ext->npdu_num = 0;
- hdr0->gtpu.ext->nextexthdr = GTPU_EXTHDR_PDU_SESSION;
-
- type = qfi & SRV6_PDU_SESSION_U_BIT_MASK;
-
- qfi =
- ((qfi & SRV6_PDU_SESSION_QFI_MASK) >> 2) |
- ((qfi & SRV6_PDU_SESSION_R_BIT_MASK) << 5);
-
- sess =
- (gtpu_pdu_session_t *) (((char *) hdr0) +
- sizeof (ip4_gtpu_header_t) +
- sizeof (gtpu_exthdr_t));
- sess->exthdrlen = 1;
- sess->type = type;
- sess->spare = 0;
- sess->u.val = qfi;
- sess->nextexthdr = 0;
- }
-
- if (gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
hdr0->gtpu.ver_flags |= GTPU_SEQ_FLAG;
hdr0->gtpu.ext->seq = seq;
@@ -609,41 +579,80 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
}
}
}
+ else
+ {
+ if (qfi)
+ {
+ hdr0->gtpu.ext->seq = 0;
+ hdr0->gtpu.ext->npdu_num = 0;
+ }
+ }
+
+ if (qfi)
+ {
+ u8 type = 0;
+ gtpu_pdu_session_t *sess;
+
+ hdr0->gtpu.ver_flags |= GTPU_EXTHDR_FLAG;
+
+ hdr0->gtpu.ext->nextexthdr = GTPU_EXTHDR_PDU_SESSION;
+
+ type = qfi & SRV6_PDU_SESSION_U_BIT_MASK;
+
+ qfi = ((qfi & SRV6_PDU_SESSION_QFI_MASK) >> 2) |
+ ((qfi & SRV6_PDU_SESSION_R_BIT_MASK) << 5);
+
+ sess = (gtpu_pdu_session_t *) (((char *) hdr0) +
+ sizeof (ip4_gtpu_header_t) +
+ sizeof (gtpu_exthdr_t));
+ sess->exthdrlen = 1;
+ sess->type = type;
+ sess->spare = 0;
+ sess->u.val = qfi;
+ sess->nextexthdr = 0;
+ }
- offset = ls_param->v4src_position / 8;
- shift = ls_param->v4src_position % 8;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls_param->fib4_index;
- if (PREDICT_TRUE (shift == 0))
+ if (ls_param->v4src_position)
{
- for (index = 0; index < 4; index++)
+ offset = ls_param->v4src_position / 8;
+ shift = ls_param->v4src_position % 8;
+
+ if (PREDICT_TRUE (shift == 0))
+ {
+ for (index = 0; index < 4; index++)
+ {
+ hdr0->ip4.src_address.as_u8[index] =
+ src0.as_u8[offset + index];
+ }
+ }
+ else
{
- hdr0->ip4.src_address.as_u8[index] =
- src0.as_u8[offset + index];
+ for (index = 0; index < 4; index++)
+ {
+ hdr0->ip4.src_address.as_u8[index] =
+ src0.as_u8[offset + index] << shift;
+ hdr0->ip4.src_address.as_u8[index] |=
+ src0.as_u8[offset + index + 1] >> (8 - shift);
+ }
}
}
else
{
- for (index = 0; index < 4; index++)
- {
- hdr0->ip4.src_address.as_u8[index] =
- src0.as_u8[offset + index] << shift;
- hdr0->ip4.src_address.as_u8[index] |=
- src0.as_u8[offset + index + 1] >> (8 - shift);
- }
+ clib_memcpy_fast (&hdr0->ip4.src_address,
+ &ls_param->v4src_addr, 4);
}
key = hash_memory (p, plen < 40 ? plen : 40, 0);
port = hash_uword_to_u16 (&key);
hdr0->udp.src_port = port;
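The UDP source port above is derived by hashing up to the first 40 bytes of
the inner packet, so all packets of a flow pick the same port and underlay
ECMP/RSS can spread flows without reordering packets inside one. An
illustrative standalone equivalent (FNV-1a stands in for VPP's
hash_memory(); that substitution is an assumption of this sketch):

#include <stdint.h>
#include <stddef.h>

static uint16_t
flow_entropy_port (const uint8_t *p, size_t len)
{
  uint64_t h = 0xcbf29ce484222325ULL; /* FNV-1a over at most 40 bytes */
  size_t i, n = len < 40 ? len : 40;
  for (i = 0; i < n; i++)
    h = (h ^ p[i]) * 0x100000001b3ULL;
  /* Fold the 64-bit hash down to a 16-bit port. */
  return (uint16_t) (h ^ (h >> 16) ^ (h >> 32) ^ (h >> 48));
}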
- hdr0->udp.length = clib_host_to_net_u16 (len0 +
- sizeof (udp_header_t) +
- sizeof
- (gtpu_header_t));
+ hdr0->udp.length = clib_host_to_net_u16 (
+ len0 + sizeof (udp_header_t) + sizeof (gtpu_header_t));
- hdr0->ip4.length = clib_host_to_net_u16 (len0 +
- sizeof
- (ip4_gtpu_header_t));
+ hdr0->ip4.length =
+ clib_host_to_net_u16 (len0 + sizeof (ip4_gtpu_header_t));
hdr0->ip4.checksum = ip4_header_checksum (&hdr0->ip4);
@@ -662,11 +671,12 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
}
}
- vlib_increment_combined_counter
- (((next0 ==
- SRV6_END_M_GTP4_E_NEXT_DROP) ? &(sm2->sr_ls_invalid_counters) :
- &(sm2->sr_ls_valid_counters)), thread_index,
- ls0 - sm2->localsids, 1, vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (
+ ((next0 == SRV6_END_M_GTP4_E_NEXT_DROP) ?
+ &(sm2->sr_ls_invalid_counters) :
+ &(sm2->sr_ls_valid_counters)),
+ thread_index, ls0 - sm2->localsids, 1,
+ vlib_buffer_length_in_chain (vm, b0));
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
@@ -685,515 +695,590 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
}
// Function for SRv6 GTP4.D function.
-VLIB_NODE_FN (srv6_t_m_gtp4_d) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+static inline u32
+srv6_gtp4_decap_processing (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b0)
{
srv6_t_main_v4_decap_t *sm = &srv6_t_main_v4_decap;
ip6_sr_main_t *sm2 = &sr_main;
- u32 n_left_from, next_index, *from, *to_next;
- u32 good_n = 0, bad_n = 0;
+ ip6_sr_sl_t *sl0;
+ srv6_end_gtp4_d_param_t *ls_param;
+ ip4_header_t *ip4;
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
+ uword len0;
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
+ u32 next0 = SRV6_T_M_GTP4_D_NEXT_LOOKUP6;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ sl0 = pool_elt_at_index (sm2->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
- while (n_left_from > 0 && n_left_to_next > 0)
+ ls_param = (srv6_end_gtp4_d_param_t *) sl0->plugin_mem;
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ ip4 = vlib_buffer_get_current (b0);
+
+ if (ip4->protocol != IP_PROTOCOL_UDP || len0 < sizeof (ip4_gtpu_header_t))
+ {
+ next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ }
+ else
+ {
+ uword *p;
+ ip6_sr_policy_t *sr_policy = NULL;
+ ip6_sr_sl_t *sl = NULL;
+ u32 *sl_index;
+ u32 hdr_len;
+
+ ip4_gtpu_header_t *hdr;
+ ip4_address_t src, dst;
+ u8 *srcp, *dstp;
+ ip6_header_t *encap = NULL;
+ ip6_address_t seg;
+ ip6_address_t src6;
+ u8 gtpu_type;
+ u32 teid;
+ u8 *teidp;
+ u8 qfi = 0;
+ u8 *qfip = NULL;
+ u16 seq = 0;
+ u8 *seqp;
+ u32 offset, shift, index;
+ ip6srv_combo_header_t *ip6srv;
+ gtpu_pdu_session_t *sess = NULL;
+ int ie_size = 0;
+ u16 tlv_siz = 0;
+ u8 ie_buf[GTPU_IE_MAX_SIZ];
+
+ // Decap from GTP-U.
+ hdr = (ip4_gtpu_header_t *) ip4;
+
+ hdr_len = sizeof (ip4_gtpu_header_t);
+
+ teid = hdr->gtpu.teid;
+ teidp = (u8 *) &teid;
+
+ seqp = (u8 *) &seq;
+
+ gtpu_type = hdr->gtpu.type;
+
+ if (hdr->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
{
- u32 bi0;
- vlib_buffer_t *b0;
- ip6_sr_sl_t *sl0;
- srv6_end_gtp4_param_t *ls_param;
- ip4_header_t *ip4;
+	      // Extension header.
+ hdr_len += sizeof (gtpu_exthdr_t);
- uword len0;
+ seq = hdr->gtpu.ext->seq;
- u32 next0 = SRV6_T_M_GTP4_D_NEXT_LOOKUP;
+ if (hdr->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
+ {
+ // PDU Session Container.
+ sess = (gtpu_pdu_session_t *) (((char *) hdr) + hdr_len);
+ qfi = sess->u.val & ~GTPU_PDU_SESSION_P_BIT_MASK;
+ qfip = (u8 *) &qfi;
- // defaults
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
+ hdr_len += sizeof (gtpu_pdu_session_t);
- b0 = vlib_get_buffer (vm, bi0);
+ if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
+ {
+ hdr_len += sizeof (gtpu_paging_policy_t);
+ }
+ }
+ }
- sl0 =
- pool_elt_at_index (sm2->sid_lists,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ src = hdr->ip4.src_address;
+ srcp = (u8 *) &src;
- ls_param = (srv6_end_gtp4_param_t *) sl0->plugin_mem;
+ dst = hdr->ip4.dst_address;
+ dstp = (u8 *) &dst;
- len0 = vlib_buffer_length_in_chain (vm, b0);
+ seg = ls_param->sr_prefix;
- ip4 = vlib_buffer_get_current (b0);
+ offset = ls_param->sr_prefixlen / 8;
+ shift = ls_param->sr_prefixlen % 8;
+
+ if (PREDICT_TRUE (shift == 0))
+ {
+ clib_memcpy_fast (&seg.as_u8[offset], dstp, 4);
- if (ip4->protocol != IP_PROTOCOL_UDP
- || len0 < sizeof (ip4_gtpu_header_t))
+ if (qfip)
{
- next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+ ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
- bad_n++;
+ if (sess->type)
+ {
+ qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
+ }
+
+ seg.as_u8[offset + 4] = qfi;
+ }
+
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ {
+ clib_memcpy_fast (&seg.as_u8[offset + 5], seqp, 2);
}
else
{
- uword *p;
- ip6_sr_policy_t *sr_policy = NULL;
- ip6_sr_sl_t *sl = NULL;
- u32 *sl_index;
- u32 hdr_len;
-
- ip4_gtpu_header_t *hdr;
- ip4_address_t src, dst;
- u8 *srcp, *dstp;
- ip6_header_t *encap = NULL;
- ip6_address_t seg;
- ip6_address_t src6;
- u8 gtpu_type;
- u32 teid;
- u8 *teidp;
- u8 qfi = 0;
- u8 *qfip = NULL;
- u16 seq = 0;
- u8 *seqp;
- u32 offset, shift, index;
- ip6srv_combo_header_t *ip6srv;
- gtpu_pdu_session_t *sess = NULL;
- int ie_size = 0;
- u16 tlv_siz = 0;
- u8 ie_buf[GTPU_IE_MAX_SIZ];
+ clib_memcpy_fast (&seg.as_u8[offset + 5], teidp, 4);
+ }
+ }
+ else
+ {
+ for (index = 0; index < 4; index++)
+ {
+ seg.as_u8[offset + index] |= dstp[index] >> shift;
+ seg.as_u8[offset + index + 1] |= dstp[index] << (8 - shift);
+ }
- // Decap from GTP-U.
- hdr = (ip4_gtpu_header_t *) ip4;
+ if (qfip)
+ {
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+ ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
- hdr_len = sizeof (ip4_gtpu_header_t);
+ if (sess->type)
+ {
+ qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
+ }
- teid = hdr->gtpu.teid;
- teidp = (u8 *) & teid;
+ seg.as_u8[offset + 4] |= qfi >> shift;
+ seg.as_u8[offset + 5] |= qfi << (8 - shift);
+ }
- seqp = (u8 *) & seq;
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ {
+ for (index = 0; index < 2; index++)
+ {
+ seg.as_u8[offset + 5 + index] |= seqp[index] >> shift;
+ seg.as_u8[offset + 6 + index] |= seqp[index] << (8 - shift);
+ }
+ }
+ else
+ {
+ for (index = 0; index < 4; index++)
+ {
+ seg.as_u8[offset + index + 5] |= teidp[index] >> shift;
+ seg.as_u8[offset + index + 6] |= teidp[index] << (8 - shift);
+ }
+ }
+ }
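For reference, the SID argument layout being encoded above:

/* Byte-aligned case; offsets relative to the configured SR prefix length:
 *
 *   seg.as_u8[offset .. offset+3]  inner IPv4 destination address
 *   seg.as_u8[offset+4]            repacked QFI, when a PDU Session
 *                                  Container was present
 *   seg.as_u8[offset+5 ...]        TEID (4 bytes), or the 2-byte sequence
 *                                  number for echo request/reply and
 *                                  error indication
 *
 * The non-aligned branch writes the same fields shifted right by
 * (sr_prefixlen % 8) bits. */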
- gtpu_type = hdr->gtpu.type;
+ if (PREDICT_FALSE (gtpu_type == GTPU_TYPE_ERROR_INDICATION))
+ {
+ u16 payload_len;
- if (hdr->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
+ payload_len = clib_net_to_host_u16 (hdr->gtpu.length);
+ if (payload_len != 0)
+ {
+ ie_size = payload_len - (hdr_len - sizeof (ip4_gtpu_header_t));
+ if (ie_size > 0)
{
- // Extention header.
- hdr_len += sizeof (gtpu_exthdr_t);
+ u8 *ies;
- seq = hdr->gtpu.ext->seq;
+ ies = (u8 *) ((u8 *) hdr + hdr_len);
+ clib_memcpy_fast (ie_buf, ies, ie_size);
+ hdr_len += ie_size;
+ }
+ }
+ }
- if (hdr->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
- {
- // PDU Session Container.
- sess =
- (gtpu_pdu_session_t *) (((char *) hdr) + hdr_len);
- qfi = sess->u.val & ~GTPU_PDU_SESSION_P_BIT_MASK;
- qfip = (u8 *) & qfi;
+ src6 = ls_param->v6src_prefix;
- hdr_len += sizeof (gtpu_pdu_session_t);
+ offset = ls_param->v6src_prefixlen / 8;
+ shift = ls_param->v6src_prefixlen % 8;
- if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
- {
- hdr_len += sizeof (gtpu_paging_policy_t);
- }
- }
- }
+ if (PREDICT_TRUE (shift == 0))
+ {
+ clib_memcpy_fast (&src6.as_u8[offset], srcp, 4);
+ }
+ else
+ {
+ for (index = 0; index < 4; index++)
+ {
+		  src6.as_u8[offset + index] |= srcp[index] >> shift;
+		  src6.as_u8[offset + index + 1] |= srcp[index] << (8 - shift);
+ }
+ }
- src = hdr->ip4.src_address;
- srcp = (u8 *) & src;
+ vlib_buffer_advance (b0, (word) hdr_len);
- dst = hdr->ip4.dst_address;
- dstp = (u8 *) & dst;
+ // Encap to SRv6.
+ if (PREDICT_TRUE (gtpu_type == GTPU_TYPE_GTPU))
+ {
+ encap = vlib_buffer_get_current (b0);
+ }
- seg = ls_param->sr_prefix;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
- offset = ls_param->sr_prefixlen / 8;
- shift = ls_param->sr_prefixlen % 8;
+ p = mhash_get (&sm2->sr_policies_index_hash, &ls_param->sr_prefix);
+ if (p)
+ {
+ sr_policy = pool_elt_at_index (sm2->sr_policies, p[0]);
+ }
- if (PREDICT_TRUE (shift == 0))
- {
- clib_memcpy_fast (&seg.as_u8[offset], dstp, 4);
+ if (sr_policy)
+ {
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
+ if (sl != NULL)
+ break;
+ }
+ }
- if (qfip)
- {
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ if (sl)
+ {
+ hdr_len = sizeof (ip6srv_combo_header_t);
+ hdr_len += vec_len (sl->segments) * sizeof (ip6_address_t);
+ hdr_len += sizeof (ip6_address_t);
+ }
+ else
+ {
+ hdr_len = sizeof (ip6_header_t);
- if (sess->type)
- {
- qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
- }
+ if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU))
+ {
+ hdr_len += sizeof (ip6_sr_header_t);
+ hdr_len += sizeof (ip6_address_t);
+ }
+ }
- seg.as_u8[offset + 4] = qfi;
- }
+ if (ie_size)
+ {
+ tlv_siz =
+ sizeof (ip6_sr_tlv_t) + sizeof (user_plane_sub_tlv_t) + ie_size;
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
- {
- clib_memcpy_fast (&seg.as_u8[offset + 5], seqp, 2);
- }
- else
- {
- clib_memcpy_fast (&seg.as_u8[offset + 5], teidp, 4);
- }
- }
- else
- {
- for (index = 0; index < 4; index++)
- {
- seg.as_u8[offset + index] |= dstp[index] >> shift;
- seg.as_u8[offset + index + 1] |=
- dstp[index] << (8 - shift);
- }
+ tlv_siz = (tlv_siz & ~0x07) + (tlv_siz & 0x07 ? 0x08 : 0x0);
+ hdr_len += tlv_siz;
+ }
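The rounding above exists because the SRH length field counts 8-octet
units, so the TLV carrying the copied GTP-U IEs is padded to a multiple of
eight. An equivalent, arguably clearer form of the same computation
(sketch):

#include <stdint.h>

static inline uint16_t
round_up_to_8 (uint16_t n)
{
  /* (n & ~7) + (n & 7 ? 8 : 0) == (n + 7) & ~7 */
  return (uint16_t) ((n + 7) & ~7);
}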
- if (qfip)
- {
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ vlib_buffer_advance (b0, -(word) hdr_len);
+ ip6srv = vlib_buffer_get_current (b0);
- if (sess->type)
- {
- qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
- }
+ if (sl)
+ {
+ clib_memcpy_fast (ip6srv, sl->rewrite, vec_len (sl->rewrite));
- seg.as_u8[offset + 4] |= qfi >> shift;
- seg.as_u8[offset + 5] |= qfi << (8 - shift);
- }
+ if (vec_len (sl->segments) > 1)
+ {
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
- {
- for (index = 0; index < 2; index++)
- {
- seg.as_u8[offset + 5 + index] |=
- seqp[index] >> shift;
- seg.as_u8[offset + 6 + index] |=
- seqp[index] << (8 - shift);
- }
- }
- else
- {
- for (index = 0; index < 4; index++)
- {
- seg.as_u8[offset + index + 5] |=
- teidp[index] >> shift;
- seg.as_u8[offset + index + 6] |=
- teidp[index] << (8 - shift);
- }
- }
- }
+ ip6srv->sr.segments_left += 1;
+ ip6srv->sr.last_entry += 1;
- if (PREDICT_FALSE (gtpu_type == GTPU_TYPE_ERROR_INDICATION))
- {
- u16 payload_len;
+ ip6srv->sr.length += sizeof (ip6_address_t) / 8;
+ ip6srv->sr.segments[0] = seg;
- payload_len = clib_net_to_host_u16 (hdr->gtpu.length);
- if (payload_len != 0)
- {
- ie_size =
- payload_len - (hdr_len - sizeof (ip4_gtpu_header_t));
- if (ie_size > 0)
- {
- u8 *ies;
+ clib_memcpy_fast (&ip6srv->sr.segments[1],
+ (u8 *) (sl->rewrite + sizeof (ip6_header_t) +
+ sizeof (ip6_sr_header_t)),
+ vec_len (sl->segments) *
+ sizeof (ip6_address_t));
+ }
+ else
+ {
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
- ies = (u8 *) ((u8 *) hdr + hdr_len);
- clib_memcpy_fast (ie_buf, ies, ie_size);
- hdr_len += ie_size;
- }
- }
- }
+ ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
- src6 = ls_param->v6src_prefix;
+ ip6srv->sr.segments_left = 1;
+ ip6srv->sr.last_entry = 0;
- offset = ls_param->v6src_prefixlen / 8;
- shift = ls_param->v6src_prefixlen % 8;
+ ip6srv->sr.length =
+ ((sizeof (ip6_sr_header_t) + sizeof (ip6_address_t)) / 8) - 1;
+ ip6srv->sr.flags = 0;
- if (PREDICT_TRUE (shift == 0))
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+
+ ip6srv->sr.segments[0] = seg;
+ if (vec_len (sl->segments))
{
- clib_memcpy_fast (&src6.as_u8[offset], srcp, 4);
+ ip6srv->sr.segments[1] = sl->segments[0];
+ ip6srv->sr.length += sizeof (ip6_address_t) / 8;
+ ip6srv->sr.last_entry++;
}
- else
+ }
+
+ if (PREDICT_TRUE (encap != NULL))
+ {
+ if (ls_param->nhtype == SRV6_NHTYPE_NONE)
{
- for (index = 0; index < 4; index++)
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
+ ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
+ else
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
+ }
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 4)
{
- src6.as_u8[offset + index] |= srcp[offset] >> shift;
- src6.as_u8[offset + index + 1] |=
- srcp[offset] << (8 - shift);
+ // Bad encap packet.
+ next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ goto DONE;
}
}
-
- vlib_buffer_advance (b0, (word) hdr_len);
-
- // Encap to SRv6.
- if (PREDICT_TRUE (gtpu_type == GTPU_TYPE_GTPU))
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
{
- encap = vlib_buffer_get_current (b0);
+ ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
+ {
+ // Bad encap packet.
+ next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ goto DONE;
+ }
}
-
- len0 = vlib_buffer_length_in_chain (vm, b0);
-
- p =
- mhash_get (&sm2->sr_policies_index_hash,
- &ls_param->sr_prefix);
- if (p)
+ else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
{
- sr_policy = pool_elt_at_index (sm2->sr_policies, p[0]);
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
}
+ }
+ else
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ }
+ }
+ else
+ {
+ clib_memcpy_fast (ip6srv, &sm->cache_hdr, sizeof (ip6_header_t));
- if (sr_policy)
+ ip6srv->ip.dst_address = seg;
+
+ if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU))
+ {
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+
+ ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
+
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+
+ ip6srv->sr.segments_left = 0;
+ ip6srv->sr.last_entry = 0;
+
+ ip6srv->sr.length = sizeof (ip6_address_t) / 8;
+ ip6srv->sr.segments[0] = seg;
+ }
+ else
+ {
+ if (ls_param->nhtype == SRV6_NHTYPE_NONE)
{
- vec_foreach (sl_index, sr_policy->segments_lists)
- {
- sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
- if (sl != NULL)
- break;
- }
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6;
+ else
+ ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
}
-
- if (sl)
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
{
- hdr_len = sizeof (ip6srv_combo_header_t);
- hdr_len += vec_len (sl->segments) * sizeof (ip6_address_t);
- hdr_len += sizeof (ip6_address_t);
+ ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 4)
+ {
+ // Bad encap packet.
+ next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ goto DONE;
+ }
}
- else
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
{
- hdr_len = sizeof (ip6_header_t);
-
- if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU))
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
{
- hdr_len += sizeof (ip6_sr_header_t);
- hdr_len += sizeof (ip6_address_t);
+ // Bad encap packet.
+ next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ goto DONE;
}
}
-
- if (ie_size)
+ else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
{
- tlv_siz =
- sizeof (ip6_sr_tlv_t) + sizeof (user_plane_sub_tlv_t) +
- ie_size;
-
- tlv_siz = (tlv_siz & ~0x07) + (tlv_siz & 0x07 ? 0x08 : 0x0);
- hdr_len += tlv_siz;
+ ip6srv->ip.protocol = IP_PROTOCOL_IP6_ETHERNET;
}
+ }
+ }
- vlib_buffer_advance (b0, -(word) hdr_len);
- ip6srv = vlib_buffer_get_current (b0);
+ ip6srv->ip.src_address = src6;
- if (sl)
- {
- clib_memcpy_fast (ip6srv, sl->rewrite,
- vec_len (sl->rewrite));
+ if (PREDICT_FALSE (ie_size))
+ {
+ ip6_sr_tlv_t *tlv;
+ user_plane_sub_tlv_t *sub_tlv;
- if (vec_len (sl->segments) > 1)
- {
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ tlv = (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
+ tlv->type = SRH_TLV_USER_PLANE_CONTAINER;
+ tlv->length = (u8) (tlv_siz - sizeof (ip6_sr_tlv_t));
+ clib_memset (tlv->value, 0, tlv->length);
- ip6srv->sr.segments_left += 1;
- ip6srv->sr.last_entry += 1;
+ sub_tlv = (user_plane_sub_tlv_t *) tlv->value;
+ sub_tlv->type = USER_PLANE_SUB_TLV_IE;
+ sub_tlv->length = (u8) ie_size;
+ clib_memcpy_fast (sub_tlv->value, ie_buf, ie_size);
- ip6srv->sr.length += sizeof (ip6_address_t) / 8;
- ip6srv->sr.segments[0] = seg;
+ ip6srv->sr.length += (u8) (tlv_siz / 8);
+ }
- clib_memcpy_fast (&ip6srv->sr.segments[1],
- (u8 *) (sl->rewrite +
- sizeof (ip6_header_t) +
- sizeof (ip6_sr_header_t)),
- vec_len (sl->segments) *
- sizeof (ip6_address_t));
- }
- else
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+ ip6srv->ip.payload_length =
+ clib_host_to_net_u16 (len0 + hdr_len - sizeof (ip6_header_t));
- ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; /* default FIB */
- ip6srv->sr.segments_left = 1;
- ip6srv->sr.last_entry = 0;
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ srv6_end_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip6srv->ip.src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip6srv->ip.dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
- ip6srv->sr.length =
- ((sizeof (ip6_sr_header_t) +
- sizeof (ip6_address_t)) / 8) - 1;
- ip6srv->sr.flags = 0;
+DONE:
+ return next0;
+}
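Note: the per-packet body of the old node loop now lives in
srv6_gtp4_decap_processing(), which returns the next-node index. The node
function below only classifies each packet and invokes the GTP-U-aware path
when needed, roughly (a sketch of the dispatch below, not new logic):

if (gtp4) /* echo/error type, drop-in mode, or link-local inner dst */
  next0 = srv6_gtp4_decap_processing (vm, node, b0);
else
  {
    vlib_buffer_advance (b0, (word) hdrlen); /* plain GTP-U decap */
    next0 = ipv4 ? SRV6_T_M_GTP4_D_NEXT_LOOKUP4 :
                   SRV6_T_M_GTP4_D_NEXT_LOOKUP6;
  }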
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+VLIB_NODE_FN (srv6_t_m_gtp4_d)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ srv6_t_main_v4_decap_t *sm = &srv6_t_main_v4_decap;
+ ip6_sr_main_t *sm2 = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
- ip6srv->sr.segments[0] = seg;
- if (vec_len (sl->segments))
- {
- ip6srv->sr.segments[1] = sl->segments[0];
- ip6srv->sr.length += sizeof (ip6_address_t) / 8;
- ip6srv->sr.last_entry++;
- }
- }
+ ip6_sr_sl_t *sl0;
+ srv6_end_gtp4_d_param_t *ls_param;
- if (PREDICT_TRUE (encap != NULL))
- {
- if (ls_param->nhtype == SRV6_NHTYPE_NONE)
- {
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) == 6)
- ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
- else
- ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 4)
- {
- // Bad encap packet.
- next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 6)
- {
- // Bad encap packet.
- next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
- }
- }
- else
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
- }
- }
- else
- {
- clib_memcpy_fast (ip6srv, &sm->cache_hdr,
- sizeof (ip6_header_t));
+ u32 good_n = 0, bad_n = 0;
- ip6srv->ip.dst_address = seg;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
- if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU))
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
- ip6srv->sr.segments_left = 0;
- ip6srv->sr.last_entry = 0;
+ u32 next0;
- ip6srv->sr.length = sizeof (ip6_address_t) / 8;
- ip6srv->sr.segments[0] = seg;
- }
- else
+ ip4_gtpu_header_t *hdr;
+ u32 hdrlen;
+ u8 gtpu_type;
+ bool gtp4;
+ bool ipv4;
+
+ // defaults
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sl0 = pool_elt_at_index (sm2->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+ ls_param = (srv6_end_gtp4_d_param_t *) sl0->plugin_mem;
+
+ hdr = vlib_buffer_get_current (b0);
+ gtpu_type = hdr->gtpu.type;
+
+ gtp4 = false;
+ ipv4 = true;
+
+ if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU || ls_param->drop_in))
+ {
+ gtp4 = true;
+ }
+ else
+ {
+ ip6_header_t *ip6;
+
+ hdrlen = sizeof (ip4_gtpu_header_t);
+
+ if (hdr->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
+ {
+ hdrlen += sizeof (gtpu_exthdr_t);
+ if (hdr->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
{
- if (ls_param->nhtype == SRV6_NHTYPE_NONE)
- {
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) == 6)
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6;
- else
- ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 4)
- {
- // Bad encap packet.
- next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 6)
- {
- // Bad encap packet.
- next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
+ gtpu_pdu_session_t *sess;
+ sess = (gtpu_pdu_session_t *) (((char *) hdr) + hdrlen);
+ hdrlen += sizeof (gtpu_pdu_session_t);
+
+ if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
{
- ip6srv->ip.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ hdrlen += sizeof (gtpu_paging_policy_t);
}
}
}
- ip6srv->ip.src_address = src6;
-
- if (PREDICT_FALSE (ie_size))
+ ip6 = (ip6_header_t *) (((char *) hdr) + hdrlen);
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
{
- ip6_sr_tlv_t *tlv;
- user_plane_sub_tlv_t *sub_tlv;
-
- tlv =
- (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
- tlv->type = SRH_TLV_USER_PLANE_CONTAINER;
- tlv->length = (u8) (tlv_siz - sizeof (ip6_sr_tlv_t));
- clib_memset (tlv->value, 0, tlv->length);
-
- sub_tlv = (user_plane_sub_tlv_t *) tlv->value;
- sub_tlv->type = USER_PLANE_SUB_TLV_IE;
- sub_tlv->length = (u8) ie_size;
- clib_memcpy_fast (sub_tlv->value, ie_buf, ie_size);
-
- ip6srv->sr.length += (u8) (tlv_siz / 8);
+ ipv4 = false;
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
+ {
+		  // Inner destination is IPv6 link-local
+ gtp4 = true;
+ }
}
+ }
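The address test above matches ff02::/16 (link-local-scope multicast) and
fe80::/10 (link-local unicast); such inner destinations are not globally
routable, so the packet stays on the GTP-U-aware path. As a standalone
predicate (sketch):

#include <stdbool.h>
#include <stdint.h>

static inline bool
inner_dst_is_link_local (const uint8_t a[16])
{
  return (a[0] == 0xff && a[1] == 0x02) ||        /* ff02::/16 */
         (a[0] == 0xfe && (a[1] & 0xc0) == 0x80); /* fe80::/10 */
}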
- ip6srv->ip.payload_length =
- clib_host_to_net_u16 (len0 + hdr_len - sizeof (ip6_header_t));
-
- good_n++;
+ if (gtp4)
+ {
+ next0 = srv6_gtp4_decap_processing (vm, node, b0);
+ if (PREDICT_TRUE (next0 == SRV6_T_M_GTP4_D_NEXT_LOOKUP6))
+ good_n++;
+ else
+ bad_n++;
+ }
+ else
+ {
+ /* Strip off the outer header (IPv4 + GTP + UDP + IEs) */
+ vlib_buffer_advance (b0, (word) hdrlen);
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
- PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ if (ipv4)
{
- srv6_end_rewrite_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- clib_memcpy (tr->src.as_u8, ip6srv->ip.src_address.as_u8,
- sizeof (tr->src.as_u8));
- clib_memcpy (tr->dst.as_u8, ip6srv->ip.dst_address.as_u8,
- sizeof (tr->dst.as_u8));
+ next0 = SRV6_T_M_GTP4_D_NEXT_LOOKUP4;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ ls_param->fib4_index;
+ }
+ else
+ {
+ next0 = SRV6_T_M_GTP4_D_NEXT_LOOKUP6;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ ls_param->fib6_index;
}
}
- DONE:
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}
@@ -1218,8 +1303,8 @@ VLIB_REGISTER_NODE (srv6_end_m_gtp4_e) =
srv6_end_error_v4_strings,.n_next_nodes =
SRV6_END_M_GTP4_E_N_NEXT,.next_nodes =
{
- [SRV6_END_M_GTP4_E_NEXT_DROP] =
- "error-drop",[SRV6_END_M_GTP4_E_NEXT_LOOKUP] = "ip4-lookup",}
+ [SRV6_END_M_GTP4_E_NEXT_DROP] = "error-drop",
+ [SRV6_END_M_GTP4_E_NEXT_LOOKUP] = "ip4-lookup",}
,};
VLIB_REGISTER_NODE (srv6_t_m_gtp4_d) =
@@ -1230,14 +1315,14 @@ VLIB_REGISTER_NODE (srv6_t_m_gtp4_d) =
srv6_t_error_v4_d_strings,.n_next_nodes =
SRV6_T_M_GTP4_D_N_NEXT,.next_nodes =
{
- [SRV6_T_M_GTP4_D_NEXT_DROP] =
- "error-drop",[SRV6_T_M_GTP4_D_NEXT_LOOKUP] = "ip6-lookup",}
+ [SRV6_T_M_GTP4_D_NEXT_DROP] = "error-drop",
+ [SRV6_T_M_GTP4_D_NEXT_LOOKUP4] = "ip4-lookup",
+ [SRV6_T_M_GTP4_D_NEXT_LOOKUP6] = "ip6-lookup",}
,};
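The registration above reflects the new next-node split for GTP4.D: plainly
decapsulated inner IPv4 goes to ip4-lookup, while inner IPv6 and the
SRv6-encap path go to ip6-lookup. The enum behind it presumably looks as
follows (a sketch; the actual definition lives in the plugin header):

typedef enum
{
  SRV6_T_M_GTP4_D_NEXT_DROP,
  SRV6_T_M_GTP4_D_NEXT_LOOKUP4, /* inner IPv4 -> ip4-lookup */
  SRV6_T_M_GTP4_D_NEXT_LOOKUP6, /* inner IPv6 / SRv6 encap -> ip6-lookup */
  SRV6_T_M_GTP4_D_N_NEXT,
} srv6_t_m_gtp4_d_next_t;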
// Function for SRv6 GTP6.E function
-VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (srv6_end_m_gtp6_e)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
srv6_end_main_v6_t *sm = &srv6_end_main_v6;
ip6_sr_main_t *sm2 = &sr_main;
@@ -1261,6 +1346,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
u32 bi0;
vlib_buffer_t *b0;
ip6_sr_localsid_t *ls0;
+ srv6_end_gtp6_e_param_t *ls_param;
ip6srv_combo_header_t *ip6srv0;
ip6_address_t dst0, src0, seg0;
@@ -1284,9 +1370,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->localsids,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+ ls_param = (srv6_end_gtp6_e_param_t *) ls0->plugin_mem;
ip6srv0 = vlib_buffer_get_current (b0);
dst0 = ip6srv0->ip.dst_address;
@@ -1297,9 +1384,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
len0 = vlib_buffer_length_in_chain (vm, b0);
- if ((ip6srv0->ip.protocol != IPPROTO_IPV6_ROUTE)
- || (len0 <
- sizeof (ip6srv_combo_header_t) + 8 * ip6srv0->sr.length))
+ if ((ip6srv0->ip.protocol != IPPROTO_IPV6_ROUTE) ||
+ (len0 < sizeof (ip6srv_combo_header_t) + 8 * ip6srv0->sr.length))
{
next0 = SRV6_END_M_GTP6_E_NEXT_DROP;
@@ -1313,7 +1399,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
// logic
u32 teid = 0;
- u8 *teid8p = (u8 *) & teid;
+ u8 *teid8p = (u8 *) &teid;
u8 qfi = 0;
u16 seq = 0;
u8 gtpu_type = 0;
@@ -1332,10 +1418,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
if (PREDICT_TRUE (shift == 0))
{
qfi = dst0.as_u8[offset];
-
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
clib_memcpy_fast (&seq, &dst0.as_u8[offset + 1], 2);
}
@@ -1351,14 +1436,14 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
qfi |= dst0.as_u8[offset] << shift;
qfi |= dst0.as_u8[offset + 1] >> (8 - shift);
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
- sp = (u8 *) & seq;
+ sp = (u8 *) &seq;
for (index = 0; index < 2; index++)
{
- sp[index] = dst0.as_u8[offset + index + 1] << shift;
+ sp[index] = dst0.as_u8[offset + 1 + index] << shift;
sp[index] |=
dst0.as_u8[offset + index + 2] >> (8 - shift);
}
@@ -1380,9 +1465,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
hdrlen =
sizeof (gtpu_exthdr_t) + sizeof (gtpu_pdu_session_t);
}
- else if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ else if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
hdrlen = sizeof (gtpu_exthdr_t);
}
@@ -1402,11 +1487,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
if (ext_len >
sizeof (ip6_address_t) * (ip6srv0->sr.last_entry + 1))
{
- tlv =
- (ip6_sr_tlv_t *) ((u8 *) & ip6srv0->sr +
- sizeof (ip6_sr_header_t) +
- sizeof (ip6_address_t) *
- (ip6srv0->sr.last_entry + 1));
+ tlv = (ip6_sr_tlv_t *) ((u8 *) &ip6srv0->sr +
+ sizeof (ip6_sr_header_t) +
+ sizeof (ip6_address_t) *
+ (ip6srv0->sr.last_entry + 1));
if (tlv->type == SRH_TLV_USER_PLANE_CONTAINER)
{
@@ -1422,9 +1506,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
}
}
- vlib_buffer_advance (b0,
- (word) sizeof (ip6srv_combo_header_t) +
- ip6srv0->sr.length * 8);
+ vlib_buffer_advance (b0, (word) sizeof (ip6srv_combo_header_t) +
+ ip6srv0->sr.length * 8);
// get length of encapsulated IPv6 packet (the remaining part)
p = vlib_buffer_get_current (b0);
@@ -1447,37 +1530,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
hdr0->gtpu.type = gtpu_type;
- if (qfi)
- {
- u8 type = 0;
- gtpu_pdu_session_t *sess;
-
- hdr0->gtpu.ver_flags |= GTPU_EXTHDR_FLAG;
-
- hdr0->gtpu.ext->seq = 0;
- hdr0->gtpu.ext->npdu_num = 0;
- hdr0->gtpu.ext->nextexthdr = GTPU_EXTHDR_PDU_SESSION;
-
- type = qfi & SRV6_PDU_SESSION_U_BIT_MASK;
-
- qfi =
- ((qfi & SRV6_PDU_SESSION_QFI_MASK) >> 2) |
- ((qfi & SRV6_PDU_SESSION_R_BIT_MASK) << 5);
-
- sess =
- (gtpu_pdu_session_t *) (((char *) hdr0) +
- sizeof (ip6_gtpu_header_t) +
- sizeof (gtpu_exthdr_t));
- sess->exthdrlen = 1;
- sess->type = type;
- sess->spare = 0;
- sess->u.val = qfi;
- sess->nextexthdr = 0;
- }
-
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
hdr0->gtpu.ver_flags |= GTPU_SEQ_FLAG;
hdr0->gtpu.ext->seq = seq;
@@ -1506,29 +1561,57 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
}
}
}
+ else
+ {
+ if (qfi)
+ {
+ hdr0->gtpu.ext->seq = 0;
+ hdr0->gtpu.ext->npdu_num = 0;
+ }
+ }
+
+ if (qfi)
+ {
+ u8 type = 0;
+ gtpu_pdu_session_t *sess;
+
+ hdr0->gtpu.ver_flags |= GTPU_EXTHDR_FLAG;
+
+ hdr0->gtpu.ext->nextexthdr = GTPU_EXTHDR_PDU_SESSION;
+
+ type = qfi & SRV6_PDU_SESSION_U_BIT_MASK;
+
+ qfi = ((qfi & SRV6_PDU_SESSION_QFI_MASK) >> 2) |
+ ((qfi & SRV6_PDU_SESSION_R_BIT_MASK) << 5);
+
+ sess = (gtpu_pdu_session_t *) (((char *) hdr0) +
+ sizeof (ip6_gtpu_header_t) +
+ sizeof (gtpu_exthdr_t));
+ sess->exthdrlen = 1;
+ sess->type = type;
+ sess->spare = 0;
+ sess->u.val = qfi;
+ sess->nextexthdr = 0;
+ }
- hdr0->udp.length = clib_host_to_net_u16 (len0 +
- sizeof (udp_header_t) +
- sizeof
- (gtpu_header_t));
+ hdr0->udp.length = clib_host_to_net_u16 (
+ len0 + sizeof (udp_header_t) + sizeof (gtpu_header_t));
clib_memcpy_fast (hdr0->ip6.src_address.as_u8, src0.as_u8,
sizeof (ip6_address_t));
clib_memcpy_fast (hdr0->ip6.dst_address.as_u8, &seg0.as_u8,
sizeof (ip6_address_t));
- hdr0->ip6.payload_length = clib_host_to_net_u16 (len0 +
- sizeof
- (udp_header_t)
- +
- sizeof
- (gtpu_header_t));
+ hdr0->ip6.payload_length = clib_host_to_net_u16 (
+ len0 + sizeof (udp_header_t) + sizeof (gtpu_header_t));
// UDP source port.
key = hash_memory (p, plen < 40 ? plen : 40, 0);
port = hash_uword_to_u16 (&key);
hdr0->udp.src_port = port;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls_param->fib6_index;
+
good_n++;
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
@@ -1544,11 +1627,12 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
}
}
- vlib_increment_combined_counter
- (((next0 ==
- SRV6_END_M_GTP6_E_NEXT_DROP) ? &(sm2->sr_ls_invalid_counters) :
- &(sm2->sr_ls_valid_counters)), thread_index,
- ls0 - sm2->localsids, 1, vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (
+ ((next0 == SRV6_END_M_GTP6_E_NEXT_DROP) ?
+ &(sm2->sr_ls_invalid_counters) :
+ &(sm2->sr_ls_valid_counters)),
+ thread_index, ls0 - sm2->localsids, 1,
+ vlib_buffer_length_in_chain (vm, b0));
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
@@ -1567,491 +1651,570 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
}
// Function for SRv6 GTP6.D function
-VLIB_NODE_FN (srv6_end_m_gtp6_d) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+static inline u32
+srv6_gtp6_decap_processing (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b0)
{
srv6_end_main_v6_decap_t *sm = &srv6_end_main_v6_decap;
ip6_sr_main_t *sm2 = &sr_main;
- u32 n_left_from, next_index, *from, *to_next;
- u32 thread_index = vm->thread_index;
- u32 good_n = 0, bad_n = 0;
+ ip6_sr_localsid_t *ls0;
+ srv6_end_gtp6_d_param_t *ls_param;
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
+ ip6_gtpu_header_t *hdr0 = NULL;
+ uword len0;
- while (n_left_from > 0)
+ ip6_address_t seg0, src0, dst0;
+ u32 teid = 0;
+ u8 *teidp;
+ u8 gtpu_type = 0;
+ u8 qfi;
+ u8 *qfip = NULL;
+ u16 seq = 0;
+ u8 *seqp;
+ u32 offset, shift;
+ u32 hdrlen;
+ ip6_header_t *encap = NULL;
+ gtpu_pdu_session_t *sess = NULL;
+ int ie_size = 0;
+ u16 tlv_siz = 0;
+ u8 ie_buf[GTPU_IE_MAX_SIZ];
+
+ u32 next0 = SRV6_END_M_GTP6_D_NEXT_LOOKUP6;
+
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+ ls_param = (srv6_end_gtp6_d_param_t *) ls0->plugin_mem;
+
+ hdr0 = vlib_buffer_get_current (b0);
+
+ hdrlen = sizeof (ip6_gtpu_header_t);
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP) ||
+ (hdr0->udp.dst_port != clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT)) ||
+ (len0 < sizeof (ip6_gtpu_header_t)))
{
- u32 n_left_to_next;
+ next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ }
+ else
+ {
+ seg0 = ls_param->sr_prefix;
+ src0 = hdr0->ip6.src_address;
+ dst0 = hdr0->ip6.dst_address;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ gtpu_type = hdr0->gtpu.type;
- while (n_left_from > 0 && n_left_to_next > 0)
+ teid = hdr0->gtpu.teid;
+ teidp = (u8 *) &teid;
+
+ seqp = (u8 *) &seq;
+
+ if (hdr0->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
{
- u32 bi0;
- vlib_buffer_t *b0;
- ip6_sr_localsid_t *ls0;
- srv6_end_gtp6_param_t *ls_param;
+	  // Extension header.
+ hdrlen += sizeof (gtpu_exthdr_t);
- ip6_gtpu_header_t *hdr0 = NULL;
- uword len0;
+ seq = hdr0->gtpu.ext->seq;
- ip6_address_t seg0, src0;
- u32 teid = 0;
- u8 *teidp;
- u8 gtpu_type = 0;
- u8 qfi;
- u8 *qfip = NULL;
- u16 seq = 0;
- u8 *seqp;
- u32 offset, shift;
- u32 hdrlen;
- ip6_header_t *encap = NULL;
- gtpu_pdu_session_t *sess = NULL;
- int ie_size = 0;
- u16 tlv_siz = 0;
- u8 ie_buf[GTPU_IE_MAX_SIZ];
+ if (hdr0->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
+ {
+ // PDU Session Container.
+ sess = (gtpu_pdu_session_t *) (((char *) hdr0) +
+ sizeof (ip6_gtpu_header_t) +
+ sizeof (gtpu_exthdr_t));
+ qfi = sess->u.val & ~GTPU_PDU_SESSION_P_BIT_MASK;
+ qfip = (u8 *) &qfi;
- u32 next0 = SRV6_END_M_GTP6_D_NEXT_LOOKUP;
+ hdrlen += sizeof (gtpu_pdu_session_t);
- // defaults
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
+ if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
+ {
+ hdrlen += sizeof (gtpu_paging_policy_t);
+ }
+ }
+ }
- b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->localsids,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ offset = ls_param->sr_prefixlen / 8;
+ shift = ls_param->sr_prefixlen % 8;
- ls_param = (srv6_end_gtp6_param_t *) ls0->plugin_mem;
+ if (PREDICT_TRUE (shift == 0))
+ {
+ if (qfip)
+ {
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+ ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
- hdr0 = vlib_buffer_get_current (b0);
+ if (sess->type)
+ {
+ qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
+ }
- hdrlen = sizeof (ip6_gtpu_header_t);
+ seg0.as_u8[offset] = qfi;
+ }
- len0 = vlib_buffer_length_in_chain (vm, b0);
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ {
+ clib_memcpy_fast (&seg0.as_u8[offset + 1], seqp, 2);
+ }
+ else
+ {
+ clib_memcpy_fast (&seg0.as_u8[offset + 1], teidp, 4);
+ }
+ }
+ else
+ {
+ int idx;
- if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP)
- || (hdr0->udp.dst_port !=
- clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT))
- || (len0 < sizeof (ip6_gtpu_header_t)))
+ if (qfip)
{
- next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+		((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
- bad_n++;
+ if (sess->type)
+ {
+ qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
+ }
+
+ seg0.as_u8[offset] |= qfi >> shift;
+ seg0.as_u8[offset + 1] |= qfi << (8 - shift);
+ }
+
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ {
+ for (idx = 0; idx < 2; idx++)
+ {
+ seg0.as_u8[offset + idx + 1] |= seqp[idx] >> shift;
+ seg0.as_u8[offset + idx + 2] |= seqp[idx] << (8 - shift);
+ }
}
else
{
- seg0 = ls_param->sr_prefix;
- src0 = hdr0->ip6.src_address;
+ for (idx = 0; idx < 4; idx++)
+ {
+ seg0.as_u8[offset + idx + 1] |= teidp[idx] >> shift;
+ seg0.as_u8[offset + idx + 2] |= teidp[idx] << (8 - shift);
+ }
+ }
+ }
- gtpu_type = hdr0->gtpu.type;
+ if (PREDICT_FALSE (gtpu_type == GTPU_TYPE_ERROR_INDICATION))
+ {
+ u16 payload_len;
- teid = hdr0->gtpu.teid;
- teidp = (u8 *) & teid;
+ payload_len = clib_net_to_host_u16 (hdr0->gtpu.length);
+ if (payload_len != 0)
+ {
+ ie_size = payload_len - (hdrlen - sizeof (ip6_gtpu_header_t));
+ if (ie_size > 0)
+ {
+ u8 *ies;
- seqp = (u8 *) & seq;
+ ies = (u8 *) ((u8 *) hdr0 + hdrlen);
+ clib_memcpy_fast (ie_buf, ies, ie_size);
+ hdrlen += ie_size;
+ }
+ }
+ }
- if (hdr0->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
- {
- // Extention header.
- hdrlen += sizeof (gtpu_exthdr_t);
+ // jump over variable length data
+ vlib_buffer_advance (b0, (word) hdrlen);
- seq = hdr0->gtpu.ext->seq;
+ // get length of encapsulated IPv6 packet (the remaining part)
+ len0 = vlib_buffer_length_in_chain (vm, b0);
- if (hdr0->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
- {
- // PDU Session Container.
- sess =
- (gtpu_pdu_session_t *) (((char *) hdr0) +
- sizeof (ip6_gtpu_header_t) +
- sizeof (gtpu_exthdr_t));
- qfi = sess->u.val & ~GTPU_PDU_SESSION_P_BIT_MASK;
- qfip = (u8 *) & qfi;
+ if (PREDICT_TRUE (gtpu_type == GTPU_TYPE_GTPU))
+ {
+ encap = vlib_buffer_get_current (b0);
+ }
- hdrlen += sizeof (gtpu_pdu_session_t);
+ uword *p;
+ ip6srv_combo_header_t *ip6srv;
+ ip6_sr_policy_t *sr_policy = NULL;
+ ip6_sr_sl_t *sl = NULL;
+ u32 *sl_index;
+ u32 hdr_len;
- if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
- {
- hdrlen += sizeof (gtpu_paging_policy_t);
- }
- }
- }
+ p = mhash_get (&sm2->sr_policies_index_hash, &ls_param->sr_prefix);
+ if (p)
+ {
+ sr_policy = pool_elt_at_index (sm2->sr_policies, p[0]);
+ }
- offset = ls_param->sr_prefixlen / 8;
- shift = ls_param->sr_prefixlen % 8;
+ if (sr_policy)
+ {
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
+ if (sl != NULL)
+ break;
+ }
+ }
- if (PREDICT_TRUE (shift == 0))
- {
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
- {
- clib_memcpy_fast (&seg0.as_u8[offset + 1], seqp, 2);
- }
- else
- {
- clib_memcpy_fast (&seg0.as_u8[offset + 1], teidp, 4);
- }
+ if (sl)
+ {
+ hdr_len = sizeof (ip6srv_combo_header_t);
+ hdr_len += vec_len (sl->segments) * sizeof (ip6_address_t);
+ hdr_len += sizeof (ip6_address_t) * 2;
+ }
+ else
+ {
+ hdr_len = sizeof (ip6_header_t);
+ hdr_len += sizeof (ip6_sr_header_t);
+ hdr_len += sizeof (ip6_address_t) * 2;
+ }
- if (qfip)
- {
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ if (ie_size)
+ {
+ tlv_siz =
+ sizeof (ip6_sr_tlv_t) + sizeof (user_plane_sub_tlv_t) + ie_size;
- if (sess->type)
- {
- qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
- }
+ tlv_siz = (tlv_siz & ~0x07) + (tlv_siz & 0x07 ? 0x08 : 0x0);
+ hdr_len += tlv_siz;
+ }
- seg0.as_u8[offset] = qfi;
- }
- }
- else
- {
- int idx;
+ // jump back to data[0] or pre_data if required
+ vlib_buffer_advance (b0, -(word) hdr_len);
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
- {
- for (idx = 0; idx < 2; idx++)
- {
- seg0.as_u8[offset + idx + 1] |= seqp[idx] >> shift;
- seg0.as_u8[offset + idx + 2] |=
- seqp[idx] << (8 - shift);
- }
- }
- else
- {
- for (idx = 0; idx < 4; idx++)
- {
- seg0.as_u8[offset + idx + 1] |= teidp[idx] >> shift;
- seg0.as_u8[offset + idx + 2] |=
- teidp[idx] << (8 - shift);
- }
- }
+ ip6srv = vlib_buffer_get_current (b0);
- if (qfip)
- {
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & ~GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ if (sl)
+ {
+ clib_memcpy_fast (ip6srv, sl->rewrite, vec_len (sl->rewrite));
- if (sess->type)
- {
- qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
- }
+ if (vec_len (sl->segments) > 1)
+ {
+ ip6srv->ip.src_address = src0;
- seg0.as_u8[offset] |= qfi >> shift;
- seg0.as_u8[offset + 1] |= qfi << (8 - shift);
- }
- }
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
- if (PREDICT_FALSE (gtpu_type == GTPU_TYPE_ERROR_INDICATION))
- {
- u16 payload_len;
+ ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
+ ip6srv->sr.segments_left += 2;
+ ip6srv->sr.last_entry += 2;
- payload_len = clib_net_to_host_u16 (hdr0->gtpu.length);
- if (payload_len != 0)
- {
- ie_size =
- payload_len - (hdrlen - sizeof (ip6_gtpu_header_t));
- if (ie_size > 0)
- {
- u8 *ies;
+ ip6srv->sr.length += (sizeof (ip6_address_t) * 2) / 8;
+ ip6srv->sr.segments[0] = dst0;
+ ip6srv->sr.segments[1] = seg0;
- ies = (u8 *) ((u8 *) hdr0 + hdrlen);
- clib_memcpy_fast (ie_buf, ies, ie_size);
- hdrlen += ie_size;
- }
- }
- }
+ clib_memcpy_fast (&ip6srv->sr.segments[2],
+ (u8 *) (sl->rewrite + sizeof (ip6_header_t) +
+ sizeof (ip6_sr_header_t)),
+ vec_len (sl->segments) *
+ sizeof (ip6_address_t));
+ }
+ else
+ {
+ ip6srv->ip.src_address = src0;
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
- // jump over variable length data
- vlib_buffer_advance (b0, (word) hdrlen);
+ ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
+ ip6srv->sr.segments_left = 2;
+ ip6srv->sr.last_entry = 1;
+ ip6srv->sr.length = (sizeof (ip6_address_t) * 2) / 8;
+ ip6srv->sr.flags = 0;
- // get length of encapsulated IPv6 packet (the remaining part)
- len0 = vlib_buffer_length_in_chain (vm, b0);
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
- if (PREDICT_TRUE (gtpu_type == GTPU_TYPE_GTPU))
+ ip6srv->sr.segments[0] = dst0;
+ ip6srv->sr.segments[1] = seg0;
+
+ if (vec_len (sl->segments))
{
- encap = vlib_buffer_get_current (b0);
+ ip6srv->sr.segments[2] = sl->segments[0];
+ ip6srv->sr.last_entry++;
+ ip6srv->sr.length += sizeof (ip6_address_t) / 8;
}
+ }
- uword *p;
- ip6srv_combo_header_t *ip6srv;
- ip6_sr_policy_t *sr_policy = NULL;
- ip6_sr_sl_t *sl = NULL;
- u32 *sl_index;
- u32 hdr_len;
-
- p =
- mhash_get (&sm2->sr_policies_index_hash,
- &ls_param->sr_prefix);
- if (p)
+ if (PREDICT_TRUE (encap != NULL))
+ {
+ if (ls_param->nhtype == SRV6_NHTYPE_NONE)
{
- sr_policy = pool_elt_at_index (sm2->sr_policies, p[0]);
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
+ ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
+ else
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
}
-
- if (sr_policy)
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
{
- vec_foreach (sl_index, sr_policy->segments_lists)
- {
- sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
- if (sl != NULL)
- break;
- }
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 4)
+ {
+ // Bad encap packet.
+ next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ goto DONE;
+ }
+ }
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
+ {
+ // Bad encap packet.
+ next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ goto DONE;
+ }
+ }
+ else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
}
+ }
+ else
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ }
+ }
+ else
+ {
+ clib_memcpy_fast (ip6srv, &sm->cache_hdr, sizeof (ip6_header_t));
- if (sl)
+ ip6srv->ip.src_address = src0;
+ ip6srv->ip.dst_address = seg0;
+
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+
+ ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
+ ip6srv->sr.segments_left = 1;
+ ip6srv->sr.last_entry = 1;
+
+ ip6srv->sr.length = (sizeof (ip6_address_t) * 2) / 8;
+ ip6srv->sr.segments[0] = dst0;
+ ip6srv->sr.segments[1] = seg0;
+
+	  if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU))
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ }
+ else
+ {
+ if (ls_param->nhtype == SRV6_NHTYPE_NONE)
{
- hdr_len = sizeof (ip6srv_combo_header_t);
- hdr_len += vec_len (sl->segments) * sizeof (ip6_address_t);
- hdr_len += sizeof (ip6_address_t);
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
}
- else
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
{
- hdr_len = sizeof (ip6_header_t);
- if (PREDICT_FALSE (gtpu_type) != GTPU_TYPE_GTPU)
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 4)
{
- hdr_len += sizeof (ip6_sr_header_t);
- hdr_len += sizeof (ip6_address_t);
+ // Bad encap packet.
+ next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ goto DONE;
}
}
-
- if (ie_size)
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
{
- tlv_siz =
- sizeof (ip6_sr_tlv_t) + sizeof (user_plane_sub_tlv_t) +
- ie_size;
-
- tlv_siz = (tlv_siz & ~0x07) + (tlv_siz & 0x07 ? 0x08 : 0x0);
- hdr_len += tlv_siz;
+ ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
+ {
+ // Bad encap packet.
+ next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ goto DONE;
+ }
+ }
+ else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
}
+ }
+ }
- // jump back to data[0] or pre_data if required
- vlib_buffer_advance (b0, -(word) hdr_len);
+ if (PREDICT_FALSE (ie_size))
+ {
+ ip6_sr_tlv_t *tlv;
+ user_plane_sub_tlv_t *sub_tlv;
- ip6srv = vlib_buffer_get_current (b0);
+ tlv = (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
+ tlv->type = SRH_TLV_USER_PLANE_CONTAINER;
+ tlv->length = (u8) (tlv_siz - sizeof (ip6_sr_tlv_t));
+ clib_memset (tlv->value, 0, tlv->length);
- if (sl)
- {
- clib_memcpy_fast (ip6srv, sl->rewrite,
- vec_len (sl->rewrite));
+ sub_tlv = (user_plane_sub_tlv_t *) tlv->value;
+ sub_tlv->type = USER_PLANE_SUB_TLV_IE;
+ sub_tlv->length = (u8) ie_size;
+ clib_memcpy_fast (sub_tlv->value, ie_buf, ie_size);
- if (vec_len (sl->segments) > 1)
- {
- ip6srv->ip.src_address = src0;
+ ip6srv->sr.length += (u8) (tlv_siz / 8);
+ }
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ ip6srv->ip.payload_length =
+ clib_host_to_net_u16 (len0 + hdr_len - sizeof (ip6_header_t));
- ip6srv->sr.segments_left += 1;
- ip6srv->sr.last_entry += 1;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; /* default FIB */
- ip6srv->sr.length += sizeof (ip6_address_t) / 8;
- ip6srv->sr.segments[0] = seg0;
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ srv6_end_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip6srv->ip.src_address.as_u8,
+ sizeof (ip6_address_t));
+ clib_memcpy (tr->dst.as_u8, ip6srv->ip.dst_address.as_u8,
+ sizeof (ip6_address_t));
+ tr->teid = teid;
+ clib_memcpy (tr->sr_prefix.as_u8, ls_param->sr_prefix.as_u8,
+ sizeof (ip6_address_t));
+ tr->sr_prefixlen = ls_param->sr_prefixlen;
+ }
+ }
- clib_memcpy_fast (&ip6srv->sr.segments[1],
- (u8 *) (sl->rewrite +
- sizeof (ip6_header_t) +
- sizeof (ip6_sr_header_t)),
- vec_len (sl->segments) *
- sizeof (ip6_address_t));
- }
- else
- {
- ip6srv->ip.src_address = src0;
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+DONE:
+ return next0;
+}
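Note on the GTP6.D rework above: like GTP4.D, the per-packet logic is now a
standalone routine, and in the multi-segment case the pushed SRH gains two
extra segments, segments[0] = the original outer IPv6 destination and
segments[1] = the SID derived from TEID/QFI, so segments_left, last_entry
and sr.length each grow by two entries' worth. The arithmetic, sketched:

/* sr.length is in 8-octet units; two IPv6 addresses add (2 * 16) / 8. */
sr->segments_left += 2;
sr->last_entry    += 2;
sr->length        += (2 * sizeof (ip6_address_t)) / 8;
sr->segments[0] = outer_dst;   /* original IPv6 destination (dst0) */
sr->segments[1] = derived_sid; /* TEID/QFI-derived segment (seg0) */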
- ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
- ip6srv->sr.segments_left = 1;
- ip6srv->sr.last_entry = 0;
- ip6srv->sr.length =
- ((sizeof (ip6_sr_header_t) +
- sizeof (ip6_address_t)) / 8) - 1;
- ip6srv->sr.flags = 0;
+VLIB_NODE_FN (srv6_end_m_gtp6_d)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ srv6_end_main_v6_decap_t *sm = &srv6_end_main_v6_decap;
+ ip6_sr_main_t *sm2 = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 thread_index = vm->thread_index;
+ ip6_sr_localsid_t *ls0;
+ srv6_end_gtp6_d_param_t *ls_param;
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ u32 good_n = 0, bad_n = 0;
- ip6srv->sr.segments[0] = seg0;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
- if (vec_len (sl->segments))
- {
- ip6srv->sr.segments[1] = sl->segments[0];
- ip6srv->sr.last_entry++;
- ip6srv->sr.length += sizeof (ip6_address_t) / 8;
- }
- }
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
- if (PREDICT_TRUE (encap != NULL))
- {
- if (ls_param->nhtype == SRV6_NHTYPE_NONE)
- {
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) == 6)
- ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
- else
- ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 4)
- {
- // Bad encap packet.
- next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 6)
- {
- // Bad encap packet.
- next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
- }
- }
- else
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
- }
- }
- else
- {
- clib_memcpy_fast (ip6srv, &sm->cache_hdr,
- sizeof (ip6_header_t));
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- ip6srv->ip.src_address = src0;
- ip6srv->ip.dst_address = seg0;
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
- if (PREDICT_FALSE (gtpu_type) != GTPU_TYPE_GTPU)
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+ u32 next0;
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ ip6_gtpu_header_t *hdr;
+ u32 hdrlen;
+ u8 gtpu_type;
+ bool gtp6;
+ bool ipv4;
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ // defaults
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
- ip6srv->sr.segments_left = 0;
- ip6srv->sr.last_entry = 0;
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
- ip6srv->sr.length = sizeof (ip6_address_t) / 8;
- ip6srv->sr.segments[0] = seg0;
- }
- else
+ ls_param = (srv6_end_gtp6_d_param_t *) ls0->plugin_mem;
+
+ hdr = vlib_buffer_get_current (b0);
+ gtpu_type = hdr->gtpu.type;
+
+ gtp6 = false;
+ ipv4 = true;
+
+ if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU || ls_param->drop_in))
+ {
+ gtp6 = true;
+ }
+ else
+ {
+ ip6_header_t *ip6;
+
+ hdrlen = sizeof (ip6_gtpu_header_t);
+
+ if (hdr->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
+ {
+ hdrlen += sizeof (gtpu_exthdr_t);
+ if (hdr->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
{
- if (ls_param->nhtype == SRV6_NHTYPE_NONE)
- {
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 6)
- ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 4)
- {
- // Bad encap packet.
- next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 6)
- {
- // Bad encap packet.
- next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
+ gtpu_pdu_session_t *sess;
+ sess = (gtpu_pdu_session_t *) (((char *) hdr) + hdrlen);
+ hdrlen += sizeof (gtpu_pdu_session_t);
+
+ if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
{
- ip6srv->ip.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ hdrlen += sizeof (gtpu_paging_policy_t);
}
}
}
- if (PREDICT_FALSE (ie_size))
+ ip6 = (ip6_header_t *) (((char *) hdr) + hdrlen);
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
{
- ip6_sr_tlv_t *tlv;
- user_plane_sub_tlv_t *sub_tlv;
-
- tlv =
- (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
- tlv->type = SRH_TLV_USER_PLANE_CONTAINER;
- tlv->length = (u8) (tlv_siz - sizeof (ip6_sr_tlv_t));
- clib_memset (tlv->value, 0, tlv->length);
-
- sub_tlv = (user_plane_sub_tlv_t *) tlv->value;
- sub_tlv->type = USER_PLANE_SUB_TLV_IE;
- sub_tlv->length = (u8) ie_size;
- clib_memcpy_fast (sub_tlv->value, ie_buf, ie_size);
-
- ip6srv->sr.length += (u8) (tlv_siz / 8);
+ ipv4 = false;
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
+ {
+		  // Inner destination is IPv6 link-local
+ gtp6 = true;
+ }
}
+ }
- ip6srv->ip.payload_length =
- clib_host_to_net_u16 (len0 + hdr_len - sizeof (ip6_header_t));
-
- good_n++;
+ if (gtp6)
+ {
+ next0 = srv6_gtp6_decap_processing (vm, node, b0);
+ if (PREDICT_TRUE (next0 == SRV6_END_M_GTP6_D_NEXT_LOOKUP6))
+ good_n++;
+ else
+ bad_n++;
+
+ vlib_increment_combined_counter (
+ ((next0 == SRV6_END_M_GTP6_D_NEXT_DROP) ?
+ &(sm2->sr_ls_invalid_counters) :
+ &(sm2->sr_ls_valid_counters)),
+ thread_index, ls0 - sm2->localsids, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+ else
+ {
+ /* Strip off the outer header (IPv6 + GTP + UDP + IEs) */
+ vlib_buffer_advance (b0, (word) hdrlen);
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
- PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ if (ipv4)
{
- srv6_end_rewrite_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- clib_memcpy (tr->src.as_u8, ip6srv->ip.src_address.as_u8,
- sizeof (ip6_address_t));
- clib_memcpy (tr->dst.as_u8, ip6srv->ip.dst_address.as_u8,
- sizeof (ip6_address_t));
- tr->teid = teid;
- clib_memcpy (tr->sr_prefix.as_u8, ls_param->sr_prefix.as_u8,
- sizeof (ip6_address_t));
- tr->sr_prefixlen = ls_param->sr_prefixlen;
+ next0 = SRV6_END_M_GTP6_D_NEXT_LOOKUP4;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ ls_param->fib4_index;
+ }
+ else
+ {
+ next0 = SRV6_END_M_GTP6_D_NEXT_LOOKUP6;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ ls_param->fib6_index;
}
}
- DONE:
- vlib_increment_combined_counter
- (((next0 ==
- SRV6_END_M_GTP6_D_NEXT_DROP) ? &(sm2->sr_ls_invalid_counters) :
- &(sm2->sr_ls_valid_counters)), thread_index,
- ls0 - sm2->localsids, 1, vlib_buffer_length_in_chain (vm, b0));
-
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}
@@ -2069,15 +2232,14 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d) (vlib_main_t * vm,
}
// Function for SRv6 GTP6.D.DI function
-VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (srv6_end_m_gtp6_d_di)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
srv6_end_main_v6_decap_di_t *sm = &srv6_end_main_v6_decap_di;
ip6_sr_main_t *sm2 = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
u32 thread_index = vm->thread_index;
- srv6_end_gtp6_param_t *ls_param;
+ srv6_end_gtp6_d_param_t *ls_param;
u32 good_n = 0, bad_n = 0;
@@ -2129,11 +2291,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->localsids,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
- ls_param = (srv6_end_gtp6_param_t *) ls0->plugin_mem;
+ ls_param = (srv6_end_gtp6_d_param_t *) ls0->plugin_mem;
hdr0 = vlib_buffer_get_current (b0);
@@ -2141,10 +2302,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
len0 = vlib_buffer_length_in_chain (vm, b0);
- if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP)
- || (hdr0->udp.dst_port !=
- clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT))
- || (len0 < sizeof (ip6_gtpu_header_t)))
+ if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP) ||
+ (hdr0->udp.dst_port !=
+ clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT)) ||
+ (len0 < sizeof (ip6_gtpu_header_t)))
{
next0 = SRV6_END_M_GTP6_D_DI_NEXT_DROP;
@@ -2159,9 +2320,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
seg0 = ls_param->sr_prefix;
teid = hdr0->gtpu.teid;
- teidp = (u8 *) & teid;
+ teidp = (u8 *) &teid;
- seqp = (u8 *) & seq;
+ seqp = (u8 *) &seq;
if (hdr0->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
{
@@ -2173,8 +2334,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
if (hdr0->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
{
// PDU Session Container.
- sess =
- (gtpu_pdu_session_t *) (((char *) hdr0) + hdrlen);
+ sess = (gtpu_pdu_session_t *) (((char *) hdr0) + hdrlen);
qfi = sess->u.val & ~GTPU_PDU_SESSION_P_BIT_MASK;
qfip = &qfi;
@@ -2190,71 +2350,70 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
offset = ls_param->sr_prefixlen / 8;
shift = ls_param->sr_prefixlen % 8;
+ offset += 1;
if (PREDICT_TRUE (shift == 0))
{
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
- clib_memcpy_fast (&seg0.as_u8[offset + 1], seqp, 2);
+ clib_memcpy_fast (&seg0.as_u8[offset], seqp, 2);
}
else
{
- clib_memcpy_fast (&seg0.as_u8[offset + 1], teidp, 4);
+ clib_memcpy_fast (&seg0.as_u8[offset], teidp, 4);
}
if (qfip)
{
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+ ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
if (sess->type)
{
qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
}
- seg0.as_u8[offset] = qfi;
+ seg0.as_u8[offset + 4] = qfi;
}
}
else
{
int idx;
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
for (idx = 0; idx < 2; idx++)
{
- seg0.as_u8[offset + idx + 1] |= seqp[idx] >> shift;
- seg0.as_u8[offset + idx + 2] |=
- seqp[idx] << (8 - shift);
+ seg0.as_u8[offset + idx] |= seqp[idx] >> shift;
+ seg0.as_u8[offset + idx + 1] |= seqp[idx]
+ << (8 - shift);
}
}
else
{
for (idx = 0; idx < 4; idx++)
{
- seg0.as_u8[offset + idx + 1] |= teidp[idx] >> shift;
- seg0.as_u8[offset + idx + 2] |=
- teidp[idx] << (8 - shift);
+ seg0.as_u8[offset + idx] |= teidp[idx] >> shift;
+ seg0.as_u8[offset + idx + 1] |= teidp[idx]
+ << (8 - shift);
}
}
if (qfip)
{
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+ ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
if (sess->type)
{
qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
}
- seg0.as_u8[offset] |= qfi >> shift;
- seg0.as_u8[offset + 1] |= qfi << (8 - shift);
+ seg0.as_u8[offset + 4] |= qfi >> shift;
+ seg0.as_u8[offset + 5] |= qfi << (8 - shift);
}
}
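The hunk above moves the TEID/sequence-number/QFI encoding one byte deeper into the SID (the added offset += 1) while keeping the same merge logic for prefix lengths that are not byte aligned. A condensed sketch of that merge; the helper name is illustrative, and clib_memcpy_fast is the vppinfra copy used above:

  static void
  embed_field (u8 *sid, u32 sr_prefixlen, const u8 *src, int n_bytes)
  {
    u32 offset = sr_prefixlen / 8 + 1; /* +1 matches the offset += 1 above */
    u32 shift = sr_prefixlen % 8;
    int i;

    if (shift == 0)
      clib_memcpy_fast (&sid[offset], src, n_bytes);
    else
      for (i = 0; i < n_bytes; i++)
	{
	  /* spill each source byte across the bit boundary */
	  sid[offset + i] |= src[i] >> shift;
	  sid[offset + i + 1] |= src[i] << (8 - shift);
	}
  }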
@@ -2297,8 +2456,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
u32 hdr_len;
p =
- mhash_get (&sm2->sr_policies_index_hash,
- &ls_param->sr_prefix);
+ mhash_get (&sm2->sr_policies_index_hash, &ls_param->sr_prefix);
if (p)
{
sr_policy = pool_elt_at_index (sm2->sr_policies, p[0]);
@@ -2307,11 +2465,11 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
if (sr_policy)
{
vec_foreach (sl_index, sr_policy->segments_lists)
- {
- sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
- if (sl != NULL)
- break;
- }
+ {
+ sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
+ if (sl != NULL)
+ break;
+ }
}
hdr_len = sizeof (ip6srv_combo_header_t);
@@ -2323,9 +2481,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
if (ie_size)
{
- tlv_siz =
- sizeof (ip6_sr_tlv_t) + sizeof (user_plane_sub_tlv_t) +
- ie_size;
+ tlv_siz = sizeof (ip6_sr_tlv_t) +
+ sizeof (user_plane_sub_tlv_t) + ie_size;
tlv_siz = (tlv_siz & ~0x07) + (tlv_siz & 0x07 ? 0x08 : 0x0);
hdr_len += tlv_siz;
@@ -2356,12 +2513,11 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
ip6srv->sr.segments[0] = dst0;
ip6srv->sr.segments[1] = seg0;
- clib_memcpy_fast (&ip6srv->sr.segments[2],
- (u8 *) (sl->rewrite +
- sizeof (ip6_header_t) +
- sizeof (ip6_sr_header_t)),
- vec_len (sl->segments) *
- sizeof (ip6_address_t));
+ clib_memcpy_fast (
+ &ip6srv->sr.segments[2],
+ (u8 *) (sl->rewrite + sizeof (ip6_header_t) +
+ sizeof (ip6_sr_header_t)),
+ vec_len (sl->segments) * sizeof (ip6_address_t));
}
else
{
@@ -2371,9 +2527,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
ip6srv->sr.segments_left = 2;
ip6srv->sr.last_entry = 1;
- ip6srv->sr.length =
- ((sizeof (ip6_sr_header_t) +
- 2 * sizeof (ip6_address_t)) / 8) - 1;
+ ip6srv->sr.length = ((sizeof (ip6_sr_header_t) +
+ 2 * sizeof (ip6_address_t)) /
+ 8) -
+ 1;
ip6srv->sr.flags = 0;
ip6srv->sr.tag =
@@ -2402,8 +2559,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
ip6srv->sr.segments_left = 1;
ip6srv->sr.last_entry = 0;
ip6srv->sr.length =
- ((sizeof (ip6_sr_header_t) +
- sizeof (ip6_address_t)) / 8) - 1;
+ ((sizeof (ip6_sr_header_t) + sizeof (ip6_address_t)) / 8) -
+ 1;
ip6srv->sr.flags = 0;
ip6srv->sr.tag =
@@ -2417,8 +2574,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
ip6_sr_tlv_t *tlv;
user_plane_sub_tlv_t *sub_tlv;
- tlv =
- (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
+ tlv = (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
tlv->type = SRH_TLV_USER_PLANE_CONTAINER;
tlv->length = (u8) (tlv_siz - sizeof (ip6_sr_tlv_t));
clib_memset (tlv->value, 0, tlv->length);
@@ -2438,8 +2594,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
{
if (ls_param->nhtype == SRV6_NHTYPE_NONE)
{
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label) >>
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
28) == 6)
ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
else
@@ -2448,8 +2604,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
{
ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label) >>
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
28) != 4)
{
// Bad encap packet.
@@ -2461,8 +2617,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
{
ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label) >>
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
28) != 6)
{
// Bad encap packet.
@@ -2500,12 +2656,12 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
}
DONE:
- vlib_increment_combined_counter
- (((next0 ==
- SRV6_END_M_GTP6_D_DI_NEXT_DROP) ?
- &(sm2->sr_ls_invalid_counters) : &(sm2->sr_ls_valid_counters)),
- thread_index, ls0 - sm2->localsids, 1,
- vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (
+ ((next0 == SRV6_END_M_GTP6_D_DI_NEXT_DROP) ?
+ &(sm2->sr_ls_invalid_counters) :
+ &(sm2->sr_ls_valid_counters)),
+ thread_index, ls0 - sm2->localsids, 1,
+ vlib_buffer_length_in_chain (vm, b0));
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
@@ -2524,9 +2680,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
}
// Function for SRv6 GTP6.DT function
-VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (srv6_end_m_gtp6_dt)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
srv6_end_main_v6_dt_t *sm = &srv6_end_main_v6_dt;
ip6_sr_main_t *sm2 = &sr_main;
@@ -2570,9 +2725,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->localsids,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
ls_param = (srv6_end_gtp6_dt_param_t *) ls0->plugin_mem;
@@ -2582,10 +2736,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
len0 = vlib_buffer_length_in_chain (vm, b0);
- if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP)
- || (hdr0->udp.dst_port !=
- clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT))
- || (len0 < sizeof (ip6_gtpu_header_t)))
+ if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP) ||
+ (hdr0->udp.dst_port !=
+ clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT)) ||
+ (len0 < sizeof (ip6_gtpu_header_t)))
{
next0 = SRV6_END_M_GTP6_DT_NEXT_DROP;
@@ -2638,9 +2792,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
else if (ls_param->type == SRV6_GTP6_DT6)
{
ip6 = (ip6_header_t *) ((u8 *) hdr0 + hdrlen);
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- != 6)
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
{
next0 = SRV6_END_M_GTP6_DT_NEXT_DROP;
bad_n++;
@@ -2648,8 +2802,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
}
next0 = SRV6_END_M_GTP6_DT_NEXT_LOOKUP6;
- if ((ip6->dst_address.as_u8[0] == 0xff)
- && ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80))
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
{
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
ls_param->local_fib_index;
@@ -2664,13 +2820,15 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
else if (ls_param->type == SRV6_GTP6_DT46)
{
ip6 = (ip6_header_t *) ((u8 *) hdr0 + hdrlen);
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- == 6)
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
{
next0 = SRV6_END_M_GTP6_DT_NEXT_LOOKUP6;
- if ((ip6->dst_address.as_u8[0] == 0xff)
- && ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80))
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
{
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
ls_param->local_fib_index;
@@ -2682,10 +2840,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
ls_param->fib6_index;
}
}
- else
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- == 4)
+ else if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 4)
{
vlib_buffer_advance (b0, (word) hdrlen);
next0 = SRV6_END_M_GTP6_DT_NEXT_LOOKUP4;
@@ -2722,11 +2879,12 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
}
DONE:
- vlib_increment_combined_counter
- (((next0 ==
- SRV6_END_M_GTP6_DT_NEXT_DROP) ? &(sm2->sr_ls_invalid_counters)
- : &(sm2->sr_ls_valid_counters)), thread_index,
- ls0 - sm2->localsids, 1, vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (
+ ((next0 == SRV6_END_M_GTP6_DT_NEXT_DROP) ?
+ &(sm2->sr_ls_invalid_counters) :
+ &(sm2->sr_ls_valid_counters)),
+ thread_index, ls0 - sm2->localsids, 1,
+ vlib_buffer_length_in_chain (vm, b0));
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
@@ -2745,9 +2903,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
}
// Function for SRv6 GTP4.DT function
-VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (srv6_t_m_gtp4_dt)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
srv6_t_main_v4_dt_t *sm = &srv6_t_main_v4_dt;
ip6_sr_main_t *sm2 = &sr_main;
@@ -2790,9 +2947,8 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->sid_lists,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls0 = pool_elt_at_index (sm2->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
ls_param = (srv6_t_gtp4_dt_param_t *) ls0->plugin_mem;
@@ -2802,10 +2958,10 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
len0 = vlib_buffer_length_in_chain (vm, b0);
- if ((hdr0->ip4.protocol != IP_PROTOCOL_UDP)
- || (hdr0->udp.dst_port !=
- clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT))
- || (len0 < sizeof (ip4_gtpu_header_t)))
+ if ((hdr0->ip4.protocol != IP_PROTOCOL_UDP) ||
+ (hdr0->udp.dst_port !=
+ clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT)) ||
+ (len0 < sizeof (ip4_gtpu_header_t)))
{
next0 = SRV6_T_M_GTP4_DT_NEXT_DROP;
@@ -2858,9 +3014,9 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
else if (ls_param->type == SRV6_GTP4_DT6)
{
ip6 = (ip6_header_t *) ((u8 *) hdr0 + hdrlen);
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- != 6)
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
{
next0 = SRV6_T_M_GTP4_DT_NEXT_DROP;
bad_n++;
@@ -2868,8 +3024,10 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
}
next0 = SRV6_T_M_GTP4_DT_NEXT_LOOKUP6;
- if ((ip6->dst_address.as_u8[0] == 0xff)
- && ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80))
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
{
next0 = SRV6_T_M_GTP4_DT_NEXT_LOOKUP4;
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
@@ -2885,13 +3043,15 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
else if (ls_param->type == SRV6_GTP4_DT46)
{
ip6 = (ip6_header_t *) ((u8 *) hdr0 + hdrlen);
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- == 6)
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
{
next0 = SRV6_T_M_GTP4_DT_NEXT_LOOKUP6;
- if ((ip6->dst_address.as_u8[0] == 0xff)
- && ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80))
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
{
next0 = SRV6_T_M_GTP4_DT_NEXT_LOOKUP4;
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
@@ -2904,10 +3064,9 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
ls_param->fib6_index;
}
}
- else
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- == 4)
+ else if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 4)
{
vlib_buffer_advance (b0, (word) hdrlen);
next0 = SRV6_T_M_GTP4_DT_NEXT_LOOKUP4;
@@ -2968,8 +3127,8 @@ VLIB_REGISTER_NODE (srv6_end_m_gtp6_e) =
srv6_end_error_v6_e_strings,.n_next_nodes =
SRV6_END_M_GTP6_E_N_NEXT,.next_nodes =
{
- [SRV6_END_M_GTP6_E_NEXT_DROP] =
- "error-drop",[SRV6_END_M_GTP6_E_NEXT_LOOKUP] = "ip6-lookup",}
+ [SRV6_END_M_GTP6_E_NEXT_DROP] = "error-drop",
+ [SRV6_END_M_GTP6_E_NEXT_LOOKUP] = "ip6-lookup",}
,};
VLIB_REGISTER_NODE (srv6_end_m_gtp6_d) =
@@ -2980,8 +3139,9 @@ VLIB_REGISTER_NODE (srv6_end_m_gtp6_d) =
srv6_end_error_v6_d_strings,.n_next_nodes =
SRV6_END_M_GTP6_D_N_NEXT,.next_nodes =
{
- [SRV6_END_M_GTP6_D_NEXT_DROP] =
- "error-drop",[SRV6_END_M_GTP6_D_NEXT_LOOKUP] = "ip6-lookup",}
+ [SRV6_END_M_GTP6_D_NEXT_DROP] = "error-drop",
+ [SRV6_END_M_GTP6_D_NEXT_LOOKUP4] = "ip4-lookup",
+ [SRV6_END_M_GTP6_D_NEXT_LOOKUP6] = "ip6-lookup",}
,};
VLIB_REGISTER_NODE (srv6_end_m_gtp6_d_di) =
@@ -2993,7 +3153,7 @@ VLIB_REGISTER_NODE (srv6_end_m_gtp6_d_di) =
SRV6_END_M_GTP6_D_DI_N_NEXT,.next_nodes =
{
[SRV6_END_M_GTP6_D_DI_NEXT_DROP] = "error-drop",
- [SRV6_END_M_GTP6_D_DI_NEXT_LOOKUP] = "ip6-lookup",}
+ [SRV6_END_M_GTP6_D_DI_NEXT_LOOKUP] = "ip6-lookup",}
,};
VLIB_REGISTER_NODE (srv6_end_m_gtp6_dt) =
@@ -3004,10 +3164,9 @@ VLIB_REGISTER_NODE (srv6_end_m_gtp6_dt) =
srv6_end_error_v6_dt_strings,.n_next_nodes =
SRV6_END_M_GTP6_DT_N_NEXT,.next_nodes =
{
- [SRV6_END_M_GTP6_DT_NEXT_DROP] =
- "error-drop",
- [SRV6_END_M_GTP6_DT_NEXT_LOOKUP4]
- = "ip4-lookup",[SRV6_END_M_GTP6_DT_NEXT_LOOKUP6] = "ip6-lookup",}
+ [SRV6_END_M_GTP6_DT_NEXT_DROP] = "error-drop",
+ [SRV6_END_M_GTP6_DT_NEXT_LOOKUP4] = "ip4-lookup",
+ [SRV6_END_M_GTP6_DT_NEXT_LOOKUP6] = "ip6-lookup",}
,};
VLIB_REGISTER_NODE (srv6_t_m_gtp4_dt) =
@@ -3018,10 +3177,9 @@ VLIB_REGISTER_NODE (srv6_t_m_gtp4_dt) =
srv6_t_error_v4_dt_strings,.n_next_nodes =
SRV6_T_M_GTP4_DT_N_NEXT,.next_nodes =
{
- [SRV6_T_M_GTP4_DT_NEXT_DROP] =
- "error-drop",
- [SRV6_T_M_GTP4_DT_NEXT_LOOKUP4] =
- "ip4-lookup",[SRV6_T_M_GTP4_DT_NEXT_LOOKUP6] = "ip6-lookup",}
+ [SRV6_T_M_GTP4_DT_NEXT_DROP] = "error-drop",
+ [SRV6_T_M_GTP4_DT_NEXT_LOOKUP4] = "ip4-lookup",
+ [SRV6_T_M_GTP4_DT_NEXT_LOOKUP6] = "ip6-lookup",}
,};
/*
diff --git a/src/plugins/srv6-mobile/sr_mobile.api b/src/plugins/srv6-mobile/sr_mobile.api
new file mode 100644
index 00000000000..1487085a695
--- /dev/null
+++ b/src/plugins/srv6-mobile/sr_mobile.api
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 BBSakura Networks Inc and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "0.1.0";
+
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
+import "vnet/srv6/sr_types.api";
+import "vnet/srv6/sr.api";
+import "plugins/srv6-mobile/sr_mobile_types.api";
+
+/** \brief IPv6 SR for Mobile LocalSID add/del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param is_del - Boolean of whether it's a delete instruction
+    @param localsid_prefix - IPv6 address of the localsid
+    @param behavior - the behavior of the SR localsid.
+    @param fib_table - FIB table in which we should install the localsid entry
+    @param local_fib_table - FIB table used to look up and forward the GTP-U packet based on its outer IP destination address. optional
+    @param drop_in - drop-in mode that reconverts the packet to GTPv1. optional
+    @param nhtype - next-header type. optional.
+    @param sr_prefix - v6 src IP encoding prefix. optional.
+    @param v4src_addr - IPv4 source address to be embedded. optional.
+    @param v4src_position - bit position where the IPv4 src address is embedded. optional.
+*/
+autoreply define sr_mobile_localsid_add_del
+{
+ u32 client_index;
+ u32 context;
+ bool is_del [default=false];
+ vl_api_ip6_prefix_t localsid_prefix;
+ string behavior[64];
+ u32 fib_table;
+ u32 local_fib_table;
+ bool drop_in;
+ vl_api_sr_mobile_nhtype_t nhtype;
+ vl_api_ip6_prefix_t sr_prefix;
+ vl_api_ip4_address_t v4src_addr;
+ u32 v4src_position;
+};
+
+/** \brief IPv6 SR for Mobile policy add
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+    @param bsid_addr - the binding SID of the SR Policy
+    @param sr_prefix - v6 dst IP encoding prefix. optional
+    @param v6src_prefix - v6 src prefix. optional
+    @param behavior - the behavior of the SR policy.
+    @param fib_table - the VRF in which to install the FIB entry for the BSID
+    @param encap_src - encapsulation IPv6 source address. optional
+    @param local_fib_table - FIB table used to look up and forward the GTP-U packet based on its outer IP destination address. optional
+    @param drop_in - drop-in mode that reconverts the packet to GTPv1. optional
+    @param nhtype - next-header type.
+*/
+autoreply define sr_mobile_policy_add
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ip6_address_t bsid_addr;
+ vl_api_ip6_prefix_t sr_prefix;
+ vl_api_ip6_prefix_t v6src_prefix;
+ string behavior[64];
+ u32 fib_table;
+ u32 local_fib_table;
+ vl_api_ip6_address_t encap_src;
+ bool drop_in;
+ vl_api_sr_mobile_nhtype_t nhtype;
+};
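For orientation, a hedged sketch of how a C binary-API client might populate the localsid message defined above; vl_msg_api_alloc and clib_host_to_net_u32 are the stock VPP helpers, the message-id setup and send path are elided, and only fields from the definition are touched:

  vl_api_sr_mobile_localsid_add_del_t *mp;

  mp = vl_msg_api_alloc (sizeof (*mp));
  clib_memset (mp, 0, sizeof (*mp));
  mp->is_del = false;
  mp->fib_table = clib_host_to_net_u32 (0);
  mp->nhtype = SRV6_NHTYPE_API_IPV6;
  strncpy ((char *) mp->behavior, "end.m.gtp6.d", sizeof (mp->behavior) - 1);
  /* localsid_prefix and sr_prefix are filled in as ip6 prefixes;
   * the _vl_msg_id assignment and the send call are omitted here */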
diff --git a/src/plugins/srv6-mobile/sr_mobile_api.c b/src/plugins/srv6-mobile/sr_mobile_api.c
new file mode 100644
index 00000000000..51199317a3b
--- /dev/null
+++ b/src/plugins/srv6-mobile/sr_mobile_api.c
@@ -0,0 +1,339 @@
+/*
+ *------------------------------------------------------------------
+ * sr_mobile_api.c - ipv6 segment routing for mobile u-plane api
+ *
+ * Copyright (c) 2022 BBSakura Networks Inc and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/ip/ip_types_api.h>
+
+#include <vnet/format_fns.h>
+#include <vnet/srv6/sr.api_enum.h>
+#include <vnet/srv6/sr.api_types.h>
+
+#include <srv6-mobile/mobile.h>
+#include <srv6-mobile/sr_mobile.api_types.h>
+#include <srv6-mobile/sr_mobile_types.api_types.h>
+#include <srv6-mobile/sr_mobile.api_enum.h>
+
+#include <srv6-mobile/sr_mobile_api.h>
+
+u16 msg_id_base;
+#define REPLY_MSG_ID_BASE msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static inline uint16_t
+sr_plugin_localsid_fn_num_find_by (ip6_sr_main_t *sm, const char *keyword_str,
+ size_t keyword_len)
+{
+ sr_localsid_fn_registration_t *plugin = 0, **vec_plugins = 0;
+ sr_localsid_fn_registration_t **plugin_it = 0;
+ pool_foreach (plugin, sm->plugin_functions)
+ {
+ vec_add1 (vec_plugins, plugin);
+ }
+
+ vec_foreach (plugin_it, vec_plugins)
+ {
+ if (!srv6_mobile_strcmp_with_size (keyword_str, keyword_len,
+ (char *) (*plugin_it)->keyword_str))
+ {
+ return (*plugin_it)->sr_localsid_function_number;
+ }
+ }
+ return UINT16_MAX;
+}
+
+static inline uint16_t
+sr_plugin_policy_fn_num_find_by (ip6_sr_main_t *sm, const char *keyword_str,
+ size_t keyword_len)
+{
+ sr_policy_fn_registration_t *plugin = 0, **vec_plugins = 0;
+ sr_policy_fn_registration_t **plugin_it = 0;
+ pool_foreach (plugin, sm->policy_plugin_functions)
+ {
+ vec_add1 (vec_plugins, plugin);
+ }
+
+ vec_foreach (plugin_it, vec_plugins)
+ {
+ if (!srv6_mobile_strcmp_with_size (keyword_str, keyword_len,
+ (char *) (*plugin_it)->keyword_str))
+ {
+ return (*plugin_it)->sr_policy_function_number;
+ }
+ }
+ return UINT16_MAX;
+}
+
+static void
+vl_api_sr_mobile_localsid_add_del_t_handler (
+ vl_api_sr_mobile_localsid_add_del_t *mp)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ vl_api_sr_mobile_localsid_add_del_reply_t *rmp;
+ int rv = 0;
+ ip6_address_t localsid;
+ u16 localsid_prefix_len = 128;
+ void *ls_plugin_mem = 0;
+ u16 behavior = 0;
+ u32 dt_type;
+ size_t behavior_size = 0;
+ mobile_localsid_function_list_t kind_fn =
+ SRV6_MOBILE_LOCALSID_UNKNOWN_FUNCTION;
+
+ mp->behavior[sizeof (mp->behavior) - 1] = '\0';
+ behavior_size = sizeof (mp->behavior);
+ // search behavior index
+ if (mp->behavior[0])
+ {
+ if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior, behavior_size,
+ "end.m.gtp4.e"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP4_E;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "end.m.gtp6.e"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_E;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "end.m.gtp6.d"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_D;
+ }
+ else if (!srv6_mobile_strcmp_with_size (
+ (char *) mp->behavior, behavior_size, "end.m.gtp6.d.di"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DI;
+ }
+ else if (!srv6_mobile_strcmp_with_size (
+ (char *) mp->behavior, behavior_size, "end.m.gtp6.d.dt4"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT4;
+ dt_type = SRV6_GTP6_DT4;
+ }
+ else if (!srv6_mobile_strcmp_with_size (
+ (char *) mp->behavior, behavior_size, "end.m.gtp6.d.dt6"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT6;
+ dt_type = SRV6_GTP6_DT6;
+ }
+ else if (!srv6_mobile_strcmp_with_size (
+ (char *) mp->behavior, behavior_size, "end.m.gtp6.d.dt46"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT46;
+ dt_type = SRV6_GTP6_DT46;
+ }
+ else
+ {
+ return;
+ }
+ switch (kind_fn)
+ {
+ case SRV6_MOBILE_LOCALSID_END_M_GTP4_E:
+ alloc_param_srv6_end_m_gtp4_e (&ls_plugin_mem, &mp->v4src_addr,
+ ntohl (mp->v4src_position),
+ ntohl (mp->fib_table));
+ break;
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_E:
+ alloc_param_srv6_end_m_gtp6_e (&ls_plugin_mem,
+ ntohl (mp->fib_table));
+ break;
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_D:
+ alloc_param_srv6_end_m_gtp6_d (
+ &ls_plugin_mem, &mp->sr_prefix.address, mp->sr_prefix.len,
+ (u8) ntohl (mp->nhtype), mp->drop_in, ntohl (mp->fib_table));
+ break;
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DI:
+ alloc_param_srv6_end_m_gtp6_di (
+ &ls_plugin_mem, &mp->sr_prefix.address, mp->sr_prefix.len,
+ (u8) ntohl (mp->nhtype));
+ break;
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT4:
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT6:
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT46:
+ alloc_param_srv6_end_m_gtp6_dt (
+ &ls_plugin_mem, ntohl (mp->fib_table), ntohl (mp->local_fib_table),
+ dt_type);
+ break;
+ case SRV6_MOBILE_LOCALSID_UNKNOWN_FUNCTION:
+ default:
+ return; // error
+ }
+ behavior = sr_plugin_localsid_fn_num_find_by (sm, (char *) mp->behavior,
+ behavior_size);
+ if (behavior == UINT16_MAX)
+ return;
+ }
+ else
+ {
+ return;
+ }
+ ip6_address_decode (mp->localsid_prefix.address, &localsid);
+ localsid_prefix_len = mp->localsid_prefix.len;
+
+ rv = sr_cli_localsid (mp->is_del, &localsid, localsid_prefix_len,
+ 0, // ignore end_psp
+ behavior,
+ 0, // ignore sw_if_index
+ 0, // ignore vlan_index
+ ntohl (mp->fib_table),
+ NULL, // ignore nh_addr
+ 0, // ignore usid_len
+ ls_plugin_mem);
+
+ REPLY_MACRO (VL_API_SR_MOBILE_LOCALSID_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_sr_mobile_policy_add_t_handler (vl_api_sr_mobile_policy_add_t *mp)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ vl_api_sr_mobile_policy_add_reply_t *rmp;
+ ip6_address_t bsid_addr;
+ ip6_address_t encap_src;
+ void *ls_plugin_mem = 0;
+ u16 behavior = 0;
+ size_t behavior_size = 0;
+
+ u32 dt_type;
+ mobile_policy_function_list_t kind_fn = SRV6_MOBILE_POLICY_UNKNOWN_FUNCTION;
+
+ ip6_address_decode (mp->bsid_addr, &bsid_addr);
+ ip6_address_decode (mp->encap_src, &encap_src);
+ if (ip6_address_is_zero (&encap_src))
+ {
+ encap_src = *sr_get_encaps_source ();
+ }
+ mp->behavior[sizeof (mp->behavior) - 1] = '\0';
+ behavior_size = sizeof (mp->behavior);
+
+ // search behavior index
+ if (mp->behavior[0])
+ {
+ if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior, behavior_size,
+ "t.m.gtp4.d"))
+ {
+ kind_fn = SRV6_MOBILE_POLICY_T_M_GTP4_D;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "t.m.gtp4.dt4"))
+ {
+ kind_fn = SRV6_MOBILE_POLICY_T_M_GTP4_DT4;
+ dt_type = SRV6_GTP4_DT4;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "t.m.gtp4.dt6"))
+ {
+ kind_fn = SRV6_MOBILE_POLICY_T_M_GTP4_DT6;
+ dt_type = SRV6_GTP4_DT6;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "t.m.gtp4.dt46"))
+ {
+ kind_fn = SRV6_MOBILE_POLICY_T_M_GTP4_DT46;
+ dt_type = SRV6_GTP4_DT46;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "end.m.gtp6.d"))
+ {
+ kind_fn = SRV6_MOBILE_POLICY_END_M_GTP6_D;
+ }
+ else
+ {
+ return;
+ }
+
+ switch (kind_fn)
+ {
+ case SRV6_MOBILE_POLICY_T_M_GTP4_D:
+ alloc_param_srv6_t_m_gtp4_d (
+ &ls_plugin_mem, &mp->v6src_prefix.address, mp->v6src_prefix.len,
+ &mp->sr_prefix.address, mp->sr_prefix.len, ntohl (mp->fib_table),
+ mp->nhtype, mp->drop_in);
+ break;
+ case SRV6_MOBILE_POLICY_END_M_GTP6_D:
+ alloc_param_srv6_end_m_gtp6_d (
+ &ls_plugin_mem, &mp->sr_prefix.address, mp->sr_prefix.len,
+ mp->nhtype, mp->drop_in, ntohl (mp->fib_table));
+ break;
+ case SRV6_MOBILE_POLICY_T_M_GTP4_DT4:
+ case SRV6_MOBILE_POLICY_T_M_GTP4_DT6:
+ case SRV6_MOBILE_POLICY_T_M_GTP4_DT46:
+ alloc_param_srv6_t_m_gtp4_dt (&ls_plugin_mem, ntohl (mp->fib_table),
+ ntohl (mp->local_fib_table), dt_type);
+ break;
+ case SRV6_MOBILE_POLICY_UNKNOWN_FUNCTION:
+ default:
+ return; // error
+ }
+
+ behavior = sr_plugin_policy_fn_num_find_by (sm, (char *) mp->behavior,
+ behavior_size);
+ if (behavior == UINT16_MAX)
+ return;
+ }
+ else
+ {
+ return;
+ }
+
+ int rv = 0;
+ ip6_address_t *segments = 0, *this_seg;
+ vec_add2 (segments, this_seg, 1);
+ clib_memset (this_seg, 0, sizeof (*this_seg));
+
+ rv = sr_policy_add (&bsid_addr,
+ segments, // ignore segments
+ &encap_src,
+ (u32) ~0, // ignore weight
+ SR_POLICY_TYPE_DEFAULT, // ignore type
+ (u32) ~0, // ignore fib_table
+ 1, // ignore is_encap,
+ behavior, ls_plugin_mem);
+ vec_free (segments);
+ REPLY_MACRO (VL_API_SR_MOBILE_POLICY_ADD_REPLY);
+}
+
+#include <srv6-mobile/sr_mobile.api.c>
+static clib_error_t *
+sr_mobile_api_hookup (vlib_main_t *vm)
+{
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (sr_mobile_api_hookup);
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
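The two find_by helpers above turn a behavior keyword into the function number the dataplane plugin registered; the localsid handler then passes that number, together with the per-behavior plugin memory, to sr_cli_localsid. A minimal sketch of that flow, with illustrative values:

  u16 fn = sr_plugin_localsid_fn_num_find_by (sm, "end.m.gtp6.e",
					      strlen ("end.m.gtp6.e") + 1);
  if (fn != UINT16_MAX)
    rv = sr_cli_localsid (0 /* is_del */, &localsid, 128 /* prefix len */,
			  0 /* end_psp */, fn, 0 /* sw_if_index */,
			  0 /* vlan */, fib_table, NULL /* nh_addr */,
			  0 /* usid_len */, ls_plugin_mem);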
diff --git a/src/plugins/srv6-mobile/sr_mobile_api.h b/src/plugins/srv6-mobile/sr_mobile_api.h
new file mode 100644
index 00000000000..28979b1875c
--- /dev/null
+++ b/src/plugins/srv6-mobile/sr_mobile_api.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2022 BBSakura Networks Inc and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing for mobile u-plane api
+ *
+ */
+
+#ifndef included_sr_mobile_api_h
+#define included_sr_mobile_api_h
+#include <stdint.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip_types_api.h>
+
+#define srv6_mobile_strcmp_with_size(s1, s1len, s2) \
+ ({ \
+ int __indicator = 0; \
+ strcmp_s_inline (s1, s1len, s2, &__indicator); \
+ __indicator; \
+ })
+
+void alloc_param_srv6_end_m_gtp4_e (void **plugin_mem_p,
+ const void *v4src_addr,
+ const u32 v4src_position,
+ const u32 fib_table);
+
+void alloc_param_srv6_end_m_gtp6_e (void **plugin_mem_p, const u32 fib_table);
+
+void alloc_param_srv6_end_m_gtp6_d (void **plugin_mem_p, const void *sr_prefix,
+ const u32 sr_prefixlen, const u8 nhtype,
+ const bool drop_in, const u32 fib_table);
+
+void alloc_param_srv6_end_m_gtp6_di (void **plugin_mem_p,
+ const void *sr_prefix,
+ const u32 sr_prefixlen, const u8 nhtype);
+
+void alloc_param_srv6_end_m_gtp6_dt (void **plugin_mem_p, const u32 fib_index,
+ const u32 local_fib_index,
+ const u32 type);
+
+void alloc_param_srv6_t_m_gtp4_d (void **plugin_mem_p,
+ const void *v6src_prefix,
+ const u32 v6src_prefixlen,
+ const void *sr_prefix,
+ const u32 sr_prefixlen, const u32 fib_index,
+ const u8 nhtype, const bool drop_in);
+
+void alloc_param_srv6_t_m_gtp4_dt (void **plugin_mem_p, const u32 fib_index,
+ const u32 local_fib_index, const u8 type);
+
+#endif /* included_sr_mobile_api_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
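Usage note: the macro above wraps vppinfra's strcmp_s_inline in a statement expression so callers get a plain strcmp-style result (0 on match) while still passing the size of the fixed-length API string buffer, as the handlers in sr_mobile_api.c do:

  if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
				     sizeof (mp->behavior), "end.m.gtp6.d"))
    {
      /* behavior matched */
    }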
diff --git a/src/plugins/srv6-mobile/sr_mobile_types.api b/src/plugins/srv6-mobile/sr_mobile_types.api
new file mode 100644
index 00000000000..f2dbe302d00
--- /dev/null
+++ b/src/plugins/srv6-mobile/sr_mobile_types.api
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 BBSakura Networks Inc and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "0.1.0";
+
+enum sr_mobile_nhtype : u8
+{
+ SRV6_NHTYPE_API_NONE = 0,
+ SRV6_NHTYPE_API_IPV4 = 1,
+ SRV6_NHTYPE_API_IPV6 = 2,
+ SRV6_NHTYPE_API_NON_IP = 3,
+};
\ No newline at end of file
diff --git a/src/plugins/stn/stn.c b/src/plugins/stn/stn.c
index 241f7169e8b..c0ac0d0b3a6 100644
--- a/src/plugins/stn/stn.c
+++ b/src/plugins/stn/stn.c
@@ -49,7 +49,7 @@ format_stn_rule (u8 * s, va_list * args)
s = format (s, "%Uiface: %U (%d)\n", format_white_space, indent,
format_vnet_sw_if_index_name, vnet_get_main(), r->sw_if_index,
r->sw_if_index);
- s = format (s, "%Unext_node: %s (%d)", format_white_space, indent,
+ s = format (s, "%Unext_node: %v (%d)", format_white_space, indent,
next_node->name, next_node->index);
return s;
}
@@ -195,7 +195,6 @@ stn_ip6_punt_fn (vlib_main_t * vm,
return stn_ip46_punt_fn(vm, node, frame, 0);
}
-/** *INDENT-OFF* */
VLIB_REGISTER_NODE (stn_ip6_punt, static) =
{
.function = stn_ip6_punt_fn,
@@ -215,7 +214,6 @@ VNET_FEATURE_INIT (stn_ip6_punt_feat_node, static) = {
.node_name = "stn-ip6-punt",
.runs_before = VNET_FEATURES("ip6-punt-redirect"),
};
-/** *INDENT-ON* */
u8 *
format_stn_ip4_punt_trace (u8 * s, va_list * args)
@@ -230,7 +228,6 @@ stn_ip4_punt_fn (vlib_main_t * vm,
return stn_ip46_punt_fn(vm, node, frame, 1);
}
-/** *INDENT-OFF* */
VLIB_REGISTER_NODE (stn_ip4_punt, static) =
{
.function = stn_ip4_punt_fn,
@@ -250,7 +247,6 @@ VNET_FEATURE_INIT (stn_ip4_punt_feat_node, static) = {
.node_name = "stn-ip4-punt",
.runs_before = VNET_FEATURES("ip4-punt-redirect"),
};
-/** *INDENT-ON* */
clib_error_t *
stn_init (vlib_main_t * vm)
@@ -275,12 +271,10 @@ stn_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (stn_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "VPP Steals the NIC (STN) for Container Integration",
};
-/* *INDENT-ON* */
int stn_rule_add_del (stn_rule_add_del_args_t *args)
{
diff --git a/src/plugins/stn/stn_api.c b/src/plugins/stn/stn_api.c
index e8685931db5..4d1af36d448 100644
--- a/src/plugins/stn/stn_api.c
+++ b/src/plugins/stn/stn_api.c
@@ -29,13 +29,11 @@
#define REPLY_MSG_ID_BASE stn_main.msg_id_base
#include <vlibapi/api_helper_macros.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-/* Macro to finish up custom dump fns */
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static void
vl_api_stn_add_del_rule_t_handler (vl_api_stn_add_del_rule_t * mp)
@@ -84,11 +82,9 @@ vl_api_stn_rules_dump_t_handler (vl_api_stn_rules_dump_t * mp)
if (reg == 0)
return;
- /* *INDENT-OFF* */
pool_foreach (r, stn->rules) {
send_stn_rules_details (r, reg, mp->context);
}
- /* *INDENT-ON* */
}
#include <stn/stn.api.c>
diff --git a/src/plugins/stn/stn_test.c b/src/plugins/stn/stn_test.c
index c7514cf77c5..2499ba7b6ec 100644
--- a/src/plugins/stn/stn_test.c
+++ b/src/plugins/stn/stn_test.c
@@ -23,9 +23,9 @@
#include <vlibapi/vat_helper_macros.h>
/* Declare message IDs */
-#include <vpp/api/vpe.api_types.h>
#include <stn/stn.api_enum.h>
#include <stn/stn.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
diff --git a/src/plugins/svs/svs.c b/src/plugins/svs/svs.c
index 555283397ff..4da7fb9263d 100644
--- a/src/plugins/svs/svs.c
+++ b/src/plugins/svs/svs.c
@@ -363,7 +363,6 @@ format_svs_input_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (svs_ip4_node) =
{
.function = svs_input_ip4,
@@ -402,7 +401,6 @@ VNET_FEATURE_INIT (svs_ip6_feat, static) =
.arc_name = "ip6-unicast",
.node_name = "svs-ip6",
};
-/* *INDENT-ON* */
static clib_error_t *
svs_table_cli (vlib_main_t * vm,
@@ -443,13 +441,11 @@ svs_table_cli (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (svs_table_cmd_cli, static) = {
.path = "svs table",
.short_help = "Source VRF select table [add|delete] [ip4|ip6] table-id X",
.function = svs_table_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
svs_enable_cli (vlib_main_t * vm,
@@ -497,13 +493,11 @@ svs_enable_cli (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (svs_enable_cli_cmd, static) = {
.path = "svs enable",
.short_help = "Source VRF select [enable|disable] [ip4|ip6] <table-id> X <interface>",
.function = svs_enable_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
svs_route_cli (vlib_main_t * vm,
@@ -559,13 +553,11 @@ svs_route_cli (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (svs_route_cmd_cli, static) = {
.path = "svs route",
.short_help = "Source VRF select route [add|delete] <table-id> <prefix> <src-table-id>",
.function = svs_route_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
svs_show_cli (vlib_main_t * vm,
@@ -588,13 +580,11 @@ svs_show_cli (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (svs_show_cli_cmd, static) = {
.path = "show svs",
.short_help = "Source VRF select show",
.function = svs_show_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
svs_init (vlib_main_t * vm)
diff --git a/src/plugins/svs/svs_api.c b/src/plugins/svs/svs_api.c
index 628acd01b01..b1660bc97dc 100644
--- a/src/plugins/svs/svs_api.c
+++ b/src/plugins/svs/svs_api.c
@@ -35,6 +35,7 @@
 * Base message ID for the plugin
*/
static u32 svs_base_msg_id;
+#define REPLY_MSG_ID_BASE (svs_base_msg_id)
#include <vlibapi/api_helper_macros.h>
static void
@@ -80,7 +81,7 @@ vl_api_svs_table_add_del_t_handler (vl_api_svs_table_add_del_t * mp)
}
error:
- REPLY_MACRO (VL_API_SVS_TABLE_ADD_DEL_REPLY + svs_base_msg_id);
+ REPLY_MACRO (VL_API_SVS_TABLE_ADD_DEL_REPLY);
}
static void
@@ -102,7 +103,7 @@ vl_api_svs_route_add_del_t_handler (vl_api_svs_route_add_del_t * mp)
rv = svs_route_delete (ntohl (mp->table_id), &pfx);
}
- REPLY_MACRO (VL_API_SVS_ROUTE_ADD_DEL_REPLY + svs_base_msg_id);
+ REPLY_MACRO (VL_API_SVS_ROUTE_ADD_DEL_REPLY);
}
static void
@@ -130,7 +131,7 @@ vl_api_svs_enable_disable_t_handler (vl_api_svs_enable_disable_t * mp)
BAD_SW_IF_INDEX_LABEL;
error:
- REPLY_MACRO (VL_API_SVS_ENABLE_DISABLE_REPLY + svs_base_msg_id);
+ REPLY_MACRO (VL_API_SVS_ENABLE_DISABLE_REPLY);
}
typedef struct svs_dump_walk_ctx_t_
@@ -191,12 +192,10 @@ svs_api_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (svs_api_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
- .version = VPP_BUILD_VER,
- .description = "Source Virtual Routing and Fowarding (VRF) Select",
+ .version = VPP_BUILD_VER,
+ .description = "Source Virtual Routing and Forwarding (VRF) Select",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tlsmbedtls/tls_mbedtls.c b/src/plugins/tlsmbedtls/tls_mbedtls.c
index 3fccba2ec5a..af04f1adeb0 100644
--- a/src/plugins/tlsmbedtls/tls_mbedtls.c
+++ b/src/plugins/tlsmbedtls/tls_mbedtls.c
@@ -74,7 +74,8 @@ mbedtls_ctx_alloc (void)
mbedtls_main_t *tm = &mbedtls_main;
mbedtls_ctx_t **ctx;
- pool_get (tm->ctx_pool[thread_index], ctx);
+ pool_get_aligned_safe (tm->ctx_pool[thread_index], ctx,
+ CLIB_CACHE_LINE_BYTES);
if (!(*ctx))
*ctx = clib_mem_alloc (sizeof (mbedtls_ctx_t));
@@ -90,7 +91,8 @@ mbedtls_ctx_free (tls_ctx_t * ctx)
{
mbedtls_ctx_t *mc = (mbedtls_ctx_t *) ctx;
- if (mc->ssl.state == MBEDTLS_SSL_HANDSHAKE_OVER && !ctx->is_passive_close)
+ if (mc->ssl.state == MBEDTLS_SSL_HANDSHAKE_OVER &&
+ !(ctx->flags & TLS_CONN_F_PASSIVE_CLOSE))
mbedtls_ssl_close_notify (&mc->ssl);
if (mc->ssl.conf->endpoint == MBEDTLS_SSL_IS_SERVER)
{
@@ -550,11 +552,32 @@ mbedtls_transport_close (tls_ctx_t * ctx)
}
static int
+mbedtls_transport_reset (tls_ctx_t *ctx)
+{
+ if (!mbedtls_handshake_is_over (ctx))
+ {
+ session_close (session_get_from_handle (ctx->tls_session_handle));
+ return 0;
+ }
+
+ session_transport_reset_notify (&ctx->connection);
+ session_transport_closed_notify (&ctx->connection);
+ tls_disconnect_transport (ctx);
+ return 0;
+}
+
+static int
mbedtls_app_close (tls_ctx_t * ctx)
{
tls_disconnect_transport (ctx);
session_transport_delete_notify (&ctx->connection);
- mbedtls_ctx_free (ctx);
+ return 0;
+}
+
+static int
+mbedtls_reinit_ca_chain (void)
+{
+  /* Not supported yet */
return 0;
}
@@ -571,7 +594,9 @@ const static tls_engine_vft_t mbedtls_engine = {
.ctx_start_listen = mbedtls_start_listen,
.ctx_stop_listen = mbedtls_stop_listen,
.ctx_transport_close = mbedtls_transport_close,
+ .ctx_transport_reset = mbedtls_transport_reset,
.ctx_app_close = mbedtls_app_close,
+ .ctx_reinit_cachain = mbedtls_reinit_ca_chain,
};
int
@@ -663,19 +688,15 @@ tls_mbedtls_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (tls_mbedtls_init) =
{
.runs_after = VLIB_INITS("tls_init"),
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Transport Layer Security (TLS) Engine, Mbedtls Based",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tlsopenssl/CMakeLists.txt b/src/plugins/tlsopenssl/CMakeLists.txt
index eb67e4cceaf..70a62aedf9c 100644
--- a/src/plugins/tlsopenssl/CMakeLists.txt
+++ b/src/plugins/tlsopenssl/CMakeLists.txt
@@ -14,6 +14,7 @@
include (CheckFunctionExists)
if(OPENSSL_FOUND AND OPENSSL_VERSION VERSION_GREATER_EQUAL "1.1.0")
include_directories(${OPENSSL_INCLUDE_DIR})
+ add_compile_definitions(OPENSSL_SUPPRESS_DEPRECATED)
add_vpp_plugin(tlsopenssl
SOURCES
tls_bio.c
diff --git a/src/plugins/tlsopenssl/tls_async.c b/src/plugins/tlsopenssl/tls_async.c
index 89b4f77e331..d85af686d21 100644
--- a/src/plugins/tlsopenssl/tls_async.c
+++ b/src/plugins/tlsopenssl/tls_async.c
@@ -437,7 +437,7 @@ tls_async_do_job (int eidx, u32 thread_index)
if (ctx)
{
- ctx->resume = 1;
+ ctx->flags |= TLS_CONN_F_RESUME;
session_send_rpc_evt_to_thread (thread_index, event_handler, event);
}
return 1;
@@ -510,7 +510,6 @@ tls_async_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
VLIB_INIT_FUNCTION (tls_async_init);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tls_async_process_node,static) = {
.function = tls_async_process,
.type = VLIB_NODE_TYPE_INPUT,
@@ -518,7 +517,6 @@ VLIB_REGISTER_NODE (tls_async_process_node,static) = {
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tlsopenssl/tls_openssl.c b/src/plugins/tlsopenssl/tls_openssl.c
index 05cd13c9970..5d172a0adcf 100644
--- a/src/plugins/tlsopenssl/tls_openssl.c
+++ b/src/plugins/tlsopenssl/tls_openssl.c
@@ -27,6 +27,8 @@
#include <ctype.h>
#include <tlsopenssl/tls_openssl.h>
#include <tlsopenssl/tls_bios.h>
+#include <openssl/x509_vfy.h>
+#include <openssl/x509v3.h>
#define MAX_CRYPTO_LEN 64
@@ -38,7 +40,8 @@ openssl_ctx_alloc_w_thread (u32 thread_index)
openssl_main_t *om = &openssl_main;
openssl_ctx_t **ctx;
- pool_get (om->ctx_pool[thread_index], ctx);
+ pool_get_aligned_safe (om->ctx_pool[thread_index], ctx, 0);
+
if (!(*ctx))
*ctx = clib_mem_alloc (sizeof (openssl_ctx_t));
@@ -62,14 +65,15 @@ openssl_ctx_free (tls_ctx_t * ctx)
openssl_ctx_t *oc = (openssl_ctx_t *) ctx;
/* Cleanup ssl ctx unless migrated */
- if (!ctx->is_migrated)
+ if (!(ctx->flags & TLS_CONN_F_MIGRATED))
{
- if (SSL_is_init_finished (oc->ssl) && !ctx->is_passive_close)
+ if (SSL_is_init_finished (oc->ssl) &&
+ !(ctx->flags & TLS_CONN_F_PASSIVE_CLOSE))
SSL_shutdown (oc->ssl);
SSL_free (oc->ssl);
vec_free (ctx->srv_hostname);
-
+ SSL_CTX_free (oc->client_ssl_ctx);
#ifdef HAVE_OPENSSL_ASYNC
openssl_evt_free (ctx->evt_index, ctx->c_thread_index);
#endif
@@ -97,7 +101,7 @@ openssl_ctx_attach (u32 thread_index, void *ctx_ptr)
session_handle_t sh;
openssl_ctx_t **oc;
- pool_get (om->ctx_pool[thread_index], oc);
+ pool_get_aligned_safe (om->ctx_pool[thread_index], oc, 0);
/* Free the old instance instead of looking for an empty spot */
if (*oc)
clib_mem_free (*oc);
@@ -155,8 +159,12 @@ openssl_lctx_get (u32 lctx_index)
return pool_elt_at_index (openssl_main.lctx_pool, lctx_index);
}
+#define ossl_check_err_is_fatal(_ssl, _rv) \
+ if (PREDICT_FALSE (_rv < 0 && SSL_get_error (_ssl, _rv) == SSL_ERROR_SSL)) \
+ return -1;
+
static int
-openssl_read_from_ssl_into_fifo (svm_fifo_t * f, SSL * ssl)
+openssl_read_from_ssl_into_fifo (svm_fifo_t *f, SSL *ssl, u32 max_len)
{
int read, rv, n_fs, i;
const int n_segs = 2;
@@ -167,6 +175,7 @@ openssl_read_from_ssl_into_fifo (svm_fifo_t * f, SSL * ssl)
if (!max_enq)
return 0;
+ max_enq = clib_min (max_len, max_enq);
n_fs = svm_fifo_provision_chunks (f, fs, n_segs, max_enq);
if (n_fs < 0)
return 0;
@@ -174,17 +183,25 @@ openssl_read_from_ssl_into_fifo (svm_fifo_t * f, SSL * ssl)
/* Return early if we can't read anything */
read = SSL_read (ssl, fs[0].data, fs[0].len);
if (read <= 0)
- return 0;
+ {
+ ossl_check_err_is_fatal (ssl, read);
+ return 0;
+ }
- for (i = 1; i < n_fs; i++)
+ if (read == (int) fs[0].len)
{
- rv = SSL_read (ssl, fs[i].data, fs[i].len);
- read += rv > 0 ? rv : 0;
+ for (i = 1; i < n_fs; i++)
+ {
+ rv = SSL_read (ssl, fs[i].data, fs[i].len);
+ read += rv > 0 ? rv : 0;
- if (rv < (int) fs[i].len)
- break;
+ if (rv < (int) fs[i].len)
+ {
+ ossl_check_err_is_fatal (ssl, rv);
+ break;
+ }
+ }
}
-
svm_fifo_enqueue_nocopy (f, read);
return read;
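For clarity on the new error handling: SSL_read and SSL_write return values <= 0 are only unrecoverable when SSL_get_error reports SSL_ERROR_SSL; WANT_READ/WANT_WRITE and clean shutdown keep returning 0, so the caller simply retries later. The ossl_check_err_is_fatal macro introduced above expands, in effect, to:

  if (PREDICT_FALSE (rv < 0 && SSL_get_error (ssl, rv) == SSL_ERROR_SSL))
    return -1; /* callers map -1 to tls_notify_app_io_error, i.e. a reset */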
@@ -194,10 +211,10 @@ static int
openssl_write_from_fifo_into_ssl (svm_fifo_t *f, SSL *ssl, u32 max_len)
{
int wrote = 0, rv, i = 0, len;
- const int n_segs = 2;
+ u32 n_segs = 2;
svm_fifo_seg_t fs[n_segs];
- len = svm_fifo_segments (f, 0, fs, n_segs, max_len);
+ len = svm_fifo_segments (f, 0, fs, &n_segs, max_len);
if (len <= 0)
return 0;
@@ -206,7 +223,10 @@ openssl_write_from_fifo_into_ssl (svm_fifo_t *f, SSL *ssl, u32 max_len)
rv = SSL_write (ssl, fs[i].data, fs[i].len);
wrote += (rv > 0) ? rv : 0;
if (rv < (int) fs[i].len)
- break;
+ {
+ ossl_check_err_is_fatal (ssl, rv);
+ break;
+ }
i++;
}
@@ -243,22 +263,18 @@ openssl_check_async_status (tls_ctx_t * ctx, openssl_resume_handler * handler,
static void
openssl_handle_handshake_failure (tls_ctx_t * ctx)
{
- session_t *app_session;
+ /* Failed to renegotiate handshake */
+ if (ctx->flags & TLS_CONN_F_HS_DONE)
+ {
+ tls_notify_app_io_error (ctx);
+ tls_disconnect_transport (ctx);
+ return;
+ }
if (SSL_is_server (((openssl_ctx_t *) ctx)->ssl))
{
- /*
- * Cleanup pre-allocated app session and close transport
- */
- app_session =
- session_get_if_valid (ctx->c_s_index, ctx->c_thread_index);
- if (app_session)
- {
- session_free (app_session);
- ctx->no_app_session = 1;
- ctx->c_s_index = SESSION_INVALID_INDEX;
- tls_disconnect_transport (ctx);
- }
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
+ tls_disconnect_transport (ctx);
}
else
{
@@ -266,6 +282,7 @@ openssl_handle_handshake_failure (tls_ctx_t * ctx)
* Also handles cleanup of the pre-allocated session
*/
tls_notify_app_connected (ctx, SESSION_E_TLS_HANDSHAKE);
+ tls_disconnect_transport (ctx);
}
}
@@ -277,9 +294,9 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session)
while (SSL_in_init (oc->ssl))
{
- if (ctx->resume)
+ if (ctx->flags & TLS_CONN_F_RESUME)
{
- ctx->resume = 0;
+ ctx->flags &= ~TLS_CONN_F_RESUME;
}
else if (!svm_fifo_max_dequeue_cons (tls_session->rx_fifo))
break;
@@ -313,6 +330,10 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session)
if (SSL_in_init (oc->ssl))
return -1;
+ /* Renegotiated handshake, app must not be notified */
+ if (PREDICT_FALSE (ctx->flags & TLS_CONN_F_HS_DONE))
+ return 0;
+
/*
* Handshake complete
*/
@@ -331,16 +352,20 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session)
*/
if (ctx->srv_hostname)
{
- tls_notify_app_connected (ctx, SESSION_E_TLS_HANDSHAKE);
+ openssl_handle_handshake_failure (ctx);
return -1;
}
}
- tls_notify_app_connected (ctx, SESSION_E_NONE);
+ if (tls_notify_app_connected (ctx, SESSION_E_NONE))
+ {
+ tls_disconnect_transport (ctx);
+ return -1;
+ }
}
else
{
/* Need to check transport status */
- if (ctx->is_passive_close)
+ if (ctx->flags & TLS_CONN_F_PASSIVE_CLOSE)
{
openssl_handle_handshake_failure (ctx);
return -1;
@@ -354,7 +379,7 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session)
return -1;
}
}
-
+ ctx->flags |= TLS_CONN_F_HS_DONE;
TLS_DBG (1, "Handshake for %u complete. TLS cipher is %s",
oc->openssl_ctx_index, SSL_get_cipher (oc->ssl));
return rv;
@@ -363,6 +388,8 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session)
static void
openssl_confirm_app_close (tls_ctx_t * ctx)
{
+ openssl_ctx_t *oc = (openssl_ctx_t *) ctx;
+ SSL_shutdown (oc->ssl);
tls_disconnect_transport (ctx);
session_transport_closed_notify (&ctx->connection);
}
@@ -399,6 +426,14 @@ openssl_ctx_write_tls (tls_ctx_t *ctx, session_t *app_session,
goto check_tls_fifo;
wrote = openssl_write_from_fifo_into_ssl (f, oc->ssl, deq_max);
+
+ /* Unrecoverable protocol error. Reset connection */
+ if (PREDICT_FALSE (wrote < 0))
+ {
+ tls_notify_app_io_error (ctx);
+ return 0;
+ }
+
if (!wrote)
goto check_tls_fifo;
@@ -407,7 +442,8 @@ openssl_ctx_write_tls (tls_ctx_t *ctx, session_t *app_session,
check_tls_fifo:
- if (PREDICT_FALSE (ctx->app_closed && BIO_ctrl_pending (oc->rbio) <= 0))
+ if (PREDICT_FALSE ((ctx->flags & TLS_CONN_F_APP_CLOSED) &&
+ BIO_ctrl_pending (oc->rbio) <= 0))
openssl_confirm_app_close (ctx);
/* Deschedule and wait for deq notification if fifo is almost full */
@@ -419,8 +455,11 @@ check_tls_fifo:
sp->flags |= TRANSPORT_SND_F_DESCHED;
}
else
- /* Request tx reschedule of the app session */
- app_session->flags |= SESSION_F_CUSTOM_TX;
+ {
+ /* Request tx reschedule of the app session */
+ if (wrote)
+ app_session->flags |= SESSION_F_CUSTOM_TX;
+ }
return wrote;
}
@@ -479,7 +518,7 @@ done:
if (read)
tls_add_vpp_q_tx_evt (us);
- if (PREDICT_FALSE (ctx->app_closed &&
+ if (PREDICT_FALSE ((ctx->flags & TLS_CONN_F_APP_CLOSED) &&
!svm_fifo_max_enqueue_prod (us->rx_fifo)))
openssl_confirm_app_close (ctx);
@@ -500,23 +539,33 @@ static inline int
openssl_ctx_read_tls (tls_ctx_t *ctx, session_t *tls_session)
{
openssl_ctx_t *oc = (openssl_ctx_t *) ctx;
+ const u32 max_len = 128 << 10;
session_t *app_session;
- int read;
svm_fifo_t *f;
+ int read;
if (PREDICT_FALSE (SSL_in_init (oc->ssl)))
{
if (openssl_ctx_handshake_rx (ctx, tls_session) < 0)
return 0;
+
+ /* Application might force a session pool realloc on accept */
+ tls_session = session_get_from_handle (ctx->tls_session_handle);
}
app_session = session_get_from_handle (ctx->app_session_handle);
f = app_session->rx_fifo;
- read = openssl_read_from_ssl_into_fifo (f, oc->ssl);
+ read = openssl_read_from_ssl_into_fifo (f, oc->ssl, max_len);
- /* If handshake just completed, session may still be in accepting state */
- if (read && app_session->session_state >= SESSION_STATE_READY)
+ /* Unrecoverable protocol error. Reset connection */
+ if (PREDICT_FALSE (read < 0))
+ {
+ tls_notify_app_io_error (ctx);
+ return 0;
+ }
+
+ if (read)
tls_notify_app_enqueue (ctx, app_session);
if ((SSL_pending (oc->ssl) > 0) ||
@@ -597,6 +646,88 @@ openssl_ctx_read (tls_ctx_t *ctx, session_t *ts)
}
static int
+openssl_set_ckpair (SSL *ssl, u32 ckpair_index)
+{
+ app_cert_key_pair_t *ckpair;
+ BIO *cert_bio;
+ EVP_PKEY *pkey;
+ X509 *srvcert;
+
+ /* Configure a ckpair index only if non-default/test provided */
+ if (ckpair_index == 0)
+ return 0;
+
+ ckpair = app_cert_key_pair_get_if_valid (ckpair_index);
+ if (!ckpair)
+ return -1;
+
+ if (!ckpair->cert || !ckpair->key)
+ {
+ TLS_DBG (1, "tls cert and/or key not configured");
+ return -1;
+ }
+ /*
+ * Set the key and cert
+ */
+ cert_bio = BIO_new (BIO_s_mem ());
+ BIO_write (cert_bio, ckpair->cert, vec_len (ckpair->cert));
+ srvcert = PEM_read_bio_X509 (cert_bio, NULL, NULL, NULL);
+ if (!srvcert)
+ {
+ clib_warning ("unable to parse certificate");
+ return -1;
+ }
+ SSL_use_certificate (ssl, srvcert);
+ BIO_free (cert_bio);
+
+ cert_bio = BIO_new (BIO_s_mem ());
+ BIO_write (cert_bio, ckpair->key, vec_len (ckpair->key));
+ pkey = PEM_read_bio_PrivateKey (cert_bio, NULL, NULL, NULL);
+ if (!pkey)
+ {
+ clib_warning ("unable to parse pkey");
+ return -1;
+ }
+ SSL_use_PrivateKey (ssl, pkey);
+ BIO_free (cert_bio);
+ TLS_DBG (1, "TLS client using ckpair index: %d", ckpair_index);
+ return 0;
+}
+
+static int
+openssl_client_init_verify (SSL *ssl, const char *srv_hostname,
+ int set_hostname_verification,
+ int set_hostname_strict_check)
+{
+ if (set_hostname_verification)
+ {
+ X509_VERIFY_PARAM *param = SSL_get0_param (ssl);
+ if (!param)
+ {
+ TLS_DBG (1, "Couldn't fetch SSL param");
+ return -1;
+ }
+
+ if (set_hostname_strict_check)
+ X509_VERIFY_PARAM_set_hostflags (param,
+ X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS);
+
+ if (!X509_VERIFY_PARAM_set1_host (param, srv_hostname, 0))
+ {
+ TLS_DBG (1, "Couldn't set hostname for verification");
+ return -1;
+ }
+ SSL_set_verify (ssl, SSL_VERIFY_PEER, 0);
+ }
+ if (!SSL_set_tlsext_host_name (ssl, srv_hostname))
+ {
+ TLS_DBG (1, "Couldn't set hostname");
+ return -1;
+ }
+ return 0;
+}
+
+static int
openssl_ctx_init_client (tls_ctx_t * ctx)
{
long flags = SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | SSL_OP_NO_COMPRESSION;
@@ -613,30 +744,31 @@ openssl_ctx_init_client (tls_ctx_t * ctx)
return -1;
}
- oc->ssl_ctx = SSL_CTX_new (method);
- if (oc->ssl_ctx == NULL)
+ oc->client_ssl_ctx = SSL_CTX_new (method);
+ if (oc->client_ssl_ctx == NULL)
{
TLS_DBG (1, "SSL_CTX_new returned null");
return -1;
}
- SSL_CTX_set_ecdh_auto (oc->ssl_ctx, 1);
- SSL_CTX_set_mode (oc->ssl_ctx, SSL_MODE_ENABLE_PARTIAL_WRITE);
+ SSL_CTX_set_ecdh_auto (oc->client_ssl_ctx, 1);
+ SSL_CTX_set_mode (oc->client_ssl_ctx, SSL_MODE_ENABLE_PARTIAL_WRITE);
#ifdef HAVE_OPENSSL_ASYNC
if (om->async)
- SSL_CTX_set_mode (oc->ssl_ctx, SSL_MODE_ASYNC);
+ SSL_CTX_set_mode (oc->client_ssl_ctx, SSL_MODE_ASYNC);
#endif
- rv = SSL_CTX_set_cipher_list (oc->ssl_ctx, (const char *) om->ciphers);
+ rv =
+ SSL_CTX_set_cipher_list (oc->client_ssl_ctx, (const char *) om->ciphers);
if (rv != 1)
{
TLS_DBG (1, "Couldn't set cipher");
return -1;
}
- SSL_CTX_set_options (oc->ssl_ctx, flags);
- SSL_CTX_set_cert_store (oc->ssl_ctx, om->cert_store);
+ SSL_CTX_set_options (oc->client_ssl_ctx, flags);
+ SSL_CTX_set1_cert_store (oc->client_ssl_ctx, om->cert_store);
- oc->ssl = SSL_new (oc->ssl_ctx);
+ oc->ssl = SSL_new (oc->client_ssl_ctx);
if (oc->ssl == NULL)
{
TLS_DBG (1, "Couldn't initialize ssl struct");
@@ -657,12 +789,18 @@ openssl_ctx_init_client (tls_ctx_t * ctx)
SSL_set_bio (oc->ssl, oc->wbio, oc->rbio);
SSL_set_connect_state (oc->ssl);
- rv = SSL_set_tlsext_host_name (oc->ssl, ctx->srv_hostname);
- if (rv != 1)
+ /* Hostname validation and strict check by name are disabled by default */
+ rv = openssl_client_init_verify (oc->ssl, (const char *) ctx->srv_hostname,
+ 0, 0);
+ if (rv)
{
- TLS_DBG (1, "Couldn't set hostname");
+ TLS_DBG (1, "ERROR:verify init failed:%d", rv);
return -1;
}
+ if (openssl_set_ckpair (oc->ssl, ctx->ckpair_index))
+ {
+ TLS_DBG (1, "Couldn't set client certificate-key pair");
+ }
/*
* 2. Do the first steps in the handshake.
@@ -749,29 +887,59 @@ openssl_start_listen (tls_ctx_t * lctx)
return -1;
}
+ /* use the default OpenSSL built-in DH parameters */
+ rv = SSL_CTX_set_dh_auto (ssl_ctx, 1);
+ if (rv != 1)
+ {
+ TLS_DBG (1, "Couldn't set temp DH parameters");
+ return -1;
+ }
+
/*
* Set the key and cert
*/
cert_bio = BIO_new (BIO_s_mem ());
+ if (!cert_bio)
+ {
+ clib_warning ("unable to allocate memory");
+ return -1;
+ }
BIO_write (cert_bio, ckpair->cert, vec_len (ckpair->cert));
srvcert = PEM_read_bio_X509 (cert_bio, NULL, NULL, NULL);
if (!srvcert)
{
clib_warning ("unable to parse certificate");
- return -1;
+ goto err;
}
- SSL_CTX_use_certificate (ssl_ctx, srvcert);
+ rv = SSL_CTX_use_certificate (ssl_ctx, srvcert);
+ if (rv != 1)
+ {
+ clib_warning ("unable to use SSL certificate");
+ goto err;
+ }
+
BIO_free (cert_bio);
cert_bio = BIO_new (BIO_s_mem ());
+ if (!cert_bio)
+ {
+ clib_warning ("unable to allocate memory");
+ return -1;
+ }
BIO_write (cert_bio, ckpair->key, vec_len (ckpair->key));
pkey = PEM_read_bio_PrivateKey (cert_bio, NULL, NULL, NULL);
if (!pkey)
{
clib_warning ("unable to parse pkey");
- return -1;
+ goto err;
}
- SSL_CTX_use_PrivateKey (ssl_ctx, pkey);
+ rv = SSL_CTX_use_PrivateKey (ssl_ctx, pkey);
+ if (rv != 1)
+ {
+ clib_warning ("unable to use SSL PrivateKey");
+ goto err;
+ }
+
BIO_free (cert_bio);
olc_index = openssl_listen_ctx_alloc ();
@@ -785,6 +953,10 @@ openssl_start_listen (tls_ctx_t * lctx)
return 0;
+err:
+ if (cert_bio)
+ BIO_free (cert_bio);
+ return -1;
}
static int
@@ -892,6 +1064,22 @@ openssl_transport_close (tls_ctx_t * ctx)
}
static int
+openssl_transport_reset (tls_ctx_t *ctx)
+{
+ if (!openssl_handshake_is_over (ctx))
+ {
+ openssl_handle_handshake_failure (ctx);
+ return 0;
+ }
+
+ session_transport_reset_notify (&ctx->connection);
+ session_transport_closed_notify (&ctx->connection);
+ tls_disconnect_transport (ctx);
+
+ return 0;
+}
+
+static int
openssl_app_close (tls_ctx_t * ctx)
{
openssl_ctx_t *oc = (openssl_ctx_t *) ctx;
@@ -902,30 +1090,9 @@ openssl_app_close (tls_ctx_t * ctx)
if (BIO_ctrl_pending (oc->rbio) <= 0
&& !svm_fifo_max_dequeue_cons (app_session->tx_fifo))
openssl_confirm_app_close (ctx);
- else
- ctx->app_closed = 1;
return 0;
}
-const static tls_engine_vft_t openssl_engine = {
- .ctx_alloc = openssl_ctx_alloc,
- .ctx_alloc_w_thread = openssl_ctx_alloc_w_thread,
- .ctx_free = openssl_ctx_free,
- .ctx_attach = openssl_ctx_attach,
- .ctx_detach = openssl_ctx_detach,
- .ctx_get = openssl_ctx_get,
- .ctx_get_w_thread = openssl_ctx_get_w_thread,
- .ctx_init_server = openssl_ctx_init_server,
- .ctx_init_client = openssl_ctx_init_client,
- .ctx_write = openssl_ctx_write,
- .ctx_read = openssl_ctx_read,
- .ctx_handshake_is_over = openssl_handshake_is_over,
- .ctx_start_listen = openssl_start_listen,
- .ctx_stop_listen = openssl_stop_listen,
- .ctx_transport_close = openssl_transport_close,
- .ctx_app_close = openssl_app_close,
-};
-
int
tls_init_ca_chain (void)
{
@@ -975,21 +1142,50 @@ tls_init_ca_chain (void)
}
int
+openssl_reinit_ca_chain (void)
+{
+ openssl_main_t *om = &openssl_main;
+
+ /* Remove/free existing x509_store */
+ if (om->cert_store)
+ {
+ X509_STORE_free (om->cert_store);
+ }
+ return tls_init_ca_chain ();
+}
+
+const static tls_engine_vft_t openssl_engine = {
+ .ctx_alloc = openssl_ctx_alloc,
+ .ctx_alloc_w_thread = openssl_ctx_alloc_w_thread,
+ .ctx_free = openssl_ctx_free,
+ .ctx_attach = openssl_ctx_attach,
+ .ctx_detach = openssl_ctx_detach,
+ .ctx_get = openssl_ctx_get,
+ .ctx_get_w_thread = openssl_ctx_get_w_thread,
+ .ctx_init_server = openssl_ctx_init_server,
+ .ctx_init_client = openssl_ctx_init_client,
+ .ctx_write = openssl_ctx_write,
+ .ctx_read = openssl_ctx_read,
+ .ctx_handshake_is_over = openssl_handshake_is_over,
+ .ctx_start_listen = openssl_start_listen,
+ .ctx_stop_listen = openssl_stop_listen,
+ .ctx_transport_close = openssl_transport_close,
+ .ctx_transport_reset = openssl_transport_reset,
+ .ctx_app_close = openssl_app_close,
+ .ctx_reinit_cachain = openssl_reinit_ca_chain,
+};
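
The vft is handed to the TLS host layer from the plugin's init path (outside this hunk); a sketch of the registration, assuming the usual tls_register_engine() entry point and the CRYPTO_ENGINE_OPENSSL enum value:

/* Sketch: register this engine's vft with the TLS host layer. */
tls_register_engine (&openssl_engine, CRYPTO_ENGINE_OPENSSL);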
+
+int
tls_openssl_set_ciphers (char *ciphers)
{
openssl_main_t *om = &openssl_main;
- int i;
if (!ciphers)
{
return -1;
}
- vec_validate (om->ciphers, strlen (ciphers) - 1);
- for (i = 0; i < vec_len (om->ciphers); i++)
- {
- om->ciphers[i] = toupper (ciphers[i]);
- }
+ vec_validate_init_c_string (om->ciphers, ciphers, strlen (ciphers));
return 0;
@@ -1009,12 +1205,6 @@ tls_openssl_init (vlib_main_t * vm)
SSL_library_init ();
SSL_load_error_strings ();
- if (tls_init_ca_chain ())
- {
- clib_warning ("failed to initialize TLS CA chain");
- return 0;
- }
-
vec_validate (om->ctx_pool, num_threads - 1);
vec_validate (om->rx_bufs, num_threads - 1);
vec_validate (om->tx_bufs, num_threads - 1);
@@ -1031,14 +1221,18 @@ tls_openssl_init (vlib_main_t * vm)
tls_openssl_set_ciphers
("ALL:!ADH:!LOW:!EXP:!MD5:!RC4-SHA:!DES-CBC3-SHA:@STRENGTH");
+ if (tls_init_ca_chain ())
+ {
+ clib_warning ("failed to initialize TLS CA chain");
+ return 0;
+ }
+
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (tls_openssl_init) =
{
.runs_after = VLIB_INITS("tls_init"),
};
-/* *INDENT-ON* */
#ifdef HAVE_OPENSSL_ASYNC
static clib_error_t *
@@ -1109,22 +1303,18 @@ tls_openssl_set_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tls_openssl_set_command, static) =
{
.path = "tls openssl set",
.short_help = "tls openssl set [engine <engine name>] [alg [algorithm] [async]",
.function = tls_openssl_set_command_fn,
};
-/* *INDENT-ON* */
#endif
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Transport Layer Security (TLS) Engine, OpenSSL Based",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tlsopenssl/tls_openssl.h b/src/plugins/tlsopenssl/tls_openssl.h
index a4beecc8ec1..1600cd77aba 100644
--- a/src/plugins/tlsopenssl/tls_openssl.h
+++ b/src/plugins/tlsopenssl/tls_openssl.h
@@ -33,7 +33,7 @@ typedef struct tls_ctx_openssl_
{
tls_ctx_t ctx; /**< First */
u32 openssl_ctx_index;
- SSL_CTX *ssl_ctx;
+ SSL_CTX *client_ssl_ctx;
SSL *ssl;
BIO *rbio;
BIO *wbio;
diff --git a/src/plugins/tlsopenssl/tls_openssl_api.c b/src/plugins/tlsopenssl/tls_openssl_api.c
index c34829f0b29..0b17271313d 100644
--- a/src/plugins/tlsopenssl/tls_openssl_api.c
+++ b/src/plugins/tlsopenssl/tls_openssl_api.c
@@ -23,7 +23,6 @@
#include <tlsopenssl/tls_openssl.api_enum.h>
#include <tlsopenssl/tls_openssl.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define REPLY_MSG_ID_BASE om->msg_id_base
#include <vlibapi/api_helper_macros.h>
diff --git a/src/plugins/tlspicotls/CMakeLists.txt b/src/plugins/tlspicotls/CMakeLists.txt
index f23ae6ccb8a..e60a0e0ebd4 100644
--- a/src/plugins/tlspicotls/CMakeLists.txt
+++ b/src/plugins/tlspicotls/CMakeLists.txt
@@ -1,11 +1,16 @@
include (CheckFunctionExists)
+if(NOT OPENSSL_FOUND)
+ message(WARNING "OpenSSL not found - tlspicotls plugin disabled")
+ return()
+endif()
+
# We should rely on a picotls specific version, but as
# we expect dependencies to be built with vpp-ext-deps
# it's reasonable to make this check to avoid breaking
# existing builds when upgrading the quicly/picotls
# versions
-set(EXPECTED_QUICLY_VERSION "0.1.3-vpp")
+set(EXPECTED_QUICLY_VERSION "0.1.4-vpp")
vpp_find_path(QUICLY_INCLUDE_DIR NAMES quicly.h)
diff --git a/src/plugins/tlspicotls/pico_vpp_crypto.c b/src/plugins/tlspicotls/pico_vpp_crypto.c
index 9af0f2f4d92..3d28d50b352 100644
--- a/src/plugins/tlspicotls/pico_vpp_crypto.c
+++ b/src/plugins/tlspicotls/pico_vpp_crypto.c
@@ -31,19 +31,22 @@ struct cipher_context_t
{
ptls_cipher_context_t super;
vnet_crypto_op_t op;
+ vnet_crypto_op_id_t id;
u32 key_index;
};
struct vpp_aead_context_t
{
ptls_aead_context_t super;
+ EVP_CIPHER_CTX *evp_ctx;
+ uint8_t static_iv[PTLS_MAX_IV_SIZE];
vnet_crypto_op_t op;
+ u32 key_index;
+ vnet_crypto_op_id_t id;
vnet_crypto_op_chunk_t chunks[2];
vnet_crypto_alg_t alg;
- u32 key_index;
u32 chunk_index;
uint8_t iv[PTLS_MAX_IV_SIZE];
- uint8_t static_iv[PTLS_MAX_IV_SIZE];
};
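
The static_iv stored here is combined with the record sequence number by ptls_aead__build_iv() on every encrypt/decrypt; conceptually this is the TLS 1.3 per-record nonce, i.e. the 64-bit big-endian sequence number XORed into the tail of the static IV. A minimal sketch of that computation (helper name hypothetical):

/* Sketch: per-record nonce = static_iv XOR be64(seq), with the
 * sequence number right-aligned in the iv_size-byte field. */
static void
build_nonce (uint8_t *iv, const uint8_t *static_iv, uint8_t iv_size,
             uint64_t seq)
{
  int i;
  for (i = 0; i < iv_size; i++)
    iv[i] = static_iv[i];
  for (i = 0; i < 8; i++)
    iv[iv_size - 1 - i] ^= (uint8_t) (seq >> (8 * i));
}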
static void
@@ -51,23 +54,7 @@ ptls_vpp_crypto_cipher_do_init (ptls_cipher_context_t * _ctx, const void *iv)
{
struct cipher_context_t *ctx = (struct cipher_context_t *) _ctx;
- vnet_crypto_op_id_t id;
- if (!strcmp (ctx->super.algo->name, "AES128-CTR"))
- {
- id = VNET_CRYPTO_OP_AES_128_CTR_ENC;
- }
- else if (!strcmp (ctx->super.algo->name, "AES256-CTR"))
- {
- id = VNET_CRYPTO_OP_AES_256_CTR_ENC;
- }
- else
- {
- TLS_DBG (1, "%s, Invalid crypto cipher : ", __FUNCTION__,
- _ctx->algo->name);
- assert (0);
- }
-
- vnet_crypto_op_init (&ctx->op, id);
+ vnet_crypto_op_init (&ctx->op, ctx->id);
ctx->op.iv = (u8 *) iv;
ctx->op.key_index = ctx->key_index;
}
@@ -109,10 +96,14 @@ ptls_vpp_crypto_cipher_setup_crypto (ptls_cipher_context_t * _ctx, int is_enc,
if (!strcmp (ctx->super.algo->name, "AES128-CTR"))
{
algo = VNET_CRYPTO_ALG_AES_128_CTR;
+ ctx->id = is_enc ? VNET_CRYPTO_OP_AES_128_CTR_ENC :
+ VNET_CRYPTO_OP_AES_128_CTR_DEC;
}
else if (!strcmp (ctx->super.algo->name, "AES256-CTR"))
{
algo = VNET_CRYPTO_ALG_AES_256_CTR;
+ ctx->id = is_enc ? VNET_CRYPTO_OP_AES_256_CTR_ENC :
+ VNET_CRYPTO_OP_AES_256_CTR_DEC;
}
else
{
@@ -138,20 +129,22 @@ ptls_vpp_crypto_aead_decrypt (ptls_aead_context_t *_ctx, void *_output,
struct vpp_aead_context_t *ctx = (struct vpp_aead_context_t *) _ctx;
int tag_size = ctx->super.algo->tag_size;
- ctx->op.dst = _output;
- ctx->op.src = (void *) input;
- ctx->op.len = inlen - tag_size;;
+ vnet_crypto_op_init (&ctx->op, ctx->id);
+ ctx->op.aad = (u8 *) aad;
+ ctx->op.aad_len = aadlen;
ctx->op.iv = ctx->iv;
ptls_aead__build_iv (ctx->super.algo, ctx->op.iv, ctx->static_iv, seq);
- ctx->op.aad = (void *) aad;
- ctx->op.aad_len = aadlen;
- ctx->op.tag = (void *) input + inlen - tag_size;
+ ctx->op.src = (u8 *) input;
+ ctx->op.dst = _output;
+ ctx->op.key_index = ctx->key_index;
+ ctx->op.len = inlen - tag_size;
ctx->op.tag_len = tag_size;
+ ctx->op.tag = ctx->op.src + ctx->op.len;
vnet_crypto_process_ops (vm, &(ctx->op), 1);
assert (ctx->op.status == VNET_CRYPTO_OP_STATUS_COMPLETED);
- return inlen - tag_size;
+ return ctx->op.len;
}
static void
@@ -159,10 +152,13 @@ ptls_vpp_crypto_aead_encrypt_init (ptls_aead_context_t *_ctx, uint64_t seq,
const void *aad, size_t aadlen)
{
struct vpp_aead_context_t *ctx = (struct vpp_aead_context_t *) _ctx;
- ctx->op.iv = ctx->iv;
- ptls_aead__build_iv (ctx->super.algo, ctx->op.iv, ctx->static_iv, seq);
+
+ vnet_crypto_op_init (&ctx->op, ctx->id);
ctx->op.aad = (void *) aad;
ctx->op.aad_len = aadlen;
+ ctx->op.iv = ctx->iv;
+ ptls_aead__build_iv (ctx->super.algo, ctx->op.iv, ctx->static_iv, seq);
+ ctx->op.key_index = ctx->key_index;
ctx->op.n_chunks = 2;
ctx->op.chunk_index = 0;
@@ -201,7 +197,12 @@ ptls_vpp_crypto_aead_encrypt_final (ptls_aead_context_t * _ctx, void *_output)
static void
ptls_vpp_crypto_aead_dispose_crypto (ptls_aead_context_t * _ctx)
{
- /* Do nothing */
+ vlib_main_t *vm = vlib_get_main ();
+ struct vpp_aead_context_t *ctx = (struct vpp_aead_context_t *) _ctx;
+
+ clib_rwlock_writer_lock (&picotls_main.crypto_keys_rw_lock);
+ vnet_crypto_key_del (vm, ctx->key_index);
+ clib_rwlock_writer_unlock (&picotls_main.crypto_keys_rw_lock);
}
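
With the key index now stored in the context rather than in the op, key creation (in the setup path below) and this deletion pair up under the same writer lock. A condensed sketch of the lifecycle, using the calls visible in this diff:

/* Sketch: vnet crypto key lifecycle as used by this engine. */
clib_rwlock_writer_lock (&picotls_main.crypto_keys_rw_lock);
ctx->key_index = vnet_crypto_key_add (vm, alg, (void *) key, key_len);
clib_rwlock_writer_unlock (&picotls_main.crypto_keys_rw_lock);
/* ... per-record ops reference ctx->key_index ... */
clib_rwlock_writer_lock (&picotls_main.crypto_keys_rw_lock);
vnet_crypto_key_del (vm, ctx->key_index);
clib_rwlock_writer_unlock (&picotls_main.crypto_keys_rw_lock);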
static int
@@ -213,23 +214,15 @@ ptls_vpp_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc,
struct vpp_aead_context_t *ctx = (struct vpp_aead_context_t *) _ctx;
u16 key_len = ctx->super.algo->key_size;
- memset (&(ctx->op), 0, sizeof (vnet_crypto_op_t));
-
if (alg == VNET_CRYPTO_ALG_AES_128_GCM)
{
- if (is_enc)
- vnet_crypto_op_init (&(ctx->op), VNET_CRYPTO_OP_AES_128_GCM_ENC);
- else
- vnet_crypto_op_init (&(ctx->op), VNET_CRYPTO_OP_AES_128_GCM_DEC);
+ ctx->id = is_enc ? VNET_CRYPTO_OP_AES_128_GCM_ENC :
+ VNET_CRYPTO_OP_AES_128_GCM_DEC;
}
else if (alg == VNET_CRYPTO_ALG_AES_256_GCM)
{
- if (is_enc)
- {
- vnet_crypto_op_init (&(ctx->op), VNET_CRYPTO_OP_AES_256_GCM_ENC);
- }
- else
- vnet_crypto_op_init (&(ctx->op), VNET_CRYPTO_OP_AES_256_GCM_DEC);
+ ctx->id = is_enc ? VNET_CRYPTO_OP_AES_256_GCM_ENC :
+ VNET_CRYPTO_OP_AES_256_GCM_DEC;
}
else
{
@@ -239,18 +232,23 @@ ptls_vpp_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc,
}
ctx->alg = alg;
+ ctx->chunk_index = 0;
+ clib_memcpy (ctx->static_iv, iv, ctx->super.algo->iv_size);
clib_rwlock_writer_lock (&picotls_main.crypto_keys_rw_lock);
- ctx->op.key_index =
- vnet_crypto_key_add (vm, ctx->alg, (void *) key, key_len);
+ ctx->key_index = vnet_crypto_key_add (vm, alg, (void *) key, key_len);
clib_rwlock_writer_unlock (&picotls_main.crypto_keys_rw_lock);
- ctx->chunk_index = 0;
- clib_memcpy (ctx->static_iv, iv, ctx->super.algo->iv_size);
- ctx->super.do_decrypt = ptls_vpp_crypto_aead_decrypt;
- ctx->super.do_encrypt_init = ptls_vpp_crypto_aead_encrypt_init;
- ctx->super.do_encrypt_update = ptls_vpp_crypto_aead_encrypt_update;
- ctx->super.do_encrypt_final = ptls_vpp_crypto_aead_encrypt_final;
+ if (is_enc)
+ {
+ ctx->super.do_encrypt_init = ptls_vpp_crypto_aead_encrypt_init;
+ ctx->super.do_encrypt_update = ptls_vpp_crypto_aead_encrypt_update;
+ ctx->super.do_encrypt_final = ptls_vpp_crypto_aead_encrypt_final;
+ }
+ else
+ {
+ ctx->super.do_decrypt = ptls_vpp_crypto_aead_decrypt;
+ }
ctx->super.dispose_crypto = ptls_vpp_crypto_aead_dispose_crypto;
return 0;
@@ -308,6 +306,7 @@ ptls_cipher_algorithm_t ptls_vpp_crypto_aes256ctr = {
ptls_vpp_crypto_aes256ctr_setup_crypto
};
+#define PTLS_X86_CACHE_LINE_ALIGN_BITS 6
ptls_aead_algorithm_t ptls_vpp_crypto_aes128gcm = {
"AES128-GCM",
PTLS_AESGCM_CONFIDENTIALITY_LIMIT,
@@ -317,6 +316,9 @@ ptls_aead_algorithm_t ptls_vpp_crypto_aes128gcm = {
PTLS_AES128_KEY_SIZE,
PTLS_AESGCM_IV_SIZE,
PTLS_AESGCM_TAG_SIZE,
+ { PTLS_TLS12_AESGCM_FIXED_IV_SIZE, PTLS_TLS12_AESGCM_RECORD_IV_SIZE },
+ 1,
+ PTLS_X86_CACHE_LINE_ALIGN_BITS,
sizeof (struct vpp_aead_context_t),
ptls_vpp_crypto_aead_aes128gcm_setup_crypto
};
@@ -330,6 +332,9 @@ ptls_aead_algorithm_t ptls_vpp_crypto_aes256gcm = {
PTLS_AES256_KEY_SIZE,
PTLS_AESGCM_IV_SIZE,
PTLS_AESGCM_TAG_SIZE,
+ { PTLS_TLS12_AESGCM_FIXED_IV_SIZE, PTLS_TLS12_AESGCM_RECORD_IV_SIZE },
+ 1,
+ PTLS_X86_CACHE_LINE_ALIGN_BITS,
sizeof (struct vpp_aead_context_t),
ptls_vpp_crypto_aead_aes256gcm_setup_crypto
};
diff --git a/src/plugins/tlspicotls/tls_picotls.c b/src/plugins/tlspicotls/tls_picotls.c
index ef02f66a552..7375b928206 100644
--- a/src/plugins/tlspicotls/tls_picotls.c
+++ b/src/plugins/tlspicotls/tls_picotls.c
@@ -27,11 +27,11 @@ static ptls_key_exchange_algorithm_t *default_key_exchange[] = {
static u32
picotls_ctx_alloc (void)
{
- u8 thread_id = vlib_get_thread_index ();
+ u32 thread_id = vlib_get_thread_index ();
picotls_main_t *pm = &picotls_main;
picotls_ctx_t **ctx;
- pool_get (pm->ctx_pool[thread_id], ctx);
+ pool_get_aligned_safe (pm->ctx_pool[thread_id], ctx, CLIB_CACHE_LINE_BYTES);
if (!(*ctx))
*ctx = clib_mem_alloc (sizeof (picotls_ctx_t));
@@ -48,7 +48,7 @@ picotls_ctx_free (tls_ctx_t * ctx)
{
picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
vec_free (ptls_ctx->rx_content);
- vec_free (ptls_ctx->write_content);
+ ptls_free (ptls_ctx->tls);
pool_put_index (picotls_main.ctx_pool[ctx->c_thread_index],
ptls_ctx->ptls_ctx_idx);
}
@@ -179,8 +179,7 @@ picotls_stop_listen (tls_ctx_t * lctx)
static void
picotls_handle_handshake_failure (tls_ctx_t * ctx)
{
- session_free (session_get (ctx->c_s_index, ctx->c_thread_index));
- ctx->no_app_session = 1;
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
ctx->c_s_index = SESSION_INVALID_INDEX;
tls_disconnect_transport (ctx);
}
@@ -200,13 +199,27 @@ picotls_transport_close (tls_ctx_t * ctx)
picotls_handle_handshake_failure (ctx);
return 0;
}
- picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
- ptls_free (ptls_ctx->tls);
session_transport_closing_notify (&ctx->connection);
return 0;
}
static int
+picotls_transport_reset (tls_ctx_t *ctx)
+{
+ if (!picotls_handshake_is_over (ctx))
+ {
+ picotls_handle_handshake_failure (ctx);
+ return 0;
+ }
+
+ session_transport_reset_notify (&ctx->connection);
+ session_transport_closed_notify (&ctx->connection);
+ tls_disconnect_transport (ctx);
+
+ return 0;
+}
+
+static int
picotls_app_close (tls_ctx_t * ctx)
{
session_t *app_session;
@@ -215,309 +228,435 @@ picotls_app_close (tls_ctx_t * ctx)
if (!svm_fifo_max_dequeue_cons (app_session->tx_fifo))
picotls_confirm_app_close (ctx);
else
- ctx->app_closed = 1;
+ ctx->flags |= TLS_CONN_F_APP_CLOSED;
return 0;
}
static inline int
-picotls_do_handshake (picotls_ctx_t * ptls_ctx, session_t * tls_session,
- u8 * input, int input_len)
+picotls_do_handshake (picotls_ctx_t *ptls_ctx, session_t *tcp_session)
{
+ int rv = PTLS_ERROR_IN_PROGRESS, write = 0, i = 0, read = 0, len;
+ svm_fifo_t *tcp_rx_fifo = tcp_session->rx_fifo;
+ ptls_buffer_t *buf = &ptls_ctx->read_buffer;
+ u32 n_segs = 2, max_len = 16384;
ptls_t *tls = ptls_ctx->tls;
- ptls_buffer_t buf;
- int rv = PTLS_ERROR_IN_PROGRESS;
- int write = 0, off;
+ svm_fifo_seg_t fs[n_segs];
+ uword deq_now;
+
+ ptls_buffer_init (buf, "", 0);
+
+ len = svm_fifo_segments (tcp_rx_fifo, 0, fs, &n_segs, max_len);
+ if (len <= 0)
+ return 0;
- do
+ while (read < len && i < n_segs)
{
- off = 0;
- do
+ deq_now = fs[i].len;
+ rv = ptls_handshake (tls, buf, fs[i].data, &deq_now, NULL);
+
+ write += picotls_try_handshake_write (ptls_ctx, tcp_session, buf);
+ read += deq_now;
+
+ if (!(rv == 0 || rv == PTLS_ERROR_IN_PROGRESS))
{
- ptls_buffer_init (&buf, "", 0);
- size_t consumed = input_len - off;
- rv = ptls_handshake (tls, &buf, input + off, &consumed, NULL);
- off += consumed;
- ptls_ctx->rx_offset += consumed;
- if ((rv == 0 || rv == PTLS_ERROR_IN_PROGRESS) && buf.off != 0)
- {
- write = picotls_try_handshake_write (ptls_ctx, tls_session,
- &buf);
- }
- ptls_buffer_dispose (&buf);
+ clib_error ("unexpected error %u", rv);
+ break;
}
- while (rv == PTLS_ERROR_IN_PROGRESS && input_len != off);
+
+ if (!rv)
+ break;
+
+ if (deq_now < fs[i].len)
+ {
+ fs[i].data += deq_now;
+ fs[i].len -= deq_now;
+ }
+ else
+ i++;
}
- while (rv == PTLS_ERROR_IN_PROGRESS);
+
+ if (read)
+ svm_fifo_dequeue_drop (tcp_rx_fifo, read);
+
+ ptls_buffer_dispose (buf);
return write;
}
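
The rewritten handshake path reads TCP data in place through fifo segments instead of dequeuing into a private rx buffer. The general pattern, a sketch built from the same svm_fifo calls used above (segment count and max length illustrative):

/* Sketch: consume fifo data in place, then drop only what was read. */
svm_fifo_seg_t fs[2];
u32 n_segs = 2;
int len = svm_fifo_segments (rx_fifo, 0 /* offset */, fs, &n_segs, 16384);
if (len > 0)
  {
    /* parse fs[0] .. fs[n_segs - 1] without copying ... */
    svm_fifo_dequeue_drop (rx_fifo, len);
  }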
static inline int
-picotls_ctx_read (tls_ctx_t * ctx, session_t * tls_session)
+ptls_copy_buf_to_fs (ptls_buffer_t *buf, u32 to_copy, svm_fifo_seg_t *fs,
+ u32 *fs_idx, u32 max_fs)
{
- picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
- int from_tls_len = 0, off, crypto_len, ret;
- u32 deq_max, deq_now;
- u32 enq_max;
- ptls_buffer_t *buf = &ptls_ctx->read_buffer;
- svm_fifo_t *tls_rx_fifo, *app_rx_fifo;
- session_t *app_session;
-
- tls_rx_fifo = tls_session->rx_fifo;
+ u32 idx = *fs_idx;
- if (!picotls_handshake_is_over (ctx))
+ while (to_copy)
{
- deq_max = svm_fifo_max_dequeue_cons (tls_rx_fifo);
- if (!deq_max)
- goto done_hs;
-
- vec_validate (ptls_ctx->rx_content, deq_max);
- ptls_ctx->rx_offset = 0;
- ptls_ctx->rx_len = 0;
+ if (fs[idx].len <= to_copy)
+ {
+ clib_memcpy_fast (fs[idx].data, buf->base + (buf->off - to_copy),
+ fs[idx].len);
+ to_copy -= fs[idx].len;
+ idx += 1;
+ /* no more space in the app's rx fifo */
+ if (idx == max_fs)
+ break;
+ }
+ else
+ {
+ clib_memcpy_fast (fs[idx].data, buf->base + (buf->off - to_copy),
+ to_copy);
+ fs[idx].len -= to_copy;
+ fs[idx].data += to_copy;
+ to_copy = 0;
+ }
+ }
- off = svm_fifo_dequeue (tls_rx_fifo, deq_max, TLS_RX_LEN (ptls_ctx));
- from_tls_len += off;
- ptls_ctx->rx_len += off;
+ *fs_idx = idx;
- picotls_do_handshake (ptls_ctx, tls_session, TLS_RX_OFFSET (ptls_ctx),
- from_tls_len);
- if (picotls_handshake_is_over (ctx))
- ret = ptls_is_server (ptls_ctx->tls) ?
- tls_notify_app_accept (ctx) :
- tls_notify_app_connected (ctx, SESSION_E_NONE);
+ return to_copy;
+}
- done_hs:
- if (!TLS_RX_IS_LEFT (ptls_ctx))
- return 0;
- }
+static u32
+ptls_tcp_to_app_write (picotls_ctx_t *ptls_ctx, svm_fifo_t *app_rx_fifo,
+ svm_fifo_t *tcp_rx_fifo)
+{
+ u32 ai = 0, thread_index, min_buf_len, to_copy, left, wrote = 0;
+ ptls_buffer_t *buf = &ptls_ctx->read_buffer;
+ int ret, i = 0, read = 0, tcp_len, n_fs_app;
+ u32 n_segs = 4, max_len = 1 << 16;
+ svm_fifo_seg_t tcp_fs[n_segs], app_fs[n_segs];
+ picotls_main_t *pm = &picotls_main;
+ uword deq_now;
+ u8 is_nocopy;
- app_session = session_get_from_handle (ctx->app_session_handle);
- app_rx_fifo = app_session->rx_fifo;
+ thread_index = ptls_ctx->ctx.c_thread_index;
- if (TLS_READ_IS_LEFT (ptls_ctx))
- goto enq_buf;
+ n_fs_app = svm_fifo_provision_chunks (app_rx_fifo, app_fs, n_segs, max_len);
+ if (n_fs_app <= 0)
+ return 0;
- ptls_buffer_init (buf, "", 0);
- ptls_ctx->read_buffer_offset = 0;
+ tcp_len = svm_fifo_segments (tcp_rx_fifo, 0, tcp_fs, &n_segs, max_len);
+ if (tcp_len <= 0)
+ return 0;
- if (!TLS_RX_IS_LEFT (ptls_ctx))
+ if (ptls_ctx->read_buffer_offset)
{
- deq_max = svm_fifo_max_dequeue_cons (tls_rx_fifo);
- if (!deq_max)
- goto app_fifo;
-
- deq_now = clib_min (deq_max, svm_fifo_max_read_chunk (tls_rx_fifo));
+ to_copy = buf->off - ptls_ctx->read_buffer_offset;
+ left = ptls_copy_buf_to_fs (buf, to_copy, app_fs, &ai, n_fs_app);
+ wrote += to_copy - left;
+ if (left)
+ {
+ ptls_ctx->read_buffer_offset = buf->off - left;
+ goto do_checks;
+ }
+ ptls_ctx->read_buffer_offset = 0;
+ }
- if (PREDICT_FALSE (deq_now < deq_max))
+ while (ai < n_fs_app && read < tcp_len)
+ {
+ deq_now = clib_min (tcp_fs[i].len, tcp_len - read);
+ min_buf_len = deq_now + (16 << 10);
+ is_nocopy = app_fs[ai].len < min_buf_len ? 0 : 1;
+ if (is_nocopy)
{
- off =
- svm_fifo_dequeue (tls_rx_fifo, deq_max, TLS_RX_LEN (ptls_ctx));
- from_tls_len += off;
- ptls_ctx->rx_len += off;
+ ptls_buffer_init (buf, app_fs[ai].data, app_fs[ai].len);
+ ret = ptls_receive (ptls_ctx->tls, buf, tcp_fs[i].data, &deq_now);
+ assert (ret == 0 || ret == PTLS_ERROR_IN_PROGRESS);
+
+ wrote += buf->off;
+ if (buf->off == app_fs[ai].len)
+ {
+ ai++;
+ }
+ else
+ {
+ app_fs[ai].len -= buf->off;
+ app_fs[ai].data += buf->off;
+ }
}
else
{
- ret =
- ptls_receive (ptls_ctx->tls, buf, svm_fifo_head (tls_rx_fifo),
- (size_t *) & deq_now);
- svm_fifo_dequeue_drop (tls_rx_fifo, deq_now);
- goto enq_buf;
- }
- }
+ vec_validate (pm->rx_bufs[thread_index], min_buf_len);
+ ptls_buffer_init (buf, pm->rx_bufs[thread_index], min_buf_len);
+ ret = ptls_receive (ptls_ctx->tls, buf, tcp_fs[i].data, &deq_now);
+ assert (ret == 0 || ret == PTLS_ERROR_IN_PROGRESS);
-app_fifo:
+ left = ptls_copy_buf_to_fs (buf, buf->off, app_fs, &ai, n_fs_app);
+ if (!left)
+ {
+ ptls_ctx->read_buffer_offset = 0;
+ wrote += buf->off;
+ }
+ else
+ {
+ ptls_ctx->read_buffer_offset = buf->off - left;
+ wrote += ptls_ctx->read_buffer_offset;
+ }
+ }
- enq_max = svm_fifo_max_enqueue_prod (app_rx_fifo);
- if (!enq_max)
- goto final;
+ assert (deq_now <= tcp_fs[i].len);
+ read += deq_now;
+ if (deq_now < tcp_fs[i].len)
+ {
+ tcp_fs[i].data += deq_now;
+ tcp_fs[i].len -= deq_now;
+ }
+ else
+ i++;
+ }
- crypto_len = clib_min (enq_max, TLS_RX_LEFT_LEN (ptls_ctx));
- off = 0;
+do_checks:
- do
+ if (read)
{
- size_t consumed = crypto_len - off;
- ret =
- ptls_receive (ptls_ctx->tls, buf,
- TLS_RX_OFFSET (ptls_ctx), &consumed);
- off += consumed;
- ptls_ctx->rx_offset += off;
+ svm_fifo_dequeue_drop (tcp_rx_fifo, read);
+ if (svm_fifo_needs_deq_ntf (tcp_rx_fifo, read))
+ {
+ svm_fifo_clear_deq_ntf (tcp_rx_fifo);
+ session_send_io_evt_to_thread (tcp_rx_fifo, SESSION_IO_EVT_RX);
+ }
}
- while (ret == 0 && off < crypto_len);
-enq_buf:
+ if (wrote)
+ svm_fifo_enqueue_nocopy (app_rx_fifo, wrote);
- off =
- svm_fifo_enqueue (app_rx_fifo, TLS_READ_LEFT_LEN (ptls_ctx),
- TLS_READ_OFFSET (ptls_ctx));
- if (off < 0)
- {
- tls_add_vpp_q_builtin_rx_evt (tls_session);
- return 0;
- }
+ return wrote;
+}
+
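
ptls_tcp_to_app_write() above picks between two ptls_buffer_init() destinations: when the next app rx fifo chunk is large enough, ptls_receive() decrypts straight into the fifo (nocopy); otherwise it decrypts into a per-thread bounce buffer and copies out afterwards. A condensed sketch of the choice, mirroring the code above:

/* Sketch: the two decrypt destinations used by the rx path. */
u32 min_buf_len = deq_now + (16 << 10);
if (app_fs[ai].len >= min_buf_len) /* nocopy: decrypt into the fifo */
  ptls_buffer_init (buf, app_fs[ai].data, app_fs[ai].len);
else /* bounce: per-thread scratch buffer */
  {
    vec_validate (pm->rx_bufs[thread_index], min_buf_len);
    ptls_buffer_init (buf, pm->rx_bufs[thread_index], min_buf_len);
  }
ptls_receive (ptls_ctx->tls, buf, tcp_fs[i].data, &deq_now);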
+static inline int
+picotls_ctx_read (tls_ctx_t *ctx, session_t *tcp_session)
+{
+ picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
+ svm_fifo_t *tcp_rx_fifo;
+ session_t *app_session;
+ int wrote;
- ptls_ctx->read_buffer_offset += off;
- if (!TLS_RX_IS_LEFT (ptls_ctx))
+ if (PREDICT_FALSE (!ptls_handshake_is_complete (ptls_ctx->tls)))
{
- ptls_ctx->rx_len = 0;
- ptls_ctx->rx_offset = 0;
+ picotls_do_handshake (ptls_ctx, tcp_session);
+ if (picotls_handshake_is_over (ctx))
+ {
+ if (ptls_is_server (ptls_ctx->tls))
+ {
+ if (tls_notify_app_accept (ctx))
+ {
+ ctx->c_s_index = SESSION_INVALID_INDEX;
+ tls_disconnect_transport (ctx);
+ return -1;
+ }
+ }
+ else
+ {
+ tls_notify_app_connected (ctx, SESSION_E_NONE);
+ }
+ }
+
+ ctx->flags |= TLS_CONN_F_HS_DONE;
+ if (!svm_fifo_max_dequeue (tcp_session->rx_fifo))
+ return 0;
}
-final:
- ptls_buffer_dispose (buf);
+ tcp_rx_fifo = tcp_session->rx_fifo;
+ app_session = session_get_from_handle (ctx->app_session_handle);
+ wrote = ptls_tcp_to_app_write (ptls_ctx, app_session->rx_fifo, tcp_rx_fifo);
- if (app_session->session_state >= SESSION_STATE_READY)
+ if (wrote)
tls_notify_app_enqueue (ctx, app_session);
- if (TLS_RX_IS_LEFT (ptls_ctx) || TLS_READ_IS_LEFT (ptls_ctx)
- || svm_fifo_max_dequeue (tls_rx_fifo))
- tls_add_vpp_q_builtin_rx_evt (tls_session);
+ if (ptls_ctx->read_buffer_offset || svm_fifo_max_dequeue (tcp_rx_fifo))
+ tls_add_vpp_q_builtin_rx_evt (tcp_session);
- return from_tls_len;
+ return wrote;
}
-static inline int
-picotls_content_process (picotls_ctx_t * ptls_ctx, svm_fifo_t * src_fifo,
- svm_fifo_t * dst_fifo, int content_len,
- int total_record_overhead, int is_no_copy)
+static inline u32
+ptls_compute_deq_len (picotls_ctx_t *ptls_ctx, u32 dst_chunk, u32 src_chunk,
+ u32 dst_space, u8 *is_nocopy)
{
- ptls_buffer_t *buf = &ptls_ctx->write_buffer;
- int total_length = content_len + total_record_overhead;
- int to_dst_len;
- if (is_no_copy)
- {
- ptls_buffer_init (buf, svm_fifo_tail (dst_fifo), total_length);
- ptls_send (ptls_ctx->tls, buf, svm_fifo_head (src_fifo), content_len);
-
- assert (!buf->is_allocated);
- assert (buf->base == svm_fifo_tail (dst_fifo));
+ int record_overhead = ptls_get_record_overhead (ptls_ctx->tls);
+ int num_records;
+ u32 deq_len, total_overhead;
- svm_fifo_dequeue_drop (src_fifo, content_len);
- svm_fifo_enqueue_nocopy (dst_fifo, buf->off);
- to_dst_len = buf->off;
+ if (dst_chunk >= clib_min (8192, src_chunk + record_overhead))
+ {
+ *is_nocopy = 1;
+ deq_len = clib_min (src_chunk, dst_chunk);
+ num_records = ceil ((f64) deq_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
+ total_overhead = num_records * record_overhead;
+ if (deq_len + total_overhead > dst_chunk)
+ deq_len = dst_chunk - total_overhead;
}
else
{
- assert (!TLS_WRITE_IS_LEFT (ptls_ctx));
- vec_validate (ptls_ctx->write_content, total_length);
- ptls_buffer_init (buf, ptls_ctx->write_content, total_length);
-
- ptls_send (ptls_ctx->tls, buf, svm_fifo_head (src_fifo), content_len);
- svm_fifo_dequeue_drop (src_fifo, content_len);
-
- to_dst_len = svm_fifo_enqueue (dst_fifo, buf->off, buf->base);
+ deq_len = clib_min (src_chunk, dst_space);
+ num_records = ceil ((f64) deq_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
+ total_overhead = num_records * record_overhead;
+ if (deq_len + total_overhead > dst_space)
+ deq_len = dst_space - total_overhead;
}
- ptls_ctx->write_buffer_offset += to_dst_len;
- return to_dst_len;
+
+ return deq_len;
}
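
The overhead math above caps the cleartext dequeue so the resulting ciphertext still fits in the destination: a payload of deq_len bytes spans ceil(deq_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE) records, each of which adds a fixed overhead. A worked sketch; the 22-byte figure is an assumption, typical for TLS 1.3 AES-GCM (5-byte header + 1-byte content type + 16-byte tag):

/* Sketch: 40000 cleartext bytes with 16384-byte max records and
 * 22 bytes/record -> ceil (40000 / 16384) = 3 records, 66 bytes of
 * overhead, so the ciphertext needs 40066 bytes of space. */
u32 deq_len = 40000, record_overhead = 22;
u32 num_records = (deq_len + PTLS_MAX_PLAINTEXT_RECORD_SIZE - 1) /
                  PTLS_MAX_PLAINTEXT_RECORD_SIZE;
u32 total_overhead = num_records * record_overhead; /* 3 * 22 = 66 */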
-static inline int
-picotls_ctx_write (tls_ctx_t * ctx, session_t * app_session,
- transport_send_params_t * sp)
+static u32
+ptls_app_to_tcp_write (picotls_ctx_t *ptls_ctx, session_t *app_session,
+ svm_fifo_t *tcp_tx_fifo, u32 max_len)
{
- picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
- u32 deq_max, deq_now;
- u32 enq_max, enq_now;
- int from_app_len = 0, to_tls_len = 0, is_nocopy = 0;
- svm_fifo_t *tls_tx_fifo, *app_tx_fifo;
- session_t *tls_session;
-
- int record_overhead = ptls_get_record_overhead (ptls_ctx->tls);
- int num_records, total_overhead;
+ u32 wrote = 0, max_enq, thread_index, app_buf_len, left, ti = 0;
+ int read = 0, rv, i = 0, len, n_tcp_segs = 4, deq_len;
+ u32 n_app_segs = 2, min_chunk = 2048;
+ svm_fifo_seg_t app_fs[n_app_segs], tcp_fs[n_tcp_segs];
+ picotls_main_t *pm = &picotls_main;
+ ptls_buffer_t _buf, *buf = &_buf;
+ svm_fifo_t *app_tx_fifo;
+ u8 is_nocopy, *app_buf;
+ u32 first_chunk_len;
- tls_session = session_get_from_handle (ctx->tls_session_handle);
- tls_tx_fifo = tls_session->tx_fifo;
+ thread_index = app_session->thread_index;
app_tx_fifo = app_session->tx_fifo;
- if (PREDICT_FALSE (TLS_WRITE_IS_LEFT (ptls_ctx)))
+ len = svm_fifo_segments (app_tx_fifo, 0, app_fs, &n_app_segs, max_len);
+ if (len <= 0)
+ return 0;
+
+ n_tcp_segs = svm_fifo_provision_chunks (tcp_tx_fifo, tcp_fs, n_tcp_segs,
+ 1000 + max_len);
+ if (n_tcp_segs <= 0)
+ return 0;
+
+ while ((left = len - read) && ti < n_tcp_segs)
{
- enq_max = svm_fifo_max_enqueue_prod (tls_tx_fifo);
- int to_write = clib_min (enq_max,
- ptls_ctx->write_buffer.off -
- ptls_ctx->write_buffer_offset);
- to_tls_len =
- svm_fifo_enqueue (tls_tx_fifo, to_write, TLS_WRITE_OFFSET (ptls_ctx));
- if (to_tls_len < 0)
- {
- app_session->flags |= SESSION_F_CUSTOM_TX;
- return 0;
- }
- ptls_ctx->write_buffer_offset += to_tls_len;
+ /* If we wrote something and are left with few bytes, postpone write
+ * as we may be able to encrypt a bigger chunk next time */
+ if (wrote && left < min_chunk)
+ break;
- if (TLS_WRITE_IS_LEFT (ptls_ctx))
+ /* Avoid short records if possible */
+ if (app_fs[i].len < min_chunk && min_chunk < left)
{
- app_session->flags |= SESSION_F_CUSTOM_TX;
- return to_tls_len;
+ app_buf_len = app_fs[i].len + app_fs[i + 1].len;
+ app_buf = pm->rx_bufs[thread_index];
+ vec_validate (pm->rx_bufs[thread_index], app_buf_len);
+ clib_memcpy_fast (pm->rx_bufs[thread_index], app_fs[i].data,
+ app_fs[i].len);
+ clib_memcpy_fast (pm->rx_bufs[thread_index] + app_fs[i].len,
+ app_fs[i + 1].data, app_buf_len - app_fs[i].len);
+ first_chunk_len = app_fs[i].len;
+ i += 1;
}
else
{
- ptls_buffer_init (&ptls_ctx->write_buffer, "", 0);
- ptls_ctx->write_buffer_offset = 0;
+ app_buf = app_fs[i].data;
+ app_buf_len = app_fs[i].len;
+ first_chunk_len = 0;
}
- }
+ is_nocopy = 0;
+ max_enq = tcp_fs[ti].len;
+ max_enq += ti < (n_tcp_segs - 1) ? tcp_fs[ti + 1].len : 0;
- deq_max = svm_fifo_max_dequeue_cons (app_tx_fifo);
- if (!deq_max)
- return deq_max;
+ deq_len = ptls_compute_deq_len (ptls_ctx, tcp_fs[ti].len, app_buf_len,
+ max_enq, &is_nocopy);
+ if (is_nocopy)
+ {
+ ptls_buffer_init (buf, tcp_fs[ti].data, tcp_fs[ti].len);
+ rv = ptls_send (ptls_ctx->tls, buf, app_buf, deq_len);
- deq_now = clib_min (deq_max, sp->max_burst_size);
- deq_now = clib_min (deq_now, svm_fifo_max_read_chunk (app_tx_fifo));
+ assert (rv == 0);
+ wrote += buf->off;
- enq_max = svm_fifo_max_enqueue_prod (tls_tx_fifo);
- /** There is no engough enqueue space for one record **/
- if (enq_max <= record_overhead)
- {
- app_session->flags |= SESSION_F_CUSTOM_TX;
- return 0;
- }
+ tcp_fs[ti].len -= buf->off;
+ tcp_fs[ti].data += buf->off;
+ if (!tcp_fs[ti].len)
+ ti += 1;
+ }
+ else
+ {
+ vec_validate (pm->tx_bufs[thread_index], max_enq);
+ ptls_buffer_init (buf, pm->tx_bufs[thread_index], max_enq);
+ rv = ptls_send (ptls_ctx->tls, buf, app_buf, deq_len);
- enq_now = clib_min (enq_max, svm_fifo_max_write_chunk (tls_tx_fifo));
+ assert (rv == 0);
+ wrote += buf->off;
- /** Allowed to execute no-copy crypto operation **/
- if (enq_now > record_overhead)
- {
- is_nocopy = 1;
- from_app_len = clib_min (deq_now, enq_now);
- num_records =
- ceil ((f64) from_app_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
- total_overhead = num_records * record_overhead;
- if (from_app_len + total_overhead > enq_now)
- from_app_len = enq_now - total_overhead;
+ left = ptls_copy_buf_to_fs (buf, buf->off, tcp_fs, &ti, n_tcp_segs);
+ assert (left == 0);
+ }
+
+ read += deq_len;
+ ASSERT (deq_len >= first_chunk_len);
+
+ if (deq_len == app_buf_len)
+ {
+ i += 1;
+ }
+ else
+ {
+ app_fs[i].len -= deq_len - first_chunk_len;
+ app_fs[i].data += deq_len - first_chunk_len;
+ }
}
- else
+
+ if (read)
{
- from_app_len = clib_min (deq_now, enq_max);
- num_records =
- ceil ((f64) from_app_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
- total_overhead = num_records * record_overhead;
- if (from_app_len + total_overhead > enq_max)
- from_app_len = enq_max - total_overhead;
+ svm_fifo_dequeue_drop (app_tx_fifo, read);
+ if (svm_fifo_needs_deq_ntf (app_tx_fifo, read))
+ session_dequeue_notify (app_session);
}
- to_tls_len =
- picotls_content_process (ptls_ctx, app_tx_fifo, tls_tx_fifo,
- from_app_len, total_overhead, is_nocopy);
- if (!TLS_WRITE_IS_LEFT (ptls_ctx))
+ if (wrote)
{
- ptls_ctx->write_buffer_offset = 0;
- ptls_buffer_init (&ptls_ctx->write_buffer, "", 0);
+ svm_fifo_enqueue_nocopy (tcp_tx_fifo, wrote);
+ if (svm_fifo_set_event (tcp_tx_fifo))
+ session_send_io_evt_to_thread (tcp_tx_fifo, SESSION_IO_EVT_TX);
}
- if (svm_fifo_needs_deq_ntf (app_tx_fifo, from_app_len))
- session_dequeue_notify (app_session);
+ return wrote;
+}
+
+static inline int
+picotls_ctx_write (tls_ctx_t *ctx, session_t *app_session,
+ transport_send_params_t *sp)
+{
+ picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
+ u32 deq_max, deq_now, enq_max, enq_buf, wrote = 0;
+ svm_fifo_t *tcp_tx_fifo;
+ session_t *tcp_session;
- if (to_tls_len)
- tls_add_vpp_q_tx_evt (tls_session);
+ tcp_session = session_get_from_handle (ctx->tls_session_handle);
+ tcp_tx_fifo = tcp_session->tx_fifo;
- if (from_app_len < deq_max || TLS_WRITE_IS_LEFT (ptls_ctx))
- app_session->flags |= SESSION_F_CUSTOM_TX;
+ enq_max = svm_fifo_max_enqueue_prod (tcp_tx_fifo);
+ if (enq_max < 2048)
+ goto check_tls_fifo;
+
+ deq_max = svm_fifo_max_dequeue_cons (app_session->tx_fifo);
+ deq_max = clib_min (deq_max, enq_max);
+ if (!deq_max)
+ goto check_tls_fifo;
+
+ deq_now = clib_min (deq_max, sp->max_burst_size);
+ wrote = ptls_app_to_tcp_write (ptls_ctx, app_session, tcp_tx_fifo, deq_now);
+
+check_tls_fifo:
- if (ctx->app_closed)
+ if (ctx->flags & TLS_CONN_F_APP_CLOSED)
picotls_app_close (ctx);
- return to_tls_len;
+ /* Deschedule and wait for deq notification if fifo is almost full */
+ enq_buf = clib_min (svm_fifo_size (tcp_tx_fifo) / 2, TLSP_MIN_ENQ_SPACE);
+ if (enq_max < wrote + enq_buf)
+ {
+ svm_fifo_add_want_deq_ntf (tcp_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ transport_connection_deschedule (&ctx->connection);
+ sp->flags |= TRANSPORT_SND_F_DESCHED;
+ }
+ else
+ /* Request tx reschedule of the app session */
+ app_session->flags |= SESSION_F_CUSTOM_TX;
+
+ return wrote;
}
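
The deschedule branch above is tx backpressure: if less than min(fifo_size / 2, TLSP_MIN_ENQ_SPACE) would remain free in the TCP tx fifo after this write, the connection parks itself until a dequeue notification fires. A numeric sketch under assumed sizes:

/* Sketch: 256 KB tcp tx fifo -> enq_buf = clib_min (128 KB, 64 KB)
 * = 64 KB. With enq_max = 70 KB free and wrote = 10 KB,
 * 70 KB < 10 KB + 64 KB, so the session deschedules and waits for
 * the SVM_FIFO_WANT_DEQ_NOTIF event. */
u32 enq_buf = clib_min (svm_fifo_size (tcp_tx_fifo) / 2, TLSP_MIN_ENQ_SPACE);
int must_desched = enq_max < wrote + enq_buf;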
static int
@@ -538,7 +677,6 @@ picotls_ctx_init_server (tls_ctx_t * ctx)
ptls_ctx->rx_len = 0;
ptls_ctx->rx_offset = 0;
- ptls_ctx->write_buffer_offset = 0;
return 0;
}
@@ -562,7 +700,6 @@ picotls_ctx_init_client (tls_ctx_t *ctx)
ptls_ctx->rx_len = 0;
ptls_ctx->rx_offset = 0;
- ptls_ctx->write_buffer_offset = 0;
ptls_buffer_init (&hs_buf, "", 0);
if (ptls_handshake (ptls_ctx->tls, &hs_buf, NULL, NULL, &hsprop) !=
@@ -601,6 +738,13 @@ picotls_init_client_ptls_ctx (ptls_context_t **client_ptls_ctx)
return 0;
}
+int
+picotls_reinit_ca_chain (void)
+{
+ /* Not supported yet */
+ return 0;
+}
+
const static tls_engine_vft_t picotls_engine = {
.ctx_alloc = picotls_ctx_alloc,
.ctx_free = picotls_ctx_free,
@@ -614,7 +758,9 @@ const static tls_engine_vft_t picotls_engine = {
.ctx_read = picotls_ctx_read,
.ctx_write = picotls_ctx_write,
.ctx_transport_close = picotls_transport_close,
+ .ctx_transport_reset = picotls_transport_reset,
.ctx_app_close = picotls_app_close,
+ .ctx_reinit_cachain = picotls_reinit_ca_chain,
};
static clib_error_t *
@@ -628,6 +774,8 @@ tls_picotls_init (vlib_main_t * vm)
num_threads = 1 + vtm->n_threads;
vec_validate (pm->ctx_pool, num_threads - 1);
+ vec_validate (pm->rx_bufs, num_threads - 1);
+ vec_validate (pm->tx_bufs, num_threads - 1);
clib_rwlock_init (&picotls_main.crypto_keys_rw_lock);
@@ -638,18 +786,14 @@ tls_picotls_init (vlib_main_t * vm)
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (tls_picotls_init) = {
.runs_after = VLIB_INITS ("tls_init"),
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Transport Layer Security (TLS) Engine, Picotls Based",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tlspicotls/tls_picotls.h b/src/plugins/tlspicotls/tls_picotls.h
index 29b279c7a83..d24d7e2f172 100644
--- a/src/plugins/tlspicotls/tls_picotls.h
+++ b/src/plugins/tlspicotls/tls_picotls.h
@@ -16,9 +16,7 @@
#define TLS_READ_IS_LEFT(x) ((x)->read_buffer.off != 0 && (x)->read_buffer.off != (x)->read_buffer_offset)
#define TLS_READ_LEFT_LEN(x) ((x)->read_buffer.off - (x)->read_buffer_offset)
-#define TLS_WRITE_OFFSET(x) ((x)->write_buffer.base + (x)->write_buffer_offset)
-#define TLS_WRITE_IS_LEFT(x) ((x)->write_buffer.off != 0 && (x)->write_buffer.off != (x)->write_buffer_offset)
-
+#define TLSP_MIN_ENQ_SPACE (1 << 16)
typedef struct tls_ctx_picotls_
{
@@ -29,10 +27,7 @@ typedef struct tls_ctx_picotls_
int rx_offset;
int rx_len;
ptls_buffer_t read_buffer;
- ptls_buffer_t write_buffer;
- uint8_t *write_content;
int read_buffer_offset;
- int write_buffer_offset;
} picotls_ctx_t;
typedef struct tls_listen_ctx_picotls_
@@ -45,6 +40,8 @@ typedef struct picotls_main_
{
picotls_ctx_t ***ctx_pool;
picotls_listen_ctx_t *lctx_pool;
+ u8 **tx_bufs;
+ u8 **rx_bufs;
ptls_context_t *client_ptls_ctx;
clib_rwlock_t crypto_keys_rw_lock;
} picotls_main_t;
diff --git a/src/plugins/tracedump/CMakeLists.txt b/src/plugins/tracedump/CMakeLists.txt
index 7860d95bc11..6dffdedcc81 100644
--- a/src/plugins/tracedump/CMakeLists.txt
+++ b/src/plugins/tracedump/CMakeLists.txt
@@ -26,7 +26,7 @@ add_vpp_plugin(tracedump
API_TEST_SOURCES
graph_test.c
tracedump_test.c
-)
-# API_TEST_SOURCES
-# tracedump_test.c
+ COMPONENT
+ vpp-plugin-devtools
+)
diff --git a/src/plugins/tracedump/graph_api.c b/src/plugins/tracedump/graph_api.c
index 0626f7d6b42..20eb1b920fc 100644
--- a/src/plugins/tracedump/graph_api.c
+++ b/src/plugins/tracedump/graph_api.c
@@ -252,7 +252,7 @@ graph_api_hookup (vlib_main_t * vm)
gmp->msg_id_base = setup_message_id_table ();
- am->is_mp_safe[gmp->msg_id_base + VL_API_GRAPH_NODE_GET] = 1;
+ vl_api_set_msg_thread_safe (am, gmp->msg_id_base + VL_API_GRAPH_NODE_GET, 1);
return 0;
}
diff --git a/src/plugins/tracedump/graph_cli.c b/src/plugins/tracedump/graph_cli.c
index 2440295a1a7..6af4706f87d 100644
--- a/src/plugins/tracedump/graph_cli.c
+++ b/src/plugins/tracedump/graph_cli.c
@@ -16,7 +16,11 @@
*/
#include <sys/socket.h>
+#ifdef __linux__
#include <linux/if.h>
+#else
+#include <net/if.h>
+#endif /* __linux__ */
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
@@ -75,9 +79,9 @@ graph_node_show_cmd (vlib_main_t * vm,
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "node %d", &index))
- n = vlib_get_node (vm, index);
- else if (unformat (input, "node %v", &name))
- n = vlib_get_node_by_name (vm, name);
+ n = vlib_get_node (vm, index);
+ else if (unformat (input, "node %s", &name))
+ n = vlib_get_node_by_name (vm, name);
else if (unformat (input, "want_arcs"))
want_arcs = true;
@@ -132,13 +136,11 @@ graph_node_show_cmd (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (graph_node_show_command, static) = {
.path = "show graph",
.short_help = "show graph [node <index>|<name>] [want_arcs] [input|trace_supported] [drop] [output] [punt] [handoff] [no_free] [polling] [interrupt]",
.function = graph_node_show_cmd,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/tracedump/graph_test.c b/src/plugins/tracedump/graph_test.c
index 79e1df61c5f..37dfbcdcaa0 100644
--- a/src/plugins/tracedump/graph_test.c
+++ b/src/plugins/tracedump/graph_test.c
@@ -27,7 +27,7 @@
#include <vnet/format_fns.h>
#include <tracedump/graph.api_enum.h>
#include <tracedump/graph.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
diff --git a/src/plugins/tracedump/setup.pg b/src/plugins/tracedump/setup.pg
index 91d5ebe2d2a..1ebdb0dd7f0 100644
--- a/src/plugins/tracedump/setup.pg
+++ b/src/plugins/tracedump/setup.pg
@@ -1,33 +1,33 @@
set term pag off
-packet-generator new {
- name worker0
- worker 0
- limit 12
- rate 1.2e7
- size 128-128
- interface local0
- node ethernet-input
- data { IP4: 1.2.40 -> 3cfd.fed0.b6c8
- UDP: 192.168.40.1 - 192.168.40.100 -> 192.168.50.10
- UDP: 1234 -> 2345
- incrementing 114
- }
+packet-generator new { \
+ name worker0 \
+ worker 0 \
+ limit 12 \
+ rate 1.2e7 \
+ size 128-128 \
+ interface local0 \
+ node ethernet-input \
+ data { IP4: 1.2.40 -> 3cfd.fed0.b6c8 \
+ UDP: 192.168.40.1 - 192.168.40.100 -> 192.168.50.10 \
+ UDP: 1234 -> 2345 \
+ incrementing 114 \
+ } \
}
-packet-generator new {
- name worker1
- worker 1
- limit 12
- rate 1.2e7
- size 128-128
- interface local0
- node ethernet-input
- data { IP4: 1.2.4 -> 3cfd.fed0.b6c9
- UDP: 192.168.41.1 - 192.168.41.100 -> 192.168.51.10
- UDP: 1234 -> 2345
- incrementing 114
- }
+packet-generator new { \
+ name worker1 \
+ worker 1 \
+ limit 12 \
+ rate 1.2e7 \
+ size 128-128 \
+ interface local0 \
+ node ethernet-input \
+ data { IP4: 1.2.4 -> 3cfd.fed0.b6c9 \
+ UDP: 192.168.41.1 - 192.168.41.100 -> 192.168.51.10 \
+ UDP: 1234 -> 2345 \
+ incrementing 114 \
+ } \
}
trace add pg-input 20
diff --git a/src/plugins/tracedump/tracedump.api b/src/plugins/tracedump/tracedump.api
index 540b0664074..1b3813fb184 100644
--- a/src/plugins/tracedump/tracedump.api
+++ b/src/plugins/tracedump/tracedump.api
@@ -25,7 +25,7 @@
*/
-option version = "0.1.0";
+option version = "0.2.0";
enum trace_filter_flag : u32
{
@@ -147,3 +147,69 @@ define trace_details {
u32 packet_number;
string trace_data[];
};
+
+/** \brief trace_clear_cache
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+autoreply define trace_clear_cache {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief trace_v2_dump
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param thread_id - specific thread to dump from, ~0 to dump from all
+ @param position - position of the first packet to dump in the per-thread cache, ~0 to only clear the cache
+ @param max - maximum number of packets to dump from each thread
+ @param clear_cache - dispose of any cached data before we begin
+*/
+define trace_v2_dump {
+ u32 client_index;
+ u32 context;
+
+ u32 thread_id [default=0xffffffff];
+ u32 position;
+ u32 max [default=50];
+ bool clear_cache;
+
+ option vat_help = "trace_v2_dump [thread_id <tid>] [position <pos>] [max <max>]";
+};
+
+/** \brief trace_v2_details
+ @param context - sender context, to match reply w/ request
+ @param thread_id - thread index the packet comes from
+ @param position - position of the packet in its thread cache
+ @param more - true if there are still more packets to dump for this thread
+ @param trace_data - string packet data
+*/
+define trace_v2_details {
+ u32 context;
+
+ u32 thread_id;
+ u32 position;
+ bool more;
+
+ string trace_data[];
+};
+
+autoreply define trace_set_filter_function
+{
+ u32 client_index;
+ u32 context;
+
+ string filter_function_name[];
+};
+
+define trace_filter_function_dump {
+ u32 client_index;
+ u32 context;
+};
+
+define trace_filter_function_details {
+ u32 context;
+
+ bool selected;
+ string name[];
+}; \ No newline at end of file
diff --git a/src/plugins/tracedump/tracedump.c b/src/plugins/tracedump/tracedump.c
index f1073fe247d..6a26865c1f0 100644
--- a/src/plugins/tracedump/tracedump.c
+++ b/src/plugins/tracedump/tracedump.c
@@ -213,12 +213,15 @@ vl_api_trace_dump_t_handler (vl_api_trace_dump_t * mp)
iterator_position = clib_net_to_host_u32 (mp->position);
max_records = clib_net_to_host_u32 (mp->max_records);
- /* Don't overflow the existing queue space. */
- svm_queue_t *q = rp->vl_input_queue;
- u32 queue_slots_available = q->maxsize - q->cursize;
- int chunk = (queue_slots_available > 0) ? queue_slots_available - 1 : 0;
- if (chunk < max_records)
- max_records = chunk;
+ /* Don't overflow the existing queue space for shared memory API clients. */
+ if (rp->vl_input_queue)
+ {
+ svm_queue_t *q = rp->vl_input_queue;
+ u32 queue_slots_available = q->maxsize - q->cursize;
+ int chunk = (queue_slots_available > 0) ? queue_slots_available - 1 : 0;
+ if (chunk < max_records)
+ max_records = chunk;
+ }
/* Need a fresh cache for this client? */
if (vec_len (client_trace_cache) == 0
@@ -285,9 +288,9 @@ vl_api_trace_dump_t_handler (vl_api_trace_dump_t * mp)
{
/* More threads, but not more in this thread? */
if (j == (vec_len (client_trace_cache[i]) - 1))
- dmp->more_threads = 1;
+ last_more_threads = dmp->more_threads = 1;
else
- dmp->more_this_thread = 1;
+ last_more_this_thread = dmp->more_this_thread = 1;
}
/* Done, may or may not be at the end of a batch. */
dmp->done = 0;
@@ -332,6 +335,199 @@ doublebreak:;
vec_free (s);
}
+/* API message handler */
+static void
+vl_api_trace_v2_dump_t_handler (vl_api_trace_v2_dump_t *mp)
+{
+ vl_api_registration_t *rp;
+ vl_api_trace_v2_details_t *dmp;
+ tracedump_main_t *tdmp = &tracedump_main;
+ vlib_trace_header_t ***client_trace_cache, **th;
+ int i, j;
+ u32 client_index;
+ u32 first_position, max, first_thread_id, last_thread_id;
+ u32 n_threads = vlib_get_n_threads ();
+ u8 *s = 0;
+
+ rp = vl_api_client_index_to_registration (mp->client_index);
+ if (rp == 0)
+ return;
+
+ client_index = rp->vl_api_registration_pool_index;
+
+ vec_validate_init_empty (tdmp->traces, client_index, 0);
+
+ client_trace_cache = tdmp->traces[client_index];
+
+ if (mp->clear_cache)
+ {
+ toss_client_cache (tdmp, client_index, client_trace_cache);
+ client_trace_cache = 0;
+ }
+
+ /* Now, where were we? */
+ first_thread_id = last_thread_id = clib_net_to_host_u32 (mp->thread_id);
+ first_position = clib_net_to_host_u32 (mp->position);
+ max = clib_net_to_host_u32 (mp->max);
+
+ if (first_thread_id == ~0)
+ {
+ first_thread_id = 0;
+ last_thread_id = n_threads - 1;
+ }
+
+ /* Don't overflow the existing queue space for shared memory API clients. */
+ if (rp->vl_input_queue)
+ {
+ svm_queue_t *q = rp->vl_input_queue;
+ u32 queue_slots_available = q->maxsize - q->cursize;
+ int chunk = (queue_slots_available > 0) ? queue_slots_available - 1 : 0;
+ /* split available slots among requested threads */
+ if (chunk < max * (last_thread_id - first_thread_id + 1))
+ max = chunk / (last_thread_id - first_thread_id + 1);
+ }
+
+ /* Need a fresh cache for this client? */
+ if (vec_len (client_trace_cache) == 0 && first_position != ~0)
+ {
+ vlib_worker_thread_barrier_sync (vlib_get_first_main ());
+
+ /* Make a slot for each worker thread */
+ vec_validate (client_trace_cache, n_threads - 1);
+ i = 0;
+
+ foreach_vlib_main ()
+ {
+ vlib_trace_main_t *tm = &this_vlib_main->trace_main;
+
+ /* Filter as directed */
+ trace_apply_filter (this_vlib_main);
+
+ pool_foreach (th, tm->trace_buffer_pool)
+ {
+ vec_add1 (client_trace_cache[i], th[0]);
+ }
+
+ /* Sort them by increasing time. */
+ if (vec_len (client_trace_cache[i]))
+ vec_sort_with_function (client_trace_cache[i], trace_cmp);
+
+ i++;
+ }
+ vlib_worker_thread_barrier_release (vlib_get_first_main ());
+ }
+
+ /* Save the cache, one way or the other */
+ tdmp->traces[client_index] = client_trace_cache;
+
+ for (i = first_thread_id;
+ i <= last_thread_id && i < vec_len (client_trace_cache); i++)
+ {
+ // dump up to 'max' packets per thread
+ for (j = first_position;
+ j < vec_len (client_trace_cache[i]) && j < first_position + max;
+ j++)
+ {
+ th = &client_trace_cache[i][j];
+
+ vec_reset_length (s);
+
+ s =
+ format (s, "%U", format_vlib_trace, vlib_get_first_main (), th[0]);
+
+ dmp = vl_msg_api_alloc (sizeof (*dmp) + vec_len (s));
+ dmp->_vl_msg_id =
+ htons (VL_API_TRACE_V2_DETAILS + (tdmp->msg_id_base));
+ dmp->context = mp->context;
+ dmp->thread_id = ntohl (i);
+ dmp->position = ntohl (j);
+ dmp->more = j < vec_len (client_trace_cache[i]) - 1;
+ vl_api_vec_to_api_string (s, &dmp->trace_data);
+
+ vl_api_send_msg (rp, (u8 *) dmp);
+ }
+ }
+
+ vec_free (s);
+}
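
The slot-splitting above divides the client's free queue slots evenly across the requested threads so a full sweep cannot overflow the shared-memory queue; the client then advances position and re-issues the dump to page through the rest. A worked sketch of the division:

/* Sketch: 64 free queue slots -> chunk = 63; dumping all 8 threads
 * caps each thread at max = 63 / 8 = 7 details messages per pass. */
int chunk = 63;
u32 n_req_threads = last_thread_id - first_thread_id + 1; /* e.g. 8 */
if (chunk < max * n_req_threads)
  max = chunk / n_req_threads;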
+
+static void
+vl_api_trace_clear_cache_t_handler (vl_api_trace_clear_cache_t *mp)
+{
+ vl_api_registration_t *rp;
+ tracedump_main_t *tdmp = &tracedump_main;
+ vlib_trace_header_t ***client_trace_cache;
+ vl_api_trace_clear_cache_reply_t *rmp;
+ u32 client_index;
+
+ rp = vl_api_client_index_to_registration (mp->client_index);
+ if (rp == 0)
+ return;
+
+ client_index = rp->vl_api_registration_pool_index;
+ vec_validate_init_empty (tdmp->traces, client_index, 0);
+ client_trace_cache = tdmp->traces[client_index];
+ toss_client_cache (tdmp, client_index, client_trace_cache);
+
+ int rv = 0;
+ REPLY_MACRO (VL_API_TRACE_CLEAR_CACHE_REPLY);
+}
+
+static void
+vl_api_trace_set_filter_function_t_handler (
+ vl_api_trace_set_filter_function_t *mp)
+{
+ vl_api_trace_set_filter_function_reply_t *rmp;
+ tracedump_main_t *tdmp = &tracedump_main;
+ unformat_input_t input = { 0 };
+ vlib_is_packet_traced_fn_t *f;
+ char *filter_name;
+ int rv = 0;
+ filter_name = vl_api_from_api_to_new_c_string (&mp->filter_function_name);
+ unformat_init_cstring (&input, filter_name);
+ if (unformat (&input, "%U", unformat_vlib_trace_filter_function, &f) == 0)
+ {
+ rv = -1;
+ goto done;
+ }
+ vlib_set_trace_filter_function (f);
+done:
+ unformat_free (&input);
+ vec_free (filter_name);
+ REPLY_MACRO (VL_API_TRACE_SET_FILTER_FUNCTION_REPLY);
+}
+
+static void
+vl_api_trace_filter_function_dump_t_handler (
+ vl_api_trace_filter_function_dump_t *mp)
+{
+ vl_api_registration_t *rp;
+ vl_api_trace_filter_function_details_t *dmp;
+ tracedump_main_t *tdmp = &tracedump_main;
+ vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main;
+ vlib_trace_filter_function_registration_t *reg =
+ tfm->trace_filter_registration;
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_is_packet_traced_fn_t *current =
+ vm->trace_main.current_trace_filter_function;
+ rp = vl_api_client_index_to_registration (mp->client_index);
+
+ if (rp == 0)
+ return;
+
+ while (reg)
+ {
+ dmp = vl_msg_api_alloc (sizeof (*dmp) + strlen (reg->name));
+ dmp->_vl_msg_id =
+ htons (VL_API_TRACE_FILTER_FUNCTION_DETAILS + (tdmp->msg_id_base));
+ dmp->context = mp->context;
+ vl_api_c_string_to_api_string (reg->name, &dmp->name);
+ dmp->selected = current == reg->function;
+ vl_api_send_msg (rp, (u8 *) dmp);
+ reg = reg->next;
+ }
+}
+
/* API definitions */
#include <tracedump/tracedump.api.c>
@@ -349,19 +545,18 @@ tracedump_init (vlib_main_t * vm)
/* Add our API messages to the global name_crc hash table */
tdmp->msg_id_base = setup_message_id_table ();
- am->is_mp_safe[tdmp->msg_id_base + VL_API_TRACE_DUMP] = 1;
+ vl_api_set_msg_thread_safe (am, tdmp->msg_id_base + VL_API_TRACE_DUMP, 1);
+ vl_api_set_msg_thread_safe (am, tdmp->msg_id_base + VL_API_TRACE_V2_DUMP, 1);
return error;
}
VLIB_INIT_FUNCTION (tracedump_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Streaming packet trace dump plugin",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tracedump/tracedump_test.c b/src/plugins/tracedump/tracedump_test.c
index 3bf50efb4ac..b813acc3ecc 100644
--- a/src/plugins/tracedump/tracedump_test.c
+++ b/src/plugins/tracedump/tracedump_test.c
@@ -155,6 +155,18 @@ vl_api_trace_details_t_handler (vl_api_trace_details_t * dmp)
packet_number, vl_api_format_string, (&dmp->trace_data));
}
+static void
+vl_api_trace_v2_details_t_handler (vl_api_trace_v2_details_t *dmp)
+{
+ u32 thread_id, position;
+
+ thread_id = clib_net_to_host_u32 (dmp->thread_id);
+ position = clib_net_to_host_u32 (dmp->position);
+ fformat (stdout, "thread %d position %d more %d", thread_id, position,
+ dmp->more);
+ fformat (stdout, "Packet %d\n%U\n\n", position, vl_api_format_string,
+ (&dmp->trace_data));
+}
static void
vl_api_trace_dump_reply_t_handler (vl_api_trace_dump_reply_t * rmp)
@@ -203,7 +215,7 @@ vl_api_trace_dump_reply_t_handler (vl_api_trace_dump_reply_t * rmp)
}
static int
-api_trace_dump (vat_main_t * vam)
+api_trace_dump (vat_main_t *vam)
{
vl_api_trace_dump_t *mp;
int ret;
@@ -220,8 +232,26 @@ api_trace_dump (vat_main_t * vam)
return ret;
}
+static int
+api_trace_v2_dump (vat_main_t *vam)
+{
+ vl_api_trace_v2_dump_t *mp;
+ int ret;
+
+ M (TRACE_V2_DUMP, mp);
+ mp->clear_cache = 1;
+ mp->thread_id = ~0;
+ mp->position = 0;
+ mp->max = clib_host_to_net_u32 (10);
+
+ S (mp);
+
+ W (ret);
+ return ret;
+}
+
int
-api_trace_clear_capture (vat_main_t * vam)
+api_trace_clear_capture (vat_main_t *vam)
{
vl_api_trace_clear_capture_t *mp;
int ret;
@@ -232,26 +262,75 @@ api_trace_clear_capture (vat_main_t * vam)
return ret;
}
+static int
+api_trace_clear_cache (vat_main_t *vam)
+{
+ vl_api_trace_clear_cache_t *mp;
+ int ret;
+
+ M (TRACE_CLEAR_CACHE, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_trace_set_filter_function (vat_main_t *vam)
+{
+ vl_api_trace_set_filter_function_t *mp;
+ int ret;
+ M (TRACE_SET_FILTER_FUNCTION, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+static int
+api_trace_filter_function_dump (vat_main_t *vam)
+{
+ vl_api_trace_filter_function_dump_t *mp;
+ int ret;
+
+ M (TRACE_FILTER_FUNCTION_DUMP, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_trace_filter_function_details_t_handler (
+ vl_api_trace_filter_function_details_t *dmp)
+{
+ fformat (stdout, "name: %U, selected: %u\n\n", vl_api_format_string,
+ &dmp->name, dmp->selected);
+}
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_endianfun
#include <tracedump/tracedump.api.h>
#undef vl_endianfun
#define vl_printfun
#include <tracedump/tracedump.api.h>
#undef vl_printfun
+#define vl_calcsizefun
+#include <tracedump/tracedump.api.h>
+#undef vl_calcsizefun
void
manual_setup_message_id_table (vat_main_t * vam)
{
- vl_msg_api_set_handlers (VL_API_TRACE_DETAILS
- + tracedump_test_main.msg_id_base, "trace_details",
- vl_api_trace_details_t_handler, vl_noop_handler,
- vl_api_trace_details_t_endian,
- vl_api_trace_details_t_print,
- sizeof (vl_api_trace_details_t), 1);
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){
+ .id = VL_API_TRACE_DETAILS + tracedump_test_main.msg_id_base,
+ .name = "trace_details",
+ .handler = vl_api_trace_details_t_handler,
+ .endian = vl_api_trace_details_t_endian,
+ .format_fn = vl_api_trace_details_t_format,
+ .size = sizeof (vl_api_trace_details_t),
+ .traced = 1,
+ .tojson = vl_api_trace_details_t_tojson,
+ .fromjson = vl_api_trace_details_t_fromjson,
+ .calc_size = vl_api_trace_details_t_calc_size,
+ });
}
#define VL_API_LOCAL_SETUP_MESSAGE_ID_TABLE manual_setup_message_id_table
diff --git a/src/plugins/tracenode/CMakeLists.txt b/src/plugins/tracenode/CMakeLists.txt
new file mode 100644
index 00000000000..6b6ba2e9865
--- /dev/null
+++ b/src/plugins/tracenode/CMakeLists.txt
@@ -0,0 +1,37 @@
+
+# Copyright (c) 2023 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(tracenode
+ SOURCES
+ node.c
+ api.c
+ cli.c
+ plugin.c
+ tracenode.c
+
+ MULTIARCH_SOURCES
+ node.c
+
+ API_FILES
+ tracenode.api
+
+ INSTALL_HEADERS
+ tracenode.h
+
+ API_TEST_SOURCES
+ test.c
+
+ COMPONENT
+ vpp-plugin-devtools
+)
diff --git a/src/plugins/tracenode/FEATURE.yaml b/src/plugins/tracenode/FEATURE.yaml
new file mode 100644
index 00000000000..c405dd11d59
--- /dev/null
+++ b/src/plugins/tracenode/FEATURE.yaml
@@ -0,0 +1,8 @@
+---
+name: Trace node
+maintainer: Maxime Peim <mpeim@cisco.com>
+features:
+ - allow trace filtering on encapsulated (inner) packets
+description: "Allow tracing on IP feature arc. Encapsulated packets can then be traced and filtered."
+state: experimental
+properties: [CLI, API]
diff --git a/src/plugins/tracenode/api.c b/src/plugins/tracenode/api.c
new file mode 100644
index 00000000000..0b01ad8b9f5
--- /dev/null
+++ b/src/plugins/tracenode/api.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <tracenode/tracenode.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <tracenode/tracenode.api_enum.h>
+#include <tracenode/tracenode.api_types.h>
+
+#define REPLY_MSG_ID_BASE (tnm->msg_id_base)
+#include <vlibapi/api_helper_macros.h>
+
+static void
+vl_api_tracenode_enable_disable_t_handler (
+ vl_api_tracenode_enable_disable_t *mp)
+{
+ tracenode_main_t *tnm = &tracenode_main;
+ vl_api_tracenode_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = tracenode_feature_enable_disable (ntohl (mp->sw_if_index), mp->is_pcap,
+ mp->enable);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_TRACENODE_ENABLE_DISABLE_REPLY);
+}
+
+#include <tracenode/tracenode.api.c>
+
+clib_error_t *
+tracenode_plugin_api_hookup (vlib_main_t *vm)
+{
+ tracenode_main_t *tnm = &tracenode_main;
+
+ /* ask for a correctly-sized block of API message decode slots */
+ tnm->msg_id_base = setup_message_id_table ();
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
\ No newline at end of file
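
The handler above follows the stock VPP plugin API pattern: REPLY_MSG_ID_BASE must be defined before api_helper_macros.h is included so that REPLY_MACRO can offset the reply's message ID into the block this plugin obtained from setup_message_id_table(), and VALIDATE_SW_IF_INDEX jumps to BAD_SW_IF_INDEX_LABEL when the request names an unknown interface. A minimal sketch of that wiring for a hypothetical plugin foo (all foo_* and vl_api_foo_* names are illustrative; the message types would come from generated headers):

typedef struct { u16 msg_id_base; } foo_main_t;
static foo_main_t foo_main;

#define REPLY_MSG_ID_BASE (foo_main.msg_id_base)
#include <vlibapi/api_helper_macros.h>

static void
vl_api_foo_enable_t_handler (vl_api_foo_enable_t *mp)
{
  vl_api_foo_enable_reply_t *rmp;
  int rv = 0;

  VALIDATE_SW_IF_INDEX (mp); /* jumps to the label below on a bad index */
  /* ... act on the request, setting rv ... */
  BAD_SW_IF_INDEX_LABEL;
  REPLY_MACRO (VL_API_FOO_ENABLE_REPLY); /* reply id offset by the base */
}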
diff --git a/src/plugins/tracenode/cli.c b/src/plugins/tracenode/cli.c
new file mode 100644
index 00000000000..8d0ed4176d6
--- /dev/null
+++ b/src/plugins/tracenode/cli.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <tracenode/tracenode.h>
+
+static clib_error_t *
+tracenode_feature_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 sw_if_index = ~0;
+ int enable = 1, is_pcap = 0;
+ int rv;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "disable"))
+ enable = 0;
+ else if (unformat (line_input, "pcap"))
+ is_pcap = 1;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
+ vnet_get_main (), &sw_if_index))
+ {
+ if (sw_if_index == 0)
+ return clib_error_return (0, "Local interface not supported...");
+ }
+
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Software interface required");
+
+ if ((rv = tracenode_feature_enable_disable (sw_if_index, is_pcap, enable)) !=
+ 0)
+    return clib_error_return (
+      0, "tracenode_feature_enable_disable returned %d", rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (tracenode_feature, static) = {
+ .path = "tracenode feature",
+ .short_help = "tracenode feature <intfc> [disable] [pcap]",
+ .function = tracenode_feature_cmd_fn,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/tracenode/node.c b/src/plugins/tracenode/node.c
new file mode 100644
index 00000000000..444d93f1708
--- /dev/null
+++ b/src/plugins/tracenode/node.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/feature/feature.h>
+#include <vnet/classify/pcap_classify.h>
+
+typedef struct
+{
+ u32 sw_if_index;
+} tracenode_trace_t;
+
+static u8 *
+format_tracenode_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ tracenode_trace_t *t = va_arg (*args, tracenode_trace_t *);
+
+ s = format (s, "Packet traced from interface %U added",
+ format_vnet_sw_if_index_name, vnm, t->sw_if_index);
+ return s;
+}
+
+static_always_inline u32
+tracenode_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int is_pcap)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_pcap_t *pp = &vnm->pcap;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+ u32 *from = vlib_frame_vector_args (frame), *from0 = from;
+ const u32 n_tot = frame->n_vectors;
+ u32 n_left = n_tot;
+
+ vlib_get_buffers (vm, from, b, n_tot);
+
+ while (n_left > 0)
+ {
+ /* TODO: dual/quad loop */
+
+ /* enqueue b0 to the current next frame */
+ vnet_feature_next_u16 (next, b[0]);
+
+ /* buffer already traced */
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ goto skip;
+
+ if (is_pcap && vnet_is_packet_pcaped (pp, b[0], ~0))
+ {
+ pcap_add_buffer (&pp->pcap_main, vm, from0[0],
+ pp->max_bytes_per_pkt);
+ }
+ else if (!is_pcap && vlib_trace_buffer (vm, node, next[0], b[0],
+ 1 /* follow_chain */))
+ {
+ tracenode_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof *tr);
+ tr->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ }
+
+ skip:
+ b++;
+ from0++;
+ next++;
+ n_left--;
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, n_tot);
+ return n_tot;
+}
+
+VLIB_NODE_FN (trace_filtering_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return tracenode_inline (vm, node, frame, 0 /* is_pcap */);
+}
+
+VLIB_NODE_FN (pcap_filtering_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return tracenode_inline (vm, node, frame, 1 /* is_pcap */);
+}
+
+VLIB_REGISTER_NODE (trace_filtering_node) = {
+ .name = "trace-filtering",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .format_trace = format_tracenode_trace,
+};
+
+VLIB_REGISTER_NODE (pcap_filtering_node) = {
+ .name = "pcap-filtering",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .format_trace = format_tracenode_trace,
+};
+
+VNET_FEATURE_INIT (trace_filtering4, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "trace-filtering",
+ .runs_after = VNET_FEATURES ("ip4-full-reassembly-feature",
+ "ip4-sv-reassembly-feature"),
+};
+
+VNET_FEATURE_INIT (trace_filtering6, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "trace-filtering",
+ .runs_after = VNET_FEATURES ("ip6-full-reassembly-feature",
+ "ip6-sv-reassembly-feature"),
+};
+
+VNET_FEATURE_INIT (pcap_filtering4, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "pcap-filtering",
+ .runs_after = VNET_FEATURES ("ip4-full-reassembly-feature",
+ "ip4-sv-reassembly-feature"),
+};
+
+VNET_FEATURE_INIT (pcap_filtering6, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "pcap-filtering",
+ .runs_after = VNET_FEATURES ("ip6-full-reassembly-feature",
+ "ip6-sv-reassembly-feature"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
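
tracenode_inline above is written once and instantiated twice with a compile-time-constant is_pcap argument, so after inlining and constant folding each registered node function keeps only its own branch; the trace and pcap nodes share one implementation with no per-packet mode test. A stripped-down sketch of that specialization pattern (the demo_* names are illustrative):

#include <vppinfra/clib.h> /* u32, static_always_inline */

static_always_inline u32
demo_inline (u32 n_packets, int is_pcap)
{
  if (is_pcap) /* constant per instantiation; the branch folds away */
    return n_packets; /* pcap-only path would go here */
  return n_packets;   /* trace-only path would go here */
}

u32
demo_trace (u32 n)
{
  return demo_inline (n, 0 /* is_pcap */);
}

u32
demo_pcap (u32 n)
{
  return demo_inline (n, 1 /* is_pcap */);
}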
diff --git a/src/plugins/gbp/gbp_types.h b/src/plugins/tracenode/plugin.c
index ac983b1cdd2..19ce6ba5610 100644
--- a/src/plugins/gbp/gbp_types.h
+++ b/src/plugins/tracenode/plugin.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Copyright (c) 2023 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -13,19 +13,14 @@
* limitations under the License.
*/
-#ifndef __GBP_TYPES_H__
-#define __GBP_TYPES_H__
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
-#include <vnet/vnet.h>
-
-typedef u32 vnid_t;
-#define VNID_INVALID ((u16)~0)
-
-typedef u16 gbp_scope_t;
-typedef u16 sclass_t;
-#define SCLASS_INVALID ((u16)~0)
-
-#endif
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Tracing packet node",
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tracenode/test.c b/src/plugins/tracenode/test.c
new file mode 100644
index 00000000000..a409fd2a59a
--- /dev/null
+++ b/src/plugins/tracenode/test.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vnet/api_errno.h>
+#include <stdbool.h>
+
+#define __plugin_msg_base tracenode_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <tracenode/tracenode.api_enum.h>
+#include <tracenode/tracenode.api_types.h>
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} tracenode_test_main_t;
+
+tracenode_test_main_t tracenode_test_main;
+
+int
+api_tracenode_enable_disable (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_tracenode_enable_disable_t *mp;
+ u32 sw_if_index;
+ bool is_pcap, enable;
+
+ sw_if_index = ~0;
+ is_pcap = false;
+ enable = true;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "disable"))
+ enable = 0;
+ else if (unformat (i, "pcap"))
+ is_pcap = 1;
+ else if (unformat (i, "%U", unformat_vnet_sw_interface, vnet_get_main (),
+ &sw_if_index))
+ {
+ if (sw_if_index == 0)
+ {
+ clib_warning ("Local interface not supported...");
+ return -99;
+ }
+ }
+
+ else
+ {
+ clib_warning ("Unknown input: %U\n", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (TRACENODE_ENABLE_DISABLE, mp);
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->is_pcap = is_pcap;
+ mp->enable = enable;
+
+ int ret = 0;
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
+#include <tracenode/tracenode.api_test.c>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
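
api_tracenode_enable_disable above follows the standard VAT test-plugin choreography from vat_helper_macros.h: M allocates the request, zeroes it, and stamps its message ID relative to __plugin_msg_base; S sends it to VPP; W blocks until the matching reply arrives and deposits its return value. Reduced to the macro skeleton (payload handling elided; assumes a test plugin set up like the one above):

int
api_example (vat_main_t *vam)
{
  vl_api_tracenode_enable_disable_t *mp;
  int ret = 0;

  M (TRACENODE_ENABLE_DISABLE, mp); /* alloc, zero, stamp id + base */
  /* ... fill mp's payload fields here ... */
  S (mp);  /* send to VPP */
  W (ret); /* wait for the reply; collect its retval */
  return ret;
}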
diff --git a/src/plugins/tracenode/tracenode.api b/src/plugins/tracenode/tracenode.api
new file mode 100644
index 00000000000..198f8218b55
--- /dev/null
+++ b/src/plugins/tracenode/tracenode.api
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "0.1.0";
+
+import "vnet/interface_types.api";
+
+/** \brief Enable/disable trace filtering feature
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface on which to enable/disable trace filtering feature
+ @param is_pcap - if non-zero enable the feature for pcap capture, else for trace
+ @param enable - if non-zero then enable the feature, else disable it
+*/
+autoreply define tracenode_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ bool is_pcap [default=false];
+ bool enable [default=true];
+
+ option vat_help = "tracenode_enable_disable <intfc> [disable] [pcap]";
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
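
Because the message is declared autoreply, the API compiler generates a matching tracenode_enable_disable_reply carrying only a retval. A hedged sketch of driving it from a C client through VAPI, assuming the generated names follow the usual conventions (the <vapi/tracenode.api.vapi.h> header and every vapi_*tracenode* name below are produced by the API compiler, not part of this patch):

#include <stdbool.h>
#include <vapi/vapi.h>
#include <vapi/tracenode.api.vapi.h>

DEFINE_VAPI_MSG_IDS_TRACENODE_API_JSON;

static vapi_error_e
enable_cb (vapi_ctx_t ctx, void *caller_ctx, vapi_error_e rv, bool is_last,
	   vapi_payload_tracenode_enable_disable_reply *p)
{
  return rv; /* p->retval carries the handler's return code */
}

static vapi_error_e
enable_tracenode (vapi_ctx_t ctx, u32 sw_if_index)
{
  vapi_msg_tracenode_enable_disable *mp =
    vapi_alloc_tracenode_enable_disable (ctx);
  mp->payload.sw_if_index = sw_if_index;
  mp->payload.is_pcap = false; /* trace, not pcap */
  mp->payload.enable = true;
  return vapi_tracenode_enable_disable (ctx, mp, enable_cb, 0);
}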
diff --git a/src/plugins/tracenode/tracenode.c b/src/plugins/tracenode/tracenode.c
new file mode 100644
index 00000000000..e292c7da95c
--- /dev/null
+++ b/src/plugins/tracenode/tracenode.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <tracenode/tracenode.h>
+
+tracenode_main_t tracenode_main;
+
+int
+tracenode_feature_enable_disable (u32 sw_if_index, bool is_pcap, bool enable)
+{
+ tracenode_main_t *tnm = &tracenode_main;
+ char *node_name = is_pcap ? "pcap-filtering" : "trace-filtering";
+ int rv = 0;
+
+ if (pool_is_free_index (tnm->vnet_main->interface_main.sw_interfaces,
+ sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (clib_bitmap_get (tnm->feature_enabled_by_sw_if, sw_if_index) == enable)
+ return 0;
+
+ if ((rv = vnet_feature_enable_disable ("ip4-unicast", node_name, sw_if_index,
+ enable, 0, 0)) != 0)
+ return rv;
+
+ if ((rv = vnet_feature_enable_disable ("ip6-unicast", node_name, sw_if_index,
+ enable, 0, 0)) != 0)
+ return rv;
+
+ tnm->feature_enabled_by_sw_if =
+ clib_bitmap_set (tnm->feature_enabled_by_sw_if, sw_if_index, enable);
+
+ return 0;
+}
+
+static clib_error_t *
+tracenode_init (vlib_main_t *vm)
+{
+ tracenode_main_t *tnm = &tracenode_main;
+ clib_error_t *error = 0;
+
+ memset (tnm, 0, sizeof (*tnm));
+
+ tnm->vnet_main = vnet_get_main ();
+
+ error = tracenode_plugin_api_hookup (vm);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (tracenode_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
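
tracenode_feature_enable_disable above is idempotent: feature_enabled_by_sw_if records each interface's current state, so a repeated enable (or disable) returns before touching the ip4-unicast/ip6-unicast feature arcs. A minimal sketch of that bitmap-guard pattern in isolation (names hypothetical):

#include <vppinfra/bitmap.h>

static uword *state_by_sw_if;

static int
set_state_once (u32 sw_if_index, int enable)
{
  /* no-op when the interface is already in the requested state */
  if (clib_bitmap_get (state_by_sw_if, sw_if_index) == enable)
    return 0;
  /* ... apply the expensive change here ... */
  state_by_sw_if = clib_bitmap_set (state_by_sw_if, sw_if_index, enable);
  return 1; /* state changed */
}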
diff --git a/src/plugins/tracenode/tracenode.h b/src/plugins/tracenode/tracenode.h
new file mode 100644
index 00000000000..7af60aa20b1
--- /dev/null
+++ b/src/plugins/tracenode/tracenode.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef _TRACENODE_H_
+#define _TRACENODE_H_
+#include <vlib/vlib.h>
+#include <vnet/feature/feature.h>
+#include <stdbool.h>
+
+typedef struct
+{
+ vnet_main_t *vnet_main;
+ uword *feature_enabled_by_sw_if;
+ u16 msg_id_base;
+} tracenode_main_t;
+
+extern tracenode_main_t tracenode_main;
+
+clib_error_t *tracenode_plugin_api_hookup (vlib_main_t *vm);
+
+int tracenode_feature_enable_disable (u32 sw_if_index, bool is_pcap,
+ bool enable);
+
+#endif /* _TRACENODE_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/unittest/CMakeLists.txt b/src/plugins/unittest/CMakeLists.txt
index 115ced3393b..0382841379c 100644
--- a/src/plugins/unittest/CMakeLists.txt
+++ b/src/plugins/unittest/CMakeLists.txt
@@ -15,9 +15,10 @@ set(chacha20_poly1305)
if (OPENSSL_VERSION VERSION_GREATER_EQUAL 1.1.0)
set(chacha20_poly1305 crypto/chacha20_poly1305.c)
endif()
-
+include_directories(${CMAKE_SOURCE_DIR}/vpp-api ${CMAKE_CURRENT_BINARY_DIR}/../../vpp-api)
add_vpp_plugin(unittest
SOURCES
+ api_test.c
api_fuzz_test.c
bier_test.c
bihash_test.c
@@ -25,6 +26,7 @@ add_vpp_plugin(unittest
crypto/aes_cbc.c
crypto/aes_ctr.c
crypto/aes_gcm.c
+ crypto/aes_gmac.c
${chacha20_poly1305}
crypto/rfc2202_hmac_md5.c
crypto/rfc2202_hmac_sha1.c
@@ -32,8 +34,11 @@ add_vpp_plugin(unittest
crypto/sha.c
crypto_test.c
fib_test.c
+ gso_test.c
+ hash_test.c
interface_test.c
ipsec_test.c
+ ip_psh_cksum_test.c
llist_test.c
mactime_test.c
mem_bulk_test.c
@@ -54,4 +59,8 @@ add_vpp_plugin(unittest
util_test.c
vlib_test.c
counter_test.c
+
+ COMPONENT
+ vpp-plugin-devtools
+ LINK_LIBRARIES vapiclient
)
diff --git a/src/plugins/unittest/api_fuzz_test.c b/src/plugins/unittest/api_fuzz_test.c
index 113835300bb..121c52a310b 100644
--- a/src/plugins/unittest/api_fuzz_test.c
+++ b/src/plugins/unittest/api_fuzz_test.c
@@ -27,7 +27,7 @@ static u32 fuzz_seed = 0xdeaddabe;
static u16 fuzz_first;
static u16 fuzz_cli_first, fuzz_cli_last;
-extern void (*vl_msg_api_fuzz_hook) (u16, void *);
+extern void (*vl_mem_api_fuzz_hook) (u16, void *);
static void
fuzz_hook (u16 id, void *the_msg)
@@ -114,10 +114,10 @@ test_api_fuzz_command_fn (vlib_main_t * vm,
if (fuzz_first == 0xFFFF)
{
- vl_msg_api_fuzz_hook = 0;
+ vl_mem_api_fuzz_hook = 0;
return clib_error_return (0, "fuzz_first is ~0, fuzzing disabled");
}
- vl_msg_api_fuzz_hook = fuzz_hook;
+ vl_mem_api_fuzz_hook = fuzz_hook;
vlib_cli_output (vm, "Fuzzing enabled: first %d, skip cli range %d - %d",
(u32) fuzz_first, (u32) fuzz_cli_first,
@@ -126,14 +126,12 @@ test_api_fuzz_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_api_fuzz, static) = {
.path = "test api fuzz",
.short_help = "test api fuzz [disable][seed nnn]\n"
" [fuzz-first nn][fuzz-cli-first nn][fuzz-cli-last nn]",
.function = test_api_fuzz_command_fn,
};
-/* *INDENT-ON* */
static u8 main_loop_enter_enable_api_fuzz;
@@ -172,7 +170,7 @@ api_fuzz_api_init (vlib_main_t * vm)
(0, "Couldn't find 'memclnt_keepalive_reply' ID");
}
/* Turn on fuzzing */
- vl_msg_api_fuzz_hook = fuzz_hook;
+ vl_mem_api_fuzz_hook = fuzz_hook;
return 0;
}
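
The hook pointer is renamed from vl_msg_api_fuzz_hook to vl_mem_api_fuzz_hook, presumably tracking its move into the vlibmemory (shared-memory API) layer; its shape, visible in fuzz_hook above, is unchanged. Installing a custom hook still looks like this (my_fuzz_hook is illustrative):

extern void (*vl_mem_api_fuzz_hook) (u16, void *);

static void
my_fuzz_hook (u16 id, void *the_msg)
{
  /* mutate the inbound message in place before it is dispatched */
}

static void
install_fuzz_hook (void)
{
  vl_mem_api_fuzz_hook = my_fuzz_hook;
}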
diff --git a/src/plugins/unittest/api_test.c b/src/plugins/unittest/api_test.c
new file mode 100644
index 00000000000..515bafefa36
--- /dev/null
+++ b/src/plugins/unittest/api_test.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <stdbool.h>
+#include <vapi/vapi.h>
+
+#include <vapi/memclnt.api.vapi.h>
+#include <vapi/vlib.api.vapi.h>
+#include <vapi/vpe.api.vapi.h>
+
+/*
+ * Example of how to call the VPP binary API from an internal API client,
+ * using the VAPI C language binding.
+ */
+
+DEFINE_VAPI_MSG_IDS_VPE_API_JSON;
+
+/*
+ * Connect a VPP binary API client to the VPP API
+ */
+static vapi_ctx_t
+connect_to_vpp (void)
+{
+ vapi_ctx_t ctx;
+ if (vapi_ctx_alloc (&ctx) != VAPI_OK)
+ {
+ clib_warning ("ctx_alloc failed");
+ return 0;
+ }
+ if (vapi_connect_from_vpp (ctx, "apifromplugin", 64, 32, VAPI_MODE_BLOCKING,
+ true) != VAPI_OK)
+ {
+ clib_warning ("vapi_connect failed");
+ vapi_ctx_free (ctx);
+ return 0;
+ }
+ return ctx;
+}
+
+/*
+ * Gets called when the show_version_reply message is received
+ */
+vapi_error_e
+show_version_cb (vapi_ctx_t ctx, void *caller_ctx, vapi_error_e rv,
+ bool is_last, vapi_payload_show_version_reply *p)
+{
+ if (rv != VAPI_OK)
+ clib_warning ("Return value: %d", rv);
+ fformat (
+ stdout,
+ "show_version_reply: program: `%s', version: `%s', build directory: "
+ "`%s', build date: `%s'\n",
+ p->program, p->version, p->build_directory, p->build_date);
+ return VAPI_OK;
+}
+
+static void *
+api_show_version_blocking_fn (void *args)
+{
+ vapi_ctx_t ctx;
+
+ if ((ctx = connect_to_vpp ()) == 0)
+ return clib_error_return (0, "API connection failed");
+
+ int called;
+ vapi_msg_show_version *sv = vapi_alloc_show_version (ctx);
+ vapi_error_e vapi_rv = vapi_show_version (ctx, sv, show_version_cb, &called);
+ if (vapi_rv != VAPI_OK)
+ clib_warning ("call failed");
+
+ vapi_disconnect_from_vpp (ctx);
+ vapi_ctx_free (ctx);
+
+ return 0;
+}
+
+static clib_error_t *
+test_api_test_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ /* Run call in a pthread */
+ pthread_t thread;
+ int rv = pthread_create (&thread, NULL, api_show_version_blocking_fn, 0);
+ if (rv)
+ {
+      return clib_error_return (0, "pthread_create failed");
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (test_api_command, static) = {
+ .path = "test api internal",
+ .short_help = "test internal api client",
+ .function = test_api_test_command_fn,
+};
diff --git a/src/plugins/unittest/bier_test.c b/src/plugins/unittest/bier_test.c
index 9b4ba67e7cd..924c9212929 100644
--- a/src/plugins/unittest/bier_test.c
+++ b/src/plugins/unittest/bier_test.c
@@ -118,30 +118,29 @@ bier_test_mk_intf (u32 ninterfaces)
for (i = 0; i < ninterfaces; i++)
{
- hw_address[5] = i;
-
- error = ethernet_register_interface(vnet_get_main(),
- test_interface_device_class.index,
- i /* instance */,
- hw_address,
- &tm->hw_if_indicies[i],
- /* flag change */ 0);
-
- error = vnet_hw_interface_set_flags(vnet_get_main(),
- tm->hw_if_indicies[i],
- VNET_HW_INTERFACE_FLAG_LINK_UP);
- BIER_TEST((NULL == error), "ADD interface %d", i);
-
- tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
- tm->hw_if_indicies[i]);
- vec_validate (ip4_main.fib_index_by_sw_if_index, tm->hw[i]->sw_if_index);
- vec_validate (ip6_main.fib_index_by_sw_if_index, tm->hw[i]->sw_if_index);
- ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- error = vnet_sw_interface_set_flags(vnet_get_main(),
- tm->hw[i]->sw_if_index,
- VNET_SW_INTERFACE_FLAG_ADMIN_UP);
- BIER_TEST((NULL == error), "UP interface %d", i);
+ vnet_eth_interface_registration_t eir = {};
+ vnet_main_t *vnm = vnet_get_main ();
+
+ hw_address[5] = i;
+
+ eir.dev_class_index = test_interface_device_class.index;
+ eir.dev_instance = i;
+ eir.address = hw_address;
+ tm->hw_if_indicies[i] = vnet_eth_register_interface (vnm, &eir);
+
+ error =
+ vnet_hw_interface_set_flags (vnet_get_main (), tm->hw_if_indicies[i],
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ BIER_TEST ((NULL == error), "ADD interface %d", i);
+
+ tm->hw[i] =
+ vnet_get_hw_interface (vnet_get_main (), tm->hw_if_indicies[i]);
+ ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ error =
+ vnet_sw_interface_set_flags (vnet_get_main (), tm->hw[i]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ BIER_TEST ((NULL == error), "UP interface %d", i);
}
/*
* re-eval after the inevitable realloc
@@ -770,10 +769,9 @@ bier_test_mpls_imp (void)
.frp_flags = FIB_ROUTE_PATH_BIER_IMP,
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
};
- mfib_table_entry_path_update(0, // default table
- &pfx_1_1_1_1_c_239_1_1_1 ,
- MFIB_SOURCE_API,
- &path_via_bier_imp_1);
+ mfib_table_entry_path_update (0, // default table
+ &pfx_1_1_1_1_c_239_1_1_1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_bier_imp_1);
mfib_table_entry_delete(0,
&pfx_1_1_1_1_c_239_1_1_1 ,
MFIB_SOURCE_API);
diff --git a/src/plugins/unittest/bihash_test.c b/src/plugins/unittest/bihash_test.c
index c5cc752ade0..1d3aeeca23d 100644
--- a/src/plugins/unittest/bihash_test.c
+++ b/src/plugins/unittest/bihash_test.c
@@ -207,16 +207,16 @@ test_bihash_threads (bihash_test_main_t * tm)
tm->thread_barrier = 1;
/* Start the worker threads */
+ tm->threads_running = 0;
for (i = 0; i < tm->nthreads; i++)
{
rv = pthread_create (&handle, NULL, test_bihash_thread_fn,
(void *) (uword) i);
if (rv)
- {
- clib_unix_warning ("pthread_create returned %d", rv);
- }
+ clib_unix_warning ("pthread_create returned %d", rv);
+ else
+ tm->threads_running++;
}
- tm->threads_running = i;
tm->sequence_number = 0;
CLIB_MEMORY_BARRIER ();
@@ -338,14 +338,18 @@ test_bihash (bihash_test_main_t * tm)
{
kv.key = tm->keys[i];
if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
- if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
- clib_warning
- ("[%d] search for key %lld failed unexpectedly\n", i,
- tm->keys[i]);
+ {
+ if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
+ {
+ return clib_error_return (
+ 0, "[%d] search for key %lld failed unexpectedly\n", i,
+ tm->keys[i]);
+ }
+ }
if (kv.value != (u64) (i + 1))
- clib_warning
- ("[%d] search for key %lld returned %lld, not %lld\n", i,
- tm->keys, kv.value, (u64) (i + 1));
+ return clib_error_return (
+ 0, "[%d] search for key %lld returned %lld, not %lld\n", i,
+ tm->keys, kv.value, (u64) (i + 1));
}
}
@@ -373,7 +377,8 @@ test_bihash (bihash_test_main_t * tm)
{
p = hash_get (tm->key_hash, tm->keys[i]);
if (p == 0 || p[0] != (uword) (i + 1))
- clib_warning ("ugh, couldn't find %lld\n", tm->keys[i]);
+ return clib_error_return (0, "ugh, couldn't find %lld\n",
+ tm->keys[i]);
}
}
@@ -401,8 +406,8 @@ test_bihash (bihash_test_main_t * tm)
rv = BV (clib_bihash_add_del) (h, &kv, 0 /* is_add */ );
if (rv < 0)
- clib_warning ("delete key %lld not ok but should be",
- tm->keys[i]);
+ return clib_error_return (
+ 0, "delete key %lld not ok but should be", tm->keys[i]);
if (tm->careful_delete_tests)
{
@@ -412,14 +417,14 @@ test_bihash (bihash_test_main_t * tm)
rv = BV (clib_bihash_search) (h, &kv, &kv);
if (j <= i && rv >= 0)
{
- clib_warning
- ("i %d j %d search ok but should not be, value %lld",
- i, j, kv.value);
+ return clib_error_return (
+ 0, "i %d j %d search ok but should not be, value %lld",
+ i, j, kv.value);
}
if (j > i && rv < 0)
{
- clib_warning ("i %d j %d search not ok but should be",
- i, j);
+ return clib_error_return (
+ 0, "i %d j %d search not ok but should be", i, j);
}
}
}
@@ -471,6 +476,7 @@ test_bihash_command_fn (vlib_main_t * vm,
tm->ncycles = 10;
tm->report_every_n = 50000;
tm->seed = 0x1badf00d;
+ tm->search_iter = 1;
memset (&tm->stats, 0, sizeof (tm->stats));
@@ -512,7 +518,7 @@ test_bihash_command_fn (vlib_main_t * vm,
/* Preallocate hash table, key vector */
tm->key_hash = hash_create (tm->nitems, sizeof (uword));
vec_validate (tm->keys, tm->nitems - 1);
- _vec_len (tm->keys) = 0;
+ vec_set_len (tm->keys, 0);
switch (which)
{
@@ -535,14 +541,12 @@ test_bihash_command_fn (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_bihash_command, static) =
{
.path = "test bihash",
.short_help = "test bihash",
.function = test_bihash_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
bihash_test_init (vlib_main_t * vm)
diff --git a/src/plugins/unittest/bitmap_test.c b/src/plugins/unittest/bitmap_test.c
index 04a06d39a18..1b05be7b333 100644
--- a/src/plugins/unittest/bitmap_test.c
+++ b/src/plugins/unittest/bitmap_test.c
@@ -12,60 +12,219 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+#include <stdbool.h>
#include <vlib/vlib.h>
#include <vppinfra/bitmap.h>
static clib_error_t *
+check_bitmap (const char *test_name, const uword *bm, u32 expected_len, ...)
+{
+ clib_error_t *error = 0;
+ u32 i;
+ uword expected_value;
+
+ va_list va;
+ va_start (va, expected_len);
+
+ if (vec_len (bm) != expected_len)
+ {
+ error = clib_error_create ("%s failed, wrong "
+ "bitmap's size (%u != %u expected)",
+ test_name, vec_len (bm), expected_len);
+ goto done;
+ }
+
+ for (i = 0; i < expected_len; ++i)
+ {
+ expected_value = va_arg (va, uword);
+ if (bm[i] != expected_value)
+ {
+ error = clib_error_create (
+ "%s failed, wrong "
+ "bitmap's value at index %u (%u != %u expected)",
+ test_name, i, bm[i], expected_value);
+ break;
+ }
+ }
+
+done:
+ va_end (va);
+ return error;
+}
+
+static clib_error_t *
+check_bitmap_will_expand (const char *test_name, uword **bm, uword index,
+ bool expected_will_expand)
+{
+ uword max_bytes = vec_max_bytes (*bm);
+ bool result;
+
+ result = clib_bitmap_will_expand (*bm, index);
+ if (result != expected_will_expand)
+ {
+ return clib_error_create (
+ "%s failed, wrong "
+ "bitmap's expansion before set (%u != %u expected)",
+ test_name, result, expected_will_expand);
+ }
+
+ *bm = clib_bitmap_set (*bm, index, 1);
+ result = vec_max_bytes (*bm) > max_bytes;
+ if (result != expected_will_expand)
+ {
+ return clib_error_create (
+ "%s failed, wrong "
+ "bitmap's expansion after set (%u != %u expected)",
+ test_name, result, expected_will_expand);
+ }
+
+ return 0;
+}
+
+static clib_error_t *
test_bitmap_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
- u64 *bm = 0;
- u64 *bm2 = 0;
- u64 *dup;
- uword junk;
+ clib_error_t *error = 0;
+ uword *bm = 0;
+ uword *bm2 = 0;
+ uword *bm3 = 0;
+ uword *dup = 0;
- bm = clib_bitmap_set_multiple (bm, 2, ~0ULL, BITS (uword));
+ /* bm should look like:
+ * bm[0] bm[1]
+ * LSB |0011...11|1100...00| MSB
+ */
+ bm = clib_bitmap_set_multiple (0, 2, ~0ULL, BITS (uword));
+ error = check_bitmap ("clib_bitmap_set_multiple 1", bm, 2, ~0ULL << 2, 3);
+ if (error != 0)
+ goto done;
- junk = clib_bitmap_next_clear (bm, 3);
- junk = clib_bitmap_next_clear (bm, 65);
+ /* bm2 should look like:
+ * bm2[0]
+ * LSB |11...11| MSB
+ */
+ bm2 = clib_bitmap_set_multiple (0, 0, ~0ULL, BITS (uword));
+ error = check_bitmap ("clib_bitmap_set_multiple 2", bm2, 1, ~0ULL);
+ if (error != 0)
+ goto done;
- bm2 = clib_bitmap_set_multiple (bm2, 0, ~0ULL, BITS (uword));
- _vec_len (bm2) = 1;
- junk = clib_bitmap_next_clear (bm2, 0);
+ /* bm should look like:
+ * bm[0] bm[1]
+ * LSB |0011...1100|000...000| MSB
+ */
+ bm = clib_bitmap_set_multiple (bm, 2, pow2_mask (BITS (uword) - 3),
+ BITS (uword));
+ error = check_bitmap ("clib_bitmap_set_multiple 3", bm, 2,
+ pow2_mask (BITS (uword) - 3) << 2, 0);
+ if (error != 0)
+ goto done;
+ /* bm2 should look like:
+ * bm2[0]
+ * LSB |101...111| MSB
+ */
+ bm2 = clib_bitmap_xori (bm2, 1);
+ error = check_bitmap ("clib_bitmap_xori 1", bm2, 1, ~0ULL ^ 2);
+ if (error != 0)
+ goto done;
- bm = clib_bitmap_set_multiple (bm, 2, ~0ULL, BITS (uword) - 3);
- junk = clib_bitmap_get_multiple (bm, 2, BITS (uword));
- junk = clib_bitmap_first_set (bm);
- junk = 1 << 3;
- bm = clib_bitmap_xori (bm, junk);
- bm = clib_bitmap_andi (bm, junk);
- bm = clib_bitmap_xori_notrim (bm, junk);
- bm = clib_bitmap_andi_notrim (bm, junk);
+ /* bm should look like:
+ * bm[0] bm[1]
+ * LSB |0011...1100|000...001| MSB
+ */
+ bm = clib_bitmap_xori (bm, 2 * BITS (uword) - 1);
+ error = check_bitmap ("clib_bitmap_xori 2", bm, 2,
+ pow2_mask (BITS (uword) - 3) << 2,
+ 1ULL << (BITS (uword) - 1));
+ if (error != 0)
+ goto done;
- bm = clib_bitmap_set_multiple (bm, 2, ~0ULL, BITS (uword) - 3);
- bm2 = clib_bitmap_set_multiple (bm2, 2, ~0ULL, BITS (uword) - 3);
+ /* bm should look like:
+ * bm[0] bm[1]
+ * LSB |00100...00|000...001| MSB
+ */
+ bm = clib_bitmap_andi (bm, 2);
+ error =
+ check_bitmap ("clib_bitmap_andi", bm, 2, 4, 1ULL << (BITS (uword) - 1));
+ if (error != 0)
+ goto done;
+ /* bm should look like:
+ * bm[0]
+ * LSB |00100...00| MSB
+ */
+ bm = clib_bitmap_xori (bm, 2 * BITS (uword) - 1);
+ error = check_bitmap ("clib_bitmap_xori 3", bm, 1, 4);
+ if (error != 0)
+ goto done;
+
+ /* bm and bm2 should look like:
+ * bm[0] bm[1]
+ * LSB |0011...11|1100...00| MSB
+ * bm2[0] bm2[1]
+ * LSB |101...111|0011...11| MSB
+ */
+ bm = clib_bitmap_set_multiple (bm, 2, ~0ULL, BITS (uword));
+ bm2 =
+ clib_bitmap_set_multiple (bm2, BITS (uword) + 2, ~0ULL, BITS (uword) - 3);
dup = clib_bitmap_dup_and (bm, bm2);
- vec_free (dup);
- dup = clib_bitmap_dup_andnot (bm, bm2);
- vec_free (dup);
+ error = check_bitmap ("clib_bitmap_dup_and", dup, 1, bm[0] & bm2[0]);
+ if (error != 0)
+ goto done;
+
+ /* bm should look like:
+ * bm[0] bm[1] ... bm[3]
+ * LSB |0011...11|11...11| ... |11...11| MSB
+ */
+ bm = clib_bitmap_set_region (bm, 5, 1, 4 * BITS (uword) - 5);
+ error = check_bitmap ("clib_bitmap_set_region 1", bm, 4, ~0ULL << 2, ~0ULL,
+ ~0ULL, ~0ULL);
+ if (error != 0)
+ goto done;
+
+ /* bm should look like:
+ * bm[0] bm[1] ... bm[3]
+ * LSB |0011...11|11...11| ... |11...1100000| MSB
+ */
+ bm = clib_bitmap_set_region (bm, 4 * BITS (uword) - 5, 0, 5);
+ error = check_bitmap ("clib_bitmap_set_region 2", bm, 4, ~0ULL << 2, ~0ULL,
+ ~0ULL, pow2_mask (BITS (uword) - 5));
+ if (error != 0)
+ goto done;
+
+ error = check_bitmap_will_expand ("clib_bitmap_will_expand 1", &bm, 0, 0);
+ if (error != 0)
+ goto done;
+
+ error = check_bitmap_will_expand ("clib_bitmap_will_expand 2", &bm,
+ vec_max_len (bm) * BITS (uword) - 1, 0);
+ if (error != 0)
+ goto done;
+
+ error = check_bitmap_will_expand ("clib_bitmap_will_expand 3", &bm,
+ vec_max_len (bm) * BITS (uword), 1);
+ if (error != 0)
+ goto done;
+
+ error = check_bitmap_will_expand ("clib_bitmap_will_expand 4", &bm3, 0, 1);
+ if (error != 0)
+ goto done;
+
+done:
vec_free (bm);
vec_free (bm2);
+ vec_free (bm3);
+ vec_free (dup);
- return 0;
+ return error;
}
-
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (test_bihash_command, static) =
-{
+VLIB_CLI_COMMAND (test_bitmap_command, static) = {
.path = "test bitmap",
.short_help = "Coverage test for bitmap.h",
.function = test_bitmap_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/counter_test.c b/src/plugins/unittest/counter_test.c
index 24b9e1e386e..71f8f93f94d 100644
--- a/src/plugins/unittest/counter_test.c
+++ b/src/plugins/unittest/counter_test.c
@@ -19,7 +19,7 @@
#include <vppinfra/error.h>
#include <vlib/counter.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/stats/stats.h>
enum
{
@@ -38,31 +38,10 @@ enum
static uint64_t
get_stats_epoch ()
{
- stat_segment_main_t *sm = &stat_segment_main;
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
return sm->shared_header->epoch;
}
-/*
- * Return the maximum element count of the vector based on its allocated
- * memory.
- */
-static int
-get_vec_mem_size (void *v, uword data_size)
-{
- stat_segment_main_t *sm = &stat_segment_main;
-
- if (v == 0)
- return 0;
-
- uword aligned_header_bytes = vec_header_bytes (0);
- void *p = v - aligned_header_bytes;
- void *oldheap = clib_mem_set_heap (sm->heap);
- int mem_size = (clib_mem_size (p) - aligned_header_bytes) / data_size;
- clib_mem_set_heap (oldheap);
-
- return mem_size;
-}
-
/* number of times to repeat the counter expand tests */
#define EXPAND_TEST_ROUNDS 3
@@ -90,8 +69,7 @@ test_simple_counter_expand (vlib_main_t *vm)
// Check how many elements fit into the counter vector without expanding
// that. The next validate calls should not increase the stats segment
// epoch.
- int mem_size = get_vec_mem_size (counter.counters[0],
- sizeof ((counter.counters[0])[0]));
+ int mem_size = vec_max_len (counter.counters[0]);
for (index = 1; index <= mem_size - 1; index++)
{
vlib_validate_simple_counter (&counter, index);
@@ -111,6 +89,9 @@ test_simple_counter_expand (vlib_main_t *vm)
epoch = new_epoch;
}
+ vlib_free_simple_counter (&counter);
+ vlib_validate_simple_counter (&counter, 0);
+
return 0;
}
@@ -138,8 +119,7 @@ test_combined_counter_expand (vlib_main_t *vm)
// Check how many elements fit into the counter vector without expanding
// that. The next validate calls should not increase the stats segment
// epoch.
- int mem_size = get_vec_mem_size (counter.counters[0],
- sizeof ((counter.counters[0])[0]));
+ int mem_size = vec_max_len (counter.counters[0]);
for (index = 1; index <= mem_size - 1; index++)
{
vlib_validate_combined_counter (&counter, index);
@@ -159,6 +139,9 @@ test_combined_counter_expand (vlib_main_t *vm)
epoch = new_epoch;
}
+ vlib_free_combined_counter (&counter);
+ vlib_validate_combined_counter (&counter, 0);
+
return 0;
}
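
The bespoke get_vec_mem_size() helper is gone in favor of vppinfra's vec_max_len(), which reports how many elements fit in a vector's current allocation; the expand tests then validate indices up to that capacity (no reallocation, so the stats-segment epoch must hold) and one index past it (reallocation, so the epoch must advance). A minimal sketch of the capacity check (assuming <vppinfra/vec.h>):

#include <vppinfra/vec.h>

static int
fits_without_realloc (u32 *v, u32 index)
{
  /* vec_max_len: element capacity of the vector's current allocation */
  return index < vec_max_len (v);
}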
diff --git a/src/plugins/unittest/crypto/aes_cbc.c b/src/plugins/unittest/crypto/aes_cbc.c
index 05a16c29a5c..89b0163207f 100644
--- a/src/plugins/unittest/crypto/aes_cbc.c
+++ b/src/plugins/unittest/crypto/aes_cbc.c
@@ -53,7 +53,6 @@ static u8 ciphertext128[] = {
0x12, 0x0E, 0xCA, 0x30, 0x75, 0x86, 0xE1, 0xA7,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes128_cbc) = {
.name = "NIST SP 800-38A",
.alg = VNET_CRYPTO_ALG_AES_128_CBC,
@@ -63,7 +62,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes128_cbc) = {
.ciphertext = TEST_DATA (ciphertext128),
};
-/* *INDENT-ON* */
static u8 key192[24] = {
0x8E, 0x73, 0xB0, 0xF7, 0xDA, 0x0E, 0x64, 0x52,
@@ -82,7 +80,6 @@ static u8 ciphertext192[64] = {
0xD9, 0x20, 0xA9, 0xE6, 0x4F, 0x56, 0x15, 0xCD,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes192_cbc) = {
.name = "NIST SP 800-38A",
.alg = VNET_CRYPTO_ALG_AES_192_CBC,
@@ -92,7 +89,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes192_cbc) = {
.ciphertext = TEST_DATA (ciphertext192),
};
-/* *INDENT-ON* */
static u8 key256[32] = {
0x60, 0x3D, 0xEB, 0x10, 0x15, 0xCA, 0x71, 0xBE,
@@ -112,7 +108,6 @@ static u8 ciphertext256[64] = {
0xDA, 0x6C, 0x19, 0x07, 0x8C, 0x6A, 0x9D, 0x1B,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes256_cbc) = {
.name = "NIST SP 800-38A",
.alg = VNET_CRYPTO_ALG_AES_256_CBC,
@@ -151,7 +146,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes256_incr2) = {
.key.length = 32,
.plaintext_incremental = 1056,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto/aes_ctr.c b/src/plugins/unittest/crypto/aes_ctr.c
index 80e66111c12..581e283b0c3 100644
--- a/src/plugins/unittest/crypto/aes_ctr.c
+++ b/src/plugins/unittest/crypto/aes_ctr.c
@@ -41,7 +41,6 @@ static u8 tc1_ciphertext[] = {
0x1b, 0xef, 0x68, 0x64, 0x99, 0x0d, 0xb6, 0xce,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes128_ctr_tc1) = {
.name = "CTR-AES128 TC1",
.alg = VNET_CRYPTO_ALG_AES_128_CTR,
@@ -50,7 +49,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes128_ctr_tc1) = {
.plaintext = TEST_DATA (tc1_plaintext),
.ciphertext = TEST_DATA (tc1_ciphertext),
};
-/* *INDENT-ON* */
static u8 tc1_192_key[] = {
0x8e, 0x73, 0xb0, 0xf7, 0xda, 0x0e, 0x64, 0x52,
@@ -68,7 +66,6 @@ static u8 tc1_192_ciphertext[] = {
0x4f, 0x2b, 0x04, 0x59, 0xfe, 0x7e, 0x6e, 0x0b,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes192_ctr_tc1) = {
.name = "CTR-AES192 TC1",
.alg = VNET_CRYPTO_ALG_AES_192_CTR,
@@ -77,7 +74,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes192_ctr_tc1) = {
.plaintext = TEST_DATA (tc1_plaintext),
.ciphertext = TEST_DATA (tc1_192_ciphertext),
};
-/* *INDENT-ON* */
static u8 tc1_256_key[] = {
0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe,
@@ -96,7 +92,6 @@ static u8 tc1_256_ciphertext[] = {
0xb7, 0xa7, 0xf5, 0x04, 0xbb, 0xf3, 0xd2, 0x28,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes256_ctr_tc1) = {
.name = "CTR-AES256 TC1",
.alg = VNET_CRYPTO_ALG_AES_256_CTR,
@@ -105,7 +100,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes256_ctr_tc1) = {
.plaintext = TEST_DATA (tc1_plaintext),
.ciphertext = TEST_DATA (tc1_256_ciphertext),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto/aes_gcm.c b/src/plugins/unittest/crypto/aes_gcm.c
index 3d1b221bf32..daa9ef11dcc 100644
--- a/src/plugins/unittest/crypto/aes_gcm.c
+++ b/src/plugins/unittest/crypto/aes_gcm.c
@@ -166,7 +166,6 @@ static u8 tc4_tag256[] = {
0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (aes_gcm128_tc1) = {
.name = "128-GCM Spec. TC1",
.alg = VNET_CRYPTO_ALG_AES_128_GCM,
@@ -318,7 +317,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (aes_gcm256_inc5) = {
.aad.length = 20,
.tag.length = 16,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto/aes_gmac.c b/src/plugins/unittest/crypto/aes_gmac.c
new file mode 100644
index 00000000000..f58b8cdc389
--- /dev/null
+++ b/src/plugins/unittest/crypto/aes_gmac.c
@@ -0,0 +1,3029 @@
+/* Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+/* Test vectors published in NIST Cryptographic Algorithm Validation Program
+ * (CAVP)
+ * https://csrc.nist.gov/Projects/Cryptographic-Algorithm-Validation-Program/CAVP-TESTING-BLOCK-CIPHER-MODES#GCMVS
+ */
+
+#include <vppinfra/clib.h>
+#include <vnet/crypto/crypto.h>
+#include <unittest/crypto/crypto.h>
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc0) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x11\x75\x4c\xd7\x2a\xec\x30\x9b\xf5\x2f\x76\x87\x21\x2e\x89\x57"),
+ .iv = TEST_DATA_STR ("\x3c\x81\x9d\x9a\x9b\xed\x08\x76\x15\x03\x0b\x65"),
+ .tag = TEST_DATA_STR (
+ "\x25\x03\x27\xc6\x74\xaa\xf4\x77\xae\xf2\x67\x57\x48\xcf\x69\x71"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc1) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xca\x47\x24\x8a\xc0\xb6\xf8\x37\x2a\x97\xac\x43\x50\x83\x08\xed"),
+ .iv = TEST_DATA_STR ("\xff\xd2\xb5\x98\xfe\xab\xc9\x01\x92\x62\xd2\xbe"),
+ .tag = TEST_DATA_STR (
+ "\x60\xd2\x04\x04\xaf\x52\x7d\x24\x8d\x89\x3a\xe4\x95\x70\x7d\x1a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc2) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xdb\x1a\xd0\xbd\x1c\xf6\xdb\x0b\x5d\x86\xef\xdd\x89\x14\xb2\x18"),
+ .iv = TEST_DATA_STR ("\x36\xfa\xd6\xac\xb3\xc9\x8e\x01\x38\xae\xb9\xb1"),
+ .tag = TEST_DATA_STR (
+ "\x5e\xe2\xba\x73\x7d\x3f\x2a\x94\x4b\x33\x5a\x81\xf6\x65\x3c\xce"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc3) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x1c\x71\x35\xaf\x62\x7c\x04\xc3\x29\x57\xf3\x3f\x9a\xc0\x85\x90"),
+ .iv = TEST_DATA_STR ("\x35\x5c\x09\x4f\xa0\x9c\x8e\x92\x81\x17\x8d\x34"),
+ .tag = TEST_DATA_STR (
+ "\xb6\xab\x2c\x7d\x90\x6c\x9d\x9e\xc4\xc1\x49\x8d\x2c\xbb\x50\x29"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc4) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x6c\xa2\xc1\x12\x05\xa6\xe5\x5a\xb5\x04\xdb\xf3\x49\x1f\x8b\xdc"),
+ .iv = TEST_DATA_STR ("\xb1\x00\x8b\x65\x0a\x2f\xee\x64\x21\x75\xc6\x0d"),
+ .tag = TEST_DATA_STR (
+ "\x7a\x9a\x22\x5d\x5f\x9a\x0e\xbf\xe0\xe6\x9f\x37\x18\x71\xa6\x72"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc5) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x69\xf2\xca\x78\xbb\x56\x90\xac\xc6\x58\x73\x02\x62\x88\x28\xd5"),
+ .iv = TEST_DATA_STR ("\x70\x1d\xa2\x82\xcb\x6b\x60\x18\xda\xbd\x00\xd3"),
+ .tag = TEST_DATA_STR (
+ "\xab\x1d\x40\xdd\xa1\x79\x8d\x56\x68\x78\x92\xe2\x15\x9d\xec\xfd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc6) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xdc\xf4\xe3\x39\xc4\x87\xb6\x79\x7a\xac\xa9\x31\x72\x5f\x7b\xbd"),
+ .iv = TEST_DATA_STR ("\x2c\x1d\x95\x5e\x35\x36\x67\x60\xea\xd8\x81\x7c"),
+ .tag = TEST_DATA_STR (
+ "\x32\xb5\x42\xc5\xf3\x44\xcc\xec\xeb\x46\x0a\x02\x93\x8d\x6b\x0c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc7) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x76\x58\xcd\xbb\x81\x57\x2a\x23\xa7\x8e\xe4\x59\x6f\x84\x4e\xe9"),
+ .iv = TEST_DATA_STR ("\x1c\x3b\xaa\xe9\xb9\x06\x59\x61\x84\x2c\xbe\x52"),
+ .tag = TEST_DATA_STR (
+ "\x70\xc7\x12\x3f\xc8\x19\xaa\x06\x0e\xd2\xd3\xc1\x59\xb6\xea\x41"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc8) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x28\x1a\x57\x0b\x1e\x8f\x26\x5e\xe0\x93\x03\xec\xae\x0c\xc4\x6d"),
+ .iv = TEST_DATA_STR ("\x8c\x29\x41\xf7\x3c\xf8\x71\x3a\xd5\xbc\x13\xdf"),
+ .tag = TEST_DATA_STR (
+ "\xa4\x2e\x5e\x5f\x6f\xb0\x0a\x9f\x12\x06\xb3\x02\xed\xbf\xd8\x7c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc9) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xcd\x33\x2a\x98\x6f\x82\xd9\x8c\x21\x52\x78\x13\x1a\xd3\x87\xb7"),
+ .iv = TEST_DATA_STR ("\x1d\x12\xb2\x59\xf4\x4b\x87\x3d\x39\x42\xbc\x11"),
+ .tag = TEST_DATA_STR (
+ "\x34\x23\x80\x23\x64\x81\x85\xd7\xef\x0c\xfc\xf5\x83\x6e\x93\xcc"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc10) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x80\xe1\xd9\x8d\x10\xb2\x72\x37\x38\x6f\x02\x91\x89\xec\x04\x48"),
+ .iv = TEST_DATA_STR ("\x23\x9e\xba\xb2\xf5\x24\xfd\x62\xc5\x54\xa1\x90"),
+ .tag = TEST_DATA_STR (
+ "\x4c\x0f\x29\xd9\x63\xf0\xed\x68\xdc\xcf\x34\x49\x6c\xf4\x3d\x00"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc11) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x40\x65\x0c\xdb\x61\xe3\xe1\x9a\x1a\x98\xfb\x4e\x05\x37\x7d\x35"),
+ .iv = TEST_DATA_STR ("\x69\xf0\xa8\x1a\xaf\x6b\xb8\x48\x62\x82\xf1\xb9"),
+ .tag = TEST_DATA_STR (
+ "\x26\x57\xe1\x2d\xec\x21\xc3\xec\xf0\x71\xaf\x61\x79\x52\x9f\xb4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc12) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x1e\x89\xa6\xcd\x75\x28\xcc\xe1\xe2\xb2\xb5\xf7\xfd\x2b\x6b\x52"),
+ .iv = TEST_DATA_STR ("\xe1\x1f\xd4\x27\xa7\x82\xd5\x43\xf7\x8e\xfc\x60"),
+ .tag = TEST_DATA_STR (
+ "\xee\xed\xff\x87\x4c\x8e\xde\xea\x53\xe8\xbe\x2a\x13\xaf\xd8\x1b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc13) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x2a\x7a\xd6\x14\x66\x76\x05\x7d\xb7\x77\xde\xa4\x68\x3d\x0d\x45"),
+ .iv = TEST_DATA_STR ("\xed\x72\x1e\xa6\x74\x56\xd4\x59\x4a\xaf\xbd\x51"),
+ .tag = TEST_DATA_STR (
+ "\xee\x3c\xab\x57\x78\x88\x84\x39\xd9\x0f\xa7\x18\xb7\x57\x38\xad"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc14) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa3\x64\xf4\x94\xa4\xcd\x01\x47\xc3\x47\x31\x07\x4d\xc1\xa8\x5b"),
+ .iv = TEST_DATA_STR ("\x4a\xa8\x47\x0d\xd4\x04\xe4\x05\x4b\x30\x09\x3a"),
+ .tag = TEST_DATA_STR (
+ "\xd8\xa7\xbb\xa3\xa4\x51\x90\x2e\x3a\xdc\x01\x06\x0c\x3c\x91\xa7"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc0) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x77\xbe\x63\x70\x89\x71\xc4\xe2\x40\xd1\xcb\x79\xe8\xd7\x7f\xeb"),
+ .iv = TEST_DATA_STR ("\xe0\xe0\x0f\x19\xfe\xd7\xba\x01\x36\xa7\x97\xf3"),
+ .aad = TEST_DATA_STR (
+ "\x7a\x43\xec\x1d\x9c\x0a\x5a\x78\xa0\xb1\x65\x33\xa6\x21\x3c\xab"),
+ .tag = TEST_DATA_STR (
+ "\x20\x9f\xcc\x8d\x36\x75\xed\x93\x8e\x9c\x71\x66\x70\x9d\xd9\x46"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc1) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x76\x80\xc5\xd3\xca\x61\x54\x75\x8e\x51\x0f\x4d\x25\xb9\x88\x20"),
+ .iv = TEST_DATA_STR ("\xf8\xf1\x05\xf9\xc3\xdf\x49\x65\x78\x03\x21\xf8"),
+ .aad = TEST_DATA_STR (
+ "\xc9\x4c\x41\x01\x94\xc7\x65\xe3\xdc\xc7\x96\x43\x79\x75\x8e\xd3"),
+ .tag = TEST_DATA_STR (
+ "\x94\xdc\xa8\xed\xfc\xf9\x0b\xb7\x4b\x15\x3c\x8d\x48\xa1\x79\x30"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc2) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa8\x2b\xb1\xed\xc7\xc0\x1a\x36\x89\x00\x6f\x34\xbf\xed\x78\x3e"),
+ .iv = TEST_DATA_STR ("\x96\x38\x36\xb6\x7b\x18\x8b\xec\xf9\xba\x14\x11"),
+ .aad = TEST_DATA_STR (
+ "\x9d\x11\x5b\xb9\xbb\xd1\x19\xfb\x77\x7b\x63\x16\x06\x5a\x9a\xc8"),
+ .tag = TEST_DATA_STR (
+ "\xc4\x91\x88\x9f\xa3\xec\xa4\x54\x4b\xa0\xd5\x1b\x8e\x0f\x38\x37"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc3) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xb9\x78\x2d\x0a\x59\x86\xc6\x3f\x35\x2d\x3b\xc4\xc7\xec\xc9\x6d"),
+ .iv = TEST_DATA_STR ("\x45\x41\xe1\x5b\x92\xed\xea\x44\xec\xeb\x1f\x2a"),
+ .aad = TEST_DATA_STR (
+ "\xf1\xa9\xf0\x72\x34\x29\xc5\xb2\x61\x85\xac\x3e\xa7\xe1\x3d\x7a"),
+ .tag = TEST_DATA_STR (
+ "\x74\xd0\xd3\x69\x49\xf0\x27\x66\x70\xf9\xdd\xc5\x79\xe9\x4f\x3a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc4) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x59\xb9\x57\x85\xb3\x0f\x20\x56\x79\xfc\x4f\x3f\x9a\x90\x10\x2f"),
+ .iv = TEST_DATA_STR ("\x19\x08\x78\x7c\xc1\xe1\x88\x0a\x6e\xf5\xdd\x17"),
+ .aad = TEST_DATA_STR (
+ "\x39\x85\x2d\x31\x82\x94\x4a\x51\x77\xdb\x27\x7b\x63\x91\x07\x02"),
+ .tag = TEST_DATA_STR (
+ "\x8f\x9a\x96\xc0\x13\x99\x24\x85\xb4\x3e\x2b\x62\x74\x5a\xd1\x73"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc5) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x34\xdd\x79\x26\xab\x13\xd4\x07\x81\x60\xd8\x7d\xe2\xe3\xc7\x24"),
+ .iv = TEST_DATA_STR ("\xc1\x1c\xcd\xaf\x79\x8a\xb0\x3a\xf2\xd9\x7e\xf9"),
+ .aad = TEST_DATA_STR (
+ "\xaf\x69\x87\x17\xa6\xd7\x90\xb3\xbf\xc3\x91\x95\x85\x7b\xb5\xff"),
+ .tag = TEST_DATA_STR (
+ "\x48\x11\x60\x50\xbb\xd9\x11\x82\x70\xd0\xbe\x25\x2d\x29\xd5\xd4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc6) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x8e\xc8\x6f\xab\x55\xaa\xab\x0e\x77\x45\x5e\x9c\xd3\xdb\xc7\x8e"),
+ .iv = TEST_DATA_STR ("\x15\xfd\x90\xa9\x86\x7e\x14\xf0\xd6\x3b\x53\xb9"),
+ .aad = TEST_DATA_STR (
+ "\xe7\x50\x9e\x27\x62\x09\xa6\xd3\xec\xfa\xbb\x53\xcc\xdc\xd2\x36"),
+ .tag = TEST_DATA_STR (
+ "\xd9\x6d\x6a\xc0\xd3\x09\xce\xbe\xde\xba\x2a\xf9\xf2\x62\x13\x2f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc7) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x66\xb2\x47\x3d\x9e\x01\x21\x66\x6d\x47\x63\x3f\x70\x08\xeb\x1c"),
+ .iv = TEST_DATA_STR ("\xc1\x71\x6c\x68\xa2\x4d\x57\x77\x0b\x86\x7e\x51"),
+ .aad = TEST_DATA_STR (
+ "\xc2\x0f\x68\x63\x17\xd6\x7e\x53\xdd\x79\xba\xe5\xc4\x6d\xc1\x11"),
+ .tag = TEST_DATA_STR (
+ "\x9a\x08\x61\x68\x09\xcf\x15\x24\x7d\xfe\xb9\x75\x6b\xa4\xf6\x09"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc8) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x5b\x26\x2a\x9d\x00\x90\x4d\x30\xa2\x58\x7c\xaa\xde\x09\x13\x81"),
+ .iv = TEST_DATA_STR ("\xf7\xbc\x15\x4c\xa5\x62\xe8\xf2\xc1\x84\x55\x98"),
+ .aad = TEST_DATA_STR (
+ "\x23\x11\x2d\x07\x8c\x99\x14\xfa\x3d\xfe\x52\x18\xcd\x19\x10\x16"),
+ .tag = TEST_DATA_STR (
+ "\x98\x85\x4d\x19\x3a\x06\xdb\xe3\x2c\xe4\x49\x7e\xec\x5c\x9a\x8b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc9) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x2e\x4f\xb9\xcc\x32\x01\x88\xa6\xf1\xfa\x89\xa7\xa2\x52\x27\x3a"),
+ .iv = TEST_DATA_STR ("\x7a\x6d\x4e\xe6\x9c\x72\x56\xc1\x4f\xba\x8f\x5e"),
+ .aad = TEST_DATA_STR (
+ "\x80\xba\x4a\x20\x2a\x68\xc3\x59\x0d\x65\x57\x91\x2c\x6f\x87\x8e"),
+ .tag = TEST_DATA_STR (
+ "\x92\x80\x31\x32\x73\xbe\xfb\x8a\xfa\x0b\xce\xca\x5a\x96\x6d\x85"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc10) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x5e\xa9\x49\x73\xd8\x61\x6d\xaf\xa7\xf3\x1d\xb0\x71\x6d\x17\x29"),
+ .iv = TEST_DATA_STR ("\xa0\x5b\x62\x66\x9d\x25\x0e\x61\xb0\x77\xd2\x8a"),
+ .aad = TEST_DATA_STR (
+ "\x96\x20\xba\xf2\xf5\x8d\x01\x3f\x8a\x4c\x48\x71\x98\x9c\x1b\x17"),
+ .tag = TEST_DATA_STR (
+ "\x7e\x55\x03\x98\xde\xe7\x28\x25\x6d\x69\x28\xcd\xaa\xc4\x3b\x73"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc11) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x91\x03\x85\xf6\xf0\x7f\x9e\x57\xe4\x83\xc4\x7d\xd5\x20\x6b\xcc"),
+ .iv = TEST_DATA_STR ("\x51\x8f\x56\xe3\x36\x58\xdf\x31\x1d\x42\xd9\xfe"),
+ .aad = TEST_DATA_STR (
+ "\x5d\x15\x79\x09\xa2\xa4\x60\x71\x17\xe7\x7d\xa0\xe4\x49\x3b\x88"),
+ .tag = TEST_DATA_STR (
+ "\xa7\x04\x1e\xa4\xa1\xd7\x4d\x9e\x66\xb9\x57\x1b\x59\xb6\xa1\xd8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc12) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xca\xb3\xaf\x7a\x15\xb4\x30\xe0\x34\xe7\x93\xbb\x30\xdb\x8a\xb2"),
+ .iv = TEST_DATA_STR ("\x96\x3a\x56\xe2\xe1\x2f\x38\x70\x62\xe1\x84\x98"),
+ .aad = TEST_DATA_STR (
+ "\xa0\x94\xa1\xdd\x11\x21\xd3\xaa\x52\xc8\x1e\x8f\x10\xbf\x9f\x0c"),
+ .tag = TEST_DATA_STR (
+ "\x1a\x31\xd2\x95\x60\x1e\xb3\xc8\x2a\x54\xb2\x34\x98\x4f\xfd\xf5"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc13) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x89\xc9\x49\xe9\xc8\x04\xaf\x01\x4d\x56\x04\xb3\x94\x59\xf2\xc8"),
+ .iv = TEST_DATA_STR ("\xd1\xb1\x04\xc8\x15\xbf\x1e\x94\xe2\x8c\x8f\x16"),
+ .aad = TEST_DATA_STR (
+ "\x82\xad\xcd\x63\x8d\x3f\xa9\xd9\xf3\xe8\x41\x00\xd6\x1e\x07\x77"),
+ .tag = TEST_DATA_STR (
+ "\x88\xdb\x9d\x62\x17\x2e\xd0\x43\xaa\x10\xf1\x6d\x22\x7d\xc4\x1b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc14) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa4\xd9\x94\xc4\xac\x5a\xc0\xf0\x29\x13\x24\x57\x14\xfb\xe2\x35"),
+ .iv = TEST_DATA_STR ("\xa9\x47\x2d\xad\xcc\xa8\xd7\xe0\xe3\xb8\x08\x4d"),
+ .aad = TEST_DATA_STR (
+ "\xeb\x31\x8b\x9e\x17\x57\x52\x03\xdd\x29\xeb\xed\x20\xec\x82\xf9"),
+ .tag = TEST_DATA_STR (
+ "\x32\x3d\xf7\xf3\x36\x94\x10\x6f\x56\x73\x9d\xe0\x97\x32\x16\xa3"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc0) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x2f\xb4\x5e\x5b\x8f\x99\x3a\x2b\xfe\xbc\x4b\x15\xb5\x33\xe0\xb4"),
+ .iv = TEST_DATA_STR ("\x5b\x05\x75\x5f\x98\x4d\x2b\x90\xf9\x4b\x80\x27"),
+ .aad = TEST_DATA_STR ("\xe8\x54\x91\xb2\x20\x2c\xaf\x1d\x7d\xce\x03\xb9\x7e"
+ "\x09\x33\x1c\x32\x47\x39\x41"),
+ .tag = TEST_DATA_STR (
+ "\xc7\x5b\x78\x32\xb2\xa2\xd9\xbd\x82\x74\x12\xb6\xef\x57\x69\xdb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc1) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x95\x21\x17\x04\x8f\x77\xe2\x76\xc2\xef\x65\x80\x53\x7c\x14\x03"),
+ .iv = TEST_DATA_STR ("\x07\x0b\x8f\xb4\x6a\x7a\xd5\x28\x85\xbe\x1b\x26"),
+ .aad = TEST_DATA_STR ("\x34\xb0\x88\xf9\x82\x81\x8b\x5f\x07\xda\xbe\x2b\x62"
+ "\xf9\x54\x7f\x4e\xd0\x99\x12"),
+ .tag = TEST_DATA_STR (
+ "\xbe\xdd\x4c\xf3\x0f\xd7\xa4\xab\xc4\x9b\xdc\xc3\xf3\xb2\x48\xb1"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc2) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x7f\x64\x53\xb3\x9b\xde\x01\x85\x60\xa1\x6a\x27\x04\x21\x75\x43"),
+ .iv = TEST_DATA_STR ("\x0f\x3e\xec\xf4\x8d\x68\x35\x32\x26\xa7\x7f\xe4"),
+ .aad = TEST_DATA_STR ("\x11\xe4\xec\xb2\x56\xeb\xff\x56\x45\x3f\xa2\xe7\x5e"
+ "\x43\xeb\x9d\x64\x10\x49\xe6"),
+ .tag = TEST_DATA_STR (
+ "\xb5\x12\x62\x3a\x12\xd5\x49\x2b\x7d\x76\xd3\x9b\xe0\xdf\x57\x77"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc3) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x93\x32\xe4\x33\xbf\x61\x00\xc6\xcc\x23\xb0\x87\x10\x62\x7c\x40"),
+ .iv = TEST_DATA_STR ("\xaa\xb3\xdb\x30\x15\xb2\x9d\x24\xf3\x29\xbe\xb4"),
+ .aad = TEST_DATA_STR ("\xbd\x84\x3a\x08\xf0\xa8\x22\xf8\xf4\xf7\x6c\x36\x48"
+ "\x38\x0a\xab\x76\x22\xe7\x19"),
+ .tag = TEST_DATA_STR (
+ "\xe5\x4f\x1d\x18\xc6\x1d\x8b\xe1\x54\x84\x72\x76\x05\xb5\xa5\xdc"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc4) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x57\x73\x75\x0a\x49\x30\x96\xa9\x9d\x84\xc0\x56\x3f\xc2\x93\xe9"),
+ .iv = TEST_DATA_STR ("\xc3\x90\xed\x70\xdc\x94\x97\x23\x44\x13\xad\x52"),
+ .aad = TEST_DATA_STR ("\x60\x12\x51\x72\x58\x71\x6c\x1f\x00\x35\xef\xa6\x0a"
+ "\x0f\x36\xb5\xc6\x5e\x73\x79"),
+ .tag = TEST_DATA_STR (
+ "\xb0\x11\xb2\x64\x61\x0e\x58\x08\x27\x05\x47\x6f\x04\x0b\x8c\x86"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc5) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x41\xb0\xd0\xfc\xe5\xd3\x13\x59\xcf\xd5\xdb\x40\x64\xe2\xd4\x6b"),
+ .iv = TEST_DATA_STR ("\xb9\x03\xe9\xd0\xce\xa2\x57\x95\xa8\x2e\x73\xe3"),
+ .aad = TEST_DATA_STR ("\x4c\xba\x50\x18\x76\xf3\x3e\x1f\xda\x9c\xd4\x56\xe3"
+ "\x18\x06\x83\xe3\x86\x3b\xd9"),
+ .tag = TEST_DATA_STR (
+ "\x18\xbc\x39\xd0\xb9\x5c\xf0\x59\xcd\x8c\x25\x00\x4f\x5e\x50\x7c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc6) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x47\x48\xb7\x82\xe3\xfe\x5e\x4e\xff\xeb\x7c\x67\x23\x2d\x2b\x07"),
+ .iv = TEST_DATA_STR ("\xc5\xe4\xdc\xf1\x8f\x86\x07\x6b\x88\xa5\xd5\xe9"),
+ .aad = TEST_DATA_STR ("\x3b\x2f\xca\xd8\x73\x9e\xd8\x7e\x1d\x02\xe8\x08\x45"
+ "\xf1\x20\xe2\x49\xea\x92\xb1"),
+ .tag = TEST_DATA_STR (
+ "\xb8\xae\x71\x8e\x28\x79\xc9\xcb\x65\x8d\x5d\x11\x22\xe6\x9b\xb7"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc7) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xe3\x0c\xc2\x20\x77\xd5\x95\x12\x16\xd0\x7f\x37\xc5\x1b\x58\xf9"),
+ .iv = TEST_DATA_STR ("\xfc\x58\x3a\xd1\x59\xb5\x2e\x0b\x63\x78\x15\x7e"),
+ .aad = TEST_DATA_STR ("\xc3\xcb\x7b\xe8\x88\x8e\xf4\x4c\xa5\xaa\x93\xdd\xe2"
+ "\x6d\x27\x51\x28\x8e\x1f\x5a"),
+ .tag = TEST_DATA_STR (
+ "\xa8\xce\x25\xb5\xdc\x8f\x84\xe2\xf5\xda\xe5\xf0\x85\xaa\xcc\xd4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc8) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x7c\x8b\x10\xba\x75\xee\x6a\xb4\xa9\x97\xd3\xf5\x98\xb7\x9d\x40"),
+ .iv = TEST_DATA_STR ("\x6f\xb5\x51\x88\xdd\xf0\x0d\xde\x09\x59\x65\x87"),
+ .aad = TEST_DATA_STR ("\x2d\xdc\x0a\xcf\x97\x05\xf8\xd1\x8f\x90\x5b\x8f\x9d"
+ "\x47\x2e\x7d\xbf\x6b\x91\xe3"),
+ .tag = TEST_DATA_STR (
+ "\x57\x91\xd3\x80\x51\x09\xc5\xe1\x8a\xdf\xf4\xe8\x09\x06\xa0\x18"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc9) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x72\xc7\xdb\x6c\xa2\x9f\x83\x64\x1c\x3f\xff\x5b\x71\xc4\xbc\x30"),
+ .iv = TEST_DATA_STR ("\xf2\x00\x07\x42\xe2\x49\xac\x56\xd5\xb2\xf6\x5f"),
+ .aad = TEST_DATA_STR ("\xcd\x99\x4d\x2d\x08\x23\x27\x70\x92\x7d\x85\x4e\xf2"
+ "\xb6\xca\x2f\x08\x73\x70\xcf"),
+ .tag = TEST_DATA_STR (
+ "\xa5\x96\x6d\xf3\x9f\xee\xba\x03\x36\xf0\xb9\xa3\xf4\xff\xe6\xc3"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc10) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x28\x33\xcc\x10\x19\x50\x30\xe4\xa1\x15\x55\x32\x66\x6c\xb0\x49"),
+ .iv = TEST_DATA_STR ("\xad\x80\x2b\x9a\x5c\x94\x09\xfa\x3e\x7d\xcf\xcc"),
+ .aad = TEST_DATA_STR ("\xb3\xec\xbe\xa2\x79\x7d\x00\x6c\x07\xb8\xce\x62\x1b"
+ "\xe3\xb0\xec\xcd\x37\xc3\xec"),
+ .tag = TEST_DATA_STR (
+ "\x81\xde\xab\x8b\xde\xe0\xd3\x91\x49\x5e\xed\x40\x29\xa6\xd2\x05"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc11) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xd8\x98\x5b\xb5\xac\x02\x58\xad\xad\x86\x66\x0e\xbb\xc6\xd1\x9f"),
+ .iv = TEST_DATA_STR ("\xb5\xee\x26\xf8\xc4\x63\xbb\xfc\x27\x11\x5b\x0a"),
+ .aad = TEST_DATA_STR ("\x61\x3f\x51\xf8\x32\xfb\xf4\x34\xb8\xe3\xfe\x94\x54"
+ "\xae\x46\xa8\x62\xd8\x31\xf0"),
+ .tag = TEST_DATA_STR (
+ "\xfe\x9f\x0b\x1b\xdc\x68\xde\xe6\xe8\xdc\x2c\xe1\x26\x65\xd3\x36"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc12) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x9b\x8f\x69\x24\xdc\x22\xf1\x07\x3c\x1a\x38\x44\x8a\x2f\x04\x47"),
+ .iv = TEST_DATA_STR ("\x09\xcd\xab\xf8\x7d\x82\x82\x8e\xca\x1c\x0c\x7f"),
+ .aad = TEST_DATA_STR ("\x69\x21\x0e\x4e\x0a\x1c\xfd\x50\x38\x75\x66\x52\x79"
+ "\x0b\x9a\x8c\xfb\xbd\x94\x3d"),
+ .tag = TEST_DATA_STR (
+ "\xa6\x0c\x10\x4a\x6f\xb4\x63\x84\x27\xa8\x8a\x86\xc0\x49\x23\xbd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc13) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x72\x13\x22\x13\xd5\xd9\x53\x09\xbf\x7e\x10\xf8\x31\x8d\x7c\x20"),
+ .iv = TEST_DATA_STR ("\xfb\x90\xbf\x28\x3c\x54\x11\x23\x03\x55\xd7\xa1"),
+ .aad = TEST_DATA_STR ("\xa3\x0b\xb1\x7c\x80\x89\xc6\xf5\xf6\x1b\x25\x0a\x94"
+ "\xcb\xbb\xfd\xf5\xf2\xa3\xe6"),
+ .tag = TEST_DATA_STR (
+ "\x09\x19\x1a\xf4\x18\x94\x9f\xe6\xbe\x8d\xbf\x13\xe0\x06\x52\x7a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc14) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x65\x2f\xfb\xad\x4e\x1f\xcb\xe7\x55\x64\x39\x5e\x6c\x1c\x39\x24"),
+ .iv = TEST_DATA_STR ("\x11\x13\x49\x63\x6d\x10\x6f\xd5\xf6\xa1\xe0\x88"),
+ .aad = TEST_DATA_STR ("\x5f\x52\xaa\x85\xdc\x3a\xc0\x42\x64\x7e\x32\xad\xa0"
+ "\x50\xd6\x7e\x59\xb5\x19\xaa"),
+ .tag = TEST_DATA_STR (
+ "\x28\xd9\x80\xd7\xbf\xd8\x78\xc2\x27\xc1\x40\xde\x34\x82\x76\x5b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc0) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x99\xe3\xe8\x79\x3e\x68\x6e\x57\x1d\x82\x85\xc5\x64\xf7\x5e\x2b"),
+ .iv = TEST_DATA_STR ("\xc2\xdd\x0a\xb8\x68\xda\x6a\xa8\xad\x9c\x0d\x23"),
+ .aad = TEST_DATA_STR (
+ "\xb6\x68\xe4\x2d\x4e\x44\x4c\xa8\xb2\x3c\xfd\xd9\x5a\x9f\xed\xd5\x17\x8a"
+ "\xa5\x21\x14\x48\x90\xb0\x93\x73\x3c\xf5\xcf\x22\x52\x6c\x59\x17\xee\x47"
+ "\x65\x41\x80\x9a\xc6\x86\x7a\x8c\x39\x93\x09\xfc"),
+ .tag = TEST_DATA_STR (
+ "\x3f\x4f\xba\x10\x0e\xaf\x1f\x34\xb0\xba\xad\xaa\xe9\x99\x5d\x85"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc1) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xf8\xe2\x9e\xfd\x00\xa4\x23\xc4\xea\x94\x56\x86\x3f\x83\xc5\x4f"),
+ .iv = TEST_DATA_STR ("\x2d\x3c\xf6\x7c\xbc\xe6\x9d\x63\x9b\xd1\xc0\x92"),
+ .aad = TEST_DATA_STR (
+ "\x02\xc7\x0f\xc8\xa2\x54\x46\x19\xc1\xc3\xe9\xfc\xe6\xb3\xc6\xc3\xbc\x24"
+ "\x64\x3e\x0f\x14\x0e\x6b\x48\xac\x50\x5e\xa6\x66\xcd\x9a\x20\x10\xc3\xa8"
+ "\xe2\xf5\xf1\x04\x37\x88\x7f\xe8\x03\xb5\x4d\xb3"),
+ .tag = TEST_DATA_STR (
+ "\x96\x3c\xb5\x0a\xca\x3e\x09\xdd\x0d\x9a\x01\x3c\x87\x34\x15\x5f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc2) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x00\xe3\x49\x1d\xfc\xf3\xbe\xc3\x9c\x89\xcc\xfd\x80\xa5\xa8\x96"),
+ .iv = TEST_DATA_STR ("\x29\xf6\xff\x4e\xdc\x4a\xc3\xe9\x7f\xfb\x16\x80"),
+ .aad = TEST_DATA_STR (
+ "\x73\x81\x33\x51\xb3\x9f\x5e\x40\x00\xa9\xee\x8d\x2b\x85\xf1\x31\x63\x4a"
+ "\xca\xed\xe0\xdd\x25\xd6\x91\xa2\xb8\x29\xad\x4f\xe9\xea\x69\x9f\x12\x24"
+ "\x25\x19\x84\x7c\xb0\x83\xb0\xb4\xd3\xd8\xb3\xbc"),
+ .tag = TEST_DATA_STR (
+ "\x01\xb2\xe9\xba\x71\x9a\xd7\x7c\x75\x3b\x36\x4e\xcc\x5a\xab\xeb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc3) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x0a\xd0\x6f\x4c\x19\xaf\x1d\x5f\x60\x2b\x38\xf8\x6e\x56\x29\x1c"),
+ .iv = TEST_DATA_STR ("\x0b\x23\x5c\x6a\x75\xce\xcd\xfc\xba\x90\x01\xce"),
+ .aad = TEST_DATA_STR (
+ "\x7d\x4f\x26\xf7\x89\x5b\x2e\xf3\xda\x2e\x4f\x93\xe4\x11\xcd\xb7\x40\x25"
+ "\xc7\x75\x9c\x03\x8d\x87\x23\x44\xa4\x5c\xe5\x6d\x92\xa5\x81\x86\x2c\x3b"
+ "\xac\xe0\x39\x09\x0a\x2c\xcf\xa4\x3b\x62\x3d\xcb"),
+ .tag = TEST_DATA_STR (
+ "\xb4\xbc\x9c\xe1\x47\x5d\x0c\x93\xdf\xd5\xa5\xd8\xd4\x5b\xd8\xe5"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc4) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xee\xeb\x33\xe0\xc8\xa4\x06\xea\x23\x6a\x07\x5c\xdb\xe9\xd6\xf9"),
+ .iv = TEST_DATA_STR ("\xb9\x35\xe8\xee\xd6\x62\x27\x83\x6e\xde\x18\x9a"),
+ .aad = TEST_DATA_STR (
+ "\x9a\x42\x91\xac\xb9\x92\x4b\xba\x42\x41\xb0\xc9\xc3\xc2\xe1\x26\x2b\x25"
+ "\xa7\xc7\xf0\x2c\x92\xad\xea\xdf\x92\x25\x4d\x61\x8a\xb5\x93\x88\xaa\x30"
+ "\xb4\x7e\xaf\xa5\x88\x99\xc3\x57\xcf\x28\x1e\x31"),
+ .tag = TEST_DATA_STR (
+ "\x14\x3d\x69\x54\xeb\x6f\xe7\x0a\xff\x70\xda\x97\x8c\xcd\x45\x09"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc5) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x60\x0b\x54\x42\xa0\xb5\x50\xa3\x8f\x85\xd2\xfb\x0a\xcc\x9c\x96"),
+ .iv = TEST_DATA_STR ("\x5e\x65\xdd\x6e\x8b\x20\xd6\xb2\x93\x1f\xe6\xc2"),
+ .aad = TEST_DATA_STR (
+ "\x46\x1e\x54\xa0\x92\xf8\x39\x24\x66\x84\x9f\xb0\x37\x0a\xe3\x0c\x14\xc1"
+ "\xbf\x39\x87\xab\x2e\xbb\xe9\x8e\x18\xd1\x3f\x04\x1d\x09\xd0\x43\xf7\xae"
+ "\xa7\x8b\xfc\xc4\x2f\x86\x4a\x9f\xb4\x0f\x00\x31"),
+ .tag = TEST_DATA_STR (
+ "\x2c\xd6\x26\xf9\xa0\x68\x63\x00\xcf\x23\xc0\xbc\x59\x7c\x63\xb4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc6) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xce\x8d\x11\x03\x10\x0f\xa2\x90\xf9\x53\xfb\xb4\x39\xef\xde\xe4"),
+ .iv = TEST_DATA_STR ("\x48\x74\xc6\xf8\x08\x23\x66\xfc\x7e\x49\xb9\x33"),
+ .aad = TEST_DATA_STR (
+ "\xd6\x9d\x03\x3c\x32\x02\x97\x89\x26\x3c\x68\x9e\x11\xff\x7e\x9e\x8e\xef"
+ "\xc4\x8d\xdb\xc4\xe1\x0e\xea\xe1\xc9\xed\xbb\x44\xf0\x4e\x7c\xc6\x47\x15"
+ "\x01\xea\xdd\xa3\x94\x0a\xb4\x33\xd0\xa8\xc2\x10"),
+ .tag = TEST_DATA_STR (
+ "\xa5\x96\x4b\x77\xaf\x0b\x8a\xec\xd8\x44\xd6\xad\xec\x8b\x7b\x1c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc7) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xae\x71\x14\xc0\x9f\xfa\x04\x29\x88\x34\x41\x2f\x6a\x8d\xe4\x53"),
+ .iv = TEST_DATA_STR ("\xf3\x80\xc2\xd8\x60\xbe\x2a\xf4\x1e\x1b\xe5\xc6"),
+ .aad = TEST_DATA_STR (
+ "\x7e\x16\x08\x2f\x68\x9c\x63\xe8\xad\xdd\xd5\xcb\x2d\xa6\x10\xbb\xfb\x88"
+ "\xd0\x73\xcf\x8b\x20\x43\x84\xa9\x37\xaa\xb0\x37\x65\x23\xa5\x0d\x3d\x5f"
+ "\x13\x92\x97\x8f\x79\x60\x9f\x12\xdf\x8f\xc2\x88"),
+ .tag = TEST_DATA_STR (
+ "\x40\xd3\xa3\x63\x58\xa6\xf6\xca\xaa\x6a\xf9\x2c\xfd\x87\x4a\x22"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc8) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xd8\xf5\x20\xb6\xf3\xcf\x6b\x83\x5c\xe4\xcc\xe4\x8f\x4c\xb0\x33"),
+ .iv = TEST_DATA_STR ("\x01\x9a\x55\xc9\x86\x15\xc0\x22\xaf\xff\x96\x44"),
+ .aad = TEST_DATA_STR (
+ "\xc3\xfb\x51\x8d\xdb\x2d\x73\x41\x7e\x24\x33\x59\xa0\xed\x8c\x12\x67\x50"
+ "\xeb\x16\x3e\x7b\xd8\x45\x63\x71\x59\x39\x70\x75\xe3\xdb\x1d\xb7\x2f\xe2"
+ "\xf0\xe1\x3b\x59\x9c\x33\x3c\x47\x3f\xeb\x22\x45"),
+ .tag = TEST_DATA_STR (
+ "\x46\x7c\xfa\xd5\xaf\x11\x85\x2d\x6e\xca\x28\x9c\x86\xf9\x67\xad"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc9) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x13\xba\x95\x60\x6b\x01\xaf\x03\x5b\xf9\x61\xe3\x98\x52\xe3\x4b"),
+ .iv = TEST_DATA_STR ("\x9e\xc9\xcf\x3b\x00\x2c\xfe\xd9\xe7\x61\x93\x4f"),
+ .aad = TEST_DATA_STR (
+ "\xbb\x9d\xe5\x63\x83\x6d\x1f\x1b\x1d\xe9\x64\x51\x4e\xce\xbb\x8a\xd1\x05"
+ "\x01\xdb\x56\x22\x80\xb7\xbd\x98\x80\x48\x14\x73\x58\x17\x90\x8b\x28\x56"
+ "\xca\xfa\xde\xcd\x40\xb0\x48\x32\xfb\xde\x2b\xfb"),
+ .tag = TEST_DATA_STR (
+ "\x17\x2a\x3b\xcb\xc5\x00\x1d\xfd\x38\x15\x17\x5a\x88\xf7\x05\x6c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc10) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x1c\x97\xda\x5f\xc5\xa9\x64\x0f\x28\x96\x22\x84\x24\x08\xcb\xa2"),
+ .iv = TEST_DATA_STR ("\x6d\x76\x5a\x98\x8e\x93\x45\x88\x16\x3e\x29\xb7"),
+ .aad = TEST_DATA_STR (
+ "\x10\x26\xa5\x90\x81\x6d\x2e\x1a\xa6\x7a\xa0\xd1\x3d\x50\xa8\x41\x3a\xf4"
+ "\xd8\xee\x9b\x1f\xa5\xce\xb8\xde\xac\xc9\xf4\x1e\x8e\x76\x4b\x3a\xc1\x5f"
+ "\x98\x29\x5e\x88\x00\xad\xf6\xa7\x17\x54\x48\xcd"),
+ .tag = TEST_DATA_STR (
+ "\x49\x45\xa7\x9d\x5e\xdb\xb9\x34\xc5\xcf\x94\x39\x5c\x35\x9d\xeb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc11) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x8d\xd4\x6f\x27\x1a\x20\x1c\xc2\x1c\xa0\x82\x32\x48\x15\x7e\x6b"),
+ .iv = TEST_DATA_STR ("\x18\x21\xb3\x10\xce\x2d\xba\x99\x9c\xdf\x75\x76"),
+ .aad = TEST_DATA_STR (
+ "\x34\xba\x40\x99\x97\xce\xba\x06\x5f\x4a\x54\x57\x07\x8a\x9e\x23\x2a\x84"
+ "\xf5\x94\x01\x1a\xec\xfd\xbf\xbd\x24\xa8\x02\xca\x12\x9e\x01\xcb\x13\x27"
+ "\xe2\x65\xb4\xa9\x00\x4f\xb4\xc5\x00\x3f\xff\xd3"),
+ .tag = TEST_DATA_STR (
+ "\x30\x4c\xc2\xcd\x2f\xcd\xd4\xab\xc8\x44\xbc\x9c\x1c\xbe\x02\x41"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc12) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x0c\x54\x5d\x95\x33\x3b\x6a\xcf\x8b\x29\x28\xf3\xef\xd0\x83\xde"),
+ .iv = TEST_DATA_STR ("\x31\xde\x89\xd0\x7e\x75\x77\x95\x6f\xa9\x5e\xf3"),
+ .aad = TEST_DATA_STR (
+ "\x55\x74\xd6\x5f\x5a\xff\xfb\x2d\x31\xcc\xa8\xf5\x8c\xf5\x94\x5b\x83\x55"
+ "\x3c\xd4\x5d\x2d\xba\x0e\x05\xfa\x54\xe4\x2a\xa3\xf5\xa0\x51\xe1\x62\x4d"
+ "\xe1\x6d\x4b\x93\xcb\xab\x79\x88\xc6\xd9\x5f\x8c"),
+ .tag = TEST_DATA_STR (
+ "\x4e\xd9\x1c\xfe\x90\xa4\x99\x00\xe0\x56\x56\x97\xbc\x82\xb6\x59"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc13) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x79\x0b\x39\xf3\x01\x38\x3a\x82\xb3\x77\xf5\x85\xd3\xbf\x0f\x26"),
+ .iv = TEST_DATA_STR ("\x2f\xd9\xc1\x42\xb5\xfc\x62\xe8\x7e\xff\xf1\xfd"),
+ .aad = TEST_DATA_STR (
+ "\x45\x63\x4e\x0a\xfc\x59\xae\x9f\x6e\x30\xf7\xf5\xfe\x43\xcf\x5a\x4e\x1f"
+ "\x78\xd0\xae\xbb\x9e\x5a\x7a\xd9\xd8\x6f\x25\x27\x8e\x52\x1f\x48\x45\xd4"
+ "\x9d\x6c\xb5\x33\xca\xc6\x43\x98\x39\x64\x7f\xd0"),
+ .tag = TEST_DATA_STR (
+ "\x69\x63\x7c\x3f\x92\x33\xda\x23\xf8\xdf\x7b\x09\xe8\xcf\xb2\x52"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc14) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x8f\x63\x65\x26\x32\xd0\x7b\x2a\x4a\x83\xc2\x6d\xed\xd3\x26\x57"),
+ .iv = TEST_DATA_STR ("\x74\x7b\xee\x0e\x1d\x46\x2a\x90\x16\xf1\x46\x8d"),
+ .aad = TEST_DATA_STR (
+ "\x9c\x00\xff\x96\x9b\x55\xa4\x97\xdc\x52\x3f\xa0\xce\xda\xa3\x39\xdc\x3c"
+ "\x6c\xe1\x8e\x61\xc7\xbf\x80\x0c\x36\x12\x01\x35\x1b\xc4\x97\x28\xc3\xbb"
+ "\x15\x06\x7e\x90\x61\x62\xee\x79\x1b\x8d\x33\x3a"),
+ .tag = TEST_DATA_STR (
+ "\xbd\x5a\x0c\xbf\x85\x9a\x61\x33\xa7\xf2\xd5\x04\xd9\x7c\xae\x05"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc0) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x20\xb5\xb6\xb8\x54\xe1\x87\xb0\x58\xa8\x4d\x57\xbc\x15\x38\xb6"),
+ .iv = TEST_DATA_STR ("\x94\xc1\x93\x5a\xfc\x06\x1c\xbf\x25\x4b\x93\x6f"),
+ .aad = TEST_DATA_STR (
+ "\xca\x41\x8e\x71\xdb\xf8\x10\x03\x81\x74\xea\xa3\x71\x9b\x3f\xcb\x80\x53"
+ "\x1c\x71\x10\xad\x91\x92\xd1\x05\xee\xaa\xfa\x15\xb8\x19\xac\x00\x56\x68"
+ "\x75\x2b\x34\x4e\xd1\xb2\x2f\xaf\x77\x04\x8b\xaf\x03\xdb\xdd\xb3\xb4\x7d"
+ "\x6b\x00\xe9\x5c\x4f\x00\x5e\x0c\xc9\xb7\x62\x7c\xca\xfd\x3f\x21\xb3\x31"
+ "\x2a\xa8\xd9\x1d\x3f\xa0\x89\x3f\xe5\xbf\xf7\xd4\x4c\xa4\x6f\x23\xaf"
+ "\xe0"),
+ .tag = TEST_DATA_STR (
+ "\xb3\x72\x86\xeb\xaf\x4a\x54\xe0\xff\xc2\xa1\xde\xaf\xc9\xf6\xdb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc1) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x7a\xa5\x31\x88\xa9\xc5\x97\x12\x6a\x10\xd2\x48\x60\x3e\xbb\x62"),
+ .iv = TEST_DATA_STR ("\xaa\x45\xca\x5d\xac\x41\xa8\x25\xc4\x5d\x36\xbf"),
+ .aad = TEST_DATA_STR (
+ "\x41\x7f\xd5\x14\x7d\x56\xde\x0c\x74\x32\x95\x97\x82\x4e\xc2\x78\x8a\x34"
+ "\x4f\xb6\x0b\x40\x3e\xdf\x01\x87\xaf\xa1\x2e\x72\xa0\x50\x09\xbb\x70\xf8"
+ "\x3c\xca\xd1\x1e\xfa\x48\x7c\x19\x65\xcf\x84\xfe\xac\x06\x7c\x1f\xfd\xbf"
+ "\x53\x1f\xca\x97\xc5\x54\xf8\x75\xc4\xa1\xa1\xd3\xab\x3c\x53\xc8\xa7\x4e"
+ "\xf3\xee\x94\x15\xa8\x7e\x23\x16\x99\xc8\x2d\x76\x4d\xeb\xed\xa1\x81"
+ "\x32"),
+ .tag = TEST_DATA_STR (
+ "\x99\x7b\xf8\x46\x54\xbb\x96\x16\xc0\xcc\x9b\x45\xf8\x2c\x76\x73"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc2) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x72\xb5\x84\x8e\xd1\xd2\xba\xdb\xd4\x27\xe1\x6f\xc3\xb3\xe4\x4d"),
+ .iv = TEST_DATA_STR ("\xa8\x4c\x7e\x92\x8d\xc6\xe6\x37\x9a\x51\x3a\x20"),
+ .aad = TEST_DATA_STR (
+ "\x1c\x0d\xfc\xec\xbd\x7b\xb0\xe6\x80\xce\x04\x2d\x08\xb2\xd9\xa7\x41\x26"
+ "\x7b\xd1\xda\x76\x8d\xf2\xba\x08\x37\x92\x33\xa9\x97\x3f\x14\x92\x8e\x9d"
+ "\xa6\x35\x37\x68\xb9\xb2\x60\x1c\x03\x3f\xd9\x64\xb1\x6a\x16\xda\xaa\x3e"
+ "\xa3\x5a\xd7\xce\xf7\xe3\x1e\xb1\xf7\x34\x0a\xa3\x4e\x8b\xfc\x08\xb0\xa6"
+ "\xe6\x20\x52\x92\x57\x0c\xed\x43\x31\x68\x76\xd0\xd4\x99\xd9\x19\x2e"
+ "\x6b"),
+ .tag = TEST_DATA_STR (
+ "\x27\x0c\xd7\x86\xb9\x5e\x68\x20\xcd\xb6\x5a\x23\x1b\x75\x30\xed"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc3) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x6d\x05\x12\xeb\xf2\xe7\x3d\x63\xf4\x28\x49\xc5\x7f\x07\x3f\xd0"),
+ .iv = TEST_DATA_STR ("\xc1\xc4\x69\x27\xc7\x4c\x03\xf1\x93\x42\xc3\x3a"),
+ .aad = TEST_DATA_STR (
+ "\x28\xbf\x89\x03\xb2\xdf\xb7\xe6\x9f\x1a\x73\x51\x21\xc7\xef\xe9\xa4\xc4"
+ "\x2b\x6a\x29\x53\x27\xbc\xeb\x02\x46\xc8\x5d\x78\x2c\xe6\x2b\xf0\x75\xdb"
+ "\xdf\x6e\x8e\xc6\x58\x9c\x26\xd3\x06\x96\xcc\xce\xef\x03\x87\x0b\xd0\xab"
+ "\xfd\x26\xd3\x06\x00\xea\xfc\x65\x61\x37\x40\xb5\x4d\x77\x7d\x37\x9e\x8a"
+ "\xac\xf2\x41\xec\xfb\xa1\x1b\x06\x01\x86\xac\x06\x5d\xb1\x71\xaa\xb0"
+ "\x99"),
+ .tag = TEST_DATA_STR (
+ "\xa6\x86\xf5\x94\x1c\xeb\x51\x0e\x12\x6a\x63\x16\xe3\x40\x4d\xc0"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc4) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x64\x38\xbc\x79\x52\x0d\xef\x5d\xb5\x8e\x49\x63\x97\x74\x68\x7a"),
+ .iv = TEST_DATA_STR ("\xd6\x82\xb4\x74\x18\xce\xb5\xbc\x09\xc7\x13\xc2"),
+ .aad = TEST_DATA_STR (
+ "\xd2\x52\xb1\x64\xae\x55\x9e\xd1\x55\xc8\x41\x7b\x96\x65\x25\x29\xdf\x15"
+ "\x1f\x24\xcc\xf1\xce\x98\xd0\xc7\xdd\xf2\x93\xf4\xf1\x23\x66\x30\xa1\x9b"
+ "\x24\xdc\x23\x97\x8d\x33\x77\xa0\x99\x06\x5d\x0b\xa7\x1d\x4b\xb8\xa7\xdc"
+ "\x0c\xb7\x67\x60\xca\x7c\x4a\x0e\x12\xc8\xcb\x56\xc6\x10\x26\x46\x32\x3c"
+ "\x08\xc4\xf4\xf5\x62\x26\xfd\x5b\x71\xa8\x45\x90\x91\x3a\xd2\x0d\xa2"
+ "\x87"),
+ .tag = TEST_DATA_STR (
+ "\x04\xe7\x87\x96\xdb\xf4\x2e\x9f\xfa\x6b\xb9\xe3\x46\x58\x1f\x13"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc5) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x11\x7a\x0a\xa5\x92\xff\xf1\x7a\xe3\x6c\x94\x91\x7d\xb1\x6c\x65"),
+ .iv = TEST_DATA_STR ("\xc3\x53\x7b\xe6\x02\x9d\x54\xff\xef\xab\x27\x30"),
+ .aad = TEST_DATA_STR (
+ "\x29\xe9\x59\xb9\x68\x17\x54\x7a\xe0\x6b\xf8\x5f\xe1\x64\xe8\x2a\x26\x93"
+ "\xf8\x2a\x7a\xeb\x66\xd5\x35\xf0\xd2\xc3\xbf\xfd\x1b\xa1\x8e\x94\xef\x45"
+ "\x79\x39\xf0\xc0\x73\x3e\xda\x47\x38\xd1\x36\x38\x0f\xc8\x76\x07\x5c\x49"
+ "\x43\x22\x02\x37\xa5\x92\x9b\x01\xb3\x2d\xa2\xbc\x2a\x6a\xfd\x6a\xe1\xd8"
+ "\x9f\xd4\x70\x09\x38\x35\x96\x2f\xf6\x70\x8b\xb3\x9b\xa3\x65\x20\x2f"
+ "\x56"),
+ .tag = TEST_DATA_STR (
+ "\xb8\x7f\xcc\x4d\x5c\x48\x4e\x68\xea\x52\xc0\x1b\x55\xff\xa4\x38"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc6) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x5d\x99\x5a\x33\x8e\xd6\x0f\x8a\xb0\xb5\x9d\xa6\xc9\xa4\x0c\x52"),
+ .iv = TEST_DATA_STR ("\x27\x23\xc5\x4e\x31\xc5\xc5\x7f\x02\x36\xe8\x16"),
+ .aad = TEST_DATA_STR (
+ "\x23\x9c\x80\x68\x3f\xeb\x6a\xfd\x38\xf8\x75\x9a\x27\xcb\x5f\x35\x0f\xbc"
+ "\x2f\x75\x78\x38\xc4\x08\x58\xc9\xd0\x8f\x69\x9c\xc5\x6c\x42\x36\xf4\xa7"
+ "\x7b\xd8\x0d\xf0\xe8\xe4\x1d\x5f\x9b\xa7\x32\xdb\x2e\x0a\x3a\x5e\x95\x2e"
+ "\xde\x7b\xfd\xd5\xfc\xbe\xbd\x23\xd0\x72\x71\x13\x4d\xb5\xb8\x24\x61\x53"
+ "\x7c\x47\xe2\xca\x51\xb3\x48\xb0\x83\x0f\x5e\xe5\x75\xad\x4b\x44\x14"
+ "\xdc"),
+ .tag = TEST_DATA_STR (
+ "\x94\x35\x6a\x3b\xfa\xf0\x7f\x2e\xf0\xeb\xe3\xa5\x07\x07\x6b\x16"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc7) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xc8\xa8\x63\xa1\xeb\xaf\x10\xc0\xfc\x0e\x80\xdf\x12\x44\x4e\x6e"),
+ .iv = TEST_DATA_STR ("\xc3\xe8\xcd\xf0\x86\x82\x7f\xee\x70\x95\xd0\xea"),
+ .aad = TEST_DATA_STR (
+ "\x99\x27\xda\x88\xc5\xd3\x36\x25\x66\x99\xc7\x68\x45\xe9\x46\xdc\x53\xc8"
+ "\x7b\xf0\xe1\x1e\x4b\xec\x94\x50\x98\x16\x02\xb3\x20\x10\xd2\xb5\x2b\xfc"
+ "\x91\x28\x3a\x63\x29\xd4\x55\x59\x89\x98\xed\xe2\xe6\x1e\x35\x2e\x55\x31"
+ "\x10\x15\x4b\x4d\xa5\xce\x66\x8d\x66\x4b\x83\xf6\x71\xc0\x10\xbf\x22\x0b"
+ "\x7d\x32\xb3\x4f\x4c\xa6\x9b\x66\xcc\x87\x23\x3d\x79\x23\x37\xcb\x2b"
+ "\xff"),
+ .tag = TEST_DATA_STR (
+ "\x09\x88\x37\xde\x27\x70\x7e\xa3\x59\x3e\x31\xce\xb8\x27\x67\x32"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc8) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x69\xcc\x28\xb1\x61\xf2\x14\xa5\x80\xe6\xba\x4b\xc2\xe3\xde\x9d"),
+ .iv = TEST_DATA_STR ("\xf2\xa5\x66\xf9\xcf\x83\xfd\x28\x0c\x8f\xe0\x8e"),
+ .aad = TEST_DATA_STR (
+ "\xf8\xc5\x26\x3a\x4e\x06\xb4\x9e\x18\x45\x89\xa1\xe0\x71\x97\x86\x43\xc3"
+ "\x53\xaa\x27\xb4\x81\x7f\xe3\x9e\x45\xab\xc4\x42\xe2\x2a\xb5\xd6\x83\xbc"
+ "\xee\x5d\xbb\xd5\x89\xfa\x58\x3f\x17\x1b\xb5\x95\x36\xad\xdd\x2b\x6c\xef"
+ "\xd4\x98\x23\x41\x30\x05\xef\xb2\xa6\x65\xe2\x6a\x60\x29\xc9\x27\xd3\x89"
+ "\x1c\xb0\xd4\xf2\x3e\x8c\xcc\x60\xcf\xd0\x2c\xe8\x97\x8c\x45\x1d\xdc"
+ "\x11"),
+ .tag = TEST_DATA_STR (
+ "\xc9\xc8\x06\xcb\x8b\x1a\x88\x98\x09\x69\x5c\x2e\xc5\xa7\xa8\x6e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc9) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xbb\xf3\x59\x20\xfc\xab\x2c\xed\xaa\xfd\xf3\xf0\x03\x21\xf5\x44"),
+ .iv = TEST_DATA_STR ("\x2c\x7e\xe3\xff\x1d\xf8\x4f\x36\x50\xbc\x92\x98"),
+ .aad = TEST_DATA_STR (
+ "\xa7\x5f\x50\xba\x9a\x50\xf4\x87\x99\x59\x4b\x61\x95\xb3\x12\x5e\xd9\x2d"
+ "\xf7\x31\x44\xbf\xcb\x62\x4c\xe6\x73\x23\xd8\x34\xba\x1a\xfa\xf0\xdf\x4c"
+ "\x6c\x02\x2c\x11\xd4\x8b\xd7\x5c\x86\x67\x5a\x59\x27\xac\x12\x50\x03\x0f"
+ "\x72\x0f\x97\x49\x8d\x4f\xe0\x78\x7b\xae\x65\x5d\xc5\x53\x7a\xc1\xbc\xac"
+ "\x19\x8a\x89\x3f\x9a\xf7\xc2\xef\x9b\x97\x1d\xd6\x4f\x7e\x7b\x62\x60"
+ "\x3e"),
+ .tag = TEST_DATA_STR (
+ "\xc7\xcd\x3f\x93\x8f\x4a\xb1\x86\x42\xd8\x62\x34\xed\xfc\x17\xed"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc10) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x96\x90\xde\x66\x97\x02\xba\x72\xae\xb9\x34\xf5\xac\x50\xe0\x3c"),
+ .iv = TEST_DATA_STR ("\xda\x87\x13\xfe\x2b\x20\x58\xc4\x38\xaf\xf2\x60"),
+ .aad = TEST_DATA_STR (
+ "\xf3\x0e\xe9\x50\xda\x37\xc7\x22\x4b\x5c\x93\xe9\xa2\x9c\xaf\xdb\xf8\xe2"
+ "\x07\x0f\x65\xc2\x26\x24\x4b\x1a\x68\x34\x59\xe0\xc5\xc1\x1c\x9b\x77\xc8"
+ "\xfc\x28\x6d\x42\x98\xa5\xb9\xcd\x1f\xee\x3e\x13\xd4\x69\x0a\x88\x78\x0d"
+ "\x35\xb5\x58\xb5\xd9\xe5\x2b\x1a\x67\xfc\x88\x57\x07\x66\x91\xdc\xa7\xf5"
+ "\xfe\x8e\xf2\x20\x65\xcc\x5d\x9c\x00\x3f\xfd\x25\xeb\xe2\x3e\x61\x44"
+ "\x0e"),
+ .tag = TEST_DATA_STR (
+ "\x7f\x92\x91\x45\x18\xdd\xbe\x84\x2b\x06\x77\x1f\x64\xc4\x0f\x59"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc11) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xe5\xd8\xc6\xe2\xac\x69\x35\xc8\x5e\x81\xee\x0e\xf7\x23\xea\xcf"),
+ .iv = TEST_DATA_STR ("\xc7\x31\x40\xee\x90\xcc\x1d\xcf\x88\x45\x7d\xa2"),
+ .aad = TEST_DATA_STR (
+ "\xf6\xc2\x67\xa6\xae\x5c\xe3\xcf\x4b\xcd\xf5\x9c\xfd\x1f\x77\x7c\x66\x13"
+ "\x3e\x0e\xc4\x77\x27\x85\xf3\x3e\x5f\xa8\x00\xd3\x10\xb2\x4b\x57\x73\xbc"
+ "\x60\x3a\x76\xb3\x0f\xc3\x23\x28\xa8\xe4\x0f\x02\xf8\x23\xa8\x13\xa9\xe4"
+ "\xb4\xfa\xc7\x26\xe9\x92\xc1\x83\xbd\x08\x15\x11\x1c\x1d\x3a\x35\x88\x4a"
+ "\x4e\xff\x32\x02\x7b\xa6\x0d\xba\x67\x9b\x46\x9a\xf3\x1b\xc5\x0c\x05"
+ "\x91"),
+ .tag = TEST_DATA_STR (
+ "\xf9\x38\xfd\x0d\x8c\x14\x8d\x81\x76\x51\x09\xdf\x66\xda\xc9\xaa"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc12) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xe2\x34\x58\xf6\xb3\x04\xc2\xd8\xfe\xb3\xde\xdd\x37\x41\xbc\x24"),
+ .iv = TEST_DATA_STR ("\x46\x19\x03\x6b\x50\xba\x01\x2f\xe5\x0b\xe1\xd7"),
+ .aad = TEST_DATA_STR (
+ "\x74\xbf\xdc\x6b\xc4\xbf\xc3\x8d\x66\x6b\x98\x5c\xfe\x04\x3c\x67\x79\x8b"
+ "\x2d\xb9\x8f\x14\x92\x68\xdb\xa2\x44\x36\xca\xb8\x3e\x9a\x91\xf2\x44\xff"
+ "\xc5\x74\x8c\x93\xf8\xdf\x33\x9a\xe2\x4b\xa4\x31\x8c\x50\xda\x01\x1a\xb3"
+ "\x68\xd3\x16\x7c\x16\xe5\x03\x30\x9b\x01\x35\x1a\x11\xf1\x4d\x06\x7c\xc6"
+ "\x76\x9b\x99\x89\xc7\xd9\x52\xe3\x31\x50\x11\xee\x2e\xa0\x34\xdb\x8c"
+ "\xb8"),
+ .tag = TEST_DATA_STR (
+ "\x60\x53\xab\x80\xc7\x46\x82\x1e\xc5\x0c\x97\xe5\xa1\x42\x4a\x85"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc13) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x53\x72\xac\x5d\x3b\x08\xd8\x60\x91\x91\x10\xbd\xeb\x7f\x31\xdf"),
+ .iv = TEST_DATA_STR ("\x06\xca\x97\x9d\x8c\x25\x0d\x9b\x7b\xe4\x55\x73"),
+ .aad = TEST_DATA_STR (
+ "\xe1\xf9\x58\x83\x4e\x63\xc7\x5c\x8c\x75\x8b\xaf\xaa\x2f\x25\x7e\xa5\x68"
+ "\x9d\x0d\x55\xb8\x77\xb4\xd6\x7b\x8b\x73\xc2\x5c\xe2\x4e\x9b\x09\x4b\x97"
+ "\x6d\xb9\x20\xa1\x59\x96\x8d\xa9\xd3\x3c\x51\x1a\xa8\x99\x9a\xba\x42\xb8"
+ "\xbb\x88\x6e\x65\x45\xdd\x10\x86\x93\x15\x0a\xf3\x57\x49\x6b\xb5\x89\x8b"
+ "\x4e\x8f\x72\x5d\x50\xef\x47\x4a\xfb\x83\x6a\x33\x58\xda\x22\x17\xbb"
+ "\x93"),
+ .tag = TEST_DATA_STR (
+ "\x93\x38\xe1\x4f\xe0\xb0\x8a\x96\x9a\x10\x4c\x82\x85\x28\xa6\xa4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc14) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xbf\x1c\xb4\x9e\x98\x0c\xec\x0b\x15\x3f\xe3\x57\x38\x75\xac\x6c"),
+ .iv = TEST_DATA_STR ("\x54\x26\x66\x9d\x25\x52\x40\x36\xfb\xe8\x1e\x89"),
+ .aad = TEST_DATA_STR (
+ "\xb3\x36\x94\x97\x66\xe9\x94\x8a\x7e\x6f\x36\xa2\xd3\x77\xb8\x4a\x25\xc4"
+ "\xb4\x98\x87\x94\xf3\xde\xab\x7a\xf4\xb1\x4a\x12\xda\xc6\x41\xe2\x5f\xe2"
+ "\xae\x9f\xf5\x34\x50\xac\xe1\x51\x3a\xcd\x0b\x28\x4a\x49\x0b\x45\x5f\x04"
+ "\xf4\x0a\xf9\x44\x18\xc8\x79\x2e\xc1\xa0\x98\x3f\xb1\xd9\xa3\x1d\x93\xdc"
+ "\x3e\xd2\xc7\x5e\x6a\x6c\xe0\x92\x11\x1e\xab\xad\x03\x9b\xac\x2a\x49"
+ "\xf6"),
+ .tag = TEST_DATA_STR (
+ "\xe2\x99\x6a\x2b\x3b\x6b\xf5\x22\x17\xcf\xc4\xd0\xf5\xbb\x35\x1b"),
+};
+
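+/*
+ * The GMAC vectors in this file are AES-GCM with an empty plaintext, so the
+ * tag is computed over the key, IV and AAD only -- there is no ciphertext.
+ * As a minimal cross-check sketch -- assuming OpenSSL's EVP API rather than
+ * anything in this plugin -- a vector's tag can be re-derived like this,
+ * where key/iv/aad/aad_len are the fields of one vector above:
+ *
+ *   #include <openssl/evp.h>
+ *
+ *   unsigned char tag[16];
+ *   int n;
+ *   EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new ();
+ *   // 12-byte IV is the GCM default, matching all vectors in this file
+ *   EVP_EncryptInit_ex (ctx, EVP_aes_128_gcm (), NULL, key, iv);
+ *   // AAD-only update (output buffer is NULL); skip for the 0-aad cases
+ *   EVP_EncryptUpdate (ctx, NULL, &n, aad, aad_len);
+ *   EVP_EncryptFinal_ex (ctx, NULL, &n);
+ *   EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_GET_TAG, 16, tag);
+ *   EVP_CIPHER_CTX_free (ctx);
+ *   // tag should now equal the vector's .tag bytes; use EVP_aes_192_gcm ()
+ *   // for the 192-GMAC groups below
+ */
+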
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc0) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xaa\x74\x0a\xbf\xad\xcd\xa7\x79\x22\x0d\x3b\x40\x6c"
+ "\x5d\x7e\xc0\x9a\x77\xfe\x9d\x94\x10\x45\x39"),
+ .iv = TEST_DATA_STR ("\xab\x22\x65\xb4\xc1\x68\x95\x55\x61\xf0\x43\x15"),
+ .tag = TEST_DATA_STR (
+ "\xf1\x49\xe2\xb5\xf0\xad\xaa\x98\x42\xca\x5f\x45\xb7\x68\xa8\xfc"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc1) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x1b\xb1\xd6\xa3\xbf\xc7\x48\x78\x6f\x39\x51\xe4\x3c"
+ "\x18\x05\x4b\xfc\x8c\xe6\xab\x3d\xc3\xd3\x98"),
+ .iv = TEST_DATA_STR ("\xfe\xa5\x6a\x5c\xe5\xf7\xd4\xc8\x16\x80\x19\x5d"),
+ .tag = TEST_DATA_STR (
+ "\x0b\x0b\xc0\x76\x8b\x02\xf1\x26\xa2\x9b\xcb\x14\x4a\xbc\x6e\x4b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc2) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xb6\x28\x0d\x41\xff\x79\xec\x0a\x8d\xb1\x7c\x1e\x22"
+ "\xf4\x3c\x5f\xdc\x92\x89\x88\x46\xe6\x46\xb5"),
+ .iv = TEST_DATA_STR ("\xc4\x78\x88\xc1\x48\xb7\xdc\x05\x5f\x99\xaa\x08"),
+ .tag = TEST_DATA_STR (
+ "\xe3\x05\xfa\x02\x47\x24\x60\x6e\x14\x03\x26\x95\x9b\xfc\x53\x18"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc3) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x02\x87\xa6\xe3\xf9\x00\x56\xcb\x4b\x3d\x3f\x72\x23"
+ "\xe8\xd0\xaf\x58\xc0\x9f\x15\x6c\xb3\xb8\x05"),
+ .iv = TEST_DATA_STR ("\xea\x0c\xd8\x6c\x79\x7c\x34\x2c\xfb\xb5\xa9\xcc"),
+ .tag = TEST_DATA_STR (
+ "\x54\x9e\xc0\xd0\xff\xcf\xc1\x38\x1e\xd7\x09\xc3\xea\x8e\xac\xda"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc4) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x47\x3e\xa0\xd2\x98\xf6\x45\xcb\x01\xfa\x64\x83\x7b"
+ "\x47\x09\xb2\x83\x74\xf3\xe9\x55\xe7\xfb\xa2"),
+ .iv = TEST_DATA_STR ("\x25\x5b\x70\x4f\x33\xad\x24\xbb\x93\xd2\x67\x3a"),
+ .tag = TEST_DATA_STR (
+ "\xec\xbe\xe0\x40\xa6\xba\x2b\xd2\x71\x0f\x0d\xa5\x8b\x10\x96\xc2"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc5) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x6d\xa0\xbd\x47\x43\x24\x4d\xc4\xb3\xbe\x99\x3a\xb3"
+ "\x95\x4c\xcd\xc9\x00\x77\xff\x31\x1e\xa6\x57"),
+ .iv = TEST_DATA_STR ("\xd1\x9f\xaf\x29\xfe\xaf\xd0\x70\x83\x94\x38\x16"),
+ .tag = TEST_DATA_STR (
+ "\xbd\xd8\xdb\xa9\x1c\xf8\xaf\xfc\x30\x9f\x91\xb3\xe2\x39\x18\xaa"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc6) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xc9\xaa\x36\xf1\x7b\x9e\x57\xcf\xac\xb9\x28\xe1\x34"
+ "\x66\x00\xc2\x2d\x36\x7d\x18\x28\x54\xb4\x64"),
+ .iv = TEST_DATA_STR ("\x6d\x12\x0c\x4a\x05\xe2\xb4\x52\xcc\x22\x13\xc4"),
+ .tag = TEST_DATA_STR (
+ "\xd2\x3a\x67\x29\x61\x9f\xa7\xc3\x58\x79\x4b\x63\xbf\x5e\xe8\xd7"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc7) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x5e\xde\xf8\xc4\x17\x1c\x44\x58\x7c\xbb\xf0\xf4\x2d"
+ "\xc7\xdc\x05\xb3\x6c\x84\x91\xd6\x7c\x75\x82"),
+ .iv = TEST_DATA_STR ("\x21\xf2\x65\xfe\x1e\x26\xff\xe4\x91\xa0\xe5\x94"),
+ .tag = TEST_DATA_STR (
+ "\x9e\x73\xbc\x5f\x26\xd5\xf1\xc8\x5f\xd5\xc0\xdf\x48\x63\x61\x04"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc8) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x44\x17\x16\x09\x6e\x80\xa1\xa8\x62\x86\xd2\xe1\xdf"
+ "\x48\x94\xae\xf5\xfa\x91\x4d\x7f\x6b\xde\xda"),
+ .iv = TEST_DATA_STR ("\xb0\x10\x72\xd8\x25\xeb\x24\xba\x3c\x0a\xbc\x95"),
+ .tag = TEST_DATA_STR (
+ "\x28\xaf\x88\xfe\x13\x40\x68\xe9\xc8\x44\x5a\x19\x47\x84\x3e\xd2"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc9) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xd3\x6e\x99\x04\xe0\xab\x25\x51\xc8\xa4\x12\x56\xbc"
+ "\x66\xad\x25\x37\xf4\xd7\x8c\x56\x18\x33\x73"),
+ .iv = TEST_DATA_STR ("\xfc\xc8\x50\xfa\xc3\x38\x79\x4f\x3a\xdf\x50\x50"),
+ .tag = TEST_DATA_STR (
+ "\xa8\x8c\x92\xd6\xec\x0a\xbe\x95\x0c\x14\x7a\xf3\xb6\xbf\xae\xca"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc10) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x50\x04\xb2\x3a\xa5\x4b\x45\x3c\xe7\x98\xd0\xfa\xe7"
+ "\x07\x35\x00\xc4\xfb\xd4\x94\xc3\x50\xc5\xd0"),
+ .iv = TEST_DATA_STR ("\x31\x11\x9a\xc3\x7e\x06\x63\x25\x05\x48\xd8\x9a"),
+ .tag = TEST_DATA_STR (
+ "\x1b\x47\x45\x91\x10\x76\x4a\xae\x49\xf9\x44\xaf\x0c\x74\xd1\xf3"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc11) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xd7\xfe\x59\x41\x1c\x9a\x7e\xf5\x5c\xf5\x6e\xee\xe2"
+ "\xd9\x60\xc5\xcd\x65\x5f\x4f\xab\xce\x69\x69"),
+ .iv = TEST_DATA_STR ("\x56\x00\x39\x99\x1d\x07\x81\x71\xb8\x6e\x2e\x36"),
+ .tag = TEST_DATA_STR (
+ "\x33\x25\xa6\x83\x1b\x9a\x8d\xa5\x26\xad\x3a\x9c\x30\xbf\x89\x64"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc12) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x00\x60\xc6\x4b\x6b\x16\x81\xe4\x60\x30\x0a\x17\x63"
+ "\x39\x88\xee\xfc\x6f\xc2\xcb\xd4\x7a\xe6\xc9"),
+ .iv = TEST_DATA_STR ("\x14\xd0\x54\x6f\xda\x5d\x9c\x36\x46\x18\x9d\xd4"),
+ .tag = TEST_DATA_STR (
+ "\x66\x78\x4d\x25\xfb\x39\xfa\xcb\xdd\x80\xae\xfa\x7d\xa0\xf0\x2f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc13) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x9c\x4f\x21\xe3\x7b\x91\xfe\x41\x9e\x35\xb6\xfc\xdc"
+ "\x4e\x70\xd0\x32\x55\x75\xf9\x11\xc1\x3b\x43"),
+ .iv = TEST_DATA_STR ("\xf5\x7a\xf6\x28\x46\xb2\x71\xe8\x02\xd6\x70\x1f"),
+ .tag = TEST_DATA_STR (
+ "\x6b\x1e\x00\x9e\x6e\xdf\x78\x9f\xc9\x43\x85\x73\x4d\xd5\x7d\x2f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc14) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x8b\xed\xa5\x6e\xe0\xe1\xe6\xd8\x09\x8b\xa2\x67\xf0"
+ "\x9b\x96\xd8\x9a\x3a\x46\x22\xa6\x41\xe7\x9d"),
+ .iv = TEST_DATA_STR ("\x81\x29\x96\x6d\x15\xbd\xb7\x0e\x0d\x2f\xcc\xef"),
+ .tag = TEST_DATA_STR (
+ "\x28\x73\xdf\x0e\x03\x54\x86\x46\x81\x46\x30\xe0\xca\xc1\xe4\xe5"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc0) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x41\xc5\xda\x86\x67\xef\x72\x52\x20\xff\xe3\x9a\xe0"
+ "\xac\x59\x0a\xc9\xfc\xa7\x29\xab\x60\xad\xa0"),
+ .iv = TEST_DATA_STR ("\x05\xad\x13\xa5\xe2\xc2\xab\x66\x7e\x1a\x6f\xbc"),
+ .aad = TEST_DATA_STR (
+ "\x8b\x5c\x12\x4b\xef\x6e\x2f\x0f\xe4\xd8\xc9\x5c\xd5\xfa\x4c\xf1"),
+ .tag = TEST_DATA_STR (
+ "\x20\x4b\xdb\x1b\xd6\x21\x54\xbf\x08\x92\x2a\xaa\x54\xee\xd7\x05"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc1) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xae\xf7\x60\xf0\xcb\x88\x11\xf4\xab\x4a\x05\xfa\xa3"
+ "\x5f\xe8\xb9\x85\x4a\xde\x54\x8e\x04\x0e\x7f"),
+ .iv = TEST_DATA_STR ("\xd5\xda\xed\xc1\xd4\x95\x9a\x5d\x74\x4b\xc5\xf2"),
+ .aad = TEST_DATA_STR (
+ "\xae\xa8\xce\x76\xe3\xcf\x40\xd4\x73\xf6\x1a\x08\xd5\x9e\x53\xf5"),
+ .tag = TEST_DATA_STR (
+ "\x92\x6c\x61\xde\xf0\x2e\xd3\x0e\xd1\x58\xe3\x55\xac\x5d\x57\x10"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc2) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x12\x00\x0b\x16\x46\x22\x25\x44\x6c\xfe\x6e\x80\xd8"
+ "\x4d\x47\x1d\xbc\x9a\xa3\xf4\x78\xd4\x65\x83"),
+ .iv = TEST_DATA_STR ("\x6c\x88\x3c\xb8\xf9\xff\x7e\x57\x90\x34\x52\x4e"),
+ .aad = TEST_DATA_STR (
+ "\x23\x27\x13\xb8\xde\x07\x44\xb0\x82\x51\x54\x9a\xaa\x19\x15\x4f"),
+ .tag = TEST_DATA_STR (
+ "\x20\x94\xf4\x98\x9f\x85\x0a\xf3\xbb\xfc\x48\xb1\x89\x5e\xc1\xde"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc3) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xb3\xe8\xa0\xd0\x5d\xbc\xd4\x53\xbd\xc8\xec\x09\x75"
+ "\xf2\xbb\x06\x3a\x21\xd0\x39\x1d\xc9\x46\x45"),
+ .iv = TEST_DATA_STR ("\x3f\xaf\x8b\xf9\x1d\x4d\x95\xa7\xf9\x62\x8a\x65"),
+ .aad = TEST_DATA_STR (
+ "\x6e\x69\x26\x61\x76\x14\xbe\xf6\x15\x3a\x4c\xe6\x29\xa9\x1b\x69"),
+ .tag = TEST_DATA_STR (
+ "\xac\xbb\x55\xb7\x10\x2e\x86\x17\x75\x42\xbc\x5a\x7f\xc7\x17\xa1"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc4) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xdf\x91\x67\x43\xd0\x21\x80\xa2\x88\x80\xbc\x45\xe0"
+ "\xd0\xb7\x37\x47\x74\x69\x26\x01\xeb\x55\x56"),
+ .iv = TEST_DATA_STR ("\x48\x31\x18\x9f\x72\x75\x17\xd7\xf4\x22\xf1\x2f"),
+ .aad = TEST_DATA_STR (
+ "\x4a\x4e\x9b\x8c\xda\x53\x57\xf9\x5c\x37\x26\x48\x64\xe3\x89\xa9"),
+ .tag = TEST_DATA_STR (
+ "\xde\x3b\x10\xaf\x6d\x8c\x1e\x3c\xd5\x80\x20\xce\xac\x9c\x5f\x41"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc5) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xcd\x58\xa2\xca\x49\x5b\x28\x14\x79\x61\x61\x91\x1a"
+ "\xf0\x82\xa8\x52\xc8\xcb\xd3\xfe\x1c\xed\xb4"),
+ .iv = TEST_DATA_STR ("\x78\x66\xd4\x58\x8a\xce\x52\xed\x1d\x07\xd3\x46"),
+ .aad = TEST_DATA_STR (
+ "\xe0\xe6\xf8\x5c\x52\xab\xa6\x87\x3d\x7d\xb5\x0d\x80\x2e\xd6\x16"),
+ .tag = TEST_DATA_STR (
+ "\x0d\x2b\xd7\xc5\x1f\x7f\x88\x16\x20\xbf\x50\x8f\x4b\x66\x2d\xa6"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc6) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xd8\x0f\x1f\x01\xe6\x6c\xa7\x04\x1f\x12\xa9\xde\xc4"
+ "\x6c\xed\xfd\xf7\x5a\xef\x66\x4c\x58\xb2\x33"),
+ .iv = TEST_DATA_STR ("\x24\x08\xb3\x5e\x9b\xa6\x93\xe0\x89\x31\xf7\xf3"),
+ .aad = TEST_DATA_STR (
+ "\x28\x03\x5a\x77\xe8\xb1\xdf\x98\x20\x9b\xd5\x29\xe4\x72\xbe\x1c"),
+ .tag = TEST_DATA_STR (
+ "\xc1\xa2\x9c\xb9\x1f\x13\x12\xb8\xc6\xc8\x6a\xd8\x33\xa9\x73\x74"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc7) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x58\x29\x1d\xe8\x38\xf4\x7f\x6b\x30\xf2\xeb\x4f\x55"
+ "\x6b\xf2\xfd\x81\xb8\x49\xb9\xe8\x76\xf0\x48"),
+ .iv = TEST_DATA_STR ("\xdb\x4f\xa1\xcc\xc0\xed\x55\xdb\xe5\x33\xee\x90"),
+ .aad = TEST_DATA_STR (
+ "\x57\x6b\x6e\xaf\x76\x21\x1b\xe4\xd5\x40\x36\x3b\x23\xac\x29\x9d"),
+ .tag = TEST_DATA_STR (
+ "\x9d\x3f\xc9\x6b\xa6\xcc\x39\xaa\x30\x74\xb0\x1e\xe2\xcb\xa4\xd4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc8) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xab\x3c\x5c\x4a\x37\x2e\xc0\x5f\xeb\x74\x23\xa5\x55"
+ "\xed\x6c\xc6\x6c\x5d\x3b\xd8\x55\x7e\xff\xa7"),
+ .iv = TEST_DATA_STR ("\xfb\x79\x7a\x5f\xa6\x3a\x38\x88\x0e\xd3\x80\xc6"),
+ .aad = TEST_DATA_STR (
+ "\x67\x63\xc5\x0c\x5d\xe0\xdb\x7f\x67\x5f\xe1\x6d\x0a\x5d\x5a\x79"),
+ .tag = TEST_DATA_STR (
+ "\x6a\xe6\xc7\x8d\xe5\xdf\xea\x5c\xb3\xe9\x6e\xe9\x59\x71\x37\x41"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc9) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x86\xcb\xf5\x79\xe9\xb0\x77\x0e\xc2\xa1\x37\x21\x8e"
+ "\x9f\xf7\x1a\xeb\xf0\x51\xf6\x4a\x31\x8c\x74"),
+ .iv = TEST_DATA_STR ("\x4d\xf9\xe3\xdd\x72\x0d\xce\x9e\xcc\xb3\x81\x76"),
+ .aad = TEST_DATA_STR (
+ "\xba\xbf\x21\xb7\x2e\x05\x67\xf2\x2e\x6f\xb1\x72\x11\x5b\x61\x2f"),
+ .tag = TEST_DATA_STR (
+ "\x51\x86\xbd\x05\x73\x93\x81\x1b\xc9\xc2\x8e\x8e\xb7\x71\x4b\x32"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc10) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xfd\x39\xed\xc5\x1d\xf0\x8e\x69\xf3\x97\x68\xdd\xff"
+ "\x3f\xa9\xa7\xf6\x76\x42\xd7\x3b\x2c\x33\xdd"),
+ .iv = TEST_DATA_STR ("\xf0\xf8\x07\x50\x24\xbb\x50\x97\x82\x79\xc5\x37"),
+ .aad = TEST_DATA_STR (
+ "\x7c\x95\xd4\xa5\x59\x15\xcf\x13\x7d\x3f\xa2\xbc\x0b\x9d\x5e\x99"),
+ .tag = TEST_DATA_STR (
+ "\xe9\x61\xe7\x9c\xd3\x49\x46\x1a\x14\x3b\x13\xe6\x2c\xf6\x9d\x3f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc11) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x31\xc4\x45\x71\x32\x99\x96\x28\x7e\x98\xfc\x3d\xc7"
+ "\x18\x15\x68\xcd\x48\xa3\x35\xfd\x37\x97\x2f"),
+ .iv = TEST_DATA_STR ("\xb2\x34\x1f\xaa\x66\x1d\xc0\x49\x25\xf5\xa6\xb5"),
+ .aad = TEST_DATA_STR (
+ "\x2a\x0e\x83\xf4\xff\x96\x7e\xdd\xdc\x09\xdd\xc4\xc1\x69\xd5\x5d"),
+ .tag = TEST_DATA_STR (
+ "\x9b\xd9\x1d\x5d\xf6\x8a\xfc\x6d\x45\xbe\xbd\xe9\x0f\xcd\xb1\xee"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc12) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x23\xf9\x54\x34\x4d\x93\xa2\x7d\x5a\xbb\xb1\x92\x0a"
+ "\xd8\xe8\x9a\x05\x10\x67\x21\x71\x31\x64\x48"),
+ .iv = TEST_DATA_STR ("\x56\x0f\x42\x9c\x3d\xf4\x31\x41\x3e\x08\x6a\x75"),
+ .aad = TEST_DATA_STR (
+ "\x2d\x78\x32\xa2\xc4\x6b\x63\x44\xfe\x35\xf1\x48\xb5\xbf\x64\x1d"),
+ .tag = TEST_DATA_STR (
+ "\x6d\xd3\x5d\x46\x8e\xfd\xc9\xc9\x73\x97\x82\x33\x20\xc9\xb0\x69"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc13) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x11\xfa\xfc\xf1\x40\x9c\x57\x52\x05\x3d\x5c\xd1\x07"
+ "\x70\xce\xf2\x70\x77\xdf\x64\x55\xfb\x27\x3b"),
+ .iv = TEST_DATA_STR ("\xa6\x90\xc8\x70\x54\x65\x8e\xdc\x49\x94\x14\xd8"),
+ .aad = TEST_DATA_STR (
+ "\xd1\x51\x2c\x14\x46\x12\xb4\x5a\x77\x42\x65\x88\xc1\xc0\x25\x4d"),
+ .tag = TEST_DATA_STR (
+ "\xb1\x40\x66\x1e\xa4\xa7\x93\xbc\x67\xda\xa0\xfa\x00\x9a\x18\x5b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc14) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xd7\x1e\x1c\x94\xb2\x11\x84\x08\x8e\x6a\x63\xf3\xca"
+ "\xba\x9a\x9c\xcf\x4a\x15\xf0\xbc\x53\xfb\x02"),
+ .iv = TEST_DATA_STR ("\x82\x7c\xd7\x65\xa6\xdc\x8e\x4d\xe2\xe7\x66\x49"),
+ .aad = TEST_DATA_STR (
+ "\x4f\xc6\x66\xa1\xcf\x04\xcf\xdb\x0f\x5f\x68\x1b\x6f\x19\x86\xbb"),
+ .tag = TEST_DATA_STR (
+ "\x9c\xf4\x07\xee\x84\x47\x6d\x54\x8e\x05\x93\x9c\x3b\xeb\x9f\x53"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc0) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x09\x3e\xf7\x55\x1e\xbb\xff\x8e\xb0\xc0\xa8\xa4\xa6"
+ "\x2b\x19\x8f\x0c\x2e\x83\x8d\xe1\x0e\xee\xee"),
+ .iv = TEST_DATA_STR ("\xe6\x56\xe9\x39\x30\xed\x52\x10\xba\x3f\x03\x22"),
+ .aad = TEST_DATA_STR ("\x3d\xa2\x2d\xac\xfd\x11\xb2\x1b\x0a\x71\x31\x57\xf6"
+ "\x0a\xec\x0c\xd2\x2f\x1a\xdd"),
+ .tag = TEST_DATA_STR (
+ "\x1b\x2d\x27\x64\x57\x3e\x20\xae\x64\x0b\xf2\x9d\x48\xe5\xfe\x05"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc1) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x08\x63\xe6\xe0\xe9\x76\x3a\x30\x21\xed\x49\x45\x81"
+ "\x2b\xec\xf2\x7b\x82\x20\xd4\xb3\x29\x73\x57"),
+ .iv = TEST_DATA_STR ("\xad\xb4\xe4\xe6\x29\xcf\x4a\x86\x54\x0e\xfe\x1e"),
+ .aad = TEST_DATA_STR ("\x58\x14\x24\xd6\x33\xf3\xf9\x69\xd1\xb4\xf8\x35\x19"
+ "\x7a\x74\x0a\x69\x5b\x2c\x3b"),
+ .tag = TEST_DATA_STR (
+ "\x79\x47\x52\x19\xe6\x34\x9b\x68\xac\x71\x27\xfb\x55\x11\xe9\x20"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc2) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xb4\x33\x29\x9e\x54\xca\xcc\x5e\x6f\x7b\x34\xc0\xa1"
+ "\xe0\x55\x52\x24\xa6\xa7\x66\xf8\xae\x21\x01"),
+ .iv = TEST_DATA_STR ("\x8f\x36\x11\xd4\xf6\x97\xae\x52\x48\x25\xd2\x39"),
+ .aad = TEST_DATA_STR ("\xee\xbc\x05\x29\x17\xef\xb0\x31\x65\x0f\x38\xce\x70"
+ "\x4e\x92\xd4\xbf\x59\xf9\x41"),
+ .tag = TEST_DATA_STR (
+ "\x91\xd1\x76\x2a\x7f\x19\xfe\x1e\x75\xdc\xa5\x1e\xc8\x5c\xe3\x19"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc3) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xf3\x73\xef\xaf\xb2\x39\x44\x55\xe0\x64\xb4\x2d\x23"
+ "\x4d\x21\xbd\xf4\x52\xdf\x03\x64\x52\xcf\x2c"),
+ .iv = TEST_DATA_STR ("\xd6\x63\x86\x02\x84\xd5\xb8\x33\x32\xa3\xa0\x25"),
+ .aad = TEST_DATA_STR ("\xee\x1e\x7f\x47\x29\x85\xa6\x39\x74\x39\xb2\x8a\x52"
+ "\x6b\x6c\xed\xf5\x95\x3a\xb1"),
+ .tag = TEST_DATA_STR (
+ "\xd8\x19\x77\x43\x49\xbf\x96\x3a\x9d\xf7\xed\x46\x26\x1f\xc5\xca"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc4) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xd5\x04\x53\xfd\x22\x3b\x11\x0d\x2d\xf3\x12\xc4\x81"
+ "\x7d\x97\x6a\x59\x19\x4a\xda\x77\x7f\x97\x41"),
+ .iv = TEST_DATA_STR ("\x6a\x88\xcd\x46\x85\x86\xcb\x22\x7b\x92\x85\x40"),
+ .aad = TEST_DATA_STR ("\x3d\x7e\x66\x93\xe1\x63\x9d\xed\x24\x2e\x3c\x0b\x93"
+ "\x1b\x32\xe7\x2a\xdc\x70\x5f"),
+ .tag = TEST_DATA_STR (
+ "\xb3\x88\xde\x6f\x83\x48\xee\xea\xda\x78\xf6\xe9\xe0\x04\xdb\xe5"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc5) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xe6\xe6\x86\xc7\xbe\xf0\x1b\x17\x04\xab\x1b\xf1\x14"
+ "\x99\x0c\xea\xad\x41\xbe\x84\x80\x17\x74\x60"),
+ .iv = TEST_DATA_STR ("\xbe\x55\x59\xdc\x38\xe9\x79\x4c\x24\xac\x7a\x83"),
+ .aad = TEST_DATA_STR ("\xd8\x21\x8c\xc4\x56\xa5\x4f\x79\x1a\x3f\xd8\x78\x90"
+ "\x89\x09\x46\x35\x3b\x9b\xfb"),
+ .tag = TEST_DATA_STR (
+ "\xe3\x94\xe6\xff\x9e\x9a\x75\x40\xb7\x42\xff\xf8\xdc\x92\x3a\x59"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc6) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x85\xba\x70\xce\x5a\xa7\xcb\x81\x95\x70\x74\xfa\xb4"
+ "\xdf\x72\xc4\x6c\x9e\xc4\x46\x6b\xa0\xb4\x8a"),
+ .iv = TEST_DATA_STR ("\xb9\x1d\x11\xf7\x2f\x6c\x5f\xab\x56\x53\x5e\x5c"),
+ .aad = TEST_DATA_STR ("\x4e\x8a\xfc\x98\xf8\x27\x20\xcd\x8e\x35\xea\x8f\x8a"
+ "\xff\x20\xd2\x39\xde\x14\x07"),
+ .tag = TEST_DATA_STR (
+ "\x41\x3f\xc7\x28\x3d\x57\x8b\xaa\x1e\x62\x13\xf5\x41\x59\x04\x62"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc7) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x7e\xd5\xb2\xa4\xbf\x74\x27\xdb\xb2\xf6\x4e\xfb\xcd"
+ "\x06\x76\xab\x91\x2f\x12\xaf\xae\x85\xf8\xf7"),
+ .iv = TEST_DATA_STR ("\x1a\x06\xb0\xa2\x07\xb8\x9c\x19\x3c\xfb\xdb\x20"),
+ .aad = TEST_DATA_STR ("\x93\xbf\x5a\xbe\x39\x7e\xe6\xa9\x79\xc3\x88\x7c\xb5"
+ "\x7a\xf0\x1d\xf8\x3d\xf2\x91"),
+ .tag = TEST_DATA_STR (
+ "\x10\xca\x8e\xe1\x68\x70\xb9\x51\xc9\x18\x0e\xa1\x85\x36\x50\xbb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc8) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x28\x1c\x87\xcc\x27\x80\x53\x75\x78\x6e\x11\x78\xd1"
+ "\xb0\xe2\x2a\x7a\x85\x46\xcf\x6f\x2f\xe1\x2b"),
+ .iv = TEST_DATA_STR ("\xb9\xc5\x70\x39\x2f\x02\x53\x89\x05\x5c\x9c\x35"),
+ .aad = TEST_DATA_STR ("\x54\xc8\x26\xa0\xca\x02\x76\x33\x51\x59\xa7\x54\x2e"
+ "\x22\x8c\x3d\xae\xbd\x38\x9a"),
+ .tag = TEST_DATA_STR (
+ "\x56\x0a\x32\x1c\xff\x6a\x8c\x1e\xac\x06\x01\x49\xc5\x95\x5f\xf8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc9) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x90\xa1\x96\xe8\xc5\xd5\x77\xa6\xc5\x14\x38\x1b\xad"
+ "\xdb\xba\x7e\xd8\xe4\xd1\xe0\xa7\x96\x1f\x32"),
+ .iv = TEST_DATA_STR ("\x1c\x2c\x7c\x8b\xd0\x15\x33\x68\xb2\xa8\xc3\x49"),
+ .aad = TEST_DATA_STR ("\x0f\x40\x9b\xa3\x68\xc2\xef\x04\x33\xb9\xbd\x96\xff"
+ "\x73\x51\x1f\xce\x63\x93\x18"),
+ .tag = TEST_DATA_STR (
+ "\x7a\x7a\xc7\x70\xa5\xa4\xc6\x6b\x78\x7a\xa3\xa1\x26\x7b\xa3\x45"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc10) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x36\x37\x62\x07\xa2\xc1\xf2\x5e\xc2\x0f\x62\x1b\x06"
+ "\xdb\x26\x0c\x20\xbe\x7b\x39\xe7\x0b\x89\x3c"),
+ .iv = TEST_DATA_STR ("\xc9\xe9\x2a\x66\x62\xa7\xd6\xbb\x84\x94\x5c\x95"),
+ .aad = TEST_DATA_STR ("\xb2\xcd\xe6\x02\x9e\x0f\x93\x30\x92\xe9\x74\x74\xdc"
+ "\x8b\x0b\x17\x4d\xe5\x53\x52"),
+ .tag = TEST_DATA_STR (
+ "\x80\x3b\x69\x53\x80\x1b\xf9\x79\x09\x64\x37\xe0\x2f\x3c\xb1\x31"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc11) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x68\xe6\x21\xb4\x76\xdd\x7d\xda\xda\x51\x56\xc0\xc5"
+ "\x65\xc4\xc0\x05\xbc\xf9\x93\x08\x2a\x6c\x68"),
+ .iv = TEST_DATA_STR ("\xac\x0b\xd5\x9b\xf7\xb9\x34\x5d\x01\xec\x7d\x99"),
+ .aad = TEST_DATA_STR ("\xe3\x18\xce\x39\xbe\xae\x93\x72\xde\xe2\xba\xc3\x56"
+ "\x8c\xa3\x7e\xf8\x71\x4b\x1f"),
+ .tag = TEST_DATA_STR (
+ "\xf8\x42\x6c\x2c\x39\x0a\x5b\xd5\xde\x2f\x4f\x31\xb8\x9a\x8f\xf8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc12) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xe5\x76\x06\xe3\x9d\xc3\x6d\xca\xcc\xfb\x3a\x13\x23"
+ "\xb5\xc1\x80\x3c\xea\x6d\x76\xcd\x96\x44\x18"),
+ .iv = TEST_DATA_STR ("\xf6\x35\xc6\xe1\x2b\xf9\x39\x46\x5c\xd7\x10\x45"),
+ .aad = TEST_DATA_STR ("\xdf\x1b\xef\x3c\xeb\x77\xb6\x7f\xf6\xdc\x7a\x16\x63"
+ "\xc5\x72\xfb\x00\x22\x05\x59"),
+ .tag = TEST_DATA_STR (
+ "\x59\xd5\xf4\x63\x40\x2a\x08\x95\x4f\xa9\xd1\x65\x44\x9d\x95\x1c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc13) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x09\xab\x6b\x91\x2e\x3b\x17\x4d\xea\x38\xa7\x27\x0b"
+ "\x36\xc3\x88\xe1\x08\xbc\x76\x0b\xa4\x81\xbf"),
+ .iv = TEST_DATA_STR ("\x13\x2e\x01\x26\x95\xf1\xe9\xb7\x99\x50\x5c\xef"),
+ .aad = TEST_DATA_STR ("\xd7\x91\xd5\x0c\xd3\x13\xdb\x40\x60\x75\xc9\x7b\x12"
+ "\x8b\x07\x8d\xa5\xb6\x8c\xa1"),
+ .tag = TEST_DATA_STR (
+ "\x72\x15\x2f\x6d\x3a\x95\x0d\x32\x3f\xd6\x19\xbe\x3d\x5b\x0c\x6f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc14) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xc1\x3e\xf7\x2b\x80\xac\xfd\x6a\xdc\x0b\xb8\xf0\x37"
+ "\x72\x23\xcb\xa7\x33\x93\x9b\x50\x58\xf3\x36"),
+ .iv = TEST_DATA_STR ("\xc4\x57\x6f\x76\xb3\x8c\x9c\x91\xbb\x08\xb8\x3f"),
+ .aad = TEST_DATA_STR ("\x3f\x89\x65\x17\x15\x6c\xde\x96\xb2\x39\x09\xf2\x98"
+ "\xa7\x6c\xde\x59\x04\x7a\xe0"),
+ .tag = TEST_DATA_STR (
+ "\xba\x43\xda\x6f\x40\xaa\x9c\x3a\x66\xdc\x37\x2e\x3b\x3f\x94\x0c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc0) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xa8\x3a\xc8\x25\x54\xd5\xc3\x47\x5d\x03\x51\x4a\x94"
+ "\x21\xc4\x09\xcc\xad\x9b\xc4\x4a\x5f\x83\x07"),
+ .iv = TEST_DATA_STR ("\x38\xaa\x26\xf7\xb6\x8d\xc6\x74\xca\xe9\x0b\x84"),
+ .aad = TEST_DATA_STR (
+ "\x03\x97\xa7\x14\x93\x9f\x55\xc1\x8d\xa0\x13\x27\x13\x48\xbd\x23\x1e\x14"
+ "\xd0\x7f\x39\x7c\xa0\xdb\x20\xd3\xa7\x7c\x42\xf3\xf4\x1c\x25\xc6\x4f\xd9"
+ "\x3b\xd3\xbd\x9f\xcd\x35\x5a\x0b\xde\x4f\x19\x61"),
+ .tag = TEST_DATA_STR (
+ "\xe4\x8b\x36\xdc\x68\x3f\x32\xdb\xae\x3b\x13\xc3\xad\xb1\xb7\x89"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc1) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x5f\x1a\x7f\x44\x39\xf8\xc7\xbc\x30\x1e\xa5\xb8\x95"
+ "\x5f\x3c\x67\x98\x26\x3b\xe4\x7d\xac\xe3\x9c"),
+ .iv = TEST_DATA_STR ("\x2d\xcc\x19\xde\x07\x65\x5f\x72\x43\xfa\xb0\x45"),
+ .aad = TEST_DATA_STR (
+ "\x7e\x76\x78\x36\xe5\xd1\x6d\xfc\x44\x26\x23\x7e\xfc\x91\x4f\xc4\x0b\xfe"
+ "\x59\x4b\x54\x94\x6e\xd5\xf2\x00\x20\x3c\x93\xce\x58\x5c\x4c\xb4\xa2\x4a"
+ "\x33\x64\xcc\xb9\x80\x3a\x64\xac\x4e\x38\xde\x2d"),
+ .tag = TEST_DATA_STR (
+ "\x9d\x34\xef\xdb\x37\x01\x49\x49\x13\xe3\x86\x36\x81\xa9\xb4\x02"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc2) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x8d\xce\x48\xb9\x16\x91\x63\x83\x5a\x0b\x6a\x4a\x62"
+ "\x7c\x29\x03\x0b\x5b\xef\x3f\xf3\xba\xe1\xca"),
+ .iv = TEST_DATA_STR ("\xa0\x0d\x44\x38\xe9\x6e\x7a\x22\xe5\x72\x65\xce"),
+ .aad = TEST_DATA_STR (
+ "\x7c\xc8\x27\x3e\x62\x59\x55\x83\xd4\x27\xbb\xf4\x59\x2c\xd2\xc2\x52\x5a"
+ "\x28\xbb\x9e\x14\x3a\x9c\x9a\xf0\x63\x41\x10\xf2\xb6\x9c\xcb\x4e\xc0\x0c"
+ "\xc2\xaf\xaa\x86\xc9\x86\xd3\xef\x2c\x44\x76\xa9"),
+ .tag = TEST_DATA_STR (
+ "\xe2\x76\xfe\xda\x74\x32\xfa\xa0\xe4\xab\xd4\x6d\x59\x2b\x8f\xee"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc3) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x0e\xce\x58\x69\x34\x92\x86\xef\x12\x2b\xb4\xf0\x5a"
+ "\x0c\x0d\xf5\xc7\xc6\xdf\x49\x60\x79\x20\xff"),
+ .iv = TEST_DATA_STR ("\x8d\xa2\x36\x71\xb7\x2e\xc6\xaa\xab\x27\x46\x3e"),
+ .aad = TEST_DATA_STR (
+ "\x95\x82\x4e\xa2\xb8\x0c\x4e\x97\x91\x1f\xff\xa3\x9e\x3f\x0c\x21\xfc\x81"
+ "\xed\xd2\x68\x8a\x5a\x1e\x58\x3b\xa3\x62\xb4\x7b\x97\x97\x31\xbb\x25\x6c"
+ "\xff\x1a\x47\x9f\x27\xa3\x12\x40\x89\x1e\x57\xe2"),
+ .tag = TEST_DATA_STR (
+ "\xfb\xd7\x57\xb8\x96\x3b\xbb\x32\x6c\xda\x80\xf3\xd5\x08\xf8\x9b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc4) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x0a\xbd\x4a\xac\x60\x34\x31\x26\x17\x38\x61\x53\x4e"
+ "\x8f\x46\xfc\x46\x0f\x8f\x3e\x21\x69\xf5\xc5"),
+ .iv = TEST_DATA_STR ("\x5e\x46\xfe\x13\xe6\xc2\x44\xe8\x34\x19\x8d\x3d"),
+ .aad = TEST_DATA_STR (
+ "\x62\x48\x64\xae\xa6\x0c\x08\xe9\xa1\x3a\x8a\x9c\x09\x44\x57\xc9\xda\x22"
+ "\x26\x24\x7a\x77\x1a\xae\xd5\x97\xc5\xe2\xcb\xc3\xd6\xe6\x17\x9d\xef\x86"
+ "\xc9\xd0\x4f\x1f\x6e\x8c\xe5\xb9\x9f\x78\x9e\x3b"),
+ .tag = TEST_DATA_STR (
+ "\x67\x74\x56\xc4\x84\xab\x6b\xb1\xc3\x22\xf1\x00\xff\x9f\x8c\x43"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc5) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x82\xea\xe3\xc1\xde\xac\x84\x84\xe9\x9e\x97\xe6\x97"
+ "\xc7\xa4\x2d\xb0\x26\xd7\x11\xc3\xdb\x60\x0e"),
+ .iv = TEST_DATA_STR ("\x8f\xa3\xf1\x6b\xb6\xce\xf8\x75\x2c\x8e\x31\xef"),
+ .aad = TEST_DATA_STR (
+ "\x61\xe8\xf8\x8a\xe8\xc0\x55\xf7\xd9\xe6\x7e\x0f\x1d\x49\x93\xa3\xe5\xf7"
+ "\x3f\x36\x62\xdc\x1c\xa8\x88\x66\x33\xab\x9b\x2a\x8c\x69\x28\xdb\x5b\x7a"
+ "\x30\xfd\xec\xaa\x29\xdb\xbe\x01\xfd\xb1\x20\xbb"),
+ .tag = TEST_DATA_STR (
+ "\x7d\xe2\x16\x8f\x5c\x43\x4c\x06\xb7\xc4\xaf\x15\x37\x27\x45\x22"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc6) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x04\x2d\x69\x65\x4b\x27\xa1\x38\x0a\xde\xcc\x9b\xa7"
+ "\x50\x90\xf1\xca\x42\x2b\x72\x5a\x47\x93\xe0"),
+ .iv = TEST_DATA_STR ("\x87\xdb\x23\x7e\x9b\xf6\xcc\xbd\x08\x69\xf0\xf9"),
+ .aad = TEST_DATA_STR (
+ "\x49\x6e\xff\x4c\x74\xac\x08\xbc\xcd\xec\xec\x7a\x49\x40\xdd\xbe\xb8\x0b"
+ "\xa1\xa5\x58\x24\x7e\xaa\x18\xa4\x66\x72\xd8\x74\xd7\xde\x6d\xd3\xa5\x77"
+ "\x9e\xbc\xd9\x84\xc2\x29\x91\x3d\x10\xf6\xf7\xcc"),
+ .tag = TEST_DATA_STR (
+ "\xba\x06\xea\xab\x5b\x16\x66\x20\xef\xc8\x07\x2f\xa3\xa5\xb4\xb8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc7) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xc1\x79\x23\x25\x76\xee\xb3\x8c\x98\xf8\x47\x87\x3d"
+ "\x00\x4b\x96\x46\x65\xa3\x87\xa0\xa7\xf0\x14"),
+ .iv = TEST_DATA_STR ("\x85\xd2\x99\x6d\x00\x3e\xf9\xfd\xc4\xa5\x4c\xe9"),
+ .aad = TEST_DATA_STR (
+ "\x92\x68\x24\x53\x5c\x61\x3f\xde\x98\x69\xdf\x1a\xaf\x76\x4a\x54\xc1\x36"
+ "\x16\x67\x7f\x09\x92\x09\x14\x2d\xa4\xb6\x5d\x9a\x86\x64\xd1\x78\x53\xec"
+ "\x10\x2f\xfa\x1b\x16\x88\x80\x6d\xbe\x50\x3a\x33"),
+ .tag = TEST_DATA_STR (
+ "\xdc\x13\x50\x36\xf7\x4e\x62\x34\xc4\xe3\x27\xfb\xb0\xae\xb9\x25"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc8) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xda\x39\xc0\xef\xb1\x00\xfd\x9c\xf2\xd9\x01\x70\x5a"
+ "\xa6\x35\x90\x3c\xe5\x3b\x66\x9e\xbd\xb5\xca"),
+ .iv = TEST_DATA_STR ("\x0e\x95\xbd\xcb\x66\x98\x24\xdb\xd7\xff\xc8\x8f"),
+ .aad = TEST_DATA_STR (
+ "\x46\x42\x87\x5e\x8e\x20\xc1\x65\xb5\xb1\x7f\x12\xfd\xc6\x30\x99\x6b\x58"
+ "\xb8\x57\x1c\x5a\x15\x94\x4c\xe1\x94\x50\x8c\x87\x12\x3a\xd5\x00\x41\xf5"
+ "\x9a\xfe\x02\xea\xc3\xac\x1e\x6b\xa5\xed\x92\x8b"),
+ .tag = TEST_DATA_STR (
+ "\x59\xf9\x96\xe9\xa7\x23\x14\xfc\x76\x75\xe5\xa9\x13\xfe\x8e\x36"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc9) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x5a\x41\x3f\xa7\x5a\x13\xb0\x36\x53\x81\x82\xad\x51"
+ "\x50\x6f\xdd\x77\x33\xf1\xba\x39\x00\x04\x84"),
+ .iv = TEST_DATA_STR ("\xa6\xcd\xa5\xb0\x22\xec\xfc\x5a\x2b\x75\x90\x13"),
+ .aad = TEST_DATA_STR (
+ "\x1e\xed\x51\xef\xc1\xf5\xca\xe5\x76\x90\xe0\x32\x06\xb4\x5a\x7b\x5c\xb4"
+ "\x58\x56\xab\x36\x31\x32\x34\x94\x85\x01\xdd\x02\xea\x4f\x24\xae\x90\xb5"
+ "\xb2\x46\x28\x91\xe4\x93\x3a\x1b\xd0\x38\x74\x63"),
+ .tag = TEST_DATA_STR (
+ "\x57\x29\x61\xb6\xe8\x50\xad\xb4\x60\x16\x64\xe0\xeb\x3e\x07\x36"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc10) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x80\x08\xf9\xd2\x5a\x1a\x70\x5b\x5f\x00\x79\xd3\xe3"
+ "\x9c\x49\x87\x28\x65\x37\x10\x06\x61\xde\x6e"),
+ .iv = TEST_DATA_STR ("\xa9\xdd\x20\xd7\x51\x2c\xe5\xb3\x54\x83\xa0\x82"),
+ .aad = TEST_DATA_STR (
+ "\xfb\xd2\x16\x02\x37\x74\x2f\x4c\xa7\x2f\x0b\x7e\xd6\x16\xa8\x47\xaf\x65"
+ "\xed\xd2\x81\x67\x97\xb1\xc9\xc8\xb0\xb7\x37\x7b\x57\x59\x3c\x56\xc5\x80"
+ "\x63\xc9\x6a\x30\x69\x8c\x51\xbe\xb6\x78\x6e\x74"),
+ .tag = TEST_DATA_STR (
+ "\xb2\xe2\x25\x89\x00\xd7\xfd\x7a\xc4\xe9\x63\x92\x38\xd6\x63\x8a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc11) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xcc\x95\x61\xf6\xd3\x0d\x9a\x2f\x25\x75\x07\x52\xd3"
+ "\x9a\x1f\x0b\xc8\x00\xe0\xe7\x24\x42\x7e\x64"),
+ .iv = TEST_DATA_STR ("\x3c\x56\x51\x80\x3f\xee\x90\x98\xbd\x69\x04\xed"),
+ .aad = TEST_DATA_STR (
+ "\x0e\x28\x55\x40\x35\x82\x98\xa1\x87\xd4\xf6\x82\x3f\xf8\x6c\xea\xb1\x23"
+ "\x4d\xbc\xef\xc0\x9b\x23\x33\xe7\x45\xf2\x3b\xb6\x0e\x63\x65\xcd\x36\x3d"
+ "\x9e\x9b\x3d\xfa\x9f\xb9\x27\x0d\x6a\x9a\x52\xab"),
+ .tag = TEST_DATA_STR (
+ "\x01\x53\xf9\x5c\x4c\x0b\x4b\x47\x98\x9d\xa7\x1e\xe7\x2c\x34\xc6"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc12) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x61\xab\xa2\x81\xeb\x81\x20\x5d\xc6\xd9\xbb\x6b\x17"
+ "\x87\xb7\x42\x51\x63\x87\x24\x1c\x15\x3c\xc2"),
+ .iv = TEST_DATA_STR ("\x72\x4b\x42\x24\x31\x2a\x59\x6f\xf2\x30\x03\x93"),
+ .aad = TEST_DATA_STR (
+ "\xdd\x06\xc9\xe0\x6a\x6f\xd9\xd8\xfe\xa3\x56\x25\x5c\xbf\x90\x93\x86\xf7"
+ "\xac\x5e\x9b\x5e\xaa\x5c\x55\x28\x20\x54\x82\x7f\x74\xe9\xe7\x43\x46\xac"
+ "\xff\x57\x25\x09\x73\x53\xe8\x6b\xff\xeb\x6d\xc6"),
+ .tag = TEST_DATA_STR (
+ "\x26\x44\xe8\xe6\x52\xc2\x58\xab\x02\x8b\x86\xcd\x7e\xf5\x5f\x5c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc13) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x73\xa4\x90\x52\xef\x91\x18\xf0\x54\x88\x81\x0c\x20"
+ "\x80\xd7\x38\x99\x39\x38\x6c\x18\x6d\x92\xb3"),
+ .iv = TEST_DATA_STR ("\x38\xf0\x06\xb2\xe8\x5e\x7a\xa2\xf4\xc8\x81\x89"),
+ .aad = TEST_DATA_STR (
+ "\x21\xfa\x5a\xbb\x18\xb2\xfb\xcc\xe3\xa1\x9b\x2e\xac\x8b\xe7\xa3\x01\x92"
+ "\x3f\xa2\x58\x10\x52\x86\x13\x3e\xd5\xf4\x78\x34\x84\x2a\x63\x84\xc4\xfc"
+ "\x0a\x39\x86\xe1\xa2\x5b\xba\x83\x47\x9f\x68\x16"),
+ .tag = TEST_DATA_STR (
+ "\x9d\xbd\x74\x84\xc9\xaa\xed\x54\xdf\x7e\xd6\x4b\xbe\xd2\x0c\x68"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc14) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x1e\x11\x8d\x10\x94\x26\xb2\xab\x64\x46\xb0\x65\x99"
+ "\xa4\xc9\x71\xf6\x68\x3a\x34\x35\x68\xef\x97"),
+ .iv = TEST_DATA_STR ("\xcc\x87\x23\x42\x15\xc9\x74\xfd\x44\x68\x9e\x25"),
+ .aad = TEST_DATA_STR (
+ "\x48\x67\x4b\xf3\x86\x06\x46\x02\xd0\x0f\xd7\x2a\x17\x39\x20\xaf\x9b\x4c"
+ "\x4f\x9a\xfb\xf1\x9e\xa7\x63\xff\x44\xe4\x7e\xf8\x9a\x10\x65\x80\xc2\x89"
+ "\xc3\x98\xf9\x7f\xaa\x60\xba\xf4\x9d\xc1\xa2\xaf"),
+ .tag = TEST_DATA_STR (
+ "\x97\xf1\x3f\x94\x2a\xf7\xb7\x79\x7e\xa0\x9c\xea\xbd\xc7\xdc\x9c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc0) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xc2\x5d\x34\x7f\xfb\x5b\x7b\xa0\x79\xbe\x22\x79\xa0"
+ "\xa7\xf2\x20\xf1\x9c\x74\xbb\x9c\x5a\x15\xb6"),
+ .iv = TEST_DATA_STR ("\xb3\x5f\x14\x21\x82\xfe\xa6\x5c\x64\x23\x68\xed"),
+ .aad = TEST_DATA_STR (
+ "\x19\x67\xa0\xbd\x80\xcf\x2c\x9c\x58\xe4\x41\xe1\x2c\xba\x78\x8f\x9c\x07"
+ "\x21\x77\xe1\xce\x02\xf3\x0d\x58\xae\x98\x1a\xb3\x7e\xac\x45\x2c\x0d\x9f"
+ "\x1c\x5f\x34\x85\xd7\xb1\x6a\xe0\x93\x66\x82\x1d\x23\xd4\x44\x79\xd5\x2c"
+ "\xcc\x4a\xcd\x8f\xa6\xf5\xb9\x01\x38\x45\xc6\x29\xf6\x9c\x61\x2c\x9c\xbb"
+ "\xcd\xca\x3b\xdf\x43\x85\x5f\xa7\xc7\x1b\xff\x45\x8a\x7d\x4c\x01\x9a"
+ "\xd9"),
+ .tag = TEST_DATA_STR (
+ "\xf5\xa0\xd6\x49\x24\xae\xab\x15\xa6\x36\xc7\xce\x4d\xb5\x22\x43"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc1) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xcb\x00\x0f\xdd\xd6\x7b\xf5\xa2\x4b\x03\xc8\xb0\x89"
+ "\x65\xfc\x56\x89\x62\xd7\xb2\xa0\xb4\xe6\x8e"),
+ .iv = TEST_DATA_STR ("\xac\xad\xc8\xf8\x22\xb8\x37\xb8\xfc\xd5\xac\x53"),
+ .aad = TEST_DATA_STR (
+ "\xce\x0e\x3e\x4e\x6f\xfe\xae\x66\xc5\x35\x66\x7e\x8a\x8c\xf1\x2f\xca\x0e"
+ "\x9d\xae\x69\x87\x83\x5e\x8e\xc6\x2f\xb9\x5b\x38\xf3\x1e\xc5\xe9\x37\xbd"
+ "\xfe\xd5\xb5\x51\x74\x83\x4b\x03\x8b\xa3\x32\x2b\x4a\x25\x65\xac\x41\x3b"
+ "\x6e\x20\x4f\x88\xc3\xa9\x32\x16\xb8\x81\x06\x49\x4e\xaa\x14\xa8\x20\x68"
+ "\xf0\x0a\x3b\xf2\x27\xb6\x27\x07\x53\x83\x68\x2b\xd6\xbe\xd6\x23\x1e"
+ "\xaf"),
+ .tag = TEST_DATA_STR (
+ "\x2c\x1c\xdf\xc8\xaf\xb7\x56\x9b\x87\x7b\xa5\xae\x13\xd6\x23\x5b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc2) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x94\x9f\x77\x6b\x66\xb5\x28\x34\xde\x80\xe2\x4d\xa2"
+ "\xc6\x68\x3c\x00\x94\x74\x3c\x6b\x4b\x57\xd1"),
+ .iv = TEST_DATA_STR ("\x75\x57\xf7\xb9\xa8\x55\x4e\x79\xf8\x69\x52\x9b"),
+ .aad = TEST_DATA_STR (
+ "\xe3\x6d\xb9\xd5\x1d\xed\xe1\x0f\x17\xe4\xba\x3a\xa2\x0e\xee\x49\xc2\x06"
+ "\x24\x4f\x89\xf6\x7f\xfa\x7d\x49\x94\x58\x93\xa0\x5f\xb6\xb5\x94\x8c\x53"
+ "\x61\xdc\x84\xb3\x3a\x4c\x35\x76\x8c\xb6\x54\x74\x08\xba\x61\x7e\xdb\xa4"
+ "\x17\x82\xa6\x5e\x4f\xca\x1a\x02\x79\x68\xf4\x4c\x43\x3f\x84\x53\xdb\xef"
+ "\xb3\x5a\xa4\xc2\x1b\x6c\x52\x0b\x10\x2a\xe4\xfd\xf2\x07\x9f\x81\xdd"
+ "\x0c"),
+ .tag = TEST_DATA_STR (
+ "\x88\xc0\x61\x2c\x1d\xde\xf9\x14\xb3\x43\x95\x05\x3f\x7f\x63\x2e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc3) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x25\x87\x2c\x71\xd8\x70\x0c\x6e\x0a\x74\xf4\x4e\x95"
+ "\x46\x8b\x12\xf2\xdc\xeb\x94\xc2\x57\x57\x5d"),
+ .iv = TEST_DATA_STR ("\x20\xa8\x7b\xaf\xff\x89\x83\xae\x72\x5a\x6f\xf1"),
+ .aad = TEST_DATA_STR (
+ "\xc0\x9c\x11\x84\xd0\xfb\xe3\xaf\x22\x20\x2a\x59\xdf\xef\xd6\x6f\xcd\xa2"
+ "\x29\x3c\x90\x62\x6f\x14\x93\xd6\xfd\x79\xed\x5b\x5d\x01\xbf\x8a\xc9\x09"
+ "\x5f\x44\xa3\x1f\x9d\xb4\xa2\x6f\x79\x75\x4d\x75\xec\xf4\xfe\x02\x5f\x2c"
+ "\x1a\xdf\x3c\xe5\xf3\xae\x76\x72\x1d\xaf\x3d\xcc\x9d\xd8\x99\xe3\xf9\x6c"
+ "\x82\x73\xb2\x9b\xc1\x8f\xc3\x8a\xae\x1a\xaa\x12\x4d\xb3\x71\xaa\x47"
+ "\xfd"),
+ .tag = TEST_DATA_STR (
+ "\xb1\x66\x3e\xb5\xb6\x98\xae\x8a\x7a\x18\xa6\xee\x74\x81\xb9\x8b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc4) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x18\x8c\xa6\x91\x49\x83\xd3\xc1\xe5\x6c\x05\x9d\x0d"
+ "\x70\x1d\x57\x3a\x61\xdf\x2d\xea\xee\xb1\xa6"),
+ .iv = TEST_DATA_STR ("\x6c\x2f\xed\xb5\xf7\xf9\xf1\x15\x3a\xc3\x6c\xd8"),
+ .aad = TEST_DATA_STR (
+ "\x1d\xb4\xb3\x12\x70\x44\x94\x98\xba\x03\x97\x31\xb7\x33\x07\x68\xd1\x4c"
+ "\x27\xe3\x73\xb7\xde\xbd\xb9\x8f\x2a\x41\xb6\xae\xc3\xb2\x98\xa0\x3e\xa5"
+ "\xde\x8f\xed\x8f\xf2\x17\x96\x75\xea\x08\xe3\xc9\x81\x2c\x3f\x4f\x63\x76"
+ "\x5f\x40\x39\x53\x4c\x5c\xcf\x98\xfd\xc3\xe7\x0c\xb1\x30\x9a\xd4\x16\x1e"
+ "\x37\xe7\x14\xe6\x97\x28\x72\xfa\x65\x83\x72\x83\x25\xac\x52\x0d\x56"
+ "\x69"),
+ .tag = TEST_DATA_STR (
+ "\x29\xc5\x6f\x77\xd8\x26\x0c\xa2\x94\x83\x37\xb2\x1c\x0c\x37\xa2"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc5) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xce\xcc\xe8\xae\x97\x77\x18\x7e\x5a\x87\xec\xb2\xd7"
+ "\x35\xf7\x82\xf9\xf7\xaa\xb8\xb8\x7b\x13\x7d"),
+ .iv = TEST_DATA_STR ("\x7d\x56\x4d\xb2\x02\xd0\xfa\xb3\x8d\xed\x36\xdd"),
+ .aad = TEST_DATA_STR (
+ "\xf6\xfb\xd1\xb5\x75\x5d\x70\x91\x54\x31\x2e\x11\x0f\xd4\x60\x85\xa4\xb6"
+ "\xf6\x17\xc1\x27\xfe\xa7\x76\x36\xbf\xb8\xa5\x8a\x6a\x6d\x90\x30\xb2\xa6"
+ "\xc4\xe7\x0d\x7a\x3a\x89\x4a\x75\x96\x7f\x65\x02\xe0\xc8\x16\xfb\x30\x69"
+ "\xf2\xed\x94\xc8\x88\xd3\x07\x4c\x1c\x63\xc5\x95\x12\xbe\x45\x3e\x57\x5c"
+ "\xec\x11\x5c\x49\xeb\x4d\xba\x44\xd2\xf7\xc7\x8b\x33\x55\xb1\xe6\x77"
+ "\x87"),
+ .tag = TEST_DATA_STR (
+ "\xb2\x7c\x0b\xe6\x89\x85\x66\x26\xe5\x5e\x03\x77\xa0\x83\x34\x13"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc6) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x94\x47\x03\x91\xde\xf6\x95\xfe\x5d\xe5\xb8\x23\x3a"
+ "\x20\xfe\x52\x11\xbb\x1d\xbb\xb2\x73\x31\x3f"),
+ .iv = TEST_DATA_STR ("\x57\xce\x3a\x88\xf6\xd2\x72\x15\xc9\x43\x7c\x30"),
+ .aad = TEST_DATA_STR (
+ "\x51\x05\x96\x5c\xed\xe3\x1c\x1e\x2f\xbb\x1f\x5f\xb6\x41\xaa\x45\x65\xf8"
+ "\x15\xbf\x18\x1a\x42\x9c\xdc\x35\x3b\xcf\x41\x7a\x0e\x57\xb9\x57\x49\xb4"
+ "\x88\x6a\x80\x19\x01\x37\xf7\x7b\x99\xff\xe2\x80\x88\xa8\xa7\xf9\xf1\x2f"
+ "\xf4\xc6\x16\x53\xdf\x30\x57\x2b\xde\xed\x92\xf2\xfa\xc5\xc4\x93\xce\x6f"
+ "\xad\x20\xc0\xee\xd6\x6f\x95\x02\x6c\x76\x33\x48\x89\x20\xb9\x02\x32"
+ "\xa0"),
+ .tag = TEST_DATA_STR (
+ "\x50\x31\x79\x2c\xa7\x0d\xc4\x9e\xeb\xd8\xea\xd3\x76\xe6\xe3\x33"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc7) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x22\x0a\x24\x93\x89\x79\x3c\x97\xfb\xe2\x8b\xa6\xaf"
+ "\xeb\xf1\x2a\xc0\xde\x55\xed\x71\xaf\xfa\x68"),
+ .iv = TEST_DATA_STR ("\xb5\xa5\x71\x95\x1a\x37\x30\x30\xfc\xf0\xeb\x4d"),
+ .aad = TEST_DATA_STR (
+ "\xa2\x75\x20\x58\xa8\x46\x9b\x60\xd6\x99\x7a\x31\x5e\x5c\x88\x25\xec\xb2"
+ "\xf6\xfd\x1f\x60\x8d\x1a\xe5\xb5\xa4\xf5\xb4\xb9\x28\x62\xb8\x4d\x6b\x3e"
+ "\x74\x4e\x92\x3b\x02\x44\xb7\xb0\xfd\x6d\x6f\x36\xa8\xc1\x73\xd4\x6a\xd2"
+ "\x01\xdd\x8d\x8a\x55\xc0\x8d\x95\x49\x30\x26\x69\xb9\xd3\x3f\x46\x61\x80"
+ "\xf0\x58\x1e\xb3\x00\xbb\x8a\xb8\xb0\x61\x11\x32\x34\xd9\x68\xce\xcc"
+ "\xce"),
+ .tag = TEST_DATA_STR (
+ "\xb2\xcf\x3f\xa8\xca\x8d\x3e\xea\xaa\x3f\x82\x41\x10\x64\xc9\x87"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc8) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xe5\x4c\x36\xdb\xb4\x67\xfe\xb4\x30\xf4\x08\x7f\xe4"
+ "\xcf\x12\xba\xfc\x94\xa1\x78\x00\x68\x38\xe8"),
+ .iv = TEST_DATA_STR ("\x31\x9e\x97\x14\xb4\x92\x5c\xb1\x61\xbc\xfd\x91"),
+ .aad = TEST_DATA_STR (
+ "\xfc\x50\x39\x7c\xc9\x92\xfd\xe3\xd4\x44\xd2\xfd\xf3\x87\x77\xf2\x9a\xb6"
+ "\x04\x99\x63\xea\x08\xc4\xe2\xf0\x0c\x15\x98\xb8\xc0\xbe\xa7\xe9\x4f\x59"
+ "\x1b\xb8\x3e\xb5\x35\x1f\xfa\x4b\xff\xef\x3e\x3e\xc3\x57\xfe\x47\xb1\x7d"
+ "\xb7\xee\xc0\x4a\xd4\x66\x9b\x92\x13\x02\xe5\xc4\x1a\xc6\x9f\xe4\x45\x83"
+ "\x8f\xcf\xd5\xb8\xd5\x1e\x89\xb3\xef\xdf\x2e\x7a\xf4\xf0\x57\x6d\xfc"
+ "\x69"),
+ .tag = TEST_DATA_STR (
+ "\x45\x35\x3a\x04\x31\x39\x23\x75\x54\xb5\x11\x7d\x0b\x8d\x52\xa7"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc9) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xaa\xd8\x0b\x3b\xb6\xe2\x2d\x9d\x18\xf1\x07\x8f\x54"
+ "\x29\x73\xaa\x8f\xff\x28\xab\xfa\x2e\xd6\x37"),
+ .iv = TEST_DATA_STR ("\x6b\x53\x35\x92\x9a\x6f\xc7\xd3\x4c\x3e\x72\x8f"),
+ .aad = TEST_DATA_STR (
+ "\x31\x4a\x33\x07\xa6\x41\x8a\xd2\x29\xaf\x5b\x03\x25\xd2\xbd\x41\x98\xfe"
+ "\x82\xd8\xc5\xa8\x96\x02\xe9\x26\x84\x8c\x09\x6f\xd0\x1e\xa3\x94\x84\xdf"
+ "\x6e\x4a\xae\xd1\x8f\x2e\x2b\x07\x0c\xa3\x6e\xe5\xed\x66\xcd\xa3\xc0\x4a"
+ "\xb6\xeb\x41\xb3\x27\x52\x49\x4b\xa3\x56\xef\x13\x27\xd8\xfd\x6a\x83\x52"
+ "\xa6\x21\xe1\xbb\x0b\x20\x66\x3f\xc7\x04\x89\x9a\x85\x5d\x32\x77\x77"
+ "\x0c"),
+ .tag = TEST_DATA_STR (
+ "\x8f\xda\x0e\x49\x52\xbe\xef\x47\xbe\xa6\xf4\x8d\x9b\xdb\x3e\x79"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc10) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x69\xd3\xd0\x0e\x9a\xef\xe5\xb3\xa9\xaf\x64\x83\x8b"
+ "\x40\x45\x79\xd4\x59\x2f\x9c\xfe\xe8\x64\x57"),
+ .iv = TEST_DATA_STR ("\x05\x4c\xc3\x74\x8c\xd8\x44\x24\x10\x50\x3a\xd5"),
+ .aad = TEST_DATA_STR (
+ "\x10\x31\xcc\x7d\x96\x77\xc5\xf9\x57\x45\xc3\xdc\xc2\x6d\x62\x52\x76\x32"
+ "\x35\x56\x7d\x56\xc6\x13\x86\x7b\xce\x17\xec\x09\x9d\xef\x27\x8a\x64\x37"
+ "\xd1\xb7\x02\x64\x2b\xea\x5c\xfd\xed\x9a\xf6\xd0\xc5\xe0\x20\xf7\x04\x92"
+ "\xad\x7f\x04\xa1\xb4\xba\xd3\x95\x3b\x96\x13\x57\x4c\x2a\x18\xce\x5f\x14"
+ "\xd4\x36\x68\x79\xd1\x1e\x0b\x0a\x58\xfe\x09\x2f\x3c\xf0\xe0\x1a\xc0"
+ "\x3d"),
+ .tag = TEST_DATA_STR (
+ "\x1b\xff\xd2\x07\x47\xb2\x5e\x87\x25\x18\x44\x68\x28\x81\xf5\x3e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc11) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x03\x5e\x18\x64\xfc\xaa\x90\x78\xd1\xc8\x30\x99\xb2"
+ "\x3f\xd7\x17\x8c\x94\x6a\x58\x69\xc3\x15\x77"),
+ .iv = TEST_DATA_STR ("\xe2\xc4\x07\xa6\xaa\xd6\xd9\x04\x0e\x5b\x67\x49"),
+ .aad = TEST_DATA_STR (
+ "\x2f\xc4\x1f\x0f\xd5\xe3\xec\xef\x75\xa1\xf1\xa0\xf0\x33\x51\x5e\x6f\x96"
+ "\x19\xb8\x7a\x8c\xa1\x68\x7b\xb2\xd6\x37\x52\xcc\x3d\x47\x36\x77\xdb\x30"
+ "\x0e\x76\x97\x8c\xd3\x42\xc5\x1f\x57\x6b\x15\x98\x56\x75\x02\xaf\x0e\xd1"
+ "\xca\x85\xc5\xde\x2d\x84\xc2\xa3\x21\x19\x61\x53\x8d\xf5\x15\x25\x0a\x69"
+ "\xe8\xd6\x7e\xa2\xe8\x77\xd8\xf5\x2e\x69\x7f\xc9\x0b\xad\x33\x0b\x97"
+ "\xe4"),
+ .tag = TEST_DATA_STR (
+ "\x3c\x90\x68\x75\x7b\xda\x60\x22\xea\xb5\xb1\x98\x75\x0b\xad\xc4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc12) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x47\xcf\x37\x7a\x1d\xb1\x10\x6f\x8d\xd4\x54\x88\x4f"
+ "\x71\xef\x93\xf4\xa6\x45\xe0\xe3\xc9\xd4\x30"),
+ .iv = TEST_DATA_STR ("\xf4\x39\x46\xec\x30\x3f\x1e\xfc\x19\xdc\x21\xc2"),
+ .aad = TEST_DATA_STR (
+ "\x2e\xab\xfa\xdb\x99\x7d\x15\x4b\xea\x95\xd4\x5f\x7c\x4d\x5c\x5f\x18\x2b"
+ "\x1e\xd9\x89\x7a\xb0\x12\x41\xf6\x15\xf0\x4b\x8a\x16\xf7\xa9\x65\x2b\x34"
+ "\xa0\xee\x70\x52\xff\x5a\x20\x9a\xd4\xd2\x4a\x2b\xfc\x5e\x5e\xbc\x42\x4f"
+ "\x6d\xbb\xf0\x33\xf0\x59\x51\x24\x7a\xb3\x73\xcb\x9c\xce\x73\x5d\x7f\xb1"
+ "\x80\xa4\xf6\x2a\xd5\xa4\x12\x1e\xb7\xaa\x47\x26\x9f\x95\x41\xbd\xd9"
+ "\x5a"),
+ .tag = TEST_DATA_STR (
+ "\xfe\xed\xe5\x21\x2f\x35\xea\xa8\xfa\xa9\xe2\xe6\xbb\x7b\x1e\x18"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc13) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x64\x98\xf9\x61\x00\xe7\xb0\xb6\xed\xd7\x2b\x61\xf8"
+ "\x64\xd3\x38\x23\xbc\xbd\x0b\x58\x51\xc5\x2a"),
+ .iv = TEST_DATA_STR ("\x81\xf0\x05\xdf\x39\x2a\xc0\x25\x0a\xe0\x7a\x69"),
+ .aad = TEST_DATA_STR (
+ "\xd1\x83\x82\x41\x68\x23\x15\xdc\x27\x3a\xe8\xc2\xd5\x9d\x71\x27\x17\x48"
+ "\xbf\x1e\xf0\x38\x5d\xe4\x05\xfc\x5c\x2f\xe5\xca\xcf\x57\xc8\xd5\x1d\x72"
+ "\xdf\x09\x6d\x2c\x3e\x46\x63\xf1\xc5\x9b\xd4\xda\x3c\xfe\xe9\x4e\x53\xab"
+ "\xa8\x7e\x49\x3a\xad\x38\x6b\xb3\x28\x3d\xd3\x37\xa0\xba\x57\xb8\x4f\x2d"
+ "\x35\xa8\xb6\xbf\xb2\x07\x7d\x22\xb8\x23\x98\xff\x6c\x34\x31\xec\xc4"
+ "\xf6"),
+ .tag = TEST_DATA_STR (
+ "\xe1\x49\xfb\xaa\x73\xf0\x50\x9d\x34\xbd\xdf\x03\x1c\x4c\xc4\x76"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc14) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xba\xff\x99\xa6\xdd\x4d\x29\x81\x04\x3a\x48\xb5\x2f"
+ "\x36\xba\x5d\xbb\x73\x80\xca\xa7\x5b\xc6\x5d"),
+ .iv = TEST_DATA_STR ("\x98\x38\xd9\xf9\xb8\x63\x2c\xbd\x48\xa2\xba\x35"),
+ .aad = TEST_DATA_STR (
+ "\xe7\x81\xf8\xf1\xf5\xbf\xad\x3a\x50\xc4\x7e\x36\x33\x5e\x7a\x22\x5d\xbf"
+ "\x32\xbc\x15\x96\x7d\x66\xdd\x30\x06\xdd\x42\x4b\xa9\x71\xd8\xf1\xa9\xca"
+ "\x90\x61\x94\x50\xbd\xa4\x56\x29\x39\x01\x5f\x75\xb4\x67\xd6\x33\xbb\x57"
+ "\x43\xbb\xf3\x7c\x9a\x2b\x24\x15\xd7\x30\x65\xfa\xd7\x1d\xa3\x31\x2d\x81"
+ "\x7b\xa2\xe6\x24\xc6\x88\x63\xf7\x22\x78\x05\x2a\x4d\xb0\xe7\x3d\xbf"
+ "\x10"),
+ .tag = TEST_DATA_STR (
+ "\xf8\xed\xe3\x60\x48\x26\x1d\x8a\x3b\xf7\x8b\x19\x33\xf3\x3b\x22"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc0) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xb5\x2c\x50\x5a\x37\xd7\x8e\xda\x5d\xd3\x4f\x20\xc2\x25\x40\xea\x1b\x58"
+ "\x96\x3c\xf8\xe5\xbf\x8f\xfa\x85\xf9\xf2\x49\x25\x05\xb4"),
+ .iv = TEST_DATA_STR ("\x51\x6c\x33\x92\x9d\xf5\xa3\x28\x4f\xf4\x63\xd7"),
+ .tag = TEST_DATA_STR (
+ "\xbd\xc1\xac\x88\x4d\x33\x24\x57\xa1\xd2\x66\x4f\x16\x8c\x76\xf0"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc1) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x5f\xe0\x86\x1c\xdc\x26\x90\xce\x69\xb3\x65\x8c\x7f\x26\xf8\x45\x8e\xec"
+ "\x1c\x92\x43\xc5\xba\x08\x45\x30\x5d\x89\x7e\x96\xca\x0f"),
+ .iv = TEST_DATA_STR ("\x77\x0a\xc1\xa5\xa3\xd4\x76\xd5\xd9\x69\x44\xa1"),
+ .tag = TEST_DATA_STR (
+ "\x19\x6d\x69\x1e\x10\x47\x09\x3c\xa4\xb3\xd2\xef\x4b\xab\xa2\x16"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc2) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x76\x20\xb7\x9b\x17\xb2\x1b\x06\xd9\x70\x19\xaa\x70\xe1\xca\x10\x5e\x1c"
+ "\x03\xd2\xa0\xcf\x8b\x20\xb5\xa0\xce\x5c\x39\x03\xe5\x48"),
+ .iv = TEST_DATA_STR ("\x60\xf5\x6e\xb7\xa4\xb3\x8d\x4f\x03\x39\x55\x11"),
+ .tag = TEST_DATA_STR (
+ "\xf5\x70\xc3\x82\x02\xd9\x45\x64\xba\xb3\x9f\x75\x61\x7b\xc8\x7a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc3) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x7e\x2d\xb0\x03\x21\x18\x94\x76\xd1\x44\xc5\xf2\x7e\x78\x70\x87\x30\x2a"
+ "\x48\xb5\xf7\x78\x6c\xd9\x1e\x93\x64\x16\x28\xc2\x32\x8b"),
+ .iv = TEST_DATA_STR ("\xea\x9d\x52\x5b\xf0\x1d\xe7\xb2\x23\x4b\x60\x6a"),
+ .tag = TEST_DATA_STR (
+ "\xdb\x9d\xf5\xf1\x4f\x6c\x9f\x2a\xe8\x1f\xd4\x21\x41\x2d\xdb\xbb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc4) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa2\x3d\xfb\x84\xb5\x97\x6b\x46\xb1\x83\x0d\x93\xbc\xf6\x19\x41\xca\xe5"
+ "\xe4\x09\xe4\xf5\x55\x1d\xc6\x84\xbd\xce\xf9\x87\x64\x80"),
+ .iv = TEST_DATA_STR ("\x5a\xa3\x45\x90\x80\x48\xde\x10\xa2\xbd\x3d\x32"),
+ .tag = TEST_DATA_STR (
+ "\xf2\x82\x17\x64\x92\x30\xbd\x7a\x40\xa9\xa4\xdd\xab\xc6\x7c\x43"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc5) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xdf\xe9\x28\xf8\x64\x30\xb7\x8a\xdd\x7b\xb7\x69\x60\x23\xe6\x15\x3d\x76"
+ "\x97\x7e\x56\x10\x3b\x18\x02\x53\x49\x0a\xff\xb9\x43\x1c"),
+ .iv = TEST_DATA_STR ("\x1d\xd0\x78\x5a\xf9\xf5\x89\x79\xa1\x0b\xd6\x2d"),
+ .tag = TEST_DATA_STR (
+ "\xa5\x5e\xb0\x9e\x9e\xde\xf5\x8d\x9f\x67\x1d\x72\x20\x7f\x8b\x3c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc6) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x34\x04\x8d\xb8\x15\x91\xee\x68\x22\x49\x56\xbd\x69\x89\xe1\x63\x0f\xcf"
+ "\x06\x8d\x7f\xf7\x26\xae\x81\xe5\xb2\x9f\x54\x8c\xfc\xfb"),
+ .iv = TEST_DATA_STR ("\x16\x21\xd3\x4c\xff\x2a\x5b\x25\x0c\x7b\x76\xfc"),
+ .tag = TEST_DATA_STR (
+ "\x49\x92\xec\x3d\x57\xcc\xcf\xa5\x8f\xd8\x91\x6c\x59\xb7\x0b\x11"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc7) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa1\x11\x4f\x87\x49\xc7\x2b\x8c\xef\x62\xe7\x50\x3f\x1a\xd9\x21\xd3\x3e"
+ "\xee\xde\x32\xb0\xb5\xb8\xe0\xd6\x80\x7a\xa2\x33\xd0\xad"),
+ .iv = TEST_DATA_STR ("\xa1\x90\xed\x3f\xf2\xe2\x38\xbe\x56\xf9\x0b\xd6"),
+ .tag = TEST_DATA_STR (
+ "\xc8\x46\x4d\x95\xd5\x40\xfb\x19\x11\x56\xfb\xbc\x16\x08\x84\x2a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc8) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xdd\xbb\x99\xdc\x31\x02\xd3\x11\x02\xc0\xe1\x4b\x23\x85\x18\x60\x57\x66"
+ "\xc5\xb2\x3d\x9b\xea\x52\xc7\xc5\xa7\x71\x04\x2c\x85\xa0"),
+ .iv = TEST_DATA_STR ("\x95\xd1\x5e\xd7\x5c\x6a\x10\x9a\xac\x1b\x1d\x86"),
+ .tag = TEST_DATA_STR (
+ "\x81\x3d\x1d\xa3\x77\x5c\xac\xd7\x8e\x96\xd8\x6f\x03\x6c\xff\x96"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc9) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x1f\xaa\x50\x6b\x8f\x13\xa2\xe6\x66\x0a\xf7\x8d\x92\x91\x5a\xdf\x33\x36"
+ "\x58\xf7\x48\xf4\xe4\x8f\xa2\x01\x35\xa2\x9e\x9a\xbe\x5f"),
+ .iv = TEST_DATA_STR ("\xe5\x0f\x27\x8d\x36\x62\xc9\x9d\x75\x0f\x60\xd3"),
+ .tag = TEST_DATA_STR (
+ "\xae\xc7\xec\xe6\x6b\x73\x44\xaf\xd6\xf6\xcc\x74\x19\xcf\x60\x27"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc10) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xf3\x0b\x59\x42\xfa\xf5\x7d\x4c\x13\xe7\xa8\x24\x95\xae\xdf\x1b\x4e\x60"
+ "\x35\x39\xb2\xe1\x59\x93\x17\xcc\x6e\x53\x22\x5a\x24\x93"),
+ .iv = TEST_DATA_STR ("\x33\x6c\x38\x8e\x18\xe6\xab\xf9\x2b\xb7\x39\xa9"),
+ .tag = TEST_DATA_STR (
+ "\xdd\xaf\x8e\xf4\xcb\x2f\x8a\x6d\x40\x1f\x3b\xe5\xff\x0b\xaf\x6a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc11) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xda\xf4\xd9\xc1\x2c\x5d\x29\xfc\x3f\xa9\x36\x53\x2c\x96\x19\x6e\x56\xae"
+ "\x84\x2e\x47\x06\x3a\x4b\x29\xbf\xff\x2a\x35\xed\x92\x80"),
+ .iv = TEST_DATA_STR ("\x53\x81\xf2\x11\x97\xe0\x93\xb9\x6c\xda\xc4\xfa"),
+ .tag = TEST_DATA_STR (
+ "\x7f\x18\x32\xc7\xf7\xcd\x78\x12\xa0\x04\xb7\x9c\x3d\x39\x94\x73"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc12) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x6b\x52\x47\x54\x14\x9c\x81\x40\x1d\x29\xa4\xb8\xa6\xf4\xa4\x78\x33\x37"
+ "\x28\x06\xb2\xd4\x08\x3f\xf1\x7f\x2d\xb3\xbf\xc1\x7b\xca"),
+ .iv = TEST_DATA_STR ("\xac\x7d\x3d\x61\x8a\xb6\x90\x55\x5e\xc2\x44\x08"),
+ .tag = TEST_DATA_STR (
+ "\xdb\x07\xa8\x85\xe2\xbd\x39\xda\x74\x11\x6d\x06\xc3\x16\xa5\xc9"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc13) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xcf\xf0\x83\x30\x3f\xf4\x0a\x1f\x66\xc4\xae\xd1\xac\x7f\x50\x62\x8f\xe7"
+ "\xe9\x31\x1f\x5d\x03\x7e\xbf\x49\xf4\xa4\xb9\xf0\x22\x3f"),
+ .iv = TEST_DATA_STR ("\x45\xd4\x6e\x1b\xaa\xdc\xfb\xc8\xf0\xe9\x22\xff"),
+ .tag = TEST_DATA_STR (
+ "\x16\x87\xc6\xd4\x59\xea\x48\x1b\xf8\x8e\x4b\x22\x63\x22\x79\x06"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc14) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x39\x54\xf6\x0c\xdd\xbb\x39\xd2\xd8\xb0\x58\xad\xf5\x45\xd5\xb8\x24\x90"
+ "\xc8\xae\x92\x83\xaf\xa5\x27\x86\x89\x04\x1d\x41\x5a\x3a"),
+ .iv = TEST_DATA_STR ("\x8f\xb3\xd9\x8e\xf2\x4f\xba\x03\x74\x6a\xc8\x4f"),
+ .tag = TEST_DATA_STR (
+ "\x7f\xb1\x30\x85\x5d\xfe\x7a\x37\x33\x13\x36\x1f\x33\xf5\x52\x37"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc0) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x78\xdc\x4e\x0a\xaf\x52\xd9\x35\xc3\xc0\x1e\xea\x57\x42\x8f\x00\xca\x1f"
+ "\xd4\x75\xf5\xda\x86\xa4\x9c\x8d\xd7\x3d\x68\xc8\xe2\x23"),
+ .iv = TEST_DATA_STR ("\xd7\x9c\xf2\x2d\x50\x4c\xc7\x93\xc3\xfb\x6c\x8a"),
+ .aad = TEST_DATA_STR (
+ "\xb9\x6b\xaa\x8c\x1c\x75\xa6\x71\xbf\xb2\xd0\x8d\x06\xbe\x5f\x36"),
+ .tag = TEST_DATA_STR (
+ "\x3e\x5d\x48\x6a\xa2\xe3\x0b\x22\xe0\x40\xb8\x57\x23\xa0\x6e\x76"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc1) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x44\x57\xff\x33\x68\x3c\xca\x6c\xa4\x93\x87\x8b\xdc\x00\x37\x38\x93\xa9"
+ "\x76\x34\x12\xee\xf8\xcd\xdb\x54\xf9\x13\x18\xe0\xda\x88"),
+ .iv = TEST_DATA_STR ("\x69\x9d\x1f\x29\xd7\xb8\xc5\x53\x00\xbb\x1f\xd2"),
+ .aad = TEST_DATA_STR (
+ "\x67\x49\xda\xee\xa3\x67\xd0\xe9\x80\x9e\x2d\xc2\xf3\x09\xe6\xe3"),
+ .tag = TEST_DATA_STR (
+ "\xd6\x0c\x74\xd2\x51\x7f\xde\x4a\x74\xe0\xcd\x47\x09\xed\x43\xa9"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc2) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x4d\x01\xc9\x6e\xf9\xd9\x8d\x4f\xb4\xe9\xb6\x1b\xe5\xef\xa7\x72\xc9\x78"
+ "\x85\x45\xb3\xea\xc3\x9e\xb1\xca\xcb\x99\x7a\x5f\x07\x92"),
+ .iv = TEST_DATA_STR ("\x32\x12\x4a\x4d\x9e\x57\x6a\xea\x25\x89\xf2\x38"),
+ .aad = TEST_DATA_STR (
+ "\xd7\x2b\xad\x0c\x38\x49\x5e\xda\x50\xd5\x58\x11\x94\x5e\xe2\x05"),
+ .tag = TEST_DATA_STR (
+ "\x6d\x63\x97\xc9\xe2\x03\x0f\x5b\x80\x53\xbf\xe5\x10\xf3\xf2\xcf"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc3) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x83\x78\x19\x3a\x4c\xe6\x41\x80\x81\x4b\xd6\x05\x91\xd1\x05\x4a\x04\xdb"
+ "\xc4\xda\x02\xaf\xde\x45\x37\x99\xcd\x68\x88\xee\x0c\x6c"),
+ .iv = TEST_DATA_STR ("\xbd\x8b\x4e\x35\x2c\x7f\x69\x87\x8a\x47\x54\x35"),
+ .aad = TEST_DATA_STR (
+ "\x1c\x6b\x34\x3c\x4d\x04\x5c\xbb\xa5\x62\xba\xe3\xe5\xff\x1b\x18"),
+ .tag = TEST_DATA_STR (
+ "\x08\x33\x96\x7a\x6a\x53\xba\x24\xe7\x5c\x03\x72\xa6\xa1\x7b\xda"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc4) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x22\xfc\x82\xdb\x5b\x60\x69\x98\xad\x45\x09\x9b\x79\x78\xb5\xb4\xf9\xdd"
+ "\x4e\xa6\x01\x7e\x57\x37\x0a\xc5\x61\x41\xca\xaa\xbd\x12"),
+ .iv = TEST_DATA_STR ("\x88\x0d\x05\xc5\xee\x59\x9e\x5f\x15\x1e\x30\x2f"),
+ .aad = TEST_DATA_STR (
+ "\x3e\x3e\xb5\x74\x7e\x39\x0f\x7b\xc8\x0e\x74\x82\x33\x48\x4f\xfc"),
+ .tag = TEST_DATA_STR (
+ "\x2e\x12\x2a\x47\x8e\x64\x46\x32\x86\xf8\xb4\x89\xdc\xdd\x09\xc8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc5) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xfc\x00\x96\x0d\xdd\x69\x8d\x35\x72\x8c\x5a\xc6\x07\x59\x6b\x51\xb3\xf8"
+ "\x97\x41\xd1\x4c\x25\xb8\xba\xda\xc9\x19\x76\x12\x0d\x99"),
+ .iv = TEST_DATA_STR ("\xa4\x24\xa3\x2a\x23\x7f\x0d\xf5\x30\xf0\x5e\x30"),
+ .aad = TEST_DATA_STR (
+ "\xcf\xb7\xe0\x5e\x31\x57\xf0\xc9\x05\x49\xd5\xc7\x86\x50\x63\x11"),
+ .tag = TEST_DATA_STR (
+ "\xdc\xdc\xb9\xe4\x00\x4b\x85\x2a\x0d\xa1\x2b\xdf\x25\x5b\x4d\xdd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc6) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x69\x74\x99\x43\x09\x2f\x56\x05\xbf\x97\x1e\x18\x5c\x19\x1c\x61\x82\x61"
+ "\xb2\xc7\xcc\x16\x93\xcd\xa1\x08\x0c\xa2\xfd\x8d\x51\x11"),
+ .iv = TEST_DATA_STR ("\xbd\x0d\x62\xc0\x2e\xe6\x82\x06\x9b\xd1\xe1\x28"),
+ .aad = TEST_DATA_STR (
+ "\x69\x67\xdc\xe8\x78\xf0\x3b\x64\x3b\xf5\xcd\xba\x59\x6a\x7a\xf3"),
+ .tag = TEST_DATA_STR (
+ "\x37\x8f\x79\x6a\xe5\x43\xe1\xb2\x91\x15\xcc\x18\xac\xd1\x93\xf4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc7) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xfc\x48\x75\xdb\x84\x81\x98\x34\xb1\xcb\x43\x82\x8d\x2f\x0a\xe3\x47\x3a"
+ "\xa3\x80\x11\x1c\x27\x37\xe8\x2a\x9a\xb1\x1f\xea\x1f\x19"),
+ .iv = TEST_DATA_STR ("\xda\x6a\x68\x4d\x3f\xf6\x3a\x2d\x10\x9d\xec\xd6"),
+ .aad = TEST_DATA_STR (
+ "\x91\xb6\xfa\x2a\xb4\xde\x44\x28\x2f\xfc\x86\xc8\xcd\xe6\xe7\xf5"),
+ .tag = TEST_DATA_STR (
+ "\x50\x4e\x81\xd2\xe7\x87\x7e\x4d\xad\x6f\x31\xcd\xeb\x07\xbd\xbd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc8) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x9f\x9f\xe7\xd2\xa2\x6d\xcf\x59\xd6\x84\xf1\xc0\x94\x5b\x5f\xfa\xfe\x0a"
+ "\x47\x46\x84\x5e\xd3\x17\xd3\x5f\x3e\xd7\x6c\x93\x04\x4d"),
+ .iv = TEST_DATA_STR ("\x13\xb5\x99\x71\xcd\x4d\xd3\x6b\x19\xac\x71\x04"),
+ .aad = TEST_DATA_STR (
+ "\x19\x0a\x69\x34\xf4\x5f\x89\xc9\x00\x67\xc2\xf6\x2e\x04\xc5\x3b"),
+ .tag = TEST_DATA_STR (
+ "\x4f\x63\x6a\x29\x4b\xfb\xf5\x1f\xc0\xe1\x31\xd6\x94\xd5\xc2\x22"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc9) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xab\x91\x55\xd7\xd8\x1b\xa6\xf3\x31\x93\x69\x5c\xf4\x56\x6a\x9b\x6e\x97"
+ "\xa3\xe4\x09\xf5\x71\x59\xae\x6c\xa4\x96\x55\xcc\xa0\x71"),
+ .iv = TEST_DATA_STR ("\x26\xa9\xf8\xd6\x65\xd1\x63\xdd\xb9\x2d\x03\x5d"),
+ .aad = TEST_DATA_STR (
+ "\x4a\x20\x3a\xc2\x6b\x95\x1a\x1f\x67\x3c\x66\x05\x65\x3e\xc0\x2d"),
+ .tag = TEST_DATA_STR (
+ "\x43\x7e\xa7\x7a\x38\x79\xf0\x10\x69\x1e\x28\x8d\x62\x69\xa9\x96"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc10) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x0f\x1c\x62\xdd\x80\xb4\xa6\xd0\x9e\xe9\xd7\x87\xb1\xb0\x43\x27\xaa\x36"
+ "\x15\x29\xff\xa3\x40\x75\x60\x41\x4a\xc4\x7b\x7e\xf7\xbc"),
+ .iv = TEST_DATA_STR ("\xc8\x76\x13\xa3\xb7\x0d\x2a\x04\x8f\x32\xcb\x9a"),
+ .aad = TEST_DATA_STR (
+ "\x8f\x23\xd4\x04\xbe\x2d\x9e\x88\x8d\x21\x9f\x1b\x40\xaa\x29\xe8"),
+ .tag = TEST_DATA_STR (
+ "\x36\xd8\xa3\x09\xac\xbb\x87\x16\xc9\xc0\x8c\x7f\x5d\xe4\x91\x1e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc11) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xf3\xe9\x54\xa3\x89\x56\xdf\x89\x02\x55\xf0\x17\x09\xe4\x57\xb3\x3f\x4b"
+ "\xfe\x7e\xcb\x36\xd0\xee\x50\xf2\x50\x04\x71\xee\xbc\xde"),
+ .iv = TEST_DATA_STR ("\x97\x99\xab\xd3\xc5\x21\x10\xc7\x04\xb0\xf3\x6a"),
+ .aad = TEST_DATA_STR (
+ "\xdd\xb7\x01\x73\xf4\x41\x57\x75\x5b\x6c\x9b\x70\x58\xf4\x0c\xb7"),
+ .tag = TEST_DATA_STR (
+ "\xb3\x23\xae\x3a\xbc\xb4\x15\xc7\xf4\x20\x87\x6c\x98\x0f\x48\x58"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc12) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x06\x25\x31\x65\x34\xfb\xd8\x2f\xe8\xfd\xea\x50\xfa\x57\x3c\x46\x20\x22"
+ "\xc4\x2f\x79\xe8\xb2\x13\x60\xe5\xa6\xdc\xe6\x6d\xde\x28"),
+ .iv = TEST_DATA_STR ("\xda\x64\xa6\x74\x90\x7c\xd6\xcf\x24\x8f\x5f\xbb"),
+ .aad = TEST_DATA_STR (
+ "\xf2\x4d\x48\xe0\x4f\x5a\x0d\x98\x7b\xa7\xc7\x45\xb7\x3b\x03\x64"),
+ .tag = TEST_DATA_STR (
+ "\xdf\x36\x0b\x81\x0f\x27\xe7\x94\x67\x3a\x8b\xb2\xdc\x0d\x68\xb0"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc13) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x28\xf0\x45\xac\x7c\x4f\xe5\xd4\xb0\x1a\x9d\xcd\x5f\x1a\xd3\xef\xff\x1c"
+ "\x4f\x17\x0f\xc8\xab\x87\x58\xd9\x72\x92\x86\x8d\x58\x28"),
+ .iv = TEST_DATA_STR ("\x5d\x85\xde\x95\xb0\xbd\xc4\x45\x14\x14\x39\x19"),
+ .aad = TEST_DATA_STR (
+ "\x60\x1d\x21\x58\xf1\x7a\xb3\xc7\xb4\xdc\xb6\x95\x0f\xbd\xcd\xde"),
+ .tag = TEST_DATA_STR (
+ "\x42\xc3\xf5\x27\x41\x8c\xf2\xc3\xf5\xd5\x01\x0c\xcb\xa8\xf2\x71"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc14) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x19\x31\x0e\xed\x5f\x5f\x44\xeb\x47\x07\x5c\x10\x5e\xb3\x1e\x36\xbb\xfd"
+ "\x13\x10\xf7\x41\xb9\xba\xa6\x6a\x81\x13\x8d\x35\x72\x42"),
+ .iv = TEST_DATA_STR ("\xa1\x24\x71\x20\x13\x8f\xa4\xf0\xe9\x6c\x99\x2c"),
+ .aad = TEST_DATA_STR (
+ "\x29\xd7\x46\x41\x43\x33\xe0\xf7\x2b\x4c\x3f\x44\xec\x6b\xfe\x42"),
+ .tag = TEST_DATA_STR (
+ "\xd5\x99\x7e\x2f\x95\x6d\xf3\xfa\x2c\x23\x88\xe2\x0f\x30\xc4\x80"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc0) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x88\x6c\xff\x5f\x3e\x6b\x8d\x0e\x1a\xd0\xa3\x8f\xcd\xb2\x6d\xe9\x7e\x8a"
+ "\xcb\xe7\x9f\x6b\xed\x66\x95\x9a\x59\x8f\xa5\x04\x7d\x65"),
+ .iv = TEST_DATA_STR ("\x3a\x8e\xfa\x1c\xd7\x4b\xba\xb5\x44\x8f\x99\x45"),
+ .aad = TEST_DATA_STR ("\x51\x9f\xee\x51\x9d\x25\xc7\xa3\x04\xd6\xc6\xaa\x18"
+ "\x97\xee\x1e\xb8\xc5\x96\x55"),
+ .tag = TEST_DATA_STR (
+ "\xf6\xd4\x75\x05\xec\x96\xc9\x8a\x42\xdc\x3a\xe7\x19\x87\x7b\x87"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc1) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x69\x37\xa5\x7d\x35\xfe\x6d\xc3\xfc\x42\x0b\x12\x3b\xcc\xdc\xe8\x74\xbd"
+ "\x4c\x18\xf2\xe7\xc0\x1c\xe2\xfa\xf3\x3d\x39\x44\xfd\x9d"),
+ .iv = TEST_DATA_STR ("\xa8\x72\x47\x79\x7b\x75\x84\x67\xb9\x63\x10\xf3"),
+ .aad = TEST_DATA_STR ("\xea\xd9\x61\x93\x9a\x33\xdd\x57\x8f\x8e\x93\xdb\x8b"
+ "\x28\xa1\xc8\x53\x62\x90\x5f"),
+ .tag = TEST_DATA_STR (
+ "\x59\x9d\xe3\xec\xf2\x2c\xb8\x67\xf0\x3f\x7f\x6d\x9f\xd7\x42\x8a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc2) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xe6\x5a\x33\x17\x76\xc9\xdc\xdf\x5e\xba\x6c\x59\xe0\x5e\xc0\x79\xd9\x74"
+ "\x73\xbc\xdc\xe8\x4d\xaf\x83\x6b\xe3\x23\x45\x62\x63\xa0"),
+ .iv = TEST_DATA_STR ("\xca\x73\x1f\x76\x8d\xa0\x1d\x02\xeb\x8e\x72\x7e"),
+ .aad = TEST_DATA_STR ("\xd7\x27\x45\x86\x51\x7b\xf1\xd8\xda\x86\x6f\x4a\x47"
+ "\xad\x0b\xcf\x29\x48\xa8\x62"),
+ .tag = TEST_DATA_STR (
+ "\xa8\xab\xe7\xa8\x08\x5f\x25\x13\x0a\x72\x06\xd3\x7a\x8a\xaf\x6d"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc3) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x77\xbb\x1b\x6e\xf8\x98\x68\x3c\x98\x1b\x2f\xc8\x99\x31\x9f\xfb\xb6\x00"
+ "\x0e\xdc\xa2\x25\x66\xb6\x34\xdb\x3a\x3c\x80\x40\x59\xe5"),
+ .iv = TEST_DATA_STR ("\x35\x4a\x19\x28\x37\x69\xb3\xb9\x91\xb0\x5a\x4c"),
+ .aad = TEST_DATA_STR ("\xb5\x56\x62\x51\xa8\xa8\xbe\xc2\x12\xdc\x08\x11\x32"
+ "\x29\xff\x85\x90\x16\x88\x00"),
+ .tag = TEST_DATA_STR (
+ "\xe5\xc2\xdc\xcf\x8f\xc7\xf2\x96\xca\xc9\x5d\x70\x71\xcb\x8d\x7d"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc4) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x2a\x43\x30\x8d\x52\x0a\x59\xed\x51\xe4\x7a\x3a\x91\x5e\x1d\xbf\x20\xa9"
+ "\x1f\x08\x86\x50\x6e\x48\x1a\xd3\xde\x65\xd5\x09\x75\xb4"),
+ .iv = TEST_DATA_STR ("\xbc\xbf\x99\x73\x3d\x8e\xc9\x0c\xb2\x3e\x6c\xe6"),
+ .aad = TEST_DATA_STR ("\xeb\x88\x28\x87\x29\x28\x9d\x26\xfe\x0e\x75\x7a\x99"
+ "\xad\x8e\xec\x96\x10\x60\x53"),
+ .tag = TEST_DATA_STR (
+ "\x01\xb0\x19\x69\x33\xaa\x49\x12\x3e\xab\x4e\x15\x71\x25\x03\x83"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc5) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x23\x79\xb3\x5f\x85\x10\x2d\xb4\xe7\xae\xcc\x52\xb7\x05\xbc\x69\x5d\x47"
+ "\x68\xd4\x12\xe2\xd7\xbe\xbe\x99\x92\x36\x78\x39\x72\xff"),
+ .iv = TEST_DATA_STR ("\x91\x89\x98\xc4\x80\x10\x37\xb1\xcd\x10\x2f\xaa"),
+ .aad = TEST_DATA_STR ("\xb3\x72\x23\x09\xe0\xf0\x66\x22\x5e\x8d\x16\x59\x08"
+ "\x4e\xbb\x07\xa9\x3b\x43\x5d"),
+ .tag = TEST_DATA_STR (
+ "\xdf\xb1\x8a\xee\x99\xd1\xf6\x7f\x57\x48\xd4\xb4\x84\x3c\xb6\x49"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc6) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x98\xb3\xcb\x75\x37\x16\x7e\x6d\x14\xa2\xa8\xb2\x31\x0f\xe9\x4b\x71\x5c"
+ "\x72\x9f\xdf\x85\x21\x65\x68\x15\x0b\x55\x6d\x07\x97\xba"),
+ .iv = TEST_DATA_STR ("\xbc\xa5\xe2\xe5\xa6\xb3\x0f\x18\xd2\x63\xc6\xb2"),
+ .aad = TEST_DATA_STR ("\x26\x0d\x3d\x72\xdb\x70\xd6\x77\xa4\xe3\xe1\xf3\xe1"
+ "\x14\x31\x21\x7a\x2e\x47\x13"),
+ .tag = TEST_DATA_STR (
+ "\xd6\xb7\x56\x0f\x8a\xc2\xf0\xa9\x0b\xad\x42\xa6\xa0\x72\x04\xbc"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc7) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x30\x34\x1a\xe0\xf1\x99\xb1\x0a\x15\x17\x5d\x00\x91\x3d\x50\x29\x52\x6a"
+ "\xb7\xf7\x61\xc0\xb9\x36\xa7\xdd\x5f\x1b\x15\x83\x42\x9d"),
+ .iv = TEST_DATA_STR ("\xdb\xe1\x09\xa8\xce\x5f\x7b\x24\x1e\x99\xf7\xaf"),
+ .aad = TEST_DATA_STR ("\xfe\x4b\xde\xe5\xca\x9c\x48\x06\xfa\x02\x47\x15\xfb"
+ "\xf6\x6a\xb8\x45\x28\x5f\xa7"),
+ .tag = TEST_DATA_STR (
+ "\xae\x91\xda\xed\x65\x8e\x26\xc0\xd1\x26\x57\x51\x47\xaf\x98\x99"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc8) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x82\x32\xb6\xa1\xd2\xe3\x67\xe9\xce\x1e\xa8\xd4\x2f\xcf\xc8\x3a\x4b\xc8"
+ "\xbd\xec\x46\x5c\x6b\xa3\x26\xe3\x53\xad\x92\x55\xf2\x07"),
+ .iv = TEST_DATA_STR ("\xcd\x2f\xb5\xff\x9c\xf0\xf3\x98\x68\xad\x86\x85"),
+ .aad = TEST_DATA_STR ("\x02\x41\x8b\x3d\xde\x54\x92\x4a\x96\x28\xde\x06\x00"
+ "\x4c\x08\x82\xae\x4e\xc3\xbb"),
+ .tag = TEST_DATA_STR (
+ "\xd5\x30\x8f\x63\x70\x86\x75\xce\xd1\x9b\x27\x10\xaf\xd2\xdb\x49"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc9) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xf9\xa1\x32\xa5\x0a\x50\x81\x45\xff\xd8\x29\x4e\x68\x94\x4e\xa4\x36\xce"
+ "\x0f\x9a\x97\xe1\x81\xf5\xe0\xd6\xc5\xd2\x72\x31\x1f\xc1"),
+ .iv = TEST_DATA_STR ("\x89\x29\x91\xb5\x4e\x94\xb9\xd5\x74\x42\xcc\xaf"),
+ .aad = TEST_DATA_STR ("\x4e\x0f\xbd\x37\x99\xda\x25\x0f\xa2\x79\x11\xb7\xe6"
+ "\x8d\x76\x23\xbf\xe6\x0a\x53"),
+ .tag = TEST_DATA_STR (
+ "\x89\x88\x1d\x5f\x78\x6e\x6d\x53\xe0\xd1\x9c\x3b\x4e\x68\x87\xd8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc10) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x0e\x37\x46\xe5\x06\x46\x33\xea\x93\x11\xb2\xb8\x42\x7c\x53\x6a\xf9\x27"
+ "\x17\xde\x20\xee\xb6\x26\x0d\xb1\x33\x3c\x3d\x8a\x81\x14"),
+ .iv = TEST_DATA_STR ("\xf8\x4c\x3a\x1c\x94\x53\x3f\x7f\x25\xce\xc0\xac"),
+ .aad = TEST_DATA_STR ("\x8c\x0d\x41\xe6\x13\x53\x38\xc8\xd3\xe6\x3e\x2a\x5f"
+ "\xa0\xa9\x66\x7e\xc9\xa5\x80"),
+ .tag = TEST_DATA_STR (
+ "\x47\x9c\xcf\xe9\x24\x1d\xe2\xc4\x74\xf2\xed\xeb\xbb\x38\x5c\x09"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc11) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xb9\x97\xe9\xb0\x74\x6a\xba\xae\xd6\xe6\x4b\x63\xbd\xf6\x48\x82\x52\x6a"
+ "\xd9\x2e\x24\xa2\xf5\x64\x9d\xf0\x55\xc9\xec\x0f\x1d\xaa"),
+ .iv = TEST_DATA_STR ("\xf1\x41\xd8\xd7\x1b\x03\x37\x55\x02\x2f\x0a\x7d"),
+ .aad = TEST_DATA_STR ("\x68\x1d\x65\x83\xf5\x27\xb1\xa9\x2f\x66\xca\xae\x9b"
+ "\x1d\x4d\x02\x8e\x2e\x63\x1e"),
+ .tag = TEST_DATA_STR (
+ "\xb3\x04\x42\xa6\x39\x5e\xc1\x32\x46\xc4\x8b\x21\xff\xc6\x55\x09"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc12) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x87\x66\x0e\xc1\x70\x0d\x4e\x9f\x88\xa3\x23\xa4\x9f\x0b\x87\x1e\x6a\xaf"
+ "\x43\x4a\x2d\x84\x48\xd0\x4d\x4a\x22\xf6\x56\x10\x28\xe0"),
+ .iv = TEST_DATA_STR ("\x2a\x07\xb4\x25\x93\xcd\x24\xf0\xa6\xfe\x40\x6c"),
+ .aad = TEST_DATA_STR ("\x1d\xd2\x39\xb5\x71\x85\xb7\xe4\x57\xce\xd7\x3e\xbb"
+ "\xa0\x43\x05\x7f\x04\x9e\xdd"),
+ .tag = TEST_DATA_STR (
+ "\xdf\x7a\x50\x10\x49\xb3\x7a\x53\x40\x98\xcb\x45\xcb\x9c\x21\xb7"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc13) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xea\x47\x92\xe1\xf1\x71\x7b\x77\xa0\x0d\xe4\xd1\x09\xe6\x27\x54\x9b\x16"
+ "\x5c\x82\xaf\x35\xf3\x3c\xa7\xe1\xa6\xb8\xed\x62\xf1\x4f"),
+ .iv = TEST_DATA_STR ("\x74\x53\xcc\x8b\x46\xfe\x4b\x93\xbc\xc4\x83\x81"),
+ .aad = TEST_DATA_STR ("\x46\xd9\x89\x70\xa6\x36\xe7\xcd\x7b\x76\xfc\x36\x2a"
+ "\xe8\x82\x98\x43\x6f\x83\x4f"),
+ .tag = TEST_DATA_STR (
+ "\x51\x8d\xba\xcd\x36\xbe\x6f\xba\x5c\x12\x87\x16\x78\xa5\x55\x16"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc14) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x34\x89\x2c\xdd\x1d\x48\xca\x16\x6f\x7b\xa7\x31\x82\xcb\x97\x33\x6c\x2c"
+ "\x75\x4a\xc1\x60\xa3\xe3\x71\x83\xd6\xfb\x50\x78\xce\xc3"),
+ .iv = TEST_DATA_STR ("\xed\x31\x98\xc5\x86\x1b\x78\xc7\x1a\x6a\x4e\xec"),
+ .aad = TEST_DATA_STR ("\xa6\xfa\x6d\x0d\xd1\xe0\xb9\x5b\x46\x09\x95\x1b\xbb"
+ "\xe7\x14\xde\x0a\xe0\xcc\xfa"),
+ .tag = TEST_DATA_STR (
+ "\xc6\x38\x77\x95\x09\x6b\x34\x8e\xcf\x1d\x1f\x6c\xaa\xa3\xc8\x13"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc0) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xf4\x06\x9b\xb7\x39\xd0\x7d\x0c\xaf\xdc\xbc\x60\x9c\xa0\x15\x97\xf9\x85"
+ "\xc4\x3d\xb6\x3b\xba\xaa\x0d\xeb\xbb\x04\xd3\x84\xe4\x9c"),
+ .iv = TEST_DATA_STR ("\xd2\x5f\xf3\x0f\xdc\x3d\x46\x4f\xe1\x73\xe8\x05"),
+ .aad = TEST_DATA_STR (
+ "\x3e\x14\x49\xc4\x83\x7f\x08\x92\xf9\xd5\x51\x27\xc7\x5c\x4b\x25\xd6\x9b"
+ "\xe3\x34\xba\xf5\xf1\x93\x94\xd2\xd8\xbb\x46\x0c\xbf\x21\x20\xe1\x47\x36"
+ "\xd0\xf6\x34\xaa\x79\x2f\xec\xa2\x0e\x45\x5f\x11"),
+ .tag = TEST_DATA_STR (
+ "\x80\x5e\xc2\x93\x1c\x21\x81\xe5\xbf\xb7\x4f\xa0\xa9\x75\xf0\xcf"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc1) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x62\x18\x9d\xcc\x4b\xeb\x97\x46\x2d\x6c\x09\x27\xd8\xa2\x70\xd3\x9a\x1b"
+ "\x07\xd7\x2d\x0a\xd2\x88\x40\xba\xdd\x4f\x68\xcf\x9c\x8b"),
+ .iv = TEST_DATA_STR ("\x85\x9f\xda\x52\x47\xc8\x88\x82\x3a\x4b\x80\x32"),
+ .aad = TEST_DATA_STR (
+ "\xb2\x8d\x16\x21\xee\x11\x0f\x4c\x9d\x70\x9f\xad\x76\x4b\xba\x2d\xd6\xd2"
+ "\x91\xbc\x00\x37\x48\xfa\xac\x6d\x90\x19\x37\x12\x0d\x41\xc1\xb7\xce\x67"
+ "\x63\x37\x63\xe9\x9e\x05\xc7\x13\x63\xfc\xec\xa8"),
+ .tag = TEST_DATA_STR (
+ "\x27\x33\x09\x07\xd0\x00\x28\x80\xbb\xb4\xc1\xa1\xd2\x3c\x0b\xe2"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc2) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x59\x01\x2d\x85\xa1\xb9\x0a\xeb\x03\x59\xe6\x38\x4c\x99\x91\xe7\xbe\x21"
+ "\x93\x19\xf5\xb8\x91\xc9\x2c\x38\x4a\xde\x2f\x37\x18\x16"),
+ .iv = TEST_DATA_STR ("\x3c\x9c\xde\x00\xc2\x39\x12\xcf\xf9\x68\x9c\x7c"),
+ .aad = TEST_DATA_STR (
+ "\xe5\xda\xf4\x73\xa4\x70\x86\x0b\x55\x21\x0a\x48\x3c\x0d\x1a\x97\x8d\x8a"
+ "\xdd\x84\x3c\x2c\x09\x7f\x73\xa3\xcd\xa4\x9a\xc4\xa6\x14\xc8\xe8\x87\xd9"
+ "\x4e\x66\x92\x30\x9d\x2e\xd9\x7e\xbe\x1e\xaf\x5d"),
+ .tag = TEST_DATA_STR (
+ "\x04\x82\x39\xe4\xe5\xc2\xc8\xb3\x38\x90\xa7\xc9\x50\xcd\xa8\x52"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc3) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x4b\xe0\x9b\x40\x8a\xd6\x8b\x89\x0f\x94\xbe\x5e\xfa\x7f\xe9\xc9\x17\x36"
+ "\x27\x12\xa3\x48\x0c\x57\xcd\x38\x44\x93\x5f\x35\xac\xb7"),
+ .iv = TEST_DATA_STR ("\x8f\x35\x0b\xd3\xb8\xee\xa1\x73\xfc\x73\x70\xbc"),
+ .aad = TEST_DATA_STR (
+ "\x28\x19\xd6\x5a\xec\x94\x21\x98\xca\x97\xd4\x43\x5e\xfd\x9d\xd4\xd4\x39"
+ "\x3b\x96\xcf\x5b\xa4\x4f\x09\xbc\xe4\xba\x13\x5f\xc8\x63\x6e\x82\x75\xdc"
+ "\xb5\x15\x41\x4b\x8b\xef\xd3\x2f\x91\xfc\x48\x22"),
+ .tag = TEST_DATA_STR (
+ "\xa1\x33\xcb\x7a\x7d\x04\x71\xdb\xac\x61\xfb\x41\x58\x9a\x2e\xfe"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc4) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x13\xcb\x96\x5a\x4d\x9d\x1a\x36\xef\xad\x9f\x6c\xa1\xba\x76\x38\x6a\x5b"
+ "\xb1\x60\xd8\x0b\x09\x17\x27\x71\x02\x35\x7a\xc7\xaf\xc8"),
+ .iv = TEST_DATA_STR ("\xf3\x13\xad\xec\x42\xa6\x6d\x13\xc3\x95\x81\x80"),
+ .aad = TEST_DATA_STR (
+ "\x71\x7b\x48\x35\x88\x98\xe5\xcc\xfe\xa4\x28\x90\x49\xad\xcc\x1b\xb0\xdb"
+ "\x3b\x3e\xbd\x17\x67\xac\x24\xfb\x2b\x7d\x37\xdc\x80\xea\x23\x16\xc1\x7f"
+ "\x14\xfb\x51\xb5\xe1\x8c\xd5\xbb\x09\xaf\xe4\x14"),
+ .tag = TEST_DATA_STR (
+ "\x81\xb4\xef\x7a\x84\xdc\x4a\x0b\x1f\xdd\xbe\xfe\x37\xf5\x38\x52"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc5) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xd2\x7f\x1b\xeb\xbb\xde\xf0\xed\xca\x39\x3a\x62\x61\xb0\x33\x8a\xbb\xc4"
+ "\x91\x26\x2e\xab\x07\x37\xf5\x52\x46\x45\x8f\x66\x68\xcc"),
+ .iv = TEST_DATA_STR ("\xfc\x06\x2f\x85\x78\x86\xe2\x78\xf3\xa5\x67\xd2"),
+ .aad = TEST_DATA_STR (
+ "\x2b\xae\x92\xde\xa6\x4a\xa9\x91\x89\xde\x8e\xa4\xc0\x46\x74\x53\x06\x00"
+ "\x2e\x02\xcf\xb4\x6a\x41\x44\x4c\xe8\xbf\xcc\x32\x9b\xd4\x20\x59\x63\xd9"
+ "\xab\x53\x57\xb0\x26\xa4\xa3\x4b\x1a\x86\x17\x71"),
+ .tag = TEST_DATA_STR (
+ "\x5c\x5a\x6c\x46\x13\xf1\xe5\x22\x59\x63\x30\xd4\x5f\x24\x3f\xdd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc6) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x7b\x4d\x19\xcd\x35\x69\xf7\x4c\x7b\x5d\xf6\x1a\xb7\x83\x79\xee\x6b\xfa"
+ "\x15\x10\x5d\x21\xb1\x0b\xf6\x09\x66\x99\x53\x90\x06\xd0"),
+ .iv = TEST_DATA_STR ("\xfb\xed\x56\x95\xc4\xa7\x39\xed\xed\x97\xb1\xe3"),
+ .aad = TEST_DATA_STR (
+ "\xc6\xf2\xe5\xd6\x63\xbf\xaf\x66\x8d\x01\x45\x50\xef\x2e\x66\xbf\x89\x97"
+ "\x87\x99\xa7\x85\xf1\xf2\xc7\x9a\x2c\xb3\xeb\x3f\x2f\xd4\x07\x62\x07\xd5"
+ "\xf7\xe1\xc2\x84\xb4\xaf\x5c\xff\xc4\xe4\x61\x98"),
+ .tag = TEST_DATA_STR (
+ "\x71\x01\xb4\x34\xfb\x90\xc7\xf9\x5b\x9b\x7a\x0d\xee\xeb\x5c\x81"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc7) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xd3\x43\x14\x88\xd8\xf0\x48\x59\x0b\xd7\x6e\xc6\x6e\x71\x42\x1e\xf0\x9f"
+ "\x65\x5d\x7c\xf8\x04\x3b\xf3\x2f\x75\xb4\xb2\xe7\xef\xcc"),
+ .iv = TEST_DATA_STR ("\xcc\x76\x6e\x98\xb4\x0a\x81\x51\x9f\xa4\x63\x92"),
+ .aad = TEST_DATA_STR (
+ "\x93\x32\x01\x79\xfd\xb4\x0c\xbc\x1c\xcf\x00\xb8\x72\xa3\xb4\xa5\xf6\xc7"
+ "\x0b\x56\xe4\x3a\x84\xfc\xac\x5e\xb4\x54\xa0\xa1\x9a\x74\x7d\x45\x20\x42"
+ "\x61\x1b\xf3\xbb\xaa\xfd\x92\x5e\x80\x6f\xfe\x8e"),
+ .tag = TEST_DATA_STR (
+ "\x3a\xfc\xc3\x36\xce\x8b\x71\x91\xea\xb0\x4a\xd6\x79\x16\x3c\x2a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc8) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa4\x40\x94\x8c\x03\x78\x56\x1c\x39\x56\x81\x3c\x03\x1f\x81\x57\x32\x08"
+ "\xc7\xff\xa8\x15\x11\x4e\xf2\xee\xe1\xeb\x64\x2e\x74\xc6"),
+ .iv = TEST_DATA_STR ("\xc1\xf4\xff\xe5\x4b\x86\x80\x83\x2e\xed\x88\x19"),
+ .aad = TEST_DATA_STR (
+ "\x25\x34\x38\xf1\x32\xb1\x8e\x84\x83\x07\x45\x61\x89\x8c\x56\x52\xb4\x3a"
+ "\x82\xcc\x94\x1e\x8b\x4a\xe3\x7e\x79\x2a\x8e\xd6\xec\x5c\xe2\xbc\xec\x9f"
+ "\x1f\xfc\xf4\x21\x6e\x46\x69\x63\x07\xbb\x77\x4a"),
+ .tag = TEST_DATA_STR (
+ "\x12\x94\x45\xf0\xa3\xc9\x79\xa1\x12\xa3\xaf\xb1\x0a\x24\xe2\x45"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc9) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x79\x87\x06\xb6\x51\x03\x3d\x9e\x9b\xf2\xce\x06\x4f\xb1\x2b\xe7\xdf\x73"
+ "\x08\xcf\x45\xdf\x44\x77\x65\x88\xcd\x39\x1c\x49\xff\x85"),
+ .iv = TEST_DATA_STR ("\x5a\x43\x36\x8a\x39\xe7\xff\xb7\x75\xed\xfa\xf4"),
+ .aad = TEST_DATA_STR (
+ "\x92\x6b\x74\xfe\x63\x81\xeb\xd3\x57\x57\xe4\x2e\x8e\x55\x76\x01\xf2\x28"
+ "\x7b\xfc\x13\x3a\x13\xfd\x86\xd6\x1c\x01\xaa\x84\xf3\x97\x13\xbf\x99\xa8"
+ "\xdc\x07\xb8\x12\xf0\x27\x4c\x9d\x32\x80\xa1\x38"),
+ .tag = TEST_DATA_STR (
+ "\x89\xfe\x48\x1a\x3d\x95\xc0\x3a\x0a\x9d\x4e\xe3\xe3\xf0\xed\x4a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc10) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xc3\xaa\x2a\x39\xa9\xfe\xf4\xa4\x66\x61\x8d\x12\x88\xbb\x62\xf8\xda\x7b"
+ "\x1c\xb7\x60\xcc\xc8\xf1\xbe\x3e\x99\xe0\x76\xf0\x8e\xff"),
+ .iv = TEST_DATA_STR ("\x99\x65\xba\x5e\x23\xd9\x45\x3d\x72\x67\xca\x5b"),
+ .aad = TEST_DATA_STR (
+ "\x93\xef\xb6\xa2\xaf\xfc\x30\x4c\xb2\x5d\xfd\x49\xaa\x3e\x3c\xcd\xb2\x5c"
+ "\xea\xc3\xd3\xce\xa9\x0d\xd9\x9e\x38\x97\x69\x78\x21\x7a\xd5\xf2\xb9\x90"
+ "\xd1\x0b\x91\x72\x5c\x7f\xd2\x03\x5e\xcc\x6a\x30"),
+ .tag = TEST_DATA_STR (
+ "\x00\xa9\x4c\x18\xa4\x57\x2d\xcf\x4f\x9e\x22\x26\xa0\x3d\x4c\x07"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc11) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x14\xe0\x68\x58\x00\x8f\x7e\x77\x18\x6a\x2b\x3a\x79\x28\xa0\xc7\xfc\xee"
+ "\x22\x13\x6b\xc3\x6f\x53\x55\x3f\x20\xfa\x5c\x37\xed\xcd"),
+ .iv = TEST_DATA_STR ("\x32\xeb\xe0\xdc\x9a\xda\x84\x9b\x5e\xda\x7b\x48"),
+ .aad = TEST_DATA_STR (
+ "\x6c\x01\x52\xab\xfa\x48\x5b\x8c\xd6\x7c\x15\x4a\x5f\x04\x11\xf2\x21\x21"
+ "\x37\x97\x74\xd7\x45\xf4\x0e\xe5\x77\xb0\x28\xfd\x0e\x18\x82\x97\x58\x15"
+ "\x61\xae\x97\x22\x23\xd7\x5a\x24\xb4\x88\xae\xd7"),
+ .tag = TEST_DATA_STR (
+ "\x26\x25\xb0\xba\x6e\xe0\x2b\x58\xbc\x52\x9e\x43\xe2\xeb\x47\x1b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc12) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xfb\xb5\x6b\x11\xc5\x1a\x09\x3c\xe1\x69\xa6\x99\x03\x99\xc4\xd7\x41\xf6"
+ "\x2b\x3c\xc6\x1f\x9e\x8a\x60\x9a\x1b\x6a\xe8\xe7\xe9\x65"),
+ .iv = TEST_DATA_STR ("\x9c\x5a\x95\x32\x47\xe9\x1a\xce\xce\xb9\xde\xfb"),
+ .aad = TEST_DATA_STR (
+ "\x46\xcb\x5c\x4f\x61\x79\x16\xa9\xb1\xb2\xe0\x32\x72\xcb\x05\x90\xce\x71"
+ "\x64\x98\x53\x30\x47\xd7\x3c\x81\xe4\xcb\xe9\x27\x8a\x36\x86\x11\x6f\x56"
+ "\x32\x75\x3e\xa2\xdf\x52\xef\xb3\x55\x1a\xea\x2d"),
+ .tag = TEST_DATA_STR (
+ "\x4f\x3b\x82\xe6\xbe\x4f\x08\x75\x60\x71\xf2\xc4\x6c\x31\xfe\xdf"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc13) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xb3\x03\xbf\x02\xf6\xa8\xdb\xb5\xbc\x4b\xac\xca\xb0\x80\x0d\xb5\xee\x06"
+ "\xde\x64\x8e\x2f\xae\x29\x9b\x95\xf1\x35\xc9\xb1\x07\xcc"),
+ .iv = TEST_DATA_STR ("\x90\x64\x95\xb6\x7e\xf4\xce\x00\xb4\x44\x22\xfa"),
+ .aad = TEST_DATA_STR (
+ "\x87\x2c\x6c\x37\x09\x26\x53\x5c\x3f\xa1\xba\xec\x03\x1e\x31\xe7\xc6\xc8"
+ "\x28\x08\xc8\xa0\x60\x74\x2d\xbe\xf1\x14\x96\x1c\x31\x4f\x19\x86\xb2\x13"
+ "\x1a\x9d\x91\xf3\x0f\x53\x06\x7e\xc0\x12\xc6\xb7"),
+ .tag = TEST_DATA_STR (
+ "\x64\xdd\xe3\x71\x69\x08\x2d\x18\x1a\x69\x10\x7f\x60\xc5\xc6\xbb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc14) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x29\xf5\xf8\x07\x59\x03\x06\x3c\xb6\xd7\x05\x06\x69\xb1\xf7\x4e\x08\xa3"
+ "\xf7\x9e\xf5\x66\x29\x2d\xfd\xef\x1c\x06\xa4\x08\xe1\xab"),
+ .iv = TEST_DATA_STR ("\x35\xf2\x5c\x48\xb4\xb5\x35\x5e\x78\xb9\xfb\x3a"),
+ .aad = TEST_DATA_STR (
+ "\x10\x7e\x2e\x23\x15\x9f\xc5\xc0\x74\x8c\xa7\xa0\x77\xe5\xcc\x05\x3f\xa5"
+ "\xc6\x82\xff\x52\x69\xd3\x50\xee\x81\x7f\x8b\x5d\xe4\xd3\x97\x20\x41\xd1"
+ "\x07\xb1\xe2\xf2\xe5\x4c\xa9\x3b\x72\xcd\x04\x08"),
+ .tag = TEST_DATA_STR (
+ "\xfe\xe5\xa9\xba\xeb\xb5\xbe\x01\x65\xde\xaa\x86\x7e\x96\x7a\x9e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc0) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x03\xcc\xb7\xdb\xc7\xb8\x42\x54\x65\xc2\xc3\xfc\x39\xed\x05\x93\x92\x9f"
+ "\xfd\x02\xa4\x5f\xf5\x83\xbd\x89\xb7\x9c\x6f\x64\x6f\xe9"),
+ .iv = TEST_DATA_STR ("\xfd\x11\x99\x85\x53\x3b\xd5\x52\x0b\x30\x1d\x12"),
+ .aad = TEST_DATA_STR (
+ "\x98\xe6\x8c\x10\xbf\x4b\x5a\xe6\x2d\x43\x49\x28\xfc\x64\x05\x14\x7c\x63"
+ "\x01\x41\x73\x03\xef\x3a\x70\x3d\xcf\xd2\xc0\xc3\x39\xa4\xd0\xa8\x9b\xd2"
+ "\x9f\xe6\x1f\xec\xf1\x06\x6a\xb0\x6d\x7a\x5c\x31\xa4\x8f\xfb\xfe\xd2\x2f"
+ "\x74\x9b\x17\xe9\xbd\x0d\xc1\xc6\xf8\xfb\xd6\xfd\x45\x87\x18\x4d\xb9\x64"
+ "\xd5\x45\x61\x32\x10\x6d\x78\x23\x38\xc3\xf1\x17\xec\x05\x22\x9b\x08"
+ "\x99"),
+ .tag = TEST_DATA_STR (
+ "\xcf\x54\xe7\x14\x13\x49\xb6\x6f\x24\x81\x54\x42\x78\x10\xc8\x7a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc1) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x57\xe1\x12\xcd\x45\xf2\xc5\x7d\xdb\x81\x9e\xa6\x51\xc2\x06\x76\x31\x63"
+ "\xef\x01\x6c\xee\xad\x5c\x4e\xae\x40\xf2\xbb\xe0\xe4\xb4"),
+ .iv = TEST_DATA_STR ("\x18\x80\x22\xc2\x12\x5d\x2b\x1f\xcf\x9e\x47\x69"),
+ .aad = TEST_DATA_STR (
+ "\x09\xc8\xf4\x45\xce\x5b\x71\x46\x56\x95\xf8\x38\xc4\xbb\x2b\x00\x62\x4a"
+ "\x1c\x91\x85\xa3\xd5\x52\x54\x6d\x9d\x2e\xe4\x87\x00\x07\xaa\xf3\x00\x70"
+ "\x08\xf8\xae\x9a\xff\xb7\x58\x8b\x88\xd0\x9a\x90\xe5\x8b\x45\x7f\x88\xf1"
+ "\xe3\x75\x2e\x3f\xb9\x49\xce\x37\x86\x70\xb6\x7a\x95\xf8\xcf\x7f\x5c\x7c"
+ "\xeb\x65\x0e\xfd\x73\x5d\xbc\x65\x2c\xae\x06\xe5\x46\xa5\xdb\xd8\x61"
+ "\xbd"),
+ .tag = TEST_DATA_STR (
+ "\x9e\xfc\xdd\xfa\x0b\xe2\x15\x82\xa0\x57\x49\xf4\x05\x0d\x29\xfe"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc2) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa4\xdd\xf3\xca\xb7\x45\x3a\xae\xfa\xd6\x16\xfd\x65\xd6\x3d\x13\x00\x5e"
+ "\x94\x59\xc1\x7d\x31\x73\xcd\x6e\xd7\xf2\xa8\x6c\x92\x1f"),
+ .iv = TEST_DATA_STR ("\x06\x17\x7b\x24\xc5\x8f\x3b\xe4\xf3\xdd\x49\x20"),
+ .aad = TEST_DATA_STR (
+ "\xf9\x5b\x04\x6d\x80\x48\x5e\x41\x1c\x56\xb8\x34\x20\x9d\x3a\xbd\x5a\x8a"
+ "\x9d\xdf\x72\xb1\xb9\x16\x67\x9a\xdf\xdd\xe8\x93\x04\x43\x15\xa5\xf4\x96"
+ "\x7f\xd0\x40\x5e\xc2\x97\xaa\x33\x2f\x67\x6f\xf0\xfa\x5b\xd7\x95\xeb\x60"
+ "\x9b\x2e\x4f\x08\x8d\xb1\xcd\xf3\x7c\xcf\xf0\x73\x5a\x5e\x53\xc4\xc1\x21"
+ "\x73\xa0\x02\x6a\xea\x42\x38\x8a\x7d\x71\x53\xa8\x83\x0b\x8a\x90\x1c"
+ "\xf9"),
+ .tag = TEST_DATA_STR (
+ "\x9d\x1b\xd8\xec\xb3\x27\x69\x06\x13\x8d\x0b\x03\xfc\xb8\xc1\xbb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc3) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x24\xa9\x2b\x24\xe8\x59\x03\xcd\x4a\xaa\xbf\xe0\x7c\x31\x0d\xf5\xa4\xf8"
+ "\xf4\x59\xe0\x3a\x63\xcb\xd1\xb4\x78\x55\xb0\x9c\x0b\xe8"),
+ .iv = TEST_DATA_STR ("\x22\xe7\x56\xdc\x89\x8d\x4c\xf1\x22\x08\x06\x12"),
+ .aad = TEST_DATA_STR (
+ "\x2e\x01\xb2\x53\x6d\xbe\x37\x6b\xe1\x44\x29\x6f\x5c\x38\xfb\x09\x9e\x00"
+ "\x8f\x96\x2b\x9f\x0e\x89\x63\x34\xb6\x40\x83\x93\xbf\xf1\x02\x0a\x0e\x44"
+ "\x24\x77\xab\xfd\xb1\x72\x72\x13\xb6\xcc\xc5\x77\xf5\xe1\x6c\xb0\x57\xc8"
+ "\x94\x5a\x07\xe3\x07\x26\x4b\x65\x97\x9a\xed\x96\xb5\x99\x5f\x40\x25\x0f"
+ "\xfb\xaa\xa1\xa1\xf0\xec\xcf\x39\x40\x15\xf6\x29\x0f\x5e\x64\xdf\xe5"
+ "\xca"),
+ .tag = TEST_DATA_STR (
+ "\x0d\x7f\x1a\xed\x47\x08\xa0\x3b\x0c\x80\xb2\xa1\x87\x85\xc9\x6d"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc4) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x15\x27\x6f\xc6\x44\x38\x57\x8e\x0e\xc5\x33\x66\xb9\x0a\x0e\x23\xd9\x39"
+ "\x10\xfe\xc1\x0d\xc3\x00\x3d\x9b\x3f\x3f\xa7\x2d\xb7\x02"),
+ .iv = TEST_DATA_STR ("\xc5\xe9\x31\x94\x6d\x5c\xae\xbc\x22\x76\x56\xd2"),
+ .aad = TEST_DATA_STR (
+ "\x3f\x96\x7c\x83\xba\x02\xe7\x7c\x14\xe9\xd4\x11\x85\xeb\x87\xf1\x72\x25"
+ "\x0e\x93\xed\xb0\xf8\x2b\x67\x42\xc1\x24\x29\x8a\xb6\x94\x18\x35\x8e\xdd"
+ "\xef\xa3\x9f\xed\xc3\xca\xde\x9d\x80\xf0\x36\xd8\x64\xa5\x9e\xad\x37\xc8"
+ "\x77\x27\xc5\x6c\x70\x1a\x8c\xd9\x63\x44\x69\xff\x31\xc7\x04\xf5\xee\x39"
+ "\x35\x41\x57\xe6\x55\x84\x67\xb9\x28\x24\xda\x36\xb1\xc0\x71\xbe\xdf"
+ "\xe9"),
+ .tag = TEST_DATA_STR (
+ "\xa0\xff\xa1\x9a\xdc\xf3\x1d\x06\x1c\xd0\xdd\x46\xd2\x40\x15\xef"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc5) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xec\x09\x80\x4a\x04\x8b\xb8\x54\xc7\x16\x18\xb5\xa3\xa1\xc5\x90\x91\x0f"
+ "\xc8\xa6\x84\x55\x13\x9b\x71\x94\x86\xd2\x28\x0e\xa5\x9a"),
+ .iv = TEST_DATA_STR ("\xd0\xb1\x24\x7e\x71\x21\xa9\x27\x6a\xc1\x8c\xa3"),
+ .aad = TEST_DATA_STR (
+ "\x66\xb1\xd3\x9d\x41\x45\x96\x30\x8e\x86\x6b\x04\x47\x6e\x05\x3b\x71\xac"
+ "\xd1\xcd\x07\xce\x80\x93\x95\x77\xeb\xbe\xac\xe0\x43\x0f\x7e\x4c\x0c\x18"
+ "\x5f\xe1\xd9\x7a\xc7\x56\x99\x50\xc8\x3d\xb4\x0b\xbe\xd0\xf1\xd1\x73\xe1"
+ "\xaa\x0d\xc2\x8b\x47\x73\x70\x50\x32\xd9\x75\x51\xf7\xfc\xef\x7f\x55\xe4"
+ "\xb6\x9f\x88\xdf\x65\x00\x32\xdf\xc5\x23\x2c\x15\x66\x41\x10\x4b\x53"
+ "\x97"),
+ .tag = TEST_DATA_STR (
+ "\x84\x40\xe6\xd8\x64\xab\x77\x8f\x9b\xe4\x78\xf2\x03\x16\x2d\x86"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc6) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x4a\xdf\x86\xbf\xa5\x47\x72\x5e\x4b\x80\x36\x5a\x5a\x32\x7c\x10\x70\x40"
+ "\xfa\xcf\xff\x00\x7d\xc3\x51\x02\x06\x6b\xd6\xa9\x95\xc4"),
+ .iv = TEST_DATA_STR ("\xb1\x01\x8c\xc3\x31\x91\x12\x55\xa5\x5a\x07\x95"),
+ .aad = TEST_DATA_STR (
+ "\x05\x3c\xa4\x42\x8c\x99\x0b\x44\x56\xd3\xc1\x89\x5d\x5d\x52\xde\xff\x67"
+ "\x58\x96\xde\x9f\xaa\x53\xd8\xcf\x24\x12\x55\xf4\xa3\x1d\xc3\x39\x9f\x15"
+ "\xd8\x3b\xe3\x80\x25\x66\x16\xe5\xaf\x04\x3a\xbf\xb3\x75\x52\x65\x5a\xdf"
+ "\x4f\x2e\x68\xdd\xa2\x4b\xc3\x73\x69\x51\x13\x4f\x35\x9d\x9c\x0e\x28\x8b"
+ "\xb7\x98\xb6\xc3\xea\x46\x23\x92\x31\xa3\xcb\x28\x00\x66\xdb\x98\x62"
+ "\xe7"),
+ .tag = TEST_DATA_STR (
+ "\xc7\x42\x4f\x38\x08\x49\x30\xbf\xc5\xed\xc1\xfc\xf1\xe7\x60\x8d"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc7) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x3c\x92\xe0\xd1\xe3\x9a\x3c\x76\x65\x73\xc4\x64\x6c\x76\x8c\x40\x2c\xcf"
+ "\xf4\x8a\x56\x68\x2a\x93\x43\x35\x12\xab\xf0\x45\x6e\x00"),
+ .iv = TEST_DATA_STR ("\xd5\x7f\x31\x9e\x59\x01\x91\x84\x1d\x2b\x98\xbd"),
+ .aad = TEST_DATA_STR (
+ "\x84\x0d\x93\x94\xaa\x24\x0e\x52\xba\x15\x21\x51\xc1\x2a\xcd\x1c\xd4\x48"
+ "\x81\xe8\x54\x9d\xc8\x32\xb7\x1a\x45\xda\x7e\xfc\xc7\x4f\xb7\xe8\x44\xd9"
+ "\xfe\xc2\x5e\x5d\x49\x7b\x8f\xb8\xf4\x7f\x32\x8c\x8d\x99\x04\x5a\x19\xe3"
+ "\x66\xe6\xce\x5e\x19\xdc\x26\xf6\x7a\x81\xa9\x4f\xa6\xc9\x7c\x31\x4d\x88"
+ "\x6e\x7b\x56\xef\xf1\x44\xc0\x9f\x6f\xa5\x19\xdb\x63\x08\xbc\x73\x42"
+ "\x2e"),
+ .tag = TEST_DATA_STR (
+ "\xcb\x4e\xf7\x2d\xbd\xa4\x91\x4d\x74\x34\xf9\x68\x6f\x82\x3e\x2f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc8) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xb6\x6b\xa3\x97\x33\x88\x8a\x9e\x0a\x2e\x30\x45\x28\x44\x16\x1d\xc3\x3c"
+ "\xb3\x83\xc0\x2c\xe1\x6c\x4e\xfa\xd5\x45\x25\x09\xb5\xb5"),
+ .iv = TEST_DATA_STR ("\x93\x7c\xb6\x65\xe3\x70\x59\xb2\xe4\x03\x59\xf2"),
+ .aad = TEST_DATA_STR (
+ "\xdb\xcd\x96\x94\xa8\x83\x48\x60\x03\x4e\x8e\xde\x3a\x5b\xd4\x19\xfc\xf9"
+ "\x1c\x00\x5a\xd9\x9f\x48\x8a\xa6\x23\xf5\x81\x62\x20\x93\xf9\xd4\x1e\x6a"
+ "\x68\xe2\x0f\xd2\x02\xf3\x02\xbc\xfc\x44\x17\xca\x89\x09\x0b\xfc\xd4\xd5"
+ "\x22\x4e\x8f\xf4\xeb\x5b\xba\xe4\xec\xb2\x7b\xaa\x23\x9f\x59\xc2\xf9\x9c"
+ "\xd4\x7c\x0a\x26\x9c\x49\x79\x06\xb4\x1a\x8f\x32\x0a\x3d\xd2\xdc\x2d"
+ "\xe2"),
+ .tag = TEST_DATA_STR (
+ "\xbd\xc8\x24\x93\x02\xd9\xd6\x66\xcf\x71\x68\x31\x7c\x11\x87\x43"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc9) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x2f\x9f\xcd\x10\x43\x45\x56\x95\x63\x8c\x99\x1a\x1b\x1d\x35\xad\x57\xc1"
+ "\x8e\xf0\x72\x73\x22\x74\x7b\x79\x91\xab\xc3\xd7\x87\xf3"),
+ .iv = TEST_DATA_STR ("\xd0\x6c\xf5\x48\xf6\x28\x69\xf4\xbe\xd7\xa3\x18"),
+ .aad = TEST_DATA_STR (
+ "\x43\x20\x23\xc1\x2c\xf1\xf6\x14\xe1\x00\x51\x12\xa1\x7d\xbe\x6c\x5d\x54"
+ "\x02\x2a\x95\xcf\x63\x35\xa5\xbc\x55\x00\x4c\x75\xf0\x9a\x56\x99\x73\x9e"
+ "\xcf\x92\x8e\x1c\x78\xd0\x3d\xad\x50\x96\xa1\x7a\x08\x4a\xfe\x1c\xc2\x20"
+ "\x41\xbb\xdf\xb5\x98\x5b\xd0\x8b\x0d\xcc\x59\xd2\xb0\x8c\xd8\x6b\x7a\xad"
+ "\x59\x7c\x4c\xd7\xb4\xba\x6d\x6a\x73\x70\xb8\x39\x95\xa6\x51\x1a\x1f"
+ "\x9e"),
+ .tag = TEST_DATA_STR (
+ "\x32\x2e\xb8\x4f\xb6\x88\x4f\x10\xcf\xb7\x66\xc2\xe3\xec\x77\x9e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc10) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x21\xc5\x83\x9a\x63\xe1\x23\x0c\x06\xb0\x86\x34\x1c\x96\xab\x74\x58\x5e"
+ "\x69\xbc\xed\x94\x33\x2c\xae\xb1\xfa\x77\xd5\x10\xc2\x4f"),
+ .iv = TEST_DATA_STR ("\x5a\xb6\xe5\xed\x6e\xe7\x33\xbe\x72\x50\x85\x8c"),
+ .aad = TEST_DATA_STR (
+ "\xc9\x2f\x08\xe3\x0f\x67\xd4\x25\x16\x13\x3c\x48\xe9\x7b\x65\xcc\x9e\x12"
+ "\x43\x65\xe1\x10\xab\xa5\xe7\xb2\xcb\xe8\x3d\xeb\xcc\x99\xed\xf4\xeb\x00"
+ "\x07\xaf\x05\x2b\xda\x22\xd8\x59\x00\x27\x1b\x18\x97\xaf\x4f\xd9\xac\xe6"
+ "\xa2\xd0\x9d\x98\x4a\xc3\xde\x79\xd0\x5d\xe0\xb1\x05\xa8\x1b\x12\x54\x2b"
+ "\x2c\x48\xe2\x7d\x40\x9f\xd6\x99\x2d\xd0\x62\xd6\x05\x5d\x6f\xc6\x68"
+ "\x42"),
+ .tag = TEST_DATA_STR (
+ "\x53\xb0\xe4\x50\x30\x9d\x14\x64\x59\xf2\xa1\xe4\x6c\x9d\x9e\x23"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc11) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x25\xa1\x44\xf0\xfd\xba\x18\x41\x25\xd8\x1a\x87\xe7\xed\x82\xfa\xd3\x3c"
+ "\x70\x1a\x09\x4a\x67\xa8\x1f\xe4\x69\x2d\xc6\x9a\xfa\x31"),
+ .iv = TEST_DATA_STR ("\x8b\xf5\x75\xc5\xc2\xb4\x5b\x4e\xfc\x67\x46\xe4"),
+ .aad = TEST_DATA_STR (
+ "\x2a\x36\x7c\xb0\xd3\xb7\xc5\xb8\x32\x0b\x3c\xf9\x5e\x82\xb6\xba\x0b\xba"
+ "\x1d\x09\xa2\x05\x58\x85\xde\xdd\x9e\xf5\x64\x16\x23\x68\x22\x12\x10\x32"
+ "\x38\xb8\xf7\x75\xcc\xe4\x2d\xdf\xd4\xf6\x63\x82\xf2\xc3\xa5\xe8\xd6\xdf"
+ "\xf9\x16\x3c\xed\x83\x58\x0a\x75\x70\x55\x74\x02\x6b\x55\xdb\x90\xf7\x5f"
+ "\x8a\xbb\x30\x14\xc9\xa7\x07\x02\x1d\xed\xc0\x75\xda\x38\xbe\xbb\xf0"
+ "\xa0"),
+ .tag = TEST_DATA_STR (
+ "\x0e\x2c\xe9\xca\xc8\xdf\xce\xdb\x05\x72\xec\x6c\xab\x62\x1e\xfd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc12) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x42\xbc\x84\x1b\x3b\x03\xa8\x07\xcd\x36\x6a\x35\xec\xec\x8a\x6a\xeb\xef"
+ "\x7c\x4c\xba\x0e\xc8\xcb\x8d\xa0\xda\x41\xdf\x8c\xce\xf1"),
+ .iv = TEST_DATA_STR ("\x1b\xd4\x6f\x85\xdf\x5f\x4b\x3a\x12\x6e\xe3\x15"),
+ .aad = TEST_DATA_STR (
+ "\xed\xe3\xdc\xdd\xbd\xc7\xd8\xe5\xd0\x34\xc0\x16\x61\x33\x2e\xc3\x49\xcb"
+ "\x4e\x7a\x9f\xba\xaf\x7a\xbe\x2c\x64\x75\x87\xdb\x86\xcd\x42\x7c\xe6\x69"
+ "\x08\xe0\x70\xbc\x49\xef\x83\x87\x47\xe0\x6b\x45\xac\x48\x6d\xfb\xea\x6f"
+ "\x86\x98\xb4\x62\x5e\x21\xe6\x9d\xb8\x32\x7e\xc0\x5c\xfd\x74\xac\xcb\xe6"
+ "\x7a\xb6\x44\x94\x8c\xdb\x55\x4a\xf1\x79\xa1\xe2\x64\xe0\x8f\xe1\x66"
+ "\x41"),
+ .tag = TEST_DATA_STR (
+ "\x63\x3a\xb6\xaa\xf5\xb3\x2b\x53\xa7\x94\xf6\xbe\x62\x62\xfc\x5f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc13) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xc2\x5b\x85\x00\xbe\x73\x21\x05\x96\xfc\x4a\x9f\xb4\xd8\x4d\x1a\x33\x79"
+ "\xa9\x1e\x3f\x0a\x6c\xc4\x17\x7d\x99\x60\x46\x62\x76\x79"),
+ .iv = TEST_DATA_STR ("\xb5\x6c\x48\xc0\xc4\xcd\x31\x8b\x20\x43\x70\x02"),
+ .aad = TEST_DATA_STR (
+ "\xbc\xd1\x4d\xd0\x43\xfd\xc8\xc3\x27\x95\x7e\x1c\x14\x28\x69\x85\x43\xec"
+ "\x86\x02\x52\x1a\x7c\x74\x78\x8d\x29\x6d\x37\xd4\x82\x8f\x10\xf9\x06\x56"
+ "\x88\x3d\x25\x31\xc7\x02\xeb\xda\x2d\xc0\xa6\x8d\xab\x00\x15\x45\x77\x45"
+ "\x44\x55\xfa\xd9\x86\xff\x8e\x09\x73\x09\x8d\xbf\x37\x0f\xf7\x03\xed\x98"
+ "\x22\x2b\x94\x57\x26\xed\x9b\xe7\x90\x92\x10\xdd\xbc\x67\x2e\x99\xfd"
+ "\xd9"),
+ .tag = TEST_DATA_STR (
+ "\x81\x71\xd4\xff\x60\xfe\x7e\xf6\xde\x02\x88\x32\x6a\xa7\x32\x23"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc14) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xdd\x95\x25\x9b\xc8\xee\xfa\x3e\x49\x3c\xb1\xa6\xba\x1d\x8e\xe2\xb3\x41"
+ "\xd5\x23\x0d\x50\x36\x30\x94\xa2\xcc\x34\x33\xb3\xd9\xb9"),
+ .iv = TEST_DATA_STR ("\xa1\xa6\xce\xd0\x84\xf4\xf1\x39\x90\x75\x0a\x9e"),
+ .aad = TEST_DATA_STR (
+ "\xd4\x6d\xb9\x0e\x13\x68\x4b\x26\x14\x9c\xb3\xb7\xf7\x76\xe2\x28\xa0\x53"
+ "\x8f\xa1\x89\x2c\x41\x8a\xaa\xd0\x7a\xa0\x8d\x30\x76\xf4\xa5\x2b\xee\x8f"
+ "\x13\x0f\xf5\x60\xdb\x2b\x8d\x10\x09\xe9\x26\x0f\xa6\x23\x3f\xc2\x27\x33"
+ "\xe0\x50\xc9\xe4\xf7\xcc\x69\x90\x62\x76\x5e\x26\x1d\xff\xff\x11\x59\xe9"
+ "\x06\x0b\x26\xc8\x06\x5d\xfa\xb0\x40\x55\xb5\x8c\x82\xc3\x40\xd9\x87"
+ "\xc9"),
+ .tag = TEST_DATA_STR (
+ "\x9e\x12\x0b\x01\x89\x9f\xe2\xcb\x3e\x3a\x0b\x0c\x05\x04\x59\x40"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
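For context on the block of vectors ending here: the VNET_CRYPTO_ALG_AES_*_NULL_GMAC algorithms are AES-GCM run with an empty plaintext, i.e. plain GMAC, so every vector authenticates its input purely as AAD and needs only .key, .iv, .aad and .tag. A rough sketch of replaying one vector through the ops API, reusing names that appear in crypto_test.c later in this patch (the .aad/.aad_len fields and the tag comparison are assumptions about vnet_crypto_op_t, not code quoted from the harness):

    vnet_crypto_op_t op;
    /* pick the encrypt op for the alg, as crypto_test.c does */
    vnet_crypto_op_init (&op, ad->op_by_type[VNET_CRYPTO_OP_TYPE_ENCRYPT]);
    op.key_index = key_index;    /* key registered beforehand */
    op.iv = (u8 *) r->iv.data;   /* 12-byte nonce             */
    op.aad = (u8 *) r->aad.data; /* assumed field name        */
    op.aad_len = r->aad.length;  /* assumed field name        */
    op.len = 0;                  /* GMAC: nothing to encrypt  */
    vnet_crypto_process_ops (vm, &op, 1);
    /* the computed tag is then checked against the vector's .tag */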
diff --git a/src/plugins/unittest/crypto/chacha20_poly1305.c b/src/plugins/unittest/crypto/chacha20_poly1305.c
index 650898524fd..740d6304b8f 100644
--- a/src/plugins/unittest/crypto/chacha20_poly1305.c
+++ b/src/plugins/unittest/crypto/chacha20_poly1305.c
@@ -61,7 +61,6 @@ static u8 tc1_ciphertext[] = {
0x61, 0x16
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc1) = {
.name = "CHACHA20-POLY1305 TC1",
.alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305,
@@ -72,7 +71,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc1) = {
.plaintext = TEST_DATA (tc1_plaintext),
.ciphertext = TEST_DATA (tc1_ciphertext),
};
-/* *INDENT-ON* */
static u8 tc2_key[] = {
0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88,
@@ -100,7 +98,6 @@ static u8 tc2_plaintext[] = { };
static u8 tc2_ciphertext[] = { };
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc2) = {
.name = "CHACHA20-POLY1305 TC2",
.alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305,
@@ -111,7 +108,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc2) = {
.plaintext = TEST_DATA (tc2_plaintext),
.ciphertext = TEST_DATA (tc2_ciphertext),
};
-/* *INDENT-ON* */
static u8 tc3_key[] = {
0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f,
@@ -137,7 +133,6 @@ static u8 tc3_plaintext[] = { };
static u8 tc3_ciphertext[] = { };
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc3) = {
.name = "CHACHA20-POLY1305 TC3",
.alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305,
@@ -148,5 +143,4 @@ UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc3) = {
.plaintext = TEST_DATA (tc3_plaintext),
.ciphertext = TEST_DATA (tc3_ciphertext),
};
-/* *INDENT-ON* */
diff --git a/src/plugins/unittest/crypto/crypto.h b/src/plugins/unittest/crypto/crypto.h
index 5e09a3ab0ec..90f75dbcfac 100644
--- a/src/plugins/unittest/crypto/crypto.h
+++ b/src/plugins/unittest/crypto/crypto.h
@@ -61,6 +61,10 @@ typedef struct
extern crypto_test_main_t crypto_test_main;
#define TEST_DATA(n) { .data = (u8 *) n, .length = sizeof (n)}
+#define TEST_DATA_STR(n) \
+ { \
+ .data = (u8 *) n, .length = sizeof (n) - 1 \
+ }
#define TEST_DATA_CHUNK(s,off,n) { .data = (u8 *) s + off, .length = n}
#define UNITTEST_REGISTER_CRYPTO_TEST(x) \
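A note on the TEST_DATA_STR addition above: the GMAC vectors earlier in this patch are written as "\x.."-style string literals, and a C string literal carries a trailing NUL, so the length must be sizeof (n) - 1 to count only the escaped bytes. A minimal illustration (the local name k is hypothetical):

    static const char k[] = "\x01\x02\x03";
    /* TEST_DATA (k)     -> .length == 4, trailing NUL included */
    /* TEST_DATA_STR (k) -> .length == 3, the data bytes only   */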
diff --git a/src/plugins/unittest/crypto/rfc2202_hmac_md5.c b/src/plugins/unittest/crypto/rfc2202_hmac_md5.c
index 7a39aed3030..c9604b84c1d 100644
--- a/src/plugins/unittest/crypto/rfc2202_hmac_md5.c
+++ b/src/plugins/unittest/crypto/rfc2202_hmac_md5.c
@@ -31,7 +31,6 @@ static u8 md5_tc1_digest[] = {
0x13, 0xf4, 0x8e, 0xf8, 0x15, 0x8b, 0xfc, 0x9d
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc1) = {
.name = "RFC2202 HMAC-MD5 TC1",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -39,7 +38,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc1) = {
.plaintext = TEST_DATA (md5_tc1_data),
.digest = TEST_DATA (md5_tc1_digest),
};
-/* *INDENT-ON* */
static char md5_tc2_key[4] = "Jefe";
@@ -50,7 +48,6 @@ static u8 md5_tc2_digest[] = {
0xea, 0xa8, 0x6e, 0x31, 0x0a, 0x5d, 0xb7, 0x38,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc2) = {
.name = "RFC2202 HMAC-MD5 TC2",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -58,7 +55,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc2) = {
.plaintext = TEST_DATA (md5_tc2_data),
.digest = TEST_DATA (md5_tc2_digest),
};
-/* *INDENT-ON* */
static char md5_tc3_key[16] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -80,7 +76,6 @@ static u8 md5_tc3_digest[] = {
0xdb, 0xb8, 0xc7, 0x33, 0xf0, 0xe8, 0xb3, 0xf6,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc3) = {
.name = "RFC2202 HMAC-MD5 TC3",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -88,7 +83,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc3) = {
.plaintext = TEST_DATA (md5_tc3_data),
.digest = TEST_DATA (md5_tc3_digest),
};
-/* *INDENT-ON* */
static u8 md5_tc4_key[25] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@@ -112,7 +106,6 @@ static u8 md5_tc4_digest[] = {
0x3a, 0x75, 0x16, 0x47, 0x46, 0xff, 0xaa, 0x79,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc4) = {
.name = "RFC2202 HMAC-MD5 TC4",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -120,7 +113,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc4) = {
.plaintext = TEST_DATA (md5_tc4_data),
.digest = TEST_DATA (md5_tc4_digest),
};
-/* *INDENT-ON* */
static u8 md5_tc5_key[16] = {
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
@@ -134,7 +126,6 @@ static u8 md5_tc5_digest[] = {
0xf9, 0xba, 0xb9, 0x95, 0x69, 0x0e, 0xfd, 0x4c,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc5) = {
.name = "RFC2202 HMAC-MD5 TC5",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -142,7 +133,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc5) = {
.plaintext = TEST_DATA (md5_tc5_data),
.digest = TEST_DATA (md5_tc5_digest),
};
-/* *INDENT-ON* */
static u8 md5_tc6_key[80] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -165,7 +155,6 @@ static u8 md5_tc6_digest[] = {
0x0b, 0x62, 0xe6, 0xce, 0x61, 0xb9, 0xd0, 0xcd,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc6) = {
.name = "RFC2202 HMAC-MD5 TC6",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -173,7 +162,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc6) = {
.plaintext = TEST_DATA (md5_tc6_data),
.digest = TEST_DATA (md5_tc6_digest),
};
-/* *INDENT-ON* */
static char md5_tc7_data[73] =
"Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data";
@@ -183,7 +171,6 @@ static u8 md5_tc7_digest[] = {
0x1f, 0xb1, 0xf5, 0x62, 0xdb, 0x3a, 0xa5, 0x3e,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc7) = {
.name = "RFC2202 HMAC-MD5 TC7",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -203,7 +190,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc7_chained) = {
TEST_DATA_CHUNK (md5_tc7_data, 40, 33)
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto/rfc2202_hmac_sha1.c b/src/plugins/unittest/crypto/rfc2202_hmac_sha1.c
index 2513c5ebad2..aa440625cc6 100644
--- a/src/plugins/unittest/crypto/rfc2202_hmac_sha1.c
+++ b/src/plugins/unittest/crypto/rfc2202_hmac_sha1.c
@@ -33,7 +33,6 @@ static u8 sha1_tc1_digest[] = {
0xf1, 0x46, 0xbe, 0x00
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc1) = {
.name = "RFC2202 HMAC-SHA-1 TC1",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -41,7 +40,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc1) = {
.plaintext = TEST_DATA (sha1_tc1_data),
.digest = TEST_DATA (sha1_tc1_digest),
};
-/* *INDENT-ON* */
static char sha1_tc2_key[4] = "Jefe";
@@ -53,7 +51,6 @@ static u8 sha1_tc2_digest[] = {
0x25, 0x9a, 0x7c, 0x79
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc2) = {
.name = "RFC2202 HMAC-SHA-1 TC2",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -61,7 +58,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc2) = {
.plaintext = TEST_DATA (sha1_tc2_data),
.digest = TEST_DATA (sha1_tc2_digest),
};
-/* *INDENT-ON* */
static u8 sha1_tc3_key[20] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -85,7 +81,6 @@ static u8 sha1_tc3_digest[] = {
0x63, 0xf1, 0x75, 0xd3,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc3) = {
.name = "RFC2202 HMAC-SHA-1 TC3",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -93,7 +88,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc3) = {
.plaintext = TEST_DATA (sha1_tc3_data),
.digest = TEST_DATA (sha1_tc3_digest),
};
-/* *INDENT-ON* */
static u8 sha1_tc4_key[25] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@@ -118,7 +112,6 @@ static u8 sha1_tc4_digest[] = {
0x2d, 0x72, 0x35, 0xda,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc4) = {
.name = "RFC2202 HMAC-SHA-1 TC4",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -126,7 +119,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc4) = {
.plaintext = TEST_DATA (sha1_tc4_data),
.digest = TEST_DATA (sha1_tc4_digest),
};
-/* *INDENT-ON* */
static u8 sha1_tc5_key[20] = {
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
@@ -142,7 +134,6 @@ static u8 sha1_tc5_digest[] = {
0x4a, 0x9a, 0x5a, 0x04
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc5) = {
.name = "RFC2202 HMAC-SHA-1 TC5",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -150,14 +141,12 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc5) = {
.plaintext = TEST_DATA (sha1_tc5_data),
.digest = TEST_DATA (sha1_tc5_digest),
};
-/* *INDENT-ON* */
static u8 sha1_tc5_digest_96[12] = {
0x4c, 0x1a, 0x03, 0x42, 0x4b, 0x55, 0xe0, 0x7f,
0xe7, 0xf2, 0x7b, 0xe1
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc5_trunc) = {
.name = "RFC2202 HMAC-SHA-1-96 TC5-trunc",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -165,7 +154,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc5_trunc) = {
.plaintext = TEST_DATA (sha1_tc5_data),
.digest = TEST_DATA (sha1_tc5_digest_96),
};
-/* *INDENT-ON* */
static u8 sha1_tc6_key[80] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -189,7 +177,6 @@ static u8 sha1_tc6_digest[] = {
0xed, 0x40, 0x21, 0x12
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc6) = {
.name = "RFC2202 HMAC-SHA-1 TC6",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -197,7 +184,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc6) = {
.plaintext = TEST_DATA (sha1_tc6_data),
.digest = TEST_DATA (sha1_tc6_digest),
};
-/* *INDENT-ON* */
static char sha1_tc7_data[73] =
"Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data";
@@ -208,7 +194,6 @@ static u8 sha1_tc7_digest[20] = {
0xbb, 0xff, 0x1a, 0x91
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc7) = {
.name = "RFC2202 HMAC-SHA-1 TC7",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -237,7 +222,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc7_inc) = {
.key.length = 80,
.digest.length = 12,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto/rfc4231.c b/src/plugins/unittest/crypto/rfc4231.c
index 127e1bfe521..edd502e0609 100644
--- a/src/plugins/unittest/crypto/rfc4231.c
+++ b/src/plugins/unittest/crypto/rfc4231.c
@@ -61,7 +61,6 @@ static u8 tc1_digest_sha512[] = {
0x2e, 0x69, 0x6c, 0x20, 0x3a, 0x12, 0x68, 0x54
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc1_sha224) = {
.name = "RFC4231 TC1",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -93,7 +92,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc1_sha512) = {
.plaintext = TEST_DATA (tc1_data),
.digest = TEST_DATA (tc1_digest_sha512),
};
-/* *INDENT-ON* */
static char tc2_key[4] = "Jefe";
@@ -133,7 +131,6 @@ static u8 tc2_digest_sha512[] = {
0x63, 0x6e, 0x07, 0x0a, 0x38, 0xbc, 0xe7, 0x37,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc2_sha224) = {
.name = "RFC4231 TC2",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -165,7 +162,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc2_sha512) = {
.plaintext = TEST_DATA (tc2_data),
.digest = TEST_DATA (tc2_digest_sha512),
};
-/* *INDENT-ON* */
static u8 tc3_key[20] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -217,7 +213,6 @@ static u8 tc3_digest_sha512[] = {
0x74, 0x27, 0x88, 0x59, 0xe1, 0x32, 0x92, 0xfb
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc3_sha224) = {
.name = "RFC4231 TC3",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -249,7 +244,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc3_sha512) = {
.plaintext = TEST_DATA (tc3_data),
.digest = TEST_DATA (tc3_digest_sha512),
};
-/* *INDENT-ON* */
static u8 tc4_key[25] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@@ -302,7 +296,6 @@ static u8 tc4_digest_sha512[] = {
0xe2, 0xad, 0xeb, 0xeb, 0x10, 0xa2, 0x98, 0xdd
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc4_sha224) = {
.name = "RFC4231 TC4",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -334,7 +327,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc4_sha512) = {
.plaintext = TEST_DATA (tc4_data),
.digest = TEST_DATA (tc4_digest_sha512),
};
-/* *INDENT-ON* */
static u8 tc5_key[20] = {
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
@@ -368,7 +360,6 @@ static u8 tc5_digest_sha512[16] = {
0x1d, 0x41, 0x79, 0xbc, 0x89, 0x1d, 0x87, 0xa6
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc5_sha224) = {
.name = "RFC4231 TC5",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -400,7 +391,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc5_sha512) = {
.plaintext = TEST_DATA (tc5_data),
.digest = TEST_DATA (tc5_digest_sha512),
};
-/* *INDENT-ON* */
static u8 tc6_key[131] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -459,7 +449,6 @@ static u8 tc6_digest_sha512[] = {
0x8b, 0x91, 0x5a, 0x98, 0x5d, 0x78, 0x65, 0x98
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc6_sha224) = {
.name = "RFC4231 TC6",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -491,7 +480,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc6_sha512) = {
.plaintext = TEST_DATA (tc6_data),
.digest = TEST_DATA (tc6_digest_sha512),
};
-/* *INDENT-ON* */
static u8 tc7_key[131] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -552,7 +540,6 @@ static u8 tc7_digest_sha512[] = {
0x65, 0xc9, 0x74, 0x40, 0xfa, 0x8c, 0x6a, 0x58
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc7_sha224) = {
.name = "RFC4231 TC7",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -598,7 +585,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc7_sha512_chain) = {
TEST_DATA_CHUNK (tc7_data, 150, 2),
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto_test.c b/src/plugins/unittest/crypto_test.c
index ed21e86b8d7..4bc06f71c79 100644
--- a/src/plugins/unittest/crypto_test.c
+++ b/src/plugins/unittest/crypto_test.c
@@ -139,8 +139,7 @@ print_results (vlib_main_t * vm, unittest_crypto_test_registration_t ** rv,
if (vec_len (err))
fail = 1;
- vlib_cli_output (vm, "%-60v%s%v", s, vec_len (err) ? "FAIL: " : "OK",
- err);
+ vlib_cli_output (vm, "%-65v%s%v", s, vec_len (err) ? "FAIL: " : "OK", err);
if (tm->verbose)
{
if (tm->verbose == 2)
@@ -455,7 +454,6 @@ test_crypto_static (vlib_main_t * vm, crypto_test_main_t * tm,
current_op = ops;
current_chained_op = chained_ops;
- /* *INDENT-OFF* */
vec_foreach_index (i, rv)
{
r = rv[i];
@@ -645,7 +643,6 @@ test_crypto_static (vlib_main_t * vm, crypto_test_main_t * tm,
op->user_data = i;
}
}
- /* *INDENT-ON* */
vnet_crypto_process_ops (vm, ops, vec_len (ops));
vnet_crypto_process_chained_ops (vm, chained_ops, chunks,
@@ -671,10 +668,8 @@ test_crypto_get_key_sz (vnet_crypto_alg_t alg)
#define _(n, s, l) \
case VNET_CRYPTO_ALG_##n: \
return l;
- /* *INDENT-OFF* */
foreach_crypto_cipher_alg
foreach_crypto_aead_alg
- /* *INDENT-ON* */
#undef _
case VNET_CRYPTO_ALG_HMAC_MD5:
case VNET_CRYPTO_ALG_HMAC_SHA1:
@@ -857,7 +852,7 @@ test_crypto_perf (vlib_main_t * vm, crypto_test_main_t * tm)
vnet_crypto_op_t *ops1 = 0, *ops2 = 0, *op1, *op2;
vnet_crypto_alg_data_t *ad = vec_elt_at_index (cm->algs, tm->alg);
vnet_crypto_key_index_t key_index = ~0;
- u8 key[32];
+ u8 key[64];
int buffer_size = vlib_buffer_get_default_data_size (vm);
u64 seed = clib_cpu_time_now ();
u64 t0[5], t1[5], t2[5], n_bytes = 0;
@@ -925,7 +920,6 @@ test_crypto_perf (vlib_main_t * vm, crypto_test_main_t * tm)
ad->op_by_type[VNET_CRYPTO_OP_TYPE_ENCRYPT]);
vnet_crypto_op_init (op2,
ad->op_by_type[VNET_CRYPTO_OP_TYPE_DECRYPT]);
- op1->flags = VNET_CRYPTO_OP_FLAG_INIT_IV;
op1->src = op2->src = op1->dst = op2->dst = b->data;
op1->key_index = op2->key_index = key_index;
op1->iv = op2->iv = b->data - 64;
@@ -1062,14 +1056,12 @@ test_crypto_command_fn (vlib_main_t * vm,
return test_crypto (vm, tm);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_crypto_command, static) =
{
.path = "test crypto",
.short_help = "test crypto",
.function = test_crypto_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
crypto_test_init (vlib_main_t * vm)
diff --git a/src/plugins/unittest/fib_test.c b/src/plugins/unittest/fib_test.c
index 76b675bca83..fbac809d726 100644
--- a/src/plugins/unittest/fib_test.c
+++ b/src/plugins/unittest/fib_test.c
@@ -142,28 +142,21 @@ fib_test_mk_intf (u32 ninterfaces)
for (i = 0; i < ninterfaces; i++)
{
- hw_address[5] = i;
+ vnet_eth_interface_registration_t eir = {};
+ vnet_main_t *vnm = vnet_get_main();
- error = ethernet_register_interface(vnet_get_main(),
- test_interface_device_class.index,
- i /* instance */,
- hw_address,
- &tm->hw_if_indicies[i],
- /* flag change */ 0);
+ hw_address[5] = i;
- FIB_TEST((NULL == error), "ADD interface %d", i);
+ eir.dev_class_index = test_interface_device_class.index;
+ eir.dev_instance = i;
+ eir.address = hw_address;
+ tm->hw_if_indicies[i] = vnet_eth_register_interface (vnm, &eir);
error = vnet_hw_interface_set_flags(vnet_get_main(),
tm->hw_if_indicies[i],
VNET_HW_INTERFACE_FLAG_LINK_UP);
tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
tm->hw_if_indicies[i]);
- vec_validate (ip4_main.fib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- vec_validate (ip6_main.fib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
error = vnet_sw_interface_set_flags(vnet_get_main(),
tm->hw[i]->sw_if_index,
@@ -788,6 +781,69 @@ fib_test_validate_entry (fib_node_index_t fei,
}
static int
+fib_test_multipath_v4 (const test_main_t *tm, const u32 fib_index,
+ const fib_prefix_t *pfx, const int n_paths,
+ const int expected_n_buckets)
+{
+ const int path_list_pool_size = fib_path_list_pool_size();
+ const int path_list_db_size = fib_path_list_db_size();
+ const int entry_pool_size = fib_entry_pool_size();
+ fib_route_path_t *r_paths = NULL;
+ const load_balance_t *lb;
+ const dpo_id_t *dpo;
+ u32 fei;
+ int res = 0;
+ int i;
+
+ for (i = 0; i < n_paths; i++)
+ {
+ fib_route_path_t r_path = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + i),
+ },
+ .frp_sw_if_index = tm->hw[0]->sw_if_index,
+ .frp_weight = 1,
+ .frp_fib_index = ~0,
+ .frp_flags = FIB_ROUTE_PATH_ATTACHED,
+ };
+ vec_add1(r_paths, r_path);
+ }
+
+ fib_table_entry_update(fib_index,
+ pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ r_paths);
+
+ fei = fib_table_lookup_exact_match(fib_index, pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "prefix present");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == expected_n_buckets),
+ "prefix lb over %d paths", lb->lb_n_buckets);
+
+ fib_table_entry_delete(fib_index,
+ pfx,
+ FIB_SOURCE_API);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, pfx), "prefix removed");
+ vec_free(r_paths);
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((path_list_db_size == fib_path_list_db_size()),
+ "path list DB population:%d", fib_path_list_db_size());
+ FIB_TEST((path_list_pool_size == fib_path_list_pool_size()),
+ "path list pool size is %d", fib_path_list_pool_size());
+ FIB_TEST((entry_pool_size == fib_entry_pool_size()),
+ "entry pool size is %d", fib_entry_pool_size());
+ return res;
+}
+
+static int
fib_test_v4 (void)
{
/*
@@ -826,9 +882,7 @@ fib_test_v4 (void)
FIB_SOURCE_API);
for (ii = 0; ii < 4; ii++)
- {
- ip4_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index;
- }
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[ii]->sw_if_index, fib_index);
fib_prefix_t pfx_0_0_0_0_s_0 = {
.fp_len = 0,
@@ -3623,52 +3677,26 @@ fib_test_v4 (void)
/*
* A route with multiple paths at once
*/
- fib_route_path_t *r_paths = NULL;
-
- for (ii = 0; ii < 4; ii++)
- {
- fib_route_path_t r_path = {
- .frp_proto = DPO_PROTO_IP4,
- .frp_addr = {
- .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii),
- },
- .frp_sw_if_index = tm->hw[0]->sw_if_index,
- .frp_weight = 1,
- .frp_fib_index = ~0,
- };
- vec_add1(r_paths, r_path);
- }
-
- fib_table_entry_update(fib_index,
- &pfx_4_4_4_4_s_32,
- FIB_SOURCE_API,
- FIB_ENTRY_FLAG_NONE,
- r_paths);
-
- fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32);
- FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present");
- dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(0 ==
+ fib_test_multipath_v4(tm, fib_index, &pfx_4_4_4_4_s_32, 4, 4),
+ "multipath with 4 nexthops");
- lb = load_balance_get(dpo->dpoi_index);
- FIB_TEST((lb->lb_n_buckets == 4), "4.4.4.4/32 lb over %d paths", lb->lb_n_buckets);
-
- fib_table_entry_delete(fib_index,
- &pfx_4_4_4_4_s_32,
- FIB_SOURCE_API);
- FIB_TEST(FIB_NODE_INDEX_INVALID ==
- fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
- "4.4.4.4/32 removed");
- vec_free(r_paths);
+ /*
+ * A route with lots of multiple paths that will overflow max supported
+ * lb buckets because of normalization
+ */
+ FIB_TEST(0 ==
+ fib_test_multipath_v4(tm, fib_index, &pfx_4_4_4_4_s_32,
+ LB_MAX_BUCKETS / 2 + 23, LB_MAX_BUCKETS),
+ "multipath with too many nexthops");
/*
- * add-remove test. no change.
+ * A route with more paths than max supported lb buckets
*/
- FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
- fib_path_list_db_size());
- FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
- fib_path_list_pool_size());
- FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
- fib_entry_pool_size());
+ FIB_TEST(0 ==
+ fib_test_multipath_v4 (tm, fib_index, &pfx_4_4_4_4_s_32,
+ LB_MAX_BUCKETS + 13, LB_MAX_BUCKETS),
+ "multipath with too many nexthops");
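+
+  /*
+   * Sketch of the expectation behind the two cases above (assuming the
+   * load-balance rounds equal-weight paths up to a power-of-two bucket
+   * count before clamping; max_pow2() is the vppinfra helper):
+   *
+   *   expected = clib_min (max_pow2 (n_paths), LB_MAX_BUCKETS);
+   *   4                      -> 4
+   *   LB_MAX_BUCKETS/2 + 23  -> LB_MAX_BUCKETS (rounded up)
+   *   LB_MAX_BUCKETS + 13    -> LB_MAX_BUCKETS (clamped)
+   */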
/*
* A route deag route
@@ -3707,7 +3735,6 @@ fib_test_v4 (void)
FIB_TEST(FIB_NODE_INDEX_INVALID ==
fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
"4.4.4.4/32 removed");
- vec_free(r_paths);
/*
* A route deag route in a source lookup table
@@ -3746,7 +3773,6 @@ fib_test_v4 (void)
FIB_TEST(FIB_NODE_INDEX_INVALID ==
fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
"4.4.4.4/32 removed");
- vec_free(r_paths);
/*
* add-remove test. no change.
@@ -4397,6 +4423,9 @@ fib_test_v4 (void)
FIB_SOURCE_INTERFACE)),
"NO INterface Source'd prefixes");
+ for (ii = 0; ii < 4; ii++)
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[ii]->sw_if_index, 0);
+
fib_table_unlock(fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_API);
FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
@@ -4455,9 +4484,7 @@ fib_test_v6 (void)
FIB_SOURCE_API);
for (ii = 0; ii < 4; ii++)
- {
- ip6_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index;
- }
+ fib_table_bind (FIB_PROTOCOL_IP6, tm->hw[ii]->sw_if_index, fib_index);
fib_prefix_t pfx_0_0 = {
.fp_len = 0,
@@ -5176,12 +5203,11 @@ fib_test_v6 (void)
/*
* Add the interface back. routes stay unresolved.
*/
- error = ethernet_register_interface(vnet_get_main(),
- test_interface_device_class.index,
- 0 /* instance */,
- hw_address,
- &tm->hw_if_indicies[0],
- /* flag change */ 0);
+ vnet_eth_interface_registration_t eir = {};
+ eir.dev_class_index = test_interface_device_class.index;
+ eir.dev_instance = 0;
+ eir.address = hw_address;
+ tm->hw_if_indicies[0] = vnet_eth_register_interface (vnet_get_main(), &eir);
fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
@@ -5276,6 +5302,10 @@ fib_test_v6 (void)
/*
* now remove the VRF
*/
+
+ for (ii = 0; ii < 4; ii++)
+ fib_table_bind (FIB_PROTOCOL_IP6, tm->hw[ii]->sw_if_index, 0);
+
fib_table_unlock(fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_API);
FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
@@ -5314,14 +5344,12 @@ fib_test_ae (void)
{
const dpo_id_t *dpo, *dpo_drop;
const u32 fib_index = 0;
- fib_node_index_t fei;
+ fib_node_index_t dfrt, fei;
test_main_t *tm;
- ip4_main_t *im;
int res;
res = 0;
tm = &test_main;
- im = &ip4_main;
FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
adj_nbr_db_size());
@@ -5341,8 +5369,7 @@ fib_test_ae (void)
},
};
- vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index);
- im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index;
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[0]->sw_if_index, fib_index);
dpo_drop = drop_dpo_get(DPO_PROTO_IP4);
@@ -5415,6 +5442,44 @@ fib_test_ae (void)
import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
11,
FIB_SOURCE_CLI);
+ /*
+ * Add default route in the import FIB
+ */
+ fib_prefix_t pfx_0_0_0_0_s_0 = {
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ {0}
+ },
+ },
+ };
+
+ dfrt = fib_table_lookup(import_fib_index1, &pfx_0_0_0_0_s_0);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != dfrt), "default route present");
+
+ fib_table_entry_path_add(import_fib_index1,
+ &pfx_0_0_0_0_s_0,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_0_0_0_0_s_0);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "default route present");
+ FIB_TEST((fei != dfrt), "default route added");
+
+ /*
+ * delete default route and check for the presence in the import table
+ */
+ fib_table_entry_delete(import_fib_index1, &pfx_0_0_0_0_s_0, FIB_SOURCE_API);
+ fei = fib_table_lookup(import_fib_index1, &pfx_0_0_0_0_s_0);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "default route present");
+ FIB_TEST((fei == dfrt), "default route removed");
/*
* Add an attached route in the import FIB
@@ -5871,11 +5936,9 @@ static int
fib_test_pref (void)
{
test_main_t *tm;
- ip4_main_t *im;
int res, i;
tm = &test_main;
- im = &ip4_main;
res = 0;
const fib_prefix_t pfx_1_1_1_1_s_32 = {
@@ -5888,10 +5951,8 @@ fib_test_pref (void)
},
};
- vec_validate(im->fib_index_by_sw_if_index, tm->hw[2]->sw_if_index);
-
for (i = 0; i <= 2; i++)
- im->fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[i]->sw_if_index, 0);
/*
* 2 high, 2 medium and 2 low preference non-recursive paths
@@ -6340,12 +6401,10 @@ fib_test_label (void)
const u32 fib_index = 0;
int lb_count, ii, res;
test_main_t *tm;
- ip4_main_t *im;
res = 0;
lb_count = pool_elts(load_balance_pool);
tm = &test_main;
- im = &ip4_main;
/*
* add interface routes. We'll assume this works. It's more rigorously
@@ -6365,8 +6424,7 @@ fib_test_label (void)
FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
adj_nbr_db_size());
- vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index);
- im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index;
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[0]->sw_if_index, fib_index);
fib_table_entry_update_one_path(fib_index, &local0_pfx,
FIB_SOURCE_INTERFACE,
@@ -6411,8 +6469,7 @@ fib_test_label (void)
},
};
- vec_validate(im->fib_index_by_sw_if_index, tm->hw[1]->sw_if_index);
- im->fib_index_by_sw_if_index[tm->hw[1]->sw_if_index] = fib_index;
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[1]->sw_if_index, fib_index);
fib_table_entry_update_one_path(fib_index, &local1_pfx,
FIB_SOURCE_INTERFACE,
@@ -7557,6 +7614,7 @@ fib_test_child_get_node (fib_node_index_t index)
}
static int fib_test_walk_spawns_walks;
+static fib_node_type_t test_node_type;
static fib_node_back_walk_rc_t
fib_test_child_back_walk_notify (fib_node_t *node,
@@ -7567,9 +7625,9 @@ fib_test_child_back_walk_notify (fib_node_t *node,
vec_add1(tc->ctxs, *ctx);
if (1 == fib_test_walk_spawns_walks)
- fib_walk_sync(FIB_NODE_TYPE_TEST, tc->index, ctx);
+ fib_walk_sync(test_node_type, tc->index, ctx);
if (2 == fib_test_walk_spawns_walks)
- fib_walk_async(FIB_NODE_TYPE_TEST, tc->index,
+ fib_walk_async(test_node_type, tc->index,
FIB_WALK_PRIORITY_HIGH, ctx);
return (FIB_NODE_BACK_WALK_CONTINUE);
@@ -7610,23 +7668,23 @@ fib_test_walk (void)
res = 0;
vm = vlib_get_main();
- fib_node_register_type(FIB_NODE_TYPE_TEST, &fib_test_child_vft);
+ test_node_type = fib_node_register_new_type("fib-test", &fib_test_child_vft);
/*
* init a fake node on which we will add children
*/
fib_node_init(&fib_test_nodes[PARENT_INDEX].node,
- FIB_NODE_TYPE_TEST);
+ test_node_type);
FOR_EACH_TEST_CHILD(tc)
{
- fib_node_init(&tc->node, FIB_NODE_TYPE_TEST);
+ fib_node_init(&tc->node, test_node_type);
fib_node_lock(&tc->node);
tc->ctxs = NULL;
tc->index = ii;
- tc->sibling = fib_node_child_add(FIB_NODE_TYPE_TEST,
+ tc->sibling = fib_node_child_add(test_node_type,
PARENT_INDEX,
- FIB_NODE_TYPE_TEST, ii);
+ test_node_type, ii);
}
/*
@@ -7634,7 +7692,7 @@ fib_test_walk (void)
*/
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children),
"Parent has %d children pre-walk",
@@ -7680,9 +7738,9 @@ fib_test_walk (void)
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_LOW, &low_ctx);
fib_walk_process_queues(vm, 1);
@@ -7708,9 +7766,9 @@ fib_test_walk (void)
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &low_ctx);
fib_walk_process_queues(vm, 1);
@@ -7736,9 +7794,9 @@ fib_test_walk (void)
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &low_ctx);
fib_walk_process_queues(vm, 1);
@@ -7764,7 +7822,7 @@ fib_test_walk (void)
*/
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
fib_walk_process_queues(vm, 0);
@@ -7818,7 +7876,7 @@ fib_test_walk (void)
/*
* schedule another walk that will catch-up and merge.
*/
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
fib_walk_process_queues(vm, 1);
@@ -7851,13 +7909,13 @@ fib_test_walk (void)
*/
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
fib_walk_process_queues(vm, 0);
fib_walk_process_queues(vm, 0);
- fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx);
+ fib_walk_sync(test_node_type, PARENT_INDEX, &high_ctx);
FOR_EACH_TEST_CHILD(tc)
{
@@ -7886,9 +7944,9 @@ fib_test_walk (void)
* make the parent a child of one of its children, thus inducing a routing loop.
*/
fib_test_nodes[PARENT_INDEX].sibling =
- fib_node_child_add(FIB_NODE_TYPE_TEST,
+ fib_node_child_add(test_node_type,
1, // the first child
- FIB_NODE_TYPE_TEST,
+ test_node_type,
PARENT_INDEX);
/*
@@ -7897,7 +7955,7 @@ fib_test_walk (void)
*/
fib_test_walk_spawns_walks = 1;
- fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx);
+ fib_walk_sync(test_node_type, PARENT_INDEX, &high_ctx);
FOR_EACH_TEST_CHILD(tc)
{
@@ -7938,7 +7996,7 @@ fib_test_walk (void)
* execute an async walk of the graph loop, with each child spawns sync walks
*/
high_ctx.fnbw_depth = 0;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
fib_walk_process_queues(vm, 1);
@@ -7960,7 +8018,7 @@ fib_test_walk (void)
*/
fib_test_walk_spawns_walks = 2;
high_ctx.fnbw_depth = 0;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
fib_walk_process_queues(vm, 1);
@@ -7978,7 +8036,7 @@ fib_test_walk (void)
}
- fib_node_child_remove(FIB_NODE_TYPE_TEST,
+ fib_node_child_remove(test_node_type,
1, // the first child
fib_test_nodes[PARENT_INDEX].sibling);
@@ -7987,7 +8045,7 @@ fib_test_walk (void)
*/
FOR_EACH_TEST_CHILD(tc)
{
- fib_node_child_remove(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_node_child_remove(test_node_type, PARENT_INDEX,
tc->sibling);
fib_node_deinit(&tc->node);
fib_node_unlock(&tc->node);
@@ -8385,12 +8443,14 @@ fib_test_bfd (void)
bfd_10_10_10_1.hop_type = BFD_HOP_TYPE_SINGLE;
bfd_10_10_10_1.udp.key.sw_if_index = tm->hw[0]->sw_if_index;
- adj_bfd_notify(BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1);
-
ai_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
VNET_LINK_IP4,
&nh_10_10_10_1,
tm->hw[0]->sw_if_index);
+ bfd_10_10_10_1.udp.adj_index = ai_10_10_10_1;
+
+ adj_bfd_notify(BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1);
+
/*
* whilst the BFD session is not signalled, the adj is up
*/
@@ -8535,7 +8595,7 @@ lfib_test (void)
mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API, NULL);
mpls_sw_interface_enable_disable(&mpls_main,
tm->hw[0]->sw_if_index,
- 1, 1);
+ 1);
ip46_address_t nh_10_10_10_1 = {
.ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
@@ -9106,7 +9166,7 @@ lfib_test (void)
*/
mpls_sw_interface_enable_disable(&mpls_main,
tm->hw[0]->sw_if_index,
- 0, 1);
+ 0);
mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API);
FIB_TEST(0 == pool_elts(mpls_disp_dpo_pool),
@@ -9128,22 +9188,15 @@ fib_test_inherit (void)
fib_node_index_t fei;
int n_feis, res, i;
test_main_t *tm;
- ip4_main_t *im4;
- ip6_main_t *im6;
tm = &test_main;
- im4 = &ip4_main;
- im6 = &ip6_main;
res = 0;
- vec_validate(im4->fib_index_by_sw_if_index, tm->hw[2]->sw_if_index);
- vec_validate(im6->fib_index_by_sw_if_index, tm->hw[2]->sw_if_index);
-
for (i = 0; i <= 2; i++)
- {
- im4->fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- im6->fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- }
+ {
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[i]->sw_if_index, 0);
+ fib_table_bind (FIB_PROTOCOL_IP6, tm->hw[i]->sw_if_index, 0);
+ }
n_feis = fib_entry_pool_size();
const ip46_address_t nh_10_10_10_1 = {
@@ -10610,7 +10663,7 @@ fib_test_sticky (void)
fib_route_path_t *r_paths2 = NULL;
r_paths2 = vec_dup(r_paths);
- _vec_len(r_paths2) = 3;
+ vec_set_len (r_paths2, 3);
pl_index = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, r_paths2);
fib_path_list_lock(pl_index);
@@ -10681,7 +10734,7 @@ fib_test_sticky (void)
fib_route_path_t *r_paths3 = NULL;
r_paths3 = vec_dup(r_paths);
- _vec_len(r_paths3) = 3;
+ vec_set_len (r_paths3, 3);
r_paths3[0].frp_weight = 3;
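The _vec_len() changes above track the vppinfra API: writing through _vec_len() as an lvalue is being retired in favour of an explicit setter, e.g.:

    /* before: _vec_len (r_paths2) = 3;    direct lvalue write */
    /* after:  vec_set_len (r_paths2, 3);  explicit setter     */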
diff --git a/src/plugins/unittest/gso_test.c b/src/plugins/unittest/gso_test.c
new file mode 100644
index 00000000000..43c614341d2
--- /dev/null
+++ b/src/plugins/unittest/gso_test.c
@@ -0,0 +1,456 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/time.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/gso/gso.h>
+#include <vnet/gso/hdr_offset_parser.h>
+#include <vnet/tcp/tcp_packet.h>
+
+#define MAX_GSO_PACKET_SIZE (TCP_MAX_GSO_SZ - 1)
+#define MIN_GSO_SEGMENT_SIZE 128
+#define MAX_GSO_SEGMENT_SIZE 2048
+#define DEFAULT_GSO_SEGMENT_SIZE 1448
+
+typedef struct _gso_test_data
+{
+ const char *name;
+ const char *description;
+ u8 *data;
+ u32 data_size;
+ u32 l4_hdr_len;
+ u8 is_l2;
+ u8 is_ip6;
+ struct _gso_test_data *next;
+} gso_test_data_t;
+
+typedef struct
+{
+ int verbose;
+
+ char *gso_name;
+ u32 warmup_rounds;
+ u32 rounds;
+ u32 n_buffers;
+ u32 buffer_size;
+ u32 packet_size;
+ u32 gso_size;
+ gso_test_data_t *gso_test_data;
+} gso_test_main_t;
+
+gso_test_main_t gso_test_main;
+
+#define GSO_TEST_REGISTER_DATA(x, ...) \
+ __VA_ARGS__ gso_test_data_t __gso_test_data_##x; \
+ static void __clib_constructor __gso_test_data_fn_##x (void) \
+ { \
+ gso_test_main_t *gtm = &gso_test_main; \
+ __gso_test_data_##x.next = gtm->gso_test_data; \
+ gtm->gso_test_data = &__gso_test_data_##x; \
+ } \
+ __VA_ARGS__ gso_test_data_t __gso_test_data_##x
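+
+/*
+ * The constructor above runs at image load and pushes each registered
+ * test onto the head of gso_test_main.gso_test_data, so test_gso_perf()
+ * below walks the registrations newest-first.
+ */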
+
+// ipv4
+u8 gso_ipv4_tcp_data[64] = {
+ 0x02, 0xfe, 0x39, 0xe5, 0x09, 0x8f, 0x02, 0xfe, 0x2d, 0x18, 0x63, 0x18, 0x08,
+ 0x00, 0x45, 0x00, 0x05, 0xdc, 0xdb, 0x42, 0x40, 0x00, 0x40, 0x06, 0xc4, 0x85,
+ 0xc0, 0xa8, 0x0a, 0x02, 0xc0, 0xa8, 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34,
+ 0x93, 0xa8, 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03,
+ 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+GSO_TEST_REGISTER_DATA (gso_ipv4_tcp, static) = {
+ .name = "ipv4-tcp",
+ .description = "IPv4 TCP",
+ .data = gso_ipv4_tcp_data,
+ .data_size = sizeof (gso_ipv4_tcp_data),
+ .l4_hdr_len = sizeof (tcp_header_t),
+ .is_l2 = 1,
+ .is_ip6 = 0,
+};
+
+// ipv6
+u8 gso_ipv6_tcp_data[] = {
+ 0x02, 0xfe, 0x39, 0xe5, 0x09, 0x8f, 0x02, 0xfe, 0x2d, 0x18, 0x63, 0x18,
+  0x86, 0xdd, 0x60, 0x0d, 0xf4, 0x97, 0x00, 0x40, 0x06, 0x40, 0xfd, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0xfd, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34, 0x93,
+ 0xa8, 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03,
+ 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+GSO_TEST_REGISTER_DATA (gso_ipv6_tcp, static) = {
+ .name = "ipv6-tcp",
+ .description = "IPv6 TCP",
+ .data = gso_ipv6_tcp_data,
+ .data_size = sizeof (gso_ipv6_tcp_data),
+ .l4_hdr_len = sizeof (tcp_header_t),
+ .is_l2 = 1,
+ .is_ip6 = 1,
+};
+
+/*
+ * this does not support tunnel packets
+ */
+static void
+set_hdr_offsets (vlib_buffer_t *b0, u8 is_l2)
+{
+ u16 ethertype = 0, l2hdr_sz = 0;
+ vnet_buffer_oflags_t oflags = 0;
+ u8 l4_proto = 0;
+
+ if (!is_l2)
+ {
+ switch (b0->data[0] & 0xf0)
+ {
+ case 0x40:
+ ethertype = ETHERNET_TYPE_IP4;
+ break;
+ case 0x60:
+ ethertype = ETHERNET_TYPE_IP6;
+ break;
+ }
+ }
+ else
+ {
+ ethernet_header_t *eh = (ethernet_header_t *) b0->data;
+ ethertype = clib_net_to_host_u16 (eh->type);
+ l2hdr_sz = sizeof (ethernet_header_t);
+
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eh + 1);
+
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ vlan++;
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ }
+ }
+ }
+
+ vnet_buffer (b0)->l2_hdr_offset = 0;
+ vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz;
+
+ if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4))
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l2hdr_sz);
+ vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
+ l4_proto = ip4->protocol;
+ oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
+ b0->flags |= (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+ }
+ else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l2hdr_sz);
+ vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + sizeof (ip6_header_t);
+ /* FIXME IPv6 EH traversal */
+ l4_proto = ip6->protocol;
+ b0->flags |= (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+ }
+ if (l4_proto == IP_PROTOCOL_TCP)
+ {
+ oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
+ }
+ else if (l4_proto == IP_PROTOCOL_UDP)
+ {
+ oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+ }
+ if (oflags)
+ vnet_buffer_offload_flags_set (b0, oflags);
+}
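+
+/*
+ * fill_buffers() below uses set_hdr_offsets() to stand in for a device
+ * input node: l2/l3/l4 header offsets and checksum-offload flags are
+ * populated so gso_segment_buffer_inline() sees fully classified
+ * buffers.
+ */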
+
+static u32
+fill_buffers (vlib_main_t *vm, u32 *buffer_indices,
+ gso_test_data_t *gso_test_data, u32 n_buffers, u32 buffer_size,
+ u32 packet_size, u32 gso_size)
+{
+ u32 i;
+ u8 *data = gso_test_data->data;
+ u32 data_size = gso_test_data->data_size;
+ u32 l4_hdr_len = gso_test_data->l4_hdr_len;
+ u8 is_l2 = gso_test_data->is_l2;
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ u64 seed = clib_cpu_time_now ();
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
+ u32 len = 0;
+ u32 remaining_data =
+ (packet_size > buffer_size) ? (packet_size - buffer_size) : 0;
+
+ clib_memcpy_fast (b->data, data, data_size);
+ b->current_data = 0;
+
+ for (u32 j = data_size; j < buffer_size; j += 8)
+ *(u64 *) (b->data + j) = 1 + random_u64 (&seed);
+ b->current_length = buffer_size;
+
+ if (remaining_data)
+ {
+ vlib_buffer_t *pb = b;
+ u32 n_alloc,
+ n_bufs = ((remaining_data + buffer_size - 1) / buffer_size);
+ u32 *buffers = 0;
+ u32 fill_data_size;
+ u32 k = 0;
+
+ vec_validate (buffers, n_bufs - 1);
+ n_alloc = vlib_buffer_alloc (vm, buffers, n_bufs);
+ if (n_alloc < n_bufs)
+ {
+ vlib_buffer_free (vm, buffers, n_alloc);
+ vlib_cli_output (
+ vm, "vlib buffer alloc failed at %u requested %u actual %u", i,
+ n_bufs, n_alloc);
+ return i;
+ }
+
+ do
+ {
+ pb->next_buffer = buffers[k];
+ pb->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ pb = vlib_get_buffer (vm, buffers[k]);
+ pb->current_data = 0;
+ fill_data_size = clib_min (buffer_size, remaining_data);
+ remaining_data -= fill_data_size;
+ for (u32 l = 0; l < fill_data_size; l += 8)
+ *(u64 *) (pb->data + l) = 1 + random_u64 (&seed);
+ pb->current_length = fill_data_size;
+ k++;
+ len += fill_data_size;
+ }
+ while (k < n_bufs);
+
+ set_hdr_offsets (b, is_l2);
+ b->flags |= VNET_BUFFER_F_GSO;
+ vnet_buffer2 (b)->gso_size = gso_size;
+ vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_len;
+ }
+ b->total_length_not_including_first_buffer = len;
+ b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ }
+ return i;
+}
+
+static_always_inline u32
+gso_segment_buffer_test (vlib_main_t *vm, u32 bi,
+ vnet_interface_per_thread_data_t *ptd, u8 is_l2)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ u32 n_tx_bytes = 0;
+
+ if (PREDICT_TRUE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ n_tx_bytes = gso_segment_buffer_inline (vm, ptd, b, is_l2);
+ }
+
+ return n_tx_bytes;
+}
+
+static clib_error_t *
+test_gso_perf (vlib_main_t *vm, gso_test_main_t *gtm)
+{
+ clib_error_t *err = 0;
+ vnet_interface_per_thread_data_t *ptd = 0;
+ u32 packet_size = MAX_GSO_PACKET_SIZE;
+ u32 buffer_size = vlib_buffer_get_default_data_size (vm);
+ u32 gso_size;
+ u32 n_buffers, warmup_rounds, rounds;
+ u32 *buffer_indices = 0;
+ u64 t0, t1, t2[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ gso_test_data_t *gso_test_data = gtm->gso_test_data;
+ int i, j, k;
+
+ if (gtm->buffer_size > buffer_size)
+ return clib_error_return (0, "buffer size must be <= %u", buffer_size);
+
+ if (gtm->packet_size > packet_size)
+ return clib_error_return (0, "gso packet size must be <= %u", packet_size);
+
+ if ((gtm->gso_size > MAX_GSO_SEGMENT_SIZE) ||
+ (gtm->gso_size < MIN_GSO_SEGMENT_SIZE))
+ return clib_error_return (
+      0, "gso segment size must be between %u and %u",
+ MIN_GSO_SEGMENT_SIZE, MAX_GSO_SEGMENT_SIZE);
+
+ rounds = gtm->rounds ? gtm->rounds : 256;
+ n_buffers = gtm->n_buffers ? gtm->n_buffers : 256;
+ warmup_rounds = gtm->warmup_rounds ? gtm->warmup_rounds : 256;
+ buffer_size = gtm->buffer_size ? gtm->buffer_size : buffer_size;
+ gso_size = gtm->gso_size;
+ packet_size = gtm->packet_size ? gtm->packet_size : packet_size;
+
+ vec_validate_aligned (ptd, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (buffer_indices, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+
+ vlib_cli_output (vm,
+ "GSO Segmentation: packet-size %u gso-size %u buffer-size "
+ "%u n_buffers %u rounds %u "
+ "warmup-rounds %u",
+ packet_size, gso_size, buffer_size, n_buffers, rounds,
+ warmup_rounds);
+ vlib_cli_output (vm, " cpu-freq %.2f GHz",
+ (f64) vm->clib_time.clocks_per_second * 1e-9);
+
+ while (gso_test_data)
+ {
+ u32 n_filled = 0;
+ u32 n_alloc = vlib_buffer_alloc (vm, buffer_indices, n_buffers);
+ if (n_alloc != n_buffers)
+ {
+ vlib_cli_output (vm, " Test: %s FAILED", gso_test_data->description);
+ err = clib_error_return (0, "buffer alloc failure");
+ vlib_buffer_free (vm, buffer_indices, n_alloc);
+ goto done;
+ }
+ n_filled = fill_buffers (vm, buffer_indices, gso_test_data, n_buffers,
+ buffer_size, packet_size, gso_size);
+
+ u8 is_l2 = gso_test_data->is_l2;
+
+ for (k = 0; k < warmup_rounds; k++)
+ {
+ for (j = 0; j < n_filled; j++)
+ gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j], is_l2);
+
+ for (j = 0; j < n_filled; j++)
+ {
+ vlib_buffer_free (vm, ptd[j].split_buffers,
+ vec_len (ptd[j].split_buffers));
+ vec_free (ptd[j].split_buffers);
+ }
+ }
+
+ for (i = 0; i < 10; i++)
+ {
+ for (k = 0; k < rounds; k++)
+ {
+ t0 = clib_cpu_time_now ();
+ for (j = 0; j < n_filled; j++)
+ gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j],
+ is_l2);
+
+ t1 = clib_cpu_time_now ();
+ t2[i] += (t1 - t0);
+ for (j = 0; j < n_filled; j++)
+ {
+ vlib_buffer_free (vm, ptd[j].split_buffers,
+ vec_len (ptd[j].split_buffers));
+ vec_free (ptd[j].split_buffers);
+ }
+ }
+ }
+
+ vlib_cli_output (
+ vm, "===========================================================");
+ vlib_cli_output (vm, " Test: %s", gso_test_data->description);
+ vlib_cli_output (
+ vm, "===========================================================");
+ for (i = 0; i < 10; i++)
+ {
+ // ticks per packet
+ f64 tpp1 = (f64) (t2[i]) / (n_filled * rounds);
+ // ticks per Byte
+ f64 tpB1 = (f64) (t2[i]) / (n_filled * rounds * packet_size);
+	  // kilo-packets per second
+ f64 Kpps1 = vm->clib_time.clocks_per_second * 1e-3 / tpp1;
+ // Throughput Giga-bits per second
+ f64 Gbps1 = vm->clib_time.clocks_per_second * 8 * 1e-9 / tpB1;
+
+ vlib_cli_output (
+ vm, "%-2u: %.03f ticks/packet, %.02f Kpps, %.02f Gbps\n", i + 1,
+ tpp1, Kpps1, Gbps1);
+ }
+ if (n_alloc)
+ vlib_buffer_free (vm, buffer_indices, n_alloc);
+ clib_memset (t2, 0, sizeof (t2));
+ gso_test_data = gso_test_data->next;
+ }
+
+done:
+
+ vec_free (ptd);
+ vec_free (buffer_indices);
+ return err;
+}
+
+static clib_error_t *
+test_gso_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ gso_test_main_t *gtm = &gso_test_main;
+ clib_error_t *err = 0;
+ f64 end, start, total_time;
+
+ gtm->gso_size = DEFAULT_GSO_SEGMENT_SIZE;
+ gtm->warmup_rounds = 0;
+ gtm->rounds = 0;
+ gtm->n_buffers = 0;
+ gtm->buffer_size = 0;
+ gtm->packet_size = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ gtm->verbose = 1;
+ else if (unformat (input, "detail"))
+ gtm->verbose = 2;
+ else if (unformat (input, "buffers %u", &gtm->n_buffers))
+ ;
+ else if (unformat (input, "buffer-size %u", &gtm->buffer_size))
+ ;
+ else if (unformat (input, "packet-size %u", &gtm->packet_size))
+ ;
+ else if (unformat (input, "gso-size %u", &gtm->gso_size))
+ ;
+ else if (unformat (input, "rounds %u", &gtm->rounds))
+ ;
+ else if (unformat (input, "warmup-rounds %u", &gtm->warmup_rounds))
+ ;
+ else
+ {
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ }
+
+ start = clib_cpu_time_now ();
+ err = test_gso_perf (vm, gtm);
+ end = clib_cpu_time_now ();
+
+ total_time = (f64) (end - start) / vm->clib_time.clocks_per_second;
+  vlib_cli_output (vm, "Total test time: %.02f seconds", total_time);
+
+ return err;
+}
+
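+/* Example invocation (values are illustrative):
+ *   test gso packet-size 2048 gso-size 1448 buffers 128 rounds 100 */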
+VLIB_CLI_COMMAND (test_gso_command, static) = {
+ .path = "test gso",
+ .short_help = "test gso [buffers <n>] [buffer-size <size>] [packet-size "
+ "<size>] [gso-size <size>] [rounds <n>] "
+ "[warmup-rounds <n>]",
+ .function = test_gso_command_fn,
+};
+
+static clib_error_t *
+gso_test_init (vlib_main_t *vm)
+{
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (gso_test_init);
diff --git a/src/plugins/unittest/hash_test.c b/src/plugins/unittest/hash_test.c
new file mode 100644
index 00000000000..3b0a3cf04b9
--- /dev/null
+++ b/src/plugins/unittest/hash_test.c
@@ -0,0 +1,331 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/time.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+#include <vnet/hash/hash.h>
+#include <vnet/ethernet/ethernet.h>
+
+#define HASH_TEST_DATA_SIZE 2048
+
+typedef struct _hash_test_data
+{
+ const char *name;
+ const char *description;
+ u8 *data;
+ u32 data_size;
+ vnet_hash_fn_type_t ftype;
+ struct _hash_test_data *next;
+} hash_test_data_t;
+
+typedef struct
+{
+ int verbose;
+
+ char *hash_name;
+ u32 warmup_rounds;
+ u32 rounds;
+ u32 n_buffers;
+
+ hash_test_data_t *hash_test_data;
+} hash_test_main_t;
+
+hash_test_main_t hash_test_main;
+
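+/* Each test vector registers itself at load time: the constructor links
+ * the static descriptor onto hash_test_main's singly-linked list. */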
+#define HASH_TEST_REGISTER_DATA(x, ...) \
+ __VA_ARGS__ hash_test_data_t __hash_test_data_##x; \
+ static void __clib_constructor __hash_test_data_fn_##x (void) \
+ { \
+ hash_test_main_t *htm = &hash_test_main; \
+ __hash_test_data_##x.next = htm->hash_test_data; \
+ htm->hash_test_data = &__hash_test_data_##x; \
+ } \
+ __VA_ARGS__ hash_test_data_t __hash_test_data_##x
+
+// qinq
+u8 eth_qinq_ipv4_tcp_data[72] = {
+ 0x02, 0xfe, 0x39, 0xe5, 0x09, 0x8f, 0x02, 0xfe, 0x2d, 0x18, 0x63, 0x18,
+ 0x88, 0xa8, 0x03, 0xe8, 0x81, 0x00, 0x03, 0xe8, 0x08, 0x00, 0x45, 0x00,
+ 0x05, 0xdc, 0xdb, 0x42, 0x40, 0x00, 0x40, 0x06, 0xc4, 0x85, 0xc0, 0xa8,
+ 0x0a, 0x02, 0xc0, 0xa8, 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34, 0x93,
+ 0xa8, 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03,
+ 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+HASH_TEST_REGISTER_DATA (eth_qinq_ipv4_tcp, static) = {
+ .name = "eth-qinq-ipv4-tcp",
+ .description = "Ethernet QinQ IPv4 TCP",
+ .data = eth_qinq_ipv4_tcp_data,
+ .data_size = sizeof (eth_qinq_ipv4_tcp_data),
+ .ftype = VNET_HASH_FN_TYPE_ETHERNET,
+};
+
+// vlan
+u8 eth_vlan_ipv4_tcp_data[68] = {
+ 0x02, 0xfe, 0x39, 0xe5, 0x09, 0x8f, 0x02, 0xfe, 0x2d, 0x18, 0x63, 0x18,
+ 0x81, 0x00, 0x03, 0xe8, 0x08, 0x00, 0x45, 0x00, 0x05, 0xdc, 0xdb, 0x42,
+ 0x40, 0x00, 0x40, 0x06, 0xc4, 0x85, 0xc0, 0xa8, 0x0a, 0x02, 0xc0, 0xa8,
+ 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34, 0x93, 0xa8, 0x1b, 0x7b, 0xef,
+ 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03, 0x00, 0x00, 0x01, 0x01,
+ 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+HASH_TEST_REGISTER_DATA (eth_vlan_ipv4_tcp, static) = {
+ .name = "eth-vlan-ipv4-tcp",
+ .description = "Ethernet Vlan IPv4 TCP",
+ .data = eth_vlan_ipv4_tcp_data,
+ .data_size = sizeof (eth_vlan_ipv4_tcp_data),
+ .ftype = VNET_HASH_FN_TYPE_ETHERNET,
+};
+
+// ethernet
+u8 eth_ipv4_tcp_data[64] = {
+ 0x02, 0xfe, 0x39, 0xe5, 0x09, 0x8f, 0x02, 0xfe, 0x2d, 0x18, 0x63, 0x18, 0x08,
+ 0x00, 0x45, 0x00, 0x05, 0xdc, 0xdb, 0x42, 0x40, 0x00, 0x40, 0x06, 0xc4, 0x85,
+ 0xc0, 0xa8, 0x0a, 0x02, 0xc0, 0xa8, 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34,
+ 0x93, 0xa8, 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03,
+ 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+HASH_TEST_REGISTER_DATA (eth_ipv4_tcp, static) = {
+ .name = "eth-ipv4-tcp",
+ .description = "Ethernet IPv4 TCP",
+ .data = eth_ipv4_tcp_data,
+ .data_size = sizeof (eth_ipv4_tcp_data),
+ .ftype = VNET_HASH_FN_TYPE_ETHERNET,
+};
+
+// udp
+u8 eth_ipv4_udp_data[42] = { 0x62, 0x36, 0xbe, 0xff, 0x91, 0x20, 0x5e,
+ 0x2c, 0xaf, 0x2e, 0x1e, 0x51, 0x08, 0x00,
+ 0x45, 0x00, 0x05, 0xc4, 0x9d, 0xc3, 0x40,
+ 0x00, 0x33, 0x11, 0x49, 0x61, 0x3e, 0xd2,
+ 0x12, 0x28, 0x0a, 0x09, 0x00, 0x02, 0x14,
+ 0x58, 0xc0, 0xd8, 0x05, 0xb0, 0x75, 0xbd };
+
+HASH_TEST_REGISTER_DATA (eth_ipv4_udp, static) = {
+ .name = "eth-ipv4-udp",
+ .description = "Ethernet IPv4 UDP",
+ .data = eth_ipv4_udp_data,
+ .data_size = sizeof (eth_ipv4_udp_data),
+ .ftype = VNET_HASH_FN_TYPE_ETHERNET,
+};
+
+// ipv4
+u8 ipv4_tcp_data[50] = { 0x45, 0x00, 0x05, 0xdc, 0xdb, 0x42, 0x40, 0x00, 0x40,
+ 0x06, 0xc4, 0x85, 0xc0, 0xa8, 0x0a, 0x02, 0xc0, 0xa8,
+ 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34, 0x93, 0xa8,
+ 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5,
+ 0xc7, 0x03, 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce,
+ 0xaa, 0x00, 0x2f, 0xf2, 0xc3 };
+
+HASH_TEST_REGISTER_DATA (ipv4_tcp, static) = {
+ .name = "ipv4-tcp",
+ .description = "IPv4 TCP",
+ .data = ipv4_tcp_data,
+ .data_size = sizeof (ipv4_tcp_data),
+ .ftype = VNET_HASH_FN_TYPE_IP,
+};
+
+u8 ipv4_icmp_data[84] = {
+ 0x45, 0x00, 0x00, 0x54, 0xb7, 0xe6, 0x40, 0x00, 0x40, 0x01, 0xed, 0x6e,
+ 0xc0, 0xa8, 0x0a, 0x01, 0xc0, 0xa8, 0x0a, 0x02, 0x08, 0x00, 0xc7, 0x84,
+ 0x00, 0x16, 0x00, 0x92, 0xfd, 0xdb, 0xd9, 0x60, 0x00, 0x00, 0x00, 0x00,
+ 0x91, 0xc3, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
+ 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
+};
+
+HASH_TEST_REGISTER_DATA (ipv4_icmp, static) = {
+ .name = "ipv4-icmp",
+ .description = "IPv4 ICMP",
+ .data = ipv4_icmp_data,
+ .data_size = sizeof (ipv4_icmp_data),
+ .ftype = VNET_HASH_FN_TYPE_IP,
+};
+
+// ip6
+u8 ipv6_icmp6_data[104] = {
+ 0x60, 0x0d, 0xf4, 0x97, 0x00, 0x40, 0x3a, 0x40, 0xfd, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0xfd, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+ 0x01, 0x80, 0x00, 0x10, 0x84, 0xb1, 0x25, 0x00, 0x01, 0x22, 0x57, 0xf0, 0x60,
+ 0x00, 0x00, 0x00, 0x00, 0xcb, 0x4a, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
+ 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a,
+ 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
+};
+
+HASH_TEST_REGISTER_DATA (ipv6_icmp6, static) = {
+ .name = "ipv6-icmp6",
+ .description = "IPv6 ICMP6",
+ .data = ipv6_icmp6_data,
+ .data_size = sizeof (ipv6_icmp6_data),
+ .ftype = VNET_HASH_FN_TYPE_IP,
+};
+
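+/* Copy the template packet into every buffer and pad the remainder with
+ * pseudo-random bytes up to HASH_TEST_DATA_SIZE. */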
+void
+fill_buffers (vlib_main_t *vm, u32 *buffer_indices, u8 *data, u32 data_size,
+ u32 n_buffers)
+{
+ int i, j;
+ u64 seed = clib_cpu_time_now ();
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
+ clib_memcpy_fast (b->data, data, data_size);
+ b->current_data = 0;
+ for (j = data_size; j < HASH_TEST_DATA_SIZE; j += 8)
+ *(u64 *) (b->data + j) = 1 + random_u64 (&seed);
+ b->current_length = HASH_TEST_DATA_SIZE;
+ }
+}
+
+static clib_error_t *
+test_hash_perf (vlib_main_t *vm, hash_test_main_t *htm)
+{
+ clib_error_t *err = 0;
+ u32 n_buffers, n_alloc = 0, warmup_rounds, rounds;
+ u32 *buffer_indices = 0;
+ u64 t0[5], t1[5];
+ vnet_hash_fn_t hf;
+ hash_test_data_t *hash_test_data = htm->hash_test_data;
+ void **p = 0;
+ int i, j;
+
+ rounds = htm->rounds ? htm->rounds : 100;
+ n_buffers = htm->n_buffers ? htm->n_buffers : 256;
+ warmup_rounds = htm->warmup_rounds ? htm->warmup_rounds : 100;
+
+ vec_validate_aligned (p, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (buffer_indices, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ n_alloc = vlib_buffer_alloc (vm, buffer_indices, n_buffers);
+ if (n_alloc != n_buffers)
+ {
+ err = clib_error_return (0, "buffer alloc failure");
+ goto done;
+ }
+
+ vlib_cli_output (vm,
+ "%s: n_buffers %u rounds %u "
+ "warmup-rounds %u",
+ htm->hash_name, n_buffers, rounds, warmup_rounds);
+ vlib_cli_output (vm, " cpu-freq %.2f GHz",
+ (f64) vm->clib_time.clocks_per_second * 1e-9);
+
+ while (hash_test_data)
+ {
+ fill_buffers (vm, buffer_indices, hash_test_data->data,
+ hash_test_data->data_size, n_buffers);
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
+ p[i] = vlib_buffer_get_current (b);
+ }
+
+ hf =
+ vnet_hash_function_from_name (htm->hash_name, hash_test_data->ftype);
+
+ if (!hf)
+ {
+	  err = clib_error_return (0, "unknown hash function name");
+ goto done;
+ }
+
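+      /* 5 measured intervals, each preceded by its own warmup rounds */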
+ for (i = 0; i < 5; i++)
+ {
+ u32 h[n_buffers];
+ for (j = 0; j < warmup_rounds; j++)
+ {
+ hf (p, h, n_buffers);
+ }
+
+ t0[i] = clib_cpu_time_now ();
+ for (j = 0; j < rounds; j++)
+ hf (p, h, n_buffers);
+ t1[i] = clib_cpu_time_now ();
+ }
+
+ vlib_cli_output (
+ vm, "===========================================================");
+ vlib_cli_output (vm, " Test: %s", hash_test_data->description);
+ vlib_cli_output (
+ vm, "===========================================================");
+ for (i = 0; i < 5; i++)
+ {
+ f64 tpp1 = (f64) (t1[i] - t0[i]) / (n_buffers * rounds);
+ f64 Mpps1 = vm->clib_time.clocks_per_second * 1e-6 / tpp1;
+
+ vlib_cli_output (vm, "%-2u: %.03f ticks/packet, %.02f Mpps\n", i + 1,
+ tpp1, Mpps1);
+ }
+ hash_test_data = hash_test_data->next;
+ }
+
+done:
+ if (n_alloc)
+ vlib_buffer_free (vm, buffer_indices, n_alloc);
+
+ vec_free (p);
+ vec_free (buffer_indices);
+ return err;
+}
+
+static clib_error_t *
+test_hash_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ hash_test_main_t *tm = &hash_test_main;
+ clib_error_t *err = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ tm->verbose = 1;
+ else if (unformat (input, "detail"))
+ tm->verbose = 2;
+ else if (unformat (input, "perf %s", &tm->hash_name))
+ ;
+ else if (unformat (input, "buffers %u", &tm->n_buffers))
+ ;
+ else if (unformat (input, "rounds %u", &tm->rounds))
+ ;
+ else if (unformat (input, "warmup-rounds %u", &tm->warmup_rounds))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ goto error;
+ }
+ }
+
+ err = test_hash_perf (vm, tm);
+
+error:
+ vec_free (tm->hash_name);
+
+ return err;
+}
+
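+/* Example invocation (hash name must resolve via
+ * vnet_hash_function_from_name):
+ *   test hash perf <hash-name> buffers 256 rounds 100 */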
+VLIB_CLI_COMMAND (test_hash_command, static) = {
+ .path = "test hash",
+ .short_help = "test hash [perf <hash-name>] [buffers <n>] [rounds <n>] "
+ "[warmup-rounds <n>]",
+ .function = test_hash_command_fn,
+};
+
+static clib_error_t *
+hash_test_init (vlib_main_t *vm)
+{
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (hash_test_init);
diff --git a/src/plugins/unittest/interface_test.c b/src/plugins/unittest/interface_test.c
index 4cf5ae43b3c..b5d5b6f776e 100644
--- a/src/plugins/unittest/interface_test.c
+++ b/src/plugins/unittest/interface_test.c
@@ -57,14 +57,12 @@ test_interface_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_interface_command, static) =
{
.path = "test interface link-state",
.short_help = "test interface link-state <interface> [up] [down]",
.function = test_interface_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/ip_psh_cksum_test.c b/src/plugins/unittest/ip_psh_cksum_test.c
new file mode 100644
index 00000000000..7a0e1c3a1f4
--- /dev/null
+++ b/src/plugins/unittest/ip_psh_cksum_test.c
@@ -0,0 +1,266 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/time.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_psh_cksum.h>
+
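+/* Dispatch on the IP version nibble of the first byte: 0x4x selects the
+ * IPv4 pseudo-header checksum, 0x6x the IPv6 one. */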
+static_always_inline void
+compute_ip_phc (void *p)
+{
+ if ((((u8 *) p)[0] & 0xf0) == 0x40)
+ ip4_pseudo_header_cksum (p);
+ else if ((((u8 *) p)[0] & 0xf0) == 0x60)
+ ip6_pseudo_header_cksum (p);
+}
+
+void
+compute_ip_phc_func (void **p, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
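+  /* process packets 4 at a time, prefetching the next 4 headers */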
+ while (n_left_from >= 8)
+ {
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ compute_ip_phc (p[0]);
+ compute_ip_phc (p[1]);
+ compute_ip_phc (p[2]);
+ compute_ip_phc (p[3]);
+
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ compute_ip_phc (p[0]);
+
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+typedef struct _phc_test_data
+{
+ const char *name;
+ const char *description;
+ u8 *data;
+ u32 data_size;
+ struct _phc_test_data *next;
+} phc_test_data_t;
+
+typedef struct
+{
+ int verbose;
+
+ char *phc_name;
+ u32 warmup_rounds;
+ u32 rounds;
+ u32 n_buffers;
+ u32 buffer_size;
+ phc_test_data_t *phc_test_data;
+} phc_test_main_t;
+
+phc_test_main_t phc_test_main;
+
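+/* Same constructor-based registration scheme as the hash test: each
+ * descriptor links itself onto phc_test_main's list at load time. */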
+#define PHC_TEST_REGISTER_DATA(x, ...) \
+ __VA_ARGS__ phc_test_data_t __phc_test_data_##x; \
+ static void __clib_constructor __phc_test_data_fn_##x (void) \
+ { \
+ phc_test_main_t *ptm = &phc_test_main; \
+ __phc_test_data_##x.next = ptm->phc_test_data; \
+ ptm->phc_test_data = &__phc_test_data_##x; \
+ } \
+ __VA_ARGS__ phc_test_data_t __phc_test_data_##x
+
+// ipv4
+u8 phc_ipv4_tcp_data[50] = {
+ 0x45, 0x00, 0x05, 0xdc, 0xdb, 0x42, 0x40, 0x00, 0x40, 0x06, 0xc4, 0x85, 0xc0,
+ 0xa8, 0x0a, 0x02, 0xc0, 0xa8, 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34, 0x93,
+ 0xa8, 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03, 0x00,
+ 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+PHC_TEST_REGISTER_DATA (ipv4_tcp, static) = {
+ .name = "ipv4-tcp",
+ .description = "IPv4 TCP",
+ .data = phc_ipv4_tcp_data,
+ .data_size = sizeof (phc_ipv4_tcp_data),
+};
+
+// ip6
+u8 phc_ipv6_udp_data[65] = {
+ 0x60, 0x0d, 0xf4, 0x97, 0x00, 0x40, 0x3a, 0x40, 0xfd, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0xfd, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+ 0x01, 0x80, 0x00, 0x10, 0x84, 0xb1, 0x25, 0x00, 0x01, 0x22, 0x57, 0xf0, 0x60,
+ 0x00, 0x00, 0x00, 0x00, 0xcb, 0x4a, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+};
+
+PHC_TEST_REGISTER_DATA (ipv6_udp, static) = {
+ .name = "ipv6-udp",
+ .description = "IPv6 UDP",
+ .data = phc_ipv6_udp_data,
+ .data_size = sizeof (phc_ipv6_udp_data),
+};
+
+static void
+fill_buffers (vlib_main_t *vm, u32 *buffer_indices, u8 *data, u32 data_size,
+ u32 n_buffers, u32 buffer_size)
+{
+ int i, j;
+ u64 seed = clib_cpu_time_now ();
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
+ clib_memcpy_fast (b->data, data, data_size);
+ b->current_data = 0;
+ for (j = data_size; j < buffer_size; j += 8)
+ *(u64 *) (b->data + j) = 1 + random_u64 (&seed);
+ b->current_length = buffer_size;
+ }
+}
+
+static clib_error_t *
+test_phc_perf (vlib_main_t *vm, phc_test_main_t *ptm)
+{
+ clib_error_t *err = 0;
+ u32 buffer_size = vlib_buffer_get_default_data_size (vm);
+ u32 n_buffers, n_alloc = 0, warmup_rounds, rounds;
+ u32 *buffer_indices = 0;
+ u64 t0[5], t1[5];
+ phc_test_data_t *phc_test_data = ptm->phc_test_data;
+ void **p = 0;
+ int i, j;
+
+ if (ptm->buffer_size > buffer_size)
+ return clib_error_return (0, "buffer size must be <= %u", buffer_size);
+
+ rounds = ptm->rounds ? ptm->rounds : 100;
+ n_buffers = ptm->n_buffers ? ptm->n_buffers : 256;
+ warmup_rounds = ptm->warmup_rounds ? ptm->warmup_rounds : 100;
+ buffer_size = ptm->buffer_size ? ptm->buffer_size : buffer_size;
+
+ vec_validate_aligned (p, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (buffer_indices, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ n_alloc = vlib_buffer_alloc (vm, buffer_indices, n_buffers);
+ if (n_alloc != n_buffers)
+ {
+ err = clib_error_return (0, "buffer alloc failure");
+ goto done;
+ }
+
+ vlib_cli_output (
+ vm,
+ "pseudo header checksum: buffer-size %u, n_buffers %u rounds %u "
+ "warmup-rounds %u",
+ buffer_size, n_buffers, rounds, warmup_rounds);
+ vlib_cli_output (vm, " cpu-freq %.2f GHz",
+ (f64) vm->clib_time.clocks_per_second * 1e-9);
+
+ while (phc_test_data)
+ {
+ fill_buffers (vm, buffer_indices, phc_test_data->data,
+ phc_test_data->data_size, n_buffers, buffer_size);
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
+ p[i] = vlib_buffer_get_current (b);
+ }
+
+ for (i = 0; i < 5; i++)
+ {
+ for (j = 0; j < warmup_rounds; j++)
+ {
+ compute_ip_phc_func (p, n_buffers);
+ }
+
+ t0[i] = clib_cpu_time_now ();
+ for (j = 0; j < rounds; j++)
+ compute_ip_phc_func (p, n_buffers);
+ t1[i] = clib_cpu_time_now ();
+ }
+
+ vlib_cli_output (
+ vm, "===========================================================");
+ vlib_cli_output (vm, " Test: %s", phc_test_data->description);
+ vlib_cli_output (
+ vm, "===========================================================");
+ for (i = 0; i < 5; i++)
+ {
+ f64 tpp1 = (f64) (t1[i] - t0[i]) / (n_buffers * rounds);
+ f64 Mpps1 = vm->clib_time.clocks_per_second * 1e-6 / tpp1;
+
+ vlib_cli_output (vm, "%-2u: %.03f ticks/packet, %.02f Mpps\n", i + 1,
+ tpp1, Mpps1);
+ }
+ phc_test_data = phc_test_data->next;
+ }
+
+done:
+ if (n_alloc)
+ vlib_buffer_free (vm, buffer_indices, n_alloc);
+
+ vec_free (p);
+ vec_free (buffer_indices);
+ return err;
+}
+
+static clib_error_t *
+test_phc_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ phc_test_main_t *ptm = &phc_test_main;
+ clib_error_t *err = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ ptm->verbose = 1;
+ else if (unformat (input, "detail"))
+ ptm->verbose = 2;
+ else if (unformat (input, "buffers %u", &ptm->n_buffers))
+ ;
+ else if (unformat (input, "buffer-size %u", &ptm->buffer_size))
+ ;
+ else if (unformat (input, "rounds %u", &ptm->rounds))
+ ;
+ else if (unformat (input, "warmup-rounds %u", &ptm->warmup_rounds))
+ ;
+ else
+ {
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ }
+
+  err = test_phc_perf (vm, ptm);
+
+ return err;
+}
+
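+/* Example invocation (values are illustrative):
+ *   test phc buffers 256 buffer-size 128 rounds 100 */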
+VLIB_CLI_COMMAND (test_phc_command, static) = {
+ .path = "test phc",
+ .short_help = "test phc [buffers <n>] [buffer-size <size>] [rounds <n>] "
+ "[warmup-rounds <n>]",
+ .function = test_phc_command_fn,
+};
+
+static clib_error_t *
+phc_test_init (vlib_main_t *vm)
+{
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (phc_test_init);
diff --git a/src/plugins/unittest/ipsec_test.c b/src/plugins/unittest/ipsec_test.c
index 0e9865052b4..98253eeb12a 100644
--- a/src/plugins/unittest/ipsec_test.c
+++ b/src/plugins/unittest/ipsec_test.c
@@ -15,10 +15,11 @@
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/ipsec_sa.h>
+#include <vnet/ipsec/ipsec_output.h>
static clib_error_t *
-test_ipsec_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
+test_ipsec_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
u64 seq_num;
u32 sa_id;
@@ -47,25 +48,336 @@ test_ipsec_command_fn (vlib_main_t * vm,
sa->seq = seq_num & 0xffffffff;
sa->seq_hi = seq_num >> 32;
+ /* clear the window */
+ if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
+ clib_bitmap_zero (sa->replay_window_huge);
+ else
+ sa->replay_window = 0;
+
ipsec_sa_unlock (sa_index);
}
else
{
- return clib_error_return (0, "unknown SA `%U'",
- format_unformat_error, input);
+ return clib_error_return (0, "unknown SA `%U'", format_unformat_error,
+ input);
}
return (NULL);
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (test_ipsec_command, static) =
+static clib_error_t *
+test_ipsec_spd_outbound_perf_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
+ clib_error_t *err = 0;
+ ipsec_crypto_alg_t crypto_alg = IPSEC_CRYPTO_ALG_AES_GCM_128;
+ ipsec_integ_alg_t integ_alg = IPSEC_INTEG_ALG_NONE;
+ ipsec_protocol_t proto = IPSEC_PROTOCOL_ESP;
+ ipsec_sa_flags_t sa_flags = IPSEC_SA_FLAG_NONE;
+ ipsec_key_t ck = { 0 };
+ u8 key_data[] = { 31, 32, 33, 34, 35, 36, 37, 38,
+ 39, 30, 31, 32, 33, 34, 35, 36 };
+ ipsec_mk_key (&ck, key_data, 16);
+ ipsec_key_t ik = { 0 };
+ u32 sa_id = 123456, spi = 654321, salt = 1234, sai;
+ u16 udp_src = IPSEC_UDP_PORT_NONE, udp_dst = IPSEC_UDP_PORT_NONE;
+ tunnel_t tun = {};
+
+ /* SPD policy */
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p0 = NULL;
+ ipsec_spd_t *spd0;
+ uword *pp;
+ u32 stat_index, spd_idx, spd_id = 1;
+ int is_add = 1;
+ int rv;
+ ipsec_policy_t *p_vec = NULL;
+ u64 i;
+ u64 flows = 100;
+
+ u64 t_add_0 = 0;
+ u64 t_add_1 = 0;
+ u64 t_add = 0;
+ u64 t_look_0 = 0;
+ u64 t_look_1 = 0;
+ u64 t_look = 0;
+ u8 flow_cache_enabled = im->output_flow_cache_flag;
+ u32 count_cached = 0;
+ u32 count_slow_path = 0;
+ u32 seed = random_default_seed ();
+ u32 *rand_val = NULL;
+ u32 ip4_start;
+#define BURST_MAX_SIZE 256
+ ipsec_policy_t *policies[BURST_MAX_SIZE];
+ ipsec4_spd_5tuple_t ip4_5tuples[BURST_MAX_SIZE];
+ u32 burst_size = 10;
+ int burst_enabled = 0;
+ u64 t0 = clib_cpu_time_now ();
+ u64 t1 = 0;
+ u32 k = 0, m;
+ u64 burst_counter = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "flows %d", &flows))
+ ;
+ else if (unformat (input, "burst %d", &burst_size))
+ {
+ if (burst_size == 0)
+ burst_enabled = 0;
+ else
+ {
+ burst_enabled = 1;
+ burst_size = clib_min (burst_size, BURST_MAX_SIZE);
+ }
+ }
+ else
+ break;
+ }
+
+ vlib_cli_output (vm, "Create env:");
+ /* creating a new SA */
+ rv = ipsec_sa_add_and_lock (sa_id, spi, proto, crypto_alg, &ck, integ_alg,
+ &ik, sa_flags, clib_host_to_net_u32 (salt),
+ udp_src, udp_dst, 0, &tun, &sai);
+ if (rv)
+ {
+ err = clib_error_return (0, "create sa failure");
+ goto done;
+ }
+ else
+ vlib_cli_output (vm, "\tAdd a new SA");
+
+ /* creating a new SPD */
+ rv = ipsec_add_del_spd (vm, spd_id, is_add);
+ if (rv)
+ {
+ err = clib_error_return (0, "create spd failure");
+ goto done;
+ }
+ else
+ vlib_cli_output (vm, "\tAdd a new SPD");
+
+ /* vector for spd_policy */
+ vec_validate (p_vec, flows + 1);
+ vec_validate (rand_val, flows + 1);
+
+ /* fill spd policy */
+ for (i = 0; i < flows; i++)
+ {
+ rand_val[i] = random_u32 (&seed) % flows;
+
+ p_vec[i].type = IPSEC_SPD_POLICY_IP4_OUTBOUND;
+ p_vec[i].priority = flows - i;
+ p_vec[i].policy = IPSEC_POLICY_ACTION_PROTECT;
+ p_vec[i].id = spd_id;
+ p_vec[i].sa_id = sa_id;
+ p_vec[i].protocol = IP_PROTOCOL_UDP;
+ p_vec[i].lport.start = 1;
+ p_vec[i].lport.stop = 1;
+ p_vec[i].rport.start = 1;
+ p_vec[i].rport.stop = 1;
+ /* address: 1.0.0.0 as u32 */
+ ip4_start = 16777216;
+ p_vec[i].laddr.start.ip4.data_u32 =
+ clib_host_to_net_u32 (ip4_start + i * 32);
+ p_vec[i].laddr.stop.ip4.data_u32 =
+ clib_host_to_net_u32 (ip4_start + i * 32);
+ p_vec[i].raddr.start.ip4.data_u32 =
+ clib_host_to_net_u32 (ip4_start + i * 32);
+ p_vec[i].raddr.stop.ip4.data_u32 =
+ clib_host_to_net_u32 (ip4_start + i * 32);
+ }
+
+ vlib_cli_output (vm, "Add SPD Policy");
+ t_add_0 = clib_cpu_time_now ();
+ for (i = 0; i < flows; i++)
+ {
+ rv = ipsec_add_del_policy (vm, &p_vec[i], is_add, &stat_index);
+ if (rv)
+ {
+	  clib_warning ("Failed to add SPD policy: %u", stat_index);
+ err = clib_error_return (0, "add SPD Policy failure");
+ goto done;
+ }
+ }
+ t_add_1 = clib_cpu_time_now ();
+
+ pp = hash_get (im->spd_index_by_spd_id, spd_id);
+ spd_idx = pp[0];
+ spd0 = pool_elt_at_index (im->spds, spd_idx);
+
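+  /* 1M lookups cycling over the configured flows; rand_val randomizes
+   * which policy each iteration hits. */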
+ vlib_cli_output (vm, "Lookup SPD Policy");
+ u64 j = 0;
+ u64 n_lookup = 1000 * 1000;
+ t_look_0 = clib_cpu_time_now ();
+ for (i = 0; i < n_lookup; i++)
+ {
+ if (flows == j)
+ j = 0;
+
+ p0 = NULL;
+ if (flow_cache_enabled)
+ {
+ p0 = ipsec4_out_spd_find_flow_cache_entry (
+ im, 0,
+ clib_net_to_host_u32 (ip4_start +
+ ((flows - 1) - rand_val[j]) * 32),
+ clib_net_to_host_u32 (ip4_start +
+ ((flows - 1) - rand_val[j]) * 32),
+ clib_net_to_host_u16 (1), clib_net_to_host_u16 (1));
+ if (p0)
+ count_cached++;
+ }
+ if (p0 == NULL)
+ {
+ if (burst_enabled)
+ {
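+	      /* accumulate 5-tuples; once a full burst is gathered, match
+	       * them in one call to ipsec_output_policy_match_n */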
+ u32 src_addr = (ip4_start + ((flows - 1) - rand_val[j]) * 32);
+ u32 dst_addr = (ip4_start + ((flows - 1) - rand_val[j]) * 32);
+ ipsec4_spd_5tuple_t ip4_5tuple = {
+ .ip4_addr = { (ip4_address_t) src_addr,
+ (ip4_address_t) dst_addr },
+ .port = { 1, 1 },
+ .proto = IP_PROTOCOL_UDP
+ };
+
+ if (k == burst_size)
+ {
+ k = 0;
+ clib_memset (policies, 0,
+ burst_size * sizeof (ipsec_policy_t *));
+ burst_counter += ipsec_output_policy_match_n (
+ spd0, ip4_5tuples, policies, burst_size,
+ flow_cache_enabled);
+ for (m = 0; m < burst_size; m++)
+ {
+ ASSERT (policies[m] != 0);
+ }
+ }
+
+ clib_memcpy (ip4_5tuples + k, &ip4_5tuple,
+ sizeof (ipsec4_spd_5tuple_t));
+ k++;
+ }
+ else
+ {
+
+ p0 = ipsec_output_policy_match (
+ spd0, IP_PROTOCOL_UDP,
+ (ip4_start + ((flows - 1) - rand_val[j]) * 32),
+ (ip4_start + ((flows - 1) - rand_val[j]) * 32), 1, 1,
+ flow_cache_enabled);
+ }
+
+ count_slow_path++;
+ }
+ j++;
+ if (!burst_enabled)
+ ASSERT (p0 != 0);
+ }
+
+ if (burst_enabled && k > 0)
+ {
+ clib_memset (policies, 0, k * sizeof (ipsec_policy_t *));
+ burst_counter += ipsec_output_policy_match_n (
+ spd0, ip4_5tuples, policies, k, flow_cache_enabled);
+ for (m = 0; m < k; m++)
+ {
+ ASSERT (policies[m] != 0);
+ }
+ }
+ t_look_1 = clib_cpu_time_now ();
+
+ t_add = (t_add_1 - t_add_0);
+ t_look = (t_look_1 - t_look_0);
+
+ vlib_cli_output (vm, "Results Outbound:");
+ vlib_cli_output (vm, "Time to add %u flows: \t\t%12.10f s", flows,
+ (t_add / vm->clib_time.clocks_per_second));
+ vlib_cli_output (vm, "Average time to add 1 flow: \t\t%12.10f s",
+ ((t_add / flows) / vm->clib_time.clocks_per_second));
+  vlib_cli_output (vm, "Time for %lu lookups: \t\t%12.10f s", n_lookup,
+		   (t_look / vm->clib_time.clocks_per_second));
+  vlib_cli_output (vm, "Average time per lookup: \t\t%12.10f s",
+		   ((t_look / n_lookup) / vm->clib_time.clocks_per_second));
+
+ vlib_cli_output (vm, " ");
+
+ vlib_cli_output (vm, "Cycle CPU to add %u flows: \t\t%32lu cycles", flows,
+ t_add);
+ vlib_cli_output (vm, "Average cycle CPU to add 1 flow: \t%32lu cycles",
+ t_add / flows);
+  vlib_cli_output (vm, "Cycle CPU for %lu lookups: \t%32lu cycles", n_lookup,
+		   t_look);
+  vlib_cli_output (vm, "Average cycle CPU per lookup: \t%32lu cycles",
+		   t_look / n_lookup);
+
+ if (count_slow_path || count_cached)
+ vlib_cli_output (
+ vm, "flow cache hit rate: \t\t%12.10f\n cached: \t%d\n slow_path: \t%d",
+ ((float) count_cached) / ((float) count_cached + count_slow_path),
+ count_cached, count_slow_path);
+
+ if (burst_enabled)
+ vlib_cli_output (vm, "Total number of packets matched in bursts: \t\t%d\n",
+ burst_counter);
+
+done:
+ vlib_cli_output (vm, "Cleaning:");
+ /* delete SPD policy */
+ is_add = 0;
+ for (i = 0; i < flows; i++)
+ {
+ rv = ipsec_add_del_policy (vm, &p_vec[i], is_add, &stat_index);
+ if (rv)
+ {
+	  clib_warning ("Failed to delete SPD policy: %u", i);
+ err = clib_error_return (0, "delete SPD Policy failure");
+ }
+ }
+ vlib_cli_output (vm, "\tDelete all SPD Policy");
+
+ /* delete SPD */
+ rv = ipsec_add_del_spd (vm, spd_id, is_add);
+ if (rv)
+ {
+ err = clib_error_return (0, "delete spd failure");
+ }
+ else
+ vlib_cli_output (vm, "\tDelete SPD");
+
+ /* delete SA */
+ rv = ipsec_sa_unlock_id (sa_id);
+ if (rv)
+ {
+ err = clib_error_return (0, "delete sa failure");
+ }
+ else
+ vlib_cli_output (vm, "\tDelete SA");
+
+ t1 = clib_cpu_time_now ();
+ vlib_cli_output (vm, "Time for test: \t%12.10f s",
+ ((t1 - t0) / vm->clib_time.clocks_per_second));
+
+ vec_free (p_vec);
+ vlib_cli_output (vm, "End");
+
+ return (err);
+}
+
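+/* Example invocation (values are illustrative):
+ *   test ipsec_spd_outbound_perf flows 1000 burst 32 */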
+VLIB_CLI_COMMAND (test_ipsec_spd_perf_command, static) = {
+ .path = "test ipsec_spd_outbound_perf",
+  .short_help = "test ipsec_spd_outbound_perf flows <n_flows> [burst <n>]",
+ .function = test_ipsec_spd_outbound_perf_command_fn,
+};
+
+VLIB_CLI_COMMAND (test_ipsec_command, static) = {
.path = "test ipsec",
.short_help = "test ipsec sa <ID> seq-num <VALUE>",
.function = test_ipsec_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/llist_test.c b/src/plugins/unittest/llist_test.c
index a67075de44e..5a712cde33e 100644
--- a/src/plugins/unittest/llist_test.c
+++ b/src/plugins/unittest/llist_test.c
@@ -132,13 +132,11 @@ llist_test_basic (vlib_main_t * vm, unformat_input_t * input)
list_test_is_sane (pelts, ll_test, he);
i--;
- /* *INDENT-OFF* */
clib_llist_foreach (pelts, ll_test, he, e, ({
if (i != e->data)
LLIST_TEST (0, "incorrect element i = %u data = %u", i, e->data);
i--;
}));
- /* *INDENT-ON* */
LLIST_TEST (i == -1, "head insertion works i = %d", i);
@@ -180,13 +178,11 @@ llist_test_basic (vlib_main_t * vm, unformat_input_t * input)
"list should not be empty");
i--;
- /* *INDENT-OFF* */
clib_llist_foreach_reverse (pelts, ll_test2, he2, e, ({
if (i != e->data)
LLIST_TEST (0, "incorrect element i = %u data = %u", i, e->data);
i--;
}));
- /* *INDENT-ON* */
LLIST_TEST (i == -1, "tail insertion works");
/*
@@ -217,13 +213,11 @@ llist_test_basic (vlib_main_t * vm, unformat_input_t * input)
i = 0;
- /* *INDENT-OFF* */
clib_llist_foreach (pelts, ll_test, he, e, ({
if (i != e->data)
LLIST_TEST (0, "incorrect element i = %u data = %u", i, e->data);
i++;
}));
- /* *INDENT-ON* */
LLIST_TEST (i == 100, "move from ll_test2 to ll_test worked i %u", i);
@@ -335,14 +329,12 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (llist_test_command, static) =
{
.path = "test llist",
.short_help = "internal llist unit tests",
.function = llist_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/mactime_test.c b/src/plugins/unittest/mactime_test.c
index 46d6263b938..5bc195c9694 100644
--- a/src/plugins/unittest/mactime_test.c
+++ b/src/plugins/unittest/mactime_test.c
@@ -165,14 +165,12 @@ test_time_range_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_time_range_command, static) =
{
.path = "test time-range",
.short_help = "test time-range",
.function = test_time_range_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/mfib_test.c b/src/plugins/unittest/mfib_test.c
index c456d59d7db..5cf821fb5f9 100644
--- a/src/plugins/unittest/mfib_test.c
+++ b/src/plugins/unittest/mfib_test.c
@@ -123,40 +123,31 @@ mfib_test_mk_intf (u32 ninterfaces)
for (i = 0; i < ninterfaces; i++)
{
- hw_address[5] = i;
-
- error = ethernet_register_interface(vnet_get_main(),
- test_interface_device_class.index,
- i /* instance */,
- hw_address,
- &tm->hw_if_indicies[i],
- /* flag change */ 0);
-
- MFIB_TEST((NULL == error), "ADD interface %d", i);
-
- error = vnet_hw_interface_set_flags(vnet_get_main(),
- tm->hw_if_indicies[i],
- VNET_HW_INTERFACE_FLAG_LINK_UP);
- tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
- tm->hw_if_indicies[i]);
- vec_validate (ip4_main.fib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- vec_validate (ip6_main.fib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
-
- vec_validate (ip4_main.mfib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- vec_validate (ip6_main.mfib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- ip4_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- ip6_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
-
- error = vnet_sw_interface_set_flags(vnet_get_main(),
- tm->hw[i]->sw_if_index,
- VNET_SW_INTERFACE_FLAG_ADMIN_UP);
- MFIB_TEST((NULL == error), "UP interface %d", i);
+ vnet_eth_interface_registration_t eir = {};
+ vnet_main_t *vnm = vnet_get_main ();
+
+ hw_address[5] = i;
+
+ eir.dev_class_index = test_interface_device_class.index;
+ eir.dev_instance = i;
+ eir.address = hw_address;
+ tm->hw_if_indicies[i] = vnet_eth_register_interface (vnm, &eir);
+
+ error =
+ vnet_hw_interface_set_flags (vnet_get_main (), tm->hw_if_indicies[i],
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ tm->hw[i] =
+ vnet_get_hw_interface (vnet_get_main (), tm->hw_if_indicies[i]);
+ ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+
+ ip4_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ ip6_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+
+ error =
+ vnet_sw_interface_set_flags (vnet_get_main (), tm->hw[i]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ MFIB_TEST ((NULL == error), "UP interface %d", i);
}
/*
* re-eval after the inevitable realloc
@@ -413,10 +404,8 @@ mfib_test_i (fib_protocol_t PROTO,
.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
};
- mfib_table_entry_path_update(fib_index,
- pfx_no_forward,
- MFIB_SOURCE_API,
- &path_via_if0);
+ mfib_table_entry_path_update (fib_index, pfx_no_forward, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
mfei_no_f = mfib_table_lookup_exact_match(fib_index, pfx_no_forward);
MFIB_TEST(!mfib_test_entry(mfei_no_f,
@@ -464,18 +453,12 @@ mfib_test_i (fib_protocol_t PROTO,
/*
* An (S,G) with 1 accepting and 3 forwarding paths
*/
- mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_via_if0);
- mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_via_if1);
- mfib_table_entry_paths_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- two_paths);
+ mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
+ mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if1);
+ mfib_table_entry_paths_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, two_paths);
mfei_s_g = mfib_table_lookup_exact_match(fib_index, pfx_s_g);
@@ -504,14 +487,11 @@ mfib_test_i (fib_protocol_t PROTO,
* A (*,G), which the same G as the (S,G).
* different paths. test our LPM.
*/
- mfei_g_1 = mfib_table_entry_path_update(fib_index,
- pfx_star_g_1,
- MFIB_SOURCE_API,
- &path_via_if0);
- mfib_table_entry_path_update(fib_index,
- pfx_star_g_1,
- MFIB_SOURCE_API,
- &path_via_if1);
+ mfei_g_1 =
+ mfib_table_entry_path_update (fib_index, pfx_star_g_1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
+ mfib_table_entry_path_update (fib_index, pfx_star_g_1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if1);
/*
* test we find the *,G and S,G via LPM and exact matches
@@ -574,14 +554,12 @@ mfib_test_i (fib_protocol_t PROTO,
* different paths. test our LPM.
*/
path_via_if2.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei_g_m = mfib_table_entry_path_update(fib_index,
- pfx_star_g_slash_m,
- MFIB_SOURCE_API,
- &path_via_if2);
- mfib_table_entry_path_update(fib_index,
- pfx_star_g_slash_m,
- MFIB_SOURCE_API,
- &path_via_if3);
+ mfei_g_m = mfib_table_entry_path_update (
+ fib_index, pfx_star_g_slash_m, MFIB_SOURCE_API, MFIB_ENTRY_FLAG_NONE,
+ &path_via_if2);
+ mfib_table_entry_path_update (fib_index, pfx_star_g_slash_m,
+ MFIB_SOURCE_API, MFIB_ENTRY_FLAG_NONE,
+ &path_via_if3);
/*
* test we find the (*,G/m), (*,G) and (S,G) via LPM and exact matches
@@ -655,10 +633,8 @@ mfib_test_i (fib_protocol_t PROTO,
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
};
- mfei = mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_for_us);
+ mfei = mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_for_us);
MFIB_TEST(!mfib_test_entry(mfei,
MFIB_ENTRY_FLAG_NONE,
@@ -692,10 +668,8 @@ mfib_test_i (fib_protocol_t PROTO,
* - expect it to be removed from the replication set.
*/
path_via_if3.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_via_if3);
+ mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if3);
MFIB_TEST(!mfib_test_entry(mfei,
MFIB_ENTRY_FLAG_NONE,
@@ -719,10 +693,8 @@ mfib_test_i (fib_protocol_t PROTO,
path_via_if3.frp_mitf_flags = (MFIB_ITF_FLAG_FORWARD |
MFIB_ITF_FLAG_ACCEPT |
MFIB_ITF_FLAG_NEGATE_SIGNAL);
- mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_via_if3);
+ mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if3);
mfei = mfib_table_lookup_exact_match(fib_index,
pfx_s_g);
@@ -824,7 +796,7 @@ mfib_test_i (fib_protocol_t PROTO,
/* MFIB_TEST_NS(!mfib_test_entry_no_itf(mfei, tm->hw[3]->sw_if_index)); */
/*
- * remove the last path and the accpeting only interface,
+ * remove the last path and the accepting only interface,
* the entry still has flags so it remains
*/
vec_reset_length(two_paths);
@@ -863,10 +835,9 @@ mfib_test_i (fib_protocol_t PROTO,
*/
path_via_if0.frp_mitf_flags = (MFIB_ITF_FLAG_ACCEPT |
MFIB_ITF_FLAG_NEGATE_SIGNAL);
- mfei_g_2 = mfib_table_entry_path_update(fib_index,
- pfx_star_g_2,
- MFIB_SOURCE_API,
- &path_via_if0);
+ mfei_g_2 =
+ mfib_table_entry_path_update (fib_index, pfx_star_g_2, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
MFIB_TEST(!mfib_test_entry(mfei_g_2,
MFIB_ENTRY_FLAG_NONE,
0),
@@ -891,10 +862,9 @@ mfib_test_i (fib_protocol_t PROTO,
*/
path_via_if0.frp_mitf_flags = (MFIB_ITF_FLAG_ACCEPT |
MFIB_ITF_FLAG_NEGATE_SIGNAL);
- mfei_g_3 = mfib_table_entry_path_update(fib_index,
- pfx_star_g_3,
- MFIB_SOURCE_API,
- &path_via_if0);
+ mfei_g_3 =
+ mfib_table_entry_path_update (fib_index, pfx_star_g_3, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
MFIB_TEST(!mfib_test_entry(mfei_g_3,
MFIB_ENTRY_FLAG_NONE,
0),
@@ -1073,14 +1043,12 @@ mfib_test_i (fib_protocol_t PROTO,
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
};
- mfei_g_1 = mfib_table_entry_path_update(fib_index,
- pfx_star_g_1,
- MFIB_SOURCE_API,
- &path_via_nbr1);
- mfei_g_1 = mfib_table_entry_path_update(fib_index,
- pfx_star_g_1,
- MFIB_SOURCE_API,
- &path_via_nbr2);
+ mfei_g_1 =
+ mfib_table_entry_path_update (fib_index, pfx_star_g_1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_nbr1);
+ mfei_g_1 =
+ mfib_table_entry_path_update (fib_index, pfx_star_g_1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_nbr2);
MFIB_TEST(!mfib_test_entry(mfei_g_1,
MFIB_ENTRY_FLAG_NONE,
2,
@@ -1201,9 +1169,7 @@ mfib_test_i (fib_protocol_t PROTO,
* MPLS enable an interface so we get the MPLS table created
*/
mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API, NULL);
- mpls_sw_interface_enable_disable(&mpls_main,
- tm->hw[0]->sw_if_index,
- 1, 0);
+ mpls_sw_interface_enable_disable (&mpls_main, tm->hw[0]->sw_if_index, 1);
lfei = fib_table_entry_update_one_path(0, // default MPLS Table
&pfx_3500,
@@ -1241,10 +1207,8 @@ mfib_test_i (fib_protocol_t PROTO,
FIB_FORW_CHAIN_TYPE_MPLS_EOS,
&mldp_dpo);
- mfei = mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_via_mldp);
+ mfei = mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_mldp);
MFIB_TEST(!mfib_test_entry(mfei,
MFIB_ENTRY_FLAG_NONE,
@@ -1256,10 +1220,8 @@ mfib_test_i (fib_protocol_t PROTO,
/*
* add a for-us path. this tests two types of non-attached paths on one entry
*/
- mfei = mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_for_us);
+ mfei = mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_for_us);
MFIB_TEST(!mfib_test_entry(mfei,
MFIB_ENTRY_FLAG_NONE,
2,
@@ -1299,9 +1261,7 @@ mfib_test_i (fib_protocol_t PROTO,
/*
* MPLS disable the interface
*/
- mpls_sw_interface_enable_disable(&mpls_main,
- tm->hw[0]->sw_if_index,
- 0, 0);
+ mpls_sw_interface_enable_disable (&mpls_main, tm->hw[0]->sw_if_index, 0);
mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API);
/*
@@ -1585,10 +1545,8 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* Insert the less specific /28
*/
- mfib_table_entry_path_update(fib_index,
- pfx_cover,
- MFIB_SOURCE_API,
- &path_via_if1);
+ mfib_table_entry_path_update (fib_index, pfx_cover, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if1);
mfei_cover = mfib_table_lookup_exact_match(fib_index, pfx_cover);
@@ -1612,10 +1570,8 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* add another path to the cover
*/
- mfib_table_entry_path_update(fib_index,
- pfx_cover,
- MFIB_SOURCE_API,
- &path_via_if2);
+ mfib_table_entry_path_update (fib_index, pfx_cover, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if2);
/*
     * expect the /32 and /28 to be via both paths
@@ -1669,10 +1625,8 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* add an accepting path to the cover
*/
- mfib_table_entry_path_update(fib_index,
- pfx_cover,
- MFIB_SOURCE_API,
- &path_via_if0);
+ mfib_table_entry_path_update (fib_index, pfx_cover, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
/*
     * expect the /32 and /28 to be via both paths
@@ -1707,10 +1661,8 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* add a for-us path to the cover
*/
- mfib_table_entry_path_update(fib_index,
- pfx_cover,
- MFIB_SOURCE_API,
- &path_for_us);
+ mfib_table_entry_path_update (fib_index, pfx_cover, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_for_us);
/*
* expect the /32 and /28 to be via all three paths
@@ -1778,10 +1730,9 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* source the /32 with its own path
*/
- mfei_host1 = mfib_table_entry_path_update(fib_index,
- pfx_host1,
- MFIB_SOURCE_API,
- &path_via_if2);
+ mfei_host1 =
+ mfib_table_entry_path_update (fib_index, pfx_host1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if2);
MFIB_TEST(!mfib_test_entry(mfei_host1,
MFIB_ENTRY_FLAG_NONE,
1,
@@ -1811,10 +1762,9 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* add the RR back then remove the path and RR
*/
- mfei_host1 = mfib_table_entry_path_update(fib_index,
- pfx_host1,
- MFIB_SOURCE_API,
- &path_via_if2);
+ mfei_host1 =
+ mfib_table_entry_path_update (fib_index, pfx_host1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if2);
MFIB_TEST(!mfib_test_entry(mfei_host1,
MFIB_ENTRY_FLAG_NONE,
1,
diff --git a/src/plugins/unittest/mpcap_node.c b/src/plugins/unittest/mpcap_node.c
index 083c22e32df..12c62cf13cd 100644
--- a/src/plugins/unittest/mpcap_node.c
+++ b/src/plugins/unittest/mpcap_node.c
@@ -215,7 +215,6 @@ VLIB_NODE_FN (mpcap_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
0 /* is_trace */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (mpcap_node) =
{
@@ -249,7 +248,6 @@ mpcap_node_init (vlib_main_t *vm)
VLIB_INIT_FUNCTION (mpcap_node_init);
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/pool_test.c b/src/plugins/unittest/pool_test.c
index 237b6beea09..23ac6d6d95f 100644
--- a/src/plugins/unittest/pool_test.c
+++ b/src/plugins/unittest/pool_test.c
@@ -19,29 +19,37 @@ static clib_error_t *
test_pool_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
- int i;
+ static int sizes[] = { 3, 31, 2042, 2048 };
+
+ int i, j;
u64 *pool;
+ uword this_size;
- pool_init_fixed (pool, 2048);
+ for (j = 0; j < ARRAY_LEN (sizes); j++)
+ {
+ this_size = sizes[j];
- i = 0;
+ pool_init_fixed (pool, this_size);
- while (pool_free_elts (pool) > 0)
- {
- u64 *p __attribute__ ((unused));
+ i = 0;
- pool_get (pool, p);
- i++;
- }
+ while (pool_free_elts (pool) > 0)
+ {
+ u64 *p __attribute__ ((unused));
- vlib_cli_output (vm, "allocated %d elts\n", i);
+ pool_get (pool, p);
+ i++;
+ }
- for (--i; i >= 0; i--)
- {
- pool_put_index (pool, i);
- }
+ vlib_cli_output (vm, "allocated %d elts\n", i);
- ALWAYS_ASSERT (pool_free_elts (pool) == 2048);
+ for (--i; i >= 0; i--)
+ {
+ pool_put_index (pool, i);
+ }
+
+ ALWAYS_ASSERT (pool_free_elts (pool) == this_size);
+ }
vlib_cli_output (vm, "Test succeeded...\n");
return 0;
diff --git a/src/plugins/unittest/punt_test.c b/src/plugins/unittest/punt_test.c
index 0c4622283e0..7d00e5b8920 100644
--- a/src/plugins/unittest/punt_test.c
+++ b/src/plugins/unittest/punt_test.c
@@ -129,7 +129,6 @@ punt_test_pg1_ip6 (vlib_main_t * vm,
return (punt_test_fwd (vm, node, frame, FIB_PROTOCOL_IP6, SW_IF_INDEX_PG1));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (punt_test_pg0_ip4_node) = {
.function = punt_test_pg0_ip4,
.name = "punt-test-pg0-ip4",
@@ -154,7 +153,6 @@ VLIB_REGISTER_NODE (punt_test_pg1_ip6_node) = {
.vector_size = sizeof (u32),
.format_trace = format_punt_trace,
};
-/* *INDENT-ON* */
typedef struct punt_feat_trace_t_
{
@@ -242,7 +240,6 @@ punt_test_feat_ip6 (vlib_main_t * vm,
return (punt_test_feat_inline (vm, node, frame, 0));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (punt_test_feat_ip6_node) = {
.function = punt_test_feat_ip6,
.name = "punt-test-feat-ip6",
@@ -273,7 +270,6 @@ VNET_FEATURE_INIT (punt_test_feat_ip4_feature, static) =
.arc_name = "ip4-unicast",
.node_name = "punt-test-feat-ip4",
};
-/* *INDENT-ON* */
static clib_error_t *
punt_test (vlib_main_t * vm,
@@ -382,14 +378,12 @@ punt_test (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_fib_command, static) =
{
.path = "test punt",
.short_help = "punt unit tests - DO NOT RUN ON A LIVE SYSTEM",
.function = punt_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/rbtree_test.c b/src/plugins/unittest/rbtree_test.c
index bfab98c3cd7..4a1fcc4dd70 100644
--- a/src/plugins/unittest/rbtree_test.c
+++ b/src/plugins/unittest/rbtree_test.c
@@ -238,14 +238,12 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (rbtree_test_command, static) =
{
.path = "test rbtree",
.short_help = "internal rbtree unit tests",
.function = rbtree_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/segment_manager_test.c b/src/plugins/unittest/segment_manager_test.c
index 31b417aef24..a106470ee48 100644
--- a/src/plugins/unittest/segment_manager_test.c
+++ b/src/plugins/unittest/segment_manager_test.c
@@ -79,7 +79,6 @@ placeholder_server_rx_callback (session_t * s)
return -1;
}
-/* *INDENT-OFF* */
static session_cb_vft_t placeholder_session_cbs = {
.session_reset_callback = placeholder_session_reset_callback,
.session_connected_callback = placeholder_session_connected_callback,
@@ -89,7 +88,6 @@ static session_cb_vft_t placeholder_session_cbs = {
.add_segment_callback = placeholder_add_segment_callback,
.del_segment_callback = placeholder_del_segment_callback,
};
-/* *INDENT-ON* */
static char *states_str[] = {
#define _(sym,str) str,
@@ -178,14 +176,14 @@ segment_manager_test_pressure_1 (vlib_main_t * vm, unformat_input_t * input)
svm_fifo_enqueue (rx_fifo, fifo_size, data);
svm_fifo_enqueue (tx_fifo, fifo_size, data);
svm_fifo_enqueue (tx_fifo, fifo_size, data);
- svm_fifo_enqueue (tx_fifo, fifo_size, data);
- /* 8 chunks : 49% */
+ /* 7 chunks : ~44% */
rv = fifo_segment_get_mem_status (fs);
SEG_MGR_TEST ((rv == MEMORY_PRESSURE_NO_PRESSURE),
"fifo_segment_get_mem_status %s", states_str[rv]);
/* grow fifos */
+ svm_fifo_enqueue (tx_fifo, fifo_size, data);
svm_fifo_enqueue (rx_fifo, fifo_size, data);
svm_fifo_enqueue (tx_fifo, fifo_size, data);
@@ -212,7 +210,7 @@ segment_manager_test_pressure_1 (vlib_main_t * vm, unformat_input_t * input)
svm_fifo_dequeue_drop (tx_fifo, fifo_size);
svm_fifo_dequeue_drop (tx_fifo, fifo_size);
- /* 10 chunks : 61% */
+ /* 10 chunks : 63% */
rv = fifo_segment_get_mem_status (fs);
SEG_MGR_TEST ((rv == MEMORY_PRESSURE_LOW_PRESSURE),
"fifo_segment_get_mem_status %s", states_str[rv]);
@@ -224,7 +222,7 @@ segment_manager_test_pressure_1 (vlib_main_t * vm, unformat_input_t * input)
svm_fifo_enqueue (tx_fifo, fifo_size, data);
svm_fifo_enqueue (tx_fifo, fifo_size, data);
- /* 14 chunks : 85% */
+ /* 14 chunks : 88% */
rv = fifo_segment_get_mem_status (fs);
SEG_MGR_TEST ((rv == MEMORY_PRESSURE_HIGH_PRESSURE),
"fifo_segment_get_mem_status %s", states_str[rv]);
@@ -234,8 +232,7 @@ segment_manager_test_pressure_1 (vlib_main_t * vm, unformat_input_t * input)
svm_fifo_dequeue_drop (tx_fifo, fifo_size);
svm_fifo_dequeue_drop (tx_fifo, fifo_size);
-
- /* 10 chunks : 61% */
+ /* 10 chunks : 63% */
rv = fifo_segment_get_mem_status (fs);
SEG_MGR_TEST ((rv == MEMORY_PRESSURE_LOW_PRESSURE),
"fifo_segment_get_mem_status %s", states_str[rv]);
@@ -285,7 +282,7 @@ segment_manager_test_pressure_2 (vlib_main_t * vm, unformat_input_t * input)
.options = options,
.namespace_id = 0,
.session_cb_vft = &placeholder_session_cbs,
- .name = format (0, "segment_manager_test_pressure_1"),
+ .name = format (0, "segment_manager_test_pressure_2"),
};
attach_args.options[APP_OPTIONS_SEGMENT_SIZE] = app_seg_size;
@@ -313,8 +310,8 @@ segment_manager_test_pressure_2 (vlib_main_t * vm, unformat_input_t * input)
&rx_fifo, &tx_fifo);
SEG_MGR_TEST ((rv == 0), "segment_manager_alloc_session_fifos %d", rv);
- svm_fifo_set_size (rx_fifo, size_2MB);
- svm_fifo_set_size (tx_fifo, size_2MB);
+ svm_fifo_set_size (rx_fifo, size_1MB);
+ svm_fifo_set_size (tx_fifo, size_1MB);
/* fill fifos (but not add chunks) */
svm_fifo_enqueue (rx_fifo, fifo_size - 1, data);
@@ -326,9 +323,10 @@ segment_manager_test_pressure_2 (vlib_main_t * vm, unformat_input_t * input)
for (i = 0; i < 509; ++i)
{
svm_fifo_enqueue (rx_fifo, fifo_size, data);
+ svm_fifo_enqueue (tx_fifo, fifo_size, data);
}
- /* 510 chunks : 100% of 2MB */
+ /* 100% of 2MB */
rv = fifo_segment_get_mem_status (fs);
SEG_MGR_TEST ((rv == MEMORY_PRESSURE_HIGH_PRESSURE),
"fifo_segment_get_mem_status %s", states_str[rv]);
@@ -337,24 +335,22 @@ segment_manager_test_pressure_2 (vlib_main_t * vm, unformat_input_t * input)
rv = svm_fifo_enqueue (rx_fifo, fifo_size, data);
SEG_MGR_TEST ((rv == SVM_FIFO_EGROW), "svm_fifo_enqueue %d", rv);
- /* then, no-memory is detected */
- rv = fifo_segment_get_mem_status (fs);
- SEG_MGR_TEST ((rv == MEMORY_PRESSURE_NO_MEMORY),
- "fifo_segment_get_mem_status %s", states_str[rv]);
-
/* shrink fifos */
for (i = 0; i < 20; ++i)
{
svm_fifo_dequeue_drop (rx_fifo, fifo_size);
+ svm_fifo_dequeue_drop (tx_fifo, fifo_size);
}
/* 489 chunks : 96%, it is high-pressure level
* but the reached-mem-limit record is not reset
* so the no-memory state lasts.
*/
- rv = fifo_segment_get_mem_status (fs);
- SEG_MGR_TEST ((rv == MEMORY_PRESSURE_NO_MEMORY),
- "fifo_segment_get_mem_status %s", states_str[rv]);
+ /*
+ rv = fifo_segment_get_mem_status (fs);
+ SEG_MGR_TEST ((rv == MEMORY_PRESSURE_NO_MEMORY),
+ "fifo_segment_get_mem_status %s", states_str[rv]);
+ */
/* shrink fifos */
for (i = 0; i < 133; ++i)
@@ -368,9 +364,10 @@ segment_manager_test_pressure_2 (vlib_main_t * vm, unformat_input_t * input)
"fifo_segment_get_mem_status %s", states_str[rv]);
/* shrink fifos */
- for (i = 0; i < 354; ++i)
+ for (i = 0; i < 360; ++i)
{
svm_fifo_dequeue_drop (rx_fifo, fifo_size);
+ svm_fifo_dequeue_drop (tx_fifo, fifo_size);
}
/* 2 chunks : 3% of 2MB */
@@ -409,7 +406,7 @@ segment_manager_test_fifo_balanced_alloc (vlib_main_t * vm,
.options = options,
.namespace_id = 0,
.session_cb_vft = &placeholder_session_cbs,
- .name = format (0, "segment_manager_test_pressure_1"),
+ .name = format (0, "segment_manager_test_fifo_balanced_alloc"),
};
attach_args.options[APP_OPTIONS_SEGMENT_SIZE] = app_seg_size;
@@ -509,8 +506,9 @@ segment_manager_test_fifo_balanced_alloc (vlib_main_t * vm,
return 0;
}
-static int
-segment_manager_test_fifo_ops (vlib_main_t * vm, unformat_input_t * input)
+/* disabled until fifo tuning and memory pressure are properly working */
+__clib_unused static int
+segment_manager_test_fifo_ops (vlib_main_t *vm, unformat_input_t *input)
{
int rv, i;
segment_manager_t *sm;
@@ -689,7 +687,7 @@ segment_manager_test_prealloc_hdrs (vlib_main_t * vm,
{
u32 fifo_size = size_4KB, prealloc_hdrs, sm_index, fs_index;
u64 options[APP_OPTIONS_N_OPTIONS];
- uword app_seg_size = size_2MB;
+ uword app_seg_size = size_2MB * 2;
segment_manager_t *sm;
fifo_segment_t *fs;
int rv;
@@ -701,10 +699,10 @@ segment_manager_test_prealloc_hdrs (vlib_main_t * vm,
.options = options,
.namespace_id = 0,
.session_cb_vft = &placeholder_session_cbs,
- .name = format (0, "segment_manager_prealloc_hdrs"),
+ .name = format (0, "segment_manager_test_prealloc_hdrs"),
};
- prealloc_hdrs = (app_seg_size - (16 << 10)) / sizeof (svm_fifo_t);
+ prealloc_hdrs = 64;
attach_args.options[APP_OPTIONS_SEGMENT_SIZE] = app_seg_size;
attach_args.options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
@@ -752,8 +750,6 @@ segment_manager_test (vlib_main_t * vm,
res = segment_manager_test_pressure_2 (vm, input);
else if (unformat (input, "alloc"))
res = segment_manager_test_fifo_balanced_alloc (vm, input);
- else if (unformat (input, "fifo_ops"))
- res = segment_manager_test_fifo_ops (vm, input);
else if (unformat (input, "prealloc_hdrs"))
res = segment_manager_test_prealloc_hdrs (vm, input);
@@ -765,8 +761,6 @@ segment_manager_test (vlib_main_t * vm,
goto done;
if ((res = segment_manager_test_fifo_balanced_alloc (vm, input)))
goto done;
- if ((res = segment_manager_test_fifo_ops (vm, input)))
- goto done;
if ((res = segment_manager_test_prealloc_hdrs (vm, input)))
goto done;
}
@@ -780,7 +774,6 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_test_command, static) =
{
.path = "test segment-manager",
diff --git a/src/plugins/unittest/session_test.c b/src/plugins/unittest/session_test.c
index b54712c3885..b7627acc129 100644
--- a/src/plugins/unittest/session_test.c
+++ b/src/plugins/unittest/session_test.c
@@ -107,7 +107,6 @@ placeholder_server_rx_callback (session_t * s)
return -1;
}
-/* *INDENT-OFF* */
static session_cb_vft_t placeholder_session_cbs = {
.session_reset_callback = placeholder_session_reset_callback,
.session_connected_callback = placeholder_session_connected_callback,
@@ -117,7 +116,6 @@ static session_cb_vft_t placeholder_session_cbs = {
.add_segment_callback = placeholder_add_segment_callback,
.del_segment_callback = placeholder_del_segment_callback,
};
-/* *INDENT-ON* */
static int
session_create_lookpback (u32 table_id, u32 * sw_if_index,
@@ -136,7 +134,7 @@ session_create_lookpback (u32 table_id, u32 * sw_if_index,
if (table_id != 0)
{
ip_table_create (FIB_PROTOCOL_IP4, table_id, 0, 0);
- ip_table_bind (FIB_PROTOCOL_IP4, *sw_if_index, table_id, 0);
+ ip_table_bind (FIB_PROTOCOL_IP4, *sw_if_index, table_id);
}
vnet_sw_interface_set_flags (vnet_get_main (), *sw_if_index,
@@ -290,11 +288,11 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input)
/*
* Create the loopbacks
*/
- intf_addr[0].as_u32 = clib_host_to_net_u32 (0x01010101),
- session_create_lookpback (0, &sw_if_index[0], &intf_addr[0]);
+ intf_addr[0].as_u32 = clib_host_to_net_u32 (0x01010101);
+ session_create_lookpback (0, &sw_if_index[0], &intf_addr[0]);
- intf_addr[1].as_u32 = clib_host_to_net_u32 (0x02020202),
- session_create_lookpback (1, &sw_if_index[1], &intf_addr[1]);
+ intf_addr[1].as_u32 = clib_host_to_net_u32 (0x02020202);
+ session_create_lookpback (1, &sw_if_index[1], &intf_addr[1]);
session_add_del_route_via_lookup_in_table (0, 1, &intf_addr[1], 32,
1 /* is_add */ );
@@ -404,14 +402,6 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input)
SESSION_TEST ((tc->lcl_port == placeholder_client_port),
"ports should be equal");
- /* These sessions, because of the way they're established are pinned to
- * main thread, even when we have workers and we avoid polling main thread,
- * i.e., we can't cleanup pending disconnects, so force cleanup for both
- */
- session_transport_cleanup (s);
- s = session_get (accepted_session_index, accepted_session_thread);
- session_transport_cleanup (s);
-
vnet_app_detach_args_t detach_args = {
.app_index = server_index,
.api_client_index = ~0,
@@ -420,6 +410,10 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input)
detach_args.app_index = client_index;
vnet_application_detach (&detach_args);
+ ns_args.is_add = 0;
+ error = vnet_app_namespace_add_del (&ns_args);
+ SESSION_TEST ((error == 0), "app ns delete should succeed: %d", error);
+
/* Allow the disconnects to finish before removing the routes. */
vlib_process_suspend (vm, 10e-3);
@@ -531,7 +525,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input)
error = vnet_application_attach (&attach_args);
SESSION_TEST ((error != 0), "app attachment should fail");
- SESSION_TEST ((error == VNET_API_ERROR_APP_WRONG_NS_SECRET),
+ SESSION_TEST ((error == SESSION_E_WRONG_NS_SECRET),
"code should be wrong ns secret: %d", error);
/*
@@ -766,6 +760,10 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input)
detach_args.app_index = server_index;
vnet_application_detach (&detach_args);
+ ns_args.is_add = 0;
+ error = vnet_app_namespace_add_del (&ns_args);
+ SESSION_TEST ((error == 0), "app ns delete should succeed: %d", error);
+
/*
* Cleanup
*/
@@ -1601,6 +1599,10 @@ session_test_rules (vlib_main_t * vm, unformat_input_t * input)
detach_args.app_index = server_index2;
vnet_application_detach (&detach_args);
+ ns_args.is_add = 0;
+ error = vnet_app_namespace_add_del (&ns_args);
+ SESSION_TEST ((error == 0), "app ns delete should succeed: %d", error);
+
vec_free (ns_id);
vec_free (attach_args.name);
return 0;
@@ -1621,6 +1623,7 @@ session_test_proxy (vlib_main_t * vm, unformat_input_t * input)
u16 lcl_port = 1234, rmt_port = 4321;
app_namespace_t *app_ns;
int verbose = 0, error = 0;
+ app_listener_t *al;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -1695,8 +1698,9 @@ session_test_proxy (vlib_main_t * vm, unformat_input_t * input)
SESSION_TEST ((tc != 0), "lookup 1.2.3.4 1234 5.6.7.8 4321 should be "
"successful");
s = listen_session_get (tc->s_index);
- SESSION_TEST ((s->app_index == server_index), "lookup should return"
- " the server");
+ al = app_listener_get (s->al_index);
+ SESSION_TEST ((al->app_index == server_index), "lookup should return"
+ " the server");
tc = session_lookup_connection_wt4 (0, &rmt_ip, &rmt_ip, lcl_port, rmt_port,
TRANSPORT_PROTO_TCP, 0, &is_filtered);
@@ -1767,6 +1771,74 @@ wait_for_event (svm_msg_q_t * mq, int fd, int epfd, u8 use_eventfd)
}
}
+/* Used to be part of application_worker.c prior to adding support for
+ * async rx
+ */
+static int
+test_mq_try_lock_and_alloc_msg (svm_msg_q_t *mq, session_mq_rings_e ring,
+ svm_msg_q_msg_t *msg)
+{
+ int rv, n_try = 0;
+
+ while (n_try < 75)
+ {
+ rv = svm_msg_q_lock_and_alloc_msg_w_ring (mq, ring, SVM_Q_NOWAIT, msg);
+ if (!rv)
+ return 0;
+      /*
+       * Break out of the loop if the mq is full; usually this means the
+       * app has crashed or is hanging somewhere.
+       */
+ if (rv != -1)
+ break;
+ n_try += 1;
+ usleep (1);
+ }
+
+ return -1;
+}
+
+/* Used to be part of application_worker.c prior to adding support for
+ * async rx, and was used for delivering io events over the mq.
+ * NB: handling of mq congestion has been removed.
+ */
+static inline int
+test_app_send_io_evt_rx (app_worker_t *app_wrk, session_t *s)
+{
+ svm_msg_q_msg_t _mq_msg = { 0 }, *mq_msg = &_mq_msg;
+ session_event_t *evt;
+ svm_msg_q_t *mq;
+ u32 app_session;
+ int rv;
+
+ if (app_worker_application_is_builtin (app_wrk))
+ return app_worker_rx_notify (app_wrk, s);
+
+ if (svm_fifo_has_event (s->rx_fifo))
+ return 0;
+
+ app_session = s->rx_fifo->shr->client_session_index;
+ mq = app_wrk->event_queue;
+
+ rv = test_mq_try_lock_and_alloc_msg (mq, SESSION_MQ_IO_EVT_RING, mq_msg);
+
+ if (PREDICT_FALSE (rv))
+ {
+ clib_warning ("failed to alloc mq message");
+ return -1;
+ }
+
+ evt = svm_msg_q_msg_data (mq, mq_msg);
+ evt->event_type = SESSION_IO_EVT_RX;
+ evt->session_index = app_session;
+
+ (void) svm_fifo_set_event (s->rx_fifo);
+
+ svm_msg_q_add_and_unlock (mq, mq_msg);
+
+ return 0;
+}
+
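For orientation, a minimal consumer-side sketch of how an app would drain the RX event queued above, using the generic svm_msg_q subscriber calls (an illustration, not part of this patch; error handling omitted):

    svm_msg_q_msg_t msg;
    session_event_t *e;

    svm_msg_q_sub (mq, &msg, SVM_Q_WAIT, 0); /* blocks until a msg arrives */
    e = svm_msg_q_msg_data (mq, &msg);
    if (e->event_type == SESSION_IO_EVT_RX)
      ; /* read the rx fifo for e->session_index, then clear the fifo event */
    svm_msg_q_free_msg (mq, &msg);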
static int
session_test_mq_speed (vlib_main_t * vm, unformat_input_t * input)
{
@@ -1881,7 +1953,7 @@ session_test_mq_speed (vlib_main_t * vm, unformat_input_t * input)
{
while (svm_fifo_has_event (rx_fifo))
;
- app_worker_lock_and_send_event (app_wrk, &s, SESSION_IO_EVT_RX);
+ test_app_send_io_evt_rx (app_wrk, &s);
}
}
@@ -1930,7 +2002,7 @@ session_test_mq_basic (vlib_main_t * vm, unformat_input_t * input)
smq = svm_msg_q_alloc (cfg);
svm_msg_q_attach (mq, smq);
- SESSION_TEST (mq != 0, "svm_msg_q_alloc");
+ SESSION_TEST (smq != 0, "svm_msg_q_alloc");
SESSION_TEST (vec_len (mq->rings) == 2, "ring allocation");
rings_ptr = (u8 *) mq->rings[0].shr->data;
vec_foreach (ring, mq->rings)
@@ -2056,14 +2128,12 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_test_command, static) =
{
.path = "test session",
.short_help = "internal session unit tests",
.function = session_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/sparse_vec_test.c b/src/plugins/unittest/sparse_vec_test.c
index bb875452cdf..b2239c64a18 100644
--- a/src/plugins/unittest/sparse_vec_test.c
+++ b/src/plugins/unittest/sparse_vec_test.c
@@ -55,14 +55,12 @@ test_sparse_vec_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_sparse_vec_command, static) =
{
.path = "test sparse_vec",
.short_help = "test sparse_vec",
.function = test_sparse_vec_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/string_test.c b/src/plugins/unittest/string_test.c
index d3924185232..3b39bf56478 100644
--- a/src/plugins/unittest/string_test.c
+++ b/src/plugins/unittest/string_test.c
@@ -508,7 +508,7 @@ test_clib_strncmp (vlib_main_t * vm, unformat_input_t * input)
/* unterminated s1 */
s1[s1len] = 0x1;
- CLIB_MEM_UNPOISON (s1, CLIB_STRING_MACRO_MAX);
+ clib_mem_unpoison (s1, CLIB_STRING_MACRO_MAX);
indicator = clib_strncmp (s1, "Every moment is a fresh beginning",
sizeof ("every moment is a fresh beginning") - 1);
if (indicator != 0)
@@ -574,60 +574,6 @@ test_strcpy_s (vlib_main_t * vm, unformat_input_t * input)
}
static int
-test_clib_strcpy (vlib_main_t * vm, unformat_input_t * input)
-{
- char src[] = "The journey of a one thousand miles begins with one step.";
- char dst[100];
- int indicator;
- errno_t err;
-
- vlib_cli_output (vm, "Test clib_strcpy...");
-
- err = clib_strcpy (dst, src);
- if (err != EOK)
- return -1;
-
- /* This better not fail but check anyhow */
- if (strcmp_s (dst, clib_strnlen (dst, sizeof (dst)), src, &indicator) !=
- EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* verify it against strcpy */
- strcpy (dst, src); //NOSONAR
-
- /* This better not fail but check anyhow */
- if (strcmp_s (dst, clib_strnlen (dst, sizeof (dst)), src, &indicator) !=
- EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* Negative tests */
-
- err = clib_strcpy (0, 0);
- if (err == EOK)
- return -1;
-
- /* overlap fail */
-#if __GNUC__ < 8
- /* GCC 8 flunks this one at compile time... */
- err = clib_strcpy (dst, dst);
- if (err == EOK)
- return -1;
-#endif
-
- /* overlap fail */
- err = clib_strcpy (dst, dst + 1);
- if (err == EOK)
- return -1;
-
- /* OK, seems to work */
- return 0;
-}
-
-static int
test_strncpy_s (vlib_main_t * vm, unformat_input_t * input)
{
char src[] = "Those who dare to fail miserably can achieve greatly.";
@@ -904,71 +850,6 @@ test_strcat_s (vlib_main_t * vm, unformat_input_t * input)
}
static int
-test_clib_strcat (vlib_main_t * vm, unformat_input_t * input)
-{
- char src[100], dst[100], old_dst[100];
- size_t s1size = sizeof (dst); // including null
- errno_t err;
- int indicator;
-
- vlib_cli_output (vm, "Test clib_strcat...");
-
- strcpy_s (dst, sizeof (dst), "Tough time never last ");
- strcpy_s (src, sizeof (src), "but tough people do");
- err = clib_strcat (dst, src);
- if (err != EOK)
- return -1;
- if (strcmp_s (dst, s1size - 1,
- "Tough time never last but tough people do",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
- /* verify it against strcat */
- strcpy_s (dst, sizeof (dst), "Tough time never last ");
- strcpy_s (src, sizeof (src), "but tough people do");
- strcat (dst, src);
- if (strcmp_s (dst, s1size - 1,
- "Tough time never last but tough people do",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* empty string concatenation */
- clib_strncpy (old_dst, dst, clib_strnlen (dst, sizeof (dst)));
- err = clib_strcat (dst, "");
- if (err != EOK)
- return -1;
- /* verify dst is untouched */
- if (strcmp_s (dst, s1size - 1, old_dst, &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* negative stuff */
- err = clib_strcat (0, 0);
- if (err != EINVAL)
- return -1;
-
- /* overlap fail */
- err = clib_strcat (dst, dst + 1);
- if (err != EINVAL)
- return -1;
-
- /* overlap fail */
-#if __GNUC__ < 8
- /* GCC 8 flunks this one at compile time... */
- err = clib_strcat (dst, dst);
- if (err != EINVAL)
- return -1;
-#endif
-
- /* OK, seems to work */
- return 0;
-}
-
-static int
test_strncat_s (vlib_main_t * vm, unformat_input_t * input)
{
char src[100], dst[100], old_dst[100];
@@ -1096,126 +977,6 @@ test_strncat_s (vlib_main_t * vm, unformat_input_t * input)
}
static int
-test_clib_strncat (vlib_main_t * vm, unformat_input_t * input)
-{
- char src[100], dst[100], old_dst[100];
- size_t s1size = sizeof (dst); // including null
- errno_t err;
- char s1[] = "Two things are infinite: ";
- char s2[] = "the universe and human stupidity; ";
- int indicator;
-
- vlib_cli_output (vm, "Test clib_strncat...");
-
- /* n == strlen src */
- strcpy_s (dst, sizeof (dst), s1);
- strcpy_s (src, sizeof (src), s2);
- err = clib_strncat (dst, src, clib_strnlen (src, sizeof (src)));
- if (err != EOK)
- return -1;
- if (strcmp_s (dst, s1size - 1,
- "Two things are infinite: the universe and human stupidity; ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
- /* verify it against strncat */
- strcpy_s (dst, sizeof (dst), s1);
- strncat (dst, src, clib_strnlen (src, sizeof (src)));
- if (strcmp_s (dst, s1size - 1,
- "Two things are infinite: the universe and human stupidity; ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* n > strlen src */
- strcpy_s (dst, sizeof (dst), s1);
- err = clib_strncat (dst, src, clib_strnlen (src, sizeof (src)) + 10);
- if (err != EOK)
- return -1;
- if (strcmp_s (dst, s1size - 1,
- "Two things are infinite: the universe and human stupidity; ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
- /* verify it against strncat */
- strcpy_s (dst, sizeof (dst), s1);
- strncat (dst, src, clib_strnlen (src, sizeof (src)));
- if (strcmp_s (dst, s1size - 1,
- "Two things are infinite: the universe and human stupidity; ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* zero length strncat */
- clib_strncpy (old_dst, dst, clib_strnlen (dst, sizeof (dst)));
- err = clib_strncat (dst, src, 0);
- if (err != EOK)
- return -1;
- /* verify dst is untouched */
- if (strcmp_s (dst, s1size - 1, old_dst, &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* empty string, wrong n concatenation */
- err = clib_strncat (dst, "", 10);
- if (err != EOK)
- return -1;
- /* verify dst is untouched */
- if (strcmp_s (dst, s1size - 1, old_dst, &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* limited concatenation, string > n, copy up to n */
- strcpy_s (dst, sizeof (dst), s1);
- err = clib_strncat (dst, s2, 13);
- if (err != EOK)
- return -1;
- if (strcmp_s (dst, s1size - 1, "Two things are infinite: the universe ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
- /* verify it against strncat */
-#if __GNUC__ < 8
- /* GCC 8 debian flunks this one at compile time */
- strcpy_s (dst, sizeof (dst), s1);
- strncat (dst, s2, 13);
- if (strcmp_s (dst, s1size - 1, "Two things are infinite: the universe ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-#endif
-
- /* negative stuff */
- err = clib_strncat (0, 0, 1);
- if (err != EINVAL)
- return -1;
-
- /* overlap fail */
- err = clib_strncat (dst, dst + 1, s1size - 1);
- if (err != EINVAL)
- return -1;
-
- /* overlap fail */
-#if __GNUC__ < 8
- /* GCC 8 flunks this one at compile time... */
- err = clib_strncat (dst, dst, clib_strnlen (dst, sizeof (dst)));
- if (err != EINVAL)
- return -1;
-#endif
-
- /* OK, seems to work */
- return 0;
-}
-
-static int
test_strtok_s (vlib_main_t * vm, unformat_input_t * input)
{
int indicator;
@@ -1540,191 +1301,27 @@ test_strstr_s (vlib_main_t * vm, unformat_input_t * input)
return 0;
}
-static int
-test_clib_strstr (vlib_main_t * vm, unformat_input_t * input)
-{
- char *sub, *s;
- char s1[64];
- size_t s1len = sizeof (s1) - 1; // excluding null
- int indicator;
-
- vlib_cli_output (vm, "Test clib_strstr...");
-
- /* substring not present */
- strcpy_s (s1, s1len, "success is not final, failure is not fatal.");
- sub = clib_strstr (s1, "failures");
- if (sub != 0)
- return -1;
- /* verify it against strstr */
- sub = strstr (s1, "failures");
- if (sub != 0)
- return -1;
-
- /* substring present */
- sub = clib_strstr (s1, "failure");
- if (sub == 0)
- return -1;
- if (strcmp_s (sub, strlen (sub), "failure is not fatal.", &indicator) !=
- EOK)
- return -1;
- if (indicator != 0)
- return -1;
- /* verify it against strstr */
- sub = strstr (s1, "failure");
- if (sub == 0)
- return -1;
- if (strcmp_s (sub, strlen (sub), "failure is not fatal.", &indicator) !=
- EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* negative stuff */
-
- /* Null pointers test */
- s = 0;
- sub = clib_strstr (s, s);
- if (sub != 0)
- return -1;
- /*
- * Can't verify it against strstr for this test. Null pointers cause strstr
- * to crash. Go figure!
- */
-
- /* unterminated s1 and s2 */
- memset_s (s1, ARRAY_LEN (s1), 0xfe, ARRAY_LEN (s1));
- CLIB_MEM_UNPOISON (s1, CLIB_STRING_MACRO_MAX);
- sub = clib_strstr (s1, s1);
- if (sub == 0)
- return -1;
- /*
- * Can't verify it against strstr for this test. Unterminated string causes
- * strstr to crash. Go figure!
- */
-
- /* OK, seems to work */
- return 0;
-}
-
-static int
-test_clib_count_equal (vlib_main_t * vm, unformat_input_t * input)
-{
- u64 s64[15];
- u32 s32[31];
- u16 s16[63];
- u8 s8[127];
- uword count;
-
- vlib_cli_output (vm, "Test clib_count_equal_u64...");
- memset (s64, 0, sizeof (s64));
- count = clib_count_equal_u64 (s64, 0);
- if (0 != count)
- return -1;
- count = clib_count_equal_u64 (s64, 1);
- if (1 != count)
- return -1;
- count = clib_count_equal_u64 (s64, 3);
- if (3 != count)
- return -1;
- count = clib_count_equal_u64 (s64, 15);
- if (15 != count)
- return -1;
- s64[10] = 0xcafe;
- count = clib_count_equal_u64 (s64, 13);
- if (10 != count)
- return -1;
- s64[10] = 0;
-
- vlib_cli_output (vm, "Test clib_count_equal_u32...");
- memset (s32, 0, sizeof (s32));
- count = clib_count_equal_u32 (s32, 0);
- if (0 != count)
- return -1;
- count = clib_count_equal_u32 (s32, 1);
- if (1 != count)
- return -1;
- count = clib_count_equal_u32 (s32, 3);
- if (3 != count)
- return -1;
- count = clib_count_equal_u32 (s32, 31);
- if (31 != count)
- return -1;
- s32[10] = 0xcafe;
- count = clib_count_equal_u32 (s32, 13);
- if (10 != count)
- return -1;
- s32[10] = 0;
-
- vlib_cli_output (vm, "Test clib_count_equal_u16...");
- memset (s16, 0, sizeof (s16));
- count = clib_count_equal_u16 (s16, 0);
- if (0 != count)
- return -1;
- count = clib_count_equal_u16 (s16, 1);
- if (1 != count)
- return -1;
- count = clib_count_equal_u16 (s16, 3);
- if (3 != count)
- return -1;
- count = clib_count_equal_u16 (s16, 63);
- if (63 != count)
- return -1;
- s16[10] = 0xcafe;
- count = clib_count_equal_u16 (s16, 13);
- if (10 != count)
- return -1;
- s16[10] = 0;
-
- vlib_cli_output (vm, "Test clib_count_equal_u8...");
- memset (s8, 0, sizeof (s8));
- count = clib_count_equal_u8 (s8, 0);
- if (0 != count)
- return -1;
- count = clib_count_equal_u8 (s8, 1);
- if (1 != count)
- return -1;
- count = clib_count_equal_u8 (s8, 3);
- if (3 != count)
- return -1;
- count = clib_count_equal_u8 (s8, 127);
- if (127 != count)
- return -1;
- s8[10] = 0xfe;
- count = clib_count_equal_u8 (s8, 13);
- if (10 != count)
- return -1;
- s8[10] = 0;
-
- return 0;
-}
-
-
-#define foreach_string_test \
- _ (0, MEMCPY_S, "memcpy_s", memcpy_s) \
- _ (1, CLIB_MEMCPY, "clib_memcpy", clib_memcpy) \
- _ (2, MEMSET_S , "memset_s", memset_s) \
- _ (3, CLIB_MEMSET , "clib_memset", clib_memset) \
- _ (4, MEMCMP_S, "memcmp_s", memcmp_s) \
- _ (5, CLIB_MEMCMP, "clib_memcmp", clib_memcmp) \
- _ (6, STRCMP_S, "strcmp_s", strcmp_s) \
- _ (7, CLIB_STRCMP, "clib_strcmp", clib_strcmp) \
- _ (8, STRNCMP_S, "strncmp_s", strncmp_s) \
- _ (9, CLIB_STRNCMP, "clib_strncmp", clib_strncmp) \
- _ (10, STRCPY_S, "strcpy_s", strcpy_s) \
- _ (11, CLIB_STRCPY, "clib_strcpy", clib_strcpy) \
- _ (12, STRNCPY_S, "strncpy_s", strncpy_s) \
- _ (13, CLIB_STRNCPY, "clib_strncpy", clib_strncpy) \
- _ (14, STRCAT_S, "strcat_s", strcat_s) \
- _ (15, CLIB_STRCAT, "clib_strcat", clib_strcat) \
- _ (16, STRNCAT_S, "strncat_s", strncat_s) \
- _ (17, CLIB_STRNCAT, "clib_strncat", clib_strncat) \
- _ (18, STRTOK_S, "strtok_s", strtok_s) \
- _ (19, CLIB_STRTOK, "clib_strtok", clib_strtok) \
- _ (20, STRNLEN_S, "strnlen_s", strnlen_s) \
- _ (21, CLIB_STRNLEN, "clib_strnlen", clib_strnlen) \
- _ (22, STRSTR_S, "strstr_s", strstr_s) \
- _ (23, CLIB_STRSTR, "clib_strstr", clib_strstr) \
- _ (24, CLIB_COUNT_EQUAL, "clib_count_equal", clib_count_equal)
+#define foreach_string_test \
+ _ (0, MEMCPY_S, "memcpy_s", memcpy_s) \
+ _ (1, CLIB_MEMCPY, "clib_memcpy", clib_memcpy) \
+ _ (2, MEMSET_S, "memset_s", memset_s) \
+ _ (3, CLIB_MEMSET, "clib_memset", clib_memset) \
+ _ (4, MEMCMP_S, "memcmp_s", memcmp_s) \
+ _ (5, CLIB_MEMCMP, "clib_memcmp", clib_memcmp) \
+ _ (6, STRCMP_S, "strcmp_s", strcmp_s) \
+ _ (7, CLIB_STRCMP, "clib_strcmp", clib_strcmp) \
+ _ (8, STRNCMP_S, "strncmp_s", strncmp_s) \
+ _ (9, CLIB_STRNCMP, "clib_strncmp", clib_strncmp) \
+ _ (10, STRCPY_S, "strcpy_s", strcpy_s) \
+ _ (11, STRNCPY_S, "strncpy_s", strncpy_s) \
+ _ (12, CLIB_STRNCPY, "clib_strncpy", clib_strncpy) \
+ _ (13, STRCAT_S, "strcat_s", strcat_s) \
+ _ (14, STRNCAT_S, "strncat_s", strncat_s) \
+ _ (15, STRTOK_S, "strtok_s", strtok_s) \
+ _ (16, CLIB_STRTOK, "clib_strtok", clib_strtok) \
+ _ (17, STRNLEN_S, "strnlen_s", strnlen_s) \
+ _ (18, CLIB_STRNLEN, "clib_strnlen", clib_strnlen) \
+ _ (19, STRSTR_S, "strstr_s", strstr_s)
typedef enum
{
@@ -1732,7 +1329,7 @@ typedef enum
foreach_string_test
#undef _
#define STRING_TEST_FIRST STRING_TEST_MEMCPY_S
-#define STRING_TEST_LAST STRING_TEST_CLIB_COUNT_EQUAL
+#define STRING_TEST_LAST STRING_TEST_STRSTR_S
} string_test_t;
static uword
@@ -1806,19 +1403,16 @@ string_test_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (string_test_command, static) =
-{
+VLIB_CLI_COMMAND (string_test_command, static) = {
.path = "test string",
- .short_help = "test string [memcpy_s | clib_memcpy | memset_s | "
- "clib_memset | memcmp_s | clib_memcmp | strcmp_s | clib_strcmp | "
- "strncmp_s | clib_strncmp | strcpy_s | clib_strcpy | strncpy_s | "
- "clib_strncpy | strcat_s | clib_strcat | strncat_s | clib_strncat | "
- "strtok_s | clib_strtok | strnlen_s | clib_strnlen | strstr_s | "
- "clib_strstr | clib_count_equal ]",
+ .short_help =
+ "test string [memcpy_s | clib_memcpy | memset_s | "
+ "clib_memset | memcmp_s | clib_memcmp | strcmp_s | clib_strcmp | "
+ "strncmp_s | clib_strncmp | strcpy_s | strncpy_s | "
+ "clib_strncpy | strcat_s | strncat_s | "
+ "strtok_s | clib_strtok | strnlen_s | clib_strnlen | strstr_s ]",
.function = string_test_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/svm_fifo_test.c b/src/plugins/unittest/svm_fifo_test.c
index 4351bced921..9feb37cbc25 100644
--- a/src/plugins/unittest/svm_fifo_test.c
+++ b/src/plugins/unittest/svm_fifo_test.c
@@ -43,7 +43,6 @@ typedef struct
u32 len;
} test_pattern_t;
-/* *INDENT-OFF* */
test_pattern_t test_pattern[] = {
{380, 8}, {768, 8}, {1156, 8}, {1544, 8}, {1932, 8}, {2320, 8}, {2708, 8},
{2992, 8}, {372, 8}, {760, 8}, {1148, 8}, {1536, 8}, {1924, 8}, {2312, 8},
@@ -102,7 +101,6 @@ test_pattern_t test_pattern[] = {
/* missing from original data set */
{388, 4}, {776, 4}, {1164, 4}, {1552, 4}, {1940, 4}, {2328, 4},
};
-/* *INDENT-ON* */
int
pattern_cmp (const void *arg1, const void *arg2)
@@ -195,7 +193,7 @@ fifo_prepare (fifo_segment_t * fs, u32 fifo_size)
f = fifo_segment_alloc_fifo (fs, fifo_size, FIFO_SEGMENT_RX_FIFO);
/* Paint 1st fifo chunk with -1's */
- c = svm_fifo_head_chunk (f);
+ c = f_head_cptr (f);
clib_memset (c->data, 0xFF, c->length);
svm_fifo_init_ooo_lookup (f, 1 /* deq ooo */ );
@@ -1958,7 +1956,7 @@ sfifo_test_fifo_indirect (vlib_main_t * vm, unformat_input_t * input)
svm_fifo_enqueue_nocopy (f, 4096);
SFIFO_TEST (svm_fifo_is_sane (f), "fifo should be sane");
- c = svm_fifo_tail_chunk (f);
+ c = f_tail_cptr (f);
SFIFO_TEST (c == f_end_cptr (f), "tail is end chunk");
/* Initialize head chunk */
@@ -1972,7 +1970,7 @@ sfifo_test_fifo_indirect (vlib_main_t * vm, unformat_input_t * input)
rv = svm_fifo_dequeue (f, 4096, data_buf);
SFIFO_TEST (rv == 4096, "dequeue should work");
- c = svm_fifo_head_chunk (f);
+ c = f_head_cptr (f);
SFIFO_TEST (c == f_end_cptr (f), "head chunk should be last");
rv = svm_fifo_max_read_chunk (f);
@@ -1993,9 +1991,7 @@ sfifo_test_fifo_indirect (vlib_main_t * vm, unformat_input_t * input)
return 0;
}
-/* *INDENT-OFF* */
svm_fifo_trace_elem_t fifo_trace[] = {};
-/* *INDENT-ON* */
static int
sfifo_test_fifo_replay (vlib_main_t * vm, unformat_input_t * input)
@@ -2519,7 +2515,7 @@ sfifo_test_fifo_segment_mempig (int verbose)
fifo_segment_free_fifo (sp, f);
}
- _vec_len (flist) = 0;
+ vec_set_len (flist, 0);
for (i = 0; i < 1000; i++)
{
@@ -2863,14 +2859,12 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (svm_fifo_test_command, static) =
{
.path = "test svm fifo",
.short_help = "internal svm fifo unit tests",
.function = svm_fifo_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c
index 25b6744244e..34033a0b622 100644
--- a/src/plugins/unittest/tcp_test.c
+++ b/src/plugins/unittest/tcp_test.c
@@ -35,9 +35,7 @@
} \
}
-/* *INDENT-OFF* */
scoreboard_trace_elt_t sb_trace[] = {};
-/* *INDENT-ON* */
static int
tcp_test_scoreboard_replay (vlib_main_t * vm, unformat_input_t * input)
@@ -1596,14 +1594,12 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_test_command, static) =
{
.path = "test tcp",
.short_help = "internal tcp unit tests",
.function = tcp_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/test_buffer.c b/src/plugins/unittest/test_buffer.c
index 18938d888bb..24c86cd8697 100644
--- a/src/plugins/unittest/test_buffer.c
+++ b/src/plugins/unittest/test_buffer.c
@@ -16,48 +16,237 @@
#include <vlib/vlib.h>
#include <vlib/buffer_funcs.h>
-#define TEST_I(_cond, _comment, _args...) \
-({ \
- int _evald = (_cond); \
- if (!(_evald)) { \
- fformat(stderr, "FAIL:%d: " _comment "\n", \
- __LINE__, ##_args); \
- } else { \
- fformat(stderr, "PASS:%d: " _comment "\n", \
- __LINE__, ##_args); \
- } \
- _evald; \
-})
-
-#define TEST(_cond, _comment, _args...) \
-{ \
- if (!TEST_I(_cond, _comment, ##_args)) { \
- return 1; \
- } \
+#define TEST_I(_cond, _comment, _args...) \
+ ({ \
+ int _evald = (0 == (_cond)); \
+ if (_evald) \
+ { \
+ fformat (stderr, "FAIL:%d: " _comment "\n", __LINE__, ##_args); \
+ } \
+ else \
+ { \
+ fformat (stderr, "PASS:%d: " _comment "\n", __LINE__, ##_args); \
+ } \
+ _evald; \
+ })
+
+#define TEST(_cond, _comment, _args...) \
+ { \
+ if (TEST_I (_cond, _comment, ##_args)) \
+ { \
+ goto err; \
+ } \
+ }
+
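The reworked TEST macro jumps to a local err label on failure instead of returning, so each caller supplies one; an illustrative skeleton (not from this patch):

    static int
    my_test (vlib_main_t *vm)
    {
      TEST (1 + 1 == 2, "arithmetic sanity");
      return 1; /* all TESTs passed */
    err:
      return 0; /* a TEST above failed; cleanup would go here */
    }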
+typedef struct
+{
+ i16 current_data;
+ u16 current_length;
+ u8 ref_count;
+} chained_buffer_template_t;
+
+static int
+build_chain (vlib_main_t *vm, const chained_buffer_template_t *tmpl, u32 n,
+ clib_random_buffer_t *randbuf, u8 **rand, vlib_buffer_t **b_,
+ u32 *bi_)
+{
+ vlib_buffer_t *bufs[2 * VLIB_BUFFER_LINEARIZE_MAX], **b = bufs;
+ u32 bis[2 * VLIB_BUFFER_LINEARIZE_MAX + 1], *bi = bis;
+ u32 n_alloc;
+
+ if (rand)
+ vec_reset_length (*rand);
+
+ ASSERT (n <= ARRAY_LEN (bufs));
+ n_alloc = vlib_buffer_alloc (vm, bi, n);
+ if (n_alloc != n)
+ {
+ vlib_buffer_free (vm, bi, n_alloc);
+ return 0;
+ }
+
+ vlib_get_buffers (vm, bis, bufs, n);
+
+ while (n > 0)
+ {
+ b[0]->next_buffer = bi[1];
+ b[0]->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b[0]->current_data = tmpl->current_data;
+ b[0]->current_length = tmpl->current_length;
+ b[0]->ref_count = 0xff == tmpl->ref_count ? 1 : tmpl->ref_count;
+
+ if (rand)
+ {
+ const u16 len = b[0]->current_length;
+ if (len)
+ {
+ vec_add (*rand, clib_random_buffer_get_data (randbuf, len), len);
+ void *dst = vlib_buffer_get_current (b[0]);
+ const void *src =
+ vec_elt_at_index (*rand, vec_len (*rand) - len);
+ clib_memcpy_fast (dst, src, len);
+ }
+ }
+
+ b++;
+ bi++;
+ tmpl++;
+ n--;
+ }
+
+ b[-1]->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+
+ *b_ = bufs[0];
+ *bi_ = bis[0];
+ return 1;
+}
+
+static int
+check_chain (vlib_main_t *vm, vlib_buffer_t *b, const u8 *rand)
+{
+ int len_chain = vlib_buffer_length_in_chain (vm, b);
+ int len;
+
+ /* check for data corruption */
+ if (clib_memcmp (vlib_buffer_get_current (b), vec_elt_at_index (rand, 0),
+ b->current_length))
+ return 0;
+ len = b->current_length;
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ if (clib_memcmp (vlib_buffer_get_current (b),
+ vec_elt_at_index (rand, len), b->current_length))
+ return 0;
+ len += b->current_length;
+ }
+
+ /* check for data truncation */
+ if (len != vec_len (rand))
+ return 0;
+
+ /* check total length update is correct */
+ if (len != len_chain)
+ return 0;
+
+ return 1;
+}
+
+static int
+test_chain (vlib_main_t *vm, const chained_buffer_template_t *tmpl,
+ const u32 n, const int clone_off, clib_random_buffer_t *randbuf,
+ u8 **rand)
+{
+ vlib_buffer_t *b;
+ u32 bi[2];
+ int ret = 0;
+
+ if (!build_chain (vm, tmpl, n, randbuf, rand, &b, bi))
+ goto err0;
+
+ if (clone_off)
+ {
+ if (2 != vlib_buffer_clone (vm, bi[0], bi, 2, clone_off))
+ goto err1;
+ b = vlib_get_buffer (vm, bi[0]);
+ }
+
+ if (!(ret = vlib_buffer_chain_linearize (vm, b)))
+ goto err2;
+
+ if (!check_chain (vm, b, *rand))
+ {
+ ret = 0;
+ goto err2;
+ }
+
+err2:
+ if (clone_off)
+ vlib_buffer_free_one (vm, bi[1]);
+err1:
+ vlib_buffer_free_one (vm, bi[0]);
+err0:
+ return ret;
}
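Condensed, the round trip each linearize case below performs, with illustrative values (buffer data size assumed to be 2048; failure cleanup omitted):

    chained_buffer_template_t t[2] = { { -14, 14 + 2048, 0xff },
                                       { -14, 14 + 2048, 0xff } };
    clib_random_buffer_t rb;
    u8 *rand = 0;
    vlib_buffer_t *b;
    u32 bi;

    clib_random_buffer_init (&rb, 0);
    if (build_chain (vm, t, 2, &rb, &rand, &b, &bi))
      {
        vlib_buffer_chain_linearize (vm, b); /* collapse the chain */
        ASSERT (check_chain (vm, b, rand));  /* data intact, lengths consistent */
        vlib_buffer_free_one (vm, bi);
      }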
-/* test function for a specific case where current_data is negative, verify
- * that there is no crash */
static int
-linearize_negative_current_data (vlib_main_t * vm)
+linearize_test (vlib_main_t *vm)
{
- u32 bi[32];
- TEST (ARRAY_LEN (bi) == vlib_buffer_alloc (vm, bi, ARRAY_LEN (bi)),
- "buff alloc");
+ chained_buffer_template_t tmpl[VLIB_BUFFER_LINEARIZE_MAX];
+ clib_random_buffer_t randbuf;
u32 data_size = vlib_buffer_get_default_data_size (vm);
- u32 i;
- for (i = 0; i < ARRAY_LEN (bi) - 1; ++i)
+ u8 *rand = 0;
+ int ret = 0;
+ int i;
+
+ clib_random_buffer_init (&randbuf, 0);
+
+ clib_memset (tmpl, 0xff, sizeof (tmpl));
+ for (i = 0; i < 2; i++)
{
- vlib_buffer_t *b = vlib_get_buffer (vm, bi[i]);
- b->next_buffer = bi[i + 1];
- b->flags |= VLIB_BUFFER_NEXT_PRESENT;
- b->current_data = -14;
- b->current_length = 14 + data_size;
+ tmpl[i].current_data = -14;
+ tmpl[i].current_length = 14 + data_size;
}
+ TEST (2 == test_chain (vm, tmpl, 2, 0, &randbuf, &rand),
+ "linearize chain with negative current data");
- (void) vlib_buffer_chain_linearize (vm, vlib_get_buffer (vm, bi[0]));
+ clib_memset (tmpl, 0xff, sizeof (tmpl));
+ tmpl[0].current_data = 12;
+ tmpl[0].current_length = data_size - 12;
+ tmpl[1].current_data = 0;
+ tmpl[1].current_length = 0;
+ TEST (1 == test_chain (vm, tmpl, 2, 0, &randbuf, &rand),
+ "linearize chain with empty next");
- return 0;
+ clib_memset (tmpl, 0xff, sizeof (tmpl));
+ tmpl[0].current_data = 0;
+ tmpl[0].current_length = data_size - 17;
+ tmpl[1].current_data = -5;
+ tmpl[1].current_length = 3;
+ tmpl[2].current_data = 17;
+ tmpl[2].current_length = 9;
+ tmpl[3].current_data = 3;
+ tmpl[3].current_length = 5;
+ TEST (1 == test_chain (vm, tmpl, 4, 0, &randbuf, &rand),
+ "linearize chain into a single buffer");
+
+ clib_memset (tmpl, 0xff, sizeof (tmpl));
+ tmpl[0].current_data = 0;
+ tmpl[0].current_length = data_size - 2;
+ tmpl[1].current_data = -VLIB_BUFFER_PRE_DATA_SIZE;
+ tmpl[1].current_length = 20;
+ tmpl[2].current_data = data_size - 10;
+ tmpl[2].current_length = 10;
+ tmpl[3].current_data = 0;
+ tmpl[3].current_length = data_size;
+ TEST (2 == test_chain (vm, tmpl, 4, data_size - 1, &randbuf, &rand),
+ "linearize cloned chain");
+
+ clib_memset (tmpl, 0xff, sizeof (tmpl));
+ for (i = 0; i < 100; i++)
+ {
+ u8 *r = clib_random_buffer_get_data (&randbuf, 1);
+ int n = clib_max (r[0] % ARRAY_LEN (tmpl), 1);
+ int j;
+ for (j = 0; j < n; j++)
+ {
+ r = clib_random_buffer_get_data (&randbuf, 3);
+ i16 current_data = (i16) r[0] - VLIB_BUFFER_PRE_DATA_SIZE;
+ u16 current_length = *(u16 *) (r + 1) % (data_size - current_data);
+ tmpl[j].current_data = current_data;
+ tmpl[j].current_length = current_length;
+ }
+ r = clib_random_buffer_get_data (&randbuf, 1);
+ TEST (
+ test_chain (vm, tmpl, n, r[0] > 250 ? r[0] % 128 : 0, &randbuf, &rand),
+ "linearize random chain %d", i);
+ }
+
+ ret = 1;
+err:
+ clib_random_buffer_free (&randbuf);
+ vec_free (rand);
+ return ret;
}
static clib_error_t *
@@ -65,22 +254,67 @@ test_linearize_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- if (linearize_negative_current_data (vm))
+ if (!linearize_test (vm))
{
- return clib_error_return (0, "linearize_negative_current_data failed");
+ return clib_error_return (0, "linearize test failed");
}
- return (NULL);
+ return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_linearize_command, static) =
{
.path = "test chained-buffer-linearization",
.short_help = "test chained-buffer-linearization",
.function = test_linearize_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+test_linearize_speed_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+  /* typical 9000-byte TCP jumbo frame */
+ const chained_buffer_template_t tmpl[5] = { { 14, 2034, 1 },
+ { 0, 2048, 1 },
+ { 0, 2048, 1 },
+ { 0, 2048, 1 },
+ { 0, 808, 1 } };
+ int i, j;
+
+ for (i = 0; i < 10; i++)
+ {
+ u64 tot = 0;
+ for (j = 0; j < 100000; j++)
+ {
+ vlib_buffer_t *b;
+ u32 bi;
+
+ if (!build_chain (vm, tmpl, 5, 0, 0, &b, &bi))
+ return clib_error_create ("build_chain() failed");
+
+ CLIB_COMPILER_BARRIER ();
+ u64 start = clib_cpu_time_now ();
+ CLIB_COMPILER_BARRIER ();
+
+ vlib_buffer_chain_linearize (vm, b);
+
+ CLIB_COMPILER_BARRIER ();
+ tot += clib_cpu_time_now () - start;
+ CLIB_COMPILER_BARRIER ();
+
+ vlib_buffer_free_one (vm, bi);
+ }
+ vlib_cli_output (vm, "%.03f ticks/call", (f64) tot / j);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (test_linearize_speed_command, static) = {
+ .path = "test chained-buffer-linearization speed",
+ .short_help = "test chained-buffer-linearization speed",
+ .function = test_linearize_speed_fn,
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/unittest.c b/src/plugins/unittest/unittest.c
index 60ebac130da..555516fc9de 100644
--- a/src/plugins/unittest/unittest.c
+++ b/src/plugins/unittest/unittest.c
@@ -19,14 +19,12 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "C unit tests",
.default_disabled = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/util_test.c b/src/plugins/unittest/util_test.c
index 8dce270f814..53384e55494 100644
--- a/src/plugins/unittest/util_test.c
+++ b/src/plugins/unittest/util_test.c
@@ -22,13 +22,11 @@ test_crash_command_fn (vlib_main_t * vm,
{
u64 *p = (u64 *) 0xdefec8ed;
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "deliberate crash: touching %x",
.format_args = "i4",
};
- /* *INDENT-ON* */
elog (&vlib_global_main.elog_main, &e, 0xdefec8ed);
*p = 0xdeadbeef;
@@ -37,14 +35,12 @@ test_crash_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_crash_command, static) =
{
.path = "test crash",
.short_help = "crash the bus!",
.function = test_crash_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_hash_command_fn (vlib_main_t * vm,
@@ -98,14 +94,12 @@ test_hash_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_hash_command, static) =
{
.path = "test hash_memory",
.short_help = "page boundary crossing test",
.function = test_hash_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/vlib_test.c b/src/plugins/unittest/vlib_test.c
index 3154b7d21cf..c9c46901a4d 100644
--- a/src/plugins/unittest/vlib_test.c
+++ b/src/plugins/unittest/vlib_test.c
@@ -129,14 +129,12 @@ test_vlib_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_vlib_command, static) =
{
.path = "test vlib",
.short_help = "vlib code coverage unit test",
.function = test_vlib_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_format_vlib_command_fn (vlib_main_t * vm,
@@ -180,14 +178,12 @@ test_format_vlib_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_format_vlib_command, static) =
{
.path = "test format-vlib",
  .short_help = "vlib format code coverage unit test",
.function = test_format_vlib_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_vlib2_command_fn (vlib_main_t * vm,
@@ -215,14 +211,12 @@ test_vlib2_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_vlib2_command, static) =
{
.path = "test vlib2",
.short_help = "vlib code coverage unit test #2",
.function = test_vlib2_command_fn,
};
-/* *INDENT-ON* */
diff --git a/src/plugins/urpf/ip4_urpf.c b/src/plugins/urpf/ip4_urpf.c
index 1d329029478..7cbf81c50c3 100644
--- a/src/plugins/urpf/ip4_urpf.c
+++ b/src/plugins/urpf/ip4_urpf.c
@@ -74,7 +74,6 @@ VLIB_NODE_FN (ip4_tx_urpf_strict) (vlib_main_t * vm,
return (urpf_inline (vm, node, frame, AF_IP4, VLIB_TX, URPF_MODE_STRICT));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_rx_urpf_loose) = {
.name = "ip4-rx-urpf-loose",
.vector_size = sizeof (u32),
@@ -160,7 +159,6 @@ VNET_FEATURE_INIT (ip4_tx_urpf_strict_feat, static) =
.arc_name = "ip4-output",
.node_name = "ip4-tx-urpf-strict",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/urpf/ip6_urpf.c b/src/plugins/urpf/ip6_urpf.c
index 48d991573b5..d278289b5e2 100644
--- a/src/plugins/urpf/ip6_urpf.c
+++ b/src/plugins/urpf/ip6_urpf.c
@@ -74,7 +74,6 @@ VLIB_NODE_FN (ip6_tx_urpf_strict) (vlib_main_t * vm,
return (urpf_inline (vm, node, frame, AF_IP6, VLIB_TX, URPF_MODE_STRICT));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_rx_urpf_loose) = {
.name = "ip6-rx-urpf-loose",
.vector_size = sizeof (u32),
@@ -160,7 +159,6 @@ VNET_FEATURE_INIT (ip6_tx_urpf_strict_feat, static) =
.arc_name = "ip6-output",
.node_name = "ip6-tx-urpf-strict",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/urpf/urpf.api b/src/plugins/urpf/urpf.api
index 944db08cc94..a4e897afd17 100644
--- a/src/plugins/urpf/urpf.api
+++ b/src/plugins/urpf/urpf.api
@@ -50,6 +50,51 @@ autoreply define urpf_update
vl_api_interface_index_t sw_if_index;
};
+/**
+ * @brief Enable uRPF on a given interface in a given direction
+ * @param client_index - opaque cookie to identify the sender
+ * @param context - sender context, to match reply w/ request
+ * @param mode - uRPF mode
+ * @param af - Address Family
+ * @param sw_if_index - Interface index
+ * @param is_input - Direction: true for RX (input), false for TX (output)
+ * @param table_id - FIB table ID; 0xffffffff means use the interface's table
+ */
+autoreply define urpf_update_v2
+{
+ u32 client_index;
+ u32 context;
+ bool is_input[default = true];
+ vl_api_urpf_mode_t mode;
+ vl_api_address_family_t af;
+ vl_api_interface_index_t sw_if_index;
+ u32 table_id [default=0xffffffff];
+};
+
+/** @brief Dump uRPF enabled interface(s) in zero or more urpf_interface_details replies
+ @param client_index - opaque cookie to identify the sender
+ @param sw_if_index - sw_if_index of a specific interface, or -1 (default)
+ to return all uRPF enabled interfaces
+*/
+define urpf_interface_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+};
+
+/** @brief uRPF enabled interface details
+*/
+define urpf_interface_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ bool is_input;
+ vl_api_urpf_mode_t mode;
+ vl_api_address_family_t af;
+ u32 table_id;
+};
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/urpf/urpf.c b/src/plugins/urpf/urpf.c
index 7e1986a4250..e5209caafb4 100644
--- a/src/plugins/urpf/urpf.c
+++ b/src/plugins/urpf/urpf.c
@@ -17,7 +17,6 @@
#include <vnet/fib/fib_table.h>
-/* *INDENT-OFF* */
static const char *urpf_feat_arcs[N_AF][VLIB_N_DIR] =
{
[AF_IP4] = {
@@ -53,12 +52,12 @@ static const char *urpf_feats[N_AF][VLIB_N_DIR][URPF_N_MODES] =
},
},
};
-/* *INDENT-ON* */
/**
* Per-af, per-direction, per-interface uRPF configs
*/
-static urpf_mode_t *urpf_cfgs[N_AF][VLIB_N_DIR];
+
+urpf_data_t *urpf_cfgs[N_AF][VLIB_N_DIR];
u8 *
format_urpf_mode (u8 * s, va_list * a)
@@ -95,34 +94,105 @@ unformat_urpf_mode (unformat_input_t * input, va_list * args)
return 0;
}
-void
-urpf_update (urpf_mode_t mode,
- u32 sw_if_index, ip_address_family_t af, vlib_dir_t dir)
+int
+urpf_update (urpf_mode_t mode, u32 sw_if_index, ip_address_family_t af,
+ vlib_dir_t dir, u32 table_id)
{
- urpf_mode_t old;
+ fib_protocol_t proto;
+ u32 fib_index;
+ if (table_id != ~0)
+ {
+ proto = ip_address_family_to_fib_proto (af);
+ fib_index = fib_table_find (proto, table_id);
+ if (fib_index == (~0))
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+ else
+ {
+ bool is_ip4 = (AF_IP4 == af);
+ u32 *fib_index_by_sw_if_index = is_ip4 ?
+ ip4_main.fib_index_by_sw_if_index :
+ ip6_main.fib_index_by_sw_if_index;
- vec_validate_init_empty (urpf_cfgs[af][dir], sw_if_index, URPF_MODE_OFF);
+ fib_index = fib_index_by_sw_if_index[sw_if_index];
+ }
+ urpf_data_t old;
+ urpf_mode_t off = URPF_MODE_OFF;
+ urpf_data_t empty = { .fib_index = 0, .mode = off };
+ vec_validate_init_empty (urpf_cfgs[af][dir], sw_if_index, empty);
old = urpf_cfgs[af][dir][sw_if_index];
- if (mode != old)
+ urpf_data_t data = { .fib_index = fib_index,
+ .mode = mode,
+ .fib_index_is_custom = (table_id != ~0) };
+ urpf_cfgs[af][dir][sw_if_index] = data;
+ if (data.mode != old.mode || data.fib_index != old.fib_index)
{
- if (URPF_MODE_OFF != old)
+ if (URPF_MODE_OFF != old.mode)
/* disable what we have */
vnet_feature_enable_disable (urpf_feat_arcs[af][dir],
- urpf_feats[af][dir][old],
+ urpf_feats[af][dir][old.mode],
sw_if_index, 0, 0, 0);
- if (URPF_MODE_OFF != mode)
+ if (URPF_MODE_OFF != data.mode)
/* enable what's new */
vnet_feature_enable_disable (urpf_feat_arcs[af][dir],
- urpf_feats[af][dir][mode],
+ urpf_feats[af][dir][data.mode],
sw_if_index, 1, 0, 0);
}
/* else - no change to existing config */
+ return 0;
+}
- urpf_cfgs[af][dir][sw_if_index] = mode;
+static void
+urpf_table_bind_v4 (ip4_main_t *im, uword opaque, u32 sw_if_index,
+ u32 new_fib_index, u32 old_fib_index)
+{
+ vlib_dir_t dir;
+ urpf_data_t empty = { .fib_index = 0, .mode = URPF_MODE_OFF };
+ FOREACH_VLIB_DIR (dir)
+ {
+ vec_validate_init_empty (urpf_cfgs[AF_IP4][dir], sw_if_index, empty);
+ if (!urpf_cfgs[AF_IP4][dir][sw_if_index].fib_index_is_custom)
+ {
+ urpf_cfgs[AF_IP4][dir][sw_if_index].fib_index = new_fib_index;
+ }
+ }
}
+static void
+urpf_table_bind_v6 (ip6_main_t *im, uword opaque, u32 sw_if_index,
+ u32 new_fib_index, u32 old_fib_index)
+{
+ vlib_dir_t dir;
+ urpf_data_t empty = { .fib_index = 0, .mode = URPF_MODE_OFF };
+ FOREACH_VLIB_DIR (dir)
+ {
+ vec_validate_init_empty (urpf_cfgs[AF_IP6][dir], sw_if_index, empty);
+ if (!urpf_cfgs[AF_IP6][dir][sw_if_index].fib_index_is_custom)
+ {
+ urpf_cfgs[AF_IP6][dir][sw_if_index].fib_index = new_fib_index;
+ }
+ }
+}
+
+static clib_error_t *
+urpf_init (vlib_main_t *vm)
+{
+ ip4_table_bind_callback_t cb4 = {
+ .function = urpf_table_bind_v4,
+ };
+ vec_add1 (ip4_main.table_bind_callbacks, cb4);
+
+ ip6_table_bind_callback_t cb6 = {
+ .function = urpf_table_bind_v6,
+ };
+ vec_add1 (ip6_main.table_bind_callbacks, cb6);
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (urpf_init);
+
static clib_error_t *
urpf_cli_update (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -134,11 +204,13 @@ urpf_cli_update (vlib_main_t * vm,
urpf_mode_t mode;
u32 sw_if_index;
vlib_dir_t dir;
+ u32 table_id;
sw_if_index = ~0;
af = AF_IP4;
dir = VLIB_RX;
mode = URPF_MODE_STRICT;
+ table_id = ~0;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -150,6 +222,8 @@ urpf_cli_update (vlib_main_t * vm,
;
else if (unformat (line_input, "%U", unformat_urpf_mode, &mode))
;
+ else if (unformat (line_input, "table %d", &table_id))
+ ;
else if (unformat (line_input, "%U", unformat_ip_address_family, &af))
;
else if (unformat (line_input, "%U", unformat_vlib_rx_tx, &dir))
@@ -168,7 +242,13 @@ urpf_cli_update (vlib_main_t * vm,
goto done;
}
- urpf_update (mode, sw_if_index, af, dir);
+ int rv = 0;
+ rv = urpf_update (mode, sw_if_index, af, dir, table_id);
+ if (rv)
+ {
+ error = clib_error_return (0, "unknown table id");
+ goto done;
+ }
done:
unformat_free (line_input);
@@ -196,12 +276,12 @@ done:
*
* Example of graph node after range checking is enabled:
* @cliexstart{show vlib graph ip4-rx-urpf-loose}
- * Name Next Previous
- * ip4-rx-urpf-loose ip4-drop [0] ip4-input-no-checksum
- * ip4-source-and-port-range- ip4-input
+ * Name Next Previous
+ * ip4-rx-urpf-loose ip4-drop [0] ip4-input-no-checksum
+ * ip4-source-and-port-range- ip4-input
* @cliexend
*
- * Example of how to display the feature enabed on an interface:
+ * Example of how to display the feature enabled on an interface:
* @cliexstart{show ip interface features GigabitEthernet2/0/0}
* IP feature paths configured on GigabitEthernet2/0/0...
*
@@ -229,13 +309,12 @@ done:
* @cliexcmd{set urpf ip4 off GigabitEthernet2/0/0}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_source_check_command, static) = {
.path = "set urpf",
.function = urpf_cli_update,
- .short_help = "set urpf [ip4|ip6] [rx|tx] [off|strict|loose] <INTERFACE>",
+ .short_help = "set urpf [ip4|ip6] [rx|tx] [off|strict|loose] "
+ "<INTERFACE> [table <table>]",
};
-/* *INDENT-ON* */
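For example, the extended command can now source the uRPF lookup from an explicit FIB table (interface name illustrative):

    set urpf ip4 rx loose GigabitEthernet2/0/0 table 100
    set urpf ip4 off GigabitEthernet2/0/0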
static clib_error_t *
urpf_cli_accept (vlib_main_t * vm,
@@ -306,13 +385,11 @@ done:
* loose RPF tests:
* @cliexcmd{set urpf-accept table 7 10.0.0.0/8 add}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (urpf_accept_command, static) = {
.path = "set urpf-accept",
.function = urpf_cli_accept,
.short_help = "urpf-accept [table <table-id>] [add|del] <PREFIX>",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/urpf/urpf.h b/src/plugins/urpf/urpf.h
index 941cda25f4b..6983a2b440c 100644
--- a/src/plugins/urpf/urpf.h
+++ b/src/plugins/urpf/urpf.h
@@ -18,10 +18,10 @@
#include <vnet/ip/ip_types.h>
-#define foreach_urpf_mode \
- _(OFF, "off") \
- _(LOOSE, "loose") \
- _(STRICT, "strict") \
+#define foreach_urpf_mode \
+ _ (OFF, "off") \
+ _ (LOOSE, "loose") \
+ _ (STRICT, "strict")
typedef enum urpf_mode_t_
{
@@ -34,10 +34,17 @@ typedef enum urpf_mode_t_
extern u8 *format_urpf_mode (u8 * s, va_list * a);
-extern void urpf_update (urpf_mode_t mode,
- u32 sw_if_index,
- ip_address_family_t af, vlib_dir_t dir);
+typedef struct
+{
+ urpf_mode_t mode;
+ u32 fib_index;
+ u8 fib_index_is_custom;
+} urpf_data_t;
+
+extern urpf_data_t *urpf_cfgs[N_AF][VLIB_N_DIR];
+extern int urpf_update (urpf_mode_t mode, u32 sw_if_index,
+ ip_address_family_t af, vlib_dir_t dir, u32 table_id);
#endif
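A minimal caller sketch against the updated signature (table id illustrative; pass ~0 to keep following the interface's bound table):

    int rv = urpf_update (URPF_MODE_LOOSE, sw_if_index, AF_IP4, VLIB_RX,
                          100 /* table_id */);
    if (rv)
      ; /* VNET_API_ERROR_INVALID_VALUE: no FIB table with that id */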
diff --git a/src/plugins/urpf/urpf_api.c b/src/plugins/urpf/urpf_api.c
index ad060399347..3d0f4b4e8d4 100644
--- a/src/plugins/urpf/urpf_api.c
+++ b/src/plugins/urpf/urpf_api.c
@@ -26,6 +26,8 @@
#include <vnet/format_fns.h>
#include <urpf/urpf.api_enum.h>
#include <urpf/urpf.api_types.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/ip/ip_types.h>
/**
 * Base message ID for the plugin
@@ -62,7 +64,34 @@ vl_api_urpf_update_t_handler (vl_api_urpf_update_t * mp)
VALIDATE_SW_IF_INDEX (mp);
rv = urpf_mode_decode (mp->mode, &mode);
+ if (rv)
+ goto done;
+ rv = ip_address_family_decode (mp->af, &af);
+ if (rv)
+ goto done;
+
+ rv = urpf_update (mode, htonl (mp->sw_if_index), af,
+ (mp->is_input ? VLIB_RX : VLIB_TX), 0);
+ if (rv)
+ goto done;
+
+ BAD_SW_IF_INDEX_LABEL;
+done:
+ REPLY_MACRO (VL_API_URPF_UPDATE_REPLY);
+}
+
+static void
+vl_api_urpf_update_v2_t_handler (vl_api_urpf_update_v2_t *mp)
+{
+ vl_api_urpf_update_reply_t *rmp;
+ ip_address_family_t af;
+ urpf_mode_t mode;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = urpf_mode_decode (mp->mode, &mode);
if (rv)
goto done;
@@ -71,12 +100,85 @@ vl_api_urpf_update_t_handler (vl_api_urpf_update_t * mp)
if (rv)
goto done;
- urpf_update (mode, htonl (mp->sw_if_index), af,
- (mp->is_input ? VLIB_RX : VLIB_TX));
+ rv = urpf_update (mode, htonl (mp->sw_if_index), af,
+ (mp->is_input ? VLIB_RX : VLIB_TX), ntohl (mp->table_id));
+
+ if (rv)
+ goto done;
BAD_SW_IF_INDEX_LABEL;
done:
- REPLY_MACRO (VL_API_URPF_UPDATE_REPLY);
+ REPLY_MACRO (VL_API_URPF_UPDATE_V2_REPLY);
+}
+
+static void
+send_urpf_interface_details (vpe_api_main_t *am, vl_api_registration_t *reg,
+ u32 context, const u32 sw_if_index,
+ const urpf_data_t *ud,
+ const ip_address_family_t af,
+ const vlib_dir_t dir)
+{
+ vl_api_urpf_interface_details_t *mp;
+
+ mp = vl_msg_api_alloc_zero (sizeof (*mp));
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_URPF_INTERFACE_DETAILS);
+ mp->context = context;
+
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->table_id = htonl (fib_table_get_table_id (
+ ud->fib_index, (af == AF_IP4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6)));
+ mp->af = (vl_api_address_family_t) af;
+ mp->mode = (vl_api_urpf_mode_t) ud->mode;
+ mp->is_input = (dir == VLIB_RX);
+
+ vl_api_send_msg (reg, (u8 *) mp);
+}
+
+static void
+send_urpf_interface (vpe_api_main_t *am, vl_api_registration_t *reg,
+ u32 context, const u32 sw_if_index)
+{
+ urpf_data_t *ud;
+ vlib_dir_t dir;
+ ip_address_family_t af;
+
+ FOR_EACH_IP_ADDRESS_FAMILY (af)
+ FOREACH_VLIB_DIR (dir)
+ if (sw_if_index < vec_len (urpf_cfgs[af][dir]))
+ {
+ ud = &urpf_cfgs[af][dir][sw_if_index];
+ if (ud->mode || ud->fib_index_is_custom)
+ send_urpf_interface_details (am, reg, context, sw_if_index, ud, af,
+ dir);
+ }
+}
+
+static void
+vl_api_urpf_interface_dump_t_handler (vl_api_urpf_interface_dump_t *mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vl_api_registration_t *reg;
+ vnet_interface_main_t *im = &vnet_main.interface_main;
+ vnet_sw_interface_t *si;
+ u32 sw_if_index = ~0;
+ int __attribute__ ((unused)) rv = 0;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (sw_if_index == ~0)
+ {
+ pool_foreach (si, im->sw_interfaces)
+ {
+ send_urpf_interface (am, reg, mp->context, si->sw_if_index);
+ }
+ return;
+ }
+ VALIDATE_SW_IF_INDEX (mp);
+ send_urpf_interface (am, reg, mp->context, sw_if_index);
+ BAD_SW_IF_INDEX_LABEL;
}
#include <urpf/urpf.api.c>
@@ -92,12 +194,10 @@ urpf_api_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (urpf_api_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Unicast Reverse Path Forwarding (uRPF)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/urpf/urpf_dp.h b/src/plugins/urpf/urpf_dp.h
index bfe1f659171..816d8b70b90 100644
--- a/src/plugins/urpf/urpf_dp.h
+++ b/src/plugins/urpf/urpf_dp.h
@@ -128,6 +128,11 @@ urpf_inline (vlib_main_t * vm,
h1 += vnet_buffer (b[1])->ip.save_rewrite_length;
}
+ fib_index0 =
+ urpf_cfgs[af][dir][vnet_buffer (b[0])->sw_if_index[dir]].fib_index;
+ fib_index1 =
+ urpf_cfgs[af][dir][vnet_buffer (b[1])->sw_if_index[dir]].fib_index;
+
if (AF_IP4 == af)
{
const ip4_header_t *ip0, *ip1;
@@ -135,11 +140,6 @@ urpf_inline (vlib_main_t * vm,
ip0 = (ip4_header_t *) h0;
ip1 = (ip4_header_t *) h1;
- fib_index0 = ip4_main.fib_index_by_sw_if_index
- [vnet_buffer (b[0])->sw_if_index[dir]];
- fib_index1 = ip4_main.fib_index_by_sw_if_index
- [vnet_buffer (b[1])->sw_if_index[dir]];
-
ip4_fib_forwarding_lookup_x2 (fib_index0,
fib_index1,
&ip0->src_address,
@@ -155,11 +155,6 @@ urpf_inline (vlib_main_t * vm,
{
const ip6_header_t *ip0, *ip1;
- fib_index0 = ip6_main.fib_index_by_sw_if_index
- [vnet_buffer (b[0])->sw_if_index[dir]];
- fib_index1 = ip6_main.fib_index_by_sw_if_index
- [vnet_buffer (b[1])->sw_if_index[dir]];
-
ip0 = (ip6_header_t *) h0;
ip1 = (ip6_header_t *) h1;
@@ -255,12 +250,13 @@ urpf_inline (vlib_main_t * vm,
if (VLIB_TX == dir)
h0 += vnet_buffer (b[0])->ip.save_rewrite_length;
+ fib_index0 =
+ urpf_cfgs[af][dir][vnet_buffer (b[0])->sw_if_index[dir]].fib_index;
+
if (AF_IP4 == af)
{
const ip4_header_t *ip0;
- fib_index0 = ip4_main.fib_index_by_sw_if_index
- [vnet_buffer (b[0])->sw_if_index[dir]];
ip0 = (ip4_header_t *) h0;
lb_index0 = ip4_fib_forwarding_lookup (fib_index0,
@@ -275,8 +271,6 @@ urpf_inline (vlib_main_t * vm,
const ip6_header_t *ip0;
ip0 = (ip6_header_t *) h0;
- fib_index0 = ip6_main.fib_index_by_sw_if_index
- [vnet_buffer (b[0])->sw_if_index[dir]];
lb_index0 = ip6_fib_table_fwding_lookup (fib_index0,
&ip0->src_address);
diff --git a/src/plugins/vhost/CMakeLists.txt b/src/plugins/vhost/CMakeLists.txt
new file mode 100644
index 00000000000..6b86c8c98d1
--- /dev/null
+++ b/src/plugins/vhost/CMakeLists.txt
@@ -0,0 +1,34 @@
+# Copyright (c) 2020 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(vhost
+ SOURCES
+ plugin.c
+ vhost_user.c
+ vhost_user_api.c
+ vhost_user_input.c
+ vhost_user_output.c
+ vhost_std.h
+ vhost_user.h
+ vhost_user_inline.h
+ virtio_std.h
+
+ MULTIARCH_SOURCES
+ vhost_user_input.c
+ vhost_user_output.c
+
+ API_FILES
+ vhost_user.api
+
+ SUPPORTED_OS_LIST Linux
+)
diff --git a/src/plugins/vhost/FEATURE.yaml b/src/plugins/vhost/FEATURE.yaml
new file mode 100644
index 00000000000..7104dda1dc5
--- /dev/null
+++ b/src/plugins/vhost/FEATURE.yaml
@@ -0,0 +1,13 @@
+---
+name: Vhost-user Device Driver
+maintainer: sluong@cisco.com
+features:
+  - Device mode to emulate a vhost-user interface presented to VPP from
+    the guest VM.
+ - Support virtio 1.0 in virtio
+ - Support virtio 1.1 packed ring in virtio [experimental]
+ - Support multi-queue, GSO, checksum offload, indirect descriptor,
+ jumbo frame, and packed ring.
+description: "Vhost-user implementation"
+state: production
+properties: [API, CLI, STATS, MULTITHREAD]
diff --git a/src/plugins/vhost/plugin.c b/src/plugins/vhost/plugin.c
new file mode 100644
index 00000000000..0e6158ba7d8
--- /dev/null
+++ b/src/plugins/vhost/plugin.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Vhost-User",
+};
diff --git a/src/plugins/vhost/vhost_std.h b/src/plugins/vhost/vhost_std.h
new file mode 100644
index 00000000000..7799093bac3
--- /dev/null
+++ b/src/plugins/vhost/vhost_std.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __VHOST_STD_H__
+#define __VHOST_STD_H__
+
+typedef struct
+{
+ u64 guest_phys_addr;
+ u64 memory_size;
+ u64 userspace_addr;
+ u64 mmap_offset;
+} vhost_memory_region_t;
+
+typedef struct
+{
+ u32 nregions;
+ u32 padding;
+ vhost_memory_region_t regions[0];
+} vhost_memory_t;
+
+typedef struct
+{
+ u32 index;
+ u32 num;
+} vhost_vring_state_t;
+
+typedef struct
+{
+ u32 index;
+ int fd;
+} vhost_vring_file_t;
+
+typedef struct
+{
+ u32 index;
+ u32 flags;
+ u64 desc_user_addr;
+ u64 used_user_addr;
+ u64 avail_user_addr;
+ u64 log_guest_addr;
+} vhost_vring_addr_t;
+
+typedef struct
+{
+ u64 size;
+ u64 offset;
+} vhost_user_log_t;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
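For orientation, a sketch of how a vhost-user backend typically uses one of these regions to translate a guest physical address, following standard vhost-user memory-map semantics (the helper name is hypothetical; mmap_base is assumed to be where the frontend's fd was mapped at mmap_offset):

    static inline void *
    region_gpa_to_va (vhost_memory_region_t *r, void *mmap_base, u64 gpa)
    {
      if (gpa < r->guest_phys_addr ||
          gpa >= r->guest_phys_addr + r->memory_size)
        return 0; /* gpa not covered by this region */
      return (u8 *) mmap_base + (gpa - r->guest_phys_addr);
    }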
diff --git a/src/plugins/vhost/vhost_user.api b/src/plugins/vhost/vhost_user.api
new file mode 100644
index 00000000000..b026ba768a9
--- /dev/null
+++ b/src/plugins/vhost/vhost_user.api
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "4.1.1";
+
+import "vnet/interface_types.api";
+import "vnet/ethernet/ethernet_types.api";
+import "vnet/devices/virtio/virtio_types.api";
+
+/** \brief vhost-user interface create request
+ @param client_index - opaque cookie to identify the sender
+ @param is_server - our side is socket server
+ @param sock_filename - unix socket filename, used to speak with frontend
+ @param use_custom_mac - enable or disable the use of the provided hardware address
+    @param disable_mrg_rxbuf - disable the use of mergeable receive buffers
+    @param disable_indirect_desc - disable the use of indirect descriptors, which the driver can use
+ @param enable_gso - enable gso support (default 0)
+ @param enable_packed - enable packed ring support (default 0)
+ @param mac_address - hardware address to use if 'use_custom_mac' is set
+*/
+define create_vhost_user_if
+{
+ option deprecated;
+ u32 client_index;
+ u32 context;
+ bool is_server;
+ string sock_filename[256];
+ bool renumber;
+ bool disable_mrg_rxbuf;
+ bool disable_indirect_desc;
+ bool enable_gso;
+ bool enable_packed;
+ u32 custom_dev_instance;
+ bool use_custom_mac;
+ vl_api_mac_address_t mac_address;
+ string tag[64];
+};
+
+/** \brief vhost-user interface create response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param sw_if_index - interface the operation is applied to
+*/
+define create_vhost_user_if_reply
+{
+ option deprecated;
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief vhost-user interface modify request
+ @param client_index - opaque cookie to identify the sender
+ @param is_server - our side is socket server
+ @param sw_if_index - interface the operation is applied to
+ @param sock_filename - unix socket filename, used to speak with the frontend
+ @param renumber - if true, custom_dev_instance is valid
+ @param custom_dev_instance - custom device instance number
+ @param enable_gso - enable gso support (default 0)
+ @param enable_packed - enable packed ring support (default 0)
+*/
+autoreply define modify_vhost_user_if
+{
+ option deprecated;
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ bool is_server;
+ string sock_filename[256];
+ bool renumber;
+ bool enable_gso;
+ bool enable_packed;
+ u32 custom_dev_instance;
+};
+
+/** \brief vhost-user interface create request, version 2
+ @param client_index - opaque cookie to identify the sender
+ @param is_server - our side is socket server
+ @param sock_filename - unix socket filename, used to speak with the frontend
+ @param use_custom_mac - enable or disable the use of the provided hardware address
+ @param disable_mrg_rxbuf - disable the use of mergeable receive buffers
+ @param disable_indirect_desc - disable the use of indirect descriptors, which the driver may otherwise use
+ @param enable_gso - enable gso support (default 0)
+ @param enable_packed - enable packed ring support (default 0)
+ @param enable_event_idx - enable event_idx support (default 0)
+ @param mac_address - hardware address to use if 'use_custom_mac' is set
+ @param renumber - if true, custom_dev_instance is valid
+ @param custom_dev_instance - custom device instance number
+ @param tag - opaque user tag to associate with the interface
+*/
+define create_vhost_user_if_v2
+{
+ u32 client_index;
+ u32 context;
+ bool is_server;
+ string sock_filename[256];
+ bool renumber;
+ bool disable_mrg_rxbuf;
+ bool disable_indirect_desc;
+ bool enable_gso;
+ bool enable_packed;
+ bool enable_event_idx;
+ u32 custom_dev_instance;
+ bool use_custom_mac;
+ vl_api_mac_address_t mac_address;
+ string tag[64];
+};
+
+/** \brief vhost-user interface create response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param sw_if_index - interface the operation is applied to
+*/
+define create_vhost_user_if_v2_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief vhost-user interface modify request
+ @param client_index - opaque cookie to identify the sender
+ @param is_server - our side is socket server
+ @param sw_if_index - interface the operation is applied to
+ @param sock_filename - unix socket filename, used to speak with the frontend
+ @param enable_gso - enable gso support (default 0)
+ @param enable_packed - enable packed ring support (default 0)
+ @param enable_event_idx - enable event idx support (default 0)
+ @param renumber - if true, custom_dev_instance is valid
+ @param custom_dev_instance - custom device instance number
+*/
+autoreply define modify_vhost_user_if_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ bool is_server;
+ string sock_filename[256];
+ bool renumber;
+ bool enable_gso;
+ bool enable_packed;
+ bool enable_event_idx;
+ u32 custom_dev_instance;
+};
+
+/** \brief vhost-user interface delete request
+ @param client_index - opaque cookie to identify the sender
+ @param sw_if_index - interface index of the interface to delete
+*/
+autoreply define delete_vhost_user_if
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Vhost-user interface details structure
+ @param sw_if_index - index of the interface
+ @param interface_name - name of interface
+ @param virtio_net_hdr_sz - net header size
+ @param features_first_32 - interface features, first 32 bits
+ @param features_last_32 - interface features, last 32 bits
+ @param is_server - vhost-user server socket
+ @param sock_filename - socket filename
+ @param num_regions - number of used memory regions
+ @param sock_errno - socket errno
+*/
+define sw_interface_vhost_user_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ string interface_name[64];
+ u32 virtio_net_hdr_sz;
+ vl_api_virtio_net_features_first_32_t features_first_32;
+ vl_api_virtio_net_features_last_32_t features_last_32;
+ bool is_server;
+ string sock_filename[256];
+ u32 num_regions;
+ i32 sock_errno;
+};
+
+/** \brief Vhost-user interface dump request
+ @param sw_if_index - filter by sw_if_index
+*/
+define sw_interface_vhost_user_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+};
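+
+/*
+ * Illustrative call flow (a sketch, not itself part of the API
+ * definition): the client sends create_vhost_user_if_v2 and receives
+ * create_vhost_user_if_v2_reply carrying the new sw_if_index; it may
+ * later send sw_interface_vhost_user_dump and receive one
+ * sw_interface_vhost_user_details reply per matching interface.
+ */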
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vhost/vhost_user.c b/src/plugins/vhost/vhost_user.c
new file mode 100644
index 00000000000..fdee984f97b
--- /dev/null
+++ b/src/plugins/vhost/vhost_user.c
@@ -0,0 +1,2594 @@
+/*
+ *------------------------------------------------------------------
+ * vhost.c - vhost-user
+ *
+ * Copyright (c) 2014-2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <fcntl.h> /* for open */
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h> /* for iovec */
+#include <netinet/in.h>
+#include <sys/vfs.h>
+
+#include <linux/if_arp.h>
+#include <linux/if_tun.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
+
+#include <vhost/vhost_user.h>
+#include <vhost/vhost_user_inline.h>
+
+/**
+ * @file
+ * @brief vHost User Device Driver.
+ *
+ * This file contains the source code for vHost User interface.
+ */
+
+vlib_node_registration_t vhost_user_send_interrupt_node;
+
+vhost_user_main_t vhost_user_main = {
+ .mtu_bytes = 1518,
+};
+
+VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = {
+ .name = "vhost-user",
+};
+
+static long
+get_huge_page_size (int fd)
+{
+ struct statfs s;
+ fstatfs (fd, &s);
+ return s.f_bsize;
+}
+
+static void
+unmap_all_mem_regions (vhost_user_intf_t * vui)
+{
+ int i, r, q;
+ vhost_user_vring_t *vq;
+
+ for (i = 0; i < vui->nregions; i++)
+ {
+ if (vui->region_mmap_addr[i] != MAP_FAILED)
+ {
+
+ long page_sz = get_huge_page_size (vui->region_mmap_fd[i]);
+
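+ /* round the mapped size up to a whole number of pages; page_sz is
+ assumed to be a power of 2, which holds for hugetlbfs page sizes */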
+ ssize_t map_sz = (vui->regions[i].memory_size +
+ vui->regions[i].mmap_offset +
+ page_sz - 1) & ~(page_sz - 1);
+
+ r =
+ munmap (vui->region_mmap_addr[i] - vui->regions[i].mmap_offset,
+ map_sz);
+
+ vu_log_debug (vui, "unmap memory region %d addr 0x%lx len 0x%lx "
+ "page_sz 0x%x", i, vui->region_mmap_addr[i], map_sz,
+ page_sz);
+
+ vui->region_mmap_addr[i] = MAP_FAILED;
+
+ if (r == -1)
+ {
+ vu_log_err (vui, "failed to unmap memory region (errno %d)",
+ errno);
+ }
+ close (vui->region_mmap_fd[i]);
+ }
+ }
+ vui->nregions = 0;
+
+ FOR_ALL_VHOST_RX_TXQ (q, vui)
+ {
+ vq = &vui->vrings[q];
+ vq->avail = 0;
+ vq->used = 0;
+ vq->desc = 0;
+ }
+}
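+
+/*
+ * A note on vring numbering, matching the assertions below: qids are
+ * numbered from the guest driver's point of view, so even qids are
+ * guest RX rings (which VPP transmits into) and odd qids are guest TX
+ * rings (which VPP receives from).
+ */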
+
+static_always_inline void
+vhost_user_tx_thread_placement (vhost_user_intf_t *vui, u32 qid)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vhost_user_vring_t *rxvq = &vui->vrings[qid];
+ u32 q = qid >> 1, rxvq_count;
+
+ ASSERT ((qid & 1) == 0);
+ if (!rxvq->started || !rxvq->enabled)
+ return;
+
+ rxvq_count = (qid >> 1) + 1;
+ if (rxvq->queue_index == ~0)
+ {
+ rxvq->queue_index =
+ vnet_hw_if_register_tx_queue (vnm, vui->hw_if_index, q);
+ rxvq->qid = q;
+ }
+
+ FOR_ALL_VHOST_RXQ (q, vui)
+ {
+ vhost_user_vring_t *rxvq = &vui->vrings[q];
+ u32 qi = rxvq->queue_index;
+
+ if (rxvq->queue_index == ~0)
+ break;
+ for (u32 i = 0; i < vlib_get_n_threads (); i++)
+ vnet_hw_if_tx_queue_unassign_thread (vnm, qi, i);
+ }
+
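+ /* distribute worker threads over the enabled guest RX rings
+ round-robin: thread i transmits on ring (i % rxvq_count), so every
+ thread always has a TX queue assigned */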
+ for (u32 i = 0; i < vlib_get_n_threads (); i++)
+ {
+ vhost_user_vring_t *rxvq =
+ &vui->vrings[VHOST_VRING_IDX_RX (i % rxvq_count)];
+ u32 qi = rxvq->queue_index;
+
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, i);
+ }
+
+ vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index);
+}
+
+/**
+ * @brief Unassign the existing interface/queue-to-thread mappings and
+ * assign new interface/queue-to-thread mappings
+ */
+static_always_inline void
+vhost_user_rx_thread_placement (vhost_user_intf_t * vui, u32 qid)
+{
+ vhost_user_vring_t *txvq = &vui->vrings[qid];
+ vnet_main_t *vnm = vnet_get_main ();
+ int rv;
+ u32 q = qid >> 1;
+ vhost_user_main_t *vum = &vhost_user_main;
+
+ ASSERT ((qid & 1) == 1); // should be odd
+ // Assign new queue mappings for the interface
+ if (txvq->queue_index != ~0)
+ return;
+ vnet_hw_if_set_input_node (vnm, vui->hw_if_index,
+ vhost_user_input_node.index);
+ txvq->queue_index = vnet_hw_if_register_rx_queue (vnm, vui->hw_if_index, q,
+ VNET_HW_IF_RXQ_THREAD_ANY);
+ txvq->thread_index =
+ vnet_hw_if_get_rx_queue_thread_index (vnm, txvq->queue_index);
+
+ if (txvq->mode == VNET_HW_IF_RX_MODE_UNKNOWN)
+ /* Set polling as the default */
+ txvq->mode = VNET_HW_IF_RX_MODE_POLLING;
+ if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ {
+ vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
+ /* Keep a polling queue count for each thread */
+ cpu->polling_q_count++;
+ }
+ txvq->qid = q;
+ rv = vnet_hw_if_set_rx_queue_mode (vnm, txvq->queue_index, txvq->mode);
+ if (rv)
+ vu_log_warn (vui, "unable to set rx mode for interface %d, "
+ "queue %d: rc=%d", vui->hw_if_index, q, rv);
+ vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index);
+}
+
+/** @brief Returns whether at least one TX and one RX vring are enabled */
+static_always_inline int
+vhost_user_intf_ready (vhost_user_intf_t * vui)
+{
+ int i, found[2] = { }; //RX + TX
+
+ for (i = 0; i < vui->num_qid; i++)
+ if (vui->vrings[i].started && vui->vrings[i].enabled)
+ found[i & 1] = 1;
+
+ return found[0] && found[1];
+}
+
+static_always_inline void
+vhost_user_update_iface_state (vhost_user_intf_t * vui)
+{
+ /* once we have pointers to the descriptor tables, bring the interface up */
+ int is_ready = vhost_user_intf_ready (vui);
+ if (is_ready != vui->is_ready)
+ {
+ vu_log_debug (vui, "interface %d %s", vui->sw_if_index,
+ is_ready ? "ready" : "down");
+ if (vui->admin_up)
+ vnet_hw_interface_set_flags (vnet_get_main (), vui->hw_if_index,
+ is_ready ? VNET_HW_INTERFACE_FLAG_LINK_UP
+ : 0);
+ vui->is_ready = is_ready;
+ }
+}
+
+static clib_error_t *
+vhost_user_callfd_read_ready (clib_file_t * uf)
+{
+ __attribute__ ((unused)) int n;
+ u8 buff[8];
+
+ n = read (uf->file_descriptor, ((char *) &buff), 8);
+
+ return 0;
+}
+
+static_always_inline void
+vhost_user_thread_placement (vhost_user_intf_t * vui, u32 qid)
+{
+ if (qid & 1) // RX is odd, TX is even
+ {
+ if (vui->vrings[qid].queue_index == ~0)
+ vhost_user_rx_thread_placement (vui, qid);
+ }
+ else
+ vhost_user_tx_thread_placement (vui, qid);
+}
+
+static clib_error_t *
+vhost_user_kickfd_read_ready (clib_file_t * uf)
+{
+ __attribute__ ((unused)) ssize_t n;
+ u8 buff[8];
+ vhost_user_main_t *vum = &vhost_user_main;
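+ /* private_data packs (interface index << 8) | vring index, mirroring
+ the encoding used when the kickfd is registered in the
+ VHOST_USER_SET_VRING_KICK handling below */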
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data >> 8);
+ u32 qid = uf->private_data & 0xff;
+ u32 is_txq = qid & 1;
+ vhost_user_vring_t *vq = &vui->vrings[qid];
+ vnet_main_t *vnm = vnet_get_main ();
+
+ n = read (uf->file_descriptor, buff, 8);
+ if (vq->started == 0)
+ {
+ vq->started = 1;
+ vhost_user_thread_placement (vui, qid);
+ vhost_user_update_iface_state (vui);
+ if (is_txq)
+ vnet_hw_if_set_rx_queue_file_index (vnm, vq->queue_index,
+ vq->kickfd_idx);
+ }
+
+ if (is_txq && (vq->mode != VNET_HW_IF_RX_MODE_POLLING) &&
+ vhost_user_intf_ready (vui))
+ {
+ vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, vq->thread_index);
+ /*
+ * If the thread has more than 1 queue and the other queue is in polling
+ * mode, there is no need to trigger an interrupt
+ */
+ if (cpu->polling_q_count == 0)
+ vnet_hw_if_rx_queue_set_int_pending (vnm, vq->queue_index);
+ }
+
+ return 0;
+}
+
+static_always_inline void
+vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid)
+{
+ vhost_user_vring_t *vring = &vui->vrings[qid];
+
+ clib_memset (vring, 0, sizeof (*vring));
+ vring->kickfd_idx = ~0;
+ vring->callfd_idx = ~0;
+ vring->errfd = -1;
+ vring->qid = -1;
+ vring->queue_index = ~0;
+ vring->thread_index = ~0;
+ vring->mode = VNET_HW_IF_RX_MODE_POLLING;
+
+ clib_spinlock_init (&vring->vring_lock);
+
+ /*
+ * We have a bug with some qemu 2.5 versions, and this may be a fix.
+ * It feels like interpreting holy text, but this is from vhost-user.txt.
+ * "
+ * One queue pair is enabled initially. More queues are enabled
+ * dynamically, by sending message VHOST_USER_SET_VRING_ENABLE.
+ * "
+ * Don't know who's right, but this is what DPDK does.
+ */
+ if (qid == 0 || qid == 1)
+ vring->enabled = 1;
+}
+
+static_always_inline void
+vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid)
+{
+ vhost_user_vring_t *vring = &vui->vrings[qid];
+
+ if (vring->kickfd_idx != ~0)
+ {
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
+ vring->kickfd_idx);
+ clib_file_del (&file_main, uf);
+ vring->kickfd_idx = ~0;
+ }
+ if (vring->callfd_idx != ~0)
+ {
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
+ vring->callfd_idx);
+ clib_file_del (&file_main, uf);
+ vring->callfd_idx = ~0;
+ }
+ if (vring->errfd != -1)
+ {
+ close (vring->errfd);
+ vring->errfd = -1;
+ }
+
+ clib_spinlock_free (&vring->vring_lock);
+
+ // save the needed information in vrings prior to being wiped out
+ u16 q = vui->vrings[qid].qid;
+ u32 queue_index = vui->vrings[qid].queue_index;
+ u32 mode = vui->vrings[qid].mode;
+ u32 thread_index = vui->vrings[qid].thread_index;
+ vhost_user_vring_init (vui, qid);
+ vui->vrings[qid].qid = q;
+ vui->vrings[qid].queue_index = queue_index;
+ vui->vrings[qid].mode = mode;
+ vui->vrings[qid].thread_index = thread_index;
+}
+
+static_always_inline void
+vhost_user_if_disconnect (vhost_user_intf_t * vui)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ int q;
+
+ vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
+
+ if (vui->clib_file_index != ~0)
+ {
+ clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
+ vui->clib_file_index = ~0;
+ }
+
+ vui->is_ready = 0;
+
+ FOR_ALL_VHOST_RX_TXQ (q, vui) { vhost_user_vring_close (vui, q); }
+
+ unmap_all_mem_regions (vui);
+ vu_log_debug (vui, "interface ifindex %d disconnected", vui->sw_if_index);
+}
+
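+/*
+ * Tell the guest driver whether it should kick (notify) us when it adds
+ * buffers: in polling mode notifications are suppressed, via
+ * VRING_USED_F_NO_NOTIFY on split rings or VRING_EVENT_F_DISABLE on
+ * packed rings; in interrupt or adaptive mode they are re-enabled.
+ */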
+void
+vhost_user_set_operation_mode (vhost_user_intf_t *vui,
+ vhost_user_vring_t *txvq)
+{
+ if (vhost_user_is_packed_ring_supported (vui))
+ {
+ if (txvq->used_event)
+ {
+ if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ txvq->used_event->flags = VRING_EVENT_F_DISABLE;
+ else
+ txvq->used_event->flags = 0;
+ }
+ }
+ else
+ {
+ if (txvq->used)
+ {
+ if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ txvq->used->flags = VRING_USED_F_NO_NOTIFY;
+ else
+ txvq->used->flags = 0;
+ }
+ }
+}
+
+static clib_error_t *
+vhost_user_socket_read (clib_file_t * uf)
+{
+ int n, i, j;
+ int fd, number_of_fds = 0;
+ int fds[VHOST_MEMORY_MAX_NREGIONS];
+ vhost_user_msg_t msg;
+ struct msghdr mh;
+ struct iovec iov[1];
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+ struct cmsghdr *cmsg;
+ u8 q;
+ clib_file_t template = { 0 };
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
+
+ char control[CMSG_SPACE (VHOST_MEMORY_MAX_NREGIONS * sizeof (int))];
+
+ clib_memset (&mh, 0, sizeof (mh));
+ clib_memset (control, 0, sizeof (control));
+
+ for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++)
+ fds[i] = -1;
+
+ /* set the payload */
+ iov[0].iov_base = (void *) &msg;
+ iov[0].iov_len = VHOST_USER_MSG_HDR_SZ;
+
+ mh.msg_iov = iov;
+ mh.msg_iovlen = 1;
+ mh.msg_control = control;
+ mh.msg_controllen = sizeof (control);
+
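+ /*
+ * Each vhost-user message starts with a fixed-size header (request,
+ * flags, size) followed by msg.size bytes of payload; any file
+ * descriptors arrive as SCM_RIGHTS ancillary data on the same socket.
+ */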
+ n = recvmsg (uf->file_descriptor, &mh, 0);
+
+ if (n != VHOST_USER_MSG_HDR_SZ)
+ {
+ if (n == -1)
+ {
+ vu_log_debug (vui, "recvmsg returned error %d %s", errno,
+ strerror (errno));
+ }
+ else
+ {
+ vu_log_debug (vui, "n (%d) != VHOST_USER_MSG_HDR_SZ (%d)",
+ n, VHOST_USER_MSG_HDR_SZ);
+ }
+ goto close_socket;
+ }
+
+ if (mh.msg_flags & MSG_CTRUNC)
+ {
+ vu_log_debug (vui, "MSG_CTRUNC is set");
+ goto close_socket;
+ }
+
+ cmsg = CMSG_FIRSTHDR (&mh);
+
+ if (cmsg && (cmsg->cmsg_len > 0) && (cmsg->cmsg_level == SOL_SOCKET) &&
+ (cmsg->cmsg_type == SCM_RIGHTS) &&
+ (cmsg->cmsg_len - CMSG_LEN (0) <=
+ VHOST_MEMORY_MAX_NREGIONS * sizeof (int)))
+ {
+ number_of_fds = (cmsg->cmsg_len - CMSG_LEN (0)) / sizeof (int);
+ clib_memcpy_fast (fds, CMSG_DATA (cmsg), number_of_fds * sizeof (int));
+ }
+
+ /* version 1, no reply bit set */
+ if ((msg.flags & 7) != 1)
+ {
+ vu_log_debug (vui, "malformed message received. closing socket");
+ goto close_socket;
+ }
+
+ {
+ int rv;
+ rv =
+ read (uf->file_descriptor, ((char *) &msg) + VHOST_USER_MSG_HDR_SZ,
+ msg.size);
+ if (rv < 0)
+ {
+ vu_log_debug (vui, "read failed %s", strerror (errno));
+ goto close_socket;
+ }
+ else if (rv != msg.size)
+ {
+ vu_log_debug (vui, "message too short (read %dB should be %dB)", rv,
+ msg.size);
+ goto close_socket;
+ }
+ }
+
+ switch (msg.request)
+ {
+ case VHOST_USER_GET_FEATURES:
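+ /* bit 2 of the header flags marks a reply; or-ing in 4 turns the
+ request header we received into a reply header */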
+ msg.flags |= 4;
+ msg.u64 = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) |
+ VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ) |
+ VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT) |
+ VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC) |
+ VIRTIO_FEATURE (VHOST_F_LOG_ALL) |
+ VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_ANNOUNCE) |
+ VIRTIO_FEATURE (VIRTIO_NET_F_MQ) |
+ VIRTIO_FEATURE (VHOST_USER_F_PROTOCOL_FEATURES) |
+ VIRTIO_FEATURE (VIRTIO_F_VERSION_1);
+ msg.u64 &= vui->feature_mask;
+
+ if (vui->enable_event_idx)
+ msg.u64 |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
+ if (vui->enable_gso)
+ msg.u64 |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS;
+ if (vui->enable_packed)
+ msg.u64 |= VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
+
+ msg.size = sizeof (msg.u64);
+ vu_log_debug (vui, "if %d msg VHOST_USER_GET_FEATURES - reply "
+ "0x%016llx", vui->hw_if_index, msg.u64);
+ n =
+ send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
+ if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
+ {
+ vu_log_debug (vui, "could not send message response");
+ goto close_socket;
+ }
+ break;
+
+ case VHOST_USER_SET_FEATURES:
+ vu_log_debug (vui, "if %d msg VHOST_USER_SET_FEATURES features "
+ "0x%016llx", vui->hw_if_index, msg.u64);
+
+ vui->features = msg.u64;
+
+ if (vui->features &
+ (VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) |
+ VIRTIO_FEATURE (VIRTIO_F_VERSION_1)))
+ vui->virtio_net_hdr_sz = 12;
+ else
+ vui->virtio_net_hdr_sz = 10;
+
+ vui->is_any_layout =
+ (vui->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;
+
+ ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE);
+ if (vui->enable_gso &&
+ ((vui->features & FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)
+ == FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS))
+ {
+ vnet_hw_if_set_caps (vnm, vui->hw_if_index,
+ VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM);
+ }
+ else
+ {
+ vnet_hw_if_unset_caps (vnm, vui->hw_if_index,
+ VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_L4_TX_CKSUM);
+ }
+ vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
+ vui->is_ready = 0;
+ vhost_user_update_iface_state (vui);
+ break;
+
+ case VHOST_USER_SET_MEM_TABLE:
+ vu_log_debug (vui, "if %d msg VHOST_USER_SET_MEM_TABLE nregions %d",
+ vui->hw_if_index, msg.memory.nregions);
+
+ if ((msg.memory.nregions < 1) ||
+ (msg.memory.nregions > VHOST_MEMORY_MAX_NREGIONS))
+ {
+ vu_log_debug (vui, "number of mem regions must be between 1 and %i",
+ VHOST_MEMORY_MAX_NREGIONS);
+ goto close_socket;
+ }
+
+ if (msg.memory.nregions != number_of_fds)
+ {
+ vu_log_debug (vui, "each memory region must have FD");
+ goto close_socket;
+ }
+
+ /* Do the mmap without barrier sync */
+ void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS];
+ for (i = 0; i < msg.memory.nregions; i++)
+ {
+ long page_sz = get_huge_page_size (fds[i]);
+
+ /* align size to page */
+ ssize_t map_sz = (msg.memory.regions[i].memory_size +
+ msg.memory.regions[i].mmap_offset +
+ page_sz - 1) & ~(page_sz - 1);
+
+ region_mmap_addr[i] = mmap (0, map_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fds[i], 0);
+ if (region_mmap_addr[i] == MAP_FAILED)
+ {
+ vu_log_err (vui, "failed to map memory. errno is %d", errno);
+ for (j = 0; j < i; j++)
+ munmap (region_mmap_addr[j], map_sz);
+ goto close_socket;
+ }
+ vu_log_debug (vui, "map memory region %d addr 0 len 0x%lx fd %d "
+ "mapped 0x%lx page_sz 0x%x", i, map_sz, fds[i],
+ region_mmap_addr[i], page_sz);
+ }
+
+ vlib_worker_thread_barrier_sync (vm);
+ unmap_all_mem_regions (vui);
+ for (i = 0; i < msg.memory.nregions; i++)
+ {
+ clib_memcpy_fast (&(vui->regions[i]), &msg.memory.regions[i],
+ sizeof (vhost_user_memory_region_t));
+
+ vui->region_mmap_addr[i] = region_mmap_addr[i];
+ vui->region_guest_addr_lo[i] = vui->regions[i].guest_phys_addr;
+ vui->region_guest_addr_hi[i] = vui->regions[i].guest_phys_addr +
+ vui->regions[i].memory_size;
+
+ vui->region_mmap_addr[i] += vui->regions[i].mmap_offset;
+ vui->region_mmap_fd[i] = fds[i];
+
+ vui->nregions++;
+ }
+
+ /*
+ * Re-compute desc, used, and avail descriptor table if vring address
+ * is set.
+ */
+ FOR_ALL_VHOST_RX_TXQ (q, vui)
+ {
+ if (vui->vrings[q].desc_user_addr && vui->vrings[q].used_user_addr &&
+ vui->vrings[q].avail_user_addr)
+ {
+ vui->vrings[q].desc =
+ map_user_mem (vui, vui->vrings[q].desc_user_addr);
+ vui->vrings[q].used =
+ map_user_mem (vui, vui->vrings[q].used_user_addr);
+ vui->vrings[q].avail =
+ map_user_mem (vui, vui->vrings[q].avail_user_addr);
+ }
+ }
+ vlib_worker_thread_barrier_release (vm);
+ break;
+
+ case VHOST_USER_SET_VRING_NUM:
+ vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_NUM idx %d num %d",
+ vui->hw_if_index, msg.state.index, msg.state.num);
+
+ if ((msg.state.num > 32768) || /* maximum ring size is 32768 */
+ (msg.state.num == 0) || /* it cannot be zero */
+ ((msg.state.num - 1) & msg.state.num) || /* must be power of 2 */
+ (msg.state.index >= vui->num_qid))
+ {
+ vu_log_debug (vui, "invalid VHOST_USER_SET_VRING_NUM: msg.state.num"
+ " %d, msg.state.index %d, curruent max q %d",
+ msg.state.num, msg.state.index, vui->num_qid);
+ goto close_socket;
+ }
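+ /* msg.state.num is a power of 2 (checked above), so num - 1 doubles
+ as a cheap index mask for the ring */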
+ vui->vrings[msg.state.index].qsz_mask = msg.state.num - 1;
+ break;
+
+ case VHOST_USER_SET_VRING_ADDR:
+ vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_ADDR idx %d",
+ vui->hw_if_index, msg.state.index);
+
+ if (msg.state.index >= vui->num_qid)
+ {
+ vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ADDR:"
+ " %u >= %u", msg.state.index, vui->num_qid);
+ goto close_socket;
+ }
+
+ if (msg.size < sizeof (msg.addr))
+ {
+ vu_log_debug (vui, "vhost message is too short (%d < %d)",
+ msg.size, sizeof (msg.addr));
+ goto close_socket;
+ }
+
+ vnet_virtio_vring_desc_t *desc =
+ map_user_mem (vui, msg.addr.desc_user_addr);
+ vnet_virtio_vring_used_t *used =
+ map_user_mem (vui, msg.addr.used_user_addr);
+ vnet_virtio_vring_avail_t *avail =
+ map_user_mem (vui, msg.addr.avail_user_addr);
+
+ if ((desc == NULL) || (used == NULL) || (avail == NULL))
+ {
+ vu_log_debug (vui, "failed to map user memory for hw_if_index %d",
+ vui->hw_if_index);
+ goto close_socket;
+ }
+
+ vui->vrings[msg.state.index].desc_user_addr = msg.addr.desc_user_addr;
+ vui->vrings[msg.state.index].used_user_addr = msg.addr.used_user_addr;
+ vui->vrings[msg.state.index].avail_user_addr = msg.addr.avail_user_addr;
+
+ vlib_worker_thread_barrier_sync (vm);
+ vui->vrings[msg.state.index].desc = desc;
+ vui->vrings[msg.state.index].used = used;
+ vui->vrings[msg.state.index].avail = avail;
+
+ vui->vrings[msg.state.index].log_guest_addr = msg.addr.log_guest_addr;
+ vui->vrings[msg.state.index].log_used =
+ (msg.addr.flags & (1 << VHOST_VRING_F_LOG)) ? 1 : 0;
+
+ /* Spec says: If VHOST_USER_F_PROTOCOL_FEATURES has not been negotiated,
+ the ring is initialized in an enabled state. */
+ if (!(vui->features & VIRTIO_FEATURE (VHOST_USER_F_PROTOCOL_FEATURES)))
+ vui->vrings[msg.state.index].enabled = 1;
+
+ vui->vrings[msg.state.index].last_used_idx =
+ vui->vrings[msg.state.index].last_avail_idx =
+ vui->vrings[msg.state.index].used->idx;
+ vui->vrings[msg.state.index].last_kick =
+ vui->vrings[msg.state.index].last_used_idx;
+
+ /* tell driver that we want interrupts or not */
+ vhost_user_set_operation_mode (vui, &vui->vrings[msg.state.index]);
+ vlib_worker_thread_barrier_release (vm);
+ vhost_user_update_iface_state (vui);
+ break;
+
+ case VHOST_USER_SET_OWNER:
+ vu_log_debug (vui, "if %d msg VHOST_USER_SET_OWNER", vui->hw_if_index);
+ break;
+
+ case VHOST_USER_RESET_OWNER:
+ vu_log_debug (vui, "if %d msg VHOST_USER_RESET_OWNER",
+ vui->hw_if_index);
+ break;
+
+ case VHOST_USER_SET_VRING_CALL:
+ vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_CALL %d",
+ vui->hw_if_index, msg.u64);
+
+ q = (u8) (msg.u64 & 0xFF);
+ if (vui->num_qid > q)
+ {
+ /* if there is old fd, delete and close it */
+ if (vui->vrings[q].callfd_idx != ~0)
+ {
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
+ vui->vrings[q].callfd_idx);
+ clib_file_del (&file_main, uf);
+ vui->vrings[q].callfd_idx = ~0;
+ }
+ }
+ else if (vec_len (vui->vrings) > q)
+ {
+ /* capacity is already there; just grow num_qid by a pair (RX + TX) */
+ vui->num_qid = (q & 1) ? (q + 1) : (q + 2);
+ }
+ else
+ {
+ u32 i, new_max_q, old_max_q = vec_len (vui->vrings);
+
+ /*
+ * Double the array size if it is less than 64 entries.
+ * Slow down thereafter.
+ */
+ if (vec_len (vui->vrings) < (VHOST_VRING_INIT_MQ_PAIR_SZ << 3))
+ new_max_q = vec_len (vui->vrings) << 1;
+ else
+ new_max_q = vec_len (vui->vrings) +
+ (VHOST_VRING_INIT_MQ_PAIR_SZ << 2);
+ if (new_max_q > (VHOST_VRING_MAX_MQ_PAIR_SZ << 1))
+ new_max_q = (VHOST_VRING_MAX_MQ_PAIR_SZ << 1);
+
+ /* sync with the worker threads, vrings may move due to realloc */
+ vlib_worker_thread_barrier_sync (vm);
+ vec_validate_aligned (vui->vrings, new_max_q - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vlib_worker_thread_barrier_release (vm);
+
+ for (i = old_max_q; i < vec_len (vui->vrings); i++)
+ vhost_user_vring_init (vui, i);
+
+ /* grow vrings by pair (RX + TX) */
+ vui->num_qid = (q & 1) ? (q + 1) : (q + 2);
+ }
+
+ if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
+ {
+ if (number_of_fds != 1)
+ {
+ vu_log_debug (vui, "More than one fd received !");
+ goto close_socket;
+ }
+
+ template.read_function = vhost_user_callfd_read_ready;
+ template.file_descriptor = fds[0];
+ template.private_data =
+ ((vui - vhost_user_main.vhost_user_interfaces) << 8) + q;
+ template.description = format (0, "vhost user");
+ vui->vrings[q].callfd_idx = clib_file_add (&file_main, &template);
+ }
+ else
+ vui->vrings[q].callfd_idx = ~0;
+ break;
+
+ case VHOST_USER_SET_VRING_KICK:
+ vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_KICK %d",
+ vui->hw_if_index, msg.u64);
+
+ q = (u8) (msg.u64 & 0xFF);
+ if (q >= vui->num_qid)
+ {
+ vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_KICK:"
+ " %u >= %u", q, vui->num_qid);
+ goto close_socket;
+ }
+
+ if (vui->vrings[q].kickfd_idx != ~0)
+ {
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
+ vui->vrings[q].kickfd_idx);
+ clib_file_del (&file_main, uf);
+ vui->vrings[q].kickfd_idx = ~0;
+ }
+
+ if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
+ {
+ if (number_of_fds != 1)
+ {
+ vu_log_debug (vui, "More than one fd received !");
+ goto close_socket;
+ }
+
+ template.read_function = vhost_user_kickfd_read_ready;
+ template.file_descriptor = fds[0];
+ template.private_data =
+ (((uword) (vui - vhost_user_main.vhost_user_interfaces)) << 8) +
+ q;
+ vui->vrings[q].kickfd_idx = clib_file_add (&file_main, &template);
+ }
+ else
+ {
+ //When no kickfd is set, the queue is initialized as started
+ vui->vrings[q].kickfd_idx = ~0;
+ vui->vrings[q].started = 1;
+ vhost_user_thread_placement (vui, q);
+ }
+ vhost_user_update_iface_state (vui);
+ break;
+
+ case VHOST_USER_SET_VRING_ERR:
+ vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_ERR %d",
+ vui->hw_if_index, msg.u64);
+
+ q = (u8) (msg.u64 & 0xFF);
+ if (q >= vui->num_qid)
+ {
+ vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ERR:"
+ " %u >= %u", q, vui->num_qid);
+ goto close_socket;
+ }
+
+ if (vui->vrings[q].errfd != -1)
+ close (vui->vrings[q].errfd);
+
+ if (!(msg.u64 & VHOST_USER_VRING_NOFD_MASK))
+ {
+ if (number_of_fds != 1)
+ goto close_socket;
+
+ vui->vrings[q].errfd = fds[0];
+ }
+ else
+ vui->vrings[q].errfd = -1;
+ break;
+
+ case VHOST_USER_SET_VRING_BASE:
+ vu_log_debug (vui,
+ "if %d msg VHOST_USER_SET_VRING_BASE idx %d num 0x%x",
+ vui->hw_if_index, msg.state.index, msg.state.num);
+ if (msg.state.index >= vui->num_qid)
+ {
+ vu_log_debug (vui, "invalid vring index VHOST_USER_SET_VRING_ADDR:"
+ " %u >= %u", msg.state.index, vui->num_qid);
+ goto close_socket;
+ }
+ vlib_worker_thread_barrier_sync (vm);
+ vui->vrings[msg.state.index].last_avail_idx = msg.state.num;
+ if (vhost_user_is_packed_ring_supported (vui))
+ {
+ /*
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | last avail idx | | last used idx | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * ^ ^
+ * | |
+ * avail wrap counter used wrap counter
+ */
+ /* last avail idx at bit 0-14. */
+ vui->vrings[msg.state.index].last_avail_idx =
+ msg.state.num & 0x7fff;
+ /* avail wrap counter at bit 15 */
+ vui->vrings[msg.state.index].avail_wrap_counter =
+ ! !(msg.state.num & (1 << 15));
+
+ /*
+ * Although last_used_idx is passed in the upper 16 bits in the qemu
+ * implementation, in practice, last_avail_idx and last_used_idx are
+ * usually the same. As a result, DPDK does not bother to pass us
+ * last_used_idx. The spec is not clear on the encoding. I figured it
+ * out by reading the qemu code. So let's just read last_avail_idx
+ * and set last_used_idx equal to last_avail_idx.
+ */
+ vui->vrings[msg.state.index].last_used_idx =
+ vui->vrings[msg.state.index].last_avail_idx;
+ vui->vrings[msg.state.index].last_kick =
+ vui->vrings[msg.state.index].last_used_idx;
+ vui->vrings[msg.state.index].used_wrap_counter =
+ vui->vrings[msg.state.index].avail_wrap_counter;
+
+ if (vui->vrings[msg.state.index].avail_wrap_counter == 1)
+ vui->vrings[msg.state.index].avail_wrap_counter =
+ VRING_DESC_F_AVAIL;
+ }
+ vlib_worker_thread_barrier_release (vm);
+ break;
+
+ case VHOST_USER_GET_VRING_BASE:
+ if (msg.state.index >= vui->num_qid)
+ {
+ vu_log_debug (vui, "invalid vring index VHOST_USER_GET_VRING_BASE:"
+ " %u >= %u", msg.state.index, vui->num_qid);
+ goto close_socket;
+ }
+
+ /* protection is needed to prevent rx/tx from changing last_avail_idx */
+ vlib_worker_thread_barrier_sync (vm);
+ /*
+ * Copy last_avail_idx from the vring before closing it because
+ * closing the vring also initializes the vring last_avail_idx
+ */
+ msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
+ if (vhost_user_is_packed_ring_supported (vui))
+ {
+ msg.state.num =
+ (vui->vrings[msg.state.index].last_avail_idx & 0x7fff) |
+ (! !vui->vrings[msg.state.index].avail_wrap_counter << 15);
+ msg.state.num |=
+ ((vui->vrings[msg.state.index].last_used_idx & 0x7fff) |
+ (! !vui->vrings[msg.state.index].used_wrap_counter << 15)) << 16;
+ }
+ msg.flags |= 4;
+ msg.size = sizeof (msg.state);
+
+ /*
+ * Spec says: Client must [...] stop ring upon receiving
+ * VHOST_USER_GET_VRING_BASE
+ */
+ vhost_user_vring_close (vui, msg.state.index);
+ vlib_worker_thread_barrier_release (vm);
+ vu_log_debug (vui,
+ "if %d msg VHOST_USER_GET_VRING_BASE idx %d num 0x%x",
+ vui->hw_if_index, msg.state.index, msg.state.num);
+ n =
+ send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
+ if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
+ {
+ vu_log_debug (vui, "could not send message response");
+ goto close_socket;
+ }
+ vhost_user_update_iface_state (vui);
+ break;
+
+ case VHOST_USER_NONE:
+ vu_log_debug (vui, "if %d msg VHOST_USER_NONE", vui->hw_if_index);
+ break;
+
+ case VHOST_USER_SET_LOG_BASE:
+ vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_BASE",
+ vui->hw_if_index);
+
+ if (msg.size != sizeof (msg.log))
+ {
+ vu_log_debug (vui, "invalid msg size for VHOST_USER_SET_LOG_BASE:"
+ " %d instead of %d", msg.size, sizeof (msg.log));
+ goto close_socket;
+ }
+
+ if (!(vui->protocol_features & (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD)))
+ {
+ vu_log_debug (vui, "VHOST_USER_PROTOCOL_F_LOG_SHMFD not set but "
+ "VHOST_USER_SET_LOG_BASE received");
+ goto close_socket;
+ }
+
+ fd = fds[0];
+ /* align size to page */
+ long page_sz = get_huge_page_size (fd);
+ ssize_t map_sz =
+ (msg.log.size + msg.log.offset + page_sz - 1) & ~(page_sz - 1);
+
+ void *log_base_addr = mmap (0, map_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+
+ vu_log_debug (vui, "map log region addr 0 len 0x%lx off 0x%lx fd %d "
+ "mapped 0x%lx", map_sz, msg.log.offset, fd,
+ log_base_addr);
+
+ if (log_base_addr == MAP_FAILED)
+ {
+ vu_log_err (vui, "failed to map memory. errno is %d", errno);
+ goto close_socket;
+ }
+
+ vlib_worker_thread_barrier_sync (vm);
+ vui->log_base_addr = log_base_addr;
+ vui->log_base_addr += msg.log.offset;
+ vui->log_size = msg.log.size;
+ vlib_worker_thread_barrier_release (vm);
+
+ msg.flags |= 4;
+ msg.size = sizeof (msg.u64);
+ n =
+ send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
+ if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
+ {
+ vu_log_debug (vui, "could not send message response");
+ goto close_socket;
+ }
+ break;
+
+ case VHOST_USER_SET_LOG_FD:
+ vu_log_debug (vui, "if %d msg VHOST_USER_SET_LOG_FD", vui->hw_if_index);
+ break;
+
+ case VHOST_USER_GET_PROTOCOL_FEATURES:
+ msg.flags |= 4;
+ msg.u64 = (1 << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |
+ (1 << VHOST_USER_PROTOCOL_F_MQ);
+ msg.size = sizeof (msg.u64);
+ vu_log_debug (vui, "if %d msg VHOST_USER_GET_PROTOCOL_FEATURES - "
+ "reply 0x%016llx", vui->hw_if_index, msg.u64);
+ n =
+ send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
+ if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
+ {
+ vu_log_debug (vui, "could not send message response");
+ goto close_socket;
+ }
+ break;
+
+ case VHOST_USER_SET_PROTOCOL_FEATURES:
+ vu_log_debug (vui, "if %d msg VHOST_USER_SET_PROTOCOL_FEATURES "
+ "features 0x%016llx", vui->hw_if_index, msg.u64);
+ vui->protocol_features = msg.u64;
+ break;
+
+ case VHOST_USER_GET_QUEUE_NUM:
+ msg.flags |= 4;
+ msg.u64 = VHOST_VRING_MAX_MQ_PAIR_SZ;
+ msg.size = sizeof (msg.u64);
+ vu_log_debug (vui, "if %d msg VHOST_USER_GET_QUEUE_NUM - reply %d",
+ vui->hw_if_index, msg.u64);
+ n =
+ send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
+ if (n != (msg.size + VHOST_USER_MSG_HDR_SZ))
+ {
+ vu_log_debug (vui, "could not send message response");
+ goto close_socket;
+ }
+ break;
+
+ case VHOST_USER_SET_VRING_ENABLE:
+ vu_log_debug (vui, "if %d VHOST_USER_SET_VRING_ENABLE: %s queue %d",
+ vui->hw_if_index, msg.state.num ? "enable" : "disable",
+ msg.state.index);
+ if (msg.state.index >= vui->num_qid)
+ {
+ vu_log_debug (vui, "invalid vring idx VHOST_USER_SET_VRING_ENABLE:"
+ " %u >= %u", msg.state.index, vui->num_qid);
+ goto close_socket;
+ }
+
+ vui->vrings[msg.state.index].enabled = msg.state.num;
+ vhost_user_thread_placement (vui, msg.state.index);
+ vhost_user_update_iface_state (vui);
+ break;
+
+ default:
+ vu_log_debug (vui, "unknown vhost-user message %d received. "
+ "closing socket", msg.request);
+ goto close_socket;
+ }
+
+ return 0;
+
+close_socket:
+ vlib_worker_thread_barrier_sync (vm);
+ vhost_user_if_disconnect (vui);
+ vlib_worker_thread_barrier_release (vm);
+ vhost_user_update_iface_state (vui);
+ return 0;
+}
+
+static clib_error_t *
+vhost_user_socket_error (clib_file_t * uf)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
+
+ vu_log_debug (vui, "socket error on if %d", vui->sw_if_index);
+ vlib_worker_thread_barrier_sync (vm);
+ vhost_user_if_disconnect (vui);
+ vlib_worker_thread_barrier_release (vm);
+ return 0;
+}
+
+static clib_error_t *
+vhost_user_socksvr_accept_ready (clib_file_t * uf)
+{
+ int client_fd, client_len;
+ struct sockaddr_un client;
+ clib_file_t template = { 0 };
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, uf->private_data);
+
+ client_len = sizeof (client);
+ client_fd = accept (uf->file_descriptor,
+ (struct sockaddr *) &client,
+ (socklen_t *) & client_len);
+
+ if (client_fd < 0)
+ return clib_error_return_unix (0, "accept");
+
+ if (vui->clib_file_index != ~0)
+ {
+ vu_log_debug (vui, "Close client socket for vhost interface %d, fd %d",
+ vui->sw_if_index, UNIX_GET_FD (vui->clib_file_index));
+ clib_file_del (&file_main, file_main.file_pool + vui->clib_file_index);
+ }
+
+ vu_log_debug (vui, "New client socket for vhost interface %d, fd %d",
+ vui->sw_if_index, client_fd);
+ template.read_function = vhost_user_socket_read;
+ template.error_function = vhost_user_socket_error;
+ template.file_descriptor = client_fd;
+ template.private_data = vui - vhost_user_main.vhost_user_interfaces;
+ template.description = format (0, "vhost interface %d", vui->sw_if_index);
+ vui->clib_file_index = clib_file_add (&file_main, &template);
+ vui->num_qid = 2;
+ return 0;
+}
+
+static clib_error_t *
+vhost_user_init (vlib_main_t * vm)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ vum->log_default = vlib_log_register_class ("vhost-user", 0);
+
+ vum->coalesce_frames = 32;
+ vum->coalesce_time = 1e-3;
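+ /* interrupt coalescing defaults: roughly, batch guest notifications
+ until 32 frames are pending or 1 ms has elapsed, whichever comes
+ first */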
+
+ vec_validate (vum->cpus, tm->n_vlib_mains - 1);
+
+ vhost_cpu_t *cpu;
+ vec_foreach (cpu, vum->cpus)
+ {
+ /* This is actually not necessary as validate already zeroes it
+ * Just keeping the loop here for later because I am lazy. */
+ cpu->rx_buffers_len = 0;
+ }
+
+ vum->random = random_default_seed ();
+
+ mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword));
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vhost_user_init) =
+{
+ .runs_after = VLIB_INITS("ip4_init"),
+};
+
+static uword
+vhost_user_send_interrupt_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vhost_user_intf_t *vui;
+ f64 timeout = 3153600000.0 /* 100 years */ ;
+ uword event_type, *event_data = 0;
+ vhost_user_main_t *vum = &vhost_user_main;
+ u16 qid;
+ f64 now, poll_time_remaining;
+ f64 next_timeout;
+ u8 stop_timer = 0;
+
+ while (1)
+ {
+ poll_time_remaining =
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ /*
+ * Use the remaining timeout if it is less than coalesce time to avoid
+ * resetting the existing timer in the middle of expiration
+ */
+ timeout = poll_time_remaining;
+ if (vlib_process_suspend_time_is_zero (timeout) ||
+ (timeout > vum->coalesce_time))
+ timeout = vum->coalesce_time;
+
+ now = vlib_time_now (vm);
+ switch (event_type)
+ {
+ case VHOST_USER_EVENT_STOP_TIMER:
+ stop_timer = 1;
+ break;
+
+ case VHOST_USER_EVENT_START_TIMER:
+ stop_timer = 0;
+ timeout = 1e-3;
+ if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
+ break;
+ /* fall through */
+
+ case ~0:
+ pool_foreach (vui, vum->vhost_user_interfaces) {
+ next_timeout = timeout;
+ FOR_ALL_VHOST_RX_TXQ (qid, vui)
+ {
+ vhost_user_vring_t *vq = &vui->vrings[qid];
+
+ if (vq->started == 0)
+ continue;
+ if (vq->n_since_last_int)
+ {
+ if (now >= vq->int_deadline)
+ vhost_user_send_call (vm, vui, vq);
+ else
+ next_timeout = vq->int_deadline - now;
+ }
+
+ if ((next_timeout < timeout) && (next_timeout > 0.0))
+ timeout = next_timeout;
+ }
+ }
+ break;
+
+ default:
+ clib_warning ("BUG: unhandled event type %d", event_type);
+ break;
+ }
+ /* No less than 1 millisecond */
+ if (timeout < 1e-3)
+ timeout = 1e-3;
+ if (stop_timer)
+ timeout = 3153600000.0;
+ }
+ return 0;
+}
+
+VLIB_REGISTER_NODE (vhost_user_send_interrupt_node) = {
+ .function = vhost_user_send_interrupt_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vhost-user-send-interrupt-process",
+};
+
+static uword
+vhost_user_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+ struct sockaddr_un sun;
+ int sockfd;
+ clib_file_t template = { 0 };
+ f64 timeout = 3153600000.0 /* 100 years */ ;
+ uword *event_data = 0;
+
+ sockfd = -1;
+ sun.sun_family = AF_UNIX;
+ template.read_function = vhost_user_socket_read;
+ template.error_function = vhost_user_socket_error;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ timeout = 3.0;
+
+ pool_foreach (vui, vum->vhost_user_interfaces) {
+
+ if (vui->unix_server_index == ~0) { //Nothing to do for server sockets; only client sockets (re)connect here
+ if (vui->clib_file_index == ~0)
+ {
+ if ((sockfd < 0) &&
+ ((sockfd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0))
+ {
+ /*
+ * 1st time error or new error for this interface,
+ * spit out the message and record the error
+ */
+ if (!vui->sock_errno || (vui->sock_errno != errno))
+ {
+ clib_unix_warning
+ ("Error: Could not open unix socket for %s",
+ vui->sock_filename);
+ vui->sock_errno = errno;
+ }
+ continue;
+ }
+
+ /* try to connect */
+ strncpy (sun.sun_path, (char *) vui->sock_filename,
+ sizeof (sun.sun_path) - 1);
+ sun.sun_path[sizeof (sun.sun_path) - 1] = 0;
+
+ /* Avoid hanging VPP if the other end does not accept */
+ if (fcntl(sockfd, F_SETFL, O_NONBLOCK) < 0)
+ clib_unix_warning ("fcntl");
+
+ if (connect (sockfd, (struct sockaddr *) &sun,
+ sizeof (struct sockaddr_un)) == 0)
+ {
+ /* Set the socket to blocking as it was before */
+ if (fcntl(sockfd, F_SETFL, 0) < 0)
+ clib_unix_warning ("fcntl2");
+
+ vui->sock_errno = 0;
+ template.file_descriptor = sockfd;
+ template.private_data =
+ vui - vhost_user_main.vhost_user_interfaces;
+ template.description = format (0, "vhost user process");
+ vui->clib_file_index = clib_file_add (&file_main, &template);
+ vui->num_qid = 2;
+
+ /* This sockfd is considered consumed */
+ sockfd = -1;
+ }
+ else
+ {
+ vui->sock_errno = errno;
+ }
+ }
+ else
+ {
+ /* check if socket is alive */
+ int error = 0;
+ socklen_t len = sizeof (error);
+ int fd = UNIX_GET_FD(vui->clib_file_index);
+ int retval =
+ getsockopt (fd, SOL_SOCKET, SO_ERROR, &error, &len);
+
+ if (retval)
+ {
+ vu_log_debug (vui, "getsockopt returned %d", retval);
+ vhost_user_if_disconnect (vui);
+ }
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+VLIB_REGISTER_NODE (vhost_user_process_node,static) = {
+ .function = vhost_user_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vhost-user-process",
+};
+
+/**
+ * Disables and resets the interface structure.
+ * It can then either be initialized again, or removed from the used interfaces.
+ */
+static void
+vhost_user_term_if (vhost_user_intf_t * vui)
+{
+ int q;
+ vhost_user_main_t *vum = &vhost_user_main;
+
+ // disconnect interface sockets
+ vhost_user_if_disconnect (vui);
+ vhost_user_update_gso_interface_count (vui, 0 /* delete */ );
+ vhost_user_update_iface_state (vui);
+
+ for (q = 0; q < vec_len (vui->vrings); q++)
+ clib_spinlock_free (&vui->vrings[q].vring_lock);
+
+ if (vui->unix_server_index != ~0)
+ {
+ //Close server socket
+ clib_file_t *uf = pool_elt_at_index (file_main.file_pool,
+ vui->unix_server_index);
+ clib_file_del (&file_main, uf);
+ vui->unix_server_index = ~0;
+ unlink (vui->sock_filename);
+ }
+
+ mhash_unset (&vum->if_index_by_sock_name, vui->sock_filename,
+ &vui->if_index);
+}
+
+int
+vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+ int rv = 0;
+ vnet_hw_interface_t *hwif;
+ u16 qid;
+
+ if (!(hwif =
+ vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index)) ||
+ hwif->dev_class_index != vhost_user_device_class.index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
+
+ vu_log_debug (vui, "Deleting vhost-user interface %s (instance %d)",
+ hwif->name, hwif->dev_instance);
+
+ FOR_ALL_VHOST_TXQ (qid, vui)
+ {
+ vhost_user_vring_t *txvq = &vui->vrings[qid];
+
+ if ((txvq->mode == VNET_HW_IF_RX_MODE_POLLING) &&
+ (txvq->thread_index != ~0))
+ {
+ vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
+ ASSERT (cpu->polling_q_count != 0);
+ cpu->polling_q_count--;
+ }
+
+ if ((vum->ifq_count > 0) &&
+ ((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
+ (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)))
+ {
+ vum->ifq_count--;
+ // Stop the timer if there is no more interrupt interface/queue
+ if (vum->ifq_count == 0)
+ {
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_STOP_TIMER, 0);
+ break;
+ }
+ }
+ }
+
+ // Disable and reset interface
+ vhost_user_term_if (vui);
+
+ // Reset renumbered iface
+ if (hwif->dev_instance <
+ vec_len (vum->show_dev_instance_by_real_dev_instance))
+ vum->show_dev_instance_by_real_dev_instance[hwif->dev_instance] = ~0;
+
+ // Delete ethernet interface
+ ethernet_delete_interface (vnm, vui->hw_if_index);
+
+ // free vrings
+ vec_free (vui->vrings);
+
+ // Back to pool
+ pool_put (vum->vhost_user_interfaces, vui);
+
+ return rv;
+}
+
+static clib_error_t *
+vhost_user_exit (vlib_main_t * vm)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+
+ vlib_worker_thread_barrier_sync (vlib_get_main ());
+ pool_foreach (vui, vum->vhost_user_interfaces) {
+ vhost_user_delete_if (vnm, vm, vui->sw_if_index);
+ }
+ vlib_worker_thread_barrier_release (vlib_get_main ());
+ return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (vhost_user_exit);
+
+/**
+ * Open server unix socket on specified sock_filename.
+ */
+static int
+vhost_user_init_server_sock (const char *sock_filename, int *sock_fd)
+{
+ int rv = 0;
+ struct sockaddr_un un = { };
+ int fd;
+ /* create listening socket */
+ if ((fd = socket (AF_UNIX, SOCK_STREAM, 0)) < 0)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+ un.sun_family = AF_UNIX;
+ strncpy ((char *) un.sun_path, (char *) sock_filename,
+ sizeof (un.sun_path) - 1);
+
+ /* remove if exists */
+ unlink ((char *) sock_filename);
+
+ if (bind (fd, (struct sockaddr *) &un, sizeof (un)) == -1)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_2;
+ goto error;
+ }
+
+ if (listen (fd, 1) == -1)
+ {
+ rv = VNET_API_ERROR_SYSCALL_ERROR_3;
+ goto error;
+ }
+
+ *sock_fd = fd;
+ return 0;
+
+error:
+ close (fd);
+ return rv;
+}
+
+/**
+ * Create ethernet interface for vhost user interface.
+ */
+static void
+vhost_user_create_ethernet (vnet_main_t *vnm, vlib_main_t *vm,
+ vhost_user_intf_t *vui,
+ vhost_user_create_if_args_t *args)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vnet_eth_interface_registration_t eir = {};
+ u8 hwaddr[6];
+
+ /* create hw and sw interface */
+ if (args->use_custom_mac)
+ {
+ clib_memcpy (hwaddr, args->hwaddr, 6);
+ }
+ else
+ {
+ random_u32 (&vum->random);
+ clib_memcpy (hwaddr + 2, &vum->random, sizeof (vum->random));
+ hwaddr[0] = 2;
+ hwaddr[1] = 0xfe;
+ }
+
+ eir.dev_class_index = vhost_user_device_class.index;
+ eir.dev_instance = vui - vum->vhost_user_interfaces; /* device instance */
+ eir.address = hwaddr;
+ vui->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+}
+
+/*
+ * Initialize vui with specified attributes
+ */
+static void
+vhost_user_vui_init (vnet_main_t * vnm, vhost_user_intf_t * vui,
+ int server_sock_fd, vhost_user_create_if_args_t * args,
+ u32 * sw_if_index)
+{
+ vnet_sw_interface_t *sw;
+ int q;
+ vhost_user_main_t *vum = &vhost_user_main;
+
+ sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
+ if (server_sock_fd != -1)
+ {
+ clib_file_t template = { 0 };
+ template.read_function = vhost_user_socksvr_accept_ready;
+ template.file_descriptor = server_sock_fd;
+ template.private_data = vui - vum->vhost_user_interfaces; //hw index
+ template.description = format (0, "vhost user %d", sw->sw_if_index);
+ vui->unix_server_index = clib_file_add (&file_main, &template);
+ }
+ else
+ {
+ vui->unix_server_index = ~0;
+ }
+
+ vui->sw_if_index = sw->sw_if_index;
+ strncpy (vui->sock_filename, args->sock_filename,
+ ARRAY_LEN (vui->sock_filename) - 1);
+ vui->sock_errno = 0;
+ vui->is_ready = 0;
+ vui->feature_mask = args->feature_mask;
+ vui->clib_file_index = ~0;
+ vui->log_base_addr = 0;
+ vui->if_index = vui - vum->vhost_user_interfaces;
+ vui->enable_gso = args->enable_gso;
+ vui->enable_event_idx = args->enable_event_idx;
+ vui->enable_packed = args->enable_packed;
+ /*
+ * enable_gso takes precedence over the configurable feature mask if
+ * there is a clash.
+ * if the feature mask disables gso, but enable_gso is configured,
+ * then gso is enabled
+ * if the feature mask enables gso, but enable_gso is not configured,
+ * then gso is enabled
+ *
+ * if gso is enabled via the feature mask, it must enable both the host
+ * and guest gso feature bits; we don't support one-sided or partial GSO.
+ */
+ if ((vui->enable_gso == 0) &&
+ ((args->feature_mask & FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)
+ == (FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)))
+ vui->enable_gso = 1;
+ vhost_user_update_gso_interface_count (vui, 1 /* add */ );
+ mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
+ &vui->if_index, 0);
+
+ vec_validate_aligned (vui->vrings, (VHOST_VRING_INIT_MQ_PAIR_SZ << 1) - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vui->num_qid = 2;
+ for (q = 0; q < vec_len (vui->vrings); q++)
+ vhost_user_vring_init (vui, q);
+
+ vnet_hw_if_set_caps (vnm, vui->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
+ vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
+
+ if (sw_if_index)
+ *sw_if_index = vui->sw_if_index;
+}
+
+int
+vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
+ vhost_user_create_if_args_t * args)
+{
+ vhost_user_intf_t *vui = NULL;
+ u32 sw_if_idx = ~0;
+ int rv = 0;
+ int server_sock_fd = -1;
+ vhost_user_main_t *vum = &vhost_user_main;
+ uword *if_index;
+
+ if (args->sock_filename == NULL || !(strlen (args->sock_filename) > 0))
+ {
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ if_index = mhash_get (&vum->if_index_by_sock_name,
+ (void *) args->sock_filename);
+ if (if_index)
+ {
+ vui = &vum->vhost_user_interfaces[*if_index];
+ args->sw_if_index = vui->sw_if_index;
+ return VNET_API_ERROR_IF_ALREADY_EXISTS;
+ }
+
+ if (args->is_server)
+ {
+ if ((rv =
+ vhost_user_init_server_sock (args->sock_filename,
+ &server_sock_fd)) != 0)
+ {
+ return rv;
+ }
+ }
+
+ /* Protect the uninitialized vui from being dispatched by rx/tx */
+ vlib_worker_thread_barrier_sync (vm);
+ pool_get (vhost_user_main.vhost_user_interfaces, vui);
+ vhost_user_create_ethernet (vnm, vm, vui, args);
+ vlib_worker_thread_barrier_release (vm);
+
+ vhost_user_vui_init (vnm, vui, server_sock_fd, args, &sw_if_idx);
+ vnet_sw_interface_set_mtu (vnm, vui->sw_if_index, 9000);
+ vhost_user_rx_thread_placement (vui, 1);
+
+ if (args->renumber)
+ vnet_interface_name_renumber (sw_if_idx, args->custom_dev_instance);
+
+ args->sw_if_index = sw_if_idx;
+
+ // Process node must connect
+ vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
+
+ return rv;
+}
+
+int
+vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
+ vhost_user_create_if_args_t * args)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui = NULL;
+ u32 sw_if_idx = ~0;
+ int server_sock_fd = -1;
+ int rv = 0;
+ vnet_hw_interface_t *hwif;
+ uword *if_index;
+
+ if (!(hwif = vnet_get_sup_hw_interface_api_visible_or_null (vnm,
+ args->sw_if_index))
+ || hwif->dev_class_index != vhost_user_device_class.index)
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (args->sock_filename == NULL || !(strlen (args->sock_filename) > 0))
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+
+ vui = vec_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
+
+ /*
+ * Disallow changing the interface to have the same path name
+ * as other interface
+ */
+ if_index = mhash_get (&vum->if_index_by_sock_name,
+ (void *) args->sock_filename);
+ if (if_index && (*if_index != vui->if_index))
+ return VNET_API_ERROR_IF_ALREADY_EXISTS;
+
+ // First try to open server socket
+ if (args->is_server)
+ if ((rv = vhost_user_init_server_sock (args->sock_filename,
+ &server_sock_fd)) != 0)
+ return rv;
+
+ vhost_user_term_if (vui);
+ vhost_user_vui_init (vnm, vui, server_sock_fd, args, &sw_if_idx);
+
+ if (args->renumber)
+ vnet_interface_name_renumber (sw_if_idx, args->custom_dev_instance);
+
+ // Process node must connect
+ vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
+
+ return rv;
+}
+
+clib_error_t *
+vhost_user_connect_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ vhost_user_create_if_args_t args = { 0 };
+ int rv;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ args.feature_mask = (u64) ~ (0ULL);
+ args.custom_dev_instance = ~0;
+ /* GSO feature is disabled by default */
+ args.feature_mask &= ~FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS;
+ /* packed-ring feature is disabled by default */
+ args.feature_mask &= ~VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
+ /* event_idx feature is disabled by default */
+ args.feature_mask &= ~VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "socket %s", &args.sock_filename))
+ ;
+ else if (unformat (line_input, "server"))
+ args.is_server = 1;
+ else if (unformat (line_input, "gso"))
+ args.enable_gso = 1;
+ else if (unformat (line_input, "packed"))
+ args.enable_packed = 1;
+ else if (unformat (line_input, "event-idx"))
+ args.enable_event_idx = 1;
+ else if (unformat (line_input, "feature-mask 0x%llx",
+ &args.feature_mask))
+ ;
+ else if (unformat (line_input, "hwaddr %U", unformat_ethernet_address,
+ args.hwaddr))
+ args.use_custom_mac = 1;
+ else if (unformat (line_input, "renumber %d",
+ &args.custom_dev_instance))
+ args.renumber = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if ((rv = vhost_user_create_if (vnm, vm, &args)))
+ {
+ error = clib_error_return (0, "vhost_user_create_if returned %d", rv);
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnm,
+ args.sw_if_index);
+
+done:
+ vec_free (args.sock_filename);
+ unformat_free (line_input);
+
+ return error;
+}
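+
+/*
+ * Illustrative debug CLI usage, assuming this handler is registered as
+ * the "create vhost-user" command (the VLIB_CLI_COMMAND registration
+ * lives elsewhere in this file):
+ *
+ * create vhost-user socket /tmp/vhost1.sock server gso
+ *
+ * creates a server-mode interface listening on /tmp/vhost1.sock with
+ * GSO negotiation enabled.
+ */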
+
+clib_error_t *
+vhost_user_delete_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 sw_if_index = ~0;
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat
+ (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ {
+ vnet_hw_interface_t *hwif =
+ vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
+ if (hwif == NULL ||
+ vhost_user_device_class.index != hwif->dev_class_index)
+ {
+ error = clib_error_return (0, "Not a vhost interface");
+ goto done;
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ vhost_user_delete_if (vnm, vm, sw_if_index);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+int
+vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
+ vhost_user_intf_details_t ** out_vuids)
+{
+ int rv = 0;
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+ vhost_user_intf_details_t *r_vuids = NULL;
+ vhost_user_intf_details_t *vuid = NULL;
+ u32 *hw_if_indices = 0;
+ vnet_hw_interface_t *hi;
+ int i;
+
+ if (!out_vuids)
+ return -1;
+
+ pool_foreach (vui, vum->vhost_user_interfaces)
+ vec_add1 (hw_if_indices, vui->hw_if_index);
+
+ for (i = 0; i < vec_len (hw_if_indices); i++)
+ {
+ hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
+
+ vec_add2 (r_vuids, vuid, 1);
+ vuid->sw_if_index = vui->sw_if_index;
+ vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
+ vuid->features = vui->features;
+ vuid->num_regions = vui->nregions;
+ vuid->is_server = vui->unix_server_index != ~0;
+ vuid->sock_errno = vui->sock_errno;
+ snprintf ((char *) vuid->sock_filename, sizeof (vuid->sock_filename),
+ "%s", vui->sock_filename);
+ memcpy_s (vuid->if_name, sizeof (vuid->if_name), hi->name,
+ clib_min (vec_len (hi->name), sizeof (vuid->if_name) - 1));
+ vuid->if_name[sizeof (vuid->if_name) - 1] = 0;
+ }
+
+ vec_free (hw_if_indices);
+
+ *out_vuids = r_vuids;
+
+ return rv;
+}
+
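+/*
+ * Editor's sketch (illustrative, not part of this patch): a minimal
+ * example of how a caller might consume vhost_user_dump_ifs. The
+ * function name is hypothetical; the returned vector is owned by the
+ * caller and must be freed with vec_free.
+ */
+static __clib_unused void
+example_show_vhost_ifs (vnet_main_t * vnm, vlib_main_t * vm)
+{
+  vhost_user_intf_details_t *vuids = NULL, *vuid;
+
+  if (vhost_user_dump_ifs (vnm, vm, &vuids))
+    return;
+  vec_foreach (vuid, vuids)
+    vlib_cli_output (vm, "%s: sw_if_index %u (%s)", (char *) vuid->if_name,
+                     vuid->sw_if_index, vuid->is_server ? "server" : "client");
+  vec_free (vuids);
+}
+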
+static u8 *
+format_vhost_user_desc (u8 * s, va_list * args)
+{
+ char *fmt = va_arg (*args, char *);
+ vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
+ vnet_virtio_vring_desc_t *desc_table =
+ va_arg (*args, vnet_virtio_vring_desc_t *);
+ int idx = va_arg (*args, int);
+ u32 *mem_hint = va_arg (*args, u32 *);
+
+ s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len,
+ desc_table[idx].flags, desc_table[idx].next,
+ pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr,
+ mem_hint)));
+ return s;
+}
+
+static void
+vhost_user_show_fds (vlib_main_t * vm, vhost_user_vring_t * vq)
+{
+ int kickfd = UNIX_GET_FD (vq->kickfd_idx);
+ int callfd = UNIX_GET_FD (vq->callfd_idx);
+
+ vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n", kickfd, callfd,
+ vq->errfd);
+}
+
+static void
+vhost_user_show_desc (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
+ int show_descr, int show_verbose)
+{
+ int j;
+ u32 mem_hint = 0;
+ u32 idx;
+ u32 n_entries;
+ vnet_virtio_vring_desc_t *desc_table;
+ vhost_user_vring_t *vq = &vui->vrings[q];
+
+ if (vq->avail && vq->used)
+ vlib_cli_output (vm,
+ " avail.flags %x avail event idx %u avail.idx %d "
+ "used.flags %x used event idx %u used.idx %d\n",
+ vq->avail->flags, vhost_user_avail_event_idx (vq),
+ vq->avail->idx, vq->used->flags,
+ vhost_user_used_event_idx (vq), vq->used->idx);
+
+ vhost_user_show_fds (vm, vq);
+
+ if (show_descr)
+ {
+ vlib_cli_output (vm, "\n descriptor table:\n");
+ vlib_cli_output (vm,
+ " slot addr len flags next "
+ "user_addr\n");
+ vlib_cli_output (vm,
+ " ===== ================== ===== ====== ===== "
+ "==================\n");
+ for (j = 0; j < vq->qsz_mask + 1; j++)
+ {
+ desc_table = vq->desc;
+ vlib_cli_output (vm, "%U", format_vhost_user_desc,
+ " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", vui,
+ desc_table, j, &mem_hint);
+ if (show_verbose && (desc_table[j].flags & VRING_DESC_F_INDIRECT))
+ {
+ n_entries =
+ desc_table[j].len / sizeof (vnet_virtio_vring_desc_t);
+ desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint);
+ if (desc_table)
+ {
+ for (idx = 0; idx < clib_min (20, n_entries); idx++)
+ {
+ vlib_cli_output
+ (vm, "%U", format_vhost_user_desc,
+ "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
+ desc_table, idx, &mem_hint);
+ }
+ if (n_entries >= 20)
+ vlib_cli_output (vm, "Skip displaying entries 20...%u\n",
+ n_entries);
+ }
+ }
+ }
+ }
+}
+
+static u8 *
+format_vhost_user_packed_desc (u8 * s, va_list * args)
+{
+ char *fmt = va_arg (*args, char *);
+ vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
+ vnet_virtio_vring_packed_desc_t *desc_table =
+ va_arg (*args, vnet_virtio_vring_packed_desc_t *);
+ int idx = va_arg (*args, int);
+ u32 *mem_hint = va_arg (*args, u32 *);
+
+ s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len,
+ desc_table[idx].flags, desc_table[idx].id,
+ pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr,
+ mem_hint)));
+ return s;
+}
+
+static u8 *
+format_vhost_user_event_idx_flags (u8 * s, va_list * args)
+{
+ u32 flags = va_arg (*args, u32);
+ typedef struct
+ {
+ u8 value;
+ char *str;
+ } event_idx_flags;
+ static event_idx_flags event_idx_array[] = {
+#define _(s,v) { .str = #s, .value = v, },
+ foreach_virtio_event_idx_flags
+#undef _
+ };
+ u32 num_entries = sizeof (event_idx_array) / sizeof (event_idx_flags);
+
+ if (flags < num_entries)
+ s = format (s, "%s", event_idx_array[flags].str);
+ else
+ s = format (s, "%u", flags);
+ return s;
+}
+
+static void
+vhost_user_show_desc_packed (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
+ int show_descr, int show_verbose)
+{
+ int j;
+ u32 mem_hint = 0;
+ u32 idx;
+ u32 n_entries;
+ vnet_virtio_vring_packed_desc_t *desc_table;
+ vhost_user_vring_t *vq = &vui->vrings[q];
+ u16 off_wrap, event_idx;
+
+ off_wrap = vq->avail_event->off_wrap;
+ event_idx = off_wrap & 0x7fff;
+ vlib_cli_output (vm, " avail_event.flags %U avail_event.off_wrap %u "
+ "avail event idx %u\n", format_vhost_user_event_idx_flags,
+ (u32) vq->avail_event->flags, off_wrap, event_idx);
+
+ off_wrap = vq->used_event->off_wrap;
+ event_idx = off_wrap & 0x7fff;
+ vlib_cli_output (vm, " used_event.flags %U used_event.off_wrap %u "
+ "used event idx %u\n", format_vhost_user_event_idx_flags,
+ (u32) vq->used_event->flags, off_wrap, event_idx);
+
+ vlib_cli_output (vm, " avail wrap counter %u, used wrap counter %u\n",
+ vq->avail_wrap_counter, vq->used_wrap_counter);
+
+ vhost_user_show_fds (vm, vq);
+
+ if (show_descr)
+ {
+ vlib_cli_output (vm, "\n descriptor table:\n");
+ vlib_cli_output (vm,
+ " slot addr len flags id "
+ "user_addr\n");
+ vlib_cli_output (vm,
+ " ===== ================== ===== ====== ===== "
+ "==================\n");
+ for (j = 0; j < vq->qsz_mask + 1; j++)
+ {
+ desc_table = vq->packed_desc;
+ vlib_cli_output (vm, "%U", format_vhost_user_packed_desc,
+ " %-5u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
+ desc_table, j, &mem_hint);
+ if (show_verbose && (desc_table[j].flags & VRING_DESC_F_INDIRECT))
+ {
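+	      /* each packed descriptor is 16 bytes, hence len >> 4 */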
+ n_entries = desc_table[j].len >> 4;
+ desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint);
+ if (desc_table)
+ {
+ for (idx = 0; idx < clib_min (20, n_entries); idx++)
+ {
+ vlib_cli_output
+ (vm, "%U", format_vhost_user_packed_desc,
+ "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
+ desc_table, idx, &mem_hint);
+ }
+ if (n_entries >= 20)
+ vlib_cli_output (vm, "Skip displaying entries 20...%u\n",
+ n_entries);
+ }
+ }
+ }
+ }
+}
+
+clib_error_t *
+show_vhost_user_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+ u32 hw_if_index, *hw_if_indices = 0;
+ vnet_hw_interface_t *hi;
+ u16 qid;
+ int i, j, q;
+ int show_descr = 0;
+ int show_verbose = 0;
+ struct feat_struct
+ {
+ u8 bit;
+ char *str;
+ };
+ struct feat_struct *feat_entry;
+
+ static struct feat_struct feat_array[] = {
+#define _(s,b) { .str = #s, .bit = b, },
+ foreach_virtio_net_features
+#undef _
+ {.str = NULL}
+ };
+
+#define foreach_protocol_feature \
+ _(VHOST_USER_PROTOCOL_F_MQ) \
+ _(VHOST_USER_PROTOCOL_F_LOG_SHMFD)
+
+ static struct feat_struct proto_feat_array[] = {
+#define _(s) { .str = #s, .bit = s},
+ foreach_protocol_feature
+#undef _
+ {.str = NULL}
+ };
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat
+ (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+ {
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ if (vhost_user_device_class.index != hi->dev_class_index)
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ vec_add1 (hw_if_indices, hw_if_index);
+ }
+ else if (unformat (input, "descriptors") || unformat (input, "desc"))
+ show_descr = 1;
+ else if (unformat (input, "verbose"))
+ show_verbose = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+ if (vec_len (hw_if_indices) == 0)
+ {
+ pool_foreach (vui, vum->vhost_user_interfaces)
+ vec_add1 (hw_if_indices, vui->hw_if_index);
+ }
+ vlib_cli_output (vm, "Virtio vhost-user interfaces");
+ vlib_cli_output (vm, "Global:\n coalesce frames %d time %e",
+ vum->coalesce_frames, vum->coalesce_time);
+ vlib_cli_output (vm, " Number of rx virtqueues in interrupt mode: %d",
+ vum->ifq_count);
+ vlib_cli_output (vm, " Number of GSO interfaces: %d", vum->gso_count);
+ for (u32 tid = 0; tid <= vlib_num_workers (); tid++)
+ {
+ vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, tid);
+ vlib_cli_output (vm, " Thread %u: Polling queue count %u", tid,
+ cpu->polling_q_count);
+ }
+
+ for (i = 0; i < vec_len (hw_if_indices); i++)
+ {
+ hi = vnet_get_hw_interface (vnm, hw_if_indices[i]);
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
+ vlib_cli_output (vm, "Interface: %U (ifindex %d)",
+ format_vnet_hw_if_index_name, vnm, hw_if_indices[i],
+ hw_if_indices[i]);
+ vlib_cli_output (vm, " Number of qids %u", vui->num_qid);
+ if (vui->enable_gso)
+ vlib_cli_output (vm, " GSO enable");
+ if (vui->enable_packed)
+ vlib_cli_output (vm, " Packed ring enable");
+ if (vui->enable_event_idx)
+ vlib_cli_output (vm, " Event index enable");
+
+ vlib_cli_output (vm, "virtio_net_hdr_sz %d\n"
+ " features mask (0x%llx): \n"
+ " features (0x%llx): \n",
+ vui->virtio_net_hdr_sz, vui->feature_mask,
+ vui->features);
+
+ feat_entry = (struct feat_struct *) &feat_array;
+ while (feat_entry->str)
+ {
+ if (vui->features & (1ULL << feat_entry->bit))
+ vlib_cli_output (vm, " %s (%d)", feat_entry->str,
+ feat_entry->bit);
+ feat_entry++;
+ }
+
+ vlib_cli_output (vm, " protocol features (0x%llx)",
+ vui->protocol_features);
+ feat_entry = (struct feat_struct *) &proto_feat_array;
+ while (feat_entry->str)
+ {
+ if (vui->protocol_features & (1ULL << feat_entry->bit))
+ vlib_cli_output (vm, " %s (%d)", feat_entry->str,
+ feat_entry->bit);
+ feat_entry++;
+ }
+
+ vlib_cli_output (vm, "\n");
+
+ vlib_cli_output (vm, " socket filename %s type %s errno \"%s\"\n\n",
+ vui->sock_filename,
+ (vui->unix_server_index != ~0) ? "server" : "client",
+ strerror (vui->sock_errno));
+
+ vlib_cli_output (vm, " rx placement: ");
+
+ FOR_ALL_VHOST_TXQ (qid, vui)
+ {
+ vhost_user_vring_t *txvq = &vui->vrings[qid];
+
+ if (txvq->qid == -1)
+ continue;
+ vlib_cli_output (vm, " thread %d on vring %d, %U\n",
+ txvq->thread_index, qid, format_vnet_hw_if_rx_mode,
+ txvq->mode);
+ }
+
+ vlib_cli_output (vm, " tx placement\n");
+
+ FOR_ALL_VHOST_RXQ (qid, vui)
+ {
+ vhost_user_vring_t *rxvq = &vui->vrings[qid];
+ vnet_hw_if_tx_queue_t *txq;
+
+ if (rxvq->queue_index == ~0)
+ continue;
+ txq = vnet_hw_if_get_tx_queue (vnm, rxvq->queue_index);
+ if (txq->threads)
+ vlib_cli_output (vm, " threads %U on vring %u: %s\n",
+ format_bitmap_list, txq->threads, qid,
+ txq->shared_queue ? "spin-lock" : "lock-free");
+ }
+
+ vlib_cli_output (vm, "\n");
+
+ vlib_cli_output (vm, " Memory regions (total %d)\n", vui->nregions);
+
+ if (vui->nregions)
+ {
+ vlib_cli_output (vm,
+ " region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr\n");
+ vlib_cli_output (vm,
+ " ====== ===== ================== ================== ================== ================== ==================\n");
+ }
+ for (j = 0; j < vui->nregions; j++)
+ {
+ vlib_cli_output (vm,
+ " %d %-5d 0x%016lx 0x%016lx 0x%016lx 0x%016lx 0x%016lx\n",
+ j, vui->region_mmap_fd[j],
+ vui->regions[j].guest_phys_addr,
+ vui->regions[j].memory_size,
+ vui->regions[j].userspace_addr,
+ vui->regions[j].mmap_offset,
+ pointer_to_uword (vui->region_mmap_addr[j]));
+ }
+ FOR_ALL_VHOST_RX_TXQ (q, vui)
+ {
+ if (!vui->vrings[q].started)
+ continue;
+
+ vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q,
+ (q & 1) ? "RX" : "TX",
+ vui->vrings[q].enabled ? "" : " disabled");
+ vlib_cli_output (vm, " global %s queue index %u\n",
+ (q & 1) ? "RX" : "TX", vui->vrings[q].queue_index);
+
+ vlib_cli_output (
+ vm,
+ " qsz %d last_avail_idx %d last_used_idx %d"
+ " last_kick %u\n",
+ vui->vrings[q].qsz_mask + 1, vui->vrings[q].last_avail_idx,
+ vui->vrings[q].last_used_idx, vui->vrings[q].last_kick);
+
+ if (vhost_user_is_packed_ring_supported (vui))
+ vhost_user_show_desc_packed (vm, vui, q, show_descr, show_verbose);
+ else
+ vhost_user_show_desc (vm, vui, q, show_descr, show_verbose);
+ }
+ vlib_cli_output (vm, "\n");
+ }
+done:
+ vec_free (hw_if_indices);
+ return error;
+}
+
+/*
+ * CLI functions
+ */
+
+/*?
+ * Create a vHost User interface. Once created, a new virtual interface
+ * will exist with the name '<em>VirtualEthernet0/0/x</em>', where '<em>x</em>'
+ * is the next free index.
+ *
+ * There are several parameters associated with a vHost interface:
+ *
+ * - <b>socket <socket-filename></b> - Name of the Linux socket used by the
+ * hypervisor and VPP to manage the vHost interface. In <em>server</em>
+ * mode, VPP will create the socket if it does not already exist. In
+ * <em>client</em> mode, the hypervisor will create the socket if it does not
+ * already exist. The VPP code is indifferent to the file location. However,
+ * if SELinux is enabled, then the socket needs to be created in
+ * <em>/var/run/vpp/</em>.
+ *
+ * - <b>server</b> - Optional flag to indicate that VPP should be the server
+ * for the Linux socket. If not provided, VPP will be the client. In
+ * <em>server</em> mode, the VM can be reset without tearing down the vHost
+ * Interface. In <em>client</em> mode, VPP can be reset without bringing down
+ * the VM and tearing down the vHost Interface.
+ *
+ * - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated
+ * at startup. <b>This is intended for debugging only.</b> It is recommended
+ * that this parameter not be used except by experienced users. By default,
+ * all supported features will be advertised. Otherwise, provide the set of
+ * features desired.
+ * - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF
+ * - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ
+ * - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE
+ * - 0x000400000 (22) - VIRTIO_NET_F_MQ
+ * - 0x004000000 (26) - VHOST_F_LOG_ALL
+ * - 0x008000000 (27) - VIRTIO_F_ANY_LAYOUT
+ * - 0x010000000 (28) - VIRTIO_F_INDIRECT_DESC
+ * - 0x040000000 (30) - VHOST_USER_F_PROTOCOL_FEATURES
+ * - 0x100000000 (32) - VIRTIO_F_VERSION_1
+ *
+ * - <b>hwaddr <mac-addr></b> - Optional Ethernet address, in either
+ * X:X:X:X:X:X (unix) or X.X.X (cisco) format.
+ *
+ * - <b>renumber <dev_instance></b> - Optional parameter that allows the
+ * instance number in the name to be specified. If the instance already
+ * exists, the name will be reused anyway and multiple interfaces will share
+ * the same name. Use with caution.
+ *
+ * @cliexpar
+ * Example of how to create a vhost interface with VPP as the client and all
+ * features enabled:
+ * @cliexstart{create vhost-user socket /var/run/vpp/vhost1.sock}
+ * VirtualEthernet0/0/0
+ * @cliexend
+ * Example of how to create a vhost interface with VPP as the server and with
+ * just multiple queues enabled:
+ * @cliexstart{create vhost-user socket /var/run/vpp/vhost2.sock server
+ * feature-mask 0x40400000}
+ * VirtualEthernet0/0/1
+ * @cliexend
+ * Once the vHost interface is created, enable the interface using:
+ * @cliexcmd{set interface state VirtualEthernet0/0/0 up}
+?*/
+VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
+ .path = "create vhost-user",
+ .short_help = "create vhost-user socket <socket-filename> [server] "
+ "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] [gso] "
+ "[packed] [event-idx]",
+ .function = vhost_user_connect_command_fn,
+ .is_mp_safe = 1,
+};
+
+/*?
+ * Delete a vHost User interface using the interface name or the
+ * software interface index. Use the '<em>show interface</em>'
+ * command to determine the software interface index. On deletion,
+ * the Linux socket will not be deleted.
+ *
+ * @cliexpar
+ * Example of how to delete a vhost interface by name:
+ * @cliexcmd{delete vhost-user VirtualEthernet0/0/1}
+ * Example of how to delete a vhost interface by software interface index:
+ * @cliexcmd{delete vhost-user sw_if_index 1}
+?*/
+VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
+ .path = "delete vhost-user",
+ .short_help = "delete vhost-user {<interface> | sw_if_index <sw_idx>}",
+ .function = vhost_user_delete_command_fn,
+};
+
+/*?
+ * Display the attributes of a single vHost User interface (provide interface
+ * name), multiple vHost User interfaces (provide a list of interface names
+ * separated by spaces) or all vHost User interfaces (omit the interface
+ * name to display them all).
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to display a vhost interface:
+ * @cliexstart{show vhost-user VirtualEthernet0/0/0}
+ * Virtio vhost-user interfaces
+ * Global:
+ * coalesce frames 32 time 1e-3
+ * Interface: VirtualEthernet0/0/0 (ifindex 1)
+ * virtio_net_hdr_sz 12
+ * features mask (0xffffffffffffffff):
+ * features (0x50408000):
+ * VIRTIO_NET_F_MRG_RXBUF (15)
+ * VIRTIO_NET_F_MQ (22)
+ * VIRTIO_F_INDIRECT_DESC (28)
+ * VHOST_USER_F_PROTOCOL_FEATURES (30)
+ * protocol features (0x3)
+ * VHOST_USER_PROTOCOL_F_MQ (0)
+ * VHOST_USER_PROTOCOL_F_LOG_SHMFD (1)
+ *
+ * socket filename /var/run/vpp/vhost1.sock type client errno "Success"
+ *
+ * rx placement:
+ * thread 1 on vring 1
+ * thread 1 on vring 5
+ * thread 2 on vring 3
+ * thread 2 on vring 7
+ * tx placement: spin-lock
+ * thread 0 on vring 0
+ * thread 1 on vring 2
+ * thread 2 on vring 0
+ *
+ * Memory regions (total 2)
+ * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr
+ * ====== == =============== =========== ============== =========== ==========
+ * 0 60 0x00000000 0x000a0000 0xaac00000 0x00000000 0x2b400000
+ * 1 61 0x000c0000 0x3ff40000 0xaacc0000 0x000c0000 0xabcc0000
+ *
+ * Virtqueue 0 (TX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
+ * kickfd 62 callfd 64 errfd -1
+ *
+ * Virtqueue 1 (RX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 65 callfd 66 errfd -1
+ *
+ * Virtqueue 2 (TX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
+ * kickfd 63 callfd 70 errfd -1
+ *
+ * Virtqueue 3 (RX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 72 callfd 74 errfd -1
+ *
+ * Virtqueue 4 (TX disabled)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 76 callfd 78 errfd -1
+ *
+ * Virtqueue 5 (RX disabled)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 80 callfd 82 errfd -1
+ *
+ * Virtqueue 6 (TX disabled)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 84 callfd 86 errfd -1
+ *
+ * Virtqueue 7 (RX disabled)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 0 used.flags 1 used.idx 0
+ * kickfd 88 callfd 90 errfd -1
+ *
+ * @cliexend
+ *
+ * The optional '<em>descriptors</em>' parameter will display the same output
+ * as the previous example but will include the descriptor table for each
+ * queue.
+ * The output is truncated below:
+ * @cliexstart{show vhost-user VirtualEthernet0/0/0 descriptors}
+ * Virtio vhost-user interfaces
+ * Global:
+ * coalesce frames 32 time 1e-3
+ * Interface: VirtualEthernet0/0/0 (ifindex 1)
+ * virtio_net_hdr_sz 12
+ * features mask (0xffffffffffffffff):
+ * features (0x50408000):
+ * VIRTIO_NET_F_MRG_RXBUF (15)
+ * VIRTIO_NET_F_MQ (22)
+ * :
+ * Virtqueue 0 (TX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * avail.flags 1 avail.idx 128 used.flags 1 used.idx 0
+ * kickfd 62 callfd 64 errfd -1
+ *
+ * descriptor table:
+ * id addr len flags next user_addr
+ * ===== ================== ===== ====== ===== ==================
+ * 0 0x0000000010b6e974 2060 0x0002 1 0x00002aabbc76e974
+ * 1 0x0000000010b6e034 2060 0x0002 2 0x00002aabbc76e034
+ * 2 0x0000000010b6d6f4 2060 0x0002 3 0x00002aabbc76d6f4
+ * 3 0x0000000010b6cdb4 2060 0x0002 4 0x00002aabbc76cdb4
+ * 4 0x0000000010b6c474 2060 0x0002 5 0x00002aabbc76c474
+ * 5 0x0000000010b6bb34 2060 0x0002 6 0x00002aabbc76bb34
+ * 6 0x0000000010b6b1f4 2060 0x0002 7 0x00002aabbc76b1f4
+ * 7 0x0000000010b6a8b4 2060 0x0002 8 0x00002aabbc76a8b4
+ * 8 0x0000000010b69f74 2060 0x0002 9 0x00002aabbc769f74
+ * 9 0x0000000010b69634 2060 0x0002 10 0x00002aabbc769634
+ * 10 0x0000000010b68cf4 2060 0x0002 11 0x00002aabbc768cf4
+ * :
+ * 249 0x0000000000000000 0 0x0000 250 0x00002aab2b400000
+ * 250 0x0000000000000000 0 0x0000 251 0x00002aab2b400000
+ * 251 0x0000000000000000 0 0x0000 252 0x00002aab2b400000
+ * 252 0x0000000000000000 0 0x0000 253 0x00002aab2b400000
+ * 253 0x0000000000000000 0 0x0000 254 0x00002aab2b400000
+ * 254 0x0000000000000000 0 0x0000 255 0x00002aab2b400000
+ * 255 0x0000000000000000 0 0x0000 32768 0x00002aab2b400000
+ *
+ * Virtqueue 1 (RX)
+ * qsz 256 last_avail_idx 0 last_used_idx 0
+ * :
+ * @cliexend
+ * @endparblock
+?*/
+VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
+ .path = "show vhost-user",
+ .short_help = "show vhost-user [<interface> [<interface> [..]]] "
+ "[[descriptors] [verbose]]",
+ .function = show_vhost_user_command_fn,
+};
+
+
+static clib_error_t *
+vhost_user_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "coalesce-frames %d", &vum->coalesce_frames))
+ ;
+ else if (unformat (input, "coalesce-time %f", &vum->coalesce_time))
+ ;
+ else if (unformat (input, "dont-dump-memory"))
+ vum->dont_dump_vhost_user_memory = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ return 0;
+}
+
+/* vhost-user { ... } configuration. */
+VLIB_CONFIG_FUNCTION (vhost_user_config, "vhost-user");
+
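+/*
+ * Example (editor's note; values are illustrative): the settings parsed
+ * above come from the startup configuration, e.g.
+ *
+ *   vhost-user {
+ *     coalesce-frames 64
+ *     coalesce-time 0.05
+ *     dont-dump-memory
+ *   }
+ */
+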
+void
+vhost_user_unmap_all (void)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui;
+
+ if (vum->dont_dump_vhost_user_memory)
+ {
+ pool_foreach (vui, vum->vhost_user_interfaces)
+ unmap_all_mem_regions (vui);
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vhost/vhost_user.h b/src/plugins/vhost/vhost_user.h
new file mode 100644
index 00000000000..a3582affb4b
--- /dev/null
+++ b/src/plugins/vhost/vhost_user.h
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __VIRTIO_VHOST_USER_H__
+#define __VIRTIO_VHOST_USER_H__
+
+#include <vhost/virtio_std.h>
+#include <vhost/vhost_std.h>
+
+/* vhost-user data structures */
+
+#define VHOST_MEMORY_MAX_NREGIONS 8
+#define VHOST_USER_MSG_HDR_SZ 12
+#define VHOST_VRING_INIT_MQ_PAIR_SZ 8 // 8 TX + 8 RX
+
+/*
+ * qid is one byte in size in the spec; see VHOST_USER_SET_VRING_CALL,
+ * VHOST_USER_SET_VRING_KICK, and VHOST_USER_SET_VRING_ERR.
+ * The maximum number of queue pairs is therefore 128.
+ */
+#define VHOST_VRING_MAX_MQ_PAIR_SZ 128
+#define VHOST_VRING_IDX_RX(qid) (2 * (qid))
+#define VHOST_VRING_IDX_TX(qid) (2 * (qid) + 1)
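+
+/*
+ * Worked example (editor's note): with the macros above, queue pair qid
+ * maps to guest vring indices as qid 0 -> RX vring 0 / TX vring 1,
+ * qid 1 -> RX 2 / TX 3, ..., qid 127 -> RX 254 / TX 255, so one byte of
+ * vring index space (0..255) holds at most 128 queue pairs.
+ */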
+
+#define VHOST_USER_VRING_NOFD_MASK 0x100
+
+#define VHOST_USER_PROTOCOL_F_MQ 0
+#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1
+#define VHOST_VRING_F_LOG 0
+
+#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \
+ (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD))
+
+#define vu_log_debug(dev, f, ...) \
+{ \
+ vlib_log(VLIB_LOG_LEVEL_DEBUG, vhost_user_main.log_default, "%U: " f, \
+ format_vnet_hw_if_index_name, vnet_get_main(), \
+ dev->hw_if_index, ##__VA_ARGS__); \
+};
+
+#define vu_log_warn(dev, f, ...) \
+{ \
+ vlib_log(VLIB_LOG_LEVEL_WARNING, vhost_user_main.log_default, "%U: " f, \
+ format_vnet_hw_if_index_name, vnet_get_main(), \
+ dev->hw_if_index, ##__VA_ARGS__); \
+};
+#define vu_log_err(dev, f, ...) \
+{ \
+ vlib_log(VLIB_LOG_LEVEL_ERR, vhost_user_main.log_default, "%U: " f, \
+ format_vnet_hw_if_index_name, vnet_get_main(), \
+ dev->hw_if_index, ##__VA_ARGS__); \
+};
+
+#define UNIX_GET_FD(unixfd_idx) ({ \
+ typeof(unixfd_idx) __unixfd_idx = (unixfd_idx); \
+ (__unixfd_idx != ~0) ? \
+ pool_elt_at_index (file_main.file_pool, \
+ __unixfd_idx)->file_descriptor : -1; })
+
+#define foreach_virtio_trace_flags \
+ _ (SIMPLE_CHAINED, 0, "Simple descriptor chaining") \
+ _ (SINGLE_DESC, 1, "Single descriptor packet") \
+ _ (INDIRECT, 2, "Indirect descriptor") \
+ _ (MAP_ERROR, 4, "Memory mapping error")
+
+typedef enum
+{
+#define _(n,i,s) VIRTIO_TRACE_F_##n,
+ foreach_virtio_trace_flags
+#undef _
+} virtio_trace_flag_t;
+
+#define FEATURE_VIRTIO_NET_F_HOST_TSO_FEATURE_BITS \
+ (VIRTIO_FEATURE (VIRTIO_NET_F_CSUM) | \
+ VIRTIO_FEATURE (VIRTIO_NET_F_HOST_UFO) | \
+ VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO4) | \
+ VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO6))
+
+#define FEATURE_VIRTIO_NET_F_GUEST_TSO_FEATURE_BITS \
+ (VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM) | \
+ VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO) | \
+ VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4) | \
+ VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6))
+
+#define FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS \
+ (FEATURE_VIRTIO_NET_F_HOST_TSO_FEATURE_BITS | \
+ FEATURE_VIRTIO_NET_F_GUEST_TSO_FEATURE_BITS)
+
+
+typedef struct
+{
+ char *sock_filename;
+ u64 feature_mask;
+ u32 custom_dev_instance;
+ u8 hwaddr[6];
+ u8 renumber;
+ u8 is_server;
+ u8 enable_gso;
+ u8 enable_packed;
+ u8 enable_event_idx;
+ u8 use_custom_mac;
+
+ /* return */
+ u32 sw_if_index;
+} vhost_user_create_if_args_t;
+
+int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
+ vhost_user_create_if_args_t * args);
+int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
+ vhost_user_create_if_args_t * args);
+int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
+ u32 sw_if_index);
+
+typedef struct vhost_user_memory_region
+{
+ u64 guest_phys_addr;
+ u64 memory_size;
+ u64 userspace_addr;
+ u64 mmap_offset;
+} __attribute ((packed)) vhost_user_memory_region_t;
+
+typedef struct vhost_user_memory
+{
+ u32 nregions;
+ u32 padding;
+ vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
+} __attribute ((packed)) vhost_user_memory_t;
+
+typedef enum vhost_user_req
+{
+ VHOST_USER_NONE = 0,
+ VHOST_USER_GET_FEATURES = 1,
+ VHOST_USER_SET_FEATURES = 2,
+ VHOST_USER_SET_OWNER = 3,
+ VHOST_USER_RESET_OWNER = 4,
+ VHOST_USER_SET_MEM_TABLE = 5,
+ VHOST_USER_SET_LOG_BASE = 6,
+ VHOST_USER_SET_LOG_FD = 7,
+ VHOST_USER_SET_VRING_NUM = 8,
+ VHOST_USER_SET_VRING_ADDR = 9,
+ VHOST_USER_SET_VRING_BASE = 10,
+ VHOST_USER_GET_VRING_BASE = 11,
+ VHOST_USER_SET_VRING_KICK = 12,
+ VHOST_USER_SET_VRING_CALL = 13,
+ VHOST_USER_SET_VRING_ERR = 14,
+ VHOST_USER_GET_PROTOCOL_FEATURES = 15,
+ VHOST_USER_SET_PROTOCOL_FEATURES = 16,
+ VHOST_USER_GET_QUEUE_NUM = 17,
+ VHOST_USER_SET_VRING_ENABLE = 18,
+ VHOST_USER_MAX
+} vhost_user_req_t;
+
+typedef struct vhost_user_msg {
+ vhost_user_req_t request;
+ u32 flags;
+ u32 size;
+ union
+ {
+ u64 u64;
+ vhost_vring_state_t state;
+ vhost_vring_addr_t addr;
+ vhost_user_memory_t memory;
+ vhost_user_log_t log;
+ };
+} __attribute ((packed)) vhost_user_msg_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u16 qsz_mask;
+ u16 last_avail_idx;
+ u16 last_used_idx;
+ u16 n_since_last_int;
+ union
+ {
+ vnet_virtio_vring_desc_t *desc;
+ vnet_virtio_vring_packed_desc_t *packed_desc;
+ };
+ union
+ {
+ vnet_virtio_vring_avail_t *avail;
+ vnet_virtio_vring_desc_event_t *avail_event;
+ };
+ union
+ {
+ vnet_virtio_vring_used_t *used;
+ vnet_virtio_vring_desc_event_t *used_event;
+ };
+ uword desc_user_addr;
+ uword used_user_addr;
+ uword avail_user_addr;
+ f64 int_deadline;
+ u8 started;
+ u8 enabled;
+ u8 log_used;
+ clib_spinlock_t vring_lock;
+
+  // Put non-runtime fields in a different cache line
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ int errfd;
+ u32 callfd_idx;
+ u32 kickfd_idx;
+ u64 log_guest_addr;
+
+ /* The rx queue policy (interrupt/adaptive/polling) for this queue */
+ u32 mode;
+
+ /*
+   * Contains the device queue number, or -1 if none is assigned. The idea
+   * is to avoid invoking vnet_hw_interface_assign_rx_thread and
+   * vnet_hw_interface_unassign_rx_thread more than once over the lifetime
+   * of the interface, even if it is disconnected and reconnected.
+ */
+ i16 qid;
+
+ u16 used_wrap_counter;
+ u16 avail_wrap_counter;
+ u16 last_kick;
+ u8 first_kick;
+ u32 queue_index;
+ u32 thread_index;
+} vhost_user_vring_t;
+
+#define VHOST_USER_EVENT_START_TIMER 1
+#define VHOST_USER_EVENT_STOP_TIMER 2
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 is_ready;
+ u32 admin_up;
+ u32 unix_server_index;
+ u32 clib_file_index;
+ char sock_filename[256];
+ int sock_errno;
+ uword if_index;
+ u32 hw_if_index, sw_if_index;
+
+ //Feature negotiation
+ u64 features;
+ u64 feature_mask;
+ u64 protocol_features;
+
+ //Memory region information
+ u32 nregions;
+ vhost_user_memory_region_t regions[VHOST_MEMORY_MAX_NREGIONS];
+ void *region_mmap_addr[VHOST_MEMORY_MAX_NREGIONS];
+ u64 region_guest_addr_lo[VHOST_MEMORY_MAX_NREGIONS];
+ u64 region_guest_addr_hi[VHOST_MEMORY_MAX_NREGIONS];
+ u32 region_mmap_fd[VHOST_MEMORY_MAX_NREGIONS];
+
+ //Virtual rings
+ vhost_user_vring_t *vrings;
+
+ /*
+   * vrings is a dynamic array; it may contain more elements than are
+   * currently in use. num_qid is the number of qids currently valid in
+   * vrings. For example, with vec_len (vrings) = 64 and num_qid = 60,
+   * the valid/used qids are 0 through 59 in the vrings array.
+ u32 num_qid;
+
+ int virtio_net_hdr_sz;
+ int is_any_layout;
+
+ void *log_base_addr;
+ u64 log_size;
+
+ u8 enable_gso;
+
+ /* Packed ring configured */
+ u8 enable_packed;
+
+ u8 enable_event_idx;
+} vhost_user_intf_t;
+
+#define FOR_ALL_VHOST_TXQ(qid, vui) for (qid = 1; qid < vui->num_qid; qid += 2)
+
+#define FOR_ALL_VHOST_RXQ(qid, vui) for (qid = 0; qid < vui->num_qid; qid += 2)
+
+#define FOR_ALL_VHOST_RX_TXQ(qid, vui) for (qid = 0; qid < vui->num_qid; qid++)
+
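+/*
+ * Usage sketch (editor's illustration, not part of this patch): the
+ * iterators above honor num_qid, so only qids currently valid for the
+ * interface are visited, e.g. counting started TX queues:
+ *
+ *   u16 qid;
+ *   u32 n_started = 0;
+ *   FOR_ALL_VHOST_TXQ (qid, vui)
+ *     if (vui->vrings[qid].started)
+ *       n_started++;
+ */
+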
+typedef struct
+{
+ uword dst;
+ uword src;
+ u32 len;
+} vhost_copy_t;
+
+typedef struct
+{
+  u16 qid; /** The interface queue index (not the virtio vring idx) */
+ u16 device_index; /** The device index */
+ u32 virtio_ring_flags; /** Runtime queue flags **/
+ u16 first_desc_len; /** Length of the first data descriptor **/
+ vnet_virtio_net_hdr_mrg_rxbuf_t hdr; /** Virtio header **/
+} vhost_trace_t;
+
+#define VHOST_USER_RX_BUFFERS_N (2 * VLIB_FRAME_SIZE + 2)
+#define VHOST_USER_COPY_ARRAY_N (4 * VLIB_FRAME_SIZE)
+
+typedef struct
+{
+ u32 rx_buffers_len;
+ u32 rx_buffers[VHOST_USER_RX_BUFFERS_N];
+
+ vnet_virtio_net_hdr_mrg_rxbuf_t tx_headers[VLIB_FRAME_SIZE];
+ vhost_copy_t copy[VHOST_USER_COPY_ARRAY_N];
+
+  /* This is here so it doesn't end up
+   * using stack or registers. */
+ vhost_trace_t *current_trace;
+
+ u32 *to_next_list;
+ vlib_buffer_t **rx_buffers_pdesc;
+ u32 polling_q_count;
+} vhost_cpu_t;
+
+typedef struct
+{
+ mhash_t if_index_by_sock_name;
+ u32 mtu_bytes;
+ vhost_user_intf_t *vhost_user_interfaces;
+ u32 *show_dev_instance_by_real_dev_instance;
+ u32 coalesce_frames;
+ f64 coalesce_time;
+ int dont_dump_vhost_user_memory;
+
+ /** Per-CPU data for vhost-user */
+ vhost_cpu_t *cpus;
+
+ /** Pseudo random iterator */
+ u32 random;
+
+ /* The number of rx interface/queue pairs in interrupt mode */
+ u32 ifq_count;
+
+ /* logging */
+ vlib_log_class_t log_default;
+
+ /* gso interface count */
+ u32 gso_count;
+} vhost_user_main_t;
+
+typedef struct
+{
+ u8 if_name[64];
+ u32 sw_if_index;
+ u32 virtio_net_hdr_sz;
+ u64 features;
+ u8 is_server;
+ u8 sock_filename[256];
+ u32 num_regions;
+ int sock_errno;
+} vhost_user_intf_details_t;
+
+int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
+ vhost_user_intf_details_t ** out_vuids);
+void vhost_user_set_operation_mode (vhost_user_intf_t *vui,
+ vhost_user_vring_t *txvq);
+
+extern vlib_node_registration_t vhost_user_send_interrupt_node;
+extern vnet_device_class_t vhost_user_device_class;
+extern vlib_node_registration_t vhost_user_input_node;
+extern vhost_user_main_t vhost_user_main;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vhost/vhost_user_api.c b/src/plugins/vhost/vhost_user_api.c
new file mode 100644
index 00000000000..33447c556a8
--- /dev/null
+++ b/src/plugins/vhost/vhost_user_api.c
@@ -0,0 +1,357 @@
+/*
+ *------------------------------------------------------------------
+ * vhost-user_api.c - vhost-user api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vhost/vhost_user.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/ethernet_types_api.h>
+
+#include <vnet/format_fns.h>
+#include <vhost/vhost_user.api_enum.h>
+#include <vhost/vhost_user.api_types.h>
+
+#define REPLY_MSG_ID_BASE msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static u16 msg_id_base;
+
+static void
+vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp)
+{
+ int rv = 0;
+ vl_api_create_vhost_user_if_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ u64 disabled_features = (u64) (0ULL);
+ vhost_user_create_if_args_t args = { 0 };
+
+ args.sw_if_index = (u32) ~ 0;
+ args.feature_mask = (u64) ~ (0ULL);
+ if (mp->disable_mrg_rxbuf)
+ disabled_features = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF);
+
+ if (mp->disable_indirect_desc)
+ disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC);
+
+ /*
+   * GSO and PACKED cannot be requested via the feature mask in the binary
+   * API. We clear both features from the feature mask; they may be enabled
+   * explicitly via the enable_gso and enable_packed arguments.
+ */
+ disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
+ VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
+
+ /* EVENT_IDX is disabled by default */
+ disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
+ args.feature_mask &= ~disabled_features;
+
+ if (mp->use_custom_mac)
+ mac_address_decode (mp->mac_address, (mac_address_t *) args.hwaddr);
+
+ args.use_custom_mac = mp->use_custom_mac;
+ args.is_server = mp->is_server;
+ args.sock_filename = (char *) mp->sock_filename;
+ args.renumber = mp->renumber;
+ args.custom_dev_instance = ntohl (mp->custom_dev_instance);
+ args.enable_gso = mp->enable_gso;
+ args.enable_packed = mp->enable_packed;
+ rv = vhost_user_create_if (vnm, vm, &args);
+
+ /* Remember an interface tag for the new interface */
+ if (rv == 0)
+ {
+ /* If a tag was supplied... */
+ if (mp->tag[0])
+ {
+ /* Make sure it's a proper C-string */
+ mp->tag[ARRAY_LEN (mp->tag) - 1] = 0;
+ u8 *tag = format (0, "%s%c", mp->tag, 0);
+ vnet_set_sw_interface_tag (vnm, tag, args.sw_if_index);
+ }
+ }
+
+ REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (args.sw_if_index);
+ }));
+}
+
+static void
+vl_api_modify_vhost_user_if_t_handler (vl_api_modify_vhost_user_if_t * mp)
+{
+ int rv = 0;
+ vl_api_modify_vhost_user_if_reply_t *rmp;
+ u64 disabled_features = (u64) (0ULL);
+ vhost_user_create_if_args_t args = { 0 };
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+
+ args.feature_mask = (u64) ~ (0ULL);
+ /*
+   * GSO and PACKED cannot be requested via the feature mask in the binary
+   * API. We clear both features from the feature mask; they may be enabled
+   * explicitly via the enable_gso and enable_packed arguments.
+ */
+ disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
+ VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
+
+ /* EVENT_IDX is disabled by default */
+ disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
+ args.feature_mask &= ~disabled_features;
+
+ args.sw_if_index = ntohl (mp->sw_if_index);
+ args.sock_filename = (char *) mp->sock_filename;
+ args.is_server = mp->is_server;
+ args.renumber = mp->renumber;
+ args.custom_dev_instance = ntohl (mp->custom_dev_instance);
+ args.enable_gso = mp->enable_gso;
+ args.enable_packed = mp->enable_packed;
+ rv = vhost_user_modify_if (vnm, vm, &args);
+
+ REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY);
+}
+
+static void
+vl_api_create_vhost_user_if_v2_t_handler (vl_api_create_vhost_user_if_v2_t *
+ mp)
+{
+ int rv = 0;
+ vl_api_create_vhost_user_if_v2_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ u64 disabled_features = (u64) (0ULL);
+ vhost_user_create_if_args_t args = { 0 };
+
+ args.sw_if_index = (u32) ~ 0;
+ args.feature_mask = (u64) ~ (0ULL);
+ if (mp->disable_mrg_rxbuf)
+ disabled_features = VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF);
+
+ if (mp->disable_indirect_desc)
+ disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC);
+
+ /*
+   * GSO and PACKED cannot be requested via the feature mask in the binary
+   * API. We clear both features from the feature mask; they may be enabled
+   * explicitly via the enable_gso and enable_packed arguments.
+ */
+ disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
+ VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
+
+ /* EVENT_IDX is disabled by default */
+ disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
+ args.feature_mask &= ~disabled_features;
+
+ if (mp->use_custom_mac)
+ mac_address_decode (mp->mac_address, (mac_address_t *) args.hwaddr);
+
+ args.use_custom_mac = mp->use_custom_mac;
+ args.is_server = mp->is_server;
+ args.sock_filename = (char *) mp->sock_filename;
+ args.renumber = mp->renumber;
+ args.custom_dev_instance = ntohl (mp->custom_dev_instance);
+ args.enable_gso = mp->enable_gso;
+ args.enable_packed = mp->enable_packed;
+ args.enable_event_idx = mp->enable_event_idx;
+ rv = vhost_user_create_if (vnm, vm, &args);
+
+ /* Remember an interface tag for the new interface */
+ if (rv == 0)
+ {
+ /* If a tag was supplied... */
+ if (mp->tag[0])
+ {
+ /* Make sure it's a proper C-string */
+ mp->tag[ARRAY_LEN (mp->tag) - 1] = 0;
+ u8 *tag = format (0, "%s%c", mp->tag, 0);
+ vnet_set_sw_interface_tag (vnm, tag, args.sw_if_index);
+ }
+ }
+
+ REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_V2_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (args.sw_if_index);
+ }));
+}
+
+static void
+vl_api_modify_vhost_user_if_v2_t_handler (vl_api_modify_vhost_user_if_v2_t *
+ mp)
+{
+ int rv = 0;
+ vl_api_modify_vhost_user_if_v2_reply_t *rmp;
+ u64 disabled_features = (u64) (0ULL);
+ vhost_user_create_if_args_t args = { 0 };
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+
+ args.feature_mask = (u64) ~ (0ULL);
+ /*
+   * GSO and PACKED cannot be requested via the feature mask in the binary
+   * API. We clear both features from the feature mask; they may be enabled
+   * explicitly via the enable_gso and enable_packed arguments.
+ */
+ disabled_features |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS |
+ VIRTIO_FEATURE (VIRTIO_F_RING_PACKED);
+
+ /* EVENT_IDX is disabled by default */
+ disabled_features |= VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX);
+ args.feature_mask &= ~disabled_features;
+
+ args.sw_if_index = ntohl (mp->sw_if_index);
+ args.sock_filename = (char *) mp->sock_filename;
+ args.is_server = mp->is_server;
+ args.renumber = mp->renumber;
+ args.custom_dev_instance = ntohl (mp->custom_dev_instance);
+ args.enable_gso = mp->enable_gso;
+ args.enable_packed = mp->enable_packed;
+ args.enable_event_idx = mp->enable_event_idx;
+ rv = vhost_user_modify_if (vnm, vm, &args);
+
+ REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_V2_REPLY);
+}
+
+static void
+vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp)
+{
+ int rv = 0;
+ vl_api_delete_vhost_user_if_reply_t *rmp;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ vl_api_registration_t *reg;
+
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+
+ rv = vhost_user_delete_if (vnm, vm, sw_if_index);
+
+ REPLY_MACRO (VL_API_DELETE_VHOST_USER_IF_REPLY);
+ if (!rv)
+ {
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ vnet_clear_sw_interface_tag (vnm, sw_if_index);
+ }
+}
+
+static void
+vhost_user_features_encode (u64 features, u32 *first, u32 *last)
+{
+ *first = clib_net_to_host_u32 (features);
+ *last = clib_net_to_host_u32 (features >> 32);
+}
+
+static void
+send_sw_interface_vhost_user_details (vpe_api_main_t * am,
+ vl_api_registration_t * reg,
+ vhost_user_intf_details_t * vui,
+ u32 context)
+{
+ vl_api_sw_interface_vhost_user_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_VHOST_USER_DETAILS);
+ mp->sw_if_index = ntohl (vui->sw_if_index);
+ mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz);
+ vhost_user_features_encode (vui->features, (u32 *) &mp->features_first_32,
+ (u32 *) &mp->features_last_32);
+ mp->is_server = vui->is_server;
+ mp->num_regions = ntohl (vui->num_regions);
+ mp->sock_errno = ntohl (vui->sock_errno);
+ mp->context = context;
+
+ strncpy ((char *) mp->sock_filename,
+ (char *) vui->sock_filename, ARRAY_LEN (mp->sock_filename) - 1);
+ strncpy ((char *) mp->interface_name,
+ (char *) vui->if_name, ARRAY_LEN (mp->interface_name) - 1);
+
+ vl_api_send_msg (reg, (u8 *) mp);
+}
+
+static void
+ vl_api_sw_interface_vhost_user_dump_t_handler
+ (vl_api_sw_interface_vhost_user_dump_t * mp)
+{
+ int rv = 0;
+ vpe_api_main_t *am = &vpe_api_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ vhost_user_intf_details_t *ifaces = NULL;
+ vhost_user_intf_details_t *vuid = NULL;
+ vl_api_registration_t *reg;
+ u32 filter_sw_if_index;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ filter_sw_if_index = htonl (mp->sw_if_index);
+ if (filter_sw_if_index != ~0)
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = vhost_user_dump_ifs (vnm, vm, &ifaces);
+ if (rv)
+ return;
+
+ vec_foreach (vuid, ifaces)
+ {
+ if ((filter_sw_if_index == ~0) ||
+ (vuid->sw_if_index == filter_sw_if_index))
+ send_sw_interface_vhost_user_details (am, reg, vuid, mp->context);
+ }
+ BAD_SW_IF_INDEX_LABEL;
+ vec_free (ifaces);
+}
+
+#include <vhost/vhost_user.api.c>
+static clib_error_t *
+vhost_user_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = vlibapi_get_main ();
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
+
+ /* Mark CREATE_VHOST_USER_IF as mp safe */
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_CREATE_VHOST_USER_IF, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_CREATE_VHOST_USER_IF_V2, 1);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (vhost_user_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vhost/vhost_user_inline.h b/src/plugins/vhost/vhost_user_inline.h
new file mode 100644
index 00000000000..e27f819e96d
--- /dev/null
+++ b/src/plugins/vhost/vhost_user_inline.h
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __VIRTIO_VHOST_USER_INLINE_H__
+#define __VIRTIO_VHOST_USER_INLINE_H__
+/* vhost-user inline functions */
+#include <vppinfra/elog.h>
+
+static_always_inline void *
+map_guest_mem (vhost_user_intf_t * vui, uword addr, u32 * hint)
+{
+ int i = *hint;
+ if (PREDICT_TRUE ((vui->regions[i].guest_phys_addr <= addr) &&
+ ((vui->regions[i].guest_phys_addr +
+ vui->regions[i].memory_size) > addr)))
+ {
+ return (void *) (vui->region_mmap_addr[i] + addr -
+ vui->regions[i].guest_phys_addr);
+ }
+#if __SSE4_2__
+ __m128i rl, rh, al, ah, r;
+ al = _mm_set1_epi64x (addr + 1);
+ ah = _mm_set1_epi64x (addr);
+
+ rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[0]);
+ rl = _mm_cmpgt_epi64 (al, rl);
+ rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[0]);
+ rh = _mm_cmpgt_epi64 (rh, ah);
+ r = _mm_and_si128 (rl, rh);
+
+ rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[2]);
+ rl = _mm_cmpgt_epi64 (al, rl);
+ rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[2]);
+ rh = _mm_cmpgt_epi64 (rh, ah);
+ r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x22);
+
+ rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[4]);
+ rl = _mm_cmpgt_epi64 (al, rl);
+ rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[4]);
+ rh = _mm_cmpgt_epi64 (rh, ah);
+ r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x44);
+
+ rl = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_lo[6]);
+ rl = _mm_cmpgt_epi64 (al, rl);
+ rh = _mm_loadu_si128 ((__m128i *) & vui->region_guest_addr_hi[6]);
+ rh = _mm_cmpgt_epi64 (rh, ah);
+ r = _mm_blend_epi16 (r, _mm_and_si128 (rl, rh), 0x88);
+
+ r = _mm_shuffle_epi8 (r, _mm_set_epi64x (0, 0x0e060c040a020800));
+ i = count_trailing_zeros (_mm_movemask_epi8 (r) |
+ (1 << VHOST_MEMORY_MAX_NREGIONS));
+
+ if (i < vui->nregions)
+ {
+ *hint = i;
+ return (void *) (vui->region_mmap_addr[i] + addr -
+ vui->regions[i].guest_phys_addr);
+ }
+#elif __aarch64__ && __ARM_NEON
+ uint64x2_t al, ah, rl, rh, r;
+ uint32_t u32 = 0;
+
+ al = vdupq_n_u64 (addr + 1);
+ ah = vdupq_n_u64 (addr);
+
+  /* First iteration */
+ rl = vld1q_u64 (&vui->region_guest_addr_lo[0]);
+ rl = vcgtq_u64 (al, rl);
+ rh = vld1q_u64 (&vui->region_guest_addr_hi[0]);
+ rh = vcgtq_u64 (rh, ah);
+ r = vandq_u64 (rl, rh);
+ u32 |= (vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 1);
+
+ if (u32)
+ {
+ i = count_trailing_zeros (u32);
+ goto vhost_map_guest_mem_done;
+ }
+
+  /* Second iteration */
+ rl = vld1q_u64 (&vui->region_guest_addr_lo[2]);
+ rl = vcgtq_u64 (al, rl);
+ rh = vld1q_u64 (&vui->region_guest_addr_hi[2]);
+ rh = vcgtq_u64 (rh, ah);
+ r = vandq_u64 (rl, rh);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 2);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 3);
+
+ if (u32)
+ {
+ i = count_trailing_zeros (u32);
+ goto vhost_map_guest_mem_done;
+ }
+
+  /* Third iteration */
+ rl = vld1q_u64 (&vui->region_guest_addr_lo[4]);
+ rl = vcgtq_u64 (al, rl);
+ rh = vld1q_u64 (&vui->region_guest_addr_hi[4]);
+ rh = vcgtq_u64 (rh, ah);
+ r = vandq_u64 (rl, rh);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 0) & 0x1) << 6);
+ u32 |= ((vgetq_lane_u8 (vreinterpretq_u8_u64 (r), 8) & 0x1) << 7);
+
+ i = count_trailing_zeros (u32 | (1 << VHOST_MEMORY_MAX_NREGIONS));
+
+vhost_map_guest_mem_done:
+ if (i < vui->nregions)
+ {
+ *hint = i;
+ return (void *) (vui->region_mmap_addr[i] + addr -
+ vui->regions[i].guest_phys_addr);
+ }
+#else
+ for (i = 0; i < vui->nregions; i++)
+ {
+ if ((vui->regions[i].guest_phys_addr <= addr) &&
+ ((vui->regions[i].guest_phys_addr + vui->regions[i].memory_size) >
+ addr))
+ {
+ *hint = i;
+ return (void *) (vui->region_mmap_addr[i] + addr -
+ vui->regions[i].guest_phys_addr);
+ }
+ }
+#endif
+ ELOG_TYPE_DECLARE (el) =
+ {
+ .format = "failed to map guest mem addr %lx",
+ .format_args = "i8",
+ };
+ struct
+ {
+ uword addr;
+ } *ed;
+ ed = ELOG_DATA (&vlib_global_main.elog_main, el);
+ ed->addr = addr;
+ *hint = 0;
+ return 0;
+}
+
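+/*
+ * Worked example (editor's note): with a region covering guest physical
+ * addresses [0x40000000, 0x80000000) mmap'ed at host address H, a
+ * lookup of addr 0x40001000 returns H + 0x1000 and caches the region
+ * index in *hint, so subsequent lookups in the same region take the
+ * fast path at the top of the function.
+ */
+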
+static_always_inline void *
+map_user_mem (vhost_user_intf_t * vui, uword addr)
+{
+ int i;
+ for (i = 0; i < vui->nregions; i++)
+ {
+ if ((vui->regions[i].userspace_addr <= addr) &&
+ ((vui->regions[i].userspace_addr + vui->regions[i].memory_size) >
+ addr))
+ {
+ return (void *) (vui->region_mmap_addr[i] + addr -
+ vui->regions[i].userspace_addr);
+ }
+ }
+ return 0;
+}
+
+#define VHOST_LOG_PAGE 0x1000
+
+static_always_inline void
+vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui,
+ u64 addr, u64 len, u8 is_host_address)
+{
+ if (PREDICT_TRUE (vui->log_base_addr == 0
+ || !(vui->features & VIRTIO_FEATURE (VHOST_F_LOG_ALL))))
+ {
+ return;
+ }
+ if (is_host_address)
+ {
+ addr = pointer_to_uword (map_user_mem (vui, (uword) addr));
+ }
+ if (PREDICT_FALSE ((addr + len - 1) / VHOST_LOG_PAGE / 8 >= vui->log_size))
+ {
+ vu_log_debug (vui, "vhost_user_log_dirty_pages(): out of range\n");
+ return;
+ }
+
+ CLIB_MEMORY_BARRIER ();
+ u64 page = addr / VHOST_LOG_PAGE;
+ while (page * VHOST_LOG_PAGE < addr + len)
+ {
+ ((u8 *) vui->log_base_addr)[page / 8] |= 1 << page % 8;
+ page++;
+ }
+}
+
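+/*
+ * Worked example (editor's note): logging one byte at guest physical
+ * address 0x2000 with VHOST_LOG_PAGE = 0x1000 gives page = 2, so bit
+ * (2 % 8) = 2 of log byte (2 / 8) = 0 is set; the vhost-user front-end
+ * (e.g. QEMU) scans this bitmap during live migration to find pages
+ * dirtied by the back-end.
+ */
+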
+#define vhost_user_log_dirty_ring(vui, vq, member) \
+ if (PREDICT_FALSE (vq->log_used)) \
+ { \
+ vhost_user_log_dirty_pages_2 ( \
+ vui, \
+ vq->log_guest_addr + \
+ STRUCT_OFFSET_OF (vnet_virtio_vring_used_t, member), \
+ sizeof (vq->used->member), 0); \
+ }
+
+static_always_inline u8 *
+format_vhost_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_trace_t *t = va_arg (*va, vhost_trace_t *);
+ vhost_user_intf_t *vui = vum->vhost_user_interfaces + t->device_index;
+ vnet_sw_interface_t *sw;
+ u32 indent;
+
+ if (pool_is_free (vum->vhost_user_interfaces, vui))
+ {
+ s = format (s, "vhost-user interface is deleted");
+ return s;
+ }
+ sw = vnet_get_sw_interface (vnm, vui->sw_if_index);
+ indent = format_get_indent (s);
+ s = format (s, "%U %U queue %d\n", format_white_space, indent,
+ format_vnet_sw_interface_name, vnm, sw, t->qid);
+
+ s = format (s, "%U virtio flags:\n", format_white_space, indent);
+#define _(n,i,st) \
+ if (t->virtio_ring_flags & (1 << VIRTIO_TRACE_F_##n)) \
+ s = format (s, "%U %s %s\n", format_white_space, indent, #n, st);
+ foreach_virtio_trace_flags
+#undef _
+ s = format (s, "%U virtio_net_hdr first_desc_len %u\n",
+ format_white_space, indent, t->first_desc_len);
+
+ s = format (s, "%U flags 0x%02x gso_type %u\n",
+ format_white_space, indent,
+ t->hdr.hdr.flags, t->hdr.hdr.gso_type);
+
+ if (vui->virtio_net_hdr_sz == 12)
+ s = format (s, "%U num_buff %u",
+ format_white_space, indent, t->hdr.num_buffers);
+
+ return s;
+}
+
+static_always_inline u64
+vhost_user_is_packed_ring_supported (vhost_user_intf_t * vui)
+{
+ return (vui->features & VIRTIO_FEATURE (VIRTIO_F_RING_PACKED));
+}
+
+static_always_inline u64
+vhost_user_is_event_idx_supported (vhost_user_intf_t * vui)
+{
+ return (vui->features & VIRTIO_FEATURE (VIRTIO_RING_F_EVENT_IDX));
+}
+
+static_always_inline void
+vhost_user_kick (vlib_main_t * vm, vhost_user_vring_t * vq)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ u64 x = 1;
+ int fd = UNIX_GET_FD (vq->callfd_idx);
+ int rv;
+
+ rv = write (fd, &x, sizeof (x));
+ if (PREDICT_FALSE (rv <= 0))
+ {
+ clib_unix_warning
+ ("Error: Could not write to unix socket for callfd %d", fd);
+ return;
+ }
+
+ vq->n_since_last_int = 0;
+ vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
+}
+
+static_always_inline u16
+vhost_user_avail_event_idx (vhost_user_vring_t * vq)
+{
+ volatile u16 *event_idx = (u16 *) & (vq->used->ring[vq->qsz_mask + 1]);
+
+ return *event_idx;
+}
+
+static_always_inline u16
+vhost_user_used_event_idx (vhost_user_vring_t * vq)
+{
+ volatile u16 *event_idx = (u16 *) & (vq->avail->ring[vq->qsz_mask + 1]);
+
+ return *event_idx;
+}
+
+static_always_inline u16
+vhost_user_need_event (u16 event_idx, u16 new_idx, u16 old_idx)
+{
+ return ((u16) (new_idx - event_idx - 1) < (u16) (new_idx - old_idx));
+}
+
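+/*
+ * Worked example (editor's note): with event_idx = 5, old_idx = 4 and
+ * new_idx = 7, (u16) (7 - 5 - 1) = 1 < (u16) (7 - 4) = 3, so an event
+ * is due: the ring has moved past the index the driver asked to be
+ * notified at. The u16 arithmetic keeps the comparison correct across
+ * index wraparound.
+ */
+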
+static_always_inline void
+vhost_user_send_call_event_idx (vlib_main_t * vm, vhost_user_vring_t * vq)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ u8 first_kick = vq->first_kick;
+ u16 event_idx = vhost_user_used_event_idx (vq);
+
+ vq->first_kick = 1;
+ if (vhost_user_need_event (event_idx, vq->last_used_idx, vq->last_kick) ||
+ PREDICT_FALSE (!first_kick))
+ {
+ vhost_user_kick (vm, vq);
+ vq->last_kick = event_idx;
+ }
+ else
+ {
+ vq->n_since_last_int = 0;
+ vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
+ }
+}
+
+static_always_inline void
+vhost_user_send_call_event_idx_packed (vlib_main_t * vm,
+ vhost_user_vring_t * vq)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ u8 first_kick = vq->first_kick;
+ u16 off_wrap;
+ u16 event_idx;
+ u16 new_idx = vq->last_used_idx;
+ u16 old_idx = vq->last_kick;
+
+ if (PREDICT_TRUE (vq->avail_event->flags == VRING_EVENT_F_DESC))
+ {
+ CLIB_COMPILER_BARRIER ();
+ off_wrap = vq->avail_event->off_wrap;
+ event_idx = off_wrap & 0x7fff;
+ if (vq->used_wrap_counter != (off_wrap >> 15))
+ event_idx -= (vq->qsz_mask + 1);
+
+ if (new_idx <= old_idx)
+ old_idx -= (vq->qsz_mask + 1);
+
+ vq->first_kick = 1;
+ vq->last_kick = event_idx;
+ if (vhost_user_need_event (event_idx, new_idx, old_idx) ||
+ PREDICT_FALSE (!first_kick))
+ vhost_user_kick (vm, vq);
+ else
+ {
+ vq->n_since_last_int = 0;
+ vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
+ }
+ }
+ else
+ vhost_user_kick (vm, vq);
+}
+
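+/*
+ * Worked example (editor's note): off_wrap packs the event index in
+ * bits 0..14 and a wrap counter in bit 15; e.g. off_wrap = 0x8005
+ * decodes to event_idx 5 with wrap bit 1. When the ring's wrap counter
+ * disagrees with that bit, the event index refers to the previous pass
+ * over the ring, hence the (qsz_mask + 1) adjustment above.
+ */
+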
+static_always_inline void
+vhost_user_send_call (vlib_main_t * vm, vhost_user_intf_t * vui,
+ vhost_user_vring_t * vq)
+{
+ if (vhost_user_is_event_idx_supported (vui))
+ {
+ if (vhost_user_is_packed_ring_supported (vui))
+ vhost_user_send_call_event_idx_packed (vm, vq);
+ else
+ vhost_user_send_call_event_idx (vm, vq);
+ }
+ else
+ vhost_user_kick (vm, vq);
+}
+
+static_always_inline u8
+vui_is_link_up (vhost_user_intf_t * vui)
+{
+ return vui->admin_up && vui->is_ready;
+}
+
+static_always_inline void
+vhost_user_update_gso_interface_count (vhost_user_intf_t * vui, u8 add)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+
+ if (vui->enable_gso)
+ {
+ if (add)
+ {
+ vum->gso_count++;
+ }
+ else
+ {
+ ASSERT (vum->gso_count > 0);
+ vum->gso_count--;
+ }
+ }
+}
+
+static_always_inline u8
+vhost_user_packed_desc_available (vhost_user_vring_t * vring, u16 idx)
+{
+ return (((vring->packed_desc[idx].flags & VRING_DESC_F_AVAIL) ==
+ vring->avail_wrap_counter));
+}
+
+static_always_inline void
+vhost_user_advance_last_avail_idx (vhost_user_vring_t * vring)
+{
+ vring->last_avail_idx++;
+ if (PREDICT_FALSE ((vring->last_avail_idx & vring->qsz_mask) == 0))
+ {
+ vring->avail_wrap_counter ^= VRING_DESC_F_AVAIL;
+ vring->last_avail_idx = 0;
+ }
+}
+
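+/*
+ * Worked example (editor's note): with a 256-entry ring (qsz_mask
+ * 0xff), the 256th advance wraps last_avail_idx back to 0 and toggles
+ * avail_wrap_counter between VRING_DESC_F_AVAIL and 0, mirroring the
+ * driver's toggling of the AVAIL bit in descriptor flags on each pass
+ * over the packed ring.
+ */
+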
+static_always_inline void
+vhost_user_advance_last_avail_table_idx (vhost_user_intf_t * vui,
+ vhost_user_vring_t * vring,
+ u8 chained)
+{
+ if (chained)
+ {
+ vnet_virtio_vring_packed_desc_t *desc_table = vring->packed_desc;
+
+ /* pick up the slot of the next avail idx */
+ while (desc_table[vring->last_avail_idx & vring->qsz_mask].flags &
+ VRING_DESC_F_NEXT)
+ vhost_user_advance_last_avail_idx (vring);
+ }
+
+ vhost_user_advance_last_avail_idx (vring);
+}
+
+static_always_inline void
+vhost_user_undo_advanced_last_avail_idx (vhost_user_vring_t * vring)
+{
+ if (PREDICT_FALSE ((vring->last_avail_idx & vring->qsz_mask) == 0))
+ vring->avail_wrap_counter ^= VRING_DESC_F_AVAIL;
+
+ if (PREDICT_FALSE (vring->last_avail_idx == 0))
+ vring->last_avail_idx = vring->qsz_mask;
+ else
+ vring->last_avail_idx--;
+}
+
+static_always_inline void
+vhost_user_dequeue_descs (vhost_user_vring_t *rxvq,
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr,
+ u16 *n_descs_processed)
+{
+ u16 i;
+
+ *n_descs_processed -= (hdr->num_buffers - 1);
+ for (i = 0; i < hdr->num_buffers - 1; i++)
+ vhost_user_undo_advanced_last_avail_idx (rxvq);
+}
+
+static_always_inline void
+vhost_user_dequeue_chained_descs (vhost_user_vring_t * rxvq,
+ u16 * n_descs_processed)
+{
+ while (*n_descs_processed)
+ {
+ vhost_user_undo_advanced_last_avail_idx (rxvq);
+ (*n_descs_processed)--;
+ }
+}
+
+static_always_inline void
+vhost_user_advance_last_used_idx (vhost_user_vring_t * vring)
+{
+ vring->last_used_idx++;
+ if (PREDICT_FALSE ((vring->last_used_idx & vring->qsz_mask) == 0))
+ {
+ vring->used_wrap_counter ^= 1;
+ vring->last_used_idx = 0;
+ }
+}
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vhost/vhost_user_input.c b/src/plugins/vhost/vhost_user_input.c
new file mode 100644
index 00000000000..ca5072485ff
--- /dev/null
+++ b/src/plugins/vhost/vhost_user_input.c
@@ -0,0 +1,1472 @@
+/*
+ *------------------------------------------------------------------
+ * vhost-user-input
+ *
+ * Copyright (c) 2014-2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <fcntl.h> /* for open */
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h> /* for iovec */
+#include <netinet/in.h>
+#include <sys/vfs.h>
+
+#include <linux/if_arp.h>
+#include <linux/if_tun.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/interface/rx_queue_funcs.h>
+
+#include <vhost/vhost_user.h>
+#include <vhost/vhost_user_inline.h>
+
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+/*
+ * When an RX queue is down but active, received packets
+ * must be discarded. This value caps how many packets
+ * are discarded in each round.
+ */
+#define VHOST_USER_DOWN_DISCARD_COUNT 256
+
+/*
+ * When the number of available buffers drops below this threshold,
+ * the RX node starts discarding packets.
+ */
+#define VHOST_USER_RX_BUFFER_STARVATION 32
+
+/*
+ * On the receive side, the host should free descriptors as soon
+ * as possible in order to avoid TX drops in the VM.
+ * This value controls how many copy operations are batched before
+ * all copies are executed and the descriptors are given back to
+ * the guest.
+ * The value 64 was obtained by testing (48 and 128 were not as good).
+ */
+#define VHOST_USER_RX_COPY_THRESHOLD 64
+
+extern vlib_node_registration_t vhost_user_input_node;
+
+#define foreach_vhost_user_input_func_error \
+ _(NO_ERROR, "no error") \
+ _(NO_BUFFER, "no available buffer") \
+ _(MMAP_FAIL, "mmap failure") \
+ _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \
+ _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
+ _(NOT_READY, "vhost interface not ready or down") \
+ _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")
+
+typedef enum
+{
+#define _(f,s) VHOST_USER_INPUT_FUNC_ERROR_##f,
+ foreach_vhost_user_input_func_error
+#undef _
+ VHOST_USER_INPUT_FUNC_N_ERROR,
+} vhost_user_input_func_error_t;
+
+static __clib_unused char *vhost_user_input_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_vhost_user_input_func_error
+#undef _
+};
+
+static_always_inline void
+vhost_user_rx_trace (vhost_trace_t * t,
+ vhost_user_intf_t * vui, u16 qid,
+ vlib_buffer_t * b, vhost_user_vring_t * txvq,
+ u16 last_avail_idx)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask];
+ vnet_virtio_vring_desc_t *hdr_desc = 0;
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
+ u32 hint = 0;
+
+ clib_memset (t, 0, sizeof (*t));
+ t->device_index = vui - vum->vhost_user_interfaces;
+ t->qid = qid;
+
+ hdr_desc = &txvq->desc[desc_current];
+ if (txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
+      /* the header is in the first descriptor of the indirect table */
+ hdr_desc = map_guest_mem (vui, txvq->desc[desc_current].addr, &hint);
+ }
+ if (txvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
+ }
+ if (!(txvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
+ !(txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
+ }
+
+ t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
+
+ if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR;
+ }
+ else
+ {
+ u32 len = vui->virtio_net_hdr_sz;
+ memcpy (&t->hdr, hdr, len > hdr_desc->len ? hdr_desc->len : len);
+ }
+}
+
+static_always_inline u32
+vhost_user_input_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
+ u16 copy_len, u32 * map_hint)
+{
+ void *src0, *src1, *src2, *src3;
+ if (PREDICT_TRUE (copy_len >= 4))
+ {
+ if (PREDICT_FALSE (!(src2 = map_guest_mem (vui, cpy[0].src, map_hint))))
+ return 1;
+ if (PREDICT_FALSE (!(src3 = map_guest_mem (vui, cpy[1].src, map_hint))))
+ return 1;
+
+ while (PREDICT_TRUE (copy_len >= 4))
+ {
+ src0 = src2;
+ src1 = src3;
+
+ if (PREDICT_FALSE
+ (!(src2 = map_guest_mem (vui, cpy[2].src, map_hint))))
+ return 1;
+ if (PREDICT_FALSE
+ (!(src3 = map_guest_mem (vui, cpy[3].src, map_hint))))
+ return 1;
+
+ clib_prefetch_load (src2);
+ clib_prefetch_load (src3);
+
+ clib_memcpy_fast ((void *) cpy[0].dst, src0, cpy[0].len);
+ clib_memcpy_fast ((void *) cpy[1].dst, src1, cpy[1].len);
+ copy_len -= 2;
+ cpy += 2;
+ }
+ }
+ while (copy_len)
+ {
+ if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint))))
+ return 1;
+ clib_memcpy_fast ((void *) cpy->dst, src0, cpy->len);
+ copy_len -= 1;
+ cpy += 1;
+ }
+ return 0;
+}
+
+/**
+ * Try to discard packets from the tx ring (VPP RX path).
+ * Returns the number of discarded packets.
+ */
+static_always_inline u32
+vhost_user_rx_discard_packet (vlib_main_t * vm,
+ vhost_user_intf_t * vui,
+ vhost_user_vring_t * txvq, u32 discard_max)
+{
+ /*
+ * On the RX side, each packet corresponds to one descriptor
+ * (it is the same whether it is a shallow descriptor, chained, or indirect).
+ * Therefore, discarding a packet is like discarding a descriptor.
+ */
+ u32 discarded_packets = 0;
+ u32 avail_idx = txvq->avail->idx;
+ u16 mask = txvq->qsz_mask;
+ u16 last_avail_idx = txvq->last_avail_idx;
+ u16 last_used_idx = txvq->last_used_idx;
+ while (discarded_packets != discard_max)
+ {
+ if (avail_idx == last_avail_idx)
+ goto out;
+
+ u16 desc_chain_head = txvq->avail->ring[last_avail_idx & mask];
+ last_avail_idx++;
+ txvq->used->ring[last_used_idx & mask].id = desc_chain_head;
+ txvq->used->ring[last_used_idx & mask].len = 0;
+ vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]);
+ last_used_idx++;
+ discarded_packets++;
+ }
+
+out:
+ txvq->last_avail_idx = last_avail_idx;
+ txvq->last_used_idx = last_used_idx;
+ CLIB_MEMORY_STORE_BARRIER ();
+ txvq->used->idx = txvq->last_used_idx;
+ vhost_user_log_dirty_ring (vui, txvq, idx);
+ return discarded_packets;
+}
+
+/*
+ * In case of overflow, we need to rewind the array of allocated buffers.
+ */
+static_always_inline void
+vhost_user_input_rewind_buffers (vlib_main_t * vm,
+ vhost_cpu_t * cpu, vlib_buffer_t * b_head)
+{
+ u32 bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
+ vlib_buffer_t *b_current = vlib_get_buffer (vm, bi_current);
+ b_current->current_length = 0;
+ b_current->flags = 0;
+ while (b_current != b_head)
+ {
+ cpu->rx_buffers_len++;
+ bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
+ b_current = vlib_get_buffer (vm, bi_current);
+ b_current->current_length = 0;
+ b_current->flags = 0;
+ }
+ cpu->rx_buffers_len++;
+}
+
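+/*
+ * Parse the frame's L2/L3/L4 headers and translate the virtio net
+ * header into vlib buffer offload flags and GSO metadata.
+ */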
+static_always_inline void
+vhost_user_handle_rx_offload (vlib_buffer_t *b0, u8 *b0_data,
+ vnet_virtio_net_hdr_t *hdr)
+{
+ u8 l4_hdr_sz = 0;
+ u8 l4_proto = 0;
+ ethernet_header_t *eh = (ethernet_header_t *) b0_data;
+ u16 ethertype = clib_net_to_host_u16 (eh->type);
+ u16 l2hdr_sz = sizeof (ethernet_header_t);
+ vnet_buffer_oflags_t oflags = 0;
+
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eh + 1);
+
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ vlan++;
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ }
+ }
+ vnet_buffer (b0)->l2_hdr_offset = 0;
+ vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz;
+ vnet_buffer (b0)->l4_hdr_offset = hdr->csum_start;
+ b0->flags |= (VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+
+ if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4))
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) (b0_data + l2hdr_sz);
+ l4_proto = ip4->protocol;
+ b0->flags |= VNET_BUFFER_F_IS_IP4;
+ oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
+ }
+ else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) (b0_data + l2hdr_sz);
+ l4_proto = ip6->protocol;
+ b0->flags |= VNET_BUFFER_F_IS_IP6;
+ }
+
+ if (l4_proto == IP_PROTOCOL_TCP)
+ {
+ tcp_header_t *tcp = (tcp_header_t *)
+ (b0_data + vnet_buffer (b0)->l4_hdr_offset);
+ l4_hdr_sz = tcp_header_bytes (tcp);
+ oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
+ }
+ else if (l4_proto == IP_PROTOCOL_UDP)
+ {
+ l4_hdr_sz = sizeof (udp_header_t);
+ oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+ }
+
+ if (hdr->gso_type == VIRTIO_NET_HDR_GSO_UDP)
+ {
+ vnet_buffer2 (b0)->gso_size = hdr->gso_size;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
+ b0->flags |= VNET_BUFFER_F_GSO;
+ }
+ else if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4)
+ {
+ vnet_buffer2 (b0)->gso_size = hdr->gso_size;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
+ b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4);
+ }
+ else if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV6)
+ {
+ vnet_buffer2 (b0)->gso_size = hdr->gso_size;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = l4_hdr_sz;
+ b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
+ }
+
+ if (oflags)
+ vnet_buffer_offload_flags_set (b0, oflags);
+}
+
+static_always_inline void
+vhost_user_input_do_interrupt (vlib_main_t * vm, vhost_user_intf_t * vui,
+ vhost_user_vring_t * txvq,
+ vhost_user_vring_t * rxvq)
+{
+ f64 now = vlib_time_now (vm);
+
+ if ((txvq->n_since_last_int) && (txvq->int_deadline < now))
+ vhost_user_send_call (vm, vui, txvq);
+
+ if ((rxvq->n_since_last_int) && (rxvq->int_deadline < now))
+ vhost_user_send_call (vm, vui, rxvq);
+}
+
+static_always_inline void
+vhost_user_input_setup_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vhost_user_intf_t * vui,
+ u32 * current_config_index, u32 * next_index,
+ u32 ** to_next, u32 * n_left_to_next)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ u8 feature_arc_idx = fm->device_input_feature_arc_index;
+
+ if (PREDICT_FALSE (vnet_have_features (feature_arc_idx, vui->sw_if_index)))
+ {
+ vnet_feature_config_main_t *cm;
+ cm = &fm->feature_config_mains[feature_arc_idx];
+ *current_config_index = vec_elt (cm->config_index_by_sw_if_index,
+ vui->sw_if_index);
+ vnet_get_config_data (&cm->config_main, current_config_index,
+ next_index, 0);
+ }
+
+ vlib_get_new_next_frame (vm, node, *next_index, *to_next, *n_left_to_next);
+
+ if (*next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT)
+ {
+ /* give some hints to ethernet-input */
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+ nf = vlib_node_runtime_get_next_frame (vm, node, *next_index);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = vui->sw_if_index;
+ ef->hw_if_index = vui->hw_if_index;
+ vlib_frame_no_append (f);
+ }
+}
+
+static_always_inline u32
+vhost_user_if_input (vlib_main_t *vm, vhost_user_main_t *vum,
+ vhost_user_intf_t *vui, u16 qid,
+ vlib_node_runtime_t *node, u8 enable_csum)
+{
+ vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+ vnet_feature_main_t *fm = &feature_main;
+ u16 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u16 n_left;
+ u32 n_left_to_next, *to_next;
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ u32 n_trace = vlib_get_trace_count (vm, node);
+ u32 buffer_data_size = vlib_buffer_get_default_data_size (vm);
+ u32 map_hint = 0;
+ vhost_cpu_t *cpu = &vum->cpus[vm->thread_index];
+ u16 copy_len = 0;
+ u8 feature_arc_idx = fm->device_input_feature_arc_index;
+ u32 current_config_index = ~(u32) 0;
+ u16 mask = txvq->qsz_mask;
+
+ /* The descriptor table is not ready yet */
+ if (PREDICT_FALSE (txvq->avail == 0))
+ goto done;
+
+ {
+    /* do we have pending interrupts? */
+ vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
+ vhost_user_input_do_interrupt (vm, vui, txvq, rxvq);
+ }
+
+ /*
+   * Adaptive mode is optimized to reduce interrupts.
+   * If the scheduler switches the input node to polling due to a
+   * burst of traffic, we tell the driver not to interrupt.
+   * When the traffic subsides and the scheduler switches the node back
+   * to interrupt mode, we tell the driver we want interrupts again.
+ */
+ if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
+ {
+ if ((node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
+ !(node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
+ /* Tell driver we want notification */
+ txvq->used->flags = 0;
+ else
+ /* Tell driver we don't want notification */
+ txvq->used->flags = VRING_USED_F_NO_NOTIFY;
+ }
+
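+  /* only bit 0 (VRING_AVAIL_F_NO_INTERRUPT) is defined for avail->flags;
+   * any other bit set suggests the ring is not sane, so do nothing */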
+ if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
+ goto done;
+
+ n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx);
+
+ /* nothing to do */
+ if (PREDICT_FALSE (n_left == 0))
+ goto done;
+
+ if (PREDICT_FALSE (!vui->admin_up || !(txvq->enabled)))
+ {
+ /*
+ * Discard input packet if interface is admin down or vring is not
+ * enabled.
+ * "For example, for a networking device, in the disabled state
+ * client must not supply any new RX packets, but must process
+ * and discard any TX packets."
+ */
+ vhost_user_rx_discard_packet (vm, vui, txvq,
+ VHOST_USER_DOWN_DISCARD_COUNT);
+ goto done;
+ }
+
+ if (PREDICT_FALSE (n_left == (mask + 1)))
+ {
+ /*
+ * Informational error logging when VPP is not
+ * receiving packets fast enough.
+ */
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
+ }
+
+ if (n_left > VLIB_FRAME_SIZE)
+ n_left = VLIB_FRAME_SIZE;
+
+ /*
+   * For small packets (<2kB), we will not need more than one vlib buffer
+   * per packet. In case packets are bigger, we will just yield at some point
+   * in the loop and come back later. This is not an issue because, for big
+   * packets, the processing cost really comes from the memory copy.
+   * The assumption is that big packets will fit in 40 buffers.
+ */
+ if (PREDICT_FALSE (cpu->rx_buffers_len < n_left + 1 ||
+ cpu->rx_buffers_len < 40))
+ {
+ u32 curr_len = cpu->rx_buffers_len;
+ cpu->rx_buffers_len +=
+ vlib_buffer_alloc (vm, cpu->rx_buffers + curr_len,
+ VHOST_USER_RX_BUFFERS_N - curr_len);
+
+ if (PREDICT_FALSE
+ (cpu->rx_buffers_len < VHOST_USER_RX_BUFFER_STARVATION))
+ {
+ /* In case of buffer starvation, discard some packets from the queue
+ * and log the event.
+ * We keep doing best effort for the remaining packets. */
+ u32 flush = (n_left + 1 > cpu->rx_buffers_len) ?
+ n_left + 1 - cpu->rx_buffers_len : 1;
+ flush = vhost_user_rx_discard_packet (vm, vui, txvq, flush);
+
+ n_left -= flush;
+ vlib_increment_simple_counter (vnet_main.
+ interface_main.sw_if_counters +
+ VNET_INTERFACE_COUNTER_DROP,
+ vm->thread_index, vui->sw_if_index,
+ flush);
+
+ vlib_error_count (vm, vhost_user_input_node.index,
+ VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, flush);
+ }
+ }
+
+ vhost_user_input_setup_frame (vm, node, vui, &current_config_index,
+ &next_index, &to_next, &n_left_to_next);
+
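+  /* work on local copies of the ring indices; they are written back when
+   * the batch completes and whenever queued copies are flushed */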
+ u16 last_avail_idx = txvq->last_avail_idx;
+ u16 last_used_idx = txvq->last_used_idx;
+
+ while (n_left > 0)
+ {
+ vlib_buffer_t *b_head, *b_current;
+ u32 bi_current;
+ u16 desc_current;
+ u32 desc_data_offset;
+ vnet_virtio_vring_desc_t *desc_table = txvq->desc;
+
+ if (PREDICT_FALSE (cpu->rx_buffers_len <= 1))
+ {
+ /* Not enough rx_buffers
+	   * Note: we yield on 1 so we don't need to do an additional
+ * check for the next buffer prefetch.
+ */
+ n_left = 0;
+ break;
+ }
+
+ desc_current = txvq->avail->ring[last_avail_idx & mask];
+ cpu->rx_buffers_len--;
+ bi_current = cpu->rx_buffers[cpu->rx_buffers_len];
+ b_head = b_current = vlib_get_buffer (vm, bi_current);
+      to_next[0] = bi_current;	// we do this now so we can forget about bi_current
+ to_next++;
+ n_left_to_next--;
+
+ vlib_prefetch_buffer_with_index
+ (vm, cpu->rx_buffers[cpu->rx_buffers_len - 1], LOAD);
+
+ /* Just preset the used descriptor id and length for later */
+ txvq->used->ring[last_used_idx & mask].id = desc_current;
+ txvq->used->ring[last_used_idx & mask].len = 0;
+ vhost_user_log_dirty_ring (vui, txvq, ring[last_used_idx & mask]);
+
+ /* The buffer should already be initialized */
+ b_head->total_length_not_including_first_buffer = 0;
+ b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ if (PREDICT_FALSE
+ (n_trace > 0 && vlib_trace_buffer (vm, node, next_index, b_head,
+ /* follow_chain */ 0)))
+ {
+ vhost_trace_t *t0 =
+ vlib_add_trace (vm, node, b_head, sizeof (t0[0]));
+ vhost_user_rx_trace (t0, vui, qid, b_head, txvq, last_avail_idx);
+ n_trace--;
+ vlib_set_trace_count (vm, node, n_trace);
+ }
+
+      /* This depends on the setup but is very consistent,
+       * so the CPU branch predictor should do a pretty good job
+       * of optimizing the decision. */
+ if (txvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
+ {
+ desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr,
+ &map_hint);
+ desc_current = 0;
+ if (PREDICT_FALSE (desc_table == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
+ goto out;
+ }
+ }
+
+ desc_data_offset = vui->virtio_net_hdr_sz;
+
+ if (enable_csum)
+ {
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
+ u8 *b_data;
+ u16 current;
+
+ hdr = map_guest_mem (vui, desc_table[desc_current].addr, &map_hint);
+ if (PREDICT_FALSE (hdr == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
+ goto out;
+ }
+ if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ {
+ if ((desc_data_offset == desc_table[desc_current].len) &&
+ (desc_table[desc_current].flags & VRING_DESC_F_NEXT))
+ {
+ current = desc_table[desc_current].next;
+ b_data = map_guest_mem (vui, desc_table[current].addr,
+ &map_hint);
+ if (PREDICT_FALSE (b_data == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL,
+ 1);
+ goto out;
+ }
+ }
+ else
+ b_data = (u8 *) hdr + desc_data_offset;
+
+ vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr);
+ }
+ }
+
+ while (1)
+ {
+ /* Get more input if necessary. Or end of packet. */
+ if (desc_data_offset == desc_table[desc_current].len)
+ {
+ if (PREDICT_FALSE (desc_table[desc_current].flags &
+ VRING_DESC_F_NEXT))
+ {
+ desc_current = desc_table[desc_current].next;
+ desc_data_offset = 0;
+ }
+ else
+ {
+ goto out;
+ }
+ }
+
+ /* Get more output if necessary. Or end of packet. */
+ if (PREDICT_FALSE (b_current->current_length == buffer_data_size))
+ {
+ if (PREDICT_FALSE (cpu->rx_buffers_len == 0))
+ {
+ /* Cancel speculation */
+ to_next--;
+ n_left_to_next++;
+
+ /*
+		   * Check whether any buffers are left.
+ * If not, just rewind the used buffers and stop.
+ * Note: Scheduled copies are not cancelled. This is
+ * not an issue as they would still be valid. Useless,
+ * but valid.
+ */
+ vhost_user_input_rewind_buffers (vm, cpu, b_head);
+ n_left = 0;
+ goto stop;
+ }
+
+ /* Get next output */
+ cpu->rx_buffers_len--;
+ u32 bi_next = cpu->rx_buffers[cpu->rx_buffers_len];
+ b_current->next_buffer = bi_next;
+ b_current->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ bi_current = bi_next;
+ b_current = vlib_get_buffer (vm, bi_current);
+ }
+
+ /* Prepare a copy order executed later for the data */
+ ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
+ vhost_copy_t *cpy = &cpu->copy[copy_len];
+ copy_len++;
+ u32 desc_data_l = desc_table[desc_current].len - desc_data_offset;
+ cpy->len = buffer_data_size - b_current->current_length;
+ cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
+ cpy->dst = (uword) (vlib_buffer_get_current (b_current) +
+ b_current->current_length);
+ cpy->src = desc_table[desc_current].addr + desc_data_offset;
+
+ desc_data_offset += cpy->len;
+
+ b_current->current_length += cpy->len;
+ b_head->total_length_not_including_first_buffer += cpy->len;
+ }
+
+ out:
+
+ n_rx_bytes += b_head->total_length_not_including_first_buffer;
+ n_rx_packets++;
+
+ b_head->total_length_not_including_first_buffer -=
+ b_head->current_length;
+
+ /* consume the descriptor and return it as used */
+ last_avail_idx++;
+ last_used_idx++;
+
+ vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
+ vnet_buffer (b_head)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ b_head->error = 0;
+
+ if (current_config_index != ~(u32) 0)
+ {
+ b_head->current_config_index = current_config_index;
+ vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
+ }
+
+ n_left--;
+
+ /*
+ * Although separating memory copies from virtio ring parsing
+       * is beneficial, we perform the copies from time to time in
+       * order to free some space in the ring.
+ */
+ if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
+ {
+ if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy,
+ copy_len, &map_hint)))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
+ }
+ copy_len = 0;
+
+ /* give buffers back to driver */
+ CLIB_MEMORY_STORE_BARRIER ();
+ txvq->used->idx = last_used_idx;
+ vhost_user_log_dirty_ring (vui, txvq, idx);
+ }
+ }
+stop:
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+ txvq->last_used_idx = last_used_idx;
+ txvq->last_avail_idx = last_avail_idx;
+
+ /* Do the memory copies */
+ if (PREDICT_FALSE (vhost_user_input_copy (vui, cpu->copy, copy_len,
+ &map_hint)))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
+ }
+
+ /* give buffers back to driver */
+ CLIB_MEMORY_STORE_BARRIER ();
+ txvq->used->idx = txvq->last_used_idx;
+ vhost_user_log_dirty_ring (vui, txvq, idx);
+
+ /* interrupt (call) handling */
+ if ((txvq->callfd_idx != ~0) &&
+ !(txvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+ {
+ txvq->n_since_last_int += n_rx_packets;
+
+ if (txvq->n_since_last_int > vum->coalesce_frames)
+ vhost_user_send_call (vm, vui, txvq);
+ }
+
+ /* increase rx counters */
+ vlib_increment_combined_counter
+ (vnet_main.interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX, vm->thread_index, vui->sw_if_index,
+ n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (vm->thread_index, n_rx_packets);
+
+done:
+ return n_rx_packets;
+}
+
+static_always_inline void
+vhost_user_mark_desc_consumed (vhost_user_intf_t * vui,
+ vhost_user_vring_t * txvq, u16 desc_head,
+ u16 n_descs_processed)
+{
+ vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
+ u16 desc_idx;
+ u16 mask = txvq->qsz_mask;
+
+ for (desc_idx = 0; desc_idx < n_descs_processed; desc_idx++)
+ {
+ if (txvq->used_wrap_counter)
+ desc_table[(desc_head + desc_idx) & mask].flags |=
+ (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
+ else
+ desc_table[(desc_head + desc_idx) & mask].flags &=
+ ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
+ vhost_user_advance_last_used_idx (txvq);
+ }
+}
+
+static_always_inline void
+vhost_user_rx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
+ u16 qid, vhost_user_vring_t * txvq,
+ u16 desc_current)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vnet_virtio_vring_packed_desc_t *hdr_desc;
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
+ u32 hint = 0;
+
+ clib_memset (t, 0, sizeof (*t));
+ t->device_index = vui - vum->vhost_user_interfaces;
+ t->qid = qid;
+
+ hdr_desc = &txvq->packed_desc[desc_current];
+ if (txvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
+      /* the header is in the first descriptor of the indirect table */
+ hdr_desc = map_guest_mem (vui, txvq->packed_desc[desc_current].addr,
+ &hint);
+ }
+ if (txvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
+
+ if (!(txvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
+ !(txvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
+
+ t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
+
+ if (!hdr_desc || !(hdr = map_guest_mem (vui, hdr_desc->addr, &hint)))
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_MAP_ERROR;
+ else
+ {
+ u32 len = vui->virtio_net_hdr_sz;
+ clib_memcpy_fast (&t->hdr, hdr,
+ len > hdr_desc->len ? hdr_desc->len : len);
+ }
+}
+
+static_always_inline u32
+vhost_user_rx_discard_packet_packed (vlib_main_t * vm,
+ vhost_user_intf_t * vui,
+ vhost_user_vring_t * txvq,
+ u32 discard_max)
+{
+ u32 discarded_packets = 0;
+ u16 mask = txvq->qsz_mask;
+ u16 desc_current, desc_head;
+
+ desc_head = desc_current = txvq->last_used_idx & mask;
+
+ /*
+ * On the RX side, each packet corresponds to one descriptor
+ * (it is the same whether it is a shallow descriptor, chained, or indirect).
+ * Therefore, discarding a packet is like discarding a descriptor.
+ */
+ while ((discarded_packets != discard_max) &&
+ vhost_user_packed_desc_available (txvq, desc_current))
+ {
+ vhost_user_advance_last_avail_idx (txvq);
+ discarded_packets++;
+ desc_current = (desc_current + 1) & mask;
+ }
+
+ if (PREDICT_TRUE (discarded_packets))
+ vhost_user_mark_desc_consumed (vui, txvq, desc_head, discarded_packets);
+ return (discarded_packets);
+}
+
+static_always_inline u32
+vhost_user_input_copy_packed (vhost_user_intf_t * vui, vhost_copy_t * cpy,
+ u16 copy_len, u32 * map_hint)
+{
+ void *src0, *src1, *src2, *src3, *src4, *src5, *src6, *src7;
+ u8 bad;
+ u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
+
+ if (PREDICT_TRUE (copy_len >= 8))
+ {
+ src4 = map_guest_mem (vui, cpy[0].src, map_hint);
+ src5 = map_guest_mem (vui, cpy[1].src, map_hint);
+ src6 = map_guest_mem (vui, cpy[2].src, map_hint);
+ src7 = map_guest_mem (vui, cpy[3].src, map_hint);
+ bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
+ if (PREDICT_FALSE (bad))
+ goto one_by_one;
+ clib_prefetch_load (src4);
+ clib_prefetch_load (src5);
+ clib_prefetch_load (src6);
+ clib_prefetch_load (src7);
+
+ while (PREDICT_TRUE (copy_len >= 8))
+ {
+ src0 = src4;
+ src1 = src5;
+ src2 = src6;
+ src3 = src7;
+
+ src4 = map_guest_mem (vui, cpy[4].src, map_hint);
+ src5 = map_guest_mem (vui, cpy[5].src, map_hint);
+ src6 = map_guest_mem (vui, cpy[6].src, map_hint);
+ src7 = map_guest_mem (vui, cpy[7].src, map_hint);
+ bad = (src4 == 0) + (src5 == 0) + (src6 == 0) + (src7 == 0);
+ if (PREDICT_FALSE (bad))
+ break;
+
+ clib_prefetch_load (src4);
+ clib_prefetch_load (src5);
+ clib_prefetch_load (src6);
+ clib_prefetch_load (src7);
+
+ clib_memcpy_fast ((void *) cpy[0].dst, src0, cpy[0].len);
+ clib_memcpy_fast ((void *) cpy[1].dst, src1, cpy[1].len);
+ clib_memcpy_fast ((void *) cpy[2].dst, src2, cpy[2].len);
+ clib_memcpy_fast ((void *) cpy[3].dst, src3, cpy[3].len);
+ copy_len -= 4;
+ cpy += 4;
+ }
+ }
+
+one_by_one:
+ while (copy_len)
+ {
+ if (PREDICT_FALSE (!(src0 = map_guest_mem (vui, cpy->src, map_hint))))
+ {
+ rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
+ break;
+ }
+ clib_memcpy_fast ((void *) cpy->dst, src0, cpy->len);
+ copy_len -= 1;
+ cpy += 1;
+ }
+ return rc;
+}
+
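+/*
+ * Map the virtio net header for the current descriptor and, when the
+ * guest requests checksum offload, locate the packet data and set up
+ * the RX offload metadata on the head buffer.
+ */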
+static_always_inline u32
+vhost_user_do_offload (vhost_user_intf_t *vui,
+ vnet_virtio_vring_packed_desc_t *desc_table,
+ u16 desc_current, u16 mask, vlib_buffer_t *b_head,
+ u32 *map_hint)
+{
+ u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
+ u8 *b_data;
+ u32 desc_data_offset = vui->virtio_net_hdr_sz;
+
+ hdr = map_guest_mem (vui, desc_table[desc_current].addr, map_hint);
+ if (PREDICT_FALSE (hdr == 0))
+ rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
+ else if (hdr->hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ {
+ if (desc_data_offset == desc_table[desc_current].len)
+ {
+ desc_current = (desc_current + 1) & mask;
+ b_data =
+ map_guest_mem (vui, desc_table[desc_current].addr, map_hint);
+ if (PREDICT_FALSE (b_data == 0))
+ rc = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
+ else
+ vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr);
+ }
+ else
+ {
+ b_data = (u8 *) hdr + desc_data_offset;
+ vhost_user_handle_rx_offload (b_head, b_data, &hdr->hdr);
+ }
+ }
+
+ return rc;
+}
+
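+/*
+ * Number of vlib buffers needed to hold desc_len bytes, i.e.
+ * ceil (desc_len / buffer_data_size). For example, a 3000-byte
+ * descriptor with 2048-byte buffers needs 2 buffers. The common
+ * 2048-byte buffer size takes a shift/mask fast path.
+ */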
+static_always_inline u32
+vhost_user_compute_buffers_required (u32 desc_len, u32 buffer_data_size)
+{
+ div_t result;
+ u32 buffers_required;
+
+ if (PREDICT_TRUE (buffer_data_size == 2048))
+ {
+ buffers_required = desc_len >> 11;
+ if ((desc_len & 2047) != 0)
+ buffers_required++;
+ return (buffers_required);
+ }
+
+ if (desc_len < buffer_data_size)
+ return 1;
+
+ result = div (desc_len, buffer_data_size);
+ if (result.rem)
+ buffers_required = result.quot + 1;
+ else
+ buffers_required = result.quot;
+
+ return (buffers_required);
+}
+
+static_always_inline u32
+vhost_user_compute_indirect_desc_len (vhost_user_intf_t * vui,
+ vhost_user_vring_t * txvq,
+ u32 buffer_data_size, u16 desc_current,
+ u32 * map_hint)
+{
+ vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
+ u32 desc_len = 0;
+ u16 desc_data_offset = vui->virtio_net_hdr_sz;
+ u16 desc_idx = desc_current;
+ u32 n_descs;
+
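+  /* indirect table length is in bytes; each packed descriptor is
+   * 16 bytes, so len >> 4 gives the entry count */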
+ n_descs = desc_table[desc_idx].len >> 4;
+ desc_table = map_guest_mem (vui, desc_table[desc_idx].addr, map_hint);
+ if (PREDICT_FALSE (desc_table == 0))
+ return 0;
+
+ for (desc_idx = 0; desc_idx < n_descs; desc_idx++)
+ desc_len += desc_table[desc_idx].len;
+
+ if (PREDICT_TRUE (desc_len > desc_data_offset))
+ desc_len -= desc_data_offset;
+
+ return vhost_user_compute_buffers_required (desc_len, buffer_data_size);
+}
+
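+/*
+ * Walk a chained descriptor list, advancing the avail index over every
+ * descriptor in the chain, and return how many vlib buffers the summed
+ * chain length requires.
+ */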
+static_always_inline u32
+vhost_user_compute_chained_desc_len (vhost_user_intf_t * vui,
+ vhost_user_vring_t * txvq,
+ u32 buffer_data_size, u16 * current,
+ u16 * n_left)
+{
+ vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
+ u32 desc_len = 0;
+ u16 mask = txvq->qsz_mask;
+
+ while (desc_table[*current].flags & VRING_DESC_F_NEXT)
+ {
+ desc_len += desc_table[*current].len;
+ (*n_left)++;
+ *current = (*current + 1) & mask;
+ vhost_user_advance_last_avail_idx (txvq);
+ }
+ desc_len += desc_table[*current].len;
+ (*n_left)++;
+ *current = (*current + 1) & mask;
+ vhost_user_advance_last_avail_idx (txvq);
+
+ if (PREDICT_TRUE (desc_len > vui->virtio_net_hdr_sz))
+ desc_len -= vui->virtio_net_hdr_sz;
+
+ return vhost_user_compute_buffers_required (desc_len, buffer_data_size);
+}
+
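+/*
+ * Copy one descriptor's payload into the current vlib buffer chain,
+ * pulling pre-allocated buffers from the list as the chain grows, and
+ * queue the copy orders for later execution.
+ */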
+static_always_inline void
+vhost_user_assemble_packet (vnet_virtio_vring_packed_desc_t *desc_table,
+ u16 *desc_idx, vlib_buffer_t *b_head,
+ vlib_buffer_t **b_current, u32 **next,
+ vlib_buffer_t ***b, u32 *bi_current,
+ vhost_cpu_t *cpu, u16 *copy_len, u32 *buffers_used,
+ u32 buffers_required, u32 *desc_data_offset,
+ u32 buffer_data_size, u16 mask)
+{
+ u32 desc_data_l;
+
+ while (*desc_data_offset < desc_table[*desc_idx].len)
+ {
+ /* Get more output if necessary. Or end of packet. */
+ if (PREDICT_FALSE ((*b_current)->current_length == buffer_data_size))
+ {
+ /* Get next output */
+ u32 bi_next = **next;
+ (*next)++;
+ (*b_current)->next_buffer = bi_next;
+ (*b_current)->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ *bi_current = bi_next;
+ *b_current = **b;
+ (*b)++;
+ (*buffers_used)++;
+ ASSERT (*buffers_used <= buffers_required);
+ }
+
+ /* Prepare a copy order executed later for the data */
+ ASSERT (*copy_len < VHOST_USER_COPY_ARRAY_N);
+ vhost_copy_t *cpy = &cpu->copy[*copy_len];
+ (*copy_len)++;
+ desc_data_l = desc_table[*desc_idx].len - *desc_data_offset;
+ cpy->len = buffer_data_size - (*b_current)->current_length;
+ cpy->len = (cpy->len > desc_data_l) ? desc_data_l : cpy->len;
+ cpy->dst = (uword) (vlib_buffer_get_current (*b_current) +
+ (*b_current)->current_length);
+ cpy->src = desc_table[*desc_idx].addr + *desc_data_offset;
+
+ *desc_data_offset += cpy->len;
+
+ (*b_current)->current_length += cpy->len;
+ b_head->total_length_not_including_first_buffer += cpy->len;
+ }
+  *desc_idx = (*desc_idx + 1) & mask;
+ *desc_data_offset = 0;
+}
+
+static_always_inline u32
+vhost_user_if_input_packed (vlib_main_t *vm, vhost_user_main_t *vum,
+ vhost_user_intf_t *vui, u16 qid,
+ vlib_node_runtime_t *node, u8 enable_csum)
+{
+ vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+ vnet_feature_main_t *fm = &feature_main;
+ u8 feature_arc_idx = fm->device_input_feature_arc_index;
+ u16 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u16 n_left = 0;
+ u32 buffers_required = 0;
+ u32 n_left_to_next, *to_next;
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ u32 n_trace = vlib_get_trace_count (vm, node);
+ u32 buffer_data_size = vlib_buffer_get_default_data_size (vm);
+ u32 map_hint = 0;
+ vhost_cpu_t *cpu = &vum->cpus[vm->thread_index];
+ u16 copy_len = 0;
+ u32 current_config_index = ~0;
+ u16 mask = txvq->qsz_mask;
+ u16 desc_current, desc_head, last_used_idx;
+ vnet_virtio_vring_packed_desc_t *desc_table = 0;
+ u32 n_descs_processed = 0;
+ u32 rv;
+ vlib_buffer_t **b;
+ u32 *next;
+ u32 buffers_used = 0;
+ u16 current, n_descs_to_process;
+
+ /* The descriptor table is not ready yet */
+ if (PREDICT_FALSE (txvq->packed_desc == 0))
+ goto done;
+
+  /* do we have pending interrupts? */
+ vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
+ vhost_user_input_do_interrupt (vm, vui, txvq, rxvq);
+
+ /*
+   * Adaptive mode is optimized to reduce interrupts.
+   * If the scheduler switches the input node to polling due to a
+   * burst of traffic, we tell the driver not to interrupt.
+   * When the traffic subsides and the scheduler switches the node back
+   * to interrupt mode, we tell the driver we want interrupts again.
+ */
+ if (PREDICT_FALSE (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
+ {
+ if ((node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
+ !(node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
+ /* Tell driver we want notification */
+ txvq->used_event->flags = 0;
+ else
+ /* Tell driver we don't want notification */
+ txvq->used_event->flags = VRING_EVENT_F_DISABLE;
+ }
+
+ last_used_idx = txvq->last_used_idx & mask;
+ desc_head = desc_current = last_used_idx;
+
+ if (vhost_user_packed_desc_available (txvq, desc_current) == 0)
+ goto done;
+
+ if (PREDICT_FALSE (!vui->admin_up || !vui->is_ready || !(txvq->enabled)))
+ {
+ /*
+ * Discard input packet if interface is admin down or vring is not
+ * enabled.
+ * "For example, for a networking device, in the disabled state
+ * client must not supply any new RX packets, but must process
+ * and discard any TX packets."
+ */
+ rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq,
+ VHOST_USER_DOWN_DISCARD_COUNT);
+ vlib_error_count (vm, vhost_user_input_node.index,
+ VHOST_USER_INPUT_FUNC_ERROR_NOT_READY, rv);
+ goto done;
+ }
+
+ vhost_user_input_setup_frame (vm, node, vui, &current_config_index,
+ &next_index, &to_next, &n_left_to_next);
+
+ /*
+ * Compute n_left and total buffers needed
+ */
+ desc_table = txvq->packed_desc;
+ current = desc_current;
+ while (vhost_user_packed_desc_available (txvq, current) &&
+ (n_left < VLIB_FRAME_SIZE))
+ {
+ if (desc_table[current].flags & VRING_DESC_F_INDIRECT)
+ {
+ buffers_required +=
+ vhost_user_compute_indirect_desc_len (vui, txvq, buffer_data_size,
+ current, &map_hint);
+ n_left++;
+ current = (current + 1) & mask;
+ vhost_user_advance_last_avail_idx (txvq);
+ }
+ else
+ {
+ buffers_required +=
+ vhost_user_compute_chained_desc_len (vui, txvq, buffer_data_size,
+ &current, &n_left);
+ }
+ }
+
+ /* Something is broken if we need more than 10000 buffers */
+ if (PREDICT_FALSE ((buffers_required == 0) || (buffers_required > 10000)))
+ {
+ rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq, n_left);
+ vlib_error_count (vm, vhost_user_input_node.index,
+ VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv);
+ goto done;
+ }
+
+ vec_validate (cpu->to_next_list, buffers_required);
+ rv = vlib_buffer_alloc (vm, cpu->to_next_list, buffers_required);
+ if (PREDICT_FALSE (rv != buffers_required))
+ {
+ vlib_buffer_free (vm, cpu->to_next_list, rv);
+ rv = vhost_user_rx_discard_packet_packed (vm, vui, txvq, n_left);
+ vlib_error_count (vm, vhost_user_input_node.index,
+ VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER, rv);
+ goto done;
+ }
+
+ next = cpu->to_next_list;
+ vec_validate (cpu->rx_buffers_pdesc, buffers_required);
+ vlib_get_buffers (vm, next, cpu->rx_buffers_pdesc, buffers_required);
+ b = cpu->rx_buffers_pdesc;
+ n_descs_processed = n_left;
+
+ while (n_left)
+ {
+ vlib_buffer_t *b_head, *b_current;
+ u32 bi_current;
+ u32 desc_data_offset;
+ u16 desc_idx = desc_current;
+ u32 n_descs;
+
+ desc_table = txvq->packed_desc;
+ to_next[0] = bi_current = next[0];
+ b_head = b_current = b[0];
+ b++;
+ buffers_used++;
+ ASSERT (buffers_used <= buffers_required);
+ to_next++;
+ next++;
+ n_left_to_next--;
+
+ /* The buffer should already be initialized */
+ b_head->total_length_not_including_first_buffer = 0;
+ b_head->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ desc_data_offset = vui->virtio_net_hdr_sz;
+ n_descs_to_process = 1;
+
+ if (desc_table[desc_idx].flags & VRING_DESC_F_INDIRECT)
+ {
+ n_descs = desc_table[desc_idx].len >> 4;
+ desc_table = map_guest_mem (vui, desc_table[desc_idx].addr,
+ &map_hint);
+ desc_idx = 0;
+ if (PREDICT_FALSE (desc_table == 0) ||
+ (enable_csum &&
+ (PREDICT_FALSE
+ (vhost_user_do_offload
+ (vui, desc_table, desc_idx, mask, b_head,
+ &map_hint) != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL, 1);
+ to_next--;
+ next--;
+ n_left_to_next++;
+ buffers_used--;
+ b--;
+ goto out;
+ }
+ while (n_descs)
+ {
+ vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
+ &b_current, &next, &b, &bi_current,
+ cpu, &copy_len, &buffers_used,
+ buffers_required, &desc_data_offset,
+ buffer_data_size, mask);
+ n_descs--;
+ }
+ }
+ else
+ {
+ if (enable_csum)
+ {
+ rv = vhost_user_do_offload (vui, desc_table, desc_idx, mask,
+ b_head, &map_hint);
+ if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
+ {
+ vlib_error_count (vm, node->node_index, rv, 1);
+ to_next--;
+ next--;
+ n_left_to_next++;
+ buffers_used--;
+ b--;
+ goto out;
+ }
+ }
+ /*
+	   * For chained descriptors, we process the whole chain in a single
+	   * while loop, so count how many descriptors are in the chain.
+ */
+ n_descs_to_process = 1;
+ while (desc_table[desc_idx].flags & VRING_DESC_F_NEXT)
+ {
+ vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
+ &b_current, &next, &b, &bi_current,
+ cpu, &copy_len, &buffers_used,
+ buffers_required, &desc_data_offset,
+ buffer_data_size, mask);
+ n_descs_to_process++;
+ }
+ vhost_user_assemble_packet (desc_table, &desc_idx, b_head,
+ &b_current, &next, &b, &bi_current,
+ cpu, &copy_len, &buffers_used,
+ buffers_required, &desc_data_offset,
+ buffer_data_size, mask);
+ }
+
+ n_rx_bytes += b_head->total_length_not_including_first_buffer;
+ n_rx_packets++;
+
+ b_head->total_length_not_including_first_buffer -=
+ b_head->current_length;
+
+ vnet_buffer (b_head)->sw_if_index[VLIB_RX] = vui->sw_if_index;
+ vnet_buffer (b_head)->sw_if_index[VLIB_TX] = ~0;
+ b_head->error = 0;
+
+ if (current_config_index != ~0)
+ {
+ b_head->current_config_index = current_config_index;
+ vnet_buffer (b_head)->feature_arc_index = feature_arc_idx;
+ }
+
+ out:
+ ASSERT (n_left >= n_descs_to_process);
+ n_left -= n_descs_to_process;
+
+      /* advance to the next descriptor */
+ desc_current = (desc_current + n_descs_to_process) & mask;
+
+ /*
+ * Although separating memory copies from virtio ring parsing
+       * is beneficial, we perform the copies from time to time in
+       * order to free some space in the ring.
+ */
+ if (PREDICT_FALSE (copy_len >= VHOST_USER_RX_COPY_THRESHOLD))
+ {
+ rv = vhost_user_input_copy_packed (vui, cpu->copy, copy_len,
+ &map_hint);
+ if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
+ vlib_error_count (vm, node->node_index, rv, 1);
+ copy_len = 0;
+ }
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+ /* Do the memory copies */
+ rv = vhost_user_input_copy_packed (vui, cpu->copy, copy_len, &map_hint);
+ if (PREDICT_FALSE (rv != VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
+ vlib_error_count (vm, node->node_index, rv, 1);
+
+ /* Must do the tracing before giving buffers back to driver */
+ if (PREDICT_FALSE (n_trace))
+ {
+ u32 left = n_rx_packets;
+
+ b = cpu->rx_buffers_pdesc;
+ while (n_trace && left)
+ {
+ if (PREDICT_TRUE
+ (vlib_trace_buffer
+ (vm, node, next_index, b[0], /* follow_chain */ 0)))
+ {
+ vhost_trace_t *t0;
+ t0 = vlib_add_trace (vm, node, b[0], sizeof (t0[0]));
+ vhost_user_rx_trace_packed (t0, vui, qid, txvq, last_used_idx);
+ last_used_idx = (last_used_idx + 1) & mask;
+ n_trace--;
+ vlib_set_trace_count (vm, node, n_trace);
+ }
+ left--;
+ b++;
+ }
+ }
+
+ /*
+ * Give buffers back to driver.
+ */
+ vhost_user_mark_desc_consumed (vui, txvq, desc_head, n_descs_processed);
+
+ /* interrupt (call) handling */
+ if ((txvq->callfd_idx != ~0) &&
+ (txvq->avail_event->flags != VRING_EVENT_F_DISABLE))
+ {
+ txvq->n_since_last_int += n_rx_packets;
+ if (txvq->n_since_last_int > vum->coalesce_frames)
+ vhost_user_send_call (vm, vui, txvq);
+ }
+
+ /* increase rx counters */
+ vlib_increment_combined_counter
+ (vnet_main.interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX, vm->thread_index, vui->sw_if_index,
+ n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (vm->thread_index, n_rx_packets);
+
+ if (PREDICT_FALSE (buffers_used < buffers_required))
+ vlib_buffer_free (vm, next, buffers_required - buffers_used);
+
+done:
+ return n_rx_packets;
+}
+
+VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ uword n_rx_packets = 0;
+ vhost_user_intf_t *vui;
+ vnet_hw_if_rxq_poll_vector_t *pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
+ vnet_hw_if_rxq_poll_vector_t *pve;
+
+ vec_foreach (pve, pv)
+ {
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, pve->dev_instance);
+ if (vhost_user_is_packed_ring_supported (vui))
+ {
+ if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
+ n_rx_packets += vhost_user_if_input_packed (
+ vm, vum, vui, pve->queue_id, node, 1);
+ else
+ n_rx_packets += vhost_user_if_input_packed (
+ vm, vum, vui, pve->queue_id, node, 0);
+ }
+ else
+ {
+ if (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))
+ n_rx_packets +=
+ vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 1);
+ else
+ n_rx_packets +=
+ vhost_user_if_input (vm, vum, vui, pve->queue_id, node, 0);
+ }
+ }
+
+ return n_rx_packets;
+}
+
+VLIB_REGISTER_NODE (vhost_user_input_node) = {
+ .type = VLIB_NODE_TYPE_INPUT,
+ .name = "vhost-user-input",
+ .sibling_of = "device-input",
+ .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
+
+ /* Will be enabled if/when hardware is detected. */
+ .state = VLIB_NODE_STATE_DISABLED,
+
+ .format_buffer = format_ethernet_header_with_length,
+ .format_trace = format_vhost_trace,
+
+ .n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
+ .error_strings = vhost_user_input_func_error_strings,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vhost/vhost_user_output.c b/src/plugins/vhost/vhost_user_output.c
new file mode 100644
index 00000000000..58fd4309f8c
--- /dev/null
+++ b/src/plugins/vhost/vhost_user_output.c
@@ -0,0 +1,1143 @@
+/*
+ *------------------------------------------------------------------
+ * vhost-user-output
+ *
+ * Copyright (c) 2014-2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stddef.h>
+#include <fcntl.h> /* for open */
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h> /* for iovec */
+#include <netinet/in.h>
+#include <sys/vfs.h>
+
+#include <linux/if_arp.h>
+#include <linux/if_tun.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ip/ip_psh_cksum.h>
+
+#include <vhost/vhost_user.h>
+#include <vhost/vhost_user_inline.h>
+
+#include <vnet/gso/hdr_offset_parser.h>
+/*
+ * On the transmit side, we keep processing the buffers from vlib in the while
+ * loop and prepare the copy orders to be executed later. However, the static
+ * array in which we keep the copy orders is limited to VHOST_USER_COPY_ARRAY_N
+ * entries. In order not to corrupt memory, we have to do the copies when the
+ * static array reaches the copy threshold. We subtract 40 in case the code
+ * goes into the inner loop for a maximum of 64k frames which may require
+ * more array entries. We subtract 200 because our default buffer size is
+ * 2048 and the default desc len is likely 1536. While it takes fewer than 40
+ * vlib buffers for a jumbo frame, it may take twice as many descriptors
+ * for the same jumbo frame. Use 200 for the extra headroom.
+ */
+#define VHOST_USER_TX_COPY_THRESHOLD (VHOST_USER_COPY_ARRAY_N - 200)
+
+extern vnet_device_class_t vhost_user_device_class;
+
+#define foreach_vhost_user_tx_func_error \
+ _(NONE, "no error") \
+ _(NOT_READY, "vhost vring not ready") \
+ _(DOWN, "vhost interface is down") \
+ _(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \
+ _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)") \
+ _(MMAP_FAIL, "mmap failure") \
+ _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")
+
+typedef enum
+{
+#define _(f,s) VHOST_USER_TX_FUNC_ERROR_##f,
+ foreach_vhost_user_tx_func_error
+#undef _
+ VHOST_USER_TX_FUNC_N_ERROR,
+} vhost_user_tx_func_error_t;
+
+static __clib_unused char *vhost_user_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_vhost_user_tx_func_error
+#undef _
+};
+
+static __clib_unused u8 *
+format_vhost_user_interface_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ u32 show_dev_instance = ~0;
+ vhost_user_main_t *vum = &vhost_user_main;
+
+ if (i < vec_len (vum->show_dev_instance_by_real_dev_instance))
+ show_dev_instance = vum->show_dev_instance_by_real_dev_instance[i];
+
+ if (show_dev_instance != ~0)
+ i = show_dev_instance;
+
+ s = format (s, "VirtualEthernet0/0/%d", i);
+ return s;
+}
+
+static __clib_unused int
+vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
+{
+ // FIXME: check if the new dev instance is already used
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui = pool_elt_at_index (vum->vhost_user_interfaces,
+ hi->dev_instance);
+
+ vec_validate_init_empty (vum->show_dev_instance_by_real_dev_instance,
+ hi->dev_instance, ~0);
+
+ vum->show_dev_instance_by_real_dev_instance[hi->dev_instance] =
+ new_dev_instance;
+
+ vu_log_debug (vui, "renumbered vhost-user interface dev_instance %d to %d",
+ hi->dev_instance, new_dev_instance);
+
+ return 0;
+}
+
+static_always_inline void
+vhost_user_tx_trace (vhost_trace_t * t,
+ vhost_user_intf_t * vui, u16 qid,
+ vlib_buffer_t * b, vhost_user_vring_t * rxvq)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ u32 last_avail_idx = rxvq->last_avail_idx;
+ u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
+ vnet_virtio_vring_desc_t *hdr_desc = 0;
+ u32 hint = 0;
+
+ clib_memset (t, 0, sizeof (*t));
+ t->device_index = vui - vum->vhost_user_interfaces;
+ t->qid = qid;
+
+ hdr_desc = &rxvq->desc[desc_current];
+ if (rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT)
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
+      /* the header is in the first descriptor of the indirect table */
+ hdr_desc = map_guest_mem (vui, rxvq->desc[desc_current].addr, &hint);
+ }
+ if (rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT)
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
+ }
+ if (!(rxvq->desc[desc_current].flags & VRING_DESC_F_NEXT) &&
+ !(rxvq->desc[desc_current].flags & VRING_DESC_F_INDIRECT))
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
+ }
+
+ t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
+}
+
+static_always_inline u32
+vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
+ u16 copy_len, u32 * map_hint)
+{
+ void *dst0, *dst1, *dst2, *dst3;
+ if (PREDICT_TRUE (copy_len >= 4))
+ {
+ if (PREDICT_FALSE (!(dst2 = map_guest_mem (vui, cpy[0].dst, map_hint))))
+ return 1;
+ if (PREDICT_FALSE (!(dst3 = map_guest_mem (vui, cpy[1].dst, map_hint))))
+ return 1;
+ while (PREDICT_TRUE (copy_len >= 4))
+ {
+ dst0 = dst2;
+ dst1 = dst3;
+
+ if (PREDICT_FALSE
+ (!(dst2 = map_guest_mem (vui, cpy[2].dst, map_hint))))
+ return 1;
+ if (PREDICT_FALSE
+ (!(dst3 = map_guest_mem (vui, cpy[3].dst, map_hint))))
+ return 1;
+
+ clib_prefetch_load ((void *) cpy[2].src);
+ clib_prefetch_load ((void *) cpy[3].src);
+
+ clib_memcpy_fast (dst0, (void *) cpy[0].src, cpy[0].len);
+ clib_memcpy_fast (dst1, (void *) cpy[1].src, cpy[1].len);
+
+ vhost_user_log_dirty_pages_2 (vui, cpy[0].dst, cpy[0].len, 1);
+ vhost_user_log_dirty_pages_2 (vui, cpy[1].dst, cpy[1].len, 1);
+ copy_len -= 2;
+ cpy += 2;
+ }
+ }
+ while (copy_len)
+ {
+ if (PREDICT_FALSE (!(dst0 = map_guest_mem (vui, cpy->dst, map_hint))))
+ return 1;
+ clib_memcpy_fast (dst0, (void *) cpy->src, cpy->len);
+ vhost_user_log_dirty_pages_2 (vui, cpy->dst, cpy->len, 1);
+ copy_len -= 1;
+ cpy += 1;
+ }
+ return 0;
+}
+
+static_always_inline void
+vhost_user_handle_tx_offload (vhost_user_intf_t *vui, vlib_buffer_t *b,
+ vnet_virtio_net_hdr_t *hdr)
+{
+ generic_header_offset_t gho = { 0 };
+ int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
+ int is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;
+ vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+ u16 psh_cksum = 0;
+ ip4_header_t *ip4 = 0;
+ ip6_header_t *ip6 = 0;
+
+ ASSERT (!(is_ip4 && is_ip6));
+ vnet_generic_header_offset_parser (b, &gho, 1 /* l2 */ , is_ip4, is_ip6);
+ if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ {
+ ip4 =
+ (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
+ ip4->checksum = ip4_header_checksum (ip4);
+ psh_cksum = ip4_pseudo_header_cksum (ip4);
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
+ psh_cksum = ip6_pseudo_header_cksum (ip6);
+ }
+
+ /* checksum offload */
+ if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
+ {
+ udp_header_t *udp =
+ (udp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
+ udp->checksum = psh_cksum;
+ hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = gho.l4_hdr_offset;
+ hdr->csum_offset = offsetof (udp_header_t, checksum);
+ }
+ else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+ {
+ tcp_header_t *tcp =
+ (tcp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
+ tcp->checksum = psh_cksum;
+ hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ hdr->csum_start = gho.l4_hdr_offset;
+ hdr->csum_offset = offsetof (tcp_header_t, checksum);
+ }
+
+ /* GSO offload */
+ if (b->flags & VNET_BUFFER_F_GSO)
+ {
+ if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+ {
+ if (is_ip4 &&
+ (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4)))
+ {
+ hdr->gso_size = vnet_buffer2 (b)->gso_size;
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ }
+ else if (is_ip6 &&
+ (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6)))
+ {
+ hdr->gso_size = vnet_buffer2 (b)->gso_size;
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ }
+ }
+ else if ((vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO)) &&
+ (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
+ {
+ hdr->gso_size = vnet_buffer2 (b)->gso_size;
+ hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+ }
+ }
+}
+
+static_always_inline void
+vhost_user_mark_desc_available (vlib_main_t * vm, vhost_user_intf_t * vui,
+ vhost_user_vring_t * rxvq,
+ u16 * n_descs_processed, u8 chained,
+ vlib_frame_t * frame, u32 n_left)
+{
+ u16 desc_idx, flags;
+ vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc;
+ u16 last_used_idx = rxvq->last_used_idx;
+
+ if (PREDICT_FALSE (*n_descs_processed == 0))
+ return;
+
+ if (rxvq->used_wrap_counter)
+ flags = desc_table[last_used_idx & rxvq->qsz_mask].flags |
+ (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
+ else
+ flags = desc_table[last_used_idx & rxvq->qsz_mask].flags &
+ ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
+
+ vhost_user_advance_last_used_idx (rxvq);
+
+ for (desc_idx = 1; desc_idx < *n_descs_processed; desc_idx++)
+ {
+ if (rxvq->used_wrap_counter)
+ desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags |=
+ (VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
+ else
+ desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &=
+ ~(VRING_DESC_F_AVAIL | VRING_DESC_F_USED);
+ vhost_user_advance_last_used_idx (rxvq);
+ }
+
+ desc_table[last_used_idx & rxvq->qsz_mask].flags = flags;
+
+ *n_descs_processed = 0;
+
+ if (chained)
+ {
+ vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc;
+
+ while (desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &
+ VRING_DESC_F_NEXT)
+ vhost_user_advance_last_used_idx (rxvq);
+
+ /* Advance past the current chained table entries */
+ vhost_user_advance_last_used_idx (rxvq);
+ }
+
+ /* interrupt (call) handling */
+ if ((rxvq->callfd_idx != ~0) &&
+ (rxvq->avail_event->flags != VRING_EVENT_F_DISABLE))
+ {
+ vhost_user_main_t *vum = &vhost_user_main;
+
+ rxvq->n_since_last_int += frame->n_vectors - n_left;
+ if (rxvq->n_since_last_int > vum->coalesce_frames)
+ vhost_user_send_call (vm, vui, rxvq);
+ }
+}
+
+static_always_inline void
+vhost_user_tx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
+ u16 qid, vlib_buffer_t * b,
+ vhost_user_vring_t * rxvq)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ u32 last_avail_idx = rxvq->last_avail_idx;
+ u32 desc_current = last_avail_idx & rxvq->qsz_mask;
+ vnet_virtio_vring_packed_desc_t *hdr_desc = 0;
+ u32 hint = 0;
+
+ clib_memset (t, 0, sizeof (*t));
+ t->device_index = vui - vum->vhost_user_interfaces;
+ t->qid = qid;
+
+ hdr_desc = &rxvq->packed_desc[desc_current];
+ if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT)
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_INDIRECT;
+      /* the header is in the first descriptor of the indirect table */
+ hdr_desc = map_guest_mem (vui, rxvq->packed_desc[desc_current].addr,
+ &hint);
+ }
+ if (rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT)
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SIMPLE_CHAINED;
+ }
+ if (!(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_NEXT) &&
+ !(rxvq->packed_desc[desc_current].flags & VRING_DESC_F_INDIRECT))
+ {
+ t->virtio_ring_flags |= 1 << VIRTIO_TRACE_F_SINGLE_DESC;
+ }
+
+ t->first_desc_len = hdr_desc ? hdr_desc->len : 0;
+}
+
+static_always_inline uword
+vhost_user_device_class_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, vhost_user_intf_t *vui,
+ vhost_user_vring_t *rxvq)
+{
+ u32 *buffers = vlib_frame_vector_args (frame);
+ u32 n_left = frame->n_vectors;
+ vhost_user_main_t *vum = &vhost_user_main;
+ u32 qid = rxvq->qid;
+ u8 error;
+ u32 thread_index = vm->thread_index;
+ vhost_cpu_t *cpu = &vum->cpus[thread_index];
+ u32 map_hint = 0;
+ u8 retry = 8;
+ u16 copy_len;
+ u16 tx_headers_len;
+ vnet_virtio_vring_packed_desc_t *desc_table;
+ u32 or_flags;
+ u16 desc_head, desc_index, desc_len;
+ u16 n_descs_processed;
+ u8 indirect, chained;
+
+retry:
+ error = VHOST_USER_TX_FUNC_ERROR_NONE;
+ tx_headers_len = 0;
+ copy_len = 0;
+ n_descs_processed = 0;
+
+ while (n_left > 0)
+ {
+ vlib_buffer_t *b0, *current_b0;
+ uword buffer_map_addr;
+ u32 buffer_len;
+ u16 bytes_left;
+ u32 total_desc_len = 0;
+ u16 n_entries = 0;
+
+ indirect = 0;
+ chained = 0;
+ if (PREDICT_TRUE (n_left > 1))
+ vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ cpu->current_trace = vlib_add_trace (vm, node, b0,
+ sizeof (*cpu->current_trace));
+ vhost_user_tx_trace_packed (cpu->current_trace, vui, qid / 2, b0,
+ rxvq);
+ }
+
+ desc_table = rxvq->packed_desc;
+ desc_head = desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
+ if (PREDICT_FALSE (!vhost_user_packed_desc_available (rxvq, desc_head)))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
+ goto done;
+ }
+ /*
+ * Go deeper in case of indirect descriptor.
+ * To test it, turn off mrg_rxbuf.
+ */
+ if (desc_table[desc_head].flags & VRING_DESC_F_INDIRECT)
+ {
+ indirect = 1;
+ if (PREDICT_FALSE (desc_table[desc_head].len <
+ sizeof (vnet_virtio_vring_packed_desc_t)))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
+ goto done;
+ }
+ n_entries = desc_table[desc_head].len >> 4;
+ desc_table = map_guest_mem (vui, desc_table[desc_index].addr,
+ &map_hint);
+ if (PREDICT_FALSE (desc_table == 0))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
+ goto done;
+ }
+ desc_index = 0;
+ }
+ else if (rxvq->packed_desc[desc_head].flags & VRING_DESC_F_NEXT)
+ chained = 1;
+
+ desc_len = vui->virtio_net_hdr_sz;
+ buffer_map_addr = desc_table[desc_index].addr;
+ buffer_len = desc_table[desc_index].len;
+
+ /* Get a header from the header array */
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
+ tx_headers_len++;
+ hdr->hdr.flags = 0;
+ hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
+ hdr->num_buffers = 1;
+
+ or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);
+
+ /* Guest supports csum offload and buffer requires checksum offload? */
+ if (or_flags &&
+ (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
+ vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);
+
+ /* Prepare a copy order executed later for the header */
+ ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
+ vhost_copy_t *cpy = &cpu->copy[copy_len];
+ copy_len++;
+ cpy->len = vui->virtio_net_hdr_sz;
+ cpy->dst = buffer_map_addr;
+ cpy->src = (uword) hdr;
+
+ buffer_map_addr += vui->virtio_net_hdr_sz;
+ buffer_len -= vui->virtio_net_hdr_sz;
+ bytes_left = b0->current_length;
+ current_b0 = b0;
+ while (1)
+ {
+ if (buffer_len == 0)
+ {
+ /* Get new output */
+ if (chained)
+ {
+ /*
+ * Next one is chained
+ * Test it with both indirect and mrg_rxbuf off
+ */
+ if (PREDICT_FALSE (!(desc_table[desc_index].flags &
+ VRING_DESC_F_NEXT)))
+ {
+ /*
+ * Last descriptor in chain.
+ * Dequeue queued descriptors for this packet
+ */
+ vhost_user_dequeue_chained_descs (rxvq,
+ &n_descs_processed);
+ error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
+ goto done;
+ }
+ vhost_user_advance_last_avail_idx (rxvq);
+ desc_index = rxvq->last_avail_idx & rxvq->qsz_mask;
+ n_descs_processed++;
+ buffer_map_addr = desc_table[desc_index].addr;
+ buffer_len = desc_table[desc_index].len;
+ total_desc_len += desc_len;
+ desc_len = 0;
+ }
+ else if (indirect)
+ {
+ /*
+ * Indirect table
+		     * Test it with mrg_rxbuf off
+ */
+ if (PREDICT_TRUE (n_entries > 0))
+ n_entries--;
+ else
+ {
+ /* Dequeue queued descriptors for this packet */
+ vhost_user_dequeue_chained_descs (rxvq,
+ &n_descs_processed);
+ error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
+ goto done;
+ }
+ total_desc_len += desc_len;
+ desc_index = (desc_index + 1) & rxvq->qsz_mask;
+ buffer_map_addr = desc_table[desc_index].addr;
+ buffer_len = desc_table[desc_index].len;
+ desc_len = 0;
+ }
+ else if (vui->virtio_net_hdr_sz == 12)
+ {
+ /*
+ * MRG is available
+ * This is the default setting for the guest VM
+ */
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
+ &cpu->tx_headers[tx_headers_len - 1];
+
+ desc_table[desc_index].len = desc_len;
+ vhost_user_advance_last_avail_idx (rxvq);
+ desc_head = desc_index =
+ rxvq->last_avail_idx & rxvq->qsz_mask;
+ hdr->num_buffers++;
+ n_descs_processed++;
+ desc_len = 0;
+
+ if (PREDICT_FALSE (!vhost_user_packed_desc_available
+ (rxvq, desc_index)))
+ {
+ /* Dequeue queued descriptors for this packet */
+ vhost_user_dequeue_descs (rxvq, hdr,
+ &n_descs_processed);
+ error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
+ goto done;
+ }
+
+ buffer_map_addr = desc_table[desc_index].addr;
+ buffer_len = desc_table[desc_index].len;
+ }
+ else
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
+ goto done;
+ }
+ }
+
+ ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
+ vhost_copy_t *cpy = &cpu->copy[copy_len];
+ copy_len++;
+ cpy->len = bytes_left;
+ cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
+ cpy->dst = buffer_map_addr;
+ cpy->src = (uword) vlib_buffer_get_current (current_b0) +
+ current_b0->current_length - bytes_left;
+
+ bytes_left -= cpy->len;
+ buffer_len -= cpy->len;
+ buffer_map_addr += cpy->len;
+ desc_len += cpy->len;
+
+ clib_prefetch_load (&rxvq->packed_desc);
+
+ /* Check if vlib buffer has more data. If not, get more or break */
+ if (PREDICT_TRUE (!bytes_left))
+ {
+ if (PREDICT_FALSE
+ (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
+ bytes_left = current_b0->current_length;
+ }
+ else
+ {
+ /* End of packet */
+ break;
+ }
+ }
+ }
+
+ /* Move from available to used ring */
+ total_desc_len += desc_len;
+ rxvq->packed_desc[desc_head].len = total_desc_len;
+
+ vhost_user_advance_last_avail_table_idx (vui, rxvq, chained);
+ n_descs_processed++;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
+
+ n_left--;
+
+ /*
+ * Do the copy periodically to prevent
+ * cpu->copy array overflow and corrupt memory
+ */
+ if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD) || chained)
+ {
+ if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
+ &map_hint)))
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
+ copy_len = 0;
+
+ /* give buffers back to driver */
+ vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
+ chained, frame, n_left);
+ }
+
+ buffers++;
+ }
+
+done:
+ if (PREDICT_TRUE (copy_len))
+ {
+ if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
+ &map_hint)))
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
+
+ vhost_user_mark_desc_available (vm, vui, rxvq, &n_descs_processed,
+ chained, frame, n_left);
+ }
+
+ /*
+ * When n_left is set, error is always set to something too.
+ * In case error is due to lack of remaining buffers, we go back up and
+ * retry.
+ * The idea is that it is better to spend some time on packets that
+ * have already been processed than to drop them and fetch fresh
+ * packets that will likely be dropped too.
+ * This technique also gives the VM driver more time to pick up packets.
+ * In case the traffic flows from physical to virtual interfaces, this
+ * technique will end-up leveraging the physical NIC buffer in order to
+ * absorb the VM's CPU jitter.
+ */
+ if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
+ {
+ retry--;
+ goto retry;
+ }
+
+ clib_spinlock_unlock (&rxvq->vring_lock);
+
+ if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
+ {
+ vlib_error_count (vm, node->node_index, error, n_left);
+ vlib_increment_simple_counter
+ (vnet_main.interface_main.sw_if_counters +
+ VNET_INTERFACE_COUNTER_DROP, thread_index, vui->sw_if_index, n_left);
+ }
+
+ vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
+ return frame->n_vectors;
+}
+
+VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
+ vlib_node_runtime_t *
+ node, vlib_frame_t * frame)
+{
+ u32 *buffers = vlib_frame_vector_args (frame);
+ u32 n_left = frame->n_vectors;
+ vhost_user_main_t *vum = &vhost_user_main;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
+ u32 qid;
+ vhost_user_vring_t *rxvq;
+ u8 error;
+ u32 thread_index = vm->thread_index;
+ vhost_cpu_t *cpu = &vum->cpus[thread_index];
+ u32 map_hint = 0;
+ u8 retry = 8;
+ u16 copy_len;
+ u16 tx_headers_len;
+ u32 or_flags;
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
+
+ if (PREDICT_FALSE (!vui->admin_up))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_DOWN;
+ goto done3;
+ }
+
+ if (PREDICT_FALSE (!vui->is_ready))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_NOT_READY;
+ goto done3;
+ }
+
+ qid = VHOST_VRING_IDX_RX (tf->queue_id);
+ rxvq = &vui->vrings[qid];
+ ASSERT (tf->queue_id == rxvq->qid);
+
+ if (PREDICT_FALSE (rxvq->avail == 0))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
+ goto done3;
+ }
+ if (tf->shared_queue)
+ clib_spinlock_lock (&rxvq->vring_lock);
+
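+  /* the packed-ring path frees the frame and releases the vring lock
+   * itself before returning */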
+ if (vhost_user_is_packed_ring_supported (vui))
+ return (vhost_user_device_class_packed (vm, node, frame, vui, rxvq));
+
+retry:
+ error = VHOST_USER_TX_FUNC_ERROR_NONE;
+ tx_headers_len = 0;
+ copy_len = 0;
+ while (n_left > 0)
+ {
+ vlib_buffer_t *b0, *current_b0;
+ u16 desc_head, desc_index, desc_len;
+ vnet_virtio_vring_desc_t *desc_table;
+ uword buffer_map_addr;
+ u32 buffer_len;
+ u16 bytes_left;
+
+ if (PREDICT_TRUE (n_left > 1))
+ vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
+
+ b0 = vlib_get_buffer (vm, buffers[0]);
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ cpu->current_trace = vlib_add_trace (vm, node, b0,
+ sizeof (*cpu->current_trace));
+ vhost_user_tx_trace (cpu->current_trace, vui, qid / 2, b0, rxvq);
+ }
+
+ if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
+ goto done;
+ }
+
+ desc_table = rxvq->desc;
+ desc_head = desc_index =
+ rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
+
+ /* Go deeper in case of indirect descriptor
+ * I don't know of any driver providing indirect for RX. */
+ if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
+ {
+ if (PREDICT_FALSE (rxvq->desc[desc_head].len <
+ sizeof (vnet_virtio_vring_desc_t)))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
+ goto done;
+ }
+ if (PREDICT_FALSE
+ (!(desc_table =
+ map_guest_mem (vui, rxvq->desc[desc_index].addr,
+ &map_hint))))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
+ goto done;
+ }
+ desc_index = 0;
+ }
+
+ desc_len = vui->virtio_net_hdr_sz;
+ buffer_map_addr = desc_table[desc_index].addr;
+ buffer_len = desc_table[desc_index].len;
+
+ {
+ // Get a header from the header array
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
+ &cpu->tx_headers[tx_headers_len];
+ tx_headers_len++;
+ hdr->hdr.flags = 0;
+ hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
+ hdr->num_buffers = 1; //This is local, no need to check
+
+ or_flags = (b0->flags & VNET_BUFFER_F_OFFLOAD);
+
+ /* Guest supports csum offload and buffer requires checksum offload? */
+ if (or_flags
+ && (vui->features & VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM)))
+ vhost_user_handle_tx_offload (vui, b0, &hdr->hdr);
+
+ // Prepare a copy order executed later for the header
+ ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
+ vhost_copy_t *cpy = &cpu->copy[copy_len];
+ copy_len++;
+ cpy->len = vui->virtio_net_hdr_sz;
+ cpy->dst = buffer_map_addr;
+ cpy->src = (uword) hdr;
+ }
+
+ buffer_map_addr += vui->virtio_net_hdr_sz;
+ buffer_len -= vui->virtio_net_hdr_sz;
+ bytes_left = b0->current_length;
+ current_b0 = b0;
+ while (1)
+ {
+ if (buffer_len == 0)
+ { //Get new output
+ if (desc_table[desc_index].flags & VRING_DESC_F_NEXT)
+ {
+ //Next one is chained
+ desc_index = desc_table[desc_index].next;
+ buffer_map_addr = desc_table[desc_index].addr;
+ buffer_len = desc_table[desc_index].len;
+ }
+ else if (vui->virtio_net_hdr_sz == 12) //MRG is available
+ {
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
+ &cpu->tx_headers[tx_headers_len - 1];
+
+ //Move from available to used buffer
+ rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id =
+ desc_head;
+ rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len =
+ desc_len;
+ vhost_user_log_dirty_ring (vui, rxvq,
+ ring[rxvq->last_used_idx &
+ rxvq->qsz_mask]);
+
+ rxvq->last_avail_idx++;
+ rxvq->last_used_idx++;
+ hdr->num_buffers++;
+ desc_len = 0;
+
+ if (PREDICT_FALSE
+ (rxvq->last_avail_idx == rxvq->avail->idx))
+ {
+ //Dequeue queued descriptors for this packet
+ rxvq->last_used_idx -= hdr->num_buffers - 1;
+ rxvq->last_avail_idx -= hdr->num_buffers - 1;
+ error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
+ goto done;
+ }
+
+ desc_table = rxvq->desc;
+ desc_head = desc_index =
+ rxvq->avail->ring[rxvq->last_avail_idx & rxvq->qsz_mask];
+ if (PREDICT_FALSE
+ (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
+ {
+ //It is seriously unlikely that a driver will put indirect descriptor
+ //after non-indirect descriptor.
+ if (PREDICT_FALSE (rxvq->desc[desc_head].len <
+ sizeof (vnet_virtio_vring_desc_t)))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
+ goto done;
+ }
+ if (PREDICT_FALSE
+ (!(desc_table =
+ map_guest_mem (vui,
+ rxvq->desc[desc_index].addr,
+ &map_hint))))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
+ goto done;
+ }
+ desc_index = 0;
+ }
+ buffer_map_addr = desc_table[desc_index].addr;
+ buffer_len = desc_table[desc_index].len;
+ }
+ else
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
+ goto done;
+ }
+ }
+
+ {
+ ASSERT (copy_len < VHOST_USER_COPY_ARRAY_N);
+ vhost_copy_t *cpy = &cpu->copy[copy_len];
+ copy_len++;
+ cpy->len = bytes_left;
+ cpy->len = (cpy->len > buffer_len) ? buffer_len : cpy->len;
+ cpy->dst = buffer_map_addr;
+ cpy->src = (uword) vlib_buffer_get_current (current_b0) +
+ current_b0->current_length - bytes_left;
+
+ bytes_left -= cpy->len;
+ buffer_len -= cpy->len;
+ buffer_map_addr += cpy->len;
+ desc_len += cpy->len;
+
+ clib_prefetch_load (&rxvq->desc);
+ }
+
+ // Check if vlib buffer has more data. If not, get more or break.
+ if (PREDICT_TRUE (!bytes_left))
+ {
+ if (PREDICT_FALSE
+ (current_b0->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ current_b0 = vlib_get_buffer (vm, current_b0->next_buffer);
+ bytes_left = current_b0->current_length;
+ }
+ else
+ {
+ //End of packet
+ break;
+ }
+ }
+ }
+
+ //Move from available to used ring
+ rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].id = desc_head;
+ rxvq->used->ring[rxvq->last_used_idx & rxvq->qsz_mask].len = desc_len;
+ vhost_user_log_dirty_ring (vui, rxvq,
+ ring[rxvq->last_used_idx & rxvq->qsz_mask]);
+ rxvq->last_avail_idx++;
+ rxvq->last_used_idx++;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ cpu->current_trace->hdr = cpu->tx_headers[tx_headers_len - 1];
+ }
+
+ n_left--; //At the end for error counting when 'goto done' is invoked
+
+ /*
+ * Do the copy periodically to prevent
+ * cpu->copy array overflow and corrupt memory
+ */
+ if (PREDICT_FALSE (copy_len >= VHOST_USER_TX_COPY_THRESHOLD))
+ {
+ if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
+ &map_hint)))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
+ }
+ copy_len = 0;
+
+ /* give buffers back to driver */
+ CLIB_MEMORY_BARRIER ();
+ rxvq->used->idx = rxvq->last_used_idx;
+ vhost_user_log_dirty_ring (vui, rxvq, idx);
+ }
+ buffers++;
+ }
+
+done:
+ //Do the memory copies
+ if (PREDICT_FALSE (vhost_user_tx_copy (vui, cpu->copy, copy_len,
+ &map_hint)))
+ {
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL, 1);
+ }
+
+ CLIB_MEMORY_BARRIER ();
+ rxvq->used->idx = rxvq->last_used_idx;
+ vhost_user_log_dirty_ring (vui, rxvq, idx);
+
+ /*
+ * When n_left is set, error is always set to something too.
+ * In case error is due to lack of remaining buffers, we go back up and
+ * retry.
+ * The idea is that it is better to spend some time on packets that
+ * have already been processed than to drop them and fetch fresh
+ * packets that will likely be dropped too.
+ * This technique also gives the VM driver more time to pick up packets.
+ * In case the traffic flows from physical to virtual interfaces, this
+ * technique will end-up leveraging the physical NIC buffer in order to
+ * absorb the VM's CPU jitter.
+ */
+ if (n_left && (error == VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF) && retry)
+ {
+ retry--;
+ goto retry;
+ }
+
+ /* interrupt (call) handling */
+ if ((rxvq->callfd_idx != ~0) &&
+ !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+ {
+ rxvq->n_since_last_int += frame->n_vectors - n_left;
+
+ if (rxvq->n_since_last_int > vum->coalesce_frames)
+ vhost_user_send_call (vm, vui, rxvq);
+ }
+
+ clib_spinlock_unlock (&rxvq->vring_lock);
+
+done3:
+ if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
+ {
+ vlib_error_count (vm, node->node_index, error, n_left);
+ vlib_increment_simple_counter
+ (vnet_main.interface_main.sw_if_counters
+ + VNET_INTERFACE_COUNTER_DROP,
+ thread_index, vui->sw_if_index, n_left);
+ }
+
+ vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
+ return frame->n_vectors;
+}
+
+static __clib_unused clib_error_t *
+vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
+ u32 qid, vnet_hw_if_rx_mode mode)
+{
+ vlib_main_t *vm = vnm->vlib_main;
+ vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
+ vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+ vhost_cpu_t *cpu;
+
+ if (mode == txvq->mode)
+ return 0;
+
+ if ((mode != VNET_HW_IF_RX_MODE_POLLING) &&
+ (mode != VNET_HW_IF_RX_MODE_ADAPTIVE) &&
+ (mode != VNET_HW_IF_RX_MODE_INTERRUPT))
+ {
+ vu_log_err (vui, "unhandled mode %d changed for if %d queue %d", mode,
+ hw_if_index, qid);
+ return clib_error_return (0, "unsupported");
+ }
+
+ if (txvq->thread_index == ~0)
+ return clib_error_return (0, "Queue initialization is not finished yet");
+
+ cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
+ if ((mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
+ (mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
+ {
+ if (txvq->kickfd_idx == ~0)
+ {
+ // We cannot support interrupt mode if the driver opts out
+ return clib_error_return (0, "Driver does not support interrupt");
+ }
+ if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ {
+ ASSERT (cpu->polling_q_count != 0);
+ if (cpu->polling_q_count)
+ cpu->polling_q_count--;
+ vum->ifq_count++;
+ // Start the timer if this is the first encounter on interrupt
+ // interface/queue
+ if ((vum->ifq_count == 1) &&
+ ((vum->coalesce_time > 0.0) || (vum->coalesce_frames > 0)))
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_START_TIMER, 0);
+ }
+ }
+ else if (mode == VNET_HW_IF_RX_MODE_POLLING)
+ {
+ if (((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
+ (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) && vum->ifq_count)
+ {
+ cpu->polling_q_count++;
+ vum->ifq_count--;
+ // Stop the timer if there is no more interrupt interface/queue
+ if (vum->ifq_count == 0)
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_STOP_TIMER, 0);
+ }
+ }
+
+ txvq->mode = mode;
+ vhost_user_set_operation_mode (vui, txvq);
+
+ return 0;
+}
+
+static __clib_unused clib_error_t *
+vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
+ u32 flags)
+{
+ vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
+ u8 link_old, link_new;
+
+ link_old = vui_is_link_up (vui);
+
+ vui->admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ link_new = vui_is_link_up (vui);
+
+ if (link_old != link_new)
+ vnet_hw_interface_set_flags (vnm, vui->hw_if_index, link_new ?
+ VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
+
+ return /* no error */ 0;
+}
+
+VNET_DEVICE_CLASS (vhost_user_device_class) = {
+ .name = "vhost-user",
+ .tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
+ .tx_function_error_strings = vhost_user_tx_func_error_strings,
+ .format_device_name = format_vhost_user_interface_name,
+ .name_renumber = vhost_user_name_renumber,
+ .admin_up_down_function = vhost_user_interface_admin_up_down,
+ .rx_mode_change_function = vhost_user_interface_rx_mode_change,
+ .format_tx_trace = format_vhost_trace,
+};
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vhost/virtio_std.h b/src/plugins/vhost/virtio_std.h
new file mode 100644
index 00000000000..fa826933a9c
--- /dev/null
+++ b/src/plugins/vhost/virtio_std.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __VIRTIO_STD_H__
+#define __VIRTIO_STD_H__
+
+#define foreach_virtio_net_features \
+ _ (VIRTIO_NET_F_CSUM, 0) /* Host handles pkts w/ partial csum */ \
+ _ (VIRTIO_NET_F_GUEST_CSUM, 1) /* Guest handles pkts w/ partial csum */ \
+ _ (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
+ 2) /* Dynamic offload configuration. */ \
+ _ (VIRTIO_NET_F_MTU, 3) /* Initial MTU advice. */ \
+ _ (VIRTIO_NET_F_MAC, 5) /* Host has given MAC address. */ \
+ _ (VIRTIO_NET_F_GSO, 6) /* Host handles pkts w/ any GSO. */ \
+ _ (VIRTIO_NET_F_GUEST_TSO4, 7) /* Guest can handle TSOv4 in. */ \
+ _ (VIRTIO_NET_F_GUEST_TSO6, 8) /* Guest can handle TSOv6 in. */ \
+ _ (VIRTIO_NET_F_GUEST_ECN, 9) /* Guest can handle TSO[6] w/ ECN in. */ \
+ _ (VIRTIO_NET_F_GUEST_UFO, 10) /* Guest can handle UFO in. */ \
+ _ (VIRTIO_NET_F_HOST_TSO4, 11) /* Host can handle TSOv4 in. */ \
+ _ (VIRTIO_NET_F_HOST_TSO6, 12) /* Host can handle TSOv6 in. */ \
+ _ (VIRTIO_NET_F_HOST_ECN, 13) /* Host can handle TSO[6] w/ ECN in. */ \
+ _ (VIRTIO_NET_F_HOST_UFO, 14) /* Host can handle UFO in. */ \
+ _ (VIRTIO_NET_F_MRG_RXBUF, 15) /* Host can merge receive buffers. */ \
+ _ (VIRTIO_NET_F_STATUS, 16) /* virtio_net_config.status available */ \
+ _ (VIRTIO_NET_F_CTRL_VQ, 17) /* Control channel available */ \
+ _ (VIRTIO_NET_F_CTRL_RX, 18) /* Control channel RX mode support */ \
+ _ (VIRTIO_NET_F_CTRL_VLAN, 19) /* Control channel VLAN filtering */ \
+ _ (VIRTIO_NET_F_CTRL_RX_EXTRA, 20) /* Extra RX mode control support */ \
+ _ (VIRTIO_NET_F_GUEST_ANNOUNCE, \
+ 21) /* Guest can announce device on the network */ \
+ _ (VIRTIO_NET_F_MQ, 22) /* Device supports Receive Flow Steering */ \
+ _ (VIRTIO_NET_F_CTRL_MAC_ADDR, 23) /* Set MAC address */ \
+ _ (VIRTIO_F_NOTIFY_ON_EMPTY, 24) \
+ _ (VHOST_F_LOG_ALL, 26) /* Log all write descriptors */ \
+ _ (VIRTIO_F_ANY_LAYOUT, \
+ 27) /* Can the device handle any descriptor layout */ \
+ _ (VIRTIO_RING_F_INDIRECT_DESC, \
+ 28) /* Support indirect buffer descriptors */ \
+ _ (VIRTIO_RING_F_EVENT_IDX, \
+ 29) /* The Guest publishes the used index for which it expects an \
+ * interrupt at the end of the avail ring. Host should ignore the \
+ * avail->flags field. */ \
+ /* The Host publishes the avail index for which it expects a kick \
+ * at the end of the used ring. Guest should ignore the used->flags field. \
+ */ \
+ _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \
+ _ (VIRTIO_F_VERSION_1, 32) /* v1.0 compliant. */ \
+ _ (VIRTIO_F_IOMMU_PLATFORM, 33) \
+ _ (VIRTIO_F_RING_PACKED, 34) \
+ _ (VIRTIO_F_IN_ORDER, 35) /* all buffers are used by the device in the */ \
+ /* same order in which they have been made available */ \
+ _ (VIRTIO_F_ORDER_PLATFORM, 36) /* memory accesses by the driver and the */ \
+  /* device are ordered in a way described by the platform */            \
+ _ (VIRTIO_F_NOTIFICATION_DATA, \
+ 38) /* the driver passes extra data (besides */ \
+ /* identifying the virtqueue) in its device notifications. */ \
+ _ (VIRTIO_NET_F_SPEED_DUPLEX, 63) /* Device set linkspeed and duplex */
+
+typedef enum
+{
+#define _(f, n) f = n,
+ foreach_virtio_net_features
+#undef _
+} vnet_virtio_net_feature_t;
+
+#define VIRTIO_FEATURE(X) (1ULL << X)
+
+#define VRING_MAX_SIZE 32768
+
+#define VRING_DESC_F_NEXT 1
+#define VRING_DESC_F_WRITE 2
+#define VRING_DESC_F_INDIRECT 4
+
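+/*
+ * Packed-ring descriptor state lives in the two bits below: the driver
+ * marks a descriptor available by setting AVAIL to its wrap counter and
+ * USED to the inverse; the device marks it used by setting both bits
+ * equal to its own wrap counter.
+ */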
+#define VRING_DESC_F_AVAIL (1 << 7)
+#define VRING_DESC_F_USED (1 << 15)
+
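+/*
+ * Event suppression flags for packed rings: ENABLE requests
+ * notifications, DISABLE suppresses them, and DESC requests a
+ * notification only for the descriptor encoded in the off_wrap field of
+ * vnet_virtio_vring_desc_event_t below.
+ */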
+#define foreach_virtio_event_idx_flags \
+ _ (VRING_EVENT_F_ENABLE, 0) \
+ _ (VRING_EVENT_F_DISABLE, 1) \
+ _ (VRING_EVENT_F_DESC, 2)
+
+typedef enum
+{
+#define _(f, n) f = n,
+ foreach_virtio_event_idx_flags
+#undef _
+} vnet_virtio_event_idx_flags_t;
+
+#define VRING_USED_F_NO_NOTIFY 1
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
+typedef struct
+{
+ u64 addr;
+ u32 len;
+ u16 flags;
+ u16 next;
+} vnet_virtio_vring_desc_t;
+
+typedef struct
+{
+ u16 flags;
+ u16 idx;
+ u16 ring[0];
+ /* u16 used_event; */
+} vnet_virtio_vring_avail_t;
+
+typedef struct
+{
+ u32 id;
+ u32 len;
+} vnet_virtio_vring_used_elem_t;
+
+typedef struct
+{
+ u16 flags;
+ u16 idx;
+ vnet_virtio_vring_used_elem_t ring[0];
+ /* u16 avail_event; */
+} vnet_virtio_vring_used_t;
+
+typedef CLIB_PACKED (struct {
+ u64 addr; // packet data buffer address
+ u32 len; // packet data buffer size
+ u16 id; // buffer id
+ u16 flags; // flags
+}) vnet_virtio_vring_packed_desc_t;
+
+STATIC_ASSERT_SIZEOF (vnet_virtio_vring_packed_desc_t, 16);
+
+typedef CLIB_PACKED (struct {
+ u16 off_wrap;
+ u16 flags;
+}) vnet_virtio_vring_desc_event_t;
+
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
+#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
+
+#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
+
+typedef CLIB_PACKED (struct {
+ u8 flags;
+ u8 gso_type;
+ u16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */
+ u16 gso_size; /* Bytes to append to hdr_len per frame */
+ u16 csum_start; /* Position to start checksumming from */
+ u16 csum_offset; /* Offset after that to place checksum */
+ u16 num_buffers; /* Number of merged rx buffers */
+}) vnet_virtio_net_hdr_v1_t;
+
+typedef CLIB_PACKED (struct {
+ u8 flags;
+ u8 gso_type;
+ u16 hdr_len;
+ u16 gso_size;
+ u16 csum_start;
+ u16 csum_offset;
+}) vnet_virtio_net_hdr_t;
+
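+/* when VIRTIO_NET_F_MRG_RXBUF (or VIRTIO_F_VERSION_1) is negotiated, the
+ * net header grows a num_buffers field counting the descriptors merged
+ * into a single packet */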
+typedef CLIB_PACKED (struct {
+ vnet_virtio_net_hdr_t hdr;
+ u16 num_buffers;
+}) vnet_virtio_net_hdr_mrg_rxbuf_t;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vmxnet3/README.md b/src/plugins/vmxnet3/README.md
deleted file mode 100644
index 6e9fb194c94..00000000000
--- a/src/plugins/vmxnet3/README.md
+++ /dev/null
@@ -1,64 +0,0 @@
-# VMWARE vmxnet3 device driver plugin {#vmxnet3_doc}
-
-##Overview
-This plugin provides native PCI driver support for VMWare vmxnet3.
-
-##Prerequisites
- * This code is tested with vfio-pci driver installed with Ubuntu 18.04 which
-has kernel version 4.15.0-33-generic.
-
- * This driver is tested with ESXi vSwitch version 6.5/6.7 for LRO/TSO support, VMware Workstation 15 Pro (no LRO/TSO), and VMware Fusion 11 Pro (no LRO/TSO)
-
- * Driver requires MSI-X interrupt support, which is not supported by
-uio_pci_generic driver. So vfio-pci must be used. On systems without IOMMU,
-vfio driver can still be used with 4.15.0-33-generic kernel (Ubuntu 18.04) which supports no-iommu mode.
-
-##Known issues
-
-* VLAN filter
-
-## Usage
-### System setup
-
-1. load VFIO driver
-```
-sudo modprobe vfio-pci
-```
-
-2. Make sure the interface is down
-```
-sudo ifconfig <if-name> down
-```
-
-Steps 3 and 4 are optional. They can be accomplished by specifying the optional keyword "bind" when creating the vmxnet3 interface.
-
-3. (systems without IOMMU only) enable unsafe NOIOMMU mode
-```
-echo Y | sudo tee /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
-```
-
-4. Bind interface to vfio-pci
-```
-sudo dpdk-devbind.py --bind vfio-pci 0b:00.0
-```
-
-### Interface Creation
-Interface can be dynamically created with following CLI, with or without the bind option. If step 3 and 4 were executed, bind can be omitted.
-```
-create interface vmxnet3 0000:0b:00.0 bind
-set int state vmxnet3-0/b/0/0 up
-```
-
-### Interface Deletion
-Interface can be deleted with following CLI:
-```
-delete interface vmxnet3 <if-name>
-```
-
-### Interface Statistics
-Interface statistics can be displayed with `show hardware-interface <if-name>`
-command.
-
-### Show Interface CLI
-Interface and ring information can be obtained with
-`show vmxnet3 [if-name] [desc]`
diff --git a/src/plugins/vmxnet3/README.rst b/src/plugins/vmxnet3/README.rst
new file mode 100644
index 00000000000..14430433c17
--- /dev/null
+++ b/src/plugins/vmxnet3/README.rst
@@ -0,0 +1,86 @@
+VMWARE vmxnet3 device driver
+============================
+
+Overview
+--------
+
+This plugin provides native PCI driver support for VMware vmxnet3.
+
+Prerequisites
+-------------
+
+- This code is tested with the vfio-pci driver installed on Ubuntu
+  18.04, which has kernel version 4.15.0-33-generic.
+
+- This driver is tested with ESXi vSwitch version 6.5/6.7 for LRO/TSO
+ support, VMware Workstation 15 Pro (no LRO/TSO), and VMware Fusion 11
+ Pro (no LRO/TSO)
+
+- Driver requires MSI-X interrupt support, which is not supported by
+ uio_pci_generic driver. So vfio-pci must be used. On systems without
+ IOMMU, vfio driver can still be used with 4.15.0-33-generic kernel
+ (Ubuntu 18.04) which supports no-iommu mode.
+
+Known issues
+------------
+
+- VLAN filter
+
+Usage
+-----
+
+System setup
+~~~~~~~~~~~~
+
+1. Load the VFIO driver
+
+::
+
+ sudo modprobe vfio-pci
+
+2. Make sure the interface is down
+
+::
+
+ sudo ifconfig <if-name> down
+
+Steps 3 and 4 are optional. They can be accomplished by specifying the
+optional keyword ``bind`` when creating the vmxnet3 interface.
+
+3. (systems without IOMMU only) enable unsafe NOIOMMU mode
+
+::
+
+ echo Y | sudo tee /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
+
+4. Bind interface to vfio-pci
+
+::
+
+ sudo dpdk-devbind.py --bind vfio-pci 0b:00.0
+
+Interface Creation
+~~~~~~~~~~~~~~~~~~
+
+The interface can be dynamically created with the following CLI, with or
+without the bind option. If steps 3 and 4 were executed, bind can be
+omitted.
+
+::
+
+ create interface vmxnet3 0000:0b:00.0 bind
+ set int state vmxnet3-0/b/0/0 up
+
+Interface Deletion
+~~~~~~~~~~~~~~~~~~
+
+The interface can be deleted with the following CLI:
+
+::
+
+ delete interface vmxnet3 <if-name>
+
+Interface Statistics
+~~~~~~~~~~~~~~~~~~~~
+
+Interface statistics can be displayed with the
+``show hardware-interface <if-name>`` command.
+
+Show Interface CLI
+~~~~~~~~~~~~~~~~~~
+
+Interface and ring information can be obtained with the
+``show vmxnet3 [if-name] [desc]`` command.
diff --git a/src/plugins/vmxnet3/cli.c b/src/plugins/vmxnet3/cli.c
index 71342bd535c..d682e3ec2c9 100644
--- a/src/plugins/vmxnet3/cli.c
+++ b/src/plugins/vmxnet3/cli.c
@@ -47,8 +47,10 @@ vmxnet3_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
args.enable_gso = 1;
else if (unformat (line_input, "elog"))
args.enable_elog = 1;
+ else if (unformat (line_input, "bind force"))
+ args.bind = VMXNET3_BIND_FORCE;
else if (unformat (line_input, "bind"))
- args.bind = 1;
+ args.bind = VMXNET3_BIND_DEFAULT;
else if (unformat (line_input, "rx-queue-size %u", &size))
args.rxq_size = size;
else if (unformat (line_input, "tx-queue-size %u", &size))
@@ -58,12 +60,14 @@ vmxnet3_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (line_input, "num-rx-queues %u", &size))
args.rxq_num = size;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
}
unformat_free (line_input);
-
vmxnet3_create_if (vm, &args);
if (args.error == 0)
vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
@@ -72,16 +76,15 @@ vmxnet3_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vmxnet3_create_command, static) = {
.path = "create interface vmxnet3",
- .short_help = "create interface vmxnet3 <pci-address>"
- " [rx-queue-size <size>] [tx-queue-size <size>]"
- " [num-tx-queues <number>] [num-rx-queues <number>] [bind]"
- " [gso]",
+ .short_help =
+ "create interface vmxnet3 <pci-address>"
+ " [rx-queue-size <size>] [tx-queue-size <size>]"
+ " [num-tx-queues <number>] [num-rx-queues <number>] [bind [force]]"
+ " [gso]",
.function = vmxnet3_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vmxnet3_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -126,14 +129,12 @@ vmxnet3_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vmxnet3_delete_command, static) = {
.path = "delete interface vmxnet3",
.short_help = "delete interface vmxnet3 "
"{<interface> | sw_if_index <sw_idx>}",
.function = vmxnet3_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vmxnet3_test_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -187,14 +188,12 @@ vmxnet3_test_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vmxnet3_test_command, static) = {
.path = "test vmxnet3",
.short_help = "test vmxnet3 <interface> | sw_if_index <sw_idx> [irq] "
"[elog-on] [elog-off]",
.function = vmxnet3_test_command_fn,
};
-/* *INDENT-ON* */
static void
show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr,
@@ -213,6 +212,15 @@ show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr,
vmxnet3_tx_comp *tx_comp;
u16 qid;
+ vlib_cli_output (vm, "Global:");
+ for (u32 tid = 0; tid <= vlib_num_workers (); tid++)
+ {
+ vmxnet3_per_thread_data_t *ptd =
+ vec_elt_at_index (vmxm->per_thread_data, tid);
+ vlib_cli_output (vm, " Thread %u: polling queue count %u", tid,
+ ptd->polling_q_count);
+ }
+
if (!hw_if_indices)
return;
@@ -568,24 +576,25 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vmxnet3_command, static) = {
.path = "show vmxnet3",
.short_help = "show vmxnet3 [[<interface>] ([desc] | ([rx-comp] | "
"[rx-desc-0] | [rx-desc-1] | [tx-comp] | [tx-desc]) [<slot>])]",
.function = show_vmxnet3_fn,
};
-/* *INDENT-ON* */
clib_error_t *
vmxnet3_cli_init (vlib_main_t * vm)
{
vmxnet3_main_t *vmxm = &vmxnet3_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
/* initialize binary API */
vmxnet3_plugin_api_hookup (vm);
vmxm->log_default = vlib_log_register_class ("vmxnet3", 0);
+
+ vec_validate (vmxm->per_thread_data, tm->n_vlib_mains - 1);
return 0;
}
diff --git a/src/plugins/vmxnet3/format.c b/src/plugins/vmxnet3/format.c
index d463feb3bec..43d790d31eb 100644
--- a/src/plugins/vmxnet3/format.c
+++ b/src/plugins/vmxnet3/format.c
@@ -164,7 +164,7 @@ format_vmxnet3_input_trace (u8 * s, va_list * args)
s = format (s, "vmxnet3: %v (%d) next-node %U",
hi->name, t->hw_if_index, format_vlib_next_node_name, vm,
node->index, t->next_index);
- s = format (s, "\n buffer %U", format_vnet_buffer, &t->buffer);
+ s = format (s, "\n buffer %U", format_vnet_buffer_no_chain, &t->buffer);
return s;
}
diff --git a/src/plugins/vmxnet3/input.c b/src/plugins/vmxnet3/input.c
index 3015fb116ca..25632546b6d 100644
--- a/src/plugins/vmxnet3/input.c
+++ b/src/plugins/vmxnet3/input.c
@@ -23,6 +23,7 @@
#include <vnet/ip/ip6_packet.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
#include <vnet/interface/rx_queue_funcs.h>
#include <vmxnet3/vmxnet3.h>
@@ -106,19 +107,11 @@ vmxnet3_handle_offload (vmxnet3_rx_comp * rx_comp, vlib_buffer_t * hb,
{
if (rx_comp->flags & VMXNET3_RXCF_TCP)
{
- tcp_header_t *tcp =
- (tcp_header_t *) (hb->data +
- vnet_buffer (hb)->l4_hdr_offset);
oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
- tcp->checksum = 0;
}
else if (rx_comp->flags & VMXNET3_RXCF_UDP)
{
- udp_header_t *udp =
- (udp_header_t *) (hb->data +
- vnet_buffer (hb)->l4_hdr_offset);
oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- udp->checksum = 0;
}
}
}
@@ -384,8 +377,8 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (PREDICT_FALSE
(vnet_device_input_have_features (vd->sw_if_index)))
{
- vnet_feature_start_device_input_x1 (vd->sw_if_index,
- &next_index, hb);
+ vnet_feature_start_device_input (vd->sw_if_index, &next_index,
+ hb);
known_next = 1;
}
@@ -487,7 +480,6 @@ VLIB_NODE_FN (vmxnet3_input_node) (vlib_main_t * vm,
}
#ifndef CLIB_MARCH_VARIANT
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vmxnet3_input_node) = {
.name = "vmxnet3-input",
.sibling_of = "device-input",
@@ -500,7 +492,6 @@ VLIB_REGISTER_NODE (vmxnet3_input_node) = {
};
#endif
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vmxnet3/plugin.c b/src/plugins/vmxnet3/plugin.c
index 76f1cfc5e3e..20caf97f612 100644
--- a/src/plugins/vmxnet3/plugin.c
+++ b/src/plugins/vmxnet3/plugin.c
@@ -19,12 +19,10 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "VMWare Vmxnet3 Device Driver",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vmxnet3/vmxnet3.c b/src/plugins/vmxnet3/vmxnet3.c
index ff0a7dc706b..e64e0d135d6 100644
--- a/src/plugins/vmxnet3/vmxnet3.c
+++ b/src/plugins/vmxnet3/vmxnet3.c
@@ -69,11 +69,23 @@ vmxnet3_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
vmxnet3_device_t *vd = pool_elt_at_index (vmxm->devices, hw->dev_instance);
vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid);
+ vmxnet3_per_thread_data_t *ptd;
- if (mode == VNET_HW_IF_RX_MODE_POLLING)
- rxq->int_mode = 0;
+ if (mode == rxq->mode)
+ return 0;
+ if ((mode != VNET_HW_IF_RX_MODE_POLLING) &&
+ (mode != VNET_HW_IF_RX_MODE_INTERRUPT))
+ return clib_error_return (0, "Rx mode %U not supported",
+ format_vnet_hw_if_rx_mode, mode);
+ rxq->mode = mode;
+ ptd = vec_elt_at_index (vmxm->per_thread_data, rxq->thread_index);
+ if (rxq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ ptd->polling_q_count++;
else
- rxq->int_mode = 1;
+ {
+ ASSERT (ptd->polling_q_count != 0);
+ ptd->polling_q_count--;
+ }
return 0;
}
@@ -133,7 +145,6 @@ static char *vmxnet3_tx_func_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vmxnet3_device_class,) =
{
.name = "VMXNET3 interface",
@@ -146,7 +157,6 @@ VNET_DEVICE_CLASS (vmxnet3_device_class,) =
.tx_function_n_errors = VMXNET3_TX_N_ERROR,
.tx_function_error_strings = vmxnet3_tx_func_error_strings,
};
-/* *INDENT-ON* */
static u32
vmxnet3_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
@@ -288,6 +298,7 @@ vmxnet3_rxq_init (vlib_main_t * vm, vmxnet3_device_t * vd, u16 qid, u16 qsz)
rxq = vec_elt_at_index (vd->rxqs, qid);
clib_memset (rxq, 0, sizeof (*rxq));
rxq->size = qsz;
+ rxq->mode = VNET_HW_IF_RX_MODE_POLLING;
for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++)
{
rxq->rx_desc[rid] = vlib_physmem_alloc_aligned_on_numa
@@ -534,8 +545,13 @@ vmxnet3_rxq_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
u16 qid = line;
vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid);
- if (vec_len (vd->rxqs) > qid && vd->rxqs[qid].int_mode != 0)
- vnet_hw_if_rx_queue_set_int_pending (vnm, rxq->queue_index);
+ if (vec_len (vd->rxqs) > qid && (rxq->mode != VNET_HW_IF_RX_MODE_POLLING))
+ {
+ vmxnet3_per_thread_data_t *ptd =
+ vec_elt_at_index (vmxm->per_thread_data, rxq->thread_index);
+ if (ptd->polling_q_count == 0)
+ vnet_hw_if_rx_queue_set_int_pending (vnm, rxq->queue_index);
+ }
}
static void
@@ -554,8 +570,9 @@ vmxnet3_event_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
{
vd->flags |= VMXNET3_DEVICE_F_LINK_UP;
vd->link_speed = ret >> 16;
- vnet_hw_interface_set_link_speed (vnm, vd->hw_if_index,
- vd->link_speed * 1000);
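+      /* UINT32_MAX is the "link speed unknown" sentinel; pass it through
+       * unscaled so the Mbps-to-kbps conversion cannot wrap */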
+ vnet_hw_interface_set_link_speed (
+ vnm, vd->hw_if_index,
+ (vd->link_speed == UINT32_MAX) ? UINT32_MAX : vd->link_speed * 1000);
vnet_hw_interface_set_flags (vnm, vd->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
}
@@ -599,8 +616,11 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
{
vnet_main_t *vnm = vnet_get_main ();
vmxnet3_main_t *vmxm = &vmxnet3_main;
+ vnet_eth_interface_registration_t eir = {};
+
vmxnet3_device_t *vd;
vlib_pci_dev_handle_t h;
+ vnet_hw_if_caps_change_t cc = {};
clib_error_t *error = 0;
u16 qid;
u32 num_intr;
@@ -653,7 +673,6 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
return;
}
- /* *INDENT-OFF* */
pool_foreach (vd, vmxm->devices) {
if (vd->pci_addr.as_u32 == args->addr.as_u32)
{
@@ -666,11 +685,11 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
return;
}
}
- /* *INDENT-ON* */
if (args->bind)
{
- error = vlib_pci_bind_to_uio (vm, &args->addr, (char *) "auto");
+ error = vlib_pci_bind_to_uio (vm, &args->addr, (char *) "auto",
+ VMXNET3_BIND_FORCE == args->bind);
if (error)
{
args->rv = VNET_API_ERROR_INVALID_INTERFACE;
@@ -784,29 +803,24 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
}
/* create interface */
- error = ethernet_register_interface (vnm, vmxnet3_device_class.index,
- vd->dev_instance, vd->mac_addr,
- &vd->hw_if_index, vmxnet3_flag_change);
-
- if (error)
- {
- vmxnet3_log_error (vd,
- "error encountered on ethernet register interface");
- goto error;
- }
+ eir.dev_class_index = vmxnet3_device_class.index;
+ eir.dev_instance = vd->dev_instance;
+ eir.address = vd->mac_addr;
+ eir.cb.flag_change = vmxnet3_flag_change;
+ vd->hw_if_index = vnet_eth_register_interface (vnm, &eir);
vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, vd->hw_if_index);
vd->sw_if_index = sw->sw_if_index;
args->sw_if_index = sw->sw_if_index;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vd->hw_if_index);
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ cc.mask = VNET_HW_IF_CAP_INT_MODE | VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
if (vd->gso_enable)
- {
- hw->caps |= (VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM);
- }
+ cc.val = cc.mask;
+ else
+ cc.val = VNET_HW_IF_CAP_INT_MODE;
+
+ vnet_hw_if_change_caps (vnm, vd->hw_if_index, &cc);
vnet_hw_if_set_input_node (vnm, vd->hw_if_index, vmxnet3_input_node.index);
/* Disable interrupts */
@@ -815,12 +829,20 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
{
vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid);
u32 qi, fi;
+ vmxnet3_per_thread_data_t *ptd;
qi = vnet_hw_if_register_rx_queue (vnm, vd->hw_if_index, qid,
VNET_HW_IF_RXQ_THREAD_ANY);
fi = vlib_pci_get_msix_file_index (vm, vd->pci_dev_handle, qid);
vnet_hw_if_set_rx_queue_file_index (vnm, qi, fi);
rxq->queue_index = qi;
+ rxq->thread_index =
+ vnet_hw_if_get_rx_queue_thread_index (vnm, rxq->queue_index);
+ if (rxq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ {
+ ptd = vec_elt_at_index (vmxm->per_thread_data, rxq->thread_index);
+ ptd->polling_q_count++;
+ }
rxq->buffer_pool_index =
vnet_hw_if_get_rx_queue_numa_node (vnm, rxq->queue_index);
vmxnet3_rxq_refill_ring0 (vm, vd, rxq);
@@ -843,8 +865,9 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
vd->flags |= VMXNET3_DEVICE_F_INITIALIZED;
vmxnet3_enable_interrupt (vd);
- vnet_hw_interface_set_link_speed (vnm, vd->hw_if_index,
- vd->link_speed * 1000);
+ vnet_hw_interface_set_link_speed (
+ vnm, vd->hw_if_index,
+ (vd->link_speed == UINT32_MAX) ? UINT32_MAX : vd->link_speed * 1000);
if (vd->flags & VMXNET3_DEVICE_F_LINK_UP)
vnet_hw_interface_set_flags (vnm, vd->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
@@ -880,13 +903,19 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd)
vlib_pci_device_close (vm, vd->pci_dev_handle);
- /* *INDENT-OFF* */
vec_foreach_index (i, vd->rxqs)
{
vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, i);
u16 mask = rxq->size - 1;
u16 rid;
+ vmxnet3_per_thread_data_t *ptd =
+ vec_elt_at_index (vmxm->per_thread_data, rxq->thread_index);
+ if (rxq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ {
+ ASSERT (ptd->polling_q_count != 0);
+ ptd->polling_q_count--;
+ }
for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++)
{
vmxnet3_rx_ring *ring;
@@ -900,11 +929,9 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd)
}
vlib_physmem_free (vm, rxq->rx_comp);
}
- /* *INDENT-ON* */
vec_free (vd->rxqs);
vec_free (vd->rx_stats);
- /* *INDENT-OFF* */
vec_foreach_index (i, vd->txqs)
{
vmxnet3_txq_t *txq = vec_elt_at_index (vd->txqs, i);
@@ -925,7 +952,6 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd)
vlib_physmem_free (vm, txq->tx_desc);
vlib_physmem_free (vm, txq->tx_comp);
}
- /* *INDENT-ON* */
vec_free (vd->txqs);
vec_free (vd->tx_stats);
diff --git a/src/plugins/vmxnet3/vmxnet3.h b/src/plugins/vmxnet3/vmxnet3.h
index 75107689443..89602f8ee9e 100644
--- a/src/plugins/vmxnet3/vmxnet3.h
+++ b/src/plugins/vmxnet3/vmxnet3.h
@@ -513,10 +513,17 @@ typedef struct
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
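+  /* number of RX queues currently in polling mode on this thread; the
+   * IRQ handler only signals interrupt dispatch when this is zero */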
+ u32 polling_q_count;
+} vmxnet3_per_thread_data_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u16 size;
- u8 int_mode;
+ u32 mode;
u8 buffer_pool_index;
u32 queue_index;
+ u32 thread_index;
vmxnet3_rx_ring rx_ring[VMXNET3_RX_RING_SIZE];
vmxnet3_rx_desc *rx_desc[VMXNET3_RX_RING_SIZE];
vmxnet3_rx_comp *rx_comp;
@@ -594,10 +601,18 @@ typedef struct
vmxnet3_device_t *devices;
u16 msg_id_base;
vlib_log_class_t log_default;
+ vmxnet3_per_thread_data_t *per_thread_data;
} vmxnet3_main_t;
extern vmxnet3_main_t vmxnet3_main;
+typedef enum
+{
+ VMXNET3_BIND_NONE = 0,
+ VMXNET3_BIND_DEFAULT = 1,
+ VMXNET3_BIND_FORCE = 2,
+} __clib_packed vmxnet3_bind_t;
+
typedef struct
{
vlib_pci_addr_t addr;
@@ -606,7 +621,7 @@ typedef struct
u16 rxq_num;
u16 txq_size;
u16 txq_num;
- u8 bind;
+ vmxnet3_bind_t bind;
u8 enable_gso;
/* return */
i32 rv;
diff --git a/src/plugins/vmxnet3/vmxnet3_api.c b/src/plugins/vmxnet3/vmxnet3_api.c
index cef0770a63b..c51c07b705d 100644
--- a/src/plugins/vmxnet3/vmxnet3_api.c
+++ b/src/plugins/vmxnet3/vmxnet3_api.c
@@ -29,6 +29,7 @@
#include <vmxnet3/vmxnet3.api_enum.h>
#include <vmxnet3/vmxnet3.api_types.h>
+#define REPLY_MSG_ID_BASE (vmxm->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static void
@@ -54,12 +55,8 @@ vl_api_vmxnet3_create_t_handler (vl_api_vmxnet3_create_t * mp)
vmxnet3_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_VMXNET3_CREATE_REPLY + vmxm->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_VMXNET3_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -87,7 +84,7 @@ vl_api_vmxnet3_delete_t_handler (vl_api_vmxnet3_delete_t * mp)
vmxnet3_delete_if (vm, vd);
reply:
- REPLY_MACRO (VL_API_VMXNET3_DELETE_REPLY + vmxm->msg_id_base);
+ REPLY_MACRO (VL_API_VMXNET3_DELETE_REPLY);
}
static void
@@ -173,16 +170,14 @@ vl_api_vmxnet3_dump_t_handler (vl_api_vmxnet3_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (vd, vmxm->devices)
{
swif = vnet_get_sw_interface (vnm, vd->sw_if_index);
if_name = format (if_name, "%U%c", format_vnet_sw_interface_name, vnm,
swif, 0);
send_vmxnet3_details (reg, vd, swif, if_name, mp->context);
- _vec_len (if_name) = 0;
+ vec_set_len (if_name, 0);
}
- /* *INDENT-ON* */
vec_free (if_name);
}
@@ -211,7 +206,6 @@ static void vl_api_sw_vmxnet3_interface_dump_t_handler
(vnet_sw_interface_is_api_valid (vnm, filter_sw_if_index) == 0))
goto bad_sw_if_index;
- /* *INDENT-OFF* */
pool_foreach (vd, vmxm->devices)
{
if ((filter_sw_if_index == ~0) ||
@@ -221,10 +215,9 @@ static void vl_api_sw_vmxnet3_interface_dump_t_handler
if_name = format (if_name, "%U%c", format_vnet_sw_interface_name, vnm,
swif, 0);
send_vmxnet3_details (reg, vd, swif, if_name, mp->context);
- _vec_len (if_name) = 0;
+ vec_set_len (if_name, 0);
}
}
- /* *INDENT-ON* */
BAD_SW_IF_INDEX_LABEL;
vec_free (if_name);
diff --git a/src/plugins/vmxnet3/vmxnet3_test.c b/src/plugins/vmxnet3/vmxnet3_test.c
index 6fa9373486c..9b73c09d03c 100644
--- a/src/plugins/vmxnet3/vmxnet3_test.c
+++ b/src/plugins/vmxnet3/vmxnet3_test.c
@@ -34,7 +34,7 @@
/* declare message IDs */
#include <vmxnet3/vmxnet3.api_enum.h>
#include <vmxnet3/vmxnet3.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
diff --git a/src/plugins/vrrp/node.c b/src/plugins/vrrp/node.c
index 7ba18c4f75c..d5594ae0e43 100644
--- a/src/plugins/vrrp/node.c
+++ b/src/plugins/vrrp/node.c
@@ -86,22 +86,16 @@ typedef enum
VRRP_INPUT_N_NEXT,
} vrrp_next_t;
-typedef struct vrrp_input_process_args
-{
- u32 vr_index;
- vrrp_header_t *pkt;
-} vrrp_input_process_args_t;
-
/* Given a VR and a pointer to the VRRP header of an incoming packet,
* compare the local src address to the peers. Return < 0 if the local
* address < the peer address, 0 if they're equal, > 0 if
* the local address > the peer address
*/
static int
-vrrp_vr_addr_cmp (vrrp_vr_t * vr, vrrp_header_t * pkt)
+vrrp_vr_addr_cmp (vrrp_vr_t *vr, ip46_address_t *peer_addr)
{
vrrp_vr_config_t *vrc = &vr->config;
- void *peer_addr, *local_addr;
+ void *peer_addr_bytes, *local_addr;
ip46_address_t addr;
int addr_size;
@@ -109,7 +103,7 @@ vrrp_vr_addr_cmp (vrrp_vr_t * vr, vrrp_header_t * pkt)
if (vrrp_vr_is_ipv6 (vr))
{
- peer_addr = &(((ip6_header_t *) pkt) - 1)->src_address;
+ peer_addr_bytes = &peer_addr->ip6;
local_addr = &addr.ip6;
addr_size = 16;
ip6_address_copy (local_addr,
@@ -117,25 +111,26 @@ vrrp_vr_addr_cmp (vrrp_vr_t * vr, vrrp_header_t * pkt)
}
else
{
- peer_addr = &(((ip4_header_t *) pkt) - 1)->src_address;
+ peer_addr_bytes = &peer_addr->ip4;
local_addr = &addr.ip4;
addr_size = 4;
fib_sas4_get (vrc->sw_if_index, NULL, local_addr);
}
- return memcmp (local_addr, peer_addr, addr_size);
+ return memcmp (local_addr, peer_addr_bytes, addr_size);
}
static void
-vrrp_input_process_master (vrrp_vr_t * vr, vrrp_header_t * pkt)
+vrrp_input_process_master (vrrp_vr_t *vr, vrrp_input_process_args_t *args)
{
/* received priority 0, another VR is shutting down. send an adv and
* remain in the master state
*/
- if (pkt->priority == 0)
+ if (args->priority == 0)
{
clib_warning ("Received shutdown message from a peer on VR %U",
format_vrrp_vr_key, vr);
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_PRIO0_RCVD, vr->stat_index);
vrrp_adv_send (vr, 0);
vrrp_vr_timer_set (vr, VRRP_VR_TIMER_ADV);
return;
@@ -146,11 +141,11 @@ vrrp_input_process_master (vrrp_vr_t * vr, vrrp_header_t * pkt)
* - received priority == adjusted priority and peer addr > local addr
* allow the local VR to be preempted by the peer
*/
- if ((pkt->priority > vrrp_vr_priority (vr)) ||
- ((pkt->priority == vrrp_vr_priority (vr)) &&
- (vrrp_vr_addr_cmp (vr, pkt) < 0)))
+ if ((args->priority > vrrp_vr_priority (vr)) ||
+ ((args->priority == vrrp_vr_priority (vr)) &&
+ (vrrp_vr_addr_cmp (vr, &args->src_addr) < 0)))
{
- vrrp_vr_transition (vr, VRRP_VR_STATE_BACKUP, pkt);
+ vrrp_vr_transition (vr, VRRP_VR_STATE_BACKUP, args);
return;
}
@@ -163,16 +158,17 @@ vrrp_input_process_master (vrrp_vr_t * vr, vrrp_header_t * pkt)
/* RFC 5798 section 6.4.2 */
static void
-vrrp_input_process_backup (vrrp_vr_t * vr, vrrp_header_t * pkt)
+vrrp_input_process_backup (vrrp_vr_t *vr, vrrp_input_process_args_t *args)
{
vrrp_vr_config_t *vrc = &vr->config;
vrrp_vr_runtime_t *vrt = &vr->runtime;
/* master shutting down, ready for election */
- if (pkt->priority == 0)
+ if (args->priority == 0)
{
clib_warning ("Master for VR %U is shutting down", format_vrrp_vr_key,
vr);
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_PRIO0_RCVD, vr->stat_index);
vrt->master_down_int = vrt->skew;
vrrp_vr_timer_set (vr, VRRP_VR_TIMER_MASTER_DOWN);
return;
@@ -180,10 +176,9 @@ vrrp_input_process_backup (vrrp_vr_t * vr, vrrp_header_t * pkt)
/* no preempt set or adv from a higher priority router, update timers */
if (!(vrc->flags & VRRP_VR_PREEMPT) ||
- (pkt->priority >= vrrp_vr_priority (vr)))
+ (args->priority >= vrrp_vr_priority (vr)))
{
- vrt->master_adv_int = clib_net_to_host_u16 (pkt->rsvd_and_max_adv_int);
- vrt->master_adv_int &= ((u16) 0x0fff); /* ignore rsvd bits */
+ vrt->master_adv_int = args->max_adv_int;
vrrp_vr_skew_compute (vr);
vrrp_vr_master_down_compute (vr);
@@ -208,19 +203,21 @@ vrrp_input_process (vrrp_input_process_args_t * args)
return;
}
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_ADV_RCVD, vr->stat_index);
+
switch (vr->runtime.state)
{
case VRRP_VR_STATE_INIT:
return;
case VRRP_VR_STATE_BACKUP:
/* this is usually the only state an advertisement should be received */
- vrrp_input_process_backup (vr, args->pkt);
+ vrrp_input_process_backup (vr, args);
break;
case VRRP_VR_STATE_MASTER:
/* might be getting preempted. or have a misbehaving peer */
clib_warning ("Received advertisement for master VR %U",
format_vrrp_vr_key, vr);
- vrrp_input_process_master (vr, args->pkt);
+ vrrp_input_process_master (vr, args);
break;
default:
clib_warning ("Received advertisement for VR %U in unknown state %d",
@@ -489,7 +486,6 @@ VLIB_NODE_FN (vrrp4_arp_input_node) (vlib_main_t * vm,
return vrrp_arp_nd_input_inline (vm, node, frame, 0 /* is_ipv6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vrrp4_arp_input_node) =
{
.name = "vrrp4-arp-input",
@@ -522,7 +518,6 @@ VLIB_NODE_FN (vrrp6_nd_input_node) (vlib_main_t * vm,
return vrrp_arp_nd_input_inline (vm, node, frame, 1 /* is_ipv6 */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vrrp6_nd_input_node) =
{
.name = "vrrp6-nd-input",
@@ -586,6 +581,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
addr_len = 16;
payload_len0 = clib_net_to_host_u16 (ip6->payload_length);
vlib_buffer_advance (b0, sizeof (*ip6));
+ clib_memcpy_fast (&args0.src_addr.ip6, &ip6->src_address, addr_len);
}
else
{
@@ -596,6 +592,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
addr_len = 4;
payload_len0 = clib_net_to_host_u16 (ip4->length) - sizeof(*ip4);
vlib_buffer_advance (b0, sizeof (*ip4));
+ clib_memcpy_fast (&args0.src_addr.ip4, &ip4->src_address, addr_len);
}
next0 = VRRP_INPUT_NEXT_DROP;
@@ -612,6 +609,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (*ttl0 != 255)
{
error0 = VRRP_ERROR_BAD_TTL;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_TTL);
goto trace;
}
@@ -619,6 +617,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if ((vrrp0->vrrp_version_and_type >> 4) != 3)
{
error0 = VRRP_ERROR_NOT_VERSION_3;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_VERSION);
goto trace;
}
@@ -627,6 +626,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
((u32) vrrp0->n_addrs) * addr_len)
{
error0 = VRRP_ERROR_INCOMPLETE_PKT;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_PKT_LEN);
goto trace;
}
@@ -634,6 +634,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (rx_csum0 != vrrp_adv_csum (ip0, vrrp0, is_ipv6, payload_len0))
{
error0 = VRRP_ERROR_BAD_CHECKSUM;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_CHKSUM);
goto trace;
}
@@ -643,6 +644,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vrrp0->vr_id, is_ipv6)))
{
error0 = VRRP_ERROR_UNKNOWN_VR;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_VRID);
goto trace;
}
@@ -651,12 +653,14 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (vrrp0->n_addrs != vec_len (vr0->config.vr_addrs))
{
error0 = VRRP_ERROR_ADDR_MISMATCH;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_ADDR_LIST);
goto trace;
}
/* signal main thread to process contents of packet */
args0.vr_index = vr0 - vmp->vrs;
- args0.pkt = vrrp0;
+ args0.priority = vrrp0->priority;
+ args0.max_adv_int = vrrp_adv_int_from_packet (vrrp0);
vl_api_rpc_call_main_thread (vrrp_input_process, (u8 *) &args0,
sizeof (args0));
@@ -693,7 +697,6 @@ VLIB_NODE_FN (vrrp4_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return vrrp_input_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vrrp4_input_node) =
{
.name = "vrrp4-input",
@@ -1098,7 +1101,6 @@ vrrp_input_init (vlib_main_t *vm)
VLIB_INIT_FUNCTION (vrrp_input_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vrrp/setup.pg b/src/plugins/vrrp/setup.pg
index 9275fcc46c6..6328448d79e 100644
--- a/src/plugins/vrrp/setup.pg
+++ b/src/plugins/vrrp/setup.pg
@@ -7,14 +7,14 @@ set int state loop0 up
comment { Packet generator script. Src MAC 00:de:ad:be:ef:01 }
comment { Dst mac 01:ba:db:ab:be:01 ethtype 0800 }
-packet-generator new {
- name simple
- limit 1
- size 128-128
- interface loop0
- node vrrp
- data {
- hex 0x00deadbeef0001badbabbe010800
- incrementing 30
- }
+packet-generator new { \
+ name simple \
+ limit 1 \
+ size 128-128 \
+ interface loop0 \
+ node vrrp \
+ data { \
+ hex 0x00deadbeef0001badbabbe010800 \
+ incrementing 30 \
+ } \
}
diff --git a/src/plugins/vrrp/vrrp.api b/src/plugins/vrrp/vrrp.api
index a34b06ffc57..03193e99a2c 100644
--- a/src/plugins/vrrp/vrrp.api
+++ b/src/plugins/vrrp/vrrp.api
@@ -5,7 +5,7 @@
*
*/
-option version = "1.0.1";
+option version = "1.1.1";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
@@ -60,6 +60,55 @@ autoreply define vrrp_vr_add_del {
vl_api_address_t addrs[n_addrs];
};
+/** @brief Replace an existing VRRP virtual router in-place or create a new one
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vrrp_index - an existing VRRP entry to replace, or 0xffffffff to create a new one
+ @param sw_if_index - interface backed up by this vr
+ @param vr_id - the VR ID advertised by this vr
+ @param priority - the priority advertised for this vr
+ @param interval - interval between advertisements in centiseconds
+ @param flags - bit flags for booleans - preempt, accept, unicast, ipv6
+ @param n_addrs - number of addresses being backed up by this vr
+ @param addrs - the addresses backed up by this vr
+*/
+define vrrp_vr_update {
+ u32 client_index;
+ u32 context;
+ u32 vrrp_index;
+ vl_api_interface_index_t sw_if_index;
+ u8 vr_id;
+ u8 priority;
+ u16 interval;
+ vl_api_vrrp_vr_flags_t flags;
+ u8 n_addrs;
+ vl_api_address_t addrs[n_addrs];
+};
+
+/**
+ * @brief Reply to a VRRP add/replace
+ * @param context - returned sender context, to match reply w/ request
+ * @param vrrp_index - index of the updated or newly created VRRP instance
+ * @param retval - 0 on success, negative error code on failure
+ */
+define vrrp_vr_update_reply {
+ u32 context;
+ i32 retval;
+ u32 vrrp_index;
+};
+
+/**
+ * @brief Delete an existing VRRP instance
+ * @param client_index - opaque cookie to identify the sender
+ * @param context - sender context, to match reply w/ request
+ * @param vrrp_index - index of the VRRP instance to delete
+ */
+autoreply define vrrp_vr_del {
+ u32 client_index;
+ u32 context;
+ u32 vrrp_index;
+};
+
/** \brief VRRP: dump virtual router data
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/plugins/vrrp/vrrp.c b/src/plugins/vrrp/vrrp.c
index 5ee011cceb1..fb0659605c7 100644
--- a/src/plugins/vrrp/vrrp.c
+++ b/src/plugins/vrrp/vrrp.c
@@ -32,6 +32,97 @@ static const mac_address_t ipv6_vmac = {
.bytes = {0x00, 0x00, 0x5e, 0x00, 0x02, 0x00}
};
+vlib_simple_counter_main_t vrrp_errs[] = {
+ /* Total number of VRRP packets received with invalid checksum */
+ {
+ .name = "CHKSUM_ERRS",
+ .stat_segment_name = "/net/vrrp/chksum-errs",
+ },
+ /* Total number of VRRP packets received with unknown or unsupported version
+ */
+ {
+ .name = "VERSION_ERRS",
+ .stat_segment_name = "/net/vrrp/version-errs",
+ },
+ /* Total number of VRRP packets received with invalid VRID */
+ {
+ .name = "VRID_ERRS",
+ .stat_segment_name = "/net/vrrp/vrid-errs",
+ },
+ /* Total number of VRRP packets received with TTL/Hop limit != 255 */
+ {
+ .name = "TTL_ERRS",
+ .stat_segment_name = "/net/vrrp/ttl-errs",
+ },
+ /* Number of packets received with an address list not matching the locally
+ configured one */
+ {
+ .name = "ADDR_LIST_ERRS",
+ .stat_segment_name = "/net/vrrp/addr-list-errs",
+ },
+ /* Number of packets received with a length less than the VRRP header */
+ {
+ .name = "PACKET_LEN_ERRS",
+ .stat_segment_name = "/net/vrrp/packet-len-errs",
+ },
+};
+
+void
+vrrp_incr_err_counter (vrrp_err_counter_t err_type)
+{
+ if (err_type >= VRRP_ERR_COUNTER_MAX)
+ {
+ clib_warning ("Attempt to increse error counter of unknown type %u",
+ err_type);
+ return;
+ }
+ vlib_increment_simple_counter (&vrrp_errs[err_type],
+ vlib_get_main ()->thread_index, 0, 1);
+}
+
+// per-VRRP statistics
+
+/* Number of times a VRRP instance has transitioned to master */
+vlib_simple_counter_main_t vrrp_stats[] = {
+ {
+ .name = "MASTER_TRANS",
+ .stat_segment_name = "/net/vrrp/master-trans",
+ },
+ /* Number of VRRP advertisements sent by a VRRP instance */
+ {
+ .name = "ADV_SENT",
+ .stat_segment_name = "/net/vrrp/adv-sent",
+ },
+ /* Number of VRRP advertisements received by a VRRP instance */
+ {
+ .name = "ADV_RCVD",
+ .stat_segment_name = "/net/vrrp/adv-rcvd",
+ },
+ /* Number of VRRP priority-0 packets sent by a VRRP instance */
+ {
+ .name = "PRIO0_SENT",
+ .stat_segment_name = "/net/vrrp/prio0-sent",
+ },
+ /* Number of VRRP priority-0 packets received by a VRRP instance */
+ {
+ .name = "PRIO0_RCVD",
+ .stat_segment_name = "/net/vrrp/prio0-rcvd",
+ },
+};
+
+void
+vrrp_incr_stat_counter (vrrp_stat_counter_t stat_type, u32 stat_index)
+{
+ if (stat_type >= VRRP_STAT_COUNTER_MAX)
+ {
+ clib_warning ("Attempt to increse stat counter of unknown type %u",
+ stat_type);
+ return;
+ }
+ vlib_increment_simple_counter (
+ &vrrp_stats[stat_type], vlib_get_main ()->thread_index, stat_index, 1);
+}
+
typedef struct
{
vrrp_vr_key_t key;
@@ -227,9 +318,6 @@ vrrp_vr_transition_addrs (vrrp_vr_t * vr, vrrp_vr_state_t new_state)
if (vrrp_vr_is_owner (vr))
return;
- if (vrrp_vr_is_unicast (vr))
- return;
-
/* only need to do something if entering or leaving master state */
if ((vr->runtime.state != VRRP_VR_STATE_MASTER) &&
(new_state != VRRP_VR_STATE_MASTER))
@@ -293,6 +381,7 @@ vrrp_vr_transition (vrrp_vr_t * vr, vrrp_vr_state_t new_state, void *data)
if (new_state == VRRP_VR_STATE_MASTER)
{
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_MASTER_TRANS, vr->stat_index);
/* RFC 5798 sec 6.4.1 (105) - startup event for VR with priority 255
* sec 6.4.2 (365) - master down timer fires on backup VR
*/
@@ -313,9 +402,10 @@ vrrp_vr_transition (vrrp_vr_t * vr, vrrp_vr_state_t new_state, void *data)
if (vr->runtime.state == VRRP_VR_STATE_MASTER)
{
- vrrp_header_t *pkt = data;
- vr->runtime.master_adv_int = vrrp_adv_int_from_packet (pkt);
+ vrrp_input_process_args_t *args = data;
+ if (args)
+ vr->runtime.master_adv_int = args->max_adv_int;
}
else /* INIT, INTF_DOWN */
vr->runtime.master_adv_int = vr->config.adv_interval;
@@ -384,10 +474,9 @@ static int
vrrp_intf_enable_disable_mcast (u8 enable, u32 sw_if_index, u8 is_ipv6)
{
vrrp_main_t *vrm = &vrrp_main;
- vrrp_vr_t *vr;
vrrp_intf_t *intf;
- u32 fib_index;
- u32 n_vrs = 0;
+ u32 fib_index, i;
+ u32 n_vrs_in_fib = 0;
const mfib_prefix_t *vrrp_prefix;
fib_protocol_t proto;
vnet_link_t link_type;
@@ -422,30 +511,29 @@ vrrp_intf_enable_disable_mcast (u8 enable, u32 sw_if_index, u8 is_ipv6)
via_itf.frp_proto = fib_proto_to_dpo (proto);
fib_index = mfib_table_get_index_for_sw_if_index (proto, sw_if_index);
- /* *INDENT-OFF* */
- pool_foreach (vr, vrm->vrs)
- {
- if (vrrp_vr_is_ipv6 (vr) == is_ipv6)
- n_vrs++;
- }
- /* *INDENT-ON* */
+ vec_foreach_index (i, vrm->vrrp_intfs)
+ {
+ if (mfib_table_get_index_for_sw_if_index (proto, i) != fib_index)
+ continue;
+
+ n_vrs_in_fib += vrrp_intf_num_vrs (i, is_ipv6);
+ }
if (enable)
{
- /* If this is the first VR configured, add the local mcast routes */
- if (n_vrs == 1)
- mfib_table_entry_path_update (fib_index, vrrp_prefix, MFIB_SOURCE_API,
- &for_us);
+ /* ensure that the local mcast route exists */
+ mfib_table_entry_path_update (fib_index, vrrp_prefix, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &for_us);
mfib_table_entry_path_update (fib_index, vrrp_prefix, MFIB_SOURCE_API,
- &via_itf);
+ MFIB_ENTRY_FLAG_NONE, &via_itf);
intf->mcast_adj_index[! !is_ipv6] =
adj_mcast_add_or_lock (proto, link_type, sw_if_index);
}
else
{
/* Remove mcast local routes if this is the last VR being deleted */
- if (n_vrs == 0)
+ if (n_vrs_in_fib == 0)
mfib_table_entry_path_remove (fib_index, vrrp_prefix, MFIB_SOURCE_API,
&for_us);
@@ -509,7 +597,7 @@ vrrp_vr_valid_addrs_owner (vrrp_vr_config_t * vr_conf)
}
static int
-vrrp_vr_valid_addrs_unused (vrrp_vr_config_t * vr_conf)
+vrrp_vr_valid_addrs_unused (vrrp_vr_config_t *vr_conf, index_t vrrp_index)
{
ip46_address_t *vr_addr;
u8 is_ipv6 = (vr_conf->flags & VRRP_VR_IPV6) != 0;
@@ -521,7 +609,7 @@ vrrp_vr_valid_addrs_unused (vrrp_vr_config_t * vr_conf)
addr = (is_ipv6) ? (void *) &vr_addr->ip6 : (void *) &vr_addr->ip4;
vr_index = vrrp_vr_lookup_address (vr_conf->sw_if_index, is_ipv6, addr);
- if (vr_index != ~0)
+ if (vr_index != ~0 && vrrp_index != vr_index)
return VNET_API_ERROR_ADDRESS_IN_USE;
}
@@ -529,7 +617,7 @@ vrrp_vr_valid_addrs_unused (vrrp_vr_config_t * vr_conf)
}
static int
-vrrp_vr_valid_addrs (vrrp_vr_config_t * vr_conf)
+vrrp_vr_valid_addrs (vrrp_vr_config_t *vr_conf, index_t vrrp_index)
{
int ret = 0;
@@ -539,7 +627,7 @@ vrrp_vr_valid_addrs (vrrp_vr_config_t * vr_conf)
return ret;
/* make sure no other VR has already configured any of the VR addresses */
- ret = vrrp_vr_valid_addrs_unused (vr_conf);
+ ret = vrrp_vr_valid_addrs_unused (vr_conf, vrrp_index);
return ret;
}
@@ -574,7 +662,7 @@ vrrp_vr_addr_add_del (vrrp_vr_t * vr, u8 is_add, ip46_address_t * vr_addr)
{
if (!ip46_address_cmp (addr, vr_addr))
{
- vec_del1 (vr->config.vr_addrs, vr->config.vr_addrs - addr);
+ vec_del1 (vr->config.vr_addrs, addr - vr->config.vr_addrs);
break;
}
}
@@ -596,7 +684,7 @@ vrrp_vr_addr_add_del (vrrp_vr_t * vr, u8 is_add, ip46_address_t * vr_addr)
{
if (!ip46_address_cmp (addr, vr_addr))
{
- vec_del1 (vr->config.vr_addrs, vr->config.vr_addrs - addr);
+ vec_del1 (vr->config.vr_addrs, addr - vr->config.vr_addrs);
break;
}
}
@@ -617,9 +705,153 @@ vrrp_vr_addrs_add_del (vrrp_vr_t * vr, u8 is_add, ip46_address_t * vr_addrs)
}
}
+int
+vrrp_vr_update (index_t *vrrp_index, vrrp_vr_config_t *vr_conf)
+{
+ index_t index = *vrrp_index;
+ vrrp_main_t *vrm = &vrrp_main;
+ vrrp_vr_t *vr = NULL;
+ vrrp_vr_key_t key = { 0 };
+ uint8_t must_restart = 0;
+ int ret = 0;
+
+ /* no valid index -> create and return allocated index */
+ if (index == INDEX_INVALID)
+ {
+ return vrrp_vr_add_del (1, vr_conf, vrrp_index);
+ }
+ /* update: lookup vrrp instance */
+ if (pool_is_free_index (vrm->vrs, index))
+ return (VNET_API_ERROR_NO_SUCH_ENTRY);
+
+ /* fetch existing VR */
+ vr = pool_elt_at_index (vrm->vrs, index);
+
+ /* populate key */
+ key.vr_id = vr->config.vr_id;
+ key.is_ipv6 = !!(vr->config.flags & VRRP_VR_IPV6);
+ key.sw_if_index = vr->config.sw_if_index;
+
+ /* Do not allow changes to the keys of the VRRP instance */
+ if (vr_conf->vr_id != key.vr_id || vr_conf->sw_if_index != key.sw_if_index ||
+ !!(vr_conf->flags & VRRP_VR_IPV6) != key.is_ipv6)
+ {
+ clib_warning ("Attempt to change VR ID, IP version or interface index "
+ "for VRRP instance with index %u",
+ index);
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ /* were IPvX addresses included ? */
+ if (!vec_len (vr_conf->vr_addrs))
+ {
+ clib_warning ("Conf of VR %u for IPv%d on sw_if_index %u "
+ " does not contain IP addresses",
+ key.vr_id, key.is_ipv6 ? 6 : 4, key.sw_if_index);
+ return VNET_API_ERROR_INVALID_SRC_ADDRESS;
+ }
+
+ /* Make sure the addresses are ok to use */
+ if ((ret = vrrp_vr_valid_addrs (vr_conf, index)) < 0)
+ return ret;
+
+ /* stop it if needed */
+ must_restart = (vr->runtime.state != VRRP_VR_STATE_INIT);
+ if (must_restart)
+ vrrp_vr_start_stop (0, &key);
+
+ /* overwrite new config */
+ vr->config.priority = vr_conf->priority;
+ vr->config.adv_interval = vr_conf->adv_interval;
+ vr->config.flags = vr_conf->flags;
+
+ /* check if any address has changed */
+ ip46_address_t *vr_addr, *conf_addr;
+ uint8_t found;
+ vec_foreach (vr_addr, vr->config.vr_addrs)
+ {
+ found = 0;
+ vec_foreach (conf_addr, vr_conf->vr_addrs)
+ {
+ if (ip46_address_is_equal (vr_addr, conf_addr))
+ {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ {
+ vrrp_vr_addr_add_del (vr, 0, vr_addr);
+ }
+ }
+ vec_foreach (conf_addr, vr_conf->vr_addrs)
+ {
+ found = 0;
+ vec_foreach (vr_addr, vr->config.vr_addrs)
+ {
+ if (ip46_address_is_equal (vr_addr, conf_addr))
+ {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ {
+ vrrp_vr_addr_add_del (vr, 1, conf_addr);
+ }
+ }
+
+ /* restart it if needed */
+ if (must_restart)
+ vrrp_vr_start_stop (1, &key);
+
+ return 0;
+}
+
+static void
+vrrp_vr_del_common (vrrp_vr_t *vr, vrrp_vr_key_t *key)
+{
+ vrrp_main_t *vrm = &vrrp_main;
+ ip46_address_t *vr_addrs_del_copy;
+
+ vrrp_vr_timer_cancel (vr);
+ vrrp_vr_tracking_ifs_add_del (vr, vr->tracking.interfaces, 0);
+ vr_addrs_del_copy = vec_dup (vr->config.vr_addrs);
+ vrrp_vr_addrs_add_del (vr, 0, vr_addrs_del_copy);
+ mhash_unset (&vrm->vr_index_by_key, key, 0);
+ vec_free (vr_addrs_del_copy);
+ vec_free (vr->config.peer_addrs);
+ vec_free (vr->config.vr_addrs);
+ vec_free (vr->tracking.interfaces);
+ pool_put (vrm->vrs, vr);
+}
+
+int
+vrrp_vr_del (index_t vrrp_index)
+{
+ vrrp_main_t *vrm = &vrrp_main;
+ vrrp_vr_key_t key;
+ vrrp_vr_t *vr = 0;
+
+ if (pool_is_free_index (vrm->vrs, vrrp_index))
+ {
+ return (VNET_API_ERROR_NO_SUCH_ENTRY);
+ }
+ else
+ {
+ vr = pool_elt_at_index (vrm->vrs, vrrp_index);
+ key.sw_if_index = vr->config.sw_if_index;
+ key.vr_id = vr->config.vr_id;
+ key.is_ipv6 = vrrp_vr_is_ipv6 (vr);
+ vrrp_vr_del_common (vr, &key);
+ return 0;
+ }
+}
+
/* Action function shared between message handler and debug CLI */
int
-vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t * vr_conf)
+vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t *vr_conf, index_t *ret_index)
{
vrrp_main_t *vrm = &vrrp_main;
vnet_main_t *vnm = vnet_get_main ();
@@ -661,7 +893,7 @@ vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t * vr_conf)
}
/* Make sure the addresses are ok to use */
- if ((ret = vrrp_vr_valid_addrs (vr_conf)) < 0)
+ if ((ret = vrrp_vr_valid_addrs (vr_conf, INDEX_INVALID)) < 0)
return ret;
pool_get_zero (vrm->vrs, vr);
@@ -679,6 +911,20 @@ vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t * vr_conf)
vr->runtime.mac = (key.is_ipv6) ? ipv6_vmac : ipv4_vmac;
vr->runtime.mac.bytes[5] = vr_conf->vr_id;
+ /* recall pool index for stats */
+ vr->stat_index = vr_index;
+ /* and return it if we were asked to */
+ if (ret_index != NULL)
+ {
+ *ret_index = vr_index;
+ }
+ /* allocate & reset stats */
+ for (int i = 0; i < VRRP_STAT_COUNTER_MAX; i++)
+ {
+ vlib_validate_simple_counter (&vrrp_stats[i], vr_index);
+ vlib_zero_simple_counter (&vrrp_stats[i], vr_index);
+ }
+
mhash_set (&vrm->vr_index_by_key, &key, vr_index, 0);
}
else
@@ -692,13 +938,7 @@ vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t * vr_conf)
vr_index = p[0];
vr = pool_elt_at_index (vrm->vrs, vr_index);
-
- vrrp_vr_tracking_ifs_add_del (vr, vr->tracking.interfaces, is_add);
- vrrp_vr_addrs_add_del (vr, is_add, vr->config.vr_addrs);
- mhash_unset (&vrm->vr_index_by_key, &key, 0);
- vec_free (vr->config.vr_addrs);
- vec_free (vr->tracking.interfaces);
- pool_put (vrm->vrs, vr);
+ vrrp_vr_del_common (vr, &key);
}
vrrp_intf_vr_add_del (is_add, vr_conf->sw_if_index, vr_index, key.is_ipv6);
@@ -1266,19 +1506,24 @@ vrrp_init (vlib_main_t * vm)
vrrp_ip6_delegate_id = ip6_link_delegate_register (&vrrp_ip6_delegate_vft);
+ /* allocate & reset error counters */
+ for (int i = 0; i < VRRP_ERR_COUNTER_MAX; i++)
+ {
+ vlib_validate_simple_counter (&vrrp_errs[i], 0);
+ vlib_zero_simple_counter (&vrrp_errs[i], 0);
+ }
+
return error;
}
VLIB_INIT_FUNCTION (vrrp_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "VRRP v3 (RFC 5798)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vrrp/vrrp.h b/src/plugins/vrrp/vrrp.h
index c9325921959..acab7440ead 100644
--- a/src/plugins/vrrp/vrrp.h
+++ b/src/plugins/vrrp/vrrp.h
@@ -33,7 +33,6 @@ typedef struct vrrp_vr_key
u8 is_ipv6;
} vrrp_vr_key_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct vrrp4_arp_key {
union {
@@ -44,15 +43,12 @@ typedef CLIB_PACKED
u64 as_u64;
};
}) vrrp4_arp_key_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct vrrp6_nd_key {
u32 sw_if_index;
ip6_address_t addr;
}) vrrp6_nd_key_t;
-/* *INDENT-ON* */
typedef struct vrrp_vr_tracking_if
{
@@ -108,6 +104,7 @@ typedef struct vrrp_vr
vrrp_vr_config_t config;
vrrp_vr_runtime_t runtime;
vrrp_vr_tracking_t tracking;
+ u32 stat_index;
} vrrp_vr_t;
/* Timers */
@@ -185,9 +182,46 @@ extern vlib_node_registration_t vrrp_periodic_node;
#define VRRP_EVENT_VR_STOP 2
#define VRRP_EVENT_PERIODIC_ENABLE_DISABLE 3
+/* global error counter types */
+#define foreach_vrrp_err_counter \
+ _ (CHKSUM, 0) \
+ _ (VERSION, 1) \
+ _ (VRID, 2) \
+ _ (TTL, 3) \
+ _ (ADDR_LIST, 4) \
+ _ (PKT_LEN, 5)
+
+typedef enum vrrp_err_counter_
+{
+#define _(sym, val) VRRP_ERR_COUNTER_##sym = val,
+ foreach_vrrp_err_counter
+#undef _
+} vrrp_err_counter_t;
+
+#define VRRP_ERR_COUNTER_MAX 6
+
+/* per-instance stats */
+#define foreach_vrrp_stat_counter \
+ _ (MASTER_TRANS, 0) \
+ _ (ADV_SENT, 1) \
+ _ (ADV_RCVD, 2) \
+ _ (PRIO0_SENT, 3) \
+ _ (PRIO0_RCVD, 4)
+
+typedef enum vrrp_stat_counter_
+{
+#define _(sym, val) VRRP_STAT_COUNTER_##sym = val,
+ foreach_vrrp_stat_counter
+#undef _
+} vrrp_stat_counter_t;
+
+#define VRRP_STAT_COUNTER_MAX 5
+
clib_error_t *vrrp_plugin_api_hookup (vlib_main_t * vm);
-int vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t * conf);
+int vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t *conf, index_t *ret_index);
+int vrrp_vr_update (index_t *vrrp_index, vrrp_vr_config_t *vr_conf);
+int vrrp_vr_del (index_t vrrp_index);
int vrrp_vr_start_stop (u8 is_start, vrrp_vr_key_t * vr_key);
extern u8 *format_vrrp_vr (u8 * s, va_list * args);
extern u8 *format_vrrp_vr_key (u8 * s, va_list * args);
@@ -209,6 +243,9 @@ int vrrp_vr_tracking_ifs_add_del (vrrp_vr_t * vr,
u8 is_add);
void vrrp_vr_event (vrrp_vr_t * vr, vrrp_vr_state_t new_state);
+// stats
+void vrrp_incr_err_counter (vrrp_err_counter_t err_type);
+void vrrp_incr_stat_counter (vrrp_stat_counter_t stat_type, u32 stat_index);
always_inline void
vrrp_vr_skew_compute (vrrp_vr_t * vr)
diff --git a/src/plugins/vrrp/vrrp_all_api_h.h b/src/plugins/vrrp/vrrp_all_api_h.h
deleted file mode 100644
index 4f45909de70..00000000000
--- a/src/plugins/vrrp/vrrp_all_api_h.h
+++ /dev/null
@@ -1,11 +0,0 @@
-
-/*
- * vrrp_all_api_h.h - vrrp plug-in api #include file
- *
- * Copyright 2019-2020 Rubicon Communications, LLC (Netgate)
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-/* Include the generated file, see BUILT_SOURCES in Makefile.am */
-#include <vrrp/vrrp.api.h>
diff --git a/src/plugins/vrrp/vrrp_api.c b/src/plugins/vrrp/vrrp_api.c
index 9a206fa6cdc..e31e0a74c14 100644
--- a/src/plugins/vrrp/vrrp_api.c
+++ b/src/plugins/vrrp/vrrp_api.c
@@ -25,6 +25,109 @@
/* API message handlers */
static void
+vl_api_vrrp_vr_update_t_handler (vl_api_vrrp_vr_update_t *mp)
+{
+ vl_api_vrrp_vr_update_reply_t *rmp;
+ vrrp_vr_config_t vr_conf;
+ u32 api_flags;
+ u32 vrrp_index = INDEX_INVALID;
+ ip46_address_t *addrs = 0;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ api_flags = ntohl (mp->flags);
+
+ clib_memset (&vr_conf, 0, sizeof (vr_conf));
+
+ vr_conf.sw_if_index = ntohl (mp->sw_if_index);
+ vr_conf.vr_id = mp->vr_id;
+ vr_conf.priority = mp->priority;
+ vr_conf.adv_interval = ntohs (mp->interval);
+
+ if (api_flags & VRRP_API_VR_PREEMPT)
+ vr_conf.flags |= VRRP_VR_PREEMPT;
+
+ if (api_flags & VRRP_API_VR_ACCEPT)
+ vr_conf.flags |= VRRP_VR_ACCEPT;
+
+ if (api_flags & VRRP_API_VR_UNICAST)
+ vr_conf.flags |= VRRP_VR_UNICAST;
+
+ if (api_flags & VRRP_API_VR_IPV6)
+ vr_conf.flags |= VRRP_VR_IPV6;
+
+ int i;
+ for (i = 0; i < mp->n_addrs; i++)
+ {
+ ip46_address_t *addr;
+ void *src, *dst;
+ int len;
+
+ vec_add2 (addrs, addr, 1);
+
+ if (ntohl (mp->addrs[i].af) == ADDRESS_IP4)
+ {
+ src = &mp->addrs[i].un.ip4;
+ dst = &addr->ip4;
+ len = sizeof (addr->ip4);
+ }
+ else
+ {
+ src = &mp->addrs[i].un.ip6;
+ dst = &addr->ip6;
+ len = sizeof (addr->ip6);
+ }
+
+ clib_memcpy (dst, src, len);
+ }
+
+ vr_conf.vr_addrs = addrs;
+
+ if (vr_conf.priority == 0)
+ {
+ clib_warning ("VR priority must be > 0");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+ else if (vr_conf.adv_interval == 0)
+ {
+ clib_warning ("VR advertisement interval must be > 0");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+ else if (vr_conf.vr_id == 0)
+ {
+ clib_warning ("VR ID must be > 0");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+ else
+ {
+ vrrp_index = ntohl (mp->vrrp_index);
+ rv = vrrp_vr_update (&vrrp_index, &vr_conf);
+ }
+
+ vec_free (addrs);
+
+ BAD_SW_IF_INDEX_LABEL;
+ // clang-format off
+ REPLY_MACRO2 (VL_API_VRRP_VR_UPDATE_REPLY,
+ ({
+ rmp->vrrp_index = htonl (vrrp_index);
+ }));
+ // clang-format on
+}
+
+static void
+vl_api_vrrp_vr_del_t_handler (vl_api_vrrp_vr_del_t *mp)
+{
+ vl_api_vrrp_vr_del_reply_t *rmp;
+ int rv;
+
+ rv = vrrp_vr_del (ntohl (mp->vrrp_index));
+
+ REPLY_MACRO (VL_API_VRRP_VR_DEL_REPLY);
+}
+
+static void
vl_api_vrrp_vr_add_del_t_handler (vl_api_vrrp_vr_add_del_t * mp)
{
vl_api_vrrp_vr_add_del_reply_t *rmp;
@@ -103,7 +206,7 @@ vl_api_vrrp_vr_add_del_t_handler (vl_api_vrrp_vr_add_del_t * mp)
rv = VNET_API_ERROR_INVALID_VALUE;
}
else
- rv = vrrp_vr_add_del (mp->is_add, &vr_conf);
+ rv = vrrp_vr_add_del (mp->is_add, &vr_conf, NULL);
vec_free (addrs);
@@ -215,7 +318,6 @@ vl_api_vrrp_vr_dump_t_handler (vl_api_vrrp_vr_dump_t * mp)
sw_if_index = htonl (mp->sw_if_index);
- /* *INDENT-OFF* */
pool_foreach (vr, vmp->vrs) {
if (sw_if_index && (sw_if_index != ~0) &&
@@ -224,7 +326,6 @@ vl_api_vrrp_vr_dump_t_handler (vl_api_vrrp_vr_dump_t * mp)
send_vrrp_vr_details (vr, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -363,7 +464,6 @@ vl_api_vrrp_vr_peer_dump_t_handler (vl_api_vrrp_vr_peer_dump_t * mp)
return;
}
- /* *INDENT-OFF* */
pool_foreach (vr, vmp->vrs) {
if (!vec_len (vr->config.peer_addrs))
@@ -372,7 +472,6 @@ vl_api_vrrp_vr_peer_dump_t_handler (vl_api_vrrp_vr_peer_dump_t * mp)
send_vrrp_vr_details (vr, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -467,7 +566,6 @@ vl_api_vrrp_vr_track_if_dump_t_handler (vl_api_vrrp_vr_track_if_dump_t * mp)
return;
}
- /* *INDENT-OFF* */
pool_foreach (vr, vmp->vrs) {
if (!vec_len (vr->tracking.interfaces))
@@ -476,7 +574,6 @@ vl_api_vrrp_vr_track_if_dump_t_handler (vl_api_vrrp_vr_track_if_dump_t * mp)
send_vrrp_vr_track_if_details (vr, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -510,14 +607,12 @@ vrrp_vr_event (vrrp_vr_t * vr, vrrp_vr_state_t new_state)
vpe_client_registration_t *reg;
vl_api_registration_t *vl_reg;
- /* *INDENT-OFF* */
pool_foreach (reg, vam->vrrp_vr_events_registrations)
{
vl_reg = vl_api_client_index_to_registration (reg->client_index);
if (vl_reg)
send_vrrp_vr_event (reg, vl_reg, vr, new_state);
}
- /* *INDENT-ON* */
}
pub_sub_handler (vrrp_vr_events, VRRP_VR_EVENTS);
@@ -535,7 +630,6 @@ vrrp_plugin_api_hookup (vlib_main_t * vm)
return 0;
}
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vrrp/vrrp_cli.c b/src/plugins/vrrp/vrrp_cli.c
index a154a11a8fa..fb52da474fa 100644
--- a/src/plugins/vrrp/vrrp_cli.c
+++ b/src/plugins/vrrp/vrrp_cli.c
@@ -102,7 +102,7 @@ vrrp_vr_add_del_command_fn (vlib_main_t * vm,
vr_conf.adv_interval = (u16) interval;
vr_conf.vr_addrs = addrs;
- rv = vrrp_vr_add_del (is_add, &vr_conf);
+ rv = vrrp_vr_add_del (is_add, &vr_conf, NULL);
switch (rv)
{
@@ -151,7 +151,6 @@ vrrp_vr_add_command_fn (vlib_main_t * vm, unformat_input_t * input,
return vrrp_vr_add_del_command_fn (vm, input, cmd, 1 /* is_add */ );
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_vr_add_command, static) =
{
.path = "vrrp vr add",
@@ -159,7 +158,6 @@ VLIB_CLI_COMMAND (vrrp_vr_add_command, static) =
"vrrp vr add <interface> [vr_id <n>] [ipv6] [priority <value>] [interval <value>] [no_preempt] [accept_mode] [unicast] [<ip_addr> ...]",
.function = vrrp_vr_add_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vrrp_vr_del_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -168,14 +166,12 @@ vrrp_vr_del_command_fn (vlib_main_t * vm, unformat_input_t * input,
return vrrp_vr_add_del_command_fn (vm, input, cmd, 0 /* is_add */ );
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_vr_del_command, static) =
{
.path = "vrrp vr del",
.short_help = "vrrp vr del <interface> [vr_id <n>] [ipv6]",
.function = vrrp_vr_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vrrp_show_vr_command_fn (vlib_main_t * vm,
@@ -208,7 +204,6 @@ vrrp_show_vr_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_show_vr_command, static) =
{
.path = "show vrrp vr",
@@ -216,7 +211,6 @@ VLIB_CLI_COMMAND (vrrp_show_vr_command, static) =
"show vrrp vr [(<intf_name>|sw_if_index <n>)]",
.function = vrrp_show_vr_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vrrp_proto_start_stop_command_fn (vlib_main_t * vm,
@@ -242,6 +236,8 @@ vrrp_proto_start_stop_command_fn (vlib_main_t * vm,
if (unformat (input, "%U", unformat_vnet_sw_interface, vmp->vnet_main,
&sw_if_index))
;
+ else if (unformat (input, "sw_if_index %u", &sw_if_index))
+ ;
else if (unformat (input, "vr_id %u", &vr_id))
;
else if (unformat (input, "ipv6"))
@@ -311,6 +307,8 @@ vrrp_peers_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (unformat (input, "%U", unformat_vnet_sw_interface, vmp->vnet_main,
&sw_if_index))
;
+ else if (unformat (input, "sw_if_index %u", &sw_if_index))
+ ;
else if (unformat (input, "vr_id %u", &vr_id))
;
else if (unformat (input, "ipv6"))
@@ -373,7 +371,6 @@ done:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_proto_start_stop_command, static) =
{
.path = "vrrp proto",
@@ -381,9 +378,7 @@ VLIB_CLI_COMMAND (vrrp_proto_start_stop_command, static) =
"vrrp proto (start|stop) (<intf_name>|sw_if_index <n>) vr_id <n> [ipv6]",
.function = vrrp_proto_start_stop_command_fn,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_peers_command, static) =
{
.path = "vrrp peers",
@@ -391,7 +386,6 @@ VLIB_CLI_COMMAND (vrrp_peers_command, static) =
"vrrp peers (<intf_name>|sw_if_index <n>) vr_id <n> [ipv6] <peer1_addr> [<peer2_addr> ...]",
.function = vrrp_peers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vrrp_vr_track_if_command_fn (vlib_main_t * vm,
@@ -418,6 +412,8 @@ vrrp_vr_track_if_command_fn (vlib_main_t * vm,
if (unformat (input, "%U", unformat_vnet_sw_interface, vmp->vnet_main,
&sw_if_index))
;
+ else if (unformat (input, "sw_if_index %u", &sw_if_index))
+ ;
else if (unformat (input, "add"))
is_add = 1;
else if (unformat (input, "del"))
@@ -487,7 +483,6 @@ done:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_vr_track_if_command, static) =
{
.path = "vrrp vr track-if",
@@ -495,7 +490,6 @@ VLIB_CLI_COMMAND (vrrp_vr_track_if_command, static) =
"vrrp vr track-if (add|del) (<intf_name>|sw_if_index <n>) vr_id <n> [ipv6] track-index <n> priority <n> [ track-index <n> priority <n> ...]",
.function = vrrp_vr_track_if_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vrrp/vrrp_msg_enum.h b/src/plugins/vrrp/vrrp_msg_enum.h
deleted file mode 100644
index 48ae619205a..00000000000
--- a/src/plugins/vrrp/vrrp_msg_enum.h
+++ /dev/null
@@ -1,23 +0,0 @@
-
-/*
- * vrrp_msg_enum.h - vrrp plug-in message enumeration
- *
- * Copyright 2019-2020 Rubicon Communications, LLC (Netgate)
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-#ifndef included_vrrp_msg_enum_h
-#define included_vrrp_msg_enum_h
-
-#include <vppinfra/byte_order.h>
-
-#define vl_msg_id(n,h) n,
-typedef enum {
-#include <vrrp/vrrp_all_api_h.h>
- /* We'll want to know how many messages IDs we need... */
- VL_MSG_FIRST_AVAILABLE,
-} vl_msg_id_t;
-#undef vl_msg_id
-
-#endif /* included_vrrp_msg_enum_h */
diff --git a/src/plugins/vrrp/vrrp_packet.c b/src/plugins/vrrp/vrrp_packet.c
index b470ddeba08..69e635f811a 100644
--- a/src/plugins/vrrp/vrrp_packet.c
+++ b/src/plugins/vrrp/vrrp_packet.c
@@ -102,13 +102,24 @@ vrrp_adv_l3_build (vrrp_vr_t * vr, vlib_buffer_t * b,
if (!vrrp_vr_is_ipv6 (vr)) /* IPv4 */
{
ip4_header_t *ip4 = vlib_buffer_get_current (b);
+ ip4_address_t *src4;
clib_memset (ip4, 0, sizeof (*ip4));
ip4->ip_version_and_header_length = 0x45;
ip4->ttl = 255;
ip4->protocol = IP_PROTOCOL_VRRP;
clib_memcpy (&ip4->dst_address, &dst->ip4, sizeof (dst->ip4));
- fib_sas4_get (vr->config.sw_if_index, NULL, &ip4->src_address);
+
+ /* RFC 5798 Section 5.1.1.1 - Source Address "is the primary IPv4
+ * address of the interface the packet is being sent from". Assume
+ * this is the first address on the interface.
+ */
+ src4 = ip_interface_get_first_ip (vr->config.sw_if_index, 1);
+ if (!src4)
+ {
+ return -1;
+ }
+ ip4->src_address.as_u32 = src4->as_u32;
ip4->length = clib_host_to_net_u16 (sizeof (*ip4) +
vrrp_adv_payload_len (vr));
ip4->checksum = ip4_header_checksum (ip4);
@@ -325,7 +336,12 @@ vrrp_adv_send (vrrp_vr_t * vr, int shutdown)
else
vrrp_adv_l2_build_multicast (vr, b);
- vrrp_adv_l3_build (vr, b, dst);
+ if (-1 == vrrp_adv_l3_build (vr, b, dst))
+ {
+ vlib_frame_free (vm, to_frame);
+ vlib_buffer_free (vm, bi, n_buffers);
+ return -1;
+ }
vrrp_adv_payload_build (vr, b, shutdown);
vlib_buffer_reset (b);
@@ -337,6 +353,12 @@ vrrp_adv_send (vrrp_vr_t * vr, int shutdown)
vlib_put_frame_to_node (vm, node_index, to_frame);
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_ADV_SENT, vr->stat_index);
+ if (shutdown)
+ {
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_PRIO0_SENT, vr->stat_index);
+ }
+
vec_free (bi);
return 0;
@@ -516,6 +538,8 @@ vrrp_garp_or_na_send (vrrp_vr_t * vr)
vlib_put_frame_to_node (vm, vmp->intf_output_node_idx, to_frame);
+ vec_free (bi);
+
return 0;
}
@@ -529,17 +553,25 @@ static const ip4_header_t igmp_ip4_mcast = {
.dst_address = {.as_u8 = IGMP4_MCAST_ADDR_AS_U8,},
};
-static void
-vrrp_igmp_pkt_build (vrrp_vr_t * vr, vlib_buffer_t * b)
+static int
+vrrp_igmp_pkt_build (vrrp_vr_t *vr, vlib_buffer_t *b)
{
ip4_header_t *ip4;
u8 *ip4_options;
igmp_membership_report_v3_t *report;
igmp_membership_group_v3_t *group;
+ ip4_address_t *src4;
ip4 = vlib_buffer_get_current (b);
clib_memcpy (ip4, &igmp_ip4_mcast, sizeof (*ip4));
- fib_sas4_get (vr->config.sw_if_index, NULL, &ip4->src_address);
+
+ /* Use the same source address the advertisements use when joining the mcast group */
+ src4 = ip_interface_get_first_ip (vr->config.sw_if_index, 1);
+ if (!src4)
+ {
+ return -1;
+ }
+ ip4->src_address.as_u32 = src4->as_u32;
vlib_buffer_chain_increase_length (b, b, sizeof (*ip4));
vlib_buffer_advance (b, sizeof (*ip4));
@@ -581,6 +613,7 @@ vrrp_igmp_pkt_build (vrrp_vr_t * vr, vlib_buffer_t * b)
~ip_csum_fold (ip_incremental_checksum (0, report, payload_len));
vlib_buffer_reset (b);
+ return 0;
}
/* multicast listener report packet format for ethernet. */
@@ -720,7 +753,13 @@ vrrp_vr_multicast_group_join (vrrp_vr_t * vr)
}
else
{
- vrrp_igmp_pkt_build (vr, b);
+ if (-1 == vrrp_igmp_pkt_build (vr, b))
+ {
+ clib_warning ("IGMP packet build failed for %U", format_vrrp_vr_key,
+ vr);
+ vlib_buffer_free (vm, &bi, 1);
+ return -1;
+ }
node_index = ip4_rewrite_mcast_node.index;
}
diff --git a/src/plugins/vrrp/vrrp_packet.h b/src/plugins/vrrp/vrrp_packet.h
index 1cbf62d7c72..d5725b6a1a5 100644
--- a/src/plugins/vrrp/vrrp_packet.h
+++ b/src/plugins/vrrp/vrrp_packet.h
@@ -47,6 +47,15 @@ vrrp_adv_int_from_packet (vrrp_header_t * pkt)
return clib_net_to_host_u16 (pkt->rsvd_and_max_adv_int) & ((u16) 0x0fff);
}
+/* Fields from VRRP advertisement packets needed by main thread */
+typedef struct vrrp_input_process_args
+{
+ u32 vr_index;
+ ip46_address_t src_addr;
+ u8 priority;
+ u16 max_adv_int;
+} vrrp_input_process_args_t;
+
#endif /* __included_vrrp_packet_h__ */
/*
diff --git a/src/plugins/vrrp/vrrp_periodic.c b/src/plugins/vrrp/vrrp_periodic.c
index 9c1b76ae59d..5f9d7ae938e 100644
--- a/src/plugins/vrrp/vrrp_periodic.c
+++ b/src/plugins/vrrp/vrrp_periodic.c
@@ -210,14 +210,12 @@ vrrp_periodic_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vrrp_periodic_node) = {
.function = vrrp_periodic_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "vrrp-periodic-process",
.process_log2_n_stack_bytes = 17,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vrrp/vrrp_test.c b/src/plugins/vrrp/vrrp_test.c
index 199f5417f1a..d2f79f65c3f 100644
--- a/src/plugins/vrrp/vrrp_test.c
+++ b/src/plugins/vrrp/vrrp_test.c
@@ -19,8 +19,7 @@ uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
#include <vnet/format_fns.h>
#include <vrrp/vrrp.api_enum.h>
#include <vrrp/vrrp.api_types.h>
-#include <vpp/api/vpe.api_types.h>
-
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -36,6 +35,176 @@ vrrp_test_main_t vrrp_test_main;
#include <vlibapi/vat_helper_macros.h>
static int
+api_vrrp_vr_update (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ u32 sw_if_index = ~0;
+ u32 vr_id, priority, interval, vrrp_index;
+ u8 is_ipv6, no_preempt, accept_mode, vr_unicast;
+ u8 n_addrs4, n_addrs6;
+ vl_api_vrrp_vr_update_t *mp;
+ vl_api_address_t *api_addr;
+ ip46_address_t *ip_addr, *ip_addrs = 0;
+ ip46_address_t addr;
+ int ret = 0;
+
+ interval = priority = 100;
+ n_addrs4 = n_addrs6 = 0;
+ vr_id = is_ipv6 = no_preempt = accept_mode = vr_unicast = 0;
+ vrrp_index = INDEX_INVALID;
+
+ clib_memset (&addr, 0, sizeof (addr));
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %u", &sw_if_index))
+ ;
+ else if (unformat (i, "vr_id %u", &vr_id))
+ ;
+ else if (unformat (i, "vrrp_index %u", &vrrp_index))
+ ;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else if (unformat (i, "priority %u", &priority))
+ ;
+ else if (unformat (i, "interval %u", &interval))
+ ;
+ else if (unformat (i, "no_preempt"))
+ no_preempt = 1;
+ else if (unformat (i, "accept_mode"))
+ accept_mode = 1;
+ else if (unformat (i, "unicast"))
+ vr_unicast = 1;
+ else if (unformat (i, "%U", unformat_ip4_address, &addr.ip4))
+ {
+ vec_add1 (ip_addrs, addr);
+ n_addrs4++;
+ clib_memset (&addr, 0, sizeof (addr));
+ }
+ else if (unformat (i, "%U", unformat_ip6_address, &addr.ip6))
+ {
+ vec_add1 (ip_addrs, addr);
+ n_addrs6++;
+ clib_memset (&addr, 0, sizeof (addr));
+ }
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("Interface not set\n");
+ ret = -99;
+ }
+ else if (n_addrs4 && (n_addrs6 || is_ipv6))
+ {
+ errmsg ("Address family mismatch\n");
+ ret = -99;
+ }
+
+ if (ret)
+ goto done;
+
+ /* Construct the API message */
+ M2 (VRRP_VR_UPDATE, mp, vec_len (ip_addrs) * sizeof (*api_addr));
+
+ mp->vrrp_index = htonl (vrrp_index);
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->vr_id = vr_id;
+ mp->priority = priority;
+ mp->interval = htons (interval);
+ mp->flags = VRRP_API_VR_PREEMPT; /* preempt by default */
+
+ if (no_preempt)
+ mp->flags &= ~VRRP_API_VR_PREEMPT;
+
+ if (accept_mode)
+ mp->flags |= VRRP_API_VR_ACCEPT;
+
+ if (vr_unicast)
+ mp->flags |= VRRP_API_VR_UNICAST;
+
+ if (is_ipv6)
+ mp->flags |= VRRP_API_VR_IPV6;
+
+ mp->flags = htonl (mp->flags);
+
+ mp->n_addrs = n_addrs4 + n_addrs6;
+ api_addr = mp->addrs;
+
+ vec_foreach (ip_addr, ip_addrs)
+ {
+ void *src, *dst;
+ int len;
+
+ if (is_ipv6)
+ {
+ api_addr->af = ADDRESS_IP6;
+ src = &ip_addr->ip6;
+ dst = &api_addr->un.ip6;
+ len = sizeof (api_addr->un.ip6);
+ }
+ else
+ {
+ api_addr->af = ADDRESS_IP4;
+ src = &ip_addr->ip4;
+ dst = &api_addr->un.ip4;
+ len = sizeof (api_addr->un.ip4);
+ }
+ clib_memcpy (dst, src, len);
+ api_addr++;
+ }
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+
+done:
+ vec_free (ip_addrs);
+
+ return ret;
+}
+
+static void
+vl_api_vrrp_vr_update_reply_t_handler (vl_api_vrrp_vr_update_reply_t *mp)
+{
+}
+
+static int
+api_vrrp_vr_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_vrrp_vr_del_t *mp;
+ u32 vrrp_index = INDEX_INVALID;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "vrrp_index %u", &vrrp_index))
+ ;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (VRRP_VR_DEL, mp);
+ mp->vrrp_index = htonl (vrrp_index);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+static int
api_vrrp_vr_add_del (vat_main_t * vam)
{
unformat_input_t *i = vam->input;
diff --git a/src/plugins/vxlan/CMakeLists.txt b/src/plugins/vxlan/CMakeLists.txt
new file mode 100644
index 00000000000..bd0272a868e
--- /dev/null
+++ b/src/plugins/vxlan/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(vxlan
+ SOURCES
+ vxlan.c
+ encap.c
+ decap.c
+ vxlan_api.c
+ plugin.c
+ vxlan.h
+ vxlan_packet.h
+
+ MULTIARCH_SOURCES
+ encap.c
+
+ API_FILES
+ vxlan.api
+)
diff --git a/src/plugins/vxlan/FEATURE.yaml b/src/plugins/vxlan/FEATURE.yaml
new file mode 100644
index 00000000000..dc7d21b010e
--- /dev/null
+++ b/src/plugins/vxlan/FEATURE.yaml
@@ -0,0 +1,14 @@
+---
+name: Virtual eXtensible LAN
+maintainer: John Lo <loj@cisco.com>
+features:
+ - VXLAN tunnel for support of L2 overlay/virtual networks (RFC-7348)
+ - Support either IPv4 or IPv6 underlay network VTEPs
+ - Flooding via headend replication if all VXLAN tunnels in BD are unicast ones
+ - Multicast VXLAN tunnel can be added to BD to flood via IP multicast
+ - VXLAN encap with flow-hashed source port for better underlay IP load balance
+ - VXLAN decap optimization via vxlan-bypass IP feature on underlay interfaces
+ - VXLAN decap HW offload using flow director with DPDK on Intel Fortville NICs
+description: "Virtual eXtensible LAN (VXLAN) tunnels support L2 overlay networks that span L3 networks"
+state: production
+properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/vxlan/decap.c b/src/plugins/vxlan/decap.c
new file mode 100644
index 00000000000..5f28c5e97bb
--- /dev/null
+++ b/src/plugins/vxlan/decap.c
@@ -0,0 +1,1322 @@
+/*
+ * decap.c: vxlan tunnel decap packet processing
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vxlan/vxlan.h>
+#include <vnet/udp/udp_local.h>
+
+#ifndef CLIB_MARCH_VARIANT
+__clib_export vlib_node_registration_t vxlan4_input_node;
+__clib_export vlib_node_registration_t vxlan6_input_node;
+#endif
+
+typedef struct
+{
+ u32 next_index;
+ u32 tunnel_index;
+ u32 error;
+ u32 vni;
+} vxlan_rx_trace_t;
+
+static u8 *
+format_vxlan_rx_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_rx_trace_t *t = va_arg (*args, vxlan_rx_trace_t *);
+
+ if (t->tunnel_index == ~0)
+ return format (s, "VXLAN decap error - tunnel for vni %d does not exist",
+ t->vni);
+ return format (s, "VXLAN decap from vxlan_tunnel%d vni %d next %d error %d",
+ t->tunnel_index, t->vni, t->next_index, t->error);
+}
+
+typedef vxlan4_tunnel_key_t last_tunnel_cache4;
+
+static const vxlan_decap_info_t decap_not_found = {
+ .sw_if_index = ~0,
+ .next_index = VXLAN_INPUT_NEXT_DROP,
+ .error = VXLAN_ERROR_NO_SUCH_TUNNEL
+};
+
+static const vxlan_decap_info_t decap_bad_flags = {
+ .sw_if_index = ~0,
+ .next_index = VXLAN_INPUT_NEXT_DROP,
+ .error = VXLAN_ERROR_BAD_FLAGS
+};
+
+always_inline vxlan_decap_info_t
+vxlan4_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache4 * cache,
+ u32 fib_index, ip4_header_t * ip4_0,
+ vxlan_header_t * vxlan0, u32 * stats_sw_if_index)
+{
+ if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
+ return decap_bad_flags;
+
+ /* Make sure VXLAN tunnel exists according to packet S/D IP, UDP port, VRF,
+ * and VNI */
+ u32 dst = ip4_0->dst_address.as_u32;
+ u32 src = ip4_0->src_address.as_u32;
+ udp_header_t *udp = ip4_next_header (ip4_0);
+ vxlan4_tunnel_key_t key4 = {
+ .key[0] = ((u64) dst << 32) | src,
+ .key[1] = ((u64) udp->dst_port << 48) | ((u64) fib_index << 32) |
+ vxlan0->vni_reserved,
+ };
+
+ if (PREDICT_TRUE
+ (key4.key[0] == cache->key[0] && key4.key[1] == cache->key[1]))
+ {
+ /* cache hit */
+ vxlan_decap_info_t di = {.as_u64 = cache->value };
+ *stats_sw_if_index = di.sw_if_index;
+ return di;
+ }
+
+ int rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
+ if (PREDICT_TRUE (rv == 0))
+ {
+ *cache = key4;
+ vxlan_decap_info_t di = {.as_u64 = key4.value };
+ *stats_sw_if_index = di.sw_if_index;
+ return di;
+ }
+
+ /* try multicast */
+ if (PREDICT_TRUE (!ip4_address_is_multicast (&ip4_0->dst_address)))
+ return decap_not_found;
+
+ /* search for mcast decap info by mcast address */
+ key4.key[0] = dst;
+ rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
+ if (rv != 0)
+ return decap_not_found;
+
+ /* search for unicast tunnel using the mcast tunnel local(src) ip */
+ vxlan_decap_info_t mdi = {.as_u64 = key4.value };
+ key4.key[0] = ((u64) mdi.local_ip.as_u32 << 32) | src;
+ rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
+ if (PREDICT_FALSE (rv != 0))
+ return decap_not_found;
+
+ /* mcast traffic does not update the cache */
+ *stats_sw_if_index = mdi.sw_if_index;
+ vxlan_decap_info_t di = {.as_u64 = key4.value };
+ return di;
+}
+
+typedef vxlan6_tunnel_key_t last_tunnel_cache6;
+
+always_inline vxlan_decap_info_t
+vxlan6_find_tunnel (vxlan_main_t * vxm, last_tunnel_cache6 * cache,
+ u32 fib_index, ip6_header_t * ip6_0,
+ vxlan_header_t * vxlan0, u32 * stats_sw_if_index)
+{
+ if (PREDICT_FALSE (vxlan0->flags != VXLAN_FLAGS_I))
+ return decap_bad_flags;
+
+ /* Make sure VXLAN tunnel exists according to packet SIP, UDP port, VRF, and
+ * VNI */
+ udp_header_t *udp = ip6_next_header (ip6_0);
+ vxlan6_tunnel_key_t key6 = {
+ .key[0] = ip6_0->src_address.as_u64[0],
+ .key[1] = ip6_0->src_address.as_u64[1],
+ .key[2] = ((u64) udp->dst_port << 48) | ((u64) fib_index << 32) |
+ vxlan0->vni_reserved,
+ };
+
+ if (PREDICT_FALSE
+ (clib_bihash_key_compare_24_8 (key6.key, cache->key) == 0))
+ {
+ int rv =
+ clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
+ if (PREDICT_FALSE (rv != 0))
+ return decap_not_found;
+
+ *cache = key6;
+ }
+ vxlan_tunnel_t *t0 = pool_elt_at_index (vxm->tunnels, cache->value);
+
+ /* Validate VXLAN tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address, &t0->src.ip6)))
+ *stats_sw_if_index = t0->sw_if_index;
+ else
+ {
+ /* try multicast */
+ if (PREDICT_TRUE (!ip6_address_is_multicast (&ip6_0->dst_address)))
+ return decap_not_found;
+
+ /* Make sure mcast VXLAN tunnel exists by packet DIP and VNI */
+ key6.key[0] = ip6_0->dst_address.as_u64[0];
+ key6.key[1] = ip6_0->dst_address.as_u64[1];
+ int rv =
+ clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
+ if (PREDICT_FALSE (rv != 0))
+ return decap_not_found;
+
+ vxlan_tunnel_t *mcast_t0 = pool_elt_at_index (vxm->tunnels, key6.value);
+ *stats_sw_if_index = mcast_t0->sw_if_index;
+ }
+
+ vxlan_decap_info_t di = {
+ .sw_if_index = t0->sw_if_index,
+ .next_index = t0->decap_next_index,
+ };
+ return di;
+}
+
+always_inline uword
+vxlan_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, u32 is_ip4)
+{
+ vxlan_main_t *vxm = &vxlan_main;
+ vnet_main_t *vnm = vxm->vnet_main;
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vlib_combined_counter_main_t *rx_counter =
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX;
+ last_tunnel_cache4 last4;
+ last_tunnel_cache6 last6;
+ u32 pkts_dropped = 0;
+ u32 thread_index = vlib_get_thread_index ();
+
+ if (is_ip4)
+ clib_memset (&last4, 0xff, sizeof last4);
+ else
+ clib_memset (&last6, 0xff, sizeof last6);
+
+ u32 *from = vlib_frame_vector_args (from_frame);
+ u32 n_left_from = from_frame->n_vectors;
+
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+
+ u32 stats_if0 = ~0, stats_if1 = ~0;
+ u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+ while (n_left_from >= 4)
+ {
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ vlib_prefetch_buffer_header (b[3], LOAD);
+
+ /* udp leaves current_data pointing at the vxlan header */
+ void *cur0 = vlib_buffer_get_current (b[0]);
+ void *cur1 = vlib_buffer_get_current (b[1]);
+ vxlan_header_t *vxlan0 = cur0;
+ vxlan_header_t *vxlan1 = cur1;
+
+
+ ip4_header_t *ip4_0, *ip4_1;
+ ip6_header_t *ip6_0, *ip6_1;
+ if (is_ip4)
+ {
+ ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t);
+ ip4_1 = cur1 - sizeof (udp_header_t) - sizeof (ip4_header_t);
+ }
+ else
+ {
+ ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t);
+ ip6_1 = cur1 - sizeof (udp_header_t) - sizeof (ip6_header_t);
+ }
+
+ /* pop vxlan */
+ vlib_buffer_advance (b[0], sizeof *vxlan0);
+ vlib_buffer_advance (b[1], sizeof *vxlan1);
+
+ u32 fi0 = vlib_buffer_get_ip_fib_index (b[0], is_ip4);
+ u32 fi1 = vlib_buffer_get_ip_fib_index (b[1], is_ip4);
+
+ vxlan_decap_info_t di0 = is_ip4 ?
+ vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_if0) :
+ vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_if0);
+ vxlan_decap_info_t di1 = is_ip4 ?
+ vxlan4_find_tunnel (vxm, &last4, fi1, ip4_1, vxlan1, &stats_if1) :
+ vxlan6_find_tunnel (vxm, &last6, fi1, ip6_1, vxlan1, &stats_if1);
+
+ /* Prefetch next iteration. */
+ clib_prefetch_load (b[2]->data);
+ clib_prefetch_load (b[3]->data);
+
+ u32 len0 = vlib_buffer_length_in_chain (vm, b[0]);
+ u32 len1 = vlib_buffer_length_in_chain (vm, b[1]);
+
+ next[0] = di0.next_index;
+ next[1] = di1.next_index;
+
+ u8 any_error = di0.error | di1.error;
+ if (PREDICT_TRUE (any_error == 0))
+ {
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b[0]);
+ vnet_update_l2_len (b[1]);
+ /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
+ vnet_buffer (b[1])->sw_if_index[VLIB_RX] = di1.sw_if_index;
+ vlib_increment_combined_counter (rx_counter, thread_index,
+ stats_if0, 1, len0);
+ vlib_increment_combined_counter (rx_counter, thread_index,
+ stats_if1, 1, len1);
+ }
+ else
+ {
+ if (di0.error == 0)
+ {
+ vnet_update_l2_len (b[0]);
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
+ vlib_increment_combined_counter (rx_counter, thread_index,
+ stats_if0, 1, len0);
+ }
+ else
+ {
+ b[0]->error = node->errors[di0.error];
+ pkts_dropped++;
+ }
+
+ if (di1.error == 0)
+ {
+ vnet_update_l2_len (b[1]);
+ vnet_buffer (b[1])->sw_if_index[VLIB_RX] = di1.sw_if_index;
+ vlib_increment_combined_counter (rx_counter, thread_index,
+ stats_if1, 1, len1);
+ }
+ else
+ {
+ b[1]->error = node->errors[di1.error];
+ pkts_dropped++;
+ }
+ }
+
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_rx_trace_t *tr =
+ vlib_add_trace (vm, node, b[0], sizeof (*tr));
+ tr->next_index = next[0];
+ tr->error = di0.error;
+ tr->tunnel_index = di0.sw_if_index == ~0 ?
+ ~0 : vxm->tunnel_index_by_sw_if_index[di0.sw_if_index];
+ tr->vni = vnet_get_vni (vxlan0);
+ }
+ if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_rx_trace_t *tr =
+ vlib_add_trace (vm, node, b[1], sizeof (*tr));
+ tr->next_index = next[1];
+ tr->error = di1.error;
+ tr->tunnel_index = di1.sw_if_index == ~0 ?
+ ~0 : vxm->tunnel_index_by_sw_if_index[di1.sw_if_index];
+ tr->vni = vnet_get_vni (vxlan1);
+ }
+ b += 2;
+ next += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0)
+ {
+ /* udp leaves current_data pointing at the vxlan header */
+ void *cur0 = vlib_buffer_get_current (b[0]);
+ vxlan_header_t *vxlan0 = cur0;
+ ip4_header_t *ip4_0;
+ ip6_header_t *ip6_0;
+ if (is_ip4)
+ ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t);
+ else
+ ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t);
+
+ /* pop (ip, udp, vxlan) */
+ vlib_buffer_advance (b[0], sizeof (*vxlan0));
+
+ u32 fi0 = vlib_buffer_get_ip_fib_index (b[0], is_ip4);
+
+ vxlan_decap_info_t di0 = is_ip4 ?
+ vxlan4_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan0, &stats_if0) :
+ vxlan6_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan0, &stats_if0);
+
+ uword len0 = vlib_buffer_length_in_chain (vm, b[0]);
+
+ next[0] = di0.next_index;
+
+ /* Validate VXLAN tunnel encap-fib index against packet */
+ if (di0.error == 0)
+ {
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ vnet_update_l2_len (b[0]);
+
+ /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX] = di0.sw_if_index;
+
+ vlib_increment_combined_counter (rx_counter, thread_index,
+ stats_if0, 1, len0);
+ }
+ else
+ {
+ b[0]->error = node->errors[di0.error];
+ pkts_dropped++;
+ }
+
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_rx_trace_t *tr
+ = vlib_add_trace (vm, node, b[0], sizeof (*tr));
+ tr->next_index = next[0];
+ tr->error = di0.error;
+ tr->tunnel_index = di0.sw_if_index == ~0 ?
+ ~0 : vxm->tunnel_index_by_sw_if_index[di0.sw_if_index];
+ tr->vni = vnet_get_vni (vxlan0);
+ }
+ b += 1;
+ next += 1;
+ n_left_from -= 1;
+ }
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, from_frame->n_vectors);
+ /* Do we still need this now that tunnel tx stats are kept? */
+ u32 node_idx = is_ip4 ? vxlan4_input_node.index : vxlan6_input_node.index;
+ vlib_node_increment_counter (vm, node_idx, VXLAN_ERROR_DECAPSULATED,
+ from_frame->n_vectors - pkts_dropped);
+
+ return from_frame->n_vectors;
+}
+
+VLIB_NODE_FN (vxlan4_input_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return vxlan_input (vm, node, from_frame, /* is_ip4 */ 1);
+}
+
+VLIB_NODE_FN (vxlan6_input_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ return vxlan_input (vm, node, from_frame, /* is_ip4 */ 0);
+}
+
+static char *vxlan_error_strings[] = {
+#define vxlan_error(n,s) s,
+#include <vxlan/vxlan_error.def>
+#undef vxlan_error
+};
+
+VLIB_REGISTER_NODE (vxlan4_input_node) =
+{
+ .name = "vxlan4-input",
+ .vector_size = sizeof (u32),
+ .n_errors = VXLAN_N_ERROR,
+ .error_strings = vxlan_error_strings,
+ .n_next_nodes = VXLAN_INPUT_N_NEXT,
+ .format_trace = format_vxlan_rx_trace,
+ .next_nodes = {
+#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n,
+ foreach_vxlan_input_next
+#undef _
+ },
+};
+
+VLIB_REGISTER_NODE (vxlan6_input_node) =
+{
+ .name = "vxlan6-input",
+ .vector_size = sizeof (u32),
+ .n_errors = VXLAN_N_ERROR,
+ .error_strings = vxlan_error_strings,
+ .n_next_nodes = VXLAN_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [VXLAN_INPUT_NEXT_##s] = n,
+ foreach_vxlan_input_next
+#undef _
+ },
+ .format_trace = format_vxlan_rx_trace,
+};
+
+typedef enum
+{
+ IP_VXLAN_BYPASS_NEXT_DROP,
+ IP_VXLAN_BYPASS_NEXT_VXLAN,
+ IP_VXLAN_BYPASS_N_NEXT,
+} ip_vxlan_bypass_next_t;
+
+always_inline uword
+ip_vxlan_bypass_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, u32 is_ip4)
+{
+ vxlan_main_t *vxm = &vxlan_main;
+ u32 *from, *to_next, n_left_from, n_left_to_next, next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+ vtep4_key_t last_vtep4; /* last IPv4 address / fib index
+ matching a local VTEP address */
+ vtep6_key_t last_vtep6; /* last IPv6 address / fib index
+ matching a local VTEP address */
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+
+ last_tunnel_cache4 last4;
+ last_tunnel_cache6 last6;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ if (is_ip4)
+ {
+ vtep4_key_init (&last_vtep4);
+ clib_memset (&last4, 0xff, sizeof last4);
+ }
+ else
+ {
+ vtep6_key_init (&last_vtep6);
+ clib_memset (&last6, 0xff, sizeof last6);
+ }
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ ip4_header_t *ip40, *ip41;
+ ip6_header_t *ip60, *ip61;
+ udp_header_t *udp0, *udp1;
+ vxlan_header_t *vxlan0, *vxlan1;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ u32 bi1, ip_len1, udp_len1, flags1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, good_udp0, proto0;
+ u8 error1, good_udp1, proto1;
+ u32 stats_if0 = ~0, stats_if1 = ~0;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ vlib_prefetch_buffer_header (b[3], LOAD);
+
+ CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ b0 = b[0];
+ b1 = b[1];
+ b += 2;
+ if (is_ip4)
+ {
+ ip40 = vlib_buffer_get_current (b0);
+ ip41 = vlib_buffer_get_current (b1);
+ }
+ else
+ {
+ ip60 = vlib_buffer_get_current (b0);
+ ip61 = vlib_buffer_get_current (b1);
+ }
+
+ /* Setup packet for next IP feature */
+ vnet_feature_next (&next0, b0);
+ vnet_feature_next (&next1, b1);
+
+ if (is_ip4)
+ {
+ /* Treat IP frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
+ proto1 = ip4_is_fragment (ip41) ? 0xfe : ip41->protocol;
+ }
+ else
+ {
+ proto0 = ip60->protocol;
+ proto1 = ip61->protocol;
+ }
+
+ /* Process packet 0 */
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit0; /* not UDP packet */
+
+ if (is_ip4)
+ udp0 = ip4_next_header (ip40);
+ else
+ udp0 = ip6_next_header (ip60);
+
+ u32 fi0 = vlib_buffer_get_ip_fib_index (b0, is_ip4);
+ vxlan0 = vlib_buffer_get_current (b0) + sizeof (udp_header_t) +
+ sizeof (ip4_header_t);
+
+ vxlan_decap_info_t di0 =
+ is_ip4 ?
+ vxlan4_find_tunnel (vxm, &last4, fi0, ip40, vxlan0, &stats_if0) :
+ vxlan6_find_tunnel (vxm, &last6, fi0, ip60, vxlan0, &stats_if0);
+
+ if (PREDICT_FALSE (di0.sw_if_index == ~0))
+ goto exit0; /* unknown interface */
+
+ /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+#ifdef CLIB_HAVE_VEC512
+ if (!vtep4_check_vector (&vxm->vtep_table, b0, ip40, &last_vtep4,
+ &vxm->vtep4_u512))
+#else
+ if (!vtep4_check (&vxm->vtep_table, b0, ip40, &last_vtep4))
+#endif
+ goto exit0; /* no local VTEP for VXLAN packet */
+ }
+ else
+ {
+ if (!vtep6_check (&vxm->vtep_table, b0, ip60, &last_vtep6))
+ goto exit0; /* no local VTEP for VXLAN packet */
+ }
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if (is_ip4)
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ else
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ if (is_ip4)
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+ /* vxlan-input node expects current_data at the VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b0,
+ sizeof (ip4_header_t) +
+ sizeof (udp_header_t));
+ else
+ vlib_buffer_advance (b0,
+ sizeof (ip6_header_t) +
+ sizeof (udp_header_t));
+
+ exit0:
+ /* Process packet 1 */
+ if (proto1 != IP_PROTOCOL_UDP)
+ goto exit1; /* not UDP packet */
+
+ if (is_ip4)
+ udp1 = ip4_next_header (ip41);
+ else
+ udp1 = ip6_next_header (ip61);
+
+ u32 fi1 = vlib_buffer_get_ip_fib_index (b1, is_ip4);
+ vxlan1 = (vxlan_header_t *) (udp1 + 1);
+
+ vxlan_decap_info_t di1 =
+ is_ip4 ?
+ vxlan4_find_tunnel (vxm, &last4, fi1, ip41, vxlan1, &stats_if1) :
+ vxlan6_find_tunnel (vxm, &last6, fi1, ip61, vxlan1, &stats_if1);
+
+ if (PREDICT_FALSE (di1.sw_if_index == ~0))
+ goto exit1; /* unknown interface */
+
+ /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+#ifdef CLIB_HAVE_VEC512
+ if (!vtep4_check_vector (&vxm->vtep_table, b1, ip41, &last_vtep4,
+ &vxm->vtep4_u512))
+#else
+ if (!vtep4_check (&vxm->vtep_table, b1, ip41, &last_vtep4))
+#endif
+ goto exit1; /* no local VTEP for VXLAN packet */
+ }
+ else
+ {
+ if (!vtep6_check (&vxm->vtep_table, b1, ip61, &last_vtep6))
+ goto exit1; /* no local VTEP for VXLAN packet */
+ }
+
+ flags1 = b1->flags;
+ good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp1 |= udp1->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len1 = clib_net_to_host_u16 (ip41->length);
+ else
+ ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
+ udp_len1 = clib_net_to_host_u16 (udp1->length);
+ len_diff1 = ip_len1 - udp_len1;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp1))
+ {
+ if (is_ip4)
+ flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
+ else
+ flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
+ good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ if (is_ip4)
+ {
+ error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next1 = error1 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b1->error = error1 ? error_node->errors[error1] : 0;
+
+ /* the vxlan-input node expects current to point at the VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b1,
+ sizeof (ip4_header_t) +
+ sizeof (udp_header_t));
+ else
+ vlib_buffer_advance (b1,
+ sizeof (ip6_header_t) +
+ sizeof (udp_header_t));
+
+ exit1:
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *b0;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ udp_header_t *udp0;
+ vxlan_header_t *vxlan0;
+ u32 bi0, ip_len0, udp_len0, flags0, next0;
+ i32 len_diff0;
+ u8 error0, good_udp0, proto0;
+ u32 stats_if0 = ~0;
+
+ bi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ b0 = b[0];
+ b++;
+ if (is_ip4)
+ ip40 = vlib_buffer_get_current (b0);
+ else
+ ip60 = vlib_buffer_get_current (b0);
+
+ /* Setup packet for next IP feature */
+ vnet_feature_next (&next0, b0);
+
+ if (is_ip4)
+ /* Treat IP4 fragments as "experimental" protocol (0xfe) for now,
+    until IP fragment reassembly is supported */
+ proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
+ else
+ proto0 = ip60->protocol;
+
+ if (proto0 != IP_PROTOCOL_UDP)
+ goto exit; /* not UDP packet */
+
+ if (is_ip4)
+ udp0 = ip4_next_header (ip40);
+ else
+ udp0 = ip6_next_header (ip60);
+
+ u32 fi0 = vlib_buffer_get_ip_fib_index (b0, is_ip4);
+ vxlan0 = (vxlan_header_t *) (udp0 + 1);
+
+ vxlan_decap_info_t di0 =
+ is_ip4 ?
+ vxlan4_find_tunnel (vxm, &last4, fi0, ip40, vxlan0, &stats_if0) :
+ vxlan6_find_tunnel (vxm, &last6, fi0, ip60, vxlan0, &stats_if0);
+
+ if (PREDICT_FALSE (di0.sw_if_index == ~0))
+ goto exit; /* unknown interface */
+
+ /* Validate DIP against VTEPs */
+ if (is_ip4)
+ {
+#ifdef CLIB_HAVE_VEC512
+ if (!vtep4_check_vector (&vxm->vtep_table, b0, ip40, &last_vtep4,
+ &vxm->vtep4_u512))
+#else
+ if (!vtep4_check (&vxm->vtep_table, b0, ip40, &last_vtep4))
+#endif
+ goto exit; /* no local VTEP for VXLAN packet */
+ }
+ else
+ {
+ if (!vtep6_check (&vxm->vtep_table, b0, ip60, &last_vtep6))
+ goto exit; /* no local VTEP for VXLAN packet */
+ }
+
+ flags0 = b0->flags;
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_udp0 |= udp0->checksum == 0;
+
+ /* Verify UDP length */
+ if (is_ip4)
+ ip_len0 = clib_net_to_host_u16 (ip40->length);
+ else
+ ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+
+ /* Verify UDP checksum */
+ if (PREDICT_FALSE (!good_udp0))
+ {
+ if (is_ip4)
+ flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+ else
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+ good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ if (is_ip4)
+ {
+ error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+ }
+ else
+ {
+ error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+ error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+ }
+
+ next0 = error0 ?
+ IP_VXLAN_BYPASS_NEXT_DROP : IP_VXLAN_BYPASS_NEXT_VXLAN;
+ b0->error = error0 ? error_node->errors[error0] : 0;
+
+ /* the vxlan-input node expects current to point at the VXLAN header */
+ if (is_ip4)
+ vlib_buffer_advance (b0,
+ sizeof (ip4_header_t) +
+ sizeof (udp_header_t));
+ else
+ vlib_buffer_advance (b0,
+ sizeof (ip6_header_t) +
+ sizeof (udp_header_t));
+
+ exit:
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
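A standalone sketch of the UDP sanity checks applied in the bypass path above; udp_sane and CKSUM_VERIFIED_BY_HW are illustrative stand-ins for the buffer-flag and length logic, not VPP APIs:

    #include <stdint.h>
    #include <stdio.h>

    /* stand-in for VNET_BUFFER_F_L4_CHECKSUM_CORRECT */
    #define CKSUM_VERIFIED_BY_HW 0x1

    static int
    udp_sane (uint16_t ip_len, uint16_t udp_len, uint16_t udp_csum,
              uint32_t buf_flags)
    {
      /* RFC 7348 permits a zero outer UDP checksum, so zero is accepted
         without verification, mirroring good_udp0 |= checksum == 0 above */
      int good_csum = (buf_flags & CKSUM_VERIFIED_BY_HW) || udp_csum == 0;
      /* the UDP datagram must fit in the IP length, mirroring len_diff0 */
      int good_len = (int32_t) ip_len - (int32_t) udp_len >= 0;
      return good_csum && good_len;
    }

    int
    main (void)
    {
      printf ("%d\n", udp_sane (120, 120, 0, 0)); /* 1: zero csum, fits */
      printf ("%d\n", udp_sane (100, 120, 7, 0)); /* 0: udp exceeds ip */
      return 0;
    }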
+
+VLIB_NODE_FN (ip4_vxlan_bypass_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
+}
+
+VLIB_REGISTER_NODE (ip4_vxlan_bypass_node) =
+{
+ .name = "ip4-vxlan-bypass",
+ .vector_size = sizeof (u32),
+ .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
+ .next_nodes = {
+ [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
+ [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan4-input",
+ },
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_forward_next_trace,
+};
+
+
+/* Dummy init function to get us linked in. */
+static clib_error_t *
+ip4_vxlan_bypass_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_vxlan_bypass_init);
+
+VLIB_NODE_FN (ip6_vxlan_bypass_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
+}
+
+VLIB_REGISTER_NODE (ip6_vxlan_bypass_node) =
+{
+ .name = "ip6-vxlan-bypass",
+ .vector_size = sizeof (u32),
+ .n_next_nodes = IP_VXLAN_BYPASS_N_NEXT,
+ .next_nodes = {
+ [IP_VXLAN_BYPASS_NEXT_DROP] = "error-drop",
+ [IP_VXLAN_BYPASS_NEXT_VXLAN] = "vxlan6-input",
+ },
+ .format_buffer = format_ip6_header,
+ .format_trace = format_ip6_forward_next_trace,
+};
+
+
+/* Dummy init function to get us linked in. */
+static clib_error_t *
+ip6_vxlan_bypass_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_vxlan_bypass_init);
+
+#define foreach_vxlan_flow_input_next \
+_(DROP, "error-drop") \
+_(L2_INPUT, "l2-input")
+
+typedef enum
+{
+#define _(s,n) VXLAN_FLOW_NEXT_##s,
+ foreach_vxlan_flow_input_next
+#undef _
+ VXLAN_FLOW_N_NEXT,
+} vxlan_flow_input_next_t;
+
+#define foreach_vxlan_flow_error \
+ _(NONE, "no error") \
+ _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \
+ _(IP_HEADER_ERROR, "Rx ip header errors") \
+ _(UDP_CHECKSUM_ERROR, "Rx udp checksum errors") \
+ _(UDP_LENGTH_ERROR, "Rx udp length errors")
+
+typedef enum
+{
+#define _(f,s) VXLAN_FLOW_ERROR_##f,
+ foreach_vxlan_flow_error
+#undef _
+ VXLAN_FLOW_N_ERROR,
+} vxlan_flow_error_t;
+
+static char *vxlan_flow_error_strings[] = {
+#define _(n,s) s,
+ foreach_vxlan_flow_error
+#undef _
+};
+
+
+static_always_inline u8
+vxlan_validate_udp_csum (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ u32 flags = b->flags;
+ enum
+ { offset =
+ sizeof (ip4_header_t) + sizeof (udp_header_t) + sizeof (vxlan_header_t),
+ };
+
+ /* Verify UDP checksum */
+ if ((flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
+ {
+ vlib_buffer_advance (b, -offset);
+ flags = ip4_tcp_udp_validate_checksum (vm, b);
+ vlib_buffer_advance (b, offset);
+ }
+
+ return (flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+}
+
+static_always_inline u8
+vxlan_check_udp_csum (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr;
+ udp_header_t *udp = &hdr->udp;
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ u8 good_csum = (b->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0 ||
+ udp->checksum == 0;
+
+ return !good_csum;
+}
+
+static_always_inline u8
+vxlan_check_ip (vlib_buffer_t * b, u16 payload_len)
+{
+ ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr;
+ u16 ip_len = clib_net_to_host_u16 (hdr->ip4.length);
+ u16 expected = payload_len + sizeof *hdr;
+ return ip_len > expected || hdr->ip4.ttl == 0
+ || hdr->ip4.ip_version_and_header_length != 0x45;
+}
+
+static_always_inline u8
+vxlan_check_ip_udp_len (vlib_buffer_t * b)
+{
+ ip4_vxlan_header_t *hdr = vlib_buffer_get_current (b) - sizeof *hdr;
+ u16 ip_len = clib_net_to_host_u16 (hdr->ip4.length);
+ u16 udp_len = clib_net_to_host_u16 (hdr->udp.length);
+ return udp_len > ip_len;
+}
+
+static_always_inline u8
+vxlan_err_code (u8 ip_err0, u8 udp_err0, u8 csum_err0)
+{
+ u8 error0 = VXLAN_FLOW_ERROR_NONE;
+ if (ip_err0)
+ error0 = VXLAN_FLOW_ERROR_IP_HEADER_ERROR;
+ if (udp_err0)
+ error0 = VXLAN_FLOW_ERROR_UDP_LENGTH_ERROR;
+ if (csum_err0)
+ error0 = VXLAN_FLOW_ERROR_UDP_CHECKSUM_ERROR;
+ return error0;
+}
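The helper above relies on assignment order for precedence: when several checks fail at once, checksum beats UDP-length beats IP-header. A standalone sketch with illustrative names:

    #include <stdio.h>

    enum { ERR_NONE, ERR_IP_HDR, ERR_UDP_LEN, ERR_UDP_CSUM };

    /* mirrors vxlan_err_code: the last assignment wins */
    static unsigned char
    err_code (unsigned char ip_err, unsigned char udp_err,
              unsigned char csum_err)
    {
      unsigned char e = ERR_NONE;
      if (ip_err)
        e = ERR_IP_HDR;
      if (udp_err)
        e = ERR_UDP_LEN;
      if (csum_err)
        e = ERR_UDP_CSUM;
      return e;
    }

    int
    main (void)
    {
      /* all three checks fail: the checksum error is reported */
      printf ("%d\n", err_code (1, 1, 1)); /* prints 3 (ERR_UDP_CSUM) */
      return 0;
    }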
+
+VLIB_NODE_FN (vxlan4_flow_input_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
+{
+ enum
+ { payload_offset = sizeof (ip4_vxlan_header_t) };
+
+ vxlan_main_t *vxm = &vxlan_main;
+ vnet_interface_main_t *im = &vnet_main.interface_main;
+ vlib_combined_counter_main_t *rx_counter[VXLAN_FLOW_N_NEXT] = {
+ [VXLAN_FLOW_NEXT_DROP] =
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP,
+ [VXLAN_FLOW_NEXT_L2_INPUT] =
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ };
+ u32 thread_index = vlib_get_thread_index ();
+
+ u32 *from = vlib_frame_vector_args (f);
+ u32 n_left_from = f->n_vectors;
+ u32 next_index = VXLAN_FLOW_NEXT_L2_INPUT;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next, *to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 3 && n_left_to_next > 3)
+ {
+ u32 bi0 = to_next[0] = from[0];
+ u32 bi1 = to_next[1] = from[1];
+ u32 bi2 = to_next[2] = from[2];
+ u32 bi3 = to_next[3] = from[3];
+ from += 4;
+ n_left_from -= 4;
+ to_next += 4;
+ n_left_to_next -= 4;
+
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ vlib_buffer_t *b1 = vlib_get_buffer (vm, bi1);
+ vlib_buffer_t *b2 = vlib_get_buffer (vm, bi2);
+ vlib_buffer_t *b3 = vlib_get_buffer (vm, bi3);
+
+ vlib_buffer_advance (b0, payload_offset);
+ vlib_buffer_advance (b1, payload_offset);
+ vlib_buffer_advance (b2, payload_offset);
+ vlib_buffer_advance (b3, payload_offset);
+
+ u16 len0 = vlib_buffer_length_in_chain (vm, b0);
+ u16 len1 = vlib_buffer_length_in_chain (vm, b1);
+ u16 len2 = vlib_buffer_length_in_chain (vm, b2);
+ u16 len3 = vlib_buffer_length_in_chain (vm, b3);
+
+ u32 next0 = VXLAN_FLOW_NEXT_L2_INPUT;
+ u32 next1 = VXLAN_FLOW_NEXT_L2_INPUT;
+ u32 next2 = VXLAN_FLOW_NEXT_L2_INPUT;
+ u32 next3 = VXLAN_FLOW_NEXT_L2_INPUT;
+
+ u8 ip_err0 = vxlan_check_ip (b0, len0);
+ u8 ip_err1 = vxlan_check_ip (b1, len1);
+ u8 ip_err2 = vxlan_check_ip (b2, len2);
+ u8 ip_err3 = vxlan_check_ip (b3, len3);
+ u8 ip_err = ip_err0 | ip_err1 | ip_err2 | ip_err3;
+
+ u8 udp_err0 = vxlan_check_ip_udp_len (b0);
+ u8 udp_err1 = vxlan_check_ip_udp_len (b1);
+ u8 udp_err2 = vxlan_check_ip_udp_len (b2);
+ u8 udp_err3 = vxlan_check_ip_udp_len (b3);
+ u8 udp_err = udp_err0 | udp_err1 | udp_err2 | udp_err3;
+
+ u8 csum_err0 = vxlan_check_udp_csum (vm, b0);
+ u8 csum_err1 = vxlan_check_udp_csum (vm, b1);
+ u8 csum_err2 = vxlan_check_udp_csum (vm, b2);
+ u8 csum_err3 = vxlan_check_udp_csum (vm, b3);
+ u8 csum_err = csum_err0 | csum_err1 | csum_err2 | csum_err3;
+
+ if (PREDICT_FALSE (csum_err))
+ {
+ if (csum_err0)
+ csum_err0 = !vxlan_validate_udp_csum (vm, b0);
+ if (csum_err1)
+ csum_err1 = !vxlan_validate_udp_csum (vm, b1);
+ if (csum_err2)
+ csum_err2 = !vxlan_validate_udp_csum (vm, b2);
+ if (csum_err3)
+ csum_err3 = !vxlan_validate_udp_csum (vm, b3);
+ csum_err = csum_err0 | csum_err1 | csum_err2 | csum_err3;
+ }
+
+ if (PREDICT_FALSE (ip_err || udp_err || csum_err))
+ {
+ if (ip_err0 || udp_err0 || csum_err0)
+ {
+ next0 = VXLAN_FLOW_NEXT_DROP;
+ u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
+ b0->error = node->errors[error0];
+ }
+ if (ip_err1 || udp_err1 || csum_err1)
+ {
+ next1 = VXLAN_FLOW_NEXT_DROP;
+ u8 error1 = vxlan_err_code (ip_err1, udp_err1, csum_err1);
+ b1->error = node->errors[error1];
+ }
+ if (ip_err2 || udp_err2 || csum_err2)
+ {
+ next2 = VXLAN_FLOW_NEXT_DROP;
+ u8 error2 = vxlan_err_code (ip_err2, udp_err2, csum_err2);
+ b2->error = node->errors[error2];
+ }
+ if (ip_err3 || udp_err3 || csum_err3)
+ {
+ next3 = VXLAN_FLOW_NEXT_DROP;
+ u8 error3 = vxlan_err_code (ip_err3, udp_err3, csum_err3);
+ b3->error = node->errors[error3];
+ }
+ }
+
+ vnet_update_l2_len (b0);
+ vnet_update_l2_len (b1);
+ vnet_update_l2_len (b2);
+ vnet_update_l2_len (b3);
+
+ ASSERT (b0->flow_id != 0);
+ ASSERT (b1->flow_id != 0);
+ ASSERT (b2->flow_id != 0);
+ ASSERT (b3->flow_id != 0);
+
+ u32 t_index0 = b0->flow_id - vxm->flow_id_start;
+ u32 t_index1 = b1->flow_id - vxm->flow_id_start;
+ u32 t_index2 = b2->flow_id - vxm->flow_id_start;
+ u32 t_index3 = b3->flow_id - vxm->flow_id_start;
+
+ vxlan_tunnel_t *t0 = &vxm->tunnels[t_index0];
+ vxlan_tunnel_t *t1 = &vxm->tunnels[t_index1];
+ vxlan_tunnel_t *t2 = &vxm->tunnels[t_index2];
+ vxlan_tunnel_t *t3 = &vxm->tunnels[t_index3];
+
+ /* flow id consumed */
+ b0->flow_id = 0;
+ b1->flow_id = 0;
+ b2->flow_id = 0;
+ b3->flow_id = 0;
+
+ u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ t0->sw_if_index;
+ u32 sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX] =
+ t1->sw_if_index;
+ u32 sw_if_index2 = vnet_buffer (b2)->sw_if_index[VLIB_RX] =
+ t2->sw_if_index;
+ u32 sw_if_index3 = vnet_buffer (b3)->sw_if_index[VLIB_RX] =
+ t3->sw_if_index;
+
+ vlib_increment_combined_counter (rx_counter[next0], thread_index,
+ sw_if_index0, 1, len0);
+ vlib_increment_combined_counter (rx_counter[next1], thread_index,
+ sw_if_index1, 1, len1);
+ vlib_increment_combined_counter (rx_counter[next2], thread_index,
+ sw_if_index2, 1, len2);
+ vlib_increment_combined_counter (rx_counter[next3], thread_index,
+ sw_if_index3, 1, len3);
+
+ u32 flags = b0->flags | b1->flags | b2->flags | b3->flags;
+
+ if (PREDICT_FALSE (flags & VLIB_BUFFER_IS_TRACED))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ vxlan_rx_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof *tr);
+ u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = t_index0;
+ tr->vni = t0->vni;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ vxlan_rx_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof *tr);
+ u8 error1 = vxlan_err_code (ip_err1, udp_err1, csum_err1);
+ tr->next_index = next1;
+ tr->error = error1;
+ tr->tunnel_index = t_index1;
+ tr->vni = t1->vni;
+ }
+ if (b2->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ vxlan_rx_trace_t *tr =
+ vlib_add_trace (vm, node, b2, sizeof *tr);
+ u8 error2 = vxlan_err_code (ip_err2, udp_err2, csum_err2);
+ tr->next_index = next2;
+ tr->error = error2;
+ tr->tunnel_index = t_index2;
+ tr->vni = t2->vni;
+ }
+ if (b3->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ vxlan_rx_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof *tr);
+ u8 error3 = vxlan_err_code (ip_err3, udp_err3, csum_err3);
+ tr->next_index = next3;
+ tr->error = error3;
+ tr->tunnel_index = t_index3;
+ tr->vni = t3->vni;
+ }
+ }
+ vlib_validate_buffer_enqueue_x4
+ (vm, node, next_index, to_next, n_left_to_next,
+ bi0, bi1, bi2, bi3, next0, next1, next2, next3);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0 = to_next[0] = from[0];
+ from++;
+ n_left_from--;
+ to_next++;
+ n_left_to_next--;
+
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+ vlib_buffer_advance (b0, payload_offset);
+
+ u16 len0 = vlib_buffer_length_in_chain (vm, b0);
+ u32 next0 = VXLAN_FLOW_NEXT_L2_INPUT;
+
+ u8 ip_err0 = vxlan_check_ip (b0, len0);
+ u8 udp_err0 = vxlan_check_ip_udp_len (b0);
+ u8 csum_err0 = vxlan_check_udp_csum (vm, b0);
+
+ if (csum_err0)
+ csum_err0 = !vxlan_validate_udp_csum (vm, b0);
+ if (ip_err0 || udp_err0 || csum_err0)
+ {
+ next0 = VXLAN_FLOW_NEXT_DROP;
+ u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
+ b0->error = node->errors[error0];
+ }
+
+ vnet_update_l2_len (b0);
+
+ ASSERT (b0->flow_id != 0);
+ u32 t_index0 = b0->flow_id - vxm->flow_id_start;
+ vxlan_tunnel_t *t0 = &vxm->tunnels[t_index0];
+ b0->flow_id = 0;
+
+ u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ t0->sw_if_index;
+ vlib_increment_combined_counter (rx_counter[next0], thread_index,
+ sw_if_index0, 1, len0);
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_rx_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof *tr);
+ u8 error0 = vxlan_err_code (ip_err0, udp_err0, csum_err0);
+ tr->next_index = next0;
+ tr->error = error0;
+ tr->tunnel_index = t_index0;
+ tr->vni = t0->vni;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return f->n_vectors;
+}
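A standalone sketch of the flow_id-to-tunnel mapping used above: the NIC stamps an offloaded flow id on the buffer, and the tunnel index is recovered by subtracting the block base flow_id_start. The values and the tunnel array here are hypothetical:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint32_t vni; } tunnel_t;

    int
    main (void)
    {
      tunnel_t tunnels[4] = { { 10 }, { 20 }, { 30 }, { 40 } };
      uint32_t flow_id_start = 100; /* base assigned at flow registration */
      uint32_t flow_id = 102;       /* stamped on the buffer by the NIC */

      assert (flow_id != 0);        /* 0 would mean "no flow matched" */
      uint32_t t_index = flow_id - flow_id_start;
      printf ("tunnel %u vni %u\n", t_index, tunnels[t_index].vni);
      return 0;
    }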
+
+#ifndef CLIB_MULTIARCH_VARIANT
+VLIB_REGISTER_NODE (vxlan4_flow_input_node) = {
+ .name = "vxlan-flow-input",
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_vxlan_rx_trace,
+
+ .n_errors = VXLAN_FLOW_N_ERROR,
+ .error_strings = vxlan_flow_error_strings,
+
+ .n_next_nodes = VXLAN_FLOW_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [VXLAN_FLOW_NEXT_##s] = n,
+ foreach_vxlan_flow_input_next
+#undef _
+ },
+};
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vxlan/dir.dox b/src/plugins/vxlan/dir.dox
new file mode 100644
index 00000000000..31a9e2b6112
--- /dev/null
+++ b/src/plugins/vxlan/dir.dox
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+@dir
+@brief VXLAN Code.
+
+This directory contains source code to support VXLAN.
+
+*/
+/*? %%clicmd:group_label VXLAN CLI %% ?*/
diff --git a/src/plugins/vxlan/encap.c b/src/plugins/vxlan/encap.c
new file mode 100644
index 00000000000..98464d809ba
--- /dev/null
+++ b/src/plugins/vxlan/encap.c
@@ -0,0 +1,538 @@
+
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/interface_output.h>
+#include <vxlan/vxlan.h>
+#include <vnet/qos/qos_types.h>
+#include <vnet/adj/rewrite.h>
+
+/* Statistics (not all errors) */
+#define foreach_vxlan_encap_error \
+_(ENCAPSULATED, "good packets encapsulated")
+
+static char *vxlan_encap_error_strings[] = {
+#define _(sym,string) string,
+ foreach_vxlan_encap_error
+#undef _
+};
+
+typedef enum
+{
+#define _(sym,str) VXLAN_ENCAP_ERROR_##sym,
+ foreach_vxlan_encap_error
+#undef _
+ VXLAN_ENCAP_N_ERROR,
+} vxlan_encap_error_t;
+
+typedef enum
+{
+ VXLAN_ENCAP_NEXT_DROP,
+ VXLAN_ENCAP_N_NEXT,
+} vxlan_encap_next_t;
+
+typedef struct
+{
+ u32 tunnel_index;
+ u32 vni;
+} vxlan_encap_trace_t;
+
+#ifndef CLIB_MARCH_VARIANT
+u8 *
+format_vxlan_encap_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vxlan_encap_trace_t *t = va_arg (*args, vxlan_encap_trace_t *);
+
+ s = format (s, "VXLAN encap to vxlan_tunnel%d vni %d",
+ t->tunnel_index, t->vni);
+ return s;
+}
+#endif
+
+always_inline uword
+vxlan_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *from_frame, u8 is_ip4)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ vxlan_main_t *vxm = &vxlan_main;
+ vnet_main_t *vnm = vxm->vnet_main;
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vlib_combined_counter_main_t *tx_counter =
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX;
+ u32 pkts_encapsulated = 0;
+ u32 thread_index = vlib_get_thread_index ();
+ u32 sw_if_index0 = 0, sw_if_index1 = 0;
+ u32 next0 = 0, next1 = 0;
+ vxlan_tunnel_t *t0 = NULL, *t1 = NULL;
+ index_t dpoi_idx0 = INDEX_INVALID, dpoi_idx1 = INDEX_INVALID;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
+ vlib_buffer_t **b = bufs;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ next_index = node->cached_next_index;
+
+ STATIC_ASSERT_SIZEOF (ip6_vxlan_header_t, 56);
+ STATIC_ASSERT_SIZEOF (ip4_vxlan_header_t, 36);
+
+ u8 const underlay_hdr_len = is_ip4 ?
+ sizeof (ip4_vxlan_header_t) : sizeof (ip6_vxlan_header_t);
+ u16 const l3_len = is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
+ u32 const outer_packet_csum_offload_flags =
+ is_ip4 ? (VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN) :
+ (VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN);
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ /* Prefetch next iteration. */
+ {
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ vlib_prefetch_buffer_header (b[3], LOAD);
+
+ CLIB_PREFETCH (b[2]->data - CLIB_CACHE_LINE_BYTES,
+ 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b[3]->data - CLIB_CACHE_LINE_BYTES,
+ 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ u32 bi0 = to_next[0] = from[0];
+ u32 bi1 = to_next[1] = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ vlib_buffer_t *b0 = b[0];
+ vlib_buffer_t *b1 = b[1];
+ b += 2;
+
+ u32 flow_hash0 = vnet_l2_compute_flow_hash (b0);
+ u32 flow_hash1 = vnet_l2_compute_flow_hash (b1);
+
+ /* Get next node index and adj index from tunnel next_dpo */
+ if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
+ {
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ vnet_hw_interface_t *hi0 =
+ vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ t0 = &vxm->tunnels[hi0->dev_instance];
+ /* Note: change to always set next0 if it may be set to drop */
+ next0 = t0->next_dpo.dpoi_next_node;
+ dpoi_idx0 = t0->next_dpo.dpoi_index;
+ }
+
+ /* Get next node index and adj index from tunnel next_dpo */
+ if (sw_if_index1 != vnet_buffer (b1)->sw_if_index[VLIB_TX])
+ {
+ if (sw_if_index0 == vnet_buffer (b1)->sw_if_index[VLIB_TX])
+ {
+ sw_if_index1 = sw_if_index0;
+ t1 = t0;
+ next1 = next0;
+ dpoi_idx1 = dpoi_idx0;
+ }
+ else
+ {
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ vnet_hw_interface_t *hi1 =
+ vnet_get_sup_hw_interface (vnm, sw_if_index1);
+ t1 = &vxm->tunnels[hi1->dev_instance];
+ /* Note: change to always set next1 if it may be set to drop */
+ next1 = t1->next_dpo.dpoi_next_node;
+ dpoi_idx1 = t1->next_dpo.dpoi_index;
+ }
+ }
+
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpoi_idx0;
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX] = dpoi_idx1;
+
+ ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
+ ASSERT (t1->rewrite_header.data_bytes == underlay_hdr_len);
+ vnet_rewrite_two_headers (*t0, *t1, vlib_buffer_get_current (b0),
+ vlib_buffer_get_current (b1),
+ underlay_hdr_len);
+
+ vlib_buffer_advance (b0, -underlay_hdr_len);
+ vlib_buffer_advance (b1, -underlay_hdr_len);
+
+ u32 len0 = vlib_buffer_length_in_chain (vm, b0);
+ u32 len1 = vlib_buffer_length_in_chain (vm, b1);
+ u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
+ u16 payload_l1 = clib_host_to_net_u16 (len1 - l3_len);
+
+ void *underlay0 = vlib_buffer_get_current (b0);
+ void *underlay1 = vlib_buffer_get_current (b1);
+
+ ip4_header_t *ip4_0, *ip4_1;
+ qos_bits_t ip4_0_tos = 0, ip4_1_tos = 0;
+ ip6_header_t *ip6_0, *ip6_1;
+ udp_header_t *udp0, *udp1;
+ u8 *l3_0, *l3_1;
+ if (is_ip4)
+ {
+ ip4_vxlan_header_t *hdr0 = underlay0;
+ ip4_vxlan_header_t *hdr1 = underlay1;
+
+ /* Fix the IP4 checksum and length */
+ ip4_0 = &hdr0->ip4;
+ ip4_1 = &hdr1->ip4;
+ ip4_0->length = clib_host_to_net_u16 (len0);
+ ip4_1->length = clib_host_to_net_u16 (len1);
+
+ if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_QOS_DATA_VALID))
+ {
+ ip4_0_tos = vnet_buffer2 (b0)->qos.bits;
+ ip4_0->tos = ip4_0_tos;
+ }
+ if (PREDICT_FALSE (b1->flags & VNET_BUFFER_F_QOS_DATA_VALID))
+ {
+ ip4_1_tos = vnet_buffer2 (b1)->qos.bits;
+ ip4_1->tos = ip4_1_tos;
+ }
+
+ l3_0 = (u8 *) ip4_0;
+ l3_1 = (u8 *) ip4_1;
+ udp0 = &hdr0->udp;
+ udp1 = &hdr1->udp;
+ }
+ else /* ipv6 */
+ {
+ ip6_vxlan_header_t *hdr0 = underlay0;
+ ip6_vxlan_header_t *hdr1 = underlay1;
+
+ /* Fix IP6 payload length */
+ ip6_0 = &hdr0->ip6;
+ ip6_1 = &hdr1->ip6;
+ ip6_0->payload_length = payload_l0;
+ ip6_1->payload_length = payload_l1;
+
+ l3_0 = (u8 *) ip6_0;
+ l3_1 = (u8 *) ip6_1;
+ udp0 = &hdr0->udp;
+ udp1 = &hdr1->udp;
+ }
+
+ /* Fix UDP length and set source port */
+ udp0->length = payload_l0;
+ udp0->src_port = flow_hash0;
+ udp1->length = payload_l1;
+ udp1->src_port = flow_hash1;
+
+ if (b0->flags & VNET_BUFFER_F_OFFLOAD)
+ {
+ vnet_buffer2 (b0)->outer_l3_hdr_offset = l3_0 - b0->data;
+ vnet_buffer2 (b0)->outer_l4_hdr_offset = (u8 *) udp0 - b0->data;
+ vnet_buffer_offload_flags_set (b0,
+ outer_packet_csum_offload_flags);
+ }
+ /* IPv4 checksum only */
+ else if (is_ip4)
+ {
+ ip_csum_t sum0 = ip4_0->checksum;
+ sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
+ length /* changed member */);
+ if (PREDICT_FALSE (ip4_0_tos))
+ {
+ sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
+ tos /* changed member */);
+ }
+ ip4_0->checksum = ip_csum_fold (sum0);
+ }
+ /* IPv6 UDP checksum is mandatory */
+ else
+ {
+ int bogus = 0;
+
+ udp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6_0, &bogus);
+ ASSERT (bogus == 0);
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ }
+
+ if (b1->flags & VNET_BUFFER_F_OFFLOAD)
+ {
+ vnet_buffer2 (b1)->outer_l3_hdr_offset = l3_1 - b1->data;
+ vnet_buffer2 (b1)->outer_l4_hdr_offset = (u8 *) udp1 - b1->data;
+ vnet_buffer_offload_flags_set (b1,
+ outer_packet_csum_offload_flags);
+ }
+ /* IPv4 checksum only */
+ else if (is_ip4)
+ {
+ ip_csum_t sum1 = ip4_1->checksum;
+ sum1 = ip_csum_update (sum1, 0, ip4_1->length, ip4_header_t,
+ length /* changed member */);
+ if (PREDICT_FALSE (ip4_1_tos))
+ {
+ sum1 = ip_csum_update (sum1, 0, ip4_1_tos, ip4_header_t,
+ tos /* changed member */);
+ }
+ ip4_1->checksum = ip_csum_fold (sum1);
+ }
+ /* IPv6 UDP checksum is mandatory */
+ else
+ {
+ int bogus = 0;
+
+ udp1->checksum = ip6_tcp_udp_icmp_compute_checksum
+ (vm, b1, ip6_1, &bogus);
+ ASSERT (bogus == 0);
+ if (udp1->checksum == 0)
+ udp1->checksum = 0xffff;
+ }
+
+ /* save inner packet flow_hash for load-balance node */
+ vnet_buffer (b0)->ip.flow_hash = flow_hash0;
+ vnet_buffer (b1)->ip.flow_hash = flow_hash1;
+
+ if (sw_if_index0 == sw_if_index1)
+ {
+ vlib_increment_combined_counter (tx_counter, thread_index,
+ sw_if_index0, 2, len0 + len1);
+ }
+ else
+ {
+ vlib_increment_combined_counter (tx_counter, thread_index,
+ sw_if_index0, 1, len0);
+ vlib_increment_combined_counter (tx_counter, thread_index,
+ sw_if_index1, 1, len1);
+ }
+ pkts_encapsulated += 2;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->tunnel_index = t0 - vxm->tunnels;
+ tr->vni = t0->vni;
+ }
+
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->tunnel_index = t1 - vxm->tunnels;
+ tr->vni = t1->vni;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0 = to_next[0] = from[0];
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ vlib_buffer_t *b0 = b[0];
+ b += 1;
+
+ u32 flow_hash0 = vnet_l2_compute_flow_hash (b0);
+
+ /* Get next node index and adj index from tunnel next_dpo */
+ if (sw_if_index0 != vnet_buffer (b0)->sw_if_index[VLIB_TX])
+ {
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ vnet_hw_interface_t *hi0 =
+ vnet_get_sup_hw_interface (vnm, sw_if_index0);
+ t0 = &vxm->tunnels[hi0->dev_instance];
+ /* Note: change to always set next0 if it may be set to drop */
+ next0 = t0->next_dpo.dpoi_next_node;
+ dpoi_idx0 = t0->next_dpo.dpoi_index;
+ }
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpoi_idx0;
+
+ ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
+ vnet_rewrite_one_header (*t0, vlib_buffer_get_current (b0),
+ underlay_hdr_len);
+
+ vlib_buffer_advance (b0, -underlay_hdr_len);
+ void *underlay0 = vlib_buffer_get_current (b0);
+
+ u32 len0 = vlib_buffer_length_in_chain (vm, b0);
+ u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
+
+ udp_header_t *udp0;
+ ip4_header_t *ip4_0;
+ qos_bits_t ip4_0_tos = 0;
+ ip6_header_t *ip6_0;
+ u8 *l3_0;
+ if (is_ip4)
+ {
+ ip4_vxlan_header_t *hdr = underlay0;
+
+ /* Fix the IP4 checksum and length */
+ ip4_0 = &hdr->ip4;
+ ip4_0->length = clib_host_to_net_u16 (len0);
+
+ if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_QOS_DATA_VALID))
+ {
+ ip4_0_tos = vnet_buffer2 (b0)->qos.bits;
+ ip4_0->tos = ip4_0_tos;
+ }
+
+ l3_0 = (u8 *) ip4_0;
+ udp0 = &hdr->udp;
+ }
+ else /* ip6 path */
+ {
+ ip6_vxlan_header_t *hdr = underlay0;
+
+ /* Fix IP6 payload length */
+ ip6_0 = &hdr->ip6;
+ ip6_0->payload_length = payload_l0;
+
+ l3_0 = (u8 *) ip6_0;
+ udp0 = &hdr->udp;
+ }
+
+ /* Fix UDP length and set source port */
+ udp0->length = payload_l0;
+ udp0->src_port = flow_hash0;
+
+ if (b0->flags & VNET_BUFFER_F_OFFLOAD)
+ {
+ vnet_buffer2 (b0)->outer_l3_hdr_offset = l3_0 - b0->data;
+ vnet_buffer2 (b0)->outer_l4_hdr_offset = (u8 *) udp0 - b0->data;
+ vnet_buffer_offload_flags_set (b0,
+ outer_packet_csum_offload_flags);
+ }
+ /* IPv4 checksum only */
+ else if (is_ip4)
+ {
+ ip_csum_t sum0 = ip4_0->checksum;
+ sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
+ length /* changed member */);
+ if (PREDICT_FALSE (ip4_0_tos))
+ {
+ sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
+ tos /* changed member */);
+ }
+ ip4_0->checksum = ip_csum_fold (sum0);
+ }
+ /* IPv6 UDP checksum is mandatory */
+ else
+ {
+ int bogus = 0;
+
+ udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
+ (vm, b0, ip6_0, &bogus);
+ ASSERT (bogus == 0);
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ }
+
+ /* reuse inner packet flow_hash for load-balance node */
+ vnet_buffer (b0)->ip.flow_hash = flow_hash0;
+
+ vlib_increment_combined_counter (tx_counter, thread_index,
+ sw_if_index0, 1, len0);
+ pkts_encapsulated++;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ vxlan_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->tunnel_index = t0 - vxm->tunnels;
+ tr->vni = t0->vni;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Do we still need this now that per-tunnel tx stats are kept? */
+ vlib_node_increment_counter (vm, node->node_index,
+ VXLAN_ENCAP_ERROR_ENCAPSULATED,
+ pkts_encapsulated);
+
+ return from_frame->n_vectors;
+}
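A standalone sketch of the incremental checksum update pattern used above (ip_csum_update / ip_csum_fold), following RFC 1624's HC' = ~(~HC + ~m + m'); csum_update16 is an illustrative stand-in and the sample values are arbitrary:

    #include <stdint.h>
    #include <stdio.h>

    /* recompute a 16-bit one's-complement checksum after one field of
       the covered data changes from old_field to new_field (RFC 1624) */
    static uint16_t
    csum_update16 (uint16_t old_csum, uint16_t old_field, uint16_t new_field)
    {
      uint32_t sum = (uint16_t) ~old_csum;
      sum += (uint16_t) ~old_field;
      sum += new_field;
      /* fold carries back into the low 16 bits */
      while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);
      return (uint16_t) ~sum;
    }

    int
    main (void)
    {
      /* the header template carries length 0 at tunnel-create time; the
         encap node patches in the real length and fixes the checksum */
      uint16_t csum = 0xb1e6;                        /* arbitrary example */
      uint16_t fixed = csum_update16 (csum, 0, 150); /* length 0 -> 150 */
      printf ("0x%04x\n", fixed);
      return 0;
    }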
+
+VLIB_NODE_FN (vxlan4_encap_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ /* Checksum offload is not requested here: for the ip4 header checksum
+    alone, the setup overhead in the tx node is not worthwhile unless the
+    udp checksum is also required */
+ return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 1);
+}
+
+VLIB_NODE_FN (vxlan6_encap_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame)
+{
+ /* Enable checksum offload for ip6, as the udp checksum is mandatory */
+ return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 0);
+}
+
+VLIB_REGISTER_NODE (vxlan4_encap_node) = {
+ .name = "vxlan4-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_encap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
+ .error_strings = vxlan_encap_error_strings,
+ .n_next_nodes = VXLAN_ENCAP_N_NEXT,
+ .next_nodes = {
+ [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (vxlan6_encap_node) = {
+ .name = "vxlan6-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_vxlan_encap_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(vxlan_encap_error_strings),
+ .error_strings = vxlan_encap_error_strings,
+ .n_next_nodes = VXLAN_ENCAP_N_NEXT,
+ .next_nodes = {
+ [VXLAN_ENCAP_NEXT_DROP] = "error-drop",
+ },
+};
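Both encap loops above cache the tunnel lookup keyed on the buffer's TX sw_if_index, so bursts dominated by one tunnel pay for a single lookup. A toy sketch of that pattern, assuming simplified stand-in types:

    #include <stdio.h>

    typedef struct { int sw_if_index; int vni; } tunnel_t;

    static tunnel_t *
    lookup (tunnel_t *table, int sw_if_index)
    {
      return &table[sw_if_index]; /* stands in for the hw-interface lookup */
    }

    int
    main (void)
    {
      tunnel_t table[2] = { { 0, 10 }, { 1, 20 } };
      int pkts[] = { 0, 0, 0, 1, 1 }; /* TX interface of each packet */
      int cached_if = -1, lookups = 0;
      tunnel_t *t = 0;

      for (unsigned i = 0; i < sizeof (pkts) / sizeof (pkts[0]); i++)
        if (cached_if != pkts[i])
          {
            cached_if = pkts[i];
            t = lookup (table, cached_if);
            lookups++;
          }

      printf ("5 packets, %d lookups, last vni %d\n", lookups, t->vni);
      return 0;
    }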
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vxlan/plugin.c b/src/plugins/vxlan/plugin.c
new file mode 100644
index 00000000000..eae82830524
--- /dev/null
+++ b/src/plugins/vxlan/plugin.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "VxLan Tunnels",
+};
diff --git a/src/plugins/vxlan/vxlan.api b/src/plugins/vxlan/vxlan.api
new file mode 100644
index 00000000000..9c617ff22c8
--- /dev/null
+++ b/src/plugins/vxlan/vxlan.api
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "2.1.0";
+
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
+
+/** \brief Create or delete a VXLAN tunnel
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - Use 1 to create the tunnel, 0 to remove it
+ @param instance - optional unique custom device instance, else ~0.
+ @param src_address - Source IP address
+ @param dst_address - Destination IP address, can be multicast
+ @param mcast_sw_if_index - Interface for multicast destination
+ @param encap_vrf_id - Encap route table FIB index
+ @param decap_next_index - index of decap next graph node
+ @param vni - The VXLAN Network Identifier, uint24
+*/
+define vxlan_add_del_tunnel
+{
+ option deprecated;
+
+ u32 client_index;
+ u32 context;
+ bool is_add [default=true];
+ u32 instance; /* If non-~0, specifies a custom dev instance */
+ vl_api_address_t src_address;
+ vl_api_address_t dst_address;
+ vl_api_interface_index_t mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_next_index;
+ u32 vni;
+};
+
+/** \brief Create or delete a VXLAN tunnel
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - Use 1 to create the tunnel, 0 to remove it
+ @param instance - optional unique custom device instance, else ~0.
+ @param src_address - Source IP address
+ @param dst_address - Destination IP address, can be multicast
+ @param src_port - Source UDP port. Not used in transmitted packets; used only to register the local UDP port
+ @param dst_port - Destination UDP port
+ @param mcast_sw_if_index - Interface for multicast destination
+ @param encap_vrf_id - Encap route table FIB index
+ @param decap_next_index - index of decap next graph node
+ @param vni - The VXLAN Network Identifier, uint24
+*/
+define vxlan_add_del_tunnel_v2
+{
+ option deprecated;
+
+ u32 client_index;
+ u32 context;
+ bool is_add [default=true];
+ u32 instance [default=0xffffffff]; /* If non-~0, specifies a custom dev instance */
+ vl_api_address_t src_address;
+ vl_api_address_t dst_address;
+ u16 src_port;
+ u16 dst_port;
+ vl_api_interface_index_t mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_next_index;
+ u32 vni;
+};
+
+/** \brief Create or delete a VXLAN tunnel
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - Use 1 to create the tunnel, 0 to remove it
+ @param instance - optional unique custom device instance, else ~0.
+ @param src_address - Source IP address
+ @param dst_address - Destination IP address, can be multicast
+ @param src_port - Source UDP port. Not used in transmitted packets; used only to register the local UDP port
+ @param dst_port - Destination UDP port
+ @param mcast_sw_if_index - Interface for multicast destination
+ @param encap_vrf_id - Encap route table FIB index
+ @param decap_next_index - index of decap next graph node
+ @param vni - The VXLAN Network Identifier, uint24
+ @param is_l3 - if true, create the interface in L3 mode, w/o MAC
+*/
+define vxlan_add_del_tunnel_v3
+{
+ u32 client_index;
+ u32 context;
+ bool is_add [default=true];
+ u32 instance [default=0xffffffff]; /* If non-~0, specifies a custom dev instance */
+ vl_api_address_t src_address;
+ vl_api_address_t dst_address;
+ u16 src_port;
+ u16 dst_port;
+ vl_api_interface_index_t mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_next_index;
+ u32 vni;
+ bool is_l3 [default=false];
+};
+
+define vxlan_add_del_tunnel_reply
+{
+ option deprecated;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+define vxlan_add_del_tunnel_v2_reply
+{
+ option deprecated;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+define vxlan_add_del_tunnel_v3_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+define vxlan_tunnel_dump
+{
+ option deprecated;
+
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+define vxlan_tunnel_v2_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+
+define vxlan_tunnel_details
+{
+ option deprecated;
+
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u32 instance;
+ vl_api_address_t src_address;
+ vl_api_address_t dst_address;
+ vl_api_interface_index_t mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_next_index;
+ u32 vni;
+};
+define vxlan_tunnel_v2_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u32 instance;
+ vl_api_address_t src_address;
+ vl_api_address_t dst_address;
+ u16 src_port;
+ u16 dst_port;
+ vl_api_interface_index_t mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_next_index;
+ u32 vni;
+};
+
+/** \brief Interface set vxlan-bypass request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface used to reach neighbor
+ @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass
+ @param enable - if non-zero enable, else disable
+*/
+autoreply define sw_interface_set_vxlan_bypass
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ bool is_ipv6;
+ bool enable [default=true];
+};
+
+/** \brief Offload vxlan rx request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param hw_if_index - rx hw interface
+ @param sw_if_index - vxlan interface to offload
+ @param enable - if non-zero enable, else disable
+*/
+autoreply define vxlan_offload_rx
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t hw_if_index;
+ vl_api_interface_index_t sw_if_index;
+ bool enable [default=true];
+};
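The defaulting implied by these messages (instance = ~0 means auto-allocate; zero src_port/dst_port falls back to the registered VXLAN port, as the add/del handler in vxlan.c below shows) can be sketched standalone; tunnel_req_t is an illustrative stand-in, not the generated API binding:

    #include <stdint.h>
    #include <stdio.h>

    typedef struct
    {
      uint32_t instance;  /* 0xffffffff: let VPP pick the instance */
      uint16_t src_port;  /* 0: fall back to the registered VXLAN port */
      uint16_t dst_port;  /* 0: fall back to the registered VXLAN port */
      uint32_t vni;
    } tunnel_req_t;

    int
    main (void)
    {
      tunnel_req_t req = { .instance = 0xffffffff, .vni = 13 };
      uint16_t vxlan_port = 4789; /* IANA-assigned VXLAN port */
      if (req.src_port == 0)
        req.src_port = vxlan_port;
      if (req.dst_port == 0)
        req.dst_port = vxlan_port;
      printf ("vni %u ports %u->%u\n", req.vni, (unsigned) req.src_port,
              (unsigned) req.dst_port);
      return 0;
    }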
diff --git a/src/plugins/vxlan/vxlan.c b/src/plugins/vxlan/vxlan.c
new file mode 100644
index 00000000000..0885550d257
--- /dev/null
+++ b/src/plugins/vxlan/vxlan.c
@@ -0,0 +1,1331 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vxlan/vxlan.h>
+#include <vnet/ip/format.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry_track.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/adj/rewrite.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/interface.h>
+#include <vnet/flow/flow.h>
+#include <vnet/udp/udp_local.h>
+#include <vlib/vlib.h>
+
+/**
+ * @file
+ * @brief VXLAN.
+ *
+ * VXLAN provides the features needed to allow L2 bridge domains (BDs)
+ * to span multiple servers. This is done by building an L2 overlay on
+ * top of an L3 network underlay using VXLAN tunnels.
+ *
+ * This makes it possible for servers to be co-located in the same data
+ * center or be separated geographically as long as they are reachable
+ * through the underlay L3 network.
+ *
+ * This kind of L2 overlay bridge domain is referred to as a VXLAN
+ * (Virtual eXtensible Local Area Network) segment.
+ */
+
+
+vxlan_main_t vxlan_main;
+
+static u32
+vxlan_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
+{
+ /* nothing for now */
+ return 0;
+}
+
+static clib_error_t *
+vxlan_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+ u32 frame_size)
+{
+ /* nothing for now */
+ return 0;
+}
+
+static u8 *
+format_decap_next (u8 * s, va_list * args)
+{
+ u32 next_index = va_arg (*args, u32);
+
+ if (next_index == VXLAN_INPUT_NEXT_DROP)
+ return format (s, "drop");
+ return format (s, "index %d", next_index);
+}
+
+u8 *
+format_vxlan_tunnel (u8 * s, va_list * args)
+{
+ vxlan_tunnel_t *t = va_arg (*args, vxlan_tunnel_t *);
+
+ s = format (s,
+ "[%d] instance %d src %U dst %U src_port %d dst_port %d vni %d "
+ "fib-idx %d sw-if-idx %d ",
+ t->dev_instance, t->user_instance, format_ip46_address, &t->src,
+ IP46_TYPE_ANY, format_ip46_address, &t->dst, IP46_TYPE_ANY,
+ t->src_port, t->dst_port, t->vni, t->encap_fib_index,
+ t->sw_if_index);
+
+ s = format (s, "encap-dpo-idx %d ", t->next_dpo.dpoi_index);
+
+ if (PREDICT_FALSE (t->decap_next_index != VXLAN_INPUT_NEXT_L2_INPUT))
+ s = format (s, "decap-next-%U ", format_decap_next, t->decap_next_index);
+
+ if (PREDICT_FALSE (ip46_address_is_multicast (&t->dst)))
+ s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index);
+
+ if (t->flow_index != ~0)
+ s = format (s, "flow-index %d [%U]", t->flow_index,
+ format_flow_enabled_hw, t->flow_index);
+
+ return s;
+}
+
+static u8 *
+format_vxlan_name (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ vxlan_main_t *vxm = &vxlan_main;
+ vxlan_tunnel_t *t;
+
+ if (dev_instance == ~0)
+ return format (s, "<cached-unused>");
+
+ if (dev_instance >= vec_len (vxm->tunnels))
+ return format (s, "<improperly-referenced>");
+
+ t = pool_elt_at_index (vxm->tunnels, dev_instance);
+
+ return format (s, "vxlan_tunnel%d", t->user_instance);
+}
+
+static clib_error_t *
+vxlan_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
+ VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return /* no error */ 0;
+}
+
+VNET_DEVICE_CLASS (vxlan_device_class, static) = {
+ .name = "VXLAN",
+ .format_device_name = format_vxlan_name,
+ .format_tx_trace = format_vxlan_encap_trace,
+ .admin_up_down_function = vxlan_interface_admin_up_down,
+};
+
+static u8 *
+format_vxlan_header_with_length (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ s = format (s, "unimplemented dev %u", dev_instance);
+ return s;
+}
+
+VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = {
+ .name = "VXLAN",
+ .format_header = format_vxlan_header_with_length,
+ .build_rewrite = default_build_rewrite,
+};
+
+static void
+vxlan_tunnel_restack_dpo (vxlan_tunnel_t * t)
+{
+ u8 is_ip4 = ip46_address_is_ip4 (&t->dst);
+ dpo_id_t dpo = DPO_INVALID;
+ fib_forward_chain_type_t forw_type = is_ip4 ?
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
+
+ fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
+
+ /* vxlan uses the inner payload hash as the udp source port, so the
+  * outer packet's hash is not known at this point;
+  * skip single-bucket load-balance dpo's */
+ while (DPO_LOAD_BALANCE == dpo.dpoi_type)
+ {
+ const load_balance_t *lb;
+ const dpo_id_t *choice;
+
+ lb = load_balance_get (dpo.dpoi_index);
+ if (lb->lb_n_buckets > 1)
+ break;
+
+ choice = load_balance_get_bucket_i (lb, 0);
+
+ if (DPO_RECEIVE == choice->dpoi_type)
+ dpo_copy (&dpo, drop_dpo_get (choice->dpoi_proto));
+ else
+ dpo_copy (&dpo, choice);
+ }
+
+ u32 encap_index = is_ip4 ?
+ vxlan4_encap_node.index : vxlan6_encap_node.index;
+ dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
+ dpo_reset (&dpo);
+}
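A toy sketch of the unwrap loop above, assuming simplified stand-in types: single-bucket load-balance DPOs carry no real choice, so they are skipped down to the underlying DPO, while a multi-bucket one would be kept for hashing at encap time:

    #include <stdio.h>

    typedef enum { DPO_LB, DPO_ADJ, DPO_RECEIVE } dpo_type_t;

    typedef struct dpo
    {
      dpo_type_t type;
      int n_buckets;
      struct dpo *bucket0; /* only meaningful for DPO_LB */
    } dpo_t;

    int
    main (void)
    {
      dpo_t adj = { DPO_ADJ, 0, 0 };
      dpo_t lb1 = { DPO_LB, 1, &adj };
      dpo_t lb0 = { DPO_LB, 1, &lb1 };
      dpo_t *d = &lb0;

      while (d->type == DPO_LB && d->n_buckets == 1)
        d = d->bucket0; /* a multi-bucket LB would be kept as-is */

      printf ("stacked on type %d\n", d->type); /* prints 1 (DPO_ADJ) */
      return 0;
    }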
+
+static vxlan_tunnel_t *
+vxlan_tunnel_from_fib_node (fib_node_t * node)
+{
+ ASSERT (FIB_NODE_TYPE_VXLAN_TUNNEL == node->fn_type);
+ return ((vxlan_tunnel_t *) (((char *) node) -
+ STRUCT_OFFSET_OF (vxlan_tunnel_t, node)));
+}
+
+/**
+ * Backwalk a FIB node -
+ * restack the encap node on the updated dpo for the VXLAN destination IP.
+ */
+static fib_node_back_walk_rc_t
+vxlan_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
+{
+ vxlan_tunnel_restack_dpo (vxlan_tunnel_from_fib_node (node));
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/**
+ * Function definition to get a FIB node from its index
+ */
+static fib_node_t *
+vxlan_tunnel_fib_node_get (fib_node_index_t index)
+{
+ vxlan_tunnel_t *t;
+ vxlan_main_t *vxm = &vxlan_main;
+
+ t = pool_elt_at_index (vxm->tunnels, index);
+
+ return (&t->node);
+}
+
+/**
+ * Function definition to inform the FIB node that its last lock has gone.
+ */
+static void
+vxlan_tunnel_last_lock_gone (fib_node_t * node)
+{
+ /*
+ * The VXLAN tunnel is a root of the graph. As such
+ * it never has children and thus is never locked.
+ */
+ ASSERT (0);
+}
+
+/*
+ * Virtual function table registered by VXLAN tunnels
+ * for participation in the FIB object graph.
+ */
+const static fib_node_vft_t vxlan_vft = {
+ .fnv_get = vxlan_tunnel_fib_node_get,
+ .fnv_last_lock = vxlan_tunnel_last_lock_gone,
+ .fnv_back_walk = vxlan_tunnel_back_walk,
+};
+
+#define foreach_copy_field \
+ _ (vni) \
+ _ (mcast_sw_if_index) \
+ _ (encap_fib_index) \
+ _ (decap_next_index) \
+ _ (src) \
+ _ (dst) \
+ _ (src_port) \
+ _ (dst_port)
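foreach_copy_field is the usual X-macro pattern: the field list is written once and expanded with different definitions of _. A standalone sketch with hypothetical field names:

    #include <stdio.h>

    /* the field list, written once */
    #define foreach_field _ (a) _ (b)

    typedef struct { int a, b; } args_t;
    typedef struct { int a, b; } obj_t;

    int
    main (void)
    {
      args_t args = { 1, 2 };
      obj_t o;
    /* expand the list into one copy statement per field */
    #define _(x) o.x = args.x;
      foreach_field
    #undef _
      printf ("%d %d\n", o.a, o.b); /* prints 1 2 */
      return 0;
    }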
+
+static void
+vxlan_rewrite (vxlan_tunnel_t * t, bool is_ip6)
+{
+ union
+ {
+ ip4_vxlan_header_t h4;
+ ip6_vxlan_header_t h6;
+ } h;
+ int len = is_ip6 ? sizeof h.h6 : sizeof h.h4;
+
+ udp_header_t *udp;
+ vxlan_header_t *vxlan;
+ /* Fixed portion of the (outer) ip header */
+
+ clib_memset (&h, 0, sizeof (h));
+ if (!is_ip6)
+ {
+ ip4_header_t *ip = &h.h4.ip4;
+ udp = &h.h4.udp, vxlan = &h.h4.vxlan;
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+
+ ip->src_address = t->src.ip4;
+ ip->dst_address = t->dst.ip4;
+
+ /* we fix up the ip4 header length and checksum after-the-fact */
+ ip->checksum = ip4_header_checksum (ip);
+ }
+ else
+ {
+ ip6_header_t *ip = &h.h6.ip6;
+ udp = &h.h6.udp, vxlan = &h.h6.vxlan;
+ ip->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (6 << 28);
+ ip->hop_limit = 255;
+ ip->protocol = IP_PROTOCOL_UDP;
+
+ ip->src_address = t->src.ip6;
+ ip->dst_address = t->dst.ip6;
+ }
+
+ /* UDP header; the src port written here is only a template - the encap
+    node overwrites it per packet with the inner flow hash */
+ udp->src_port = clib_host_to_net_u16 (t->src_port);
+ udp->dst_port = clib_host_to_net_u16 (t->dst_port);
+
+ /* VXLAN header */
+ vnet_set_vni_and_flags (vxlan, t->vni);
+ vnet_rewrite_set_data (*t, &h, len);
+}
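For reference, a standalone sketch of the 8-byte VXLAN header layout (RFC 7348) that vnet_set_vni_and_flags fills in here: the I flag (0x08) marks the VNI as valid, and the 24-bit VNI occupies the upper bits of the second word. vxlan_hdr_t is an illustrative stand-in for the plugin's vxlan_header_t:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct
    {
      uint32_t flags_reserved; /* flags in the top byte; 0x08 = I bit */
      uint32_t vni_reserved;   /* VNI in the top 24 bits */
    } vxlan_hdr_t;

    int
    main (void)
    {
      vxlan_hdr_t h;
      uint32_t vni = 13;
      h.flags_reserved = htonl (0x08000000); /* VNI-present flag */
      h.vni_reserved = htonl (vni << 8);     /* vni in upper 24 bits */
      printf ("vni back out: %u\n", ntohl (h.vni_reserved) >> 8);
      return 0;
    }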
+
+static bool
+vxlan_decap_next_is_valid (vxlan_main_t * vxm, u32 is_ip6,
+ u32 decap_next_index)
+{
+ vlib_main_t *vm = vxm->vlib_main;
+ u32 input_idx = (!is_ip6) ?
+ vxlan4_input_node.index : vxlan6_input_node.index;
+ vlib_node_runtime_t *r = vlib_node_get_runtime (vm, input_idx);
+
+ return decap_next_index < r->n_next_nodes;
+}
+
+typedef CLIB_PACKED(union
+{
+ struct
+ {
+ fib_node_index_t mfib_entry_index;
+ adj_index_t mcast_adj_index;
+ };
+ u64 as_u64;
+}) mcast_shared_t;
+
+static inline mcast_shared_t
+mcast_shared_get (ip46_address_t * ip)
+{
+ ASSERT (ip46_address_is_multicast (ip));
+ uword *p = hash_get_mem (vxlan_main.mcast_shared, ip);
+ ALWAYS_ASSERT (p);
+ mcast_shared_t ret = {.as_u64 = *p };
+ return ret;
+}
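A standalone sketch of the packing trick behind mcast_shared_t: two 32-bit indices share storage with one u64 so the pair fits directly in a hash-table value without a separate allocation; pair_t is an illustrative stand-in:

    #include <stdint.h>
    #include <stdio.h>

    typedef union
    {
      struct
      {
        uint32_t mfib_entry_index;
        uint32_t mcast_adj_index;
      };
      uint64_t as_u64;
    } pair_t;

    int
    main (void)
    {
      pair_t in = { .mfib_entry_index = 7, .mcast_adj_index = 42 };
      uint64_t stored = in.as_u64;       /* what goes into the hash */
      pair_t out = { .as_u64 = stored }; /* what mcast_shared_get unpacks */
      printf ("%u %u\n", out.mfib_entry_index, out.mcast_adj_index);
      return 0;
    }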
+
+static inline void
+mcast_shared_add (ip46_address_t * dst, fib_node_index_t mfei, adj_index_t ai)
+{
+ mcast_shared_t new_ep = {
+ .mcast_adj_index = ai,
+ .mfib_entry_index = mfei,
+ };
+
+ hash_set_mem_alloc (&vxlan_main.mcast_shared, dst, new_ep.as_u64);
+}
+
+static inline void
+mcast_shared_remove (ip46_address_t * dst)
+{
+ mcast_shared_t ep = mcast_shared_get (dst);
+
+ adj_unlock (ep.mcast_adj_index);
+ mfib_table_entry_delete_index (ep.mfib_entry_index, MFIB_SOURCE_VXLAN);
+
+ hash_unset_mem_free (&vxlan_main.mcast_shared, dst);
+}
+
+int
+vnet_vxlan_add_del_tunnel (vnet_vxlan_add_del_tunnel_args_t * a,
+                           u32 * sw_if_indexp)
+{
+ vxlan_main_t *vxm = &vxlan_main;
+ vnet_main_t *vnm = vxm->vnet_main;
+ vxlan_decap_info_t *p;
+ u32 sw_if_index = ~0;
+ vxlan4_tunnel_key_t key4;
+ vxlan6_tunnel_key_t key6;
+ u32 is_ip6 = a->is_ip6;
+ vlib_main_t *vm = vlib_get_main ();
+ u8 hw_addr[6];
+
+ /* Set udp-ports */
+ if (a->src_port == 0)
+ a->src_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
+
+ if (a->dst_port == 0)
+ a->dst_port = is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
+
+ int not_found;
+ if (!is_ip6)
+ {
+ /* ip4 mcast is indexed by mcast addr only */
+ key4.key[0] = ip46_address_is_multicast (&a->dst) ?
+ a->dst.ip4.as_u32 :
+ a->dst.ip4.as_u32 | (((u64) a->src.ip4.as_u32) << 32);
+ key4.key[1] = ((u64) clib_host_to_net_u16 (a->src_port) << 48) |
+ (((u64) a->encap_fib_index) << 32) |
+ clib_host_to_net_u32 (a->vni << 8);
+ not_found =
+ clib_bihash_search_inline_16_8 (&vxm->vxlan4_tunnel_by_key, &key4);
+ p = (void *) &key4.value;
+ }
+ else
+ {
+ key6.key[0] = a->dst.ip6.as_u64[0];
+ key6.key[1] = a->dst.ip6.as_u64[1];
+ key6.key[2] = (((u64) clib_host_to_net_u16 (a->src_port) << 48) |
+ ((u64) a->encap_fib_index) << 32) |
+ clib_host_to_net_u32 (a->vni << 8);
+ not_found =
+ clib_bihash_search_inline_24_8 (&vxm->vxlan6_tunnel_by_key, &key6);
+ p = (void *) &key6.value;
+ }
+
+ if (not_found)
+ p = 0;
+
+ if (a->is_add)
+ {
+ l2input_main_t *l2im = &l2input_main;
+ u32 dev_instance; /* actual device instance (tunnel pool index) */
+ u32 user_instance; /* requested instance number, used for naming */
+
+ /* adding a tunnel: tunnel must not already exist */
+ if (p)
+ return VNET_API_ERROR_TUNNEL_EXIST;
+
+ /* if not set explicitly, default to l2 */
+ if (a->decap_next_index == ~0)
+ a->decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
+ if (!vxlan_decap_next_is_valid (vxm, is_ip6, a->decap_next_index))
+ return VNET_API_ERROR_INVALID_DECAP_NEXT;
+
+ vxlan_tunnel_t *t;
+ pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES);
+ clib_memset (t, 0, sizeof (*t));
+ dev_instance = t - vxm->tunnels;
+
+ /* copy from arg structure */
+#define _(x) t->x = a->x;
+ foreach_copy_field;
+#undef _
+
+ vxlan_rewrite (t, is_ip6);
+ /*
+ * Reconcile the real dev_instance and a possible requested instance.
+ */
+ user_instance = a->instance;
+ if (user_instance == ~0)
+ user_instance = dev_instance;
+ if (hash_get (vxm->instance_used, user_instance))
+ {
+ pool_put (vxm->tunnels, t);
+ return VNET_API_ERROR_INSTANCE_IN_USE;
+ }
+
+ hash_set (vxm->instance_used, user_instance, 1);
+
+ t->dev_instance = dev_instance; /* actual */
+ t->user_instance = user_instance; /* name */
+ t->flow_index = ~0;
+
+ if (a->is_l3)
+ t->hw_if_index =
+ vnet_register_interface (vnm, vxlan_device_class.index, dev_instance,
+ vxlan_hw_class.index, dev_instance);
+ else
+ {
+ vnet_eth_interface_registration_t eir = {};
+ f64 now = vlib_time_now (vm);
+ u32 rnd;
+ rnd = (u32) (now * 1e6);
+ rnd = random_u32 (&rnd);
+ memcpy (hw_addr + 2, &rnd, sizeof (rnd));
+ hw_addr[0] = 2;
+ hw_addr[1] = 0xfe;
+
+ eir.dev_class_index = vxlan_device_class.index;
+ eir.dev_instance = dev_instance;
+ eir.address = hw_addr;
+ eir.cb.flag_change = vxlan_eth_flag_change;
+ eir.cb.set_max_frame_size = vxlan_eth_set_max_frame_size;
+ t->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+ }
+
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
+
+ /* Set vxlan tunnel output node */
+ u32 encap_index = !is_ip6 ?
+ vxlan4_encap_node.index : vxlan6_encap_node.index;
+ vnet_set_interface_output_node (vnm, t->hw_if_index, encap_index);
+
+ t->sw_if_index = sw_if_index = hi->sw_if_index;
+
+ /* copy the key */
+ int add_failed;
+ if (is_ip6)
+ {
+ key6.value = (u64) dev_instance;
+ add_failed = clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key,
+ &key6, 1 /*add */ );
+ }
+ else
+ {
+ vxlan_decap_info_t di = {.sw_if_index = t->sw_if_index, };
+ if (ip46_address_is_multicast (&t->dst))
+ di.local_ip = t->src.ip4;
+ else
+ di.next_index = t->decap_next_index;
+ key4.value = di.as_u64;
+ add_failed = clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key,
+ &key4, 1 /*add */ );
+ }
+
+ if (add_failed)
+ {
+ if (a->is_l3)
+ vnet_delete_hw_interface (vnm, t->hw_if_index);
+ else
+ ethernet_delete_interface (vnm, t->hw_if_index);
+ hash_unset (vxm->instance_used, t->user_instance);
+ pool_put (vxm->tunnels, t);
+ return VNET_API_ERROR_INVALID_REGISTRATION;
+ }
+
+ vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index,
+ ~0);
+ vxm->tunnel_index_by_sw_if_index[sw_if_index] = dev_instance;
+
+ /* setup l2 input config with l2 feature and bd 0 to drop packet */
+ vec_validate (l2im->configs, sw_if_index);
+ l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
+ l2im->configs[sw_if_index].bd_index = 0;
+
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
+ vnet_sw_interface_set_flags (vnm, sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ fib_node_init (&t->node, FIB_NODE_TYPE_VXLAN_TUNNEL);
+ fib_prefix_t tun_dst_pfx;
+ vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
+
+ fib_protocol_t fp = fib_ip_proto (is_ip6);
+ fib_prefix_from_ip46_addr (fp, &t->dst, &tun_dst_pfx);
+ if (!ip46_address_is_multicast (&t->dst))
+ {
+ /* Unicast tunnel -
+ * source the FIB entry for the tunnel's destination
+ * and become a child thereof. The tunnel will then get poked
+ * when the forwarding for the entry updates, and the tunnel can
+ * re-stack accordingly
+ */
+ vtep_addr_ref (&vxm->vtep_table, t->encap_fib_index, &t->src);
+ t->fib_entry_index = fib_entry_track (t->encap_fib_index,
+ &tun_dst_pfx,
+ FIB_NODE_TYPE_VXLAN_TUNNEL,
+ dev_instance,
+ &t->sibling_index);
+ vxlan_tunnel_restack_dpo (t);
+ }
+ else
+ {
+ /* Multicast tunnel -
+ * as the same mcast group can be used for multiple mcast tunnels
+ * with different VNIs, create the output fib adjacency only if
+ * it does not already exist
+ */
+ if (vtep_addr_ref (&vxm->vtep_table,
+ t->encap_fib_index, &t->dst) == 1)
+ {
+ fib_node_index_t mfei;
+ adj_index_t ai;
+ fib_route_path_t path = {
+ .frp_proto = fib_proto_to_dpo (fp),
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = 0xffffffff,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
+ };
+ const mfib_prefix_t mpfx = {
+ .fp_proto = fp,
+ .fp_len = (is_ip6 ? 128 : 32),
+ .fp_grp_addr = tun_dst_pfx.fp_addr,
+ };
+
+ /*
+ * Setup the (*,G) to receive traffic on the mcast group
+ * - the forwarding interface is for-us
+ * - the accepting interface is that from the API
+ */
+ mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
+ MFIB_SOURCE_VXLAN,
+ MFIB_ENTRY_FLAG_NONE, &path);
+
+ path.frp_sw_if_index = a->mcast_sw_if_index;
+ path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
+ path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
+ mfei = mfib_table_entry_path_update (
+ t->encap_fib_index, &mpfx, MFIB_SOURCE_VXLAN,
+ MFIB_ENTRY_FLAG_NONE, &path);
+
+ /*
+ * Create the mcast adjacency to send traffic to the group
+ */
+ ai = adj_mcast_add_or_lock (fp,
+ fib_proto_to_link (fp),
+ a->mcast_sw_if_index);
+
+ /*
+ * create a new end-point
+ */
+ mcast_shared_add (&t->dst, mfei, ai);
+ }
+
+ dpo_id_t dpo = DPO_INVALID;
+ mcast_shared_t ep = mcast_shared_get (&t->dst);
+
+ /* Stack shared mcast dst mac addr rewrite on encap */
+ dpo_set (&dpo, DPO_ADJACENCY_MCAST,
+ fib_proto_to_dpo (fp), ep.mcast_adj_index);
+
+ dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
+ dpo_reset (&dpo);
+ flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
+ }
+
+ vnet_get_sw_interface (vnet_get_main (), sw_if_index)->flood_class =
+ flood_class;
+ }
+ else
+ {
+ /* deleting a tunnel: tunnel must exist */
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ u32 instance = is_ip6 ? key6.value :
+ vxm->tunnel_index_by_sw_if_index[p->sw_if_index];
+ vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, instance);
+
+ sw_if_index = t->sw_if_index;
+ vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
+
+ vxm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
+
+ if (!is_ip6)
+ clib_bihash_add_del_16_8 (&vxm->vxlan4_tunnel_by_key, &key4,
+ 0 /*del */ );
+ else
+ clib_bihash_add_del_24_8 (&vxm->vxlan6_tunnel_by_key, &key6,
+ 0 /*del */ );
+
+ if (!ip46_address_is_multicast (&t->dst))
+ {
+ if (t->flow_index != ~0)
+ vnet_flow_del (vnm, t->flow_index);
+
+ vtep_addr_unref (&vxm->vtep_table, t->encap_fib_index, &t->src);
+ fib_entry_untrack (t->fib_entry_index, t->sibling_index);
+ }
+ else if (vtep_addr_unref (&vxm->vtep_table,
+ t->encap_fib_index, &t->dst) == 0)
+ {
+ mcast_shared_remove (&t->dst);
+ }
+
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, t->hw_if_index);
+ if (hw->dev_class_index == vxlan_device_class.index)
+ vnet_delete_hw_interface (vnm, t->hw_if_index);
+ else
+ ethernet_delete_interface (vnm, t->hw_if_index);
+ hash_unset (vxm->instance_used, t->user_instance);
+
+ fib_node_deinit (&t->node);
+ pool_put (vxm->tunnels, t);
+ }
+
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ if (a->is_add)
+ {
+ /* register udp ports */
+ if (!is_ip6 && !udp_is_valid_dst_port (a->src_port, 1))
+ udp_register_dst_port (vxm->vlib_main, a->src_port,
+ vxlan4_input_node.index, 1);
+ if (is_ip6 && !udp_is_valid_dst_port (a->src_port, 0))
+ udp_register_dst_port (vxm->vlib_main, a->src_port,
+ vxlan6_input_node.index, 0);
+ }
+
+ return 0;
+}
+
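+/*
+ * Usage sketch (illustrative values only): the API and CLI handlers
+ * both funnel into vnet_vxlan_add_del_tunnel; a programmatic caller
+ * would do the same:
+ *
+ *   vnet_vxlan_add_del_tunnel_args_t a = {
+ *     .is_add = 1, .instance = ~0, .mcast_sw_if_index = ~0,
+ *     .encap_fib_index = 0, .vni = 13,
+ *     .decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT,
+ *     .src_port = 4789, .dst_port = 4789,
+ *   };
+ *   a.src.ip4.as_u32 = clib_host_to_net_u32 (0x0a000301); // 10.0.3.1
+ *   a.dst.ip4.as_u32 = clib_host_to_net_u32 (0x0a000303); // 10.0.3.3
+ *   u32 sw_if_index;
+ *   int rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
+ */
+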
+static uword
+get_decap_next_for_node (u32 node_index, u32 ipv4_set)
+{
+ vxlan_main_t *vxm = &vxlan_main;
+ vlib_main_t *vm = vxm->vlib_main;
+ uword input_node = (ipv4_set) ? vxlan4_input_node.index :
+ vxlan6_input_node.index;
+
+ return vlib_node_add_next (vm, input_node, node_index);
+}
+
+static uword
+unformat_decap_next (unformat_input_t * input, va_list * args)
+{
+ u32 *result = va_arg (*args, u32 *);
+ u32 ipv4_set = va_arg (*args, int);
+ vxlan_main_t *vxm = &vxlan_main;
+ vlib_main_t *vm = vxm->vlib_main;
+ u32 node_index;
+ u32 tmp;
+
+ if (unformat (input, "l2"))
+ *result = VXLAN_INPUT_NEXT_L2_INPUT;
+ else if (unformat (input, "node %U", unformat_vlib_node, vm, &node_index))
+ *result = get_decap_next_for_node (node_index, ipv4_set);
+ else if (unformat (input, "%d", &tmp))
+ *result = tmp;
+ else
+ return 0;
+ return 1;
+}
+
+static clib_error_t *
+vxlan_add_del_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ ip46_address_t src = ip46_address_initializer, dst =
+ ip46_address_initializer;
+ u8 is_add = 1;
+ u8 src_set = 0;
+ u8 dst_set = 0;
+ u8 grp_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ u8 is_l3 = 0;
+ u32 instance = ~0;
+ u32 encap_fib_index = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 decap_next_index = VXLAN_INPUT_NEXT_L2_INPUT;
+ u32 vni = 0;
+ u32 src_port = 0;
+ u32 dst_port = 0;
+ u32 table_id;
+ clib_error_t *parse_error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ {
+ is_add = 0;
+ }
+ else if (unformat (line_input, "instance %d", &instance))
+ ;
+ else if (unformat (line_input, "src %U",
+ unformat_ip46_address, &src, IP46_TYPE_ANY))
+ {
+ src_set = 1;
+ ip46_address_is_ip4 (&src) ? (ipv4_set = 1) : (ipv6_set = 1);
+ }
+ else if (unformat (line_input, "dst %U",
+ unformat_ip46_address, &dst, IP46_TYPE_ANY))
+ {
+ dst_set = 1;
+ ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
+ }
+ else if (unformat (line_input, "group %U %U",
+ unformat_ip46_address, &dst, IP46_TYPE_ANY,
+ unformat_vnet_sw_interface,
+ vnet_get_main (), &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
+ }
+ else if (unformat (line_input, "encap-vrf-id %d", &table_id))
+ {
+ encap_fib_index =
+ fib_table_find (fib_ip_proto (ipv6_set), table_id);
+ }
+ else if (unformat (line_input, "l3"))
+ is_l3 = 1;
+ else if (unformat (line_input, "decap-next %U", unformat_decap_next,
+ &decap_next_index, ipv4_set))
+ ;
+ else if (unformat (line_input, "vni %d", &vni))
+ ;
+ else if (unformat (line_input, "src_port %d", &src_port))
+ ;
+ else if (unformat (line_input, "dst_port %d", &dst_port))
+ ;
+ else
+ {
+ parse_error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (parse_error)
+ return parse_error;
+
+ if (is_l3 && decap_next_index == VXLAN_INPUT_NEXT_L2_INPUT)
+ {
+ vlib_node_t *node = vlib_get_node_by_name (
+ vm, (u8 *) (ipv4_set ? "ip4-input" : "ip6-input"));
+ decap_next_index = get_decap_next_for_node (node->index, ipv4_set);
+ }
+
+ if (encap_fib_index == ~0)
+ return clib_error_return (0, "nonexistent encap-vrf-id %d", table_id);
+
+ if (src_set == 0)
+ return clib_error_return (0, "tunnel src address not specified");
+
+ if (dst_set == 0)
+ return clib_error_return (0, "tunnel dst address not specified");
+
+ if (grp_set && !ip46_address_is_multicast (&dst))
+ return clib_error_return (0, "tunnel group address not multicast");
+
+ if (grp_set == 0 && ip46_address_is_multicast (&dst))
+ return clib_error_return (0, "dst address must be unicast");
+
+ if (grp_set && mcast_sw_if_index == ~0)
+ return clib_error_return (0, "tunnel nonexistent multicast device");
+
+ if (ipv4_set && ipv6_set)
+ return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
+
+ if (ip46_address_cmp (&src, &dst) == 0)
+ return clib_error_return (0, "src and dst addresses are identical");
+
+ if (decap_next_index == ~0)
+ return clib_error_return (0, "next node not found");
+
+ if (vni == 0)
+ return clib_error_return (0, "vni not specified");
+
+ if (vni >> 24)
+ return clib_error_return (0, "vni %d out of range", vni);
+
+ vnet_vxlan_add_del_tunnel_args_t a = { .is_add = is_add,
+ .is_ip6 = ipv6_set,
+ .is_l3 = is_l3,
+ .instance = instance,
+#define _(x) .x = x,
+ foreach_copy_field
+#undef _
+ };
+
+ u32 tunnel_sw_if_index;
+ int rv = vnet_vxlan_add_del_tunnel (&a, &tunnel_sw_if_index);
+
+ switch (rv)
+ {
+ case 0:
+ if (is_add)
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
+ vnet_get_main (), tunnel_sw_if_index);
+ break;
+
+ case VNET_API_ERROR_TUNNEL_EXIST:
+ return clib_error_return (0, "tunnel already exists...");
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ return clib_error_return (0, "tunnel does not exist...");
+
+ case VNET_API_ERROR_INSTANCE_IN_USE:
+ return clib_error_return (0, "Instance is in use");
+
+ default:
+ return clib_error_return
+ (0, "vnet_vxlan_add_del_tunnel returned %d", rv);
+ }
+
+ return 0;
+}
+
+/*?
+ * Add or delete a VXLAN Tunnel.
+ *
+ * VXLAN provides the features needed to allow L2 bridge domains (BDs)
+ * to span multiple servers. This is done by building an L2 overlay on
+ * top of an L3 network underlay using VXLAN tunnels.
+ *
+ * This makes it possible for servers to be co-located in the same data
+ * center or be separated geographically as long as they are reachable
+ * through the underlay L3 network.
+ *
+ * You can refer to this kind of L2 overlay bridge domain as a VXLAN
+ * (Virtual eXtensible LAN) segment.
+ *
+ * @cliexpar
+ * Example of how to create a VXLAN Tunnel:
+ * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id 7}
+ * Example of how to create a VXLAN Tunnel with a known name, vxlan_tunnel42:
+ * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 instance 42}
+ * Example of how to create a multicast VXLAN Tunnel with a known name, vxlan_tunnel23:
+ * @cliexcmd{create vxlan tunnel src 10.0.3.1 group 239.1.1.1 GigabitEthernet0/8/0 instance 23}
+ * Example of how to create a VXLAN Tunnel with custom udp-ports:
+ * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 src_port 59000 dst_port 59001}
+ * Example of how to delete a VXLAN Tunnel:
+ * @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del}
+ ?*/
+VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = {
+ .path = "create vxlan tunnel",
+ .short_help =
+ "create vxlan tunnel src <local-vtep-addr>"
+ " {dst <remote-vtep-addr>|group <mcast-vtep-addr> <intf-name>} vni <nn>"
+ " [instance <id>]"
+ " [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [del] [l3]"
+ " [src_port <local-vtep-udp-port>] [dst_port <remote-vtep-udp-port>]",
+ .function = vxlan_add_del_tunnel_command_fn,
+};
+
+static clib_error_t *
+show_vxlan_tunnel_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vxlan_main_t *vxm = &vxlan_main;
+ vxlan_tunnel_t *t;
+ int raw = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "raw"))
+ raw = 1;
+ else
+ return clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, input);
+ }
+
+ if (pool_elts (vxm->tunnels) == 0)
+ vlib_cli_output (vm, "No vxlan tunnels configured...");
+
+ pool_foreach (t, vxm->tunnels)
+ {
+ vlib_cli_output (vm, "%U", format_vxlan_tunnel, t);
+ }
+
+ if (raw)
+ {
+ vlib_cli_output (vm, "Raw IPv4 Hash Table:\n%U\n",
+ format_bihash_16_8, &vxm->vxlan4_tunnel_by_key,
+ 1 /* verbose */ );
+ vlib_cli_output (vm, "Raw IPv6 Hash Table:\n%U\n",
+ format_bihash_24_8, &vxm->vxlan6_tunnel_by_key,
+ 1 /* verbose */ );
+ }
+
+ return 0;
+}
+
+/*?
+ * Display all the VXLAN Tunnel entries.
+ *
+ * @cliexpar
+ * Example of how to display the VXLAN Tunnel entries:
+ * @cliexstart{show vxlan tunnel}
+ * [0] src 10.0.3.1 dst 10.0.3.3 src_port 4789 dst_port 4789 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (show_vxlan_tunnel_command, static) = {
+ .path = "show vxlan tunnel",
+ .short_help = "show vxlan tunnel [raw]",
+ .function = show_vxlan_tunnel_command_fn,
+};
+
+
+void
+vnet_int_vxlan_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
+{
+ vxlan_main_t *vxm = &vxlan_main;
+
+ if (pool_is_free_index (vxm->vnet_main->interface_main.sw_interfaces,
+ sw_if_index))
+ return;
+
+ is_enable = ! !is_enable;
+
+ if (is_ip6)
+ {
+ if (clib_bitmap_get (vxm->bm_ip6_bypass_enabled_by_sw_if, sw_if_index)
+ != is_enable)
+ {
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-bypass",
+ sw_if_index, is_enable, 0, 0);
+ vxm->bm_ip6_bypass_enabled_by_sw_if =
+ clib_bitmap_set (vxm->bm_ip6_bypass_enabled_by_sw_if,
+ sw_if_index, is_enable);
+ }
+ }
+ else
+ {
+ if (clib_bitmap_get (vxm->bm_ip4_bypass_enabled_by_sw_if, sw_if_index)
+ != is_enable)
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-bypass",
+ sw_if_index, is_enable, 0, 0);
+ vxm->bm_ip4_bypass_enabled_by_sw_if =
+ clib_bitmap_set (vxm->bm_ip4_bypass_enabled_by_sw_if,
+ sw_if_index, is_enable);
+ }
+ }
+}
+
+
+static clib_error_t *
+set_ip_vxlan_bypass (u32 is_ip6,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ u32 sw_if_index, is_enable;
+
+ sw_if_index = ~0;
+ is_enable = 1;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat_user
+ (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (line_input, "del"))
+ is_enable = 0;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == sw_if_index)
+ {
+ error = clib_error_return (0, "unknown interface `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ vnet_int_vxlan_bypass_mode (sw_if_index, is_ip6, is_enable);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+set_ip4_vxlan_bypass (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ return set_ip_vxlan_bypass (0, input, cmd);
+}
+
+/*?
+ * This command adds the 'ip4-vxlan-bypass' graph node for a given interface.
+ * By adding the IPv4 vxlan-bypass graph node to an interface, the node
+ * checks for and validates incoming vxlan packets and bypasses the
+ * ip4-lookup, ip4-local and ip4-udp-lookup nodes to speed up vxlan packet
+ * forwarding. The node adds only minimal extra overhead for non-vxlan
+ * packets.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before ip4-vxlan-bypass is enabled:
+ * @cliexstart{show vlib graph ip4-vxlan-bypass}
+ * Name Next Previous
+ * ip4-vxlan-bypass error-drop [0]
+ * vxlan4-input [1]
+ * ip4-lookup [2]
+ * @cliexend
+ *
+ * Example of how to enable ip4-vxlan-bypass on an interface:
+ * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0}
+ *
+ * Example of graph node after ip4-vxlan-bypass is enabled:
+ * @cliexstart{show vlib graph ip4-vxlan-bypass}
+ * Name Next Previous
+ * ip4-vxlan-bypass error-drop [0] ip4-input
+ * vxlan4-input [1] ip4-input-no-checksum
+ * ip4-lookup [2]
+ * @cliexend
+ *
+ * Example of how to display the feature enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ * ...
+ * ipv4 unicast:
+ * ip4-vxlan-bypass
+ * ip4-lookup
+ * ...
+ * @cliexend
+ *
+ * Example of how to disable ip4-vxlan-bypass on an interface:
+ * @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+VLIB_CLI_COMMAND (set_interface_ip_vxlan_bypass_command, static) = {
+ .path = "set interface ip vxlan-bypass",
+ .function = set_ip4_vxlan_bypass,
+ .short_help = "set interface ip vxlan-bypass <interface> [del]",
+};
+
+static clib_error_t *
+set_ip6_vxlan_bypass (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ return set_ip_vxlan_bypass (1, input, cmd);
+}
+
+/*?
+ * This command adds the 'ip6-vxlan-bypass' graph node for a given interface.
+ * By adding the IPv6 vxlan-bypass graph node to an interface, the node checks
+ * for and validate input vxlan packet and bypass ip6-lookup, ip6-local,
+ * ip6-udp-lookup nodes to speedup vxlan packet forwarding. This node will
+ * cause extra overhead to for non-vxlan packets which is kept at a minimum.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before ip6-vxlan-bypass is enabled:
+ * @cliexstart{show vlib graph ip6-vxlan-bypass}
+ * Name Next Previous
+ * ip6-vxlan-bypass error-drop [0]
+ * vxlan6-input [1]
+ * ip6-lookup [2]
+ * @cliexend
+ *
+ * Example of how to enable ip6-vxlan-bypass on an interface:
+ * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0}
+ *
+ * Example of graph node after ip6-vxlan-bypass is enabled:
+ * @cliexstart{show vlib graph ip6-vxlan-bypass}
+ * Name Next Previous
+ * ip6-vxlan-bypass error-drop [0] ip6-input
+ * vxlan6-input [1]
+ * ip6-lookup [2]
+ * @cliexend
+ *
+ * Example of how to display the feature enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ * ...
+ * ipv6 unicast:
+ * ip6-vxlan-bypass
+ * ip6-lookup
+ * ...
+ * @cliexend
+ *
+ * Example of how to disable ip6-vxlan-bypass on an interface:
+ * @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+VLIB_CLI_COMMAND (set_interface_ip6_vxlan_bypass_command, static) = {
+ .path = "set interface ip6 vxlan-bypass",
+ .function = set_ip6_vxlan_bypass,
+ .short_help = "set interface ip6 vxlan-bypass <interface> [del]",
+};
+
+int
+vnet_vxlan_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add)
+{
+ vxlan_main_t *vxm = &vxlan_main;
+ vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
+ vnet_main_t *vnm = vnet_get_main ();
+ if (is_add)
+ {
+ if (t->flow_index == ~0)
+ {
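+	  /* Note the reversal relative to the tunnel: the flow matches
+	     packets arriving *from* the remote VTEP, so the tunnel dst is
+	     the flow src_addr and the tunnel src is the flow dst_addr. */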
+ vnet_flow_t flow = {
+ .actions =
+ VNET_FLOW_ACTION_REDIRECT_TO_NODE | VNET_FLOW_ACTION_MARK |
+ VNET_FLOW_ACTION_BUFFER_ADVANCE,
+ .mark_flow_id = t->dev_instance + vxm->flow_id_start,
+ .redirect_node_index = vxlan4_flow_input_node.index,
+ .buffer_advance = sizeof (ethernet_header_t),
+ .type = VNET_FLOW_TYPE_IP4_VXLAN,
+ .ip4_vxlan = {
+ .protocol.prot = IP_PROTOCOL_UDP,
+ .src_addr.addr = t->dst.ip4,
+ .dst_addr.addr = t->src.ip4,
+ .src_addr.mask.as_u32 = ~0,
+ .dst_addr.mask.as_u32 = ~0,
+ .dst_port.port = t->src_port,
+ .dst_port.mask = 0xFF,
+ .vni = t->vni,
+ }
+ ,
+ };
+ vnet_flow_add (vnm, &flow, &t->flow_index);
+ }
+ return vnet_flow_enable (vnm, t->flow_index, hw_if_index);
+ }
+ /* flow index is removed when the tunnel is deleted */
+ return vnet_flow_disable (vnm, t->flow_index, hw_if_index);
+}
+
+u32
+vnet_vxlan_get_tunnel_index (u32 sw_if_index)
+{
+ vxlan_main_t *vxm = &vxlan_main;
+
+ if (sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index))
+ return ~0;
+ return vxm->tunnel_index_by_sw_if_index[sw_if_index];
+}
+
+static clib_error_t *
+vxlan_offload_command_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 rx_sw_if_index = ~0;
+ u32 hw_if_index = ~0;
+ int is_add = 1;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "hw %U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
+ continue;
+ if (unformat (line_input, "rx %U", unformat_vnet_sw_interface, vnm,
+ &rx_sw_if_index))
+ continue;
+ if (unformat (line_input, "del"))
+ {
+ is_add = 0;
+ continue;
+ }
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+
+ if (rx_sw_if_index == ~0)
+ return clib_error_return (0, "missing rx interface");
+ if (hw_if_index == ~0)
+ return clib_error_return (0, "missing hw interface");
+
+ u32 t_index = vnet_vxlan_get_tunnel_index (rx_sw_if_index);
+ if (t_index == ~0)
+ return clib_error_return (0, "%U is not a vxlan tunnel",
+ format_vnet_sw_if_index_name, vnm,
+ rx_sw_if_index);
+
+ vxlan_main_t *vxm = &vxlan_main;
+ vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
+
+ if (!ip46_address_is_ip4 (&t->dst))
+ return clib_error_return (0, "currently only IPV4 tunnels are supported");
+
+ vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
+ ip4_main_t *im = &ip4_main;
+ u32 rx_fib_index =
+ vec_elt (im->fib_index_by_sw_if_index, hw_if->sw_if_index);
+
+ if (t->encap_fib_index != rx_fib_index)
+ return clib_error_return (0, "interface/tunnel fib mismatch");
+
+ if (vnet_vxlan_add_del_rx_flow (hw_if_index, t_index, is_add))
+ return clib_error_return (0, "error %s flow",
+ is_add ? "enabling" : "disabling");
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (vxlan_offload_command, static) = {
+ .path = "set flow-offload vxlan",
+ .short_help =
+ "set flow-offload vxlan hw <interface-name> rx <tunnel-name> [del]",
+ .function = vxlan_offload_command_fn,
+};
+
+#define VXLAN_HASH_NUM_BUCKETS (2 * 1024)
+#define VXLAN_HASH_MEMORY_SIZE (1 << 20)
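+/* 2048 buckets and 1 MiB of backing memory per bihash table */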
+
+clib_error_t *
+vxlan_init (vlib_main_t * vm)
+{
+ vxlan_main_t *vxm = &vxlan_main;
+
+ vxm->vnet_main = vnet_get_main ();
+ vxm->vlib_main = vm;
+
+ vnet_flow_get_range (vxm->vnet_main, "vxlan", 1024 * 1024,
+ &vxm->flow_id_start);
+
+ vxm->bm_ip4_bypass_enabled_by_sw_if = 0;
+ vxm->bm_ip6_bypass_enabled_by_sw_if = 0;
+
+ /* initialize the ip4 and ip6 hashes */
+ clib_bihash_init_16_8 (&vxm->vxlan4_tunnel_by_key, "vxlan4",
+ VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE);
+ clib_bihash_init_24_8 (&vxm->vxlan6_tunnel_by_key, "vxlan6",
+ VXLAN_HASH_NUM_BUCKETS, VXLAN_HASH_MEMORY_SIZE);
+ vxm->vtep_table = vtep_table_create ();
+ vxm->mcast_shared = hash_create_mem (0,
+ sizeof (ip46_address_t),
+ sizeof (mcast_shared_t));
+
+ fib_node_register_type (FIB_NODE_TYPE_VXLAN_TUNNEL, &vxlan_vft);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vxlan_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vxlan/vxlan.h b/src/plugins/vxlan/vxlan.h
new file mode 100644
index 00000000000..ccddedeb279
--- /dev/null
+++ b/src/plugins/vxlan/vxlan.h
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_vxlan_h
+#define included_vnet_vxlan_h
+
+#include <vppinfra/error.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/bihash_16_8.h>
+#include <vppinfra/bihash_24_8.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/vtep.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/l2/l2_output.h>
+#include <vnet/l2/l2_bd.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vxlan/vxlan_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj_types.h>
+
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4; /* 20 bytes */
+ udp_header_t udp; /* 8 bytes */
+ vxlan_header_t vxlan; /* 8 bytes */
+}) ip4_vxlan_header_t;
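+/* total IPv4 encap overhead: 20 + 8 + 8 = 36 bytes */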
+
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip6; /* 40 bytes */
+ udp_header_t udp; /* 8 bytes */
+ vxlan_header_t vxlan; /* 8 bytes */
+}) ip6_vxlan_header_t;
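+/* total IPv6 encap overhead: 40 + 8 + 8 = 56 bytes */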
+
+/*
+* Key fields: remote ip, vni on incoming VXLAN packet
+* all fields in NET byte order
+*/
+typedef clib_bihash_kv_16_8_t vxlan4_tunnel_key_t;
+
+/*
+* Key fields: remote ip, vni and fib index on incoming VXLAN packet
+* ip, vni fields in NET byte order
+* fib index field in host byte order
+*/
+typedef clib_bihash_kv_24_8_t vxlan6_tunnel_key_t;
+
+typedef union
+{
+ struct
+ {
+ u32 sw_if_index; /* unicast - input interface / mcast - stats interface */
+ union
+ {
+ struct /* unicast action */
+ {
+ u16 next_index;
+ u8 error;
+ };
+ ip4_address_t local_ip; /* used as dst ip for mcast pkts to assign them to unicast tunnel */
+ };
+ };
+ u64 as_u64;
+} vxlan_decap_info_t;
+
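+/*
+ * Sketch (illustrative values) of how the union above doubles as the
+ * 8-byte value stored in vxlan4_tunnel_by_key, mirroring the key setup
+ * in vxlan.c:
+ *
+ *   vxlan_decap_info_t di = { .sw_if_index = 5,
+ *                             .next_index = VXLAN_INPUT_NEXT_L2_INPUT };
+ *   key4.value = di.as_u64;
+ */
+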
+typedef struct
+{
+ /* Required for pool_get_aligned */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+ /* FIB DPO for IP forwarding of VXLAN encap packet */
+ dpo_id_t next_dpo;
+
+ /* vxlan VNI in HOST byte order */
+ u32 vni;
+
+ /* tunnel src and dst addresses */
+ ip46_address_t src;
+ ip46_address_t dst;
+
+ /* udp-ports */
+ u16 src_port;
+ u16 dst_port;
+
+ /* mcast packet output intfc index (used only if dst is mcast) */
+ u32 mcast_sw_if_index;
+
+ /* decap next index */
+ u16 decap_next_index;
+
+ /* The FIB index for src/dst addresses */
+ u32 encap_fib_index;
+
+ /* vnet intfc index */
+ u32 sw_if_index;
+ u32 hw_if_index;
+
+ /**
+ * Linkage into the FIB object graph
+ */
+ fib_node_t node;
+
+ /*
+ * The FIB entry (depending on whether the VXLAN tunnel is unicast or
+ * mcast) for sending unicast VXLAN encap packets or receiving mcast
+ * VXLAN packets
+ */
+ fib_node_index_t fib_entry_index;
+ adj_index_t mcast_adj_index;
+
+ /**
+ * The tunnel is a child of the FIB entry for its destination. This is
+ * so it receives updates when the forwarding information for that entry
+ * changes.
+ * The tunnel's sibling index on the FIB entry's dependency list.
+ */
+ u32 sibling_index;
+
+ u32 flow_index; /* infra flow index */
+ u32 dev_instance; /* Real device instance in tunnel vector */
+ u32 user_instance; /* Instance name being shown to user */
+
+ VNET_DECLARE_REWRITE;
+} vxlan_tunnel_t;
+
+#define foreach_vxlan_input_next \
+_(DROP, "error-drop") \
+_(L2_INPUT, "l2-input")
+
+typedef enum
+{
+#define _(s,n) VXLAN_INPUT_NEXT_##s,
+ foreach_vxlan_input_next
+#undef _
+ VXLAN_INPUT_N_NEXT,
+} vxlan_input_next_t;
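+/*
+ * The X-macro above expands to VXLAN_INPUT_NEXT_DROP and
+ * VXLAN_INPUT_NEXT_L2_INPUT, keeping the enum values and the
+ * "error-drop" / "l2-input" graph node names in a single list.
+ */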
+
+typedef enum
+{
+#define vxlan_error(n,s) VXLAN_ERROR_##n,
+#include <vxlan/vxlan_error.def>
+#undef vxlan_error
+ VXLAN_N_ERROR,
+} vxlan_input_error_t;
+
+typedef struct
+{
+ /* vector of encap tunnel instances */
+ vxlan_tunnel_t *tunnels;
+
+ /* lookup tunnel by key */
+ clib_bihash_16_8_t
+ vxlan4_tunnel_by_key; /* keyed on ipv4.dst + src_port + fib + vni */
+ clib_bihash_24_8_t
+ vxlan6_tunnel_by_key; /* keyed on ipv6.dst + src_port + fib + vni */
+
+ /* local VTEP IPs ref count used by vxlan-bypass node to check if
+ received VXLAN packet DIP matches any local VTEP address */
+ vtep_table_t vtep_table;
+
+ /* mcast shared info */
+ uword *mcast_shared; /* keyed on mcast ip46 addr */
+
+ /* Mapping from sw_if_index to tunnel index */
+ u32 *tunnel_index_by_sw_if_index;
+
+ /* graph node state */
+ uword *bm_ip4_bypass_enabled_by_sw_if;
+ uword *bm_ip6_bypass_enabled_by_sw_if;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+
+ /* Record used instances */
+ uword *instance_used;
+ u32 flow_id_start;
+
+ /* cache for last 8 vxlan tunnels */
+ vtep4_cache_t vtep4_u512;
+
+} vxlan_main_t;
+
+extern vxlan_main_t vxlan_main;
+
+extern vlib_node_registration_t vxlan4_input_node;
+extern vlib_node_registration_t vxlan6_input_node;
+extern vlib_node_registration_t vxlan4_encap_node;
+extern vlib_node_registration_t vxlan6_encap_node;
+extern vlib_node_registration_t vxlan4_flow_input_node;
+
+u8 *format_vxlan_encap_trace (u8 * s, va_list * args);
+
+typedef struct
+{
+ u8 is_add;
+
+ /* we normally use is_ip4, but since this adds to the
+ * structure, this seems less of a breaking change */
+ u8 is_ip6;
+ u8 is_l3;
+ u32 instance;
+ ip46_address_t src, dst;
+ u32 mcast_sw_if_index;
+ u32 encap_fib_index;
+ u32 decap_next_index;
+ u32 vni;
+ u16 src_port;
+ u16 dst_port;
+} vnet_vxlan_add_del_tunnel_args_t;
+
+int vnet_vxlan_add_del_tunnel
+ (vnet_vxlan_add_del_tunnel_args_t * a, u32 * sw_if_indexp);
+
+void vnet_int_vxlan_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable);
+
+int vnet_vxlan_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add);
+
+u32 vnet_vxlan_get_tunnel_index (u32 sw_if_index);
+#endif /* included_vnet_vxlan_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vxlan/vxlan_api.c b/src/plugins/vxlan/vxlan_api.c
new file mode 100644
index 00000000000..8fd0928cc63
--- /dev/null
+++ b/src/plugins/vxlan/vxlan_api.c
@@ -0,0 +1,376 @@
+/*
+ *------------------------------------------------------------------
+ * vxlan_api.c - vxlan api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+#include <vxlan/vxlan.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/ip/ip_types_api.h>
+#include <vnet/udp/udp_local.h>
+#include <vnet/format_fns.h>
+#include <vxlan/vxlan.api_enum.h>
+#include <vxlan/vxlan.api_types.h>
+
+static u16 msg_id_base;
+
+#define REPLY_MSG_ID_BASE msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static void
+vl_api_vxlan_offload_rx_t_handler (vl_api_vxlan_offload_rx_t * mp)
+{
+ vl_api_vxlan_offload_rx_reply_t *rmp;
+ int rv = 0;
+ u32 hw_if_index = ntohl (mp->hw_if_index);
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ if (!vnet_hw_interface_is_valid (vnet_get_main (), hw_if_index))
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto err;
+ }
+ VALIDATE_SW_IF_INDEX (mp);
+
+ u32 t_index = vnet_vxlan_get_tunnel_index (sw_if_index);
+ if (t_index == ~0)
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX_2;
+ goto err;
+ }
+
+ vxlan_main_t *vxm = &vxlan_main;
+ vxlan_tunnel_t *t = pool_elt_at_index (vxm->tunnels, t_index);
+ if (!ip46_address_is_ip4 (&t->dst))
+ {
+ rv = VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
+ goto err;
+ }
+
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
+ ip4_main_t *im = &ip4_main;
+ u32 rx_fib_index =
+ vec_elt (im->fib_index_by_sw_if_index, hw_if->sw_if_index);
+
+ if (t->encap_fib_index != rx_fib_index)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto err;
+ }
+
+ if (vnet_vxlan_add_del_rx_flow (hw_if_index, t_index, mp->enable))
+ {
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ goto err;
+ }
+ BAD_SW_IF_INDEX_LABEL;
+err:
+
+ REPLY_MACRO (VL_API_VXLAN_OFFLOAD_RX_REPLY);
+}
+
+static void
+ vl_api_sw_interface_set_vxlan_bypass_t_handler
+ (vl_api_sw_interface_set_vxlan_bypass_t * mp)
+{
+ vl_api_sw_interface_set_vxlan_bypass_reply_t *rmp;
+ int rv = 0;
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ vnet_int_vxlan_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable);
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_BYPASS_REPLY);
+}
+
+static int
+vxlan_add_del_tunnel_clean_input (vnet_vxlan_add_del_tunnel_args_t *a,
+ u32 encap_vrf_id)
+{
+ a->is_ip6 = !ip46_address_is_ip4 (&a->src);
+
+ a->encap_fib_index = fib_table_find (fib_ip_proto (a->is_ip6), encap_vrf_id);
+ if (a->encap_fib_index == ~0)
+ {
+ return VNET_API_ERROR_NO_SUCH_FIB;
+ }
+
+ if (ip46_address_is_ip4 (&a->src) != ip46_address_is_ip4 (&a->dst))
+ {
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ /* Check src & dst are different */
+ if (ip46_address_cmp (&a->dst, &a->src) == 0)
+ {
+ return VNET_API_ERROR_SAME_SRC_DST;
+ }
+ if (ip46_address_is_multicast (&a->dst) &&
+ !vnet_sw_if_index_is_api_valid (a->mcast_sw_if_index))
+ {
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+ return 0;
+}
+
+static void
+vl_api_vxlan_add_del_tunnel_t_handler (vl_api_vxlan_add_del_tunnel_t *mp)
+{
+ vl_api_vxlan_add_del_tunnel_reply_t *rmp;
+ u32 sw_if_index = ~0;
+ int rv = 0;
+
+ vnet_vxlan_add_del_tunnel_args_t a = {
+ .is_add = mp->is_add,
+ .instance = ntohl (mp->instance),
+ .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
+ .decap_next_index = ntohl (mp->decap_next_index),
+ .vni = ntohl (mp->vni),
+ };
+ ip_address_decode (&mp->src_address, &a.src);
+ ip_address_decode (&mp->dst_address, &a.dst);
+
+ rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id));
+ if (rv)
+ goto out;
+ a.dst_port = a.is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
+ a.src_port = a.is_ip6 ? UDP_DST_PORT_vxlan6 : UDP_DST_PORT_vxlan;
+ rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
+
+out:
+ REPLY_MACRO2(VL_API_VXLAN_ADD_DEL_TUNNEL_REPLY,
+ ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ }));
+}
+
+static void
+vl_api_vxlan_add_del_tunnel_v2_t_handler (vl_api_vxlan_add_del_tunnel_v2_t *mp)
+{
+ vl_api_vxlan_add_del_tunnel_v2_reply_t *rmp;
+ u32 sw_if_index = ~0;
+ int rv = 0;
+
+ vnet_vxlan_add_del_tunnel_args_t a = {
+ .is_add = mp->is_add,
+ .instance = ntohl (mp->instance),
+ .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
+ .decap_next_index = ntohl (mp->decap_next_index),
+ .vni = ntohl (mp->vni),
+ .dst_port = ntohs (mp->dst_port),
+ .src_port = ntohs (mp->src_port),
+ };
+
+ ip_address_decode (&mp->src_address, &a.src);
+ ip_address_decode (&mp->dst_address, &a.dst);
+
+ rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id));
+ if (rv)
+ goto out;
+ rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
+out:
+ REPLY_MACRO2 (VL_API_VXLAN_ADD_DEL_TUNNEL_V2_REPLY,
+ ({ rmp->sw_if_index = ntohl (sw_if_index); }));
+}
+
+static void
+vl_api_vxlan_add_del_tunnel_v3_t_handler (vl_api_vxlan_add_del_tunnel_v3_t *mp)
+{
+ vl_api_vxlan_add_del_tunnel_v3_reply_t *rmp;
+ u32 sw_if_index = ~0;
+ int rv = 0;
+
+ vnet_vxlan_add_del_tunnel_args_t a = {
+ .is_add = mp->is_add,
+ .instance = ntohl (mp->instance),
+ .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
+ .decap_next_index = ntohl (mp->decap_next_index),
+ .vni = ntohl (mp->vni),
+ .dst_port = ntohs (mp->dst_port),
+ .src_port = ntohs (mp->src_port),
+ .is_l3 = mp->is_l3,
+ };
+
+ ip_address_decode (&mp->src_address, &a.src);
+ ip_address_decode (&mp->dst_address, &a.dst);
+
+ rv = vxlan_add_del_tunnel_clean_input (&a, ntohl (mp->encap_vrf_id));
+ if (rv)
+ goto out;
+ rv = vnet_vxlan_add_del_tunnel (&a, &sw_if_index);
+out:
+ REPLY_MACRO2 (VL_API_VXLAN_ADD_DEL_TUNNEL_V3_REPLY,
+ ({ rmp->sw_if_index = ntohl (sw_if_index); }));
+}
+
+static void send_vxlan_tunnel_details
+ (vxlan_tunnel_t * t, vl_api_registration_t * reg, u32 context)
+{
+ vl_api_vxlan_tunnel_details_t *rmp;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_VXLAN_TUNNEL_DETAILS);
+
+ ip_address_encode (&t->src, IP46_TYPE_ANY, &rmp->src_address);
+ ip_address_encode (&t->dst, IP46_TYPE_ANY, &rmp->dst_address);
+
+ if (ip46_address_is_ip4 (&t->dst))
+ rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
+ else
+ rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
+
+ rmp->instance = htonl (t->user_instance);
+ rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
+ rmp->vni = htonl (t->vni);
+ rmp->decap_next_index = htonl (t->decap_next_index);
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->context = context;
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void vl_api_vxlan_tunnel_dump_t_handler
+ (vl_api_vxlan_tunnel_dump_t * mp)
+{
+ vl_api_registration_t *reg;
+ vxlan_main_t *vxm = &vxlan_main;
+ vxlan_tunnel_t *t;
+ u32 sw_if_index;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ pool_foreach (t, vxm->tunnels)
+ send_vxlan_tunnel_details(t, reg, mp->context);
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) ||
+ (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_vxlan_tunnel_details (t, reg, mp->context);
+ }
+}
+
+static void
+send_vxlan_tunnel_v2_details (vxlan_tunnel_t *t, vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_vxlan_tunnel_v2_details_t *rmp;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_VXLAN_TUNNEL_V2_DETAILS);
+
+ ip_address_encode (&t->src, IP46_TYPE_ANY, &rmp->src_address);
+ ip_address_encode (&t->dst, IP46_TYPE_ANY, &rmp->dst_address);
+ rmp->src_port = htons (t->src_port);
+ rmp->dst_port = htons (t->dst_port);
+
+ if (ip46_address_is_ip4 (&t->dst))
+ rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
+ else
+ rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
+
+ rmp->instance = htonl (t->user_instance);
+ rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
+ rmp->vni = htonl (t->vni);
+ rmp->decap_next_index = htonl (t->decap_next_index);
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->context = context;
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_vxlan_tunnel_v2_dump_t_handler (vl_api_vxlan_tunnel_v2_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ vxlan_main_t *vxm = &vxlan_main;
+ vxlan_tunnel_t *t;
+ u32 sw_if_index;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ pool_foreach (t, vxm->tunnels)
+ send_vxlan_tunnel_v2_details (t, reg, mp->context);
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) ||
+ (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_vxlan_tunnel_v2_details (t, reg, mp->context);
+ }
+}
+
+#include <vxlan/vxlan.api.c>
+static clib_error_t *
+vxlan_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = vlibapi_get_main ();
+
+ vl_api_increase_msg_trace_size (am, VL_API_VXLAN_ADD_DEL_TUNNEL,
+ 16 * sizeof (u32));
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ msg_id_base = setup_message_id_table ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (vxlan_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vxlan/vxlan_error.def b/src/plugins/vxlan/vxlan_error.def
new file mode 100644
index 00000000000..17f905950f5
--- /dev/null
+++ b/src/plugins/vxlan/vxlan_error.def
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+vxlan_error (DECAPSULATED, "good packets decapsulated")
+vxlan_error (NO_SUCH_TUNNEL, "no such tunnel packets")
+vxlan_error (BAD_FLAGS, "packets with bad flags field in vxlan header")
diff --git a/src/plugins/vxlan/vxlan_packet.h b/src/plugins/vxlan/vxlan_packet.h
new file mode 100644
index 00000000000..d1d1ed813e5
--- /dev/null
+++ b/src/plugins/vxlan/vxlan_packet.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_vxlan_packet_h__
+#define __included_vxlan_packet_h__ 1
+
+/*
+ * From RFC-7348
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|R|R|I|R|R|R| Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | VXLAN Network Identifier (VNI) | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * VXLAN Header: This is an 8-byte field that has:
+ *
+ * - Flags (8 bits): where the I flag MUST be set to 1 for a valid
+ * VXLAN Network ID (VNI). The other 7 bits (designated "R") are
+ * reserved fields and MUST be set to zero on transmission and
+ * ignored on receipt.
+ *
+ * - VXLAN Segment ID/VXLAN Network Identifier (VNI): this is a
+ * 24-bit value used to designate the individual VXLAN overlay
+ * network on which the communicating VMs are situated. VMs in
+ * different VXLAN overlay networks cannot communicate with each
+ * other.
+ *
+ * - Reserved fields (24 bits and 8 bits): MUST be set to zero on
+ * transmission and ignored on receipt.
+ *
+ */
+
+typedef struct
+{
+ u8 flags;
+ u8 res1;
+ u8 res2;
+ u8 res3;
+ u32 vni_reserved;
+} vxlan_header_t;
+
+#define VXLAN_FLAGS_I 0x08
+
+static inline u32
+vnet_get_vni (vxlan_header_t * h)
+{
+ u32 vni_reserved_host_byte_order;
+
+ vni_reserved_host_byte_order = clib_net_to_host_u32 (h->vni_reserved);
+ return vni_reserved_host_byte_order >> 8;
+}
+
+static inline void
+vnet_set_vni_and_flags (vxlan_header_t * h, u32 vni)
+{
+ h->vni_reserved = clib_host_to_net_u32 (vni << 8);
+ *(u32 *) h = 0;
+ h->flags = VXLAN_FLAGS_I;
+}
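+
+/*
+ * Round-trip sketch (illustrative): the setter writes the VNI word,
+ * zeroes the flags/reserved word, then sets the I flag, so:
+ *
+ *   vxlan_header_t h;
+ *   vnet_set_vni_and_flags (&h, 13);   // h.flags == VXLAN_FLAGS_I
+ *   ASSERT (vnet_get_vni (&h) == 13);
+ */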
+
+#endif /* __included_vxlan_packet_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/wireguard/CMakeLists.txt b/src/plugins/wireguard/CMakeLists.txt
index 6dddc67298d..710b6a3b04a 100755..100644
--- a/src/plugins/wireguard/CMakeLists.txt
+++ b/src/plugins/wireguard/CMakeLists.txt
@@ -12,7 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+if(NOT OPENSSL_FOUND)
+ message(WARNING "OpenSSL not found - wireguard plugin disabled")
+ return()
+endif()
+
if (OPENSSL_VERSION VERSION_LESS 1.1.0)
+ message(WARNING "OpenSSL too old - wireguard plugin disabled")
return()
endif()
@@ -33,8 +39,11 @@ add_vpp_plugin(wireguard
wireguard_input.c
wireguard_output_tun.c
wireguard_handoff.c
+ wireguard_hchacha20.h
wireguard_key.c
wireguard_key.h
+ wireguard_chachapoly.c
+ wireguard_chachapoly.h
wireguard_cli.c
wireguard_messages.h
wireguard_noise.c
@@ -51,7 +60,7 @@ add_vpp_plugin(wireguard
wireguard_index_table.h
wireguard_api.c
- LINK_LIBRARIES ${OPENSSL_LIBRARIES}
+ LINK_LIBRARIES ${OPENSSL_CRYPTO_LIBRARIES}
API_FILES
wireguard.api
diff --git a/src/plugins/wireguard/FEATURE.yaml b/src/plugins/wireguard/FEATURE.yaml
index cf8b6d7f3c4..5c0a588a484 100644
--- a/src/plugins/wireguard/FEATURE.yaml
+++ b/src/plugins/wireguard/FEATURE.yaml
@@ -7,6 +7,3 @@ features:
description: "Wireguard protocol implementation"
state: development
properties: [API, CLI]
-missing:
- - IPv6 support
- - DoS protection as in the original protocol
diff --git a/src/plugins/wireguard/README.md b/src/plugins/wireguard/README.md
deleted file mode 100755
index df69d93789f..00000000000
--- a/src/plugins/wireguard/README.md
+++ /dev/null
@@ -1,55 +0,0 @@
-# Wireguard vpp-plugin {#wireguard_plugin_doc}
-
-## Overview
-This plugin is an implementation of [wireguard protocol](https://www.wireguard.com/) for VPP. It allows one to create secure VPN tunnels.
-This implementation is based on [wireguard-openbsd](https://git.zx2c4.com/wireguard-openbsd/).
-
-## Crypto
-
-The crypto protocols:
-
-- blake2s [[Source]](https://github.com/BLAKE2/BLAKE2)
-
-OpenSSL:
-
-- curve25519
-- chachapoly1305
-
-## Plugin usage example
-
-### Create wireguard interface
-
-```
-> vpp# wireguard create listen-port <port> private-key <priv_key> src <src_ip4> [generate-key]
-> *wg_interface*
-> vpp# set int state <wg_interface> up
-> vpp# set int ip address <wg_interface> <wg_ip4>
-```
-
-### Add a peer configuration:
-```
-> vpp# wireguard peer add <wg_interface> public-key <pub_key_other> endpoint <ip4_dst> allowed-ip <prefix> dst-port <port_dst> persistent-keepalive [keepalive_interval]
-> vpp# *peer_idx*
-```
-
-### Show config
-```
-> vpp# show wireguard interface
-> vpp# show wireguard peer
-```
-
-### Remove peer
-```
-> vpp# wireguard peer remove <peer_idx>
-```
-
-
-### Delete interface
-```
-> vpp# wireguard delete <wg_interface>
-```
-
-## Main next steps for improving this implementation
-1. Use all benefits of VPP-engine.
-2. Add IPv6 support (currently only supports IPv4)
-3. Add DoS protection as in original protocol (using cookie)
diff --git a/src/plugins/wireguard/README.rst b/src/plugins/wireguard/README.rst
new file mode 100644
index 00000000000..35dd2c41382
--- /dev/null
+++ b/src/plugins/wireguard/README.rst
@@ -0,0 +1,79 @@
+.. _wireguard_plugin_doc:
+
+Wireguard vpp-plugin
+====================
+
+Overview
+--------
+
+This plugin is an implementation of `wireguard
+protocol <https://www.wireguard.com/>`__ for VPP. It allows one to
+create secure VPN tunnels. This implementation is based on
+`wireguard-openbsd <https://git.zx2c4.com/wireguard-openbsd/>`__.
+
+Crypto
+------
+
+The crypto protocols:
+
+- blake2s `[Source] <https://github.com/BLAKE2/BLAKE2>`__
+
+OpenSSL:
+
+- curve25519
+- chachapoly1305
+
+Plugin usage example
+--------------------
+
+Create wireguard interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ > vpp# wireguard create listen-port <port> private-key <priv_key> src <src_ip4> [generate-key]
+ > *wg_interface*
+ > vpp# set int state <wg_interface> up
+ > vpp# set int ip address <wg_interface> <wg_ip4>
+
+Add a peer configuration:
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ > vpp# wireguard peer add <wg_interface> public-key <pub_key_other> endpoint <ip4_dst> allowed-ip <prefix> port <port_dst> persistent-keepalive [keepalive_interval]
+ > vpp# *peer_idx*
+
+Add routes for allowed-ip:
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ > ip route add <prefix> via <wg_ip4> <wg_interface>
+
+Show config
+~~~~~~~~~~~
+
+::
+
+ > vpp# show wireguard interface
+ > vpp# show wireguard peer
+
+Remove peer
+~~~~~~~~~~~
+
+::
+
+ > vpp# wireguard peer remove <peer_idx>
+
+Delete interface
+~~~~~~~~~~~~~~~~
+
+::
+
+ > vpp# wireguard delete <wg_interface>
+
+Main next steps for improving this implementation
+-------------------------------------------------
+
+1. Take full advantage of the VPP engine.
diff --git a/src/plugins/wireguard/blake/blake2-impl.h b/src/plugins/wireguard/blake/blake2-impl.h
index ad60b4a5775..ad60b4a5775 100755..100644
--- a/src/plugins/wireguard/blake/blake2-impl.h
+++ b/src/plugins/wireguard/blake/blake2-impl.h
diff --git a/src/plugins/wireguard/blake/blake2s.c b/src/plugins/wireguard/blake/blake2s.c
index 3ff312a1322..3ff312a1322 100755..100644
--- a/src/plugins/wireguard/blake/blake2s.c
+++ b/src/plugins/wireguard/blake/blake2s.c
diff --git a/src/plugins/wireguard/blake/blake2s.h b/src/plugins/wireguard/blake/blake2s.h
index 37da0acf28a..37da0acf28a 100755..100644
--- a/src/plugins/wireguard/blake/blake2s.h
+++ b/src/plugins/wireguard/blake/blake2s.h
diff --git a/src/plugins/wireguard/wireguard.api b/src/plugins/wireguard/wireguard.api
index e290fc41ffc..55a36c6f6e5 100755..100644
--- a/src/plugins/wireguard/wireguard.api
+++ b/src/plugins/wireguard/wireguard.api
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-option version = "0.1.0";
+option version = "1.3.0";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
@@ -83,19 +83,24 @@ define wireguard_interface_details
enum wireguard_peer_flags : u8
{
WIREGUARD_PEER_STATUS_DEAD = 0x1,
+ WIREGUARD_PEER_ESTABLISHED = 0x2,
};
-/** \brief Create new peer
+/** \brief Peer structure
+ @param peer_index - peer pool index
@param public_key - public key (in binary format) of destination peer
@param port - destination port
+ @param persistent_keepalive - keepalive interval
@param table_id - The IP table in which 'endpoint' is reachable
@param endpoint - destination ip
- @param allowed_ip - allowed incoming ip tunnel
- @param tun_sw_if_index - tunnel interface
- @param persistent_keepalive - keepalive packet timeout
+ @param sw_if_index - tunnel SW interface
+ @param flags - peer status flags
+ @param n_allowed_ips - number of prefixes in allowed_ips
+ @param allowed_ips - allowed incoming tunnel prefixes
*/
typedef wireguard_peer
{
+ u32 peer_index;
u8 public_key[32];
u16 port;
u16 persistent_keepalive;
@@ -107,6 +112,41 @@ typedef wireguard_peer
vl_api_prefix_t allowed_ips[n_allowed_ips];
};
+service {
+ rpc want_wireguard_peer_events returns want_wireguard_peer_events_reply
+ events wireguard_peer_event;
+};
+/** \brief Register for wireguard peer events
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to dump peer info on, ~0 if on all
+ @param peer_index - index of the peer to dump info on, ~0 if on all
+ @param enable_disable - 1 => register for events, 0 => cancel registration
+ @param pid - sender's pid
+*/
+autoreply define want_wireguard_peer_events
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index [default=0xFFFFFFFF];
+ u32 peer_index [default=0xFFFFFFFF];
+ u32 enable_disable;
+ u32 pid;
+};
+/** \brief Peer event generated by want_wireguard_peer_events
+ @param client_index - opaque cookie to identify the sender
+ @param pid - client pid registered to receive notification
+ @param peer_index - index of the peer for this event
+ @param flags - peer status flags
+*/
+define wireguard_peer_event
+{
+ u32 client_index;
+ u32 pid;
+ u32 peer_index;
+ vl_api_wireguard_peer_flags_t flags;
+};
+
/** \brief Create new peer
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -118,6 +158,12 @@ define wireguard_peer_add
u32 context;
vl_api_wireguard_peer_t peer;
};
+
+/** \brief Reply for creating a new peer
+ @param context - sender context, to match reply w/ request
+ @param retval - return status
+ @param peer_index - Created or existing peer pool index
+*/
define wireguard_peer_add_reply
{
u32 context;
@@ -125,10 +171,10 @@ define wireguard_peer_add_reply
u32 peer_index;
};
-/** \brief Remove peer by public_key
+/** \brief Remove peer
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param public_key
+ @param peer_index - peer to be removed
*/
autoreply define wireguard_peer_remove
{
@@ -140,23 +186,34 @@ autoreply define wireguard_peer_remove
/** \brief Dump all peers
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param peer_index - peer index to be dumped. If 0xFFFFFFFF dumps all peers
*/
define wireguard_peers_dump {
u32 client_index;
u32 context;
+ u32 peer_index [default=0xFFFFFFFF];
};
-/** \brief Dump peers response
+/** \brief Dump peer details
@param context - sender context, to match reply w/ request
- @param is_dead - is peer valid yet
- @param public_key - peer public_key
- @param ip4_address - ip4 endpoint address
+ @param peer - peer details
*/
define wireguard_peers_details {
u32 context;
vl_api_wireguard_peer_t peer;
};
+/** \brief Wireguard Set Async mode
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param async_enable - wireguard async mode on or off, default off
+*/
+autoreply define wg_set_async_mode {
+ u32 client_index;
+ u32 context;
+ bool async_enable [default=false];
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/plugins/wireguard/wireguard.c b/src/plugins/wireguard/wireguard.c
index 58422299069..b1c8bc79870 100755..100644
--- a/src/plugins/wireguard/wireguard.c
+++ b/src/plugins/wireguard/wireguard.c
@@ -15,8 +15,8 @@
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
-#include <vnet/ipip/ipip.h>
#include <vpp/app/version.h>
+#include <vnet/crypto/crypto.h>
#include <wireguard/wireguard_send.h>
#include <wireguard/wireguard_key.h>
@@ -24,6 +24,45 @@
#include <wireguard/wireguard.h>
wg_main_t wg_main;
+wg_async_post_next_t wg_encrypt_async_next;
+wg_async_post_next_t wg_decrypt_async_next;
+
+void
+wg_set_async_mode (u32 is_enabled)
+{
+ if (is_enabled)
+ wg_op_mode_set_ASYNC ();
+ else
+ wg_op_mode_unset_ASYNC ();
+}
+
+static void
+wireguard_register_post_node (vlib_main_t *vm)
+
+{
+ wg_async_post_next_t *eit;
+ wg_async_post_next_t *dit;
+
+ eit = &wg_encrypt_async_next;
+ dit = &wg_decrypt_async_next;
+
+ eit->wg4_post_next =
+ vnet_crypto_register_post_node (vm, "wg4-output-tun-post-node");
+ eit->wg6_post_next =
+ vnet_crypto_register_post_node (vm, "wg6-output-tun-post-node");
+
+ dit->wg4_post_next =
+ vnet_crypto_register_post_node (vm, "wg4-input-post-node");
+ dit->wg6_post_next =
+ vnet_crypto_register_post_node (vm, "wg6-input-post-node");
+}
+
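+/*
+ * Zeroize key material: calling memset through a volatile function
+ * pointer keeps the compiler from eliding the store as a dead write,
+ * in the spirit of explicit_bzero().
+ */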
+void
+wg_secure_zero_memory (void *v, size_t n)
+{
+ static void *(*const volatile memset_v) (void *, int, size_t) = &memset;
+ memset_v (v, 0, n);
+}
static clib_error_t *
wg_init (vlib_main_t * vm)
@@ -32,9 +71,12 @@ wg_init (vlib_main_t * vm)
wmp->vlib_main = vm;
- wmp->in_fq_index = vlib_frame_queue_main_init (wg_input_node.index, 0);
- wmp->out_fq_index =
- vlib_frame_queue_main_init (wg_output_tun_node.index, 0);
+ wmp->in4_fq_index = vlib_frame_queue_main_init (wg4_input_node.index, 0);
+ wmp->in6_fq_index = vlib_frame_queue_main_init (wg6_input_node.index, 0);
+ wmp->out4_fq_index =
+ vlib_frame_queue_main_init (wg4_output_tun_node.index, 0);
+ wmp->out6_fq_index =
+ vlib_frame_queue_main_init (wg6_output_tun_node.index, 0);
vlib_thread_main_t *tm = vlib_get_thread_main ();
@@ -42,27 +84,32 @@ wg_init (vlib_main_t * vm)
CLIB_CACHE_LINE_BYTES);
wg_timer_wheel_init ();
+ wireguard_register_post_node (vm);
+ wmp->op_mode_flags = 0;
return (NULL);
}
VLIB_INIT_FUNCTION (wg_init);
-/* *INDENT-OFF* */
-VNET_FEATURE_INIT (wg_output_tun, static) =
-{
+VNET_FEATURE_INIT (wg4_output_tun, static) = {
.arc_name = "ip4-output",
- .node_name = "wg-output-tun",
+ .node_name = "wg4-output-tun",
.runs_after = VNET_FEATURES ("gso-ip4"),
};
+VNET_FEATURE_INIT (wg6_output_tun, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "wg6-output-tun",
+ .runs_after = VNET_FEATURES ("gso-ip6"),
+};
+
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Wireguard Protocol",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/wireguard/wireguard.h b/src/plugins/wireguard/wireguard.h
index ef308c4c397..05cefc4f073 100755..100644
--- a/src/plugins/wireguard/wireguard.h
+++ b/src/plugins/wireguard/wireguard.h
@@ -18,16 +18,25 @@
#include <wireguard/wireguard_index_table.h>
#include <wireguard/wireguard_messages.h>
#include <wireguard/wireguard_timer.h>
+#include <vnet/buffer.h>
#define WG_DEFAULT_DATA_SIZE 2048
-extern vlib_node_registration_t wg_input_node;
-extern vlib_node_registration_t wg_output_tun_node;
+extern vlib_node_registration_t wg4_input_node;
+extern vlib_node_registration_t wg6_input_node;
+extern vlib_node_registration_t wg4_output_tun_node;
+extern vlib_node_registration_t wg6_output_tun_node;
typedef struct wg_per_thread_data_t_
{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_crypto_op_t *crypto_ops;
+ vnet_crypto_op_t *chained_crypto_ops;
+ vnet_crypto_op_chunk_t *chunks;
+ vnet_crypto_async_frame_t **async_frames;
u8 data[WG_DEFAULT_DATA_SIZE];
} wg_per_thread_data_t;
+
typedef struct
{
/* convenience */
@@ -37,19 +46,81 @@ typedef struct
wg_index_table_t index_table;
- u32 in_fq_index;
- u32 out_fq_index;
+ u32 in4_fq_index;
+ u32 in6_fq_index;
+ u32 out4_fq_index;
+ u32 out6_fq_index;
wg_per_thread_data_t *per_thread_data;
u8 feature_init;
tw_timer_wheel_16t_2w_512sl_t timer_wheel;
+
+ /* operation mode flags (e.g. async) */
+ u8 op_mode_flags;
} wg_main_t;
+typedef struct
+{
+ /* wg post node index for async crypto */
+ u32 wg4_post_next;
+ u32 wg6_post_next;
+} wg_async_post_next_t;
+
+extern wg_async_post_next_t wg_encrypt_async_next;
+extern wg_async_post_next_t wg_decrypt_async_next;
extern wg_main_t wg_main;
+/**
+ * Wireguard operation mode
+ **/
+#define foreach_wg_op_mode_flags _ (0, ASYNC, "async")
+
+/**
+ * Helper function to set/unset and check op modes
+ **/
+typedef enum wg_op_mode_flags_t_
+{
+#define _(v, f, s) WG_OP_MODE_FLAG_##f = 1 << v,
+ foreach_wg_op_mode_flags
+#undef _
+} __clib_packed wg_op_mode_flags_t;
+
+#define _(a, v, s) \
+ always_inline int wg_op_mode_set_##v (void) \
+ { \
+ return (wg_main.op_mode_flags |= WG_OP_MODE_FLAG_##v); \
+ } \
+ always_inline int wg_op_mode_unset_##v (void) \
+ { \
+ return (wg_main.op_mode_flags &= ~WG_OP_MODE_FLAG_##v); \
+ } \
+ always_inline int wg_op_mode_is_set_##v (void) \
+ { \
+ return (wg_main.op_mode_flags & WG_OP_MODE_FLAG_##v); \
+ }
+foreach_wg_op_mode_flags
+#undef _
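With the single flag defined above, _ (0, ASYNC, "async"), the generator emits three inline helpers over wg_main.op_mode_flags; expanded by hand, the first is:

  always_inline int
  wg_op_mode_set_ASYNC (void)
  {
    return (wg_main.op_mode_flags |= WG_OP_MODE_FLAG_ASYNC);
  }

together with wg_op_mode_unset_ASYNC () and wg_op_mode_is_set_ASYNC (), which clear and test WG_OP_MODE_FLAG_ASYNC (1 << 0) the same way; wg_set_async_mode () in wireguard.c is a thin wrapper over the set/unset pair.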
+
+ typedef struct
+{
+ u8 __pad[22];
+ u16 next_index;
+} wg_post_data_t;
+
+STATIC_ASSERT (sizeof (wg_post_data_t) <=
+ STRUCT_SIZE_OF (vnet_buffer_opaque_t, unused),
+ "Custom meta-data too large for vnet_buffer_opaque_t");
+
+#define wg_post_data(b) \
+ ((wg_post_data_t *) ((u8 *) ((b)->opaque) + \
+ STRUCT_OFFSET_OF (vnet_buffer_opaque_t, unused)))
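wg_post_data () carves a slot out of the unused tail of the buffer opaque so a packet can remember, across the asynchronous crypto round trip, which next node it was headed to; the STATIC_ASSERT above keeps the struct from silently outgrowing that space. A sketch of the two halves of the hand-off (the next-node name is hypothetical):

  /* before submitting the buffer to the async crypto engine */
  wg_post_data (b)->next_index = WG_OUTPUT_NEXT_INTERFACE_OUTPUT;

  /* in the registered *-post node, after the engine completes */
  u32 bi = vlib_get_buffer_index (vm, b);
  u16 next_index = wg_post_data (b)->next_index;
  vlib_buffer_enqueue_to_next (vm, node, &bi, &next_index, 1);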
+
#define WG_START_EVENT 1
void wg_feature_init (wg_main_t * wmp);
+void wg_set_async_mode (u32 is_enabled);
+
+void wg_secure_zero_memory (void *v, size_t n);
#endif /* __included_wg_h__ */
diff --git a/src/plugins/wireguard/wireguard_api.c b/src/plugins/wireguard/wireguard_api.c
index 36cc2509463..e736efcd6c0 100755..100644
--- a/src/plugins/wireguard/wireguard_api.c
+++ b/src/plugins/wireguard/wireguard_api.c
@@ -27,9 +27,9 @@
#include <wireguard/wireguard_key.h>
#include <wireguard/wireguard.h>
#include <wireguard/wireguard_if.h>
-#include <wireguard/wireguard_peer.h>
#define REPLY_MSG_ID_BASE wmp->msg_id_base
+#include <wireguard/wireguard_peer.h>
#include <vlibapi/api_helper_macros.h>
static void
@@ -47,26 +47,18 @@ static void
ip_address_decode2 (&mp->interface.src_ip, &src);
- if (AF_IP6 == ip_addr_version (&src))
- rv = VNET_API_ERROR_INVALID_PROTOCOL;
+ if (mp->generate_key)
+ curve25519_gen_secret (private_key);
else
- {
- if (mp->generate_key)
- curve25519_gen_secret (private_key);
- else
- clib_memcpy (private_key, mp->interface.private_key,
- NOISE_PUBLIC_KEY_LEN);
-
- rv = wg_if_create (ntohl (mp->interface.user_instance), private_key,
- ntohs (mp->interface.port), &src, &sw_if_index);
- }
+ clib_memcpy (private_key, mp->interface.private_key, NOISE_PUBLIC_KEY_LEN);
+
+ rv = wg_if_create (ntohl (mp->interface.user_instance), private_key,
+ ntohs (mp->interface.port), &src, &sw_if_index);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_WIREGUARD_INTERFACE_CREATE_REPLY,
{
rmp->sw_if_index = htonl(sw_if_index);
});
- /* *INDENT-ON* */
}
static void
@@ -85,9 +77,7 @@ static void
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO(VL_API_WIREGUARD_INTERFACE_DELETE_REPLY);
- /* *INDENT-ON* */
}
typedef struct wg_deatils_walk_t_
@@ -119,6 +109,7 @@ wireguard_if_send_details (index_t wgii, void *data)
local->l_public, NOISE_PUBLIC_KEY_LEN);
rmp->interface.sw_if_index = htonl (wgi->sw_if_index);
rmp->interface.port = htons (wgi->port);
+ rmp->interface.user_instance = htonl (wgi->user_instance);
ip_address_encode2 (&wgi->src_ip, &rmp->interface.src_ip);
rmp->context = ctx->context;
@@ -147,7 +138,15 @@ vl_api_wireguard_interface_dump_t_handler (vl_api_wireguard_interface_dump_t *
.show_private_key = mp->show_private_key,
};
- wg_if_walk (wireguard_if_send_details, &ctx);
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ if (sw_if_index == ~0)
+ wg_if_walk (wireguard_if_send_details, &ctx);
+ else
+ {
+ index_t wgii = wg_if_find_by_sw_if_index (sw_if_index);
+ if (wgii != INDEX_INVALID)
+ wireguard_if_send_details (wgii, &ctx);
+ }
}
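As elsewhere in the binary API, an all-ones index acts as the wildcard; ~0 is byte-order invariant, so the client needs no htonl for the sentinel. Client-side sketch:

  mp->sw_if_index = ~0;         /* dump every wireguard interface */
  mp->sw_if_index = htonl (5);  /* dump only sw_if_index 5 (hypothetical) */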
static void
@@ -177,29 +176,19 @@ vl_api_wireguard_peer_add_t_handler (vl_api_wireguard_peer_add_t * mp)
for (ii = 0; ii < mp->peer.n_allowed_ips; ii++)
ip_prefix_decode (&mp->peer.allowed_ips[ii], &allowed_ips[ii]);
- if (AF_IP6 == ip_addr_version (&endpoint) ||
- FIB_PROTOCOL_IP6 == allowed_ips[0].fp_proto)
- /* ip6 currently not supported, but the API needs to support it
- * else we'll need to change it later, and that's a PITA */
- rv = VNET_API_ERROR_INVALID_PROTOCOL;
- else
- rv = wg_peer_add (ntohl (mp->peer.sw_if_index),
- mp->peer.public_key,
- ntohl (mp->peer.table_id),
- &ip_addr_46 (&endpoint),
- allowed_ips,
- ntohs (mp->peer.port),
- ntohs (mp->peer.persistent_keepalive), &peeri);
+ rv = wg_peer_add (ntohl (mp->peer.sw_if_index), mp->peer.public_key,
+ ntohl (mp->peer.table_id), &ip_addr_46 (&endpoint),
+ allowed_ips, ntohs (mp->peer.port),
+ ntohs (mp->peer.persistent_keepalive), &peeri);
vec_free (allowed_ips);
done:
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
+
REPLY_MACRO2(VL_API_WIREGUARD_PEER_ADD_REPLY,
{
rmp->peer_index = ntohl (peeri);
});
- /* *INDENT-ON* */
}
static void
@@ -213,13 +202,11 @@ vl_api_wireguard_peer_remove_t_handler (vl_api_wireguard_peer_remove_t * mp)
rv = wg_peer_remove (ntohl (mp->peer_index));
- /* *INDENT-OFF* */
REPLY_MACRO(VL_API_WIREGUARD_PEER_REMOVE_REPLY);
- /* *INDENT-ON* */
}
static walk_rc_t
-send_wg_peers_details (index_t peeri, void *data)
+wg_api_send_peers_details (index_t peeri, void *data)
{
vl_api_wireguard_peers_details_t *rmp;
wg_deatils_walk_t *ctx = data;
@@ -227,7 +214,11 @@ send_wg_peers_details (index_t peeri, void *data)
u8 n_allowed_ips;
size_t ss;
+ if (pool_is_free_index (wg_peer_pool, peeri))
+ return (WALK_CONTINUE);
+
peer = wg_peer_get (peeri);
+
n_allowed_ips = vec_len (peer->allowed_ips);
ss = (sizeof (*rmp) + (n_allowed_ips * sizeof (rmp->peer.allowed_ips[0])));
@@ -237,8 +228,8 @@ send_wg_peers_details (index_t peeri, void *data)
rmp->_vl_msg_id = htons (VL_API_WIREGUARD_PEERS_DETAILS +
wg_main.msg_id_base);
- if (peer->is_dead)
- rmp->peer.flags = WIREGUARD_PEER_STATUS_DEAD;
+ rmp->peer.peer_index = htonl (peeri);
+ rmp->peer.flags = peer->flags;
clib_memcpy (rmp->peer.public_key,
peer->remote.r_public, NOISE_PUBLIC_KEY_LEN);
@@ -246,11 +237,12 @@ send_wg_peers_details (index_t peeri, void *data)
rmp->peer.port = htons (peer->dst.port);
rmp->peer.n_allowed_ips = n_allowed_ips;
rmp->peer.sw_if_index = htonl (peer->wg_sw_if_index);
+ rmp->peer.persistent_keepalive = htons (peer->persistent_keepalive_interval);
+ rmp->peer.table_id = htonl (peer->table_id);
int ii;
for (ii = 0; ii < n_allowed_ips; ii++)
- ip_prefix_encode (&peer->allowed_ips[ii].prefix,
- &rmp->peer.allowed_ips[ii]);
+ ip_prefix_encode (&peer->allowed_ips[ii], &rmp->peer.allowed_ips[ii]);
rmp->context = ctx->context;
@@ -276,7 +268,143 @@ vl_api_wireguard_peers_dump_t_handler (vl_api_wireguard_peers_dump_t * mp)
.context = mp->context,
};
- wg_peer_walk (send_wg_peers_details, &ctx);
+ if (mp->peer_index == ~0)
+ wg_peer_walk (wg_api_send_peers_details, &ctx);
+ else
+ wg_api_send_peers_details (ntohl (mp->peer_index), &ctx);
+}
+
+static vpe_client_registration_t *
+wg_api_client_lookup (wg_peer_t *peer, u32 client_index)
+{
+ uword *p;
+ vpe_client_registration_t *api_client = NULL;
+
+ p = hash_get (peer->api_client_by_client_index, client_index);
+ if (p)
+ api_client = vec_elt_at_index (peer->api_clients, p[0]);
+
+ return api_client;
+}
+
+static walk_rc_t
+wg_api_update_peer_api_client (index_t peeri, void *data)
+{
+ if (pool_is_free_index (wg_peer_pool, peeri))
+ return (WALK_CONTINUE);
+
+ vl_api_want_wireguard_peer_events_t *mp = data;
+ wg_peer_t *peer = wg_peer_get (peeri);
+
+ if (ntohl (mp->sw_if_index) != ~0 &&
+ ntohl (mp->sw_if_index) != peer->wg_sw_if_index)
+ {
+ return (WALK_CONTINUE);
+ }
+
+ vpe_client_registration_t *api_client;
+
+ api_client = wg_api_client_lookup (peer, mp->client_index);
+
+ if (api_client)
+ {
+ if (mp->enable_disable)
+ {
+ return (WALK_CONTINUE);
+ }
+ hash_unset (peer->api_client_by_client_index, api_client->client_index);
+ pool_put (peer->api_clients, api_client);
+ }
+ if (mp->enable_disable)
+ {
+ pool_get (peer->api_clients, api_client);
+ clib_memset (api_client, 0, sizeof (vpe_client_registration_t));
+ api_client->client_index = mp->client_index;
+ api_client->client_pid = mp->pid;
+ hash_set (peer->api_client_by_client_index, mp->client_index,
+ api_client - peer->api_clients);
+ }
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_want_wireguard_peer_events_t_handler (
+ vl_api_want_wireguard_peer_events_t *mp)
+{
+ wg_main_t *wmp = &wg_main;
+ vl_api_want_wireguard_peer_events_reply_t *rmp;
+ int rv = 0;
+
+ wg_feature_init (wmp);
+
+ if (mp->peer_index == ~0)
+ wg_peer_walk (wg_api_update_peer_api_client, mp);
+ else
+ wg_api_update_peer_api_client (ntohl (mp->peer_index), mp);
+
+ REPLY_MACRO (VL_API_WANT_WIREGUARD_PEER_EVENTS_REPLY);
+}
+
+static void
+wg_api_send_peer_event (vl_api_registration_t *rp, index_t peer_index,
+ wg_peer_flags flags)
+{
+ vl_api_wireguard_peer_event_t *mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+
+ mp->_vl_msg_id = htons (VL_API_WIREGUARD_PEER_EVENT + wg_main.msg_id_base);
+ mp->peer_index = htonl (peer_index);
+ mp->flags = flags;
+
+ vl_api_send_msg (rp, (u8 *) mp);
+}
+
+typedef struct
+{
+ index_t peeri;
+ wg_peer_flags flags;
+} wg_api_peer_event_args_t;
+
+static void
+wg_api_peer_event_cb (wg_api_peer_event_args_t *args)
+{
+ wg_peer_t *peer = wg_peer_get (args->peeri);
+ vpe_client_registration_t *api_client;
+ vl_api_registration_t *rp;
+
+ pool_foreach (api_client, peer->api_clients)
+ {
+ rp = vl_api_client_index_to_registration (api_client->client_index);
+ if (rp)
+ {
+ wg_api_send_peer_event (rp, args->peeri, args->flags);
+ }
+ };
+}
+
+void
+wg_api_peer_event (index_t peeri, wg_peer_flags flags)
+{
+ wg_api_peer_event_args_t args = {
+ .peeri = peeri,
+ .flags = flags,
+ };
+
+ vl_api_rpc_call_main_thread (wg_api_peer_event_cb, (u8 *) &args,
+ sizeof (args));
+}
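Peer flag changes are detected on worker threads (handshake and timer paths), while client registrations and vl_api_send_msg () must only be used from the main thread; vl_api_rpc_call_main_thread () copies the small argument struct by value and replays the callback on main. A hypothetical worker-side call site:

  /* e.g. when a handshake completes and the peer's flags change */
  wg_api_peer_event (peeri, peer->flags);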
+
+static void
+vl_api_wg_set_async_mode_t_handler (vl_api_wg_set_async_mode_t *mp)
+{
+ wg_main_t *wmp = &wg_main;
+ vl_api_wg_set_async_mode_reply_t *rmp;
+ int rv = 0;
+
+ wg_set_async_mode (mp->async_enable);
+
+ REPLY_MACRO (VL_API_WG_SET_ASYNC_MODE_REPLY);
}
/* set up the API message handling tables */
diff --git a/src/plugins/wireguard/wireguard_chachapoly.c b/src/plugins/wireguard/wireguard_chachapoly.c
new file mode 100644
index 00000000000..0dd7908d2e2
--- /dev/null
+++ b/src/plugins/wireguard/wireguard_chachapoly.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2022 Rubicon Communications, LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <wireguard/wireguard.h>
+#include <wireguard/wireguard_chachapoly.h>
+#include <wireguard/wireguard_hchacha20.h>
+
+bool
+wg_chacha20poly1305_calc (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
+ u8 *aad, u32 aad_len, u64 nonce,
+ vnet_crypto_op_id_t op_id,
+ vnet_crypto_key_index_t key_index)
+{
+ vnet_crypto_op_t _op, *op = &_op;
+ u8 iv[12];
+ u8 tag_[NOISE_AUTHTAG_LEN] = {};
+ u8 src_[] = {};
+
+ clib_memset (iv, 0, 12);
+ clib_memcpy (iv + 4, &nonce, sizeof (nonce));
+
+ vnet_crypto_op_init (op, op_id);
+
+ op->tag_len = NOISE_AUTHTAG_LEN;
+ if (op_id == VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC)
+ {
+ op->tag = src + src_len - NOISE_AUTHTAG_LEN;
+ src_len -= NOISE_AUTHTAG_LEN;
+ op->flags |= VNET_CRYPTO_OP_FLAG_HMAC_CHECK;
+ }
+ else
+ op->tag = tag_;
+
+ op->src = !src ? src_ : src;
+ op->len = src_len;
+
+ op->dst = dst;
+ op->key_index = key_index;
+ op->aad = aad;
+ op->aad_len = aad_len;
+ op->iv = iv;
+
+ vnet_crypto_process_ops (vm, op, 1);
+ if (op_id == VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC)
+ {
+ clib_memcpy (dst + src_len, op->tag, NOISE_AUTHTAG_LEN);
+ }
+
+ return (op->status == VNET_CRYPTO_OP_STATUS_COMPLETED);
+}
+
+void
+wg_xchacha20poly1305_encrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
+ u8 *aad, u32 aad_len,
+ u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ int i;
+ u32 derived_key[CHACHA20POLY1305_KEY_SIZE / sizeof (u32)];
+ u64 h_nonce;
+
+ clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce));
+ h_nonce = le64toh (h_nonce);
+ hchacha20 (derived_key, nonce, key);
+
+ for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++)
+ (derived_key[i]) = htole32 ((derived_key[i]));
+
+ uint32_t key_idx;
+
+ key_idx =
+ vnet_crypto_key_add (vm, VNET_CRYPTO_ALG_CHACHA20_POLY1305,
+ (uint8_t *) derived_key, CHACHA20POLY1305_KEY_SIZE);
+
+ wg_chacha20poly1305_calc (vm, src, src_len, dst, aad, aad_len, h_nonce,
+ VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC, key_idx);
+
+ vnet_crypto_key_del (vm, key_idx);
+ wg_secure_zero_memory (derived_key, CHACHA20POLY1305_KEY_SIZE);
+}
+
+bool
+wg_xchacha20poly1305_decrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
+ u8 *aad, u32 aad_len,
+ u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ int ret, i;
+ u32 derived_key[CHACHA20POLY1305_KEY_SIZE / sizeof (u32)];
+ u64 h_nonce;
+
+ clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce));
+ h_nonce = le64toh (h_nonce);
+ hchacha20 (derived_key, nonce, key);
+
+ for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++)
+ (derived_key[i]) = htole32 ((derived_key[i]));
+
+ uint32_t key_idx;
+
+ key_idx =
+ vnet_crypto_key_add (vm, VNET_CRYPTO_ALG_CHACHA20_POLY1305,
+ (uint8_t *) derived_key, CHACHA20POLY1305_KEY_SIZE);
+
+ ret =
+ wg_chacha20poly1305_calc (vm, src, src_len, dst, aad, aad_len, h_nonce,
+ VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC, key_idx);
+
+ vnet_crypto_key_del (vm, key_idx);
+ wg_secure_zero_memory (derived_key, CHACHA20POLY1305_KEY_SIZE);
+
+ return ret;
+}
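Together these two routines are the standard XChaCha20-Poly1305 construction over the engine's plain ChaCha20-Poly1305: the 24-byte nonce splits as

  subkey          = HChaCha20 (key, nonce[0..15])
  ciphertext||tag = ChaCha20-Poly1305 (subkey,
                                       iv = 0x00000000 || nonce[16..23],
                                       aad, plaintext)

i.e. the first 16 nonce bytes are folded into a one-shot subkey and the remaining 8 become the low 64 bits of the 12-byte IV (wg_chacha20poly1305_calc zeroes the first 4 IV bytes and copies the 64-bit nonce behind them). The subkey is registered with the crypto layer only for the single operation, then deleted and zeroized.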
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/wireguard/wireguard_chachapoly.h b/src/plugins/wireguard/wireguard_chachapoly.h
new file mode 100644
index 00000000000..f09b2c8dd9d
--- /dev/null
+++ b/src/plugins/wireguard/wireguard_chachapoly.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2022 Rubicon Communications, LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_wg_chachapoly_h__
+#define __included_wg_chachapoly_h__
+
+#include <vlib/vlib.h>
+#include <vnet/crypto/crypto.h>
+
+#define XCHACHA20POLY1305_NONCE_SIZE 24
+#define CHACHA20POLY1305_KEY_SIZE 32
+
+bool wg_chacha20poly1305_calc (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
+ u8 *aad, u32 aad_len, u64 nonce,
+ vnet_crypto_op_id_t op_id,
+ vnet_crypto_key_index_t key_index);
+
+void wg_xchacha20poly1305_encrypt (vlib_main_t *vm, u8 *src, u32 src_len,
+ u8 *dst, u8 *aad, u32 aad_len,
+ u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
+bool wg_xchacha20poly1305_decrypt (vlib_main_t *vm, u8 *src, u32 src_len,
+ u8 *dst, u8 *aad, u32 aad_len,
+ u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
+#endif /* __included_wg_chachapoly_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/wireguard/wireguard_cli.c b/src/plugins/wireguard/wireguard_cli.c
index 3b4bf56a3dc..e412fa36c44 100755..100644
--- a/src/plugins/wireguard/wireguard_cli.c
+++ b/src/plugins/wireguard/wireguard_cli.c
@@ -25,7 +25,7 @@ wg_if_create_cli (vlib_main_t * vm,
{
wg_main_t *wmp = &wg_main;
unformat_input_t _line_input, *line_input = &_line_input;
- u8 private_key[NOISE_PUBLIC_KEY_LEN];
+ u8 private_key[NOISE_PUBLIC_KEY_LEN + 1];
u32 instance, sw_if_index;
ip_address_t src_ip;
clib_error_t *error;
@@ -94,14 +94,12 @@ wg_if_create_cli (vlib_main_t * vm,
/*?
* Create a Wireguard interface.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (wg_if_create_command, static) = {
.path = "wireguard create",
.short_help = "wireguard create listen-port <port> "
"private-key <key> src <IP> [generate-key]",
.function = wg_if_create_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
wg_if_delete_cli (vlib_main_t * vm,
@@ -143,13 +141,11 @@ wg_if_delete_cli (vlib_main_t * vm,
/*?
* Delete a Wireguard interface.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (wg_if_delete_command, static) = {
.path = "wireguard delete",
.short_help = "wireguard delete <interface>",
.function = wg_if_delete_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -162,10 +158,10 @@ wg_peer_add_command_fn (vlib_main_t * vm,
unformat_input_t _line_input, *line_input = &_line_input;
u8 *public_key_64 = 0;
- u8 public_key[NOISE_PUBLIC_KEY_LEN];
+ u8 public_key[NOISE_PUBLIC_KEY_LEN + 1];
fib_prefix_t allowed_ip, *allowed_ips = NULL;
ip_prefix_t pfx;
- ip_address_t ip;
+ ip_address_t ip = ip_address_initializer;
u32 portDst = 0, table_id = 0;
u32 persistent_keepalive = 0;
u32 tun_sw_if_index = ~0;
@@ -192,7 +188,7 @@ wg_peer_add_command_fn (vlib_main_t * vm,
;
else if (unformat (line_input, "table-id %d", &table_id))
;
- else if (unformat (line_input, "port %d", &portDst))
+ else if (unformat (line_input, "dst-port %d", &portDst))
;
else if (unformat (line_input, "persistent-keepalive %d",
&persistent_keepalive))
@@ -213,16 +209,14 @@ wg_peer_add_command_fn (vlib_main_t * vm,
}
}
- if (AF_IP6 == ip_addr_version (&ip) ||
- FIB_PROTOCOL_IP6 == allowed_ip.fp_proto)
- rv = VNET_API_ERROR_INVALID_PROTOCOL;
- else
- rv = wg_peer_add (tun_sw_if_index,
- public_key,
- table_id,
- &ip_addr_46 (&ip),
- allowed_ips,
- portDst, persistent_keepalive, &peer_index);
+ if (0 == vec_len (allowed_ips))
+ {
+ error = clib_error_return (0, "Allowed IPs are not specified");
+ goto done;
+ }
+
+ rv = wg_peer_add (tun_sw_if_index, public_key, table_id, &ip_addr_46 (&ip),
+ allowed_ips, portDst, persistent_keepalive, &peer_index);
switch (rv)
{
@@ -253,16 +247,14 @@ done:
return error;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (wg_peer_add_command, static) =
-{
+VLIB_CLI_COMMAND (wg_peer_add_command, static) = {
.path = "wireguard peer add",
- .short_help = "wireguard peer add <wg_int> public-key <pub_key_other>"
- "endpoint <ip4_dst> allowed-ip <prefix>"
- "dst-port [port_dst] persistent-keepalive [keepalive_interval]",
+ .short_help =
+ "wireguard peer add <wg_int> public-key <pub_key_other> "
+ "endpoint <ip4_dst> allowed-ip <prefix> "
+ "dst-port [port_dst] persistent-keepalive [keepalive_interval]",
.function = wg_peer_add_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
wg_peer_remove_command_fn (vlib_main_t * vm,
@@ -301,14 +293,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (wg_peer_remove_command, static) =
{
.path = "wireguard peer remove",
.short_help = "wireguard peer remove <index>",
.function = wg_peer_remove_command_fn,
};
-/* *INDENT-ON* */
static walk_rc_t
wg_peer_show_one (index_t peeri, void *arg)
@@ -327,14 +317,12 @@ wg_show_peer_command_fn (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (wg_show_peers_command, static) =
{
.path = "show wireguard peer",
.short_help = "show wireguard peer",
.function = wg_show_peer_command_fn,
};
-/* *INDENT-ON* */
static walk_rc_t
wg_if_show_one (index_t itfi, void *arg)
@@ -357,14 +345,67 @@ wg_show_if_command_fn (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (wg_show_itfs_command, static) =
{
.path = "show wireguard interface",
.short_help = "show wireguard",
.function = wg_show_if_command_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+wg_set_async_mode_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ int async_enable = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "on"))
+ async_enable = 1;
+ else if (unformat (line_input, "off"))
+ async_enable = 0;
+ else
+ return (clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input));
+ }
+
+ wg_set_async_mode (async_enable);
+
+ unformat_free (line_input);
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (wg_set_async_mode_command, static) = {
+ .path = "set wireguard async mode",
+ .short_help = "set wireguard async mode on|off",
+ .function = wg_set_async_mode_command_fn,
+};
+
+static clib_error_t *
+wg_show_mode_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vlib_cli_output (vm, "Wireguard mode");
+
+#define _(v, f, s) \
+ vlib_cli_output (vm, "\t%s: %s", s, \
+ (wg_op_mode_is_set_##f () ? "enabled" : "disabled"));
+ foreach_wg_op_mode_flags
+#undef _
+
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (wg_show_mode_command, static) = {
+ .path = "show wireguard mode",
+ .short_help = "show wireguard mode",
+ .function = wg_show_mode_command_fn,
+};
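A minimal CLI session exercising the two new commands, with output following the format strings above:

  vpp# set wireguard async mode on
  vpp# show wireguard mode
  Wireguard mode
          async: enabled
  vpp# set wireguard async mode off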
+
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/wireguard/wireguard_cookie.c b/src/plugins/wireguard/wireguard_cookie.c
index f54ce715906..4ebbfa0fa63 100755..100644
--- a/src/plugins/wireguard/wireguard_cookie.c
+++ b/src/plugins/wireguard/wireguard_cookie.c
@@ -20,6 +20,7 @@
#include <vlib/vlib.h>
#include <wireguard/wireguard_cookie.h>
+#include <wireguard/wireguard_chachapoly.h>
#include <wireguard/wireguard.h>
static void cookie_precompute_key (uint8_t *,
@@ -29,9 +30,14 @@ static void cookie_macs_mac1 (message_macs_t *, const void *, size_t,
const uint8_t[COOKIE_KEY_SIZE]);
static void cookie_macs_mac2 (message_macs_t *, const void *, size_t,
const uint8_t[COOKIE_COOKIE_SIZE]);
-static void cookie_checker_make_cookie (vlib_main_t * vm, cookie_checker_t *,
+static void cookie_checker_make_cookie (vlib_main_t *vm, cookie_checker_t *,
uint8_t[COOKIE_COOKIE_SIZE],
- ip4_address_t ip4, u16 udp_port);
+ ip46_address_t *ip, u16 udp_port);
+
+static void ratelimit_init (ratelimit_t *, ratelimit_entry_t *);
+static void ratelimit_deinit (ratelimit_t *);
+static void ratelimit_gc (ratelimit_t *, bool);
+static bool ratelimit_allow (ratelimit_t *, ip46_address_t *);
/* Public Functions */
void
@@ -43,6 +49,14 @@ cookie_maker_init (cookie_maker_t * cp, const uint8_t key[COOKIE_INPUT_SIZE])
}
void
+cookie_checker_init (cookie_checker_t *cc, ratelimit_entry_t *pool)
+{
+ clib_memset (cc, 0, sizeof (*cc));
+ ratelimit_init (&cc->cc_ratelimit_v4, pool);
+ ratelimit_init (&cc->cc_ratelimit_v6, pool);
+}
+
+void
cookie_checker_update (cookie_checker_t * cc, uint8_t key[COOKIE_INPUT_SIZE])
{
if (key)
@@ -58,6 +72,58 @@ cookie_checker_update (cookie_checker_t * cc, uint8_t key[COOKIE_INPUT_SIZE])
}
void
+cookie_checker_deinit (cookie_checker_t *cc)
+{
+ ratelimit_deinit (&cc->cc_ratelimit_v4);
+ ratelimit_deinit (&cc->cc_ratelimit_v6);
+}
+
+void
+cookie_checker_create_payload (vlib_main_t *vm, cookie_checker_t *cc,
+ message_macs_t *cm,
+ uint8_t nonce[COOKIE_NONCE_SIZE],
+ uint8_t ecookie[COOKIE_ENCRYPTED_SIZE],
+ ip46_address_t *ip, u16 udp_port)
+{
+ uint8_t cookie[COOKIE_COOKIE_SIZE];
+
+ cookie_checker_make_cookie (vm, cc, cookie, ip, udp_port);
+ RAND_bytes (nonce, COOKIE_NONCE_SIZE);
+
+ wg_xchacha20poly1305_encrypt (vm, cookie, COOKIE_COOKIE_SIZE, ecookie,
+ cm->mac1, COOKIE_MAC_SIZE, nonce,
+ cc->cc_cookie_key);
+
+ wg_secure_zero_memory (cookie, sizeof (cookie));
+}
+
+bool
+cookie_maker_consume_payload (vlib_main_t *vm, cookie_maker_t *cp,
+ uint8_t nonce[COOKIE_NONCE_SIZE],
+ uint8_t ecookie[COOKIE_ENCRYPTED_SIZE])
+{
+ uint8_t cookie[COOKIE_COOKIE_SIZE];
+
+ if (cp->cp_mac1_valid == 0)
+ {
+ return false;
+ }
+
+ if (!wg_xchacha20poly1305_decrypt (vm, ecookie, COOKIE_ENCRYPTED_SIZE,
+ cookie, cp->cp_mac1_last, COOKIE_MAC_SIZE,
+ nonce, cp->cp_cookie_key))
+ {
+ return false;
+ }
+
+ clib_memcpy (cp->cp_cookie, cookie, COOKIE_COOKIE_SIZE);
+ cp->cp_birthdate = vlib_time_now (vm);
+ cp->cp_mac1_valid = 0;
+
+ return true;
+}
+
+void
cookie_maker_mac (cookie_maker_t * cp, message_macs_t * cm, void *buf,
size_t len)
{
@@ -76,9 +142,9 @@ cookie_maker_mac (cookie_maker_t * cp, message_macs_t * cm, void *buf,
}
enum cookie_mac_state
-cookie_checker_validate_macs (vlib_main_t * vm, cookie_checker_t * cc,
- message_macs_t * cm, void *buf, size_t len,
- bool busy, ip4_address_t ip4, u16 udp_port)
+cookie_checker_validate_macs (vlib_main_t *vm, cookie_checker_t *cc,
+ message_macs_t *cm, void *buf, size_t len,
+ bool busy, ip46_address_t *ip, u16 udp_port)
{
message_macs_t our_cm;
uint8_t cookie[COOKIE_COOKIE_SIZE];
@@ -93,13 +159,20 @@ cookie_checker_validate_macs (vlib_main_t * vm, cookie_checker_t * cc,
if (!busy)
return VALID_MAC_BUT_NO_COOKIE;
- cookie_checker_make_cookie (vm, cc, cookie, ip4, udp_port);
+ cookie_checker_make_cookie (vm, cc, cookie, ip, udp_port);
cookie_macs_mac2 (&our_cm, buf, len, cookie);
/* If the mac2 is invalid, we want to send a cookie response */
if (clib_memcmp (our_cm.mac2, cm->mac2, COOKIE_MAC_SIZE) != 0)
return VALID_MAC_BUT_NO_COOKIE;
+ /* If the mac2 is valid, we may want to rate limit the peer */
+ ratelimit_t *rl;
+ rl = ip46_address_is_ip4 (ip) ? &cc->cc_ratelimit_v4 : &cc->cc_ratelimit_v6;
+
+ if (!ratelimit_allow (rl, ip))
+ return VALID_MAC_WITH_COOKIE_BUT_RATELIMITED;
+
return VALID_MAC_WITH_COOKIE;
}
@@ -139,9 +212,9 @@ cookie_macs_mac2 (message_macs_t * cm, const void *buf, size_t len,
}
static void
-cookie_checker_make_cookie (vlib_main_t * vm, cookie_checker_t * cc,
+cookie_checker_make_cookie (vlib_main_t *vm, cookie_checker_t *cc,
uint8_t cookie[COOKIE_COOKIE_SIZE],
- ip4_address_t ip4, u16 udp_port)
+ ip46_address_t *ip, u16 udp_port)
{
blake2s_state_t state;
@@ -155,11 +228,138 @@ cookie_checker_make_cookie (vlib_main_t * vm, cookie_checker_t * cc,
blake2s_init_key (&state, COOKIE_COOKIE_SIZE, cc->cc_secret,
COOKIE_SECRET_SIZE);
- blake2s_update (&state, ip4.as_u8, sizeof (ip4_address_t)); //TODO: IP6
+ if (ip46_address_is_ip4 (ip))
+ {
+ blake2s_update (&state, ip->ip4.as_u8, sizeof (ip4_address_t));
+ }
+ else
+ {
+ blake2s_update (&state, ip->ip6.as_u8, sizeof (ip6_address_t));
+ }
blake2s_update (&state, (u8 *) & udp_port, sizeof (u16));
blake2s_final (&state, cookie, COOKIE_COOKIE_SIZE);
}
+static void
+ratelimit_init (ratelimit_t *rl, ratelimit_entry_t *pool)
+{
+ rl->rl_pool = pool;
+}
+
+static void
+ratelimit_deinit (ratelimit_t *rl)
+{
+ ratelimit_gc (rl, /* force */ true);
+ hash_free (rl->rl_table);
+}
+
+static void
+ratelimit_gc (ratelimit_t *rl, bool force)
+{
+ u32 r_key;
+ u32 r_idx;
+ ratelimit_entry_t *r;
+
+ if (force)
+ {
+ /* clang-format off */
+ hash_foreach (r_key, r_idx, rl->rl_table, {
+ r = pool_elt_at_index (rl->rl_pool, r_idx);
+ pool_put (rl->rl_pool, r);
+ });
+ /* clang-format on */
+ return;
+ }
+
+ f64 now = vlib_time_now (vlib_get_main ());
+
+ if ((rl->rl_last_gc + ELEMENT_TIMEOUT) < now)
+ {
+ u32 *r_key_to_del = NULL;
+ u32 *pr_key;
+
+ rl->rl_last_gc = now;
+
+ /* clang-format off */
+ hash_foreach (r_key, r_idx, rl->rl_table, {
+ r = pool_elt_at_index (rl->rl_pool, r_idx);
+ if ((r->r_last_time + ELEMENT_TIMEOUT) < now)
+ {
+ vec_add1 (r_key_to_del, r_key);
+ pool_put (rl->rl_pool, r);
+ }
+ });
+ /* clang-format on */
+
+ vec_foreach (pr_key, r_key_to_del)
+ {
+ hash_unset (rl->rl_table, *pr_key);
+ }
+
+ vec_free (r_key_to_del);
+ }
+}
+
+static bool
+ratelimit_allow (ratelimit_t *rl, ip46_address_t *ip)
+{
+ u32 r_key;
+ uword *p;
+ u32 r_idx;
+ ratelimit_entry_t *r;
+ f64 now = vlib_time_now (vlib_get_main ());
+
+ if (ip46_address_is_ip4 (ip))
+ /* Use all 4 bytes of IPv4 address */
+ r_key = ip->ip4.as_u32;
+ else
+ /* Use top 8 bytes (/64) of IPv6 address */
+ r_key = ip->ip6.as_u32[0] ^ ip->ip6.as_u32[1];
+
+ /* Check if there is already an entry for the IP address */
+ p = hash_get (rl->rl_table, r_key);
+ if (p)
+ {
+ u64 tokens;
+ f64 diff;
+
+ r_idx = p[0];
+ r = pool_elt_at_index (rl->rl_pool, r_idx);
+
+ diff = now - r->r_last_time;
+ r->r_last_time = now;
+
+ tokens = r->r_tokens + diff * NSEC_PER_SEC;
+
+ if (tokens > TOKEN_MAX)
+ tokens = TOKEN_MAX;
+
+ if (tokens >= INITIATION_COST)
+ {
+ r->r_tokens = tokens - INITIATION_COST;
+ return true;
+ }
+
+ r->r_tokens = tokens;
+ return false;
+ }
+
+ /* No entry for the IP address */
+ ratelimit_gc (rl, /* force */ false);
+
+ if (hash_elts (rl->rl_table) >= RATELIMIT_SIZE_MAX)
+ return false;
+
+ pool_get (rl->rl_pool, r);
+ r_idx = r - rl->rl_pool;
+ hash_set (rl->rl_table, r_key, r_idx);
+
+ r->r_last_time = now;
+ r->r_tokens = TOKEN_MAX - INITIATION_COST;
+
+ return true;
+}
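This is a per-source token bucket with time denominated in nanoseconds: a bucket refills at NSEC_PER_SEC tokens per second of wall time, every accepted initiation spends INITIATION_COST = NSEC_PER_SEC / INITIATIONS_PER_SECOND, and the bucket caps at TOKEN_MAX, so in steady state one /32 (or v6 /64) source is held to INITIATIONS_PER_SECOND handshakes per second with a burst allowance of

  TOKEN_MAX / INITIATION_COST = INITIATIONS_BURSTABLE

initiations for a fresh or long-idle key (a new entry starts at TOKEN_MAX - INITIATION_COST, i.e. with its first initiation already spent). Entries idle past ELEMENT_TIMEOUT are garbage-collected, and once the table reaches RATELIMIT_SIZE_MAX keys new sources are refused outright.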
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/wireguard/wireguard_cookie.h b/src/plugins/wireguard/wireguard_cookie.h
index 489cce81325..7467cf2ed4a 100755..100644
--- a/src/plugins/wireguard/wireguard_cookie.h
+++ b/src/plugins/wireguard/wireguard_cookie.h
@@ -18,14 +18,15 @@
#ifndef __included_wg_cookie_h__
#define __included_wg_cookie_h__
-#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip46_address.h>
#include <wireguard/wireguard_noise.h>
enum cookie_mac_state
{
INVALID_MAC,
VALID_MAC_BUT_NO_COOKIE,
- VALID_MAC_WITH_COOKIE
+ VALID_MAC_WITH_COOKIE,
+ VALID_MAC_WITH_COOKIE_BUT_RATELIMITED,
};
#define COOKIE_MAC_SIZE 16
@@ -50,8 +51,6 @@ enum cookie_mac_state
#define INITIATION_COST (NSEC_PER_SEC / INITIATIONS_PER_SECOND)
#define TOKEN_MAX (INITIATION_COST * INITIATIONS_BURSTABLE)
#define ELEMENT_TIMEOUT 1
-#define IPV4_MASK_SIZE 4 /* Use all 4 bytes of IPv4 address */
-#define IPV6_MASK_SIZE 8 /* Use top 8 bytes (/64) of IPv6 address */
typedef struct cookie_macs
{
@@ -59,6 +58,19 @@ typedef struct cookie_macs
uint8_t mac2[COOKIE_MAC_SIZE];
} message_macs_t;
+typedef struct ratelimit_entry
+{
+ f64 r_last_time;
+ u64 r_tokens;
+} ratelimit_entry_t;
+
+typedef struct ratelimit
+{
+ ratelimit_entry_t *rl_pool;
+ uword *rl_table;
+ f64 rl_last_gc;
+} ratelimit_t;
+
typedef struct cookie_maker
{
uint8_t cp_mac1_key[COOKIE_KEY_SIZE];
@@ -72,6 +84,9 @@ typedef struct cookie_maker
typedef struct cookie_checker
{
+ ratelimit_t cc_ratelimit_v4;
+ ratelimit_t cc_ratelimit_v6;
+
uint8_t cc_mac1_key[COOKIE_KEY_SIZE];
uint8_t cc_cookie_key[COOKIE_KEY_SIZE];
@@ -81,14 +96,22 @@ typedef struct cookie_checker
void cookie_maker_init (cookie_maker_t *, const uint8_t[COOKIE_INPUT_SIZE]);
+void cookie_checker_init (cookie_checker_t *, ratelimit_entry_t *);
void cookie_checker_update (cookie_checker_t *, uint8_t[COOKIE_INPUT_SIZE]);
+void cookie_checker_deinit (cookie_checker_t *);
+void cookie_checker_create_payload (vlib_main_t *vm, cookie_checker_t *cc,
+ message_macs_t *cm,
+ uint8_t nonce[COOKIE_NONCE_SIZE],
+ uint8_t ecookie[COOKIE_ENCRYPTED_SIZE],
+ ip46_address_t *ip, u16 udp_port);
+bool cookie_maker_consume_payload (vlib_main_t *vm, cookie_maker_t *cp,
+ uint8_t nonce[COOKIE_NONCE_SIZE],
+ uint8_t ecookie[COOKIE_ENCRYPTED_SIZE]);
void cookie_maker_mac (cookie_maker_t *, message_macs_t *, void *, size_t);
-enum cookie_mac_state cookie_checker_validate_macs (vlib_main_t * vm,
- cookie_checker_t *,
- message_macs_t *, void *,
- size_t, bool,
- ip4_address_t ip4,
- u16 udp_port);
+enum cookie_mac_state
+cookie_checker_validate_macs (vlib_main_t *vm, cookie_checker_t *,
+ message_macs_t *, void *, size_t, bool,
+ ip46_address_t *ip, u16 udp_port);
#endif /* __included_wg_cookie_h__ */
diff --git a/src/plugins/wireguard/wireguard_handoff.c b/src/plugins/wireguard/wireguard_handoff.c
index d3e37b30c88..195baf209a0 100644
--- a/src/plugins/wireguard/wireguard_handoff.c
+++ b/src/plugins/wireguard/wireguard_handoff.c
@@ -129,40 +129,77 @@ wg_handoff (vlib_main_t * vm,
return n_enq;
}
-VLIB_NODE_FN (wg_handshake_handoff) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (wg4_handshake_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
wg_main_t *wmp = &wg_main;
- return wg_handoff (vm, node, from_frame, wmp->in_fq_index,
+ return wg_handoff (vm, node, from_frame, wmp->in4_fq_index,
WG_HANDOFF_HANDSHAKE);
}
-VLIB_NODE_FN (wg_input_data_handoff) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (wg6_handshake_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
wg_main_t *wmp = &wg_main;
- return wg_handoff (vm, node, from_frame, wmp->in_fq_index,
+ return wg_handoff (vm, node, from_frame, wmp->in6_fq_index,
+ WG_HANDOFF_HANDSHAKE);
+}
+
+VLIB_NODE_FN (wg4_input_data_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ wg_main_t *wmp = &wg_main;
+
+ return wg_handoff (vm, node, from_frame, wmp->in4_fq_index,
+ WG_HANDOFF_INP_DATA);
+}
+
+VLIB_NODE_FN (wg6_input_data_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ wg_main_t *wmp = &wg_main;
+
+ return wg_handoff (vm, node, from_frame, wmp->in6_fq_index,
WG_HANDOFF_INP_DATA);
}
-VLIB_NODE_FN (wg_output_tun_handoff) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (wg4_output_tun_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
wg_main_t *wmp = &wg_main;
- return wg_handoff (vm, node, from_frame, wmp->out_fq_index,
+ return wg_handoff (vm, node, from_frame, wmp->out4_fq_index,
WG_HANDOFF_OUT_TUN);
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (wg_handshake_handoff) =
+VLIB_NODE_FN (wg6_output_tun_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ wg_main_t *wmp = &wg_main;
+
+ return wg_handoff (vm, node, from_frame, wmp->out6_fq_index,
+ WG_HANDOFF_OUT_TUN);
+}
+
+VLIB_REGISTER_NODE (wg4_handshake_handoff) =
+{
+ .name = "wg4-handshake-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (wg_handoff_error_strings),
+ .error_strings = wg_handoff_error_strings,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (wg6_handshake_handoff) =
{
- .name = "wg-handshake-handoff",
+ .name = "wg6-handshake-handoff",
.vector_size = sizeof (u32),
.format_trace = format_wg_handoff_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
@@ -174,9 +211,9 @@ VLIB_REGISTER_NODE (wg_handshake_handoff) =
},
};
-VLIB_REGISTER_NODE (wg_input_data_handoff) =
+VLIB_REGISTER_NODE (wg4_input_data_handoff) =
{
- .name = "wg-input-data-handoff",
+ .name = "wg4-input-data-handoff",
.vector_size = sizeof (u32),
.format_trace = format_wg_handoff_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
@@ -188,9 +225,37 @@ VLIB_REGISTER_NODE (wg_input_data_handoff) =
},
};
-VLIB_REGISTER_NODE (wg_output_tun_handoff) =
+VLIB_REGISTER_NODE (wg6_input_data_handoff) =
+{
+ .name = "wg6-input-data-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (wg_handoff_error_strings),
+ .error_strings = wg_handoff_error_strings,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (wg4_output_tun_handoff) =
+{
+ .name = "wg4-output-tun-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (wg_handoff_error_strings),
+ .error_strings = wg_handoff_error_strings,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (wg6_output_tun_handoff) =
{
- .name = "wg-output-tun-handoff",
+ .name = "wg6-output-tun-handoff",
.vector_size = sizeof (u32),
.format_trace = format_wg_handoff_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
@@ -201,7 +266,6 @@ VLIB_REGISTER_NODE (wg_output_tun_handoff) =
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/wireguard/wireguard_hchacha20.h b/src/plugins/wireguard/wireguard_hchacha20.h
new file mode 100644
index 00000000000..a2d139621c9
--- /dev/null
+++ b/src/plugins/wireguard/wireguard_hchacha20.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Rubicon Communications, LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * chacha-merged.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#ifndef __included_wg_hchacha20_h__
+#define __included_wg_hchacha20_h__
+
+#include <vlib/vlib.h>
+
+/* clang-format off */
+#define U32C(v) (v##U)
+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
+
+#define ROTL32(v, n) \
+ (U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+#define U8TO32_LITTLE(p) \
+ (((u32)((p)[0]) ) | \
+ ((u32)((p)[1]) << 8) | \
+ ((u32)((p)[2]) << 16) | \
+ ((u32)((p)[3]) << 24))
+
+#define ROTATE(v,c) (ROTL32(v,c))
+#define XOR(v,w) ((v) ^ (w))
+#define PLUS(v,w) (U32V((v) + (w)))
+
+#define QUARTERROUND(a,b,c,d) \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
+/* clang-format on */
+
+static const char sigma[16] = "expand 32-byte k";
+
+static inline void
+hchacha20 (u32 derived_key[8], const u8 nonce[16], const u8 key[32])
+{
+ int i;
+ u32 x[] = { U8TO32_LITTLE (sigma + 0), U8TO32_LITTLE (sigma + 4),
+ U8TO32_LITTLE (sigma + 8), U8TO32_LITTLE (sigma + 12),
+ U8TO32_LITTLE (key + 0), U8TO32_LITTLE (key + 4),
+ U8TO32_LITTLE (key + 8), U8TO32_LITTLE (key + 12),
+ U8TO32_LITTLE (key + 16), U8TO32_LITTLE (key + 20),
+ U8TO32_LITTLE (key + 24), U8TO32_LITTLE (key + 28),
+ U8TO32_LITTLE (nonce + 0), U8TO32_LITTLE (nonce + 4),
+ U8TO32_LITTLE (nonce + 8), U8TO32_LITTLE (nonce + 12) };
+
+ for (i = 20; i > 0; i -= 2)
+ {
+ QUARTERROUND (x[0], x[4], x[8], x[12])
+ QUARTERROUND (x[1], x[5], x[9], x[13])
+ QUARTERROUND (x[2], x[6], x[10], x[14])
+ QUARTERROUND (x[3], x[7], x[11], x[15])
+ QUARTERROUND (x[0], x[5], x[10], x[15])
+ QUARTERROUND (x[1], x[6], x[11], x[12])
+ QUARTERROUND (x[2], x[7], x[8], x[13])
+ QUARTERROUND (x[3], x[4], x[9], x[14])
+ }
+
+ clib_memcpy (derived_key + 0, x + 0, sizeof (u32) * 4);
+ clib_memcpy (derived_key + 4, x + 12, sizeof (u32) * 4);
+}
+
+#endif /* __included_wg_hchacha20_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/wireguard/wireguard_if.c b/src/plugins/wireguard/wireguard_if.c
index f7eb5a1d9e0..afeeda1dd2b 100644
--- a/src/plugins/wireguard/wireguard_if.c
+++ b/src/plugins/wireguard/wireguard_if.c
@@ -32,13 +32,17 @@ static uword *wg_if_instances;
static index_t *wg_if_index_by_sw_if_index;
/* vector of interfaces key'd on their UDP port (in network order) */
-index_t *wg_if_index_by_port;
+index_t **wg_if_indexes_by_port;
+
+/* pool of ratelimit entries */
+static ratelimit_entry_t *wg_ratelimit_pool;
static u8 *
format_wg_if_name (u8 * s, va_list * args)
{
u32 dev_instance = va_arg (*args, u32);
- return format (s, "wg%d", dev_instance);
+ wg_if_t *wgi = wg_if_get (dev_instance);
+ return format (s, "wg%d", wgi->user_instance);
}
u8 *
@@ -49,7 +53,6 @@ format_wg_if (u8 * s, va_list * args)
noise_local_t *local = noise_local_get (wgi->local_idx);
u8 key[NOISE_KEY_LEN_BASE64];
-
s = format (s, "[%d] %U src:%U port:%d",
wgii,
format_vnet_sw_if_index_name, vnet_get_main (),
@@ -113,20 +116,20 @@ wg_remote_get (const uint8_t public[NOISE_PUBLIC_KEY_LEN])
}
static uint32_t
-wg_index_set (noise_remote_t * remote)
+wg_index_set (vlib_main_t *vm, noise_remote_t *remote)
{
wg_main_t *wmp = &wg_main;
u32 rnd_seed = (u32) (vlib_time_now (wmp->vlib_main) * 1e6);
u32 ret =
- wg_index_table_add (&wmp->index_table, remote->r_peer_idx, rnd_seed);
+ wg_index_table_add (vm, &wmp->index_table, remote->r_peer_idx, rnd_seed);
return ret;
}
static void
-wg_index_drop (uint32_t key)
+wg_index_drop (vlib_main_t *vm, uint32_t key)
{
wg_main_t *wmp = &wg_main;
- wg_index_table_del (&wmp->index_table, key);
+ wg_index_table_del (vm, &wmp->index_table, key);
}
static clib_error_t *
@@ -151,11 +154,21 @@ wg_if_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
void
wg_if_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
{
- /* The peers manage the adjacencies */
+ index_t wgii;
+
+ /* Convert any neighbour adjacency that has a next-hop reachable through
+ * the wg interface into a midchain. This is to avoid sending ARP/ND to
+   * resolve the next-hop address via the wg interface. Then, if one of the
+   * peers has a matching prefix among its allowed prefixes, the midchain
+   * will be updated to the corresponding one.
+ */
+ adj_nbr_midchain_update_rewrite (ai, NULL, NULL, ADJ_FLAG_NONE, NULL);
+
+ wgii = wg_if_find_by_sw_if_index (sw_if_index);
+ wg_if_peer_walk (wg_if_get (wgii), wg_peer_if_adj_change, &ai);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (wg_if_device_class) = {
.name = "Wireguard Tunnel",
.format_device_name = format_wg_if_name,
@@ -167,7 +180,6 @@ VNET_HW_INTERFACE_CLASS(wg_hw_interface_class) = {
.update_adjacency = wg_if_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated wg_if instance numbers.
@@ -251,13 +263,6 @@ wg_if_create (u32 user_instance,
*sw_if_indexp = (u32) ~ 0;
/*
- * Check if the required port is already in use
- */
- udp_dst_port_info_t *pi = udp_get_dst_port_info (&udp_main, port, UDP_IP4);
- if (pi)
- return VNET_API_ERROR_UDP_PORT_TAKEN;
-
- /*
* Allocate a wg_if instance. Either select on dynamically
* or try to use the desired user_instance number.
*/
@@ -265,13 +270,11 @@ wg_if_create (u32 user_instance,
if (instance == ~0)
return VNET_API_ERROR_INVALID_REGISTRATION;
- /* *INDENT-OFF* */
struct noise_upcall upcall = {
.u_remote_get = wg_remote_get,
.u_index_set = wg_index_set,
.u_index_drop = wg_index_drop,
};
- /* *INDENT-ON* */
pool_get (noise_local_pool, local);
@@ -283,7 +286,7 @@ wg_if_create (u32 user_instance,
return VNET_API_ERROR_INVALID_REGISTRATION;
}
- pool_get (wg_if_pool, wg_if);
+ pool_get_zero (wg_if_pool, wg_if);
/* tunnel index (or instance) */
u32 t_idx = wg_if - wg_if_pool;
@@ -292,13 +295,20 @@ wg_if_create (u32 user_instance,
if (~0 == wg_if->user_instance)
wg_if->user_instance = t_idx;
- udp_register_dst_port (vlib_get_main (), port, wg_input_node.index, 1);
+ vec_validate_init_empty (wg_if_indexes_by_port, port, NULL);
+ if (vec_len (wg_if_indexes_by_port[port]) == 0)
+ {
+ udp_register_dst_port (vlib_get_main (), port, wg4_input_node.index,
+ UDP_IP4);
+ udp_register_dst_port (vlib_get_main (), port, wg6_input_node.index,
+ UDP_IP6);
+ }
- vec_validate_init_empty (wg_if_index_by_port, port, INDEX_INVALID);
- wg_if_index_by_port[port] = wg_if - wg_if_pool;
+ vec_add1 (wg_if_indexes_by_port[port], t_idx);
wg_if->port = port;
wg_if->local_idx = local - noise_local_pool;
+ cookie_checker_init (&wg_if->cookie_checker, wg_ratelimit_pool);
cookie_checker_update (&wg_if->cookie_checker, local->l_public);
hw_if_index = vnet_register_interface (vnm,
@@ -314,6 +324,8 @@ wg_if_create (u32 user_instance,
ip_address_copy (&wg_if->src_ip, src_ip);
wg_if->sw_if_index = *sw_if_indexp = hi->sw_if_index;
+ vnet_set_interface_l3_output_node (vnm->vlib_main, hi->sw_if_index,
+ (u8 *) "tunnel-output");
return 0;
}
@@ -331,15 +343,38 @@ wg_if_delete (u32 sw_if_index)
return VNET_API_ERROR_INVALID_VALUE;
wg_if_t *wg_if;
- wg_if = wg_if_get (wg_if_find_by_sw_if_index (sw_if_index));
+ index_t wgii = wg_if_find_by_sw_if_index (sw_if_index);
+ wg_if = wg_if_get (wgii);
if (NULL == wg_if)
return VNET_API_ERROR_INVALID_SW_IF_INDEX_2;
if (wg_if_instance_free (wg_if->user_instance) < 0)
return VNET_API_ERROR_INVALID_VALUE_2;
- udp_unregister_dst_port (vlib_get_main (), wg_if->port, 1);
- wg_if_index_by_port[wg_if->port] = INDEX_INVALID;
+ // Remove peers before interface deletion
+ wg_if_peer_walk (wg_if, wg_peer_if_delete, NULL);
+
+ hash_free (wg_if->peers);
+
+ index_t *ii;
+ index_t *ifs = wg_if_indexes_get_by_port (wg_if->port);
+ vec_foreach (ii, ifs)
+ {
+ if (*ii == wgii)
+ {
+ vec_del1 (ifs, ii - ifs);
+ break;
+ }
+ }
+ if (vec_len (ifs) == 0)
+ {
+ udp_unregister_dst_port (vlib_get_main (), wg_if->port, 1);
+ udp_unregister_dst_port (vlib_get_main (), wg_if->port, 0);
+ }
+
+ cookie_checker_deinit (&wg_if->cookie_checker);
+
+ vnet_reset_interface_l3_output_node (vnm->vlib_main, sw_if_index);
vnet_delete_hw_interface (vnm, hw->hw_if_index);
pool_put_index (noise_local_pool, wg_if->local_idx);
pool_put (wg_if_pool, wg_if);
@@ -353,8 +388,12 @@ wg_if_peer_add (wg_if_t * wgi, index_t peeri)
hash_set (wgi->peers, peeri, peeri);
if (1 == hash_elts (wgi->peers))
- vnet_feature_enable_disable ("ip4-output", "wg-output-tun",
- wgi->sw_if_index, 1, 0, 0);
+ {
+ vnet_feature_enable_disable ("ip4-output", "wg4-output-tun",
+ wgi->sw_if_index, 1, 0, 0);
+ vnet_feature_enable_disable ("ip6-output", "wg6-output-tun",
+ wgi->sw_if_index, 1, 0, 0);
+ }
}
void
@@ -363,8 +402,12 @@ wg_if_peer_remove (wg_if_t * wgi, index_t peeri)
hash_unset (wgi->peers, peeri);
if (0 == hash_elts (wgi->peers))
- vnet_feature_enable_disable ("ip4-output", "wg-output-tun",
- wgi->sw_if_index, 0, 0, 0);
+ {
+ vnet_feature_enable_disable ("ip4-output", "wg4-output-tun",
+ wgi->sw_if_index, 0, 0, 0);
+ vnet_feature_enable_disable ("ip6-output", "wg6-output-tun",
+ wgi->sw_if_index, 0, 0, 0);
+ }
}
void
@@ -372,13 +415,11 @@ wg_if_walk (wg_if_walk_cb_t fn, void *data)
{
index_t wgii;
- /* *INDENT-OFF* */
pool_foreach_index (wgii, wg_if_pool)
{
if (WALK_STOP == fn(wgii, data))
break;
}
- /* *INDENT-ON* */
}
index_t
@@ -386,85 +427,14 @@ wg_if_peer_walk (wg_if_t * wgi, wg_if_peer_walk_cb_t fn, void *data)
{
index_t peeri, val;
- /* *INDENT-OFF* */
- hash_foreach (peeri, val, wgi->peers,
- {
- if (WALK_STOP == fn(wgi, peeri, data))
+ hash_foreach (peeri, val, wgi->peers, {
+ if (WALK_STOP == fn (peeri, data))
return peeri;
});
- /* *INDENT-ON* */
return INDEX_INVALID;
}
-
-static void
-wg_if_table_bind_v4 (ip4_main_t * im,
- uword opaque,
- u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
-{
- wg_if_t *wg_if;
-
- wg_if = wg_if_get (wg_if_find_by_sw_if_index (sw_if_index));
- if (NULL == wg_if)
- return;
-
- wg_peer_table_bind_ctx_t ctx = {
- .af = AF_IP4,
- .old_fib_index = old_fib_index,
- .new_fib_index = new_fib_index,
- };
-
- wg_if_peer_walk (wg_if, wg_peer_if_table_change, &ctx);
-}
-
-static void
-wg_if_table_bind_v6 (ip6_main_t * im,
- uword opaque,
- u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
-{
- wg_if_t *wg_if;
-
- wg_if = wg_if_get (wg_if_find_by_sw_if_index (sw_if_index));
- if (NULL == wg_if)
- return;
-
- wg_peer_table_bind_ctx_t ctx = {
- .af = AF_IP6,
- .old_fib_index = old_fib_index,
- .new_fib_index = new_fib_index,
- };
-
- wg_if_peer_walk (wg_if, wg_peer_if_table_change, &ctx);
-}
-
-static clib_error_t *
-wg_if_module_init (vlib_main_t * vm)
-{
- {
- ip4_table_bind_callback_t cb = {
- .function = wg_if_table_bind_v4,
- };
- vec_add1 (ip4_main.table_bind_callbacks, cb);
- }
- {
- ip6_table_bind_callback_t cb = {
- .function = wg_if_table_bind_v6,
- };
- vec_add1 (ip6_main.table_bind_callbacks, cb);
- }
-
- return (NULL);
-}
-
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION (wg_if_module_init) =
-{
- .runs_after = VLIB_INITS("ip_main_init"),
-};
-/* *INDENT-ON* */
-
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/wireguard/wireguard_if.h b/src/plugins/wireguard/wireguard_if.h
index 7c11ad9b281..2a6ab8e4be5 100644
--- a/src/plugins/wireguard/wireguard_if.h
+++ b/src/plugins/wireguard/wireguard_if.h
@@ -31,13 +31,15 @@ typedef struct wg_if_t_
cookie_checker_t cookie_checker;
u16 port;
- wg_index_table_t index_table;
-
/* Source IP address for originated packets */
ip_address_t src_ip;
/* hash table of peers on this link */
uword *peers;
+
+ /* Under load params */
+ f64 handshake_counting_end;
+ u32 handshake_num;
} wg_if_t;
@@ -52,8 +54,7 @@ u8 *format_wg_if (u8 * s, va_list * va);
typedef walk_rc_t (*wg_if_walk_cb_t) (index_t wgi, void *data);
void wg_if_walk (wg_if_walk_cb_t fn, void *data);
-typedef walk_rc_t (*wg_if_peer_walk_cb_t) (wg_if_t * wgi, index_t peeri,
- void *data);
+typedef walk_rc_t (*wg_if_peer_walk_cb_t) (index_t peeri, void *data);
index_t wg_if_peer_walk (wg_if_t * wgi, wg_if_peer_walk_cb_t fn, void *data);
void wg_if_peer_add (wg_if_t * wgi, index_t peeri);
@@ -72,18 +73,56 @@ wg_if_get (index_t wgii)
return (pool_elt_at_index (wg_if_pool, wgii));
}
-extern index_t *wg_if_index_by_port;
+extern index_t **wg_if_indexes_by_port;
-static_always_inline wg_if_t *
-wg_if_get_by_port (u16 port)
+static_always_inline index_t *
+wg_if_indexes_get_by_port (u16 port)
{
- if (vec_len (wg_if_index_by_port) < port)
+ if (vec_len (wg_if_indexes_by_port) == 0)
return (NULL);
- if (INDEX_INVALID == wg_if_index_by_port[port])
+ if (vec_len (wg_if_indexes_by_port[port]) == 0)
return (NULL);
- return (wg_if_get (wg_if_index_by_port[port]));
+ return (wg_if_indexes_by_port[port]);
}
+#define HANDSHAKE_COUNTING_INTERVAL 0.5
+#define UNDER_LOAD_INTERVAL 1.0
+#define HANDSHAKE_NUM_PER_PEER_UNTIL_UNDER_LOAD 40
+
+static_always_inline bool
+wg_if_is_under_load (vlib_main_t *vm, wg_if_t *wgi)
+{
+ static f64 wg_under_load_end;
+ f64 now = vlib_time_now (vm);
+ u32 num_until_under_load =
+ hash_elts (wgi->peers) * HANDSHAKE_NUM_PER_PEER_UNTIL_UNDER_LOAD;
+
+ if (wgi->handshake_counting_end < now)
+ {
+ wgi->handshake_counting_end = now + HANDSHAKE_COUNTING_INTERVAL;
+ wgi->handshake_num = 0;
+ }
+ wgi->handshake_num++;
+
+ if (wgi->handshake_num >= num_until_under_load)
+ {
+ wg_under_load_end = now + UNDER_LOAD_INTERVAL;
+ return true;
+ }
+
+ if (wg_under_load_end > now)
+ {
+ return true;
+ }
+
+ return false;
+}
+
+static_always_inline void
+wg_if_dec_handshake_num (wg_if_t *wgi)
+{
+ wgi->handshake_num--;
+}
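Worked through with the constants above: an interface with 10 peers trips into the under-load state once it counts 10 * 40 = 400 handshake messages within a 0.5 s window (HANDSHAKE_COUNTING_INTERVAL), and remains there for at least 1.0 s (UNDER_LOAD_INTERVAL) after the last trip. While under load, initiators must round-trip a cookie and may additionally hit the per-source rate limiter. wg_if_dec_handshake_num () lets the input node un-count a message whose mac1 turns out to belong to a different interface sharing the same UDP port.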
#endif
diff --git a/src/plugins/wireguard/wireguard_index_table.c b/src/plugins/wireguard/wireguard_index_table.c
index 5f81204b4c0..da53bfd75f1 100755..100644
--- a/src/plugins/wireguard/wireguard_index_table.c
+++ b/src/plugins/wireguard/wireguard_index_table.c
@@ -13,13 +13,15 @@
* limitations under the License.
*/
+#include <vlib/vlib.h>
#include <vppinfra/hash.h>
#include <vppinfra/pool.h>
#include <vppinfra/random.h>
#include <wireguard/wireguard_index_table.h>
u32
-wg_index_table_add (wg_index_table_t * table, u32 peer_pool_idx, u32 rnd_seed)
+wg_index_table_add (vlib_main_t *vm, wg_index_table_t *table,
+ u32 peer_pool_idx, u32 rnd_seed)
{
u32 key;
@@ -29,19 +31,25 @@ wg_index_table_add (wg_index_table_t * table, u32 peer_pool_idx, u32 rnd_seed)
if (hash_get (table->hash, key))
continue;
+ vlib_worker_thread_barrier_sync (vm);
hash_set (table->hash, key, peer_pool_idx);
+ vlib_worker_thread_barrier_release (vm);
break;
}
return key;
}
void
-wg_index_table_del (wg_index_table_t * table, u32 key)
+wg_index_table_del (vlib_main_t *vm, wg_index_table_t *table, u32 key)
{
uword *p;
p = hash_get (table->hash, key);
if (p)
- hash_unset (table->hash, key);
+ {
+ vlib_worker_thread_barrier_sync (vm);
+ hash_unset (table->hash, key);
+ vlib_worker_thread_barrier_release (vm);
+ }
}
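Mutations now take the worker-thread barrier because hash_set ()/hash_unset () may reallocate the underlying table while data-plane workers run lockless lookups against it. The read side stays lock-free; a sketch of the data-path consumer, along the lines of wg_input:

  u32 *peer_idx = wg_index_table_lookup (&wmp->index_table, sender_index);
  if (peer_idx)
    peer = wg_peer_get (*peer_idx); /* safe: writers quiesce workers first */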
u32 *
diff --git a/src/plugins/wireguard/wireguard_index_table.h b/src/plugins/wireguard/wireguard_index_table.h
index 67cae1f49d5..e9aa374c0ca 100755..100644
--- a/src/plugins/wireguard/wireguard_index_table.h
+++ b/src/plugins/wireguard/wireguard_index_table.h
@@ -16,6 +16,7 @@
#ifndef __included_wg_index_table_h__
#define __included_wg_index_table_h__
+#include <vlib/vlib.h>
#include <vppinfra/types.h>
typedef struct
@@ -23,9 +24,9 @@ typedef struct
uword *hash;
} wg_index_table_t;
-u32 wg_index_table_add (wg_index_table_t * table, u32 peer_pool_idx,
- u32 rnd_seed);
-void wg_index_table_del (wg_index_table_t * table, u32 key);
+u32 wg_index_table_add (vlib_main_t *vm, wg_index_table_t *table,
+ u32 peer_pool_idx, u32 rnd_seed);
+void wg_index_table_del (vlib_main_t *vm, wg_index_table_t *table, u32 key);
u32 *wg_index_table_lookup (const wg_index_table_t * table, u32 key);
#endif //__included_wg_index_table_h__
diff --git a/src/plugins/wireguard/wireguard_input.c b/src/plugins/wireguard/wireguard_input.c
index 5db814292f8..1eb7fbfed0b 100644
--- a/src/plugins/wireguard/wireguard_input.c
+++ b/src/plugins/wireguard/wireguard_input.c
@@ -25,14 +25,18 @@
#define foreach_wg_input_error \
_ (NONE, "No error") \
_ (HANDSHAKE_MAC, "Invalid MAC handshake") \
+ _ (HANDSHAKE_RATELIMITED, "Handshake ratelimited") \
_ (PEER, "Peer error") \
_ (INTERFACE, "Interface error") \
_ (DECRYPTION, "Failed during decryption") \
_ (KEEPALIVE_SEND, "Failed while sending Keepalive") \
_ (HANDSHAKE_SEND, "Failed while sending Handshake") \
_ (HANDSHAKE_RECEIVE, "Failed while receiving Handshake") \
- _ (TOO_BIG, "Packet too big") \
- _ (UNDEFINED, "Undefined error")
+ _ (COOKIE_DECRYPTION, "Failed during Cookie decryption") \
+ _ (COOKIE_SEND, "Failed during sending Cookie") \
+ _ (NO_BUFFERS, "No buffers") \
+ _ (UNDEFINED, "Undefined error") \
+ _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)")
typedef enum
{
@@ -56,6 +60,12 @@ typedef struct
index_t peer;
} wg_input_trace_t;
+typedef struct
+{
+ index_t peer;
+ u16 next;
+} wg_input_post_trace_t;
+
u8 *
format_wg_message_type (u8 * s, va_list * args)
{
@@ -79,11 +89,27 @@ format_wg_input_trace (u8 * s, va_list * args)
wg_input_trace_t *t = va_arg (*args, wg_input_trace_t *);
- s = format (s, "WG input: \n");
- s = format (s, " Type: %U\n", format_wg_message_type, t->type);
- s = format (s, " peer: %d\n", t->peer);
- s = format (s, " Length: %d\n", t->current_length);
- s = format (s, " Keepalive: %s", t->is_keepalive ? "true" : "false");
+ s = format (s, "Wireguard input: \n");
+ s = format (s, " Type: %U\n", format_wg_message_type, t->type);
+ s = format (s, " Peer: %d\n", t->peer);
+ s = format (s, " Length: %d\n", t->current_length);
+ s = format (s, " Keepalive: %s", t->is_keepalive ? "true" : "false");
+
+ return s;
+}
+
+/* post-node packet trace format function */
+static u8 *
+format_wg_input_post_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+
+ wg_input_post_trace_t *t = va_arg (*args, wg_input_post_trace_t *);
+
+ s = format (s, "WG input post: \n");
+ s = format (s, " peer: %u\n", t->peer);
+ s = format (s, " next: %u\n", t->next);
return s;
}
@@ -93,48 +119,52 @@ typedef enum
WG_INPUT_NEXT_HANDOFF_HANDSHAKE,
WG_INPUT_NEXT_HANDOFF_DATA,
WG_INPUT_NEXT_IP4_INPUT,
+ WG_INPUT_NEXT_IP6_INPUT,
WG_INPUT_NEXT_PUNT,
WG_INPUT_NEXT_ERROR,
WG_INPUT_N_NEXT,
} wg_input_next_t;
-/* static void */
-/* set_peer_address (wg_peer_t * peer, ip4_address_t ip4, u16 udp_port) */
-/* { */
-/* if (peer) */
-/* { */
-/* ip46_address_set_ip4 (&peer->dst.addr, &ip4); */
-/* peer->dst.port = udp_port; */
-/* } */
-/* } */
+static u8
+is_ip4_header (u8 *data)
+{
+ return (data[0] >> 4) == 0x4;
+}
static wg_input_error_t
-wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
+wg_handshake_process (vlib_main_t *vm, wg_main_t *wmp, vlib_buffer_t *b,
+ u32 node_idx, u8 is_ip4)
{
ASSERT (vm->thread_index == 0);
enum cookie_mac_state mac_state;
bool packet_needs_cookie;
bool under_load;
+ index_t *wg_ifs;
wg_if_t *wg_if;
wg_peer_t *peer = NULL;
void *current_b_data = vlib_buffer_get_current (b);
+ ip46_address_t src_ip;
+ if (is_ip4)
+ {
+ ip4_header_t *iph4 =
+ current_b_data - sizeof (udp_header_t) - sizeof (ip4_header_t);
+ ip46_address_set_ip4 (&src_ip, &iph4->src_address);
+ }
+ else
+ {
+ ip6_header_t *iph6 =
+ current_b_data - sizeof (udp_header_t) - sizeof (ip6_header_t);
+ ip46_address_set_ip6 (&src_ip, &iph6->src_address);
+ }
+
udp_header_t *uhd = current_b_data - sizeof (udp_header_t);
- ip4_header_t *iph =
- current_b_data - sizeof (udp_header_t) - sizeof (ip4_header_t);
- ip4_address_t ip4_src = iph->src_address;
- u16 udp_src_port = clib_host_to_net_u16 (uhd->src_port);;
- u16 udp_dst_port = clib_host_to_net_u16 (uhd->dst_port);;
+ u16 udp_src_port = clib_host_to_net_u16 (uhd->src_port);
+ u16 udp_dst_port = clib_host_to_net_u16 (uhd->dst_port);
message_header_t *header = current_b_data;
- under_load = false;
-
- wg_if = wg_if_get_by_port (udp_dst_port);
-
- if (NULL == wg_if)
- return WG_INPUT_ERROR_INTERFACE;
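+  /* A cookie reply carries an encrypted cookie from a responder under
+   * load; consume it so the next handshake attempt can present a valid
+   * mac2. */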
if (PREDICT_FALSE (header->type == MESSAGE_HANDSHAKE_COOKIE))
{
@@ -147,7 +177,9 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
else
return WG_INPUT_ERROR_PEER;
- // TODO: Implement cookie_maker_consume_payload
+ if (!cookie_maker_consume_payload (
+ vm, &peer->cookie_maker, packet->nonce, packet->encrypted_cookie))
+ return WG_INPUT_ERROR_COOKIE_DECRYPTION;
return WG_INPUT_ERROR_NONE;
}
@@ -159,16 +191,40 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
message_macs_t *macs = (message_macs_t *)
((u8 *) current_b_data + len - sizeof (*macs));
- mac_state =
- cookie_checker_validate_macs (vm, &wg_if->cookie_checker, macs,
- current_b_data, len, under_load, ip4_src,
- udp_src_port);
+ index_t *ii;
+ wg_ifs = wg_if_indexes_get_by_port (udp_dst_port);
+ if (NULL == wg_ifs)
+ return WG_INPUT_ERROR_INTERFACE;
+
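+  /* Several wg interfaces may listen on the same UDP port; try each
+   * candidate until the handshake MACs validate. */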
+ vec_foreach (ii, wg_ifs)
+ {
+ wg_if = wg_if_get (*ii);
+ if (NULL == wg_if)
+ continue;
+
+ under_load = wg_if_is_under_load (vm, wg_if);
+ mac_state = cookie_checker_validate_macs (
+ vm, &wg_if->cookie_checker, macs, current_b_data, len, under_load,
+ &src_ip, udp_src_port);
+ if (mac_state == INVALID_MAC)
+ {
+ wg_if_dec_handshake_num (wg_if);
+ wg_if = NULL;
+ continue;
+ }
+ break;
+ }
+
+ if (NULL == wg_if)
+ return WG_INPUT_ERROR_HANDSHAKE_MAC;
if ((under_load && mac_state == VALID_MAC_WITH_COOKIE)
|| (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE))
packet_needs_cookie = false;
else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)
packet_needs_cookie = true;
+ else if (mac_state == VALID_MAC_WITH_COOKIE_BUT_RATELIMITED)
+ return WG_INPUT_ERROR_HANDSHAKE_RATELIMITED;
else
return WG_INPUT_ERROR_HANDSHAKE_MAC;
@@ -180,8 +236,16 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
if (packet_needs_cookie)
{
- // TODO: Add processing
+
+ if (!wg_send_handshake_cookie (vm, message->sender_index,
+ &wg_if->cookie_checker, macs,
+ &ip_addr_46 (&wg_if->src_ip),
+ wg_if->port, &src_ip, udp_src_port))
+ return WG_INPUT_ERROR_COOKIE_SEND;
+
+ return WG_INPUT_ERROR_NONE;
}
+
noise_remote_t *rp;
if (noise_consume_initiation
(vm, noise_local_get (wg_if->local_idx), &rp,
@@ -195,10 +259,11 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
return WG_INPUT_ERROR_PEER;
}
- // set_peer_address (peer, ip4_src, udp_src_port);
+ wg_peer_update_endpoint (rp->r_peer_idx, &src_ip, udp_src_port);
+
if (PREDICT_FALSE (!wg_send_handshake_response (vm, peer)))
{
- vlib_node_increment_counter (vm, wg_input_node.index,
+ vlib_node_increment_counter (vm, node_idx,
WG_INPUT_ERROR_HANDSHAKE_SEND, 1);
}
break;
@@ -206,13 +271,27 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
case MESSAGE_HANDSHAKE_RESPONSE:
{
message_handshake_response_t *resp = current_b_data;
+
+ if (packet_needs_cookie)
+ {
+ if (!wg_send_handshake_cookie (vm, resp->sender_index,
+ &wg_if->cookie_checker, macs,
+ &ip_addr_46 (&wg_if->src_ip),
+ wg_if->port, &src_ip, udp_src_port))
+ return WG_INPUT_ERROR_COOKIE_SEND;
+
+ return WG_INPUT_ERROR_NONE;
+ }
+
+ index_t peeri = INDEX_INVALID;
u32 *entry =
wg_index_table_lookup (&wmp->index_table, resp->receiver_index);
if (PREDICT_TRUE (entry != NULL))
{
- peer = wg_peer_get (*entry);
- if (peer->is_dead)
+ peeri = *entry;
+ peer = wg_peer_get (peeri);
+ if (wg_peer_is_dead (peer))
return WG_INPUT_ERROR_PEER;
}
else
@@ -225,12 +304,9 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
{
return WG_INPUT_ERROR_PEER;
}
- if (packet_needs_cookie)
- {
- // TODO: Add processing
- }
- // set_peer_address (peer, ip4_src, udp_src_port);
+ wg_peer_update_endpoint (peeri, &src_ip, udp_src_port);
+
if (noise_remote_begin_session (vm, &peer->remote))
{
@@ -238,9 +314,12 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
wg_timers_handshake_complete (peer);
if (PREDICT_FALSE (!wg_send_keepalive (vm, peer)))
{
- vlib_node_increment_counter (vm, wg_input_node.index,
- WG_INPUT_ERROR_KEEPALIVE_SEND,
- 1);
+ vlib_node_increment_counter (vm, node_idx,
+ WG_INPUT_ERROR_KEEPALIVE_SEND, 1);
+ }
+ else
+ {
+ wg_peer_update_flags (peeri, WG_PEER_ESTABLISHED, true);
}
}
break;
@@ -254,68 +333,450 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
return WG_INPUT_ERROR_NONE;
}
-static_always_inline bool
-fib_prefix_is_cover_addr_4 (const fib_prefix_t * p1,
- const ip4_address_t * ip4)
+static_always_inline int
+wg_input_post_process (vlib_main_t *vm, vlib_buffer_t *b, u16 *next,
+ wg_peer_t *peer, message_data_t *data,
+ bool *is_keepalive)
{
- switch (p1->fp_proto)
+ next[0] = WG_INPUT_NEXT_PUNT;
+ noise_keypair_t *kp;
+ vlib_buffer_t *lb;
+
+ if ((kp = wg_get_active_keypair (&peer->remote, data->receiver_index)) ==
+ NULL)
+ return -1;
+
+ if (!noise_counter_recv (&kp->kp_ctr, data->counter))
{
- case FIB_PROTOCOL_IP4:
- return (ip4_destination_matches_route (&ip4_main,
- &p1->fp_addr.ip4,
- ip4, p1->fp_len) != 0);
- case FIB_PROTOCOL_IP6:
- return (false);
- case FIB_PROTOCOL_MPLS:
- break;
+ return -1;
+ }
+
+ lb = b;
+ /* Find last buffer in the chain */
+ while (lb->flags & VLIB_BUFFER_NEXT_PRESENT)
+ lb = vlib_get_buffer (vm, lb->next_buffer);
+
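+  /* Strip the message header from the front and the auth tag from the
+   * tail; what remains is the decrypted inner packet (zero bytes for a
+   * keepalive). */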
+ u16 encr_len = vlib_buffer_length_in_chain (vm, b) - sizeof (message_data_t);
+ u16 decr_len = encr_len - NOISE_AUTHTAG_LEN;
+
+ vlib_buffer_advance (b, sizeof (message_data_t));
+ vlib_buffer_chain_increase_length (b, lb, -NOISE_AUTHTAG_LEN);
+ vnet_buffer_offload_flags_clear (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
+
+ /* Keepalive packet has zero length */
+ if (decr_len == 0)
+ {
+ *is_keepalive = true;
+ return 0;
+ }
+
+ wg_timers_data_received (peer);
+
+ ip46_address_t src_ip;
+ u8 is_ip4_inner = is_ip4_header (vlib_buffer_get_current (b));
+ if (is_ip4_inner)
+ {
+ ip46_address_set_ip4 (
+ &src_ip, &((ip4_header_t *) vlib_buffer_get_current (b))->src_address);
+ }
+ else
+ {
+ ip46_address_set_ip6 (
+ &src_ip, &((ip6_header_t *) vlib_buffer_get_current (b))->src_address);
}
- return (false);
+
+ const fib_prefix_t *allowed_ip;
+ bool allowed = false;
+
+ /*
+ * we could make this into an ACL, but the expectation
+ * is that there aren't many allowed IPs and thus a linear
+ * walk is faster than an ACL
+ */
+ vec_foreach (allowed_ip, peer->allowed_ips)
+ {
+ if (fib_prefix_is_cover_addr_46 (allowed_ip, &src_ip))
+ {
+ allowed = true;
+ break;
+ }
+ }
+ if (allowed)
+ {
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = peer->wg_sw_if_index;
+ next[0] =
+ is_ip4_inner ? WG_INPUT_NEXT_IP4_INPUT : WG_INPUT_NEXT_IP6_INPUT;
+ }
+
+ return 0;
}
-VLIB_NODE_FN (wg_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+static_always_inline void
+wg_input_process_ops (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_crypto_op_t *ops, vlib_buffer_t *b[], u16 *nexts,
+ u16 drop_next)
{
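+  /* Run the queued synchronous crypto ops; a failed op maps back to its
+   * buffer through user_data and is marked for drop. */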
- message_type_t header_type;
- u32 n_left_from;
- u32 *from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u16 nexts[VLIB_FRAME_SIZE], *next;
- u32 thread_index = vm->thread_index;
+ u32 n_fail, n_ops = vec_len (ops);
+ vnet_crypto_op_t *op = ops;
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- b = bufs;
- next = nexts;
+ if (n_ops == 0)
+ return;
- vlib_get_buffers (vm, from, bufs, n_left_from);
+ n_fail = n_ops - vnet_crypto_process_ops (vm, op, n_ops);
+
+ while (n_fail)
+ {
+ ASSERT (op - ops < n_ops);
+
+ if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ {
+ u32 bi = op->user_data;
+ b[bi]->error = node->errors[WG_INPUT_ERROR_DECRYPTION];
+ nexts[bi] = drop_next;
+ n_fail--;
+ }
+ op++;
+ }
+}
+
+static_always_inline void
+wg_input_process_chained_ops (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_crypto_op_t *ops, vlib_buffer_t *b[],
+ u16 *nexts, vnet_crypto_op_chunk_t *chunks,
+ u16 drop_next)
+{
+ u32 n_fail, n_ops = vec_len (ops);
+ vnet_crypto_op_t *op = ops;
+ if (n_ops == 0)
+ return;
+
+ n_fail = n_ops - vnet_crypto_process_chained_ops (vm, op, chunks, n_ops);
+
+ while (n_fail)
+ {
+ ASSERT (op - ops < n_ops);
+
+ if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ {
+ u32 bi = op->user_data;
+ b[bi]->error = node->errors[WG_INPUT_ERROR_DECRYPTION];
+ nexts[bi] = drop_next;
+ n_fail--;
+ }
+ op++;
+ }
+}
+
+static_always_inline void
+wg_input_chain_crypto (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vlib_buffer_t *b, vlib_buffer_t *lb, u8 *start,
+ u32 start_len, u16 *n_ch)
+{
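+  /* Build a scatter-gather chunk list over the buffer chain; on the last
+   * buffer the chunk length excludes the trailing auth tag. */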
+ vnet_crypto_op_chunk_t *ch;
+ vlib_buffer_t *cb = b;
+ u32 n_chunks = 1;
+
+ vec_add2 (ptd->chunks, ch, 1);
+ ch->len = start_len;
+ ch->src = ch->dst = start;
+ cb = vlib_get_buffer (vm, cb->next_buffer);
+
+ while (1)
+ {
+ vec_add2 (ptd->chunks, ch, 1);
+ n_chunks += 1;
+ if (lb == cb)
+ ch->len = cb->current_length - NOISE_AUTHTAG_LEN;
+ else
+ ch->len = cb->current_length;
+
+ ch->src = ch->dst = vlib_buffer_get_current (cb);
+
+ if (!(cb->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+
+ cb = vlib_get_buffer (vm, cb->next_buffer);
+ }
+
+ if (n_ch)
+ *n_ch = n_chunks;
+}
+
+always_inline void
+wg_prepare_sync_dec_op (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vlib_buffer_t *b, vlib_buffer_t *lb,
+ vnet_crypto_op_t **crypto_ops, u8 *src, u32 src_len,
+ u8 *dst, u8 *aad, u32 aad_len,
+ vnet_crypto_key_index_t key_index, u32 bi, u8 *iv)
+{
+ vnet_crypto_op_t _op, *op = &_op;
+ u8 src_[] = {};
+
+ vec_add2_aligned (crypto_ops[0], op, 1, CLIB_CACHE_LINE_BYTES);
+ vnet_crypto_op_init (op, VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC);
+
+ op->tag_len = NOISE_AUTHTAG_LEN;
+ op->tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
+ op->key_index = key_index;
+ op->aad = aad;
+ op->aad_len = aad_len;
+ op->iv = iv;
+ op->user_data = bi;
+ op->flags |= VNET_CRYPTO_OP_FLAG_HMAC_CHECK;
+
+ if (b != lb)
+ {
+ /* Chained buffers */
+ op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+ op->chunk_index = vec_len (ptd->chunks);
+ wg_input_chain_crypto (vm, ptd, b, lb, src, src_len + NOISE_AUTHTAG_LEN,
+ &op->n_chunks);
+ }
+ else
+ {
+ op->src = !src ? src_ : src;
+ op->len = src_len;
+ op->dst = dst;
+ }
+}
+
+static_always_inline void
+wg_input_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
+ u32 key_index, u32 crypto_len, i16 crypto_start_offset,
+ u32 buffer_index, u16 next_node, u8 *iv, u8 *tag,
+ u8 flags)
+{
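+  /* Append one element to the open async frame; the buffer index and the
+   * post-node index travel with the frame so completion can route the
+   * packet. */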
+ vnet_crypto_async_frame_elt_t *fe;
+ u16 index;
+
+ ASSERT (f->n_elts < VNET_CRYPTO_FRAME_SIZE);
+
+ index = f->n_elts;
+ fe = &f->elts[index];
+ f->n_elts++;
+ fe->key_index = key_index;
+ fe->crypto_total_length = crypto_len;
+ fe->crypto_start_offset = crypto_start_offset;
+ fe->iv = iv;
+ fe->tag = tag;
+ fe->flags = flags;
+ f->buffer_indices[index] = buffer_index;
+ f->next_node_index[index] = next_node;
+}
+
+static_always_inline enum noise_state_crypt
+wg_input_process (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vnet_crypto_op_t **crypto_ops,
+ vnet_crypto_async_frame_t **async_frame, vlib_buffer_t *b,
+ vlib_buffer_t *lb, u32 buf_idx, noise_remote_t *r,
+ uint32_t r_idx, uint64_t nonce, uint8_t *src, size_t srclen,
+ size_t srclen_total, uint8_t *dst, u32 from_idx, u8 *iv,
+ f64 time, u8 is_async, u16 async_next_node)
+{
+ noise_keypair_t *kp;
+ enum noise_state_crypt ret = SC_FAILED;
+
+ if ((kp = wg_get_active_keypair (r, r_idx)) == NULL)
+ {
+ goto error;
+ }
+
+ /* We confirm that our values are within our tolerances. These values
+ * are the same as the encrypt routine.
+ *
+ * kp_ctr isn't locked here, we're happy to accept a racy read. */
+ if (wg_birthdate_has_expired_opt (kp->kp_birthdate, REJECT_AFTER_TIME,
+ time) ||
+ kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES)
+ goto error;
+
+ /* Decrypt, then validate the counter. We don't want to validate the
+ * counter before decrypting as we do not know the message is authentic
+ * prior to decryption. */
+
+ clib_memset (iv, 0, 4);
+ clib_memcpy (iv + 4, &nonce, sizeof (nonce));
+
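+  /* Async mode queues the op into a shared crypto frame, to be finished
+   * in the *-input-post node; sync mode enqueues an inline op processed
+   * later in this node. */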
+ if (is_async)
+ {
+ u8 flags = VNET_CRYPTO_OP_FLAG_HMAC_CHECK;
+ u8 *tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
+
+ if (b != lb)
+ flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+
+ if (NULL == *async_frame ||
+ vnet_crypto_async_frame_is_full (*async_frame))
+ {
+ *async_frame = vnet_crypto_async_get_frame (
+ vm, VNET_CRYPTO_OP_CHACHA20_POLY1305_TAG16_AAD0_DEC);
+ if (PREDICT_FALSE (NULL == *async_frame))
+ goto error;
+ /* Save the frame to the list we'll submit at the end */
+ vec_add1 (ptd->async_frames, *async_frame);
+ }
+
+ wg_input_add_to_frame (vm, *async_frame, kp->kp_recv_index, srclen_total,
+ src - b->data, buf_idx, async_next_node, iv, tag,
+ flags);
+ }
+ else
+ {
+ wg_prepare_sync_dec_op (vm, ptd, b, lb, crypto_ops, src, srclen, dst,
+ NULL, 0, kp->kp_recv_index, from_idx, iv);
+ }
+
+ /* If we've received the handshake confirming data packet then move the
+ * next keypair into current. If we do slide the next keypair in, then
+ * we skip the REKEY_AFTER_TIME_RECV check. This is safe to do as a
+ * data packet can't confirm a session that we are an INITIATOR of. */
+ if (kp == r->r_next)
+ {
+ clib_rwlock_writer_lock (&r->r_keypair_lock);
+ if (kp == r->r_next && kp->kp_local_index == r_idx)
+ {
+ noise_remote_keypair_free (vm, r, &r->r_previous);
+ r->r_previous = r->r_current;
+ r->r_current = r->r_next;
+ r->r_next = NULL;
+
+ ret = SC_CONN_RESET;
+ clib_rwlock_writer_unlock (&r->r_keypair_lock);
+ goto error;
+ }
+ clib_rwlock_writer_unlock (&r->r_keypair_lock);
+ }
+
+ /* Similar to when we encrypt, we want to notify the caller when we
+ * are approaching our tolerances. We notify if:
+ * - we're the initiator and the current keypair is older than
+ * REKEY_AFTER_TIME_RECV seconds. */
+ ret = SC_KEEP_KEY_FRESH;
+ kp = r->r_current;
+ if (kp != NULL && kp->kp_valid && kp->kp_is_initiator &&
+ wg_birthdate_has_expired_opt (kp->kp_birthdate, REKEY_AFTER_TIME_RECV,
+ time))
+ goto error;
+
+ ret = SC_OK;
+error:
+ return ret;
+}
+
+static_always_inline void
+wg_find_outer_addr_port (vlib_buffer_t *b, ip46_address_t *addr, u16 *port,
+ u8 is_ip4)
+{
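+  /* The outer IP and UDP headers immediately precede the current data
+   * pointer; step back over them to recover the peer's source address
+   * and port. */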
+ if (is_ip4)
+ {
+ ip4_udp_header_t *ip4_udp_hdr =
+ vlib_buffer_get_current (b) - sizeof (ip4_udp_header_t);
+ ip46_address_set_ip4 (addr, &ip4_udp_hdr->ip4.src_address);
+ *port = clib_net_to_host_u16 (ip4_udp_hdr->udp.src_port);
+ }
+ else
+ {
+ ip6_udp_header_t *ip6_udp_hdr =
+ vlib_buffer_get_current (b) - sizeof (ip6_udp_header_t);
+ ip46_address_set_ip6 (addr, &ip6_udp_hdr->ip6.src_address);
+ *port = clib_net_to_host_u16 (ip6_udp_hdr->udp.src_port);
+ }
+}
+
+always_inline uword
+wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u8 is_ip4, u16 async_next_node)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
wg_main_t *wmp = &wg_main;
+ wg_per_thread_data_t *ptd =
+ vec_elt_at_index (wmp->per_thread_data, vm->thread_index);
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from = frame->n_vectors;
+
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ vlib_buffer_t *lb;
+ u32 thread_index = vm->thread_index;
+ vnet_crypto_op_t **crypto_ops;
+ const u16 drop_next = WG_INPUT_NEXT_PUNT;
+ message_type_t header_type;
+ vlib_buffer_t *data_bufs[VLIB_FRAME_SIZE];
+ u32 data_bi[VLIB_FRAME_SIZE]; /* buffer index for data */
+ u32 other_bi[VLIB_FRAME_SIZE]; /* buffer index for drop or handoff */
+ u16 other_nexts[VLIB_FRAME_SIZE], *other_next = other_nexts, n_other = 0;
+ u16 data_nexts[VLIB_FRAME_SIZE], *data_next = data_nexts, n_data = 0;
+ u16 n_async = 0;
+ const u8 is_async = wg_op_mode_is_set_ASYNC ();
+ vnet_crypto_async_frame_t *async_frame = NULL;
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
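+  /* Reset this thread's scratch vectors; they are filled while walking
+   * the frame and drained once the walk completes. */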
+ vec_reset_length (ptd->crypto_ops);
+ vec_reset_length (ptd->chained_crypto_ops);
+ vec_reset_length (ptd->chunks);
+ vec_reset_length (ptd->async_frames);
+
+ f64 time = clib_time_now (&vm->clib_time) + vm->time_offset;
+
wg_peer_t *peer = NULL;
+ u32 *last_peer_time_idx = NULL;
+ u32 last_rec_idx = ~0;
+
+ bool is_keepalive = false;
+ u32 *peer_idx = NULL;
+ index_t peeri = INDEX_INVALID;
while (n_left_from > 0)
{
- bool is_keepalive = false;
- next[0] = WG_INPUT_NEXT_PUNT;
+ if (n_left_from > 2)
+ {
+ u8 *p;
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ p = vlib_buffer_get_current (b[1]);
+ CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (vlib_buffer_get_tail (b[1]), CLIB_CACHE_LINE_BYTES,
+ LOAD);
+ }
+
+ other_next[n_other] = WG_INPUT_NEXT_PUNT;
+ data_nexts[n_data] = WG_INPUT_N_NEXT;
+
header_type =
((message_header_t *) vlib_buffer_get_current (b[0]))->type;
- u32 *peer_idx;
if (PREDICT_TRUE (header_type == MESSAGE_DATA))
{
message_data_t *data = vlib_buffer_get_current (b[0]);
-
+ u8 *iv_data = b[0]->pre_data;
+ u32 buf_idx = from[b - bufs];
+ u32 n_bufs;
peer_idx = wg_index_table_lookup (&wmp->index_table,
data->receiver_index);
- if (peer_idx)
+ if (data->receiver_index != last_rec_idx)
{
- peer = wg_peer_get (*peer_idx);
+ peer_idx = wg_index_table_lookup (&wmp->index_table,
+ data->receiver_index);
+ if (PREDICT_TRUE (peer_idx != NULL))
+ {
+ peeri = *peer_idx;
+ peer = wg_peer_get (peeri);
+ last_rec_idx = data->receiver_index;
+ }
+ else
+ {
+ peer = NULL;
+ last_rec_idx = ~0;
+ }
}
- else
+
+ if (PREDICT_FALSE (!peer_idx))
{
- next[0] = WG_INPUT_NEXT_ERROR;
+ other_next[n_other] = WG_INPUT_NEXT_ERROR;
b[0]->error = node->errors[WG_INPUT_ERROR_PEER];
+ other_bi[n_other] = buf_idx;
+ n_other += 1;
goto out;
}
@@ -330,128 +791,445 @@ VLIB_NODE_FN (wg_input_node) (vlib_main_t * vm,
if (PREDICT_TRUE (thread_index != peer->input_thread_index))
{
- next[0] = WG_INPUT_NEXT_HANDOFF_DATA;
+ other_next[n_other] = WG_INPUT_NEXT_HANDOFF_DATA;
+ other_bi[n_other] = buf_idx;
+ n_other += 1;
goto next;
}
- u16 encr_len = b[0]->current_length - sizeof (message_data_t);
- u16 decr_len = encr_len - NOISE_AUTHTAG_LEN;
- if (PREDICT_FALSE (decr_len >= WG_DEFAULT_DATA_SIZE))
+ lb = b[0];
+ n_bufs = vlib_buffer_chain_linearize (vm, b[0]);
+ if (n_bufs == 0)
{
- b[0]->error = node->errors[WG_INPUT_ERROR_TOO_BIG];
+ other_next[n_other] = WG_INPUT_NEXT_ERROR;
+ b[0]->error = node->errors[WG_INPUT_ERROR_NO_BUFFERS];
+ other_bi[n_other] = buf_idx;
+ n_other += 1;
goto out;
}
- u8 *decr_data = wmp->per_thread_data[thread_index].data;
+ if (n_bufs > 1)
+ {
+ vlib_buffer_t *before_last = b[0];
+
+ /* Find last and before last buffer in the chain */
+ while (lb->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ before_last = lb;
+ lb = vlib_get_buffer (vm, lb->next_buffer);
+ }
+
+	      /* Ensure the auth tag is contiguous and not split across
+	       * the last two buffers */
+ if (PREDICT_FALSE (lb->current_length < NOISE_AUTHTAG_LEN))
+ {
+ u32 len_diff = NOISE_AUTHTAG_LEN - lb->current_length;
+
+ before_last->current_length -= len_diff;
+ if (before_last == b[0])
+ before_last->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ vlib_buffer_advance (lb, (signed) -len_diff);
+
+ clib_memcpy_fast (vlib_buffer_get_current (lb),
+ vlib_buffer_get_tail (before_last),
+ len_diff);
+ }
+ }
+
+ u16 encr_len = b[0]->current_length - sizeof (message_data_t);
+ u16 decr_len = encr_len - NOISE_AUTHTAG_LEN;
+ u16 encr_len_total =
+ vlib_buffer_length_in_chain (vm, b[0]) - sizeof (message_data_t);
+ u16 decr_len_total = encr_len_total - NOISE_AUTHTAG_LEN;
+
+ if (lb != b[0])
+ crypto_ops = &ptd->chained_crypto_ops;
+ else
+ crypto_ops = &ptd->crypto_ops;
- enum noise_state_crypt state_cr = noise_remote_decrypt (vm,
- &peer->remote,
- data->receiver_index,
- data->counter,
- data->encrypted_data,
- encr_len,
- decr_data);
+ enum noise_state_crypt state_cr =
+ wg_input_process (vm, ptd, crypto_ops, &async_frame, b[0], lb,
+ buf_idx, &peer->remote, data->receiver_index,
+ data->counter, data->encrypted_data, decr_len,
+ decr_len_total, data->encrypted_data, n_data,
+ iv_data, time, is_async, async_next_node);
- if (PREDICT_FALSE (state_cr == SC_CONN_RESET))
+ if (PREDICT_FALSE (state_cr == SC_FAILED))
{
- wg_timers_handshake_complete (peer);
+ wg_peer_update_flags (*peer_idx, WG_PEER_ESTABLISHED, false);
+ other_next[n_other] = WG_INPUT_NEXT_ERROR;
+ b[0]->error = node->errors[WG_INPUT_ERROR_DECRYPTION];
+ other_bi[n_other] = buf_idx;
+ n_other += 1;
+ goto out;
}
- else if (PREDICT_FALSE (state_cr == SC_KEEP_KEY_FRESH))
+ if (!is_async)
{
- wg_send_handshake_from_mt (*peer_idx, false);
+ data_bufs[n_data] = b[0];
+ data_bi[n_data] = buf_idx;
+ n_data += 1;
}
- else if (PREDICT_FALSE (state_cr == SC_FAILED))
+ else
{
- next[0] = WG_INPUT_NEXT_ERROR;
- b[0]->error = node->errors[WG_INPUT_ERROR_DECRYPTION];
- goto out;
+ n_async += 1;
}
- clib_memcpy (vlib_buffer_get_current (b[0]), decr_data, decr_len);
- b[0]->current_length = decr_len;
- vnet_buffer_offload_flags_clear (b[0],
- VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
-
- wg_timers_any_authenticated_packet_received (peer);
- wg_timers_any_authenticated_packet_traversal (peer);
-
- /* Keepalive packet has zero length */
- if (decr_len == 0)
+ if (PREDICT_FALSE (state_cr == SC_CONN_RESET))
{
- is_keepalive = true;
- goto out;
+ wg_timers_handshake_complete (peer);
+ goto next;
}
-
- wg_timers_data_received (peer);
-
- ip4_header_t *iph = vlib_buffer_get_current (b[0]);
-
- const wg_peer_allowed_ip_t *allowed_ip;
- bool allowed = false;
-
- /*
- * we could make this into an ACL, but the expectation
- * is that there aren't many allowed IPs and thus a linear
- * walk is fater than an ACL
- */
- vec_foreach (allowed_ip, peer->allowed_ips)
- {
- if (fib_prefix_is_cover_addr_4 (&allowed_ip->prefix,
- &iph->src_address))
- {
- allowed = true;
- break;
- }
- }
- if (allowed)
+ else if (PREDICT_FALSE (state_cr == SC_KEEP_KEY_FRESH))
{
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = peer->wg_sw_if_index;
- next[0] = WG_INPUT_NEXT_IP4_INPUT;
+ wg_send_handshake_from_mt (peeri, false);
+ goto next;
}
+ else if (PREDICT_TRUE (state_cr == SC_OK))
+ goto next;
}
else
{
- peer_idx = NULL;
-
/* Handshake packets should be processed in main thread */
if (thread_index != 0)
{
- next[0] = WG_INPUT_NEXT_HANDOFF_HANDSHAKE;
+ other_next[n_other] = WG_INPUT_NEXT_HANDOFF_HANDSHAKE;
+ other_bi[n_other] = from[b - bufs];
+ n_other += 1;
goto next;
}
- wg_input_error_t ret = wg_handshake_process (vm, wmp, b[0]);
+ wg_input_error_t ret =
+ wg_handshake_process (vm, wmp, b[0], node->node_index, is_ip4);
if (ret != WG_INPUT_ERROR_NONE)
{
- next[0] = WG_INPUT_NEXT_ERROR;
+ other_next[n_other] = WG_INPUT_NEXT_ERROR;
b[0]->error = node->errors[ret];
+ other_bi[n_other] = from[b - bufs];
+ n_other += 1;
+ }
+ else
+ {
+ other_bi[n_other] = from[b - bufs];
+ n_other += 1;
}
}
out:
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
{
wg_input_trace_t *t = vlib_add_trace (vm, node, b[0], sizeof (*t));
t->type = header_type;
t->current_length = b[0]->current_length;
t->is_keepalive = is_keepalive;
- t->peer = peer_idx ? *peer_idx : INDEX_INVALID;
+ t->peer = peer_idx ? peeri : INDEX_INVALID;
}
+
next:
n_left_from -= 1;
- next += 1;
b += 1;
}
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
+ /* decrypt packets */
+ wg_input_process_ops (vm, node, ptd->crypto_ops, data_bufs, data_nexts,
+ drop_next);
+ wg_input_process_chained_ops (vm, node, ptd->chained_crypto_ops, data_bufs,
+ data_nexts, ptd->chunks, drop_next);
+
+ /* process after decryption */
+ b = data_bufs;
+ n_left_from = n_data;
+ last_rec_idx = ~0;
+ last_peer_time_idx = NULL;
+
+ while (n_left_from > 0)
+ {
+ bool is_keepalive = false;
+ u32 *peer_idx = NULL;
+
+ if (PREDICT_FALSE (data_next[0] == WG_INPUT_NEXT_PUNT))
+ {
+ goto trace;
+ }
+ if (n_left_from > 2)
+ {
+ u8 *p;
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ p = vlib_buffer_get_current (b[1]);
+ CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (vlib_buffer_get_tail (b[1]), CLIB_CACHE_LINE_BYTES,
+ LOAD);
+ }
+
+ message_data_t *data = vlib_buffer_get_current (b[0]);
+ ip46_address_t out_src_ip;
+ u16 out_udp_src_port;
+
+ wg_find_outer_addr_port (b[0], &out_src_ip, &out_udp_src_port, is_ip4);
+
+ if (data->receiver_index != last_rec_idx)
+ {
+ peer_idx =
+ wg_index_table_lookup (&wmp->index_table, data->receiver_index);
+ if (PREDICT_TRUE (peer_idx != NULL))
+ {
+ peeri = *peer_idx;
+ peer = wg_peer_get (peeri);
+ last_rec_idx = data->receiver_index;
+ }
+ else
+ {
+ peer = NULL;
+ last_rec_idx = ~0;
+ }
+ }
+
+ if (PREDICT_TRUE (peer != NULL))
+ {
+ if (PREDICT_FALSE (wg_input_post_process (vm, b[0], data_next, peer,
+ data, &is_keepalive) < 0))
+ goto trace;
+ }
+ else
+ {
+ data_next[0] = WG_INPUT_NEXT_PUNT;
+ goto trace;
+ }
+
+ if (PREDICT_FALSE (peer_idx && (last_peer_time_idx != peer_idx)))
+ {
+ if (PREDICT_FALSE (
+ !ip46_address_is_equal (&peer->dst.addr, &out_src_ip) ||
+ peer->dst.port != out_udp_src_port))
+ wg_peer_update_endpoint_from_mt (peeri, &out_src_ip,
+ out_udp_src_port);
+ wg_timers_any_authenticated_packet_received_opt (peer, time);
+ wg_timers_any_authenticated_packet_traversal (peer);
+ wg_peer_update_flags (*peer_idx, WG_PEER_ESTABLISHED, true);
+ last_peer_time_idx = peer_idx;
+ }
+
+ vlib_increment_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_RX,
+ vm->thread_index, peer->wg_sw_if_index,
+ 1 /* packets */, b[0]->current_length);
+
+ trace:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ wg_input_trace_t *t = vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->type = header_type;
+ t->current_length = b[0]->current_length;
+ t->is_keepalive = is_keepalive;
+ t->peer = peer_idx ? peeri : INDEX_INVALID;
+ }
+
+ b += 1;
+ n_left_from -= 1;
+ data_next += 1;
+ }
+
+ if (n_async)
+ {
+ /* submit all of the open frames */
+ vnet_crypto_async_frame_t **async_frame;
+ vec_foreach (async_frame, ptd->async_frames)
+ {
+ if (PREDICT_FALSE (
+ vnet_crypto_async_submit_open_frame (vm, *async_frame) < 0))
+ {
+ u32 n_drop = (*async_frame)->n_elts;
+ u32 *bi = (*async_frame)->buffer_indices;
+ u16 index = n_other;
+ while (n_drop--)
+ {
+ other_bi[index] = bi[0];
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi[0]);
+ other_nexts[index] = drop_next;
+ b->error = node->errors[WG_INPUT_ERROR_CRYPTO_ENGINE_ERROR];
+ bi++;
+ index++;
+ }
+ n_other += (*async_frame)->n_elts;
+
+ vnet_crypto_async_reset_frame (*async_frame);
+ vnet_crypto_async_free_frame (vm, *async_frame);
+ }
+ }
+ }
+
+ /* enqueue other bufs */
+ if (n_other)
+ vlib_buffer_enqueue_to_next (vm, node, other_bi, other_next, n_other);
+
+ /* enqueue data bufs */
+ if (n_data)
+ vlib_buffer_enqueue_to_next (vm, node, data_bi, data_nexts, n_data);
return frame->n_vectors;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (wg_input_node) =
+always_inline uword
+wg_input_post (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
+ u8 is_ip4)
{
- .name = "wg-input",
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ wg_main_t *wmp = &wg_main;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left = frame->n_vectors;
+ wg_peer_t *peer = NULL;
+ u32 *peer_idx = NULL;
+ u32 *last_peer_time_idx = NULL;
+ index_t peeri = INDEX_INVALID;
+ u32 last_rec_idx = ~0;
+ f64 time = clib_time_now (&vm->clib_time) + vm->time_offset;
+
+ vlib_get_buffers (vm, from, b, n_left);
+
+ if (n_left >= 2)
+ {
+ vlib_prefetch_buffer_header (b[0], LOAD);
+ vlib_prefetch_buffer_header (b[1], LOAD);
+ }
+
+ while (n_left > 0)
+ {
+ if (n_left > 2)
+ {
+ u8 *p;
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ p = vlib_buffer_get_current (b[1]);
+ CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bool is_keepalive = false;
+ message_data_t *data = vlib_buffer_get_current (b[0]);
+ ip46_address_t out_src_ip;
+ u16 out_udp_src_port;
+
+ wg_find_outer_addr_port (b[0], &out_src_ip, &out_udp_src_port, is_ip4);
+
+ if (data->receiver_index != last_rec_idx)
+ {
+ peer_idx =
+ wg_index_table_lookup (&wmp->index_table, data->receiver_index);
+
+ if (PREDICT_TRUE (peer_idx != NULL))
+ {
+ peeri = *peer_idx;
+ peer = wg_peer_get (peeri);
+ last_rec_idx = data->receiver_index;
+ }
+ else
+ {
+ peer = NULL;
+ last_rec_idx = ~0;
+ }
+ }
+
+ if (PREDICT_TRUE (peer != NULL))
+ {
+ if (PREDICT_FALSE (wg_input_post_process (vm, b[0], next, peer, data,
+ &is_keepalive) < 0))
+ goto trace;
+ }
+ else
+ {
+ next[0] = WG_INPUT_NEXT_PUNT;
+ goto trace;
+ }
+
+ if (PREDICT_FALSE (peer_idx && (last_peer_time_idx != peer_idx)))
+ {
+ if (PREDICT_FALSE (
+ !ip46_address_is_equal (&peer->dst.addr, &out_src_ip) ||
+ peer->dst.port != out_udp_src_port))
+ wg_peer_update_endpoint_from_mt (peeri, &out_src_ip,
+ out_udp_src_port);
+ wg_timers_any_authenticated_packet_received_opt (peer, time);
+ wg_timers_any_authenticated_packet_traversal (peer);
+ wg_peer_update_flags (*peer_idx, WG_PEER_ESTABLISHED, true);
+ last_peer_time_idx = peer_idx;
+ }
+
+ vlib_increment_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_RX,
+ vm->thread_index, peer->wg_sw_if_index,
+ 1 /* packets */, b[0]->current_length);
+
+ trace:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ wg_input_post_trace_t *t =
+ vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->next = next[0];
+ t->peer = peer_idx ? peeri : INDEX_INVALID;
+ }
+
+ b += 1;
+ next += 1;
+ n_left -= 1;
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (wg4_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return wg_input_inline (vm, node, frame, /* is_ip4 */ 1,
+ wg_decrypt_async_next.wg4_post_next);
+}
+
+VLIB_NODE_FN (wg6_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return wg_input_inline (vm, node, frame, /* is_ip4 */ 0,
+ wg_decrypt_async_next.wg6_post_next);
+}
+
+VLIB_NODE_FN (wg4_input_post_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return wg_input_post (vm, node, from_frame, /* is_ip4 */ 1);
+}
+
+VLIB_NODE_FN (wg6_input_post_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return wg_input_post (vm, node, from_frame, /* is_ip4 */ 0);
+}
+
+VLIB_REGISTER_NODE (wg4_input_node) =
+{
+ .name = "wg4-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (wg_input_error_strings),
+ .error_strings = wg_input_error_strings,
+ .n_next_nodes = WG_INPUT_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [WG_INPUT_NEXT_HANDOFF_HANDSHAKE] = "wg4-handshake-handoff",
+ [WG_INPUT_NEXT_HANDOFF_DATA] = "wg4-input-data-handoff",
+ [WG_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [WG_INPUT_NEXT_IP6_INPUT] = "ip6-input",
+ [WG_INPUT_NEXT_PUNT] = "error-punt",
+ [WG_INPUT_NEXT_ERROR] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (wg6_input_node) =
+{
+ .name = "wg6-input",
.vector_size = sizeof (u32),
.format_trace = format_wg_input_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
@@ -460,14 +1238,37 @@ VLIB_REGISTER_NODE (wg_input_node) =
.n_next_nodes = WG_INPUT_N_NEXT,
/* edit / add dispositions here */
.next_nodes = {
- [WG_INPUT_NEXT_HANDOFF_HANDSHAKE] = "wg-handshake-handoff",
- [WG_INPUT_NEXT_HANDOFF_DATA] = "wg-input-data-handoff",
+ [WG_INPUT_NEXT_HANDOFF_HANDSHAKE] = "wg6-handshake-handoff",
+ [WG_INPUT_NEXT_HANDOFF_DATA] = "wg6-input-data-handoff",
[WG_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [WG_INPUT_NEXT_IP6_INPUT] = "ip6-input",
[WG_INPUT_NEXT_PUNT] = "error-punt",
[WG_INPUT_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
+
+VLIB_REGISTER_NODE (wg4_input_post_node) = {
+ .name = "wg4-input-post-node",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_input_post_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .sibling_of = "wg4-input",
+
+ .n_errors = ARRAY_LEN (wg_input_error_strings),
+ .error_strings = wg_input_error_strings,
+};
+
+VLIB_REGISTER_NODE (wg6_input_post_node) = {
+ .name = "wg6-input-post-node",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_input_post_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .sibling_of = "wg6-input",
+
+ .n_errors = ARRAY_LEN (wg_input_error_strings),
+ .error_strings = wg_input_error_strings,
+};
+
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/wireguard/wireguard_key.c b/src/plugins/wireguard/wireguard_key.c
index 1ef1d8bf743..1ef1d8bf743 100755..100644
--- a/src/plugins/wireguard/wireguard_key.c
+++ b/src/plugins/wireguard/wireguard_key.c
diff --git a/src/plugins/wireguard/wireguard_key.h b/src/plugins/wireguard/wireguard_key.h
index ed96fb1da91..ed96fb1da91 100755..100644
--- a/src/plugins/wireguard/wireguard_key.h
+++ b/src/plugins/wireguard/wireguard_key.h
diff --git a/src/plugins/wireguard/wireguard_messages.h b/src/plugins/wireguard/wireguard_messages.h
index 3587c5c8a45..3587c5c8a45 100755..100644
--- a/src/plugins/wireguard/wireguard_messages.h
+++ b/src/plugins/wireguard/wireguard_messages.h
diff --git a/src/plugins/wireguard/wireguard_noise.c b/src/plugins/wireguard/wireguard_noise.c
index 850be2c86c8..5fe2e44b03b 100755..100644
--- a/src/plugins/wireguard/wireguard_noise.c
+++ b/src/plugins/wireguard/wireguard_noise.c
@@ -17,6 +17,7 @@
#include <openssl/hmac.h>
#include <wireguard/wireguard.h>
+#include <wireguard/wireguard_chachapoly.h>
/* This implements Noise_IKpsk2:
*
@@ -32,11 +33,13 @@ noise_local_t *noise_local_pool;
static noise_keypair_t *noise_remote_keypair_allocate (noise_remote_t *);
static void noise_remote_keypair_free (vlib_main_t * vm, noise_remote_t *,
noise_keypair_t **);
-static uint32_t noise_remote_handshake_index_get (noise_remote_t *);
-static void noise_remote_handshake_index_drop (noise_remote_t *);
+static uint32_t noise_remote_handshake_index_get (vlib_main_t *vm,
+ noise_remote_t *);
+static void noise_remote_handshake_index_drop (vlib_main_t *vm,
+ noise_remote_t *);
static uint64_t noise_counter_send (noise_counter_t *);
-static bool noise_counter_recv (noise_counter_t *, uint64_t);
+bool noise_counter_recv (noise_counter_t *, uint64_t);
static void noise_kdf (uint8_t *, uint8_t *, uint8_t *, const uint8_t *,
size_t, size_t, size_t, size_t,
@@ -67,8 +70,6 @@ static void noise_msg_ephemeral (uint8_t[NOISE_HASH_LEN],
static void noise_tai64n_now (uint8_t[NOISE_TIMESTAMP_LEN]);
-static void secure_zero_memory (void *v, size_t n);
-
/* Set/Get noise parameters */
void
noise_local_init (noise_local_t * l, struct noise_upcall *upcall)
@@ -87,7 +88,7 @@ noise_local_set_private (noise_local_t * l,
}
void
-noise_remote_init (noise_remote_t * r, uint32_t peer_pool_idx,
+noise_remote_init (vlib_main_t *vm, noise_remote_t *r, uint32_t peer_pool_idx,
const uint8_t public[NOISE_PUBLIC_KEY_LEN],
u32 noise_local_idx)
{
@@ -98,19 +99,19 @@ noise_remote_init (noise_remote_t * r, uint32_t peer_pool_idx,
r->r_local_idx = noise_local_idx;
r->r_handshake.hs_state = HS_ZEROED;
- noise_remote_precompute (r);
+ noise_remote_precompute (vm, r);
}
void
-noise_remote_precompute (noise_remote_t * r)
+noise_remote_precompute (vlib_main_t *vm, noise_remote_t *r)
{
noise_local_t *l = noise_local_get (r->r_local_idx);
if (!curve25519_gen_shared (r->r_ss, l->l_private, r->r_public))
clib_memset (r->r_ss, 0, NOISE_PUBLIC_KEY_LEN);
- noise_remote_handshake_index_drop (r);
- secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
+ noise_remote_handshake_index_drop (vm, r);
+ wg_secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
}
/* Handshake functions */
@@ -122,7 +123,7 @@ noise_create_initiation (vlib_main_t * vm, noise_remote_t * r,
{
noise_handshake_t *hs = &r->r_handshake;
noise_local_t *l = noise_local_get (r->r_local_idx);
- uint8_t _key[NOISE_SYMMETRIC_KEY_LEN];
+ uint8_t _key[NOISE_SYMMETRIC_KEY_LEN] = { 0 };
uint32_t key_idx;
uint8_t *key;
int ret = false;
@@ -143,6 +144,7 @@ noise_create_initiation (vlib_main_t * vm, noise_remote_t * r,
/* es */
if (!noise_mix_dh (hs->hs_ck, key, hs->hs_e, r->r_public))
goto error;
+ vnet_crypto_key_update (vm, key_idx);
/* s */
noise_msg_encrypt (vm, es, l->l_public, NOISE_PUBLIC_KEY_LEN, key_idx,
@@ -151,17 +153,18 @@ noise_create_initiation (vlib_main_t * vm, noise_remote_t * r,
/* ss */
if (!noise_mix_ss (hs->hs_ck, key, r->r_ss))
goto error;
+ vnet_crypto_key_update (vm, key_idx);
/* {t} */
noise_tai64n_now (ets);
noise_msg_encrypt (vm, ets, ets, NOISE_TIMESTAMP_LEN, key_idx, hs->hs_hash);
- noise_remote_handshake_index_drop (r);
+ noise_remote_handshake_index_drop (vm, r);
hs->hs_state = CREATED_INITIATION;
- hs->hs_local_index = noise_remote_handshake_index_get (r);
+ hs->hs_local_index = noise_remote_handshake_index_get (vm, r);
*s_idx = hs->hs_local_index;
ret = true;
error:
- secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
+ wg_secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
vnet_crypto_key_del (vm, key_idx);
return ret;
}
@@ -177,9 +180,9 @@ noise_consume_initiation (vlib_main_t * vm, noise_local_t * l,
{
noise_remote_t *r;
noise_handshake_t hs;
- uint8_t _key[NOISE_SYMMETRIC_KEY_LEN];
- uint8_t r_public[NOISE_PUBLIC_KEY_LEN];
- uint8_t timestamp[NOISE_TIMESTAMP_LEN];
+ uint8_t _key[NOISE_SYMMETRIC_KEY_LEN] = { 0 };
+ uint8_t r_public[NOISE_PUBLIC_KEY_LEN] = { 0 };
+ uint8_t timestamp[NOISE_TIMESTAMP_LEN] = { 0 };
u32 key_idx;
uint8_t *key;
int ret = false;
@@ -197,6 +200,7 @@ noise_consume_initiation (vlib_main_t * vm, noise_local_t * l,
/* es */
if (!noise_mix_dh (hs.hs_ck, key, l->l_private, ue))
goto error;
+ vnet_crypto_key_update (vm, key_idx);
/* s */
@@ -212,6 +216,7 @@ noise_consume_initiation (vlib_main_t * vm, noise_local_t * l,
/* ss */
if (!noise_mix_ss (hs.hs_ck, key, r->r_ss))
goto error;
+ vnet_crypto_key_update (vm, key_idx);
/* {t} */
if (!noise_msg_decrypt (vm, timestamp, ets,
@@ -238,15 +243,15 @@ noise_consume_initiation (vlib_main_t * vm, noise_local_t * l,
goto error;
/* Ok, we're happy to accept this initiation now */
- noise_remote_handshake_index_drop (r);
+ noise_remote_handshake_index_drop (vm, r);
r->r_handshake = hs;
*rp = r;
ret = true;
error:
- secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
+ wg_secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
vnet_crypto_key_del (vm, key_idx);
- secure_zero_memory (&hs, sizeof (hs));
+ wg_secure_zero_memory (&hs, sizeof (hs));
return ret;
}
@@ -256,8 +261,8 @@ noise_create_response (vlib_main_t * vm, noise_remote_t * r, uint32_t * s_idx,
uint8_t en[0 + NOISE_AUTHTAG_LEN])
{
noise_handshake_t *hs = &r->r_handshake;
- uint8_t _key[NOISE_SYMMETRIC_KEY_LEN];
- uint8_t e[NOISE_PUBLIC_KEY_LEN];
+ uint8_t _key[NOISE_SYMMETRIC_KEY_LEN] = { 0 };
+ uint8_t e[NOISE_PUBLIC_KEY_LEN] = { 0 };
uint32_t key_idx;
uint8_t *key;
int ret = false;
@@ -286,20 +291,21 @@ noise_create_response (vlib_main_t * vm, noise_remote_t * r, uint32_t * s_idx,
/* psk */
noise_mix_psk (hs->hs_ck, hs->hs_hash, key, r->r_psk);
+ vnet_crypto_key_update (vm, key_idx);
/* {} */
noise_msg_encrypt (vm, en, NULL, 0, key_idx, hs->hs_hash);
hs->hs_state = CREATED_RESPONSE;
- hs->hs_local_index = noise_remote_handshake_index_get (r);
+ hs->hs_local_index = noise_remote_handshake_index_get (vm, r);
*r_idx = hs->hs_remote_index;
*s_idx = hs->hs_local_index;
ret = true;
error:
- secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
+ wg_secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
vnet_crypto_key_del (vm, key_idx);
- secure_zero_memory (e, NOISE_PUBLIC_KEY_LEN);
+ wg_secure_zero_memory (e, NOISE_PUBLIC_KEY_LEN);
return ret;
}
@@ -310,8 +316,8 @@ noise_consume_response (vlib_main_t * vm, noise_remote_t * r, uint32_t s_idx,
{
noise_local_t *l = noise_local_get (r->r_local_idx);
noise_handshake_t hs;
- uint8_t _key[NOISE_SYMMETRIC_KEY_LEN];
- uint8_t preshared_key[NOISE_PUBLIC_KEY_LEN];
+ uint8_t _key[NOISE_SYMMETRIC_KEY_LEN] = { 0 };
+ uint8_t preshared_key[NOISE_PUBLIC_KEY_LEN] = { 0 };
uint32_t key_idx;
uint8_t *key;
int ret = false;
@@ -340,6 +346,7 @@ noise_consume_response (vlib_main_t * vm, noise_remote_t * r, uint32_t s_idx,
/* psk */
noise_mix_psk (hs.hs_ck, hs.hs_hash, key, preshared_key);
+ vnet_crypto_key_update (vm, key_idx);
/* {} */
@@ -358,8 +365,8 @@ noise_consume_response (vlib_main_t * vm, noise_remote_t * r, uint32_t s_idx,
ret = true;
}
error:
- secure_zero_memory (&hs, sizeof (hs));
- secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
+ wg_secure_zero_memory (&hs, sizeof (hs));
+ wg_secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
vnet_crypto_key_del (vm, key_idx);
return ret;
}
@@ -407,6 +414,8 @@ noise_remote_begin_session (vlib_main_t * vm, noise_remote_t * r)
/* Now we need to add_new_keypair */
clib_rwlock_writer_lock (&r->r_keypair_lock);
+  /* Activate the barrier to synchronize keys between threads */
+ vlib_worker_thread_barrier_sync (vm);
next = r->r_next;
current = r->r_current;
previous = r->r_previous;
@@ -438,19 +447,20 @@ noise_remote_begin_session (vlib_main_t * vm, noise_remote_t * r)
r->r_next = noise_remote_keypair_allocate (r);
*r->r_next = kp;
}
+ vlib_worker_thread_barrier_release (vm);
clib_rwlock_writer_unlock (&r->r_keypair_lock);
- secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
+ wg_secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
- secure_zero_memory (&kp, sizeof (kp));
+ wg_secure_zero_memory (&kp, sizeof (kp));
return true;
}
void
noise_remote_clear (vlib_main_t * vm, noise_remote_t * r)
{
- noise_remote_handshake_index_drop (r);
- secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
+ noise_remote_handshake_index_drop (vm, r);
+ wg_secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
clib_rwlock_writer_lock (&r->r_keypair_lock);
noise_remote_keypair_free (vm, r, &r->r_next);
@@ -492,54 +502,6 @@ noise_remote_ready (noise_remote_t * r)
return ret;
}
-static bool
-chacha20poly1305_calc (vlib_main_t * vm,
- u8 * src,
- u32 src_len,
- u8 * dst,
- u8 * aad,
- u32 aad_len,
- u64 nonce,
- vnet_crypto_op_id_t op_id,
- vnet_crypto_key_index_t key_index)
-{
- vnet_crypto_op_t _op, *op = &_op;
- u8 iv[12];
- u8 tag_[NOISE_AUTHTAG_LEN] = { };
- u8 src_[] = { };
-
- clib_memset (iv, 0, 12);
- clib_memcpy (iv + 4, &nonce, sizeof (nonce));
-
- vnet_crypto_op_init (op, op_id);
-
- op->tag_len = NOISE_AUTHTAG_LEN;
- if (op_id == VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC)
- {
- op->tag = src + src_len - NOISE_AUTHTAG_LEN;
- src_len -= NOISE_AUTHTAG_LEN;
- }
- else
- op->tag = tag_;
-
- op->src = !src ? src_ : src;
- op->len = src_len;
-
- op->dst = dst;
- op->key_index = key_index;
- op->aad = aad;
- op->aad_len = aad_len;
- op->iv = iv;
-
- vnet_crypto_process_ops (vm, op, 1);
- if (op_id == VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC)
- {
- clib_memcpy (dst + src_len, op->tag, NOISE_AUTHTAG_LEN);
- }
-
- return (op->status == VNET_CRYPTO_OP_STATUS_COMPLETED);
-}
-
enum noise_state_crypt
noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
uint64_t * nonce, uint8_t * src, size_t srclen,
@@ -548,7 +510,6 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
noise_keypair_t *kp;
enum noise_state_crypt ret = SC_FAILED;
- clib_rwlock_reader_lock (&r->r_keypair_lock);
if ((kp = r->r_current) == NULL)
goto error;
@@ -569,9 +530,9 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
* are passed back out to the caller through the provided data pointer. */
*r_idx = kp->kp_remote_index;
- chacha20poly1305_calc (vm, src, srclen, dst, NULL, 0, *nonce,
- VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC,
- kp->kp_send_index);
+ wg_chacha20poly1305_calc (vm, src, srclen, dst, NULL, 0, *nonce,
+ VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC,
+ kp->kp_send_index);
/* If our values are still within tolerances, but we are approaching
* the tolerances, we notify the caller with ESTALE that they should
@@ -588,94 +549,6 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
ret = SC_OK;
error:
- clib_rwlock_reader_unlock (&r->r_keypair_lock);
- return ret;
-}
-
-enum noise_state_crypt
-noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
- uint64_t nonce, uint8_t * src, size_t srclen,
- uint8_t * dst)
-{
- noise_keypair_t *kp;
- enum noise_state_crypt ret = SC_FAILED;
- clib_rwlock_reader_lock (&r->r_keypair_lock);
-
- if (r->r_current != NULL && r->r_current->kp_local_index == r_idx)
- {
- kp = r->r_current;
- }
- else if (r->r_previous != NULL && r->r_previous->kp_local_index == r_idx)
- {
- kp = r->r_previous;
- }
- else if (r->r_next != NULL && r->r_next->kp_local_index == r_idx)
- {
- kp = r->r_next;
- }
- else
- {
- goto error;
- }
-
- /* We confirm that our values are within our tolerances. These values
- * are the same as the encrypt routine.
- *
- * kp_ctr isn't locked here, we're happy to accept a racy read. */
- if (wg_birthdate_has_expired (kp->kp_birthdate, REJECT_AFTER_TIME) ||
- kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES)
- goto error;
-
- /* Decrypt, then validate the counter. We don't want to validate the
- * counter before decrypting as we do not know the message is authentic
- * prior to decryption. */
- if (!chacha20poly1305_calc (vm, src, srclen, dst, NULL, 0, nonce,
- VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC,
- kp->kp_recv_index))
- goto error;
-
- if (!noise_counter_recv (&kp->kp_ctr, nonce))
- goto error;
-
- /* If we've received the handshake confirming data packet then move the
- * next keypair into current. If we do slide the next keypair in, then
- * we skip the REKEY_AFTER_TIME_RECV check. This is safe to do as a
- * data packet can't confirm a session that we are an INITIATOR of. */
- if (kp == r->r_next)
- {
- clib_rwlock_reader_unlock (&r->r_keypair_lock);
- clib_rwlock_writer_lock (&r->r_keypair_lock);
- if (kp == r->r_next && kp->kp_local_index == r_idx)
- {
- noise_remote_keypair_free (vm, r, &r->r_previous);
- r->r_previous = r->r_current;
- r->r_current = r->r_next;
- r->r_next = NULL;
-
- ret = SC_CONN_RESET;
- clib_rwlock_writer_unlock (&r->r_keypair_lock);
- clib_rwlock_reader_lock (&r->r_keypair_lock);
- goto error;
- }
- clib_rwlock_writer_unlock (&r->r_keypair_lock);
- clib_rwlock_reader_lock (&r->r_keypair_lock);
- }
-
- /* Similar to when we encrypt, we want to notify the caller when we
- * are approaching our tolerances. We notify if:
- * - we're the initiator and the current keypair is older than
- * REKEY_AFTER_TIME_RECV seconds. */
- ret = SC_KEEP_KEY_FRESH;
- kp = r->r_current;
- if (kp != NULL &&
- kp->kp_valid &&
- kp->kp_is_initiator &&
- wg_birthdate_has_expired (kp->kp_birthdate, REKEY_AFTER_TIME_RECV))
- goto error;
-
- ret = SC_OK;
-error:
- clib_rwlock_reader_unlock (&r->r_keypair_lock);
return ret;
}
@@ -689,86 +562,22 @@ noise_remote_keypair_allocate (noise_remote_t * r)
return kp;
}
-static void
-noise_remote_keypair_free (vlib_main_t * vm, noise_remote_t * r,
- noise_keypair_t ** kp)
-{
- noise_local_t *local = noise_local_get (r->r_local_idx);
- struct noise_upcall *u = &local->l_upcall;
- if (*kp)
- {
- u->u_index_drop ((*kp)->kp_local_index);
- vnet_crypto_key_del (vm, (*kp)->kp_send_index);
- vnet_crypto_key_del (vm, (*kp)->kp_recv_index);
- clib_mem_free (*kp);
- }
-}
-
static uint32_t
-noise_remote_handshake_index_get (noise_remote_t * r)
+noise_remote_handshake_index_get (vlib_main_t *vm, noise_remote_t *r)
{
noise_local_t *local = noise_local_get (r->r_local_idx);
struct noise_upcall *u = &local->l_upcall;
- return u->u_index_set (r);
+ return u->u_index_set (vm, r);
}
static void
-noise_remote_handshake_index_drop (noise_remote_t * r)
+noise_remote_handshake_index_drop (vlib_main_t *vm, noise_remote_t *r)
{
noise_handshake_t *hs = &r->r_handshake;
noise_local_t *local = noise_local_get (r->r_local_idx);
struct noise_upcall *u = &local->l_upcall;
if (hs->hs_state != HS_ZEROED)
- u->u_index_drop (hs->hs_local_index);
-}
-
-static uint64_t
-noise_counter_send (noise_counter_t * ctr)
-{
- uint64_t ret;
- ret = ctr->c_send++;
- return ret;
-}
-
-static bool
-noise_counter_recv (noise_counter_t * ctr, uint64_t recv)
-{
- uint64_t i, top, index_recv, index_ctr;
- unsigned long bit;
- bool ret = false;
-
- /* Check that the recv counter is valid */
- if (ctr->c_recv >= REJECT_AFTER_MESSAGES || recv >= REJECT_AFTER_MESSAGES)
- goto error;
-
- /* If the packet is out of the window, invalid */
- if (recv + COUNTER_WINDOW_SIZE < ctr->c_recv)
- goto error;
-
- /* If the new counter is ahead of the current counter, we'll need to
- * zero out the bitmap that has previously been used */
- index_recv = recv / COUNTER_BITS;
- index_ctr = ctr->c_recv / COUNTER_BITS;
-
- if (recv > ctr->c_recv)
- {
- top = clib_min (index_recv - index_ctr, COUNTER_NUM);
- for (i = 1; i <= top; i++)
- ctr->c_backtrack[(i + index_ctr) & (COUNTER_NUM - 1)] = 0;
- ctr->c_recv = recv;
- }
-
- index_recv %= COUNTER_NUM;
- bit = 1ul << (recv % COUNTER_BITS);
-
- if (ctr->c_backtrack[index_recv] & bit)
- goto error;
-
- ctr->c_backtrack[index_recv] |= bit;
-
- ret = true;
-error:
- return ret;
+ u->u_index_drop (vm, hs->hs_local_index);
}
static void
@@ -815,8 +624,8 @@ noise_kdf (uint8_t * a, uint8_t * b, uint8_t * c, const uint8_t * x,
out:
/* Clear sensitive data from stack */
- secure_zero_memory (sec, BLAKE2S_HASH_SIZE);
- secure_zero_memory (out, BLAKE2S_HASH_SIZE + 1);
+ wg_secure_zero_memory (sec, BLAKE2S_HASH_SIZE);
+ wg_secure_zero_memory (out, BLAKE2S_HASH_SIZE + 1);
}
static bool
@@ -831,7 +640,7 @@ noise_mix_dh (uint8_t ck[NOISE_HASH_LEN],
noise_kdf (ck, key, NULL, dh,
NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
ck);
- secure_zero_memory (dh, NOISE_PUBLIC_KEY_LEN);
+ wg_secure_zero_memory (dh, NOISE_PUBLIC_KEY_LEN);
return true;
}
@@ -872,7 +681,7 @@ noise_mix_psk (uint8_t ck[NOISE_HASH_LEN], uint8_t hash[NOISE_HASH_LEN],
NOISE_HASH_LEN, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN,
NOISE_SYMMETRIC_KEY_LEN, ck);
noise_mix_hash (hash, tmp, NOISE_HASH_LEN);
- secure_zero_memory (tmp, NOISE_HASH_LEN);
+ wg_secure_zero_memory (tmp, NOISE_HASH_LEN);
}
static void
@@ -899,8 +708,8 @@ noise_msg_encrypt (vlib_main_t * vm, uint8_t * dst, uint8_t * src,
uint8_t hash[NOISE_HASH_LEN])
{
/* Nonce always zero for Noise_IK */
- chacha20poly1305_calc (vm, src, src_len, dst, hash, NOISE_HASH_LEN, 0,
- VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC, key_idx);
+ wg_chacha20poly1305_calc (vm, src, src_len, dst, hash, NOISE_HASH_LEN, 0,
+ VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC, key_idx);
noise_mix_hash (hash, dst, src_len + NOISE_AUTHTAG_LEN);
}
@@ -910,8 +719,9 @@ noise_msg_decrypt (vlib_main_t * vm, uint8_t * dst, uint8_t * src,
uint8_t hash[NOISE_HASH_LEN])
{
/* Nonce always zero for Noise_IK */
- if (!chacha20poly1305_calc (vm, src, src_len, dst, hash, NOISE_HASH_LEN, 0,
- VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC, key_idx))
+ if (!wg_chacha20poly1305_calc (vm, src, src_len, dst, hash, NOISE_HASH_LEN,
+ 0, VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC,
+ key_idx))
return false;
noise_mix_hash (hash, src, src_len);
return true;
@@ -949,13 +759,6 @@ noise_tai64n_now (uint8_t output[NOISE_TIMESTAMP_LEN])
clib_memcpy (output + sizeof (sec), &nsec, sizeof (nsec));
}
-static void
-secure_zero_memory (void *v, size_t n)
-{
- static void *(*const volatile memset_v) (void *, int, size_t) = &memset;
- memset_v (v, 0, n);
-}
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/wireguard/wireguard_noise.h b/src/plugins/wireguard/wireguard_noise.h
index 5b5a88fa250..fd2c09ebfa5 100755..100644
--- a/src/plugins/wireguard/wireguard_noise.h
+++ b/src/plugins/wireguard/wireguard_noise.h
@@ -121,8 +121,8 @@ typedef struct noise_local
{
void *u_arg;
noise_remote_t *(*u_remote_get) (const uint8_t[NOISE_PUBLIC_KEY_LEN]);
- uint32_t (*u_index_set) (noise_remote_t *);
- void (*u_index_drop) (uint32_t);
+ uint32_t (*u_index_set) (vlib_main_t *, noise_remote_t *);
+ void (*u_index_drop) (vlib_main_t *, uint32_t);
} l_upcall;
} noise_local_t;
@@ -136,15 +136,23 @@ noise_local_get (uint32_t locali)
return (pool_elt_at_index (noise_local_pool, locali));
}
+static_always_inline uint64_t
+noise_counter_send (noise_counter_t *ctr)
+{
+ uint64_t ret;
+ ret = ctr->c_send++;
+ return ret;
+}
+
void noise_local_init (noise_local_t *, struct noise_upcall *);
bool noise_local_set_private (noise_local_t *,
const uint8_t[NOISE_PUBLIC_KEY_LEN]);
-void noise_remote_init (noise_remote_t *, uint32_t,
+void noise_remote_init (vlib_main_t *, noise_remote_t *, uint32_t,
const uint8_t[NOISE_PUBLIC_KEY_LEN], uint32_t);
/* Should be called anytime noise_local_set_private is called */
-void noise_remote_precompute (noise_remote_t *);
+void noise_remote_precompute (vlib_main_t *, noise_remote_t *);
/* Cryptographic functions */
bool noise_create_initiation (vlib_main_t * vm, noise_remote_t *,
@@ -187,12 +195,83 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t *,
uint32_t * r_idx,
uint64_t * nonce,
uint8_t * src, size_t srclen, uint8_t * dst);
-enum noise_state_crypt
-noise_remote_decrypt (vlib_main_t * vm, noise_remote_t *,
- uint32_t r_idx,
- uint64_t nonce,
- uint8_t * src, size_t srclen, uint8_t * dst);
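+/* Pick whichever keypair slot (current, previous or next) matches the
+ * receiver index carried in the packet. */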
+static_always_inline noise_keypair_t *
+wg_get_active_keypair (noise_remote_t *r, uint32_t r_idx)
+{
+ if (r->r_current != NULL && r->r_current->kp_local_index == r_idx)
+ {
+ return r->r_current;
+ }
+ else if (r->r_previous != NULL && r->r_previous->kp_local_index == r_idx)
+ {
+ return r->r_previous;
+ }
+ else if (r->r_next != NULL && r->r_next->kp_local_index == r_idx)
+ {
+ return r->r_next;
+ }
+ else
+ {
+ return NULL;
+ }
+}
+
+inline bool
+noise_counter_recv (noise_counter_t *ctr, uint64_t recv)
+{
+ uint64_t i, top, index_recv, index_ctr;
+ unsigned long bit;
+ bool ret = false;
+
+ /* Check that the recv counter is valid */
+ if (ctr->c_recv >= REJECT_AFTER_MESSAGES || recv >= REJECT_AFTER_MESSAGES)
+ goto error;
+
+ /* If the packet is out of the window, invalid */
+ if (recv + COUNTER_WINDOW_SIZE < ctr->c_recv)
+ goto error;
+
+ /* If the new counter is ahead of the current counter, we'll need to
+ * zero out the bitmap that has previously been used */
+ index_recv = recv / COUNTER_BITS;
+ index_ctr = ctr->c_recv / COUNTER_BITS;
+
+ if (recv > ctr->c_recv)
+ {
+ top = clib_min (index_recv - index_ctr, COUNTER_NUM);
+ for (i = 1; i <= top; i++)
+ ctr->c_backtrack[(i + index_ctr) & (COUNTER_NUM - 1)] = 0;
+ ctr->c_recv = recv;
+ }
+
+ index_recv %= COUNTER_NUM;
+ bit = 1ul << (recv % COUNTER_BITS);
+
+ if (ctr->c_backtrack[index_recv] & bit)
+ goto error;
+
+ ctr->c_backtrack[index_recv] |= bit;
+
+ ret = true;
+error:
+ return ret;
+}
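+/* Worked example (assuming 64-bit words, i.e. COUNTER_BITS == 64):
+ * after counter 0 is seen, receiving counter 130 zeroes the two bitmap
+ * words it slides past, then records bit 130 % COUNTER_BITS in word
+ * (130 / COUNTER_BITS) % COUNTER_NUM. A replayed 130 hits that bit and
+ * is rejected; any counter older than the window
+ * (recv + COUNTER_WINDOW_SIZE < c_recv) is rejected outright. */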
+
+static_always_inline void
+noise_remote_keypair_free (vlib_main_t *vm, noise_remote_t *r,
+ noise_keypair_t **kp)
+{
+ noise_local_t *local = noise_local_get (r->r_local_idx);
+ struct noise_upcall *u = &local->l_upcall;
+ if (*kp)
+ {
+ u->u_index_drop (vm, (*kp)->kp_local_index);
+ vnet_crypto_key_del (vm, (*kp)->kp_send_index);
+ vnet_crypto_key_del (vm, (*kp)->kp_recv_index);
+ clib_mem_free (*kp);
+ }
+}
#endif /* __included_wg_noise_h__ */
diff --git a/src/plugins/wireguard/wireguard_output_tun.c b/src/plugins/wireguard/wireguard_output_tun.c
index 53a8797c973..c9411f6ff20 100755..100644
--- a/src/plugins/wireguard/wireguard_output_tun.c
+++ b/src/plugins/wireguard/wireguard_output_tun.c
@@ -21,11 +21,12 @@
#include <wireguard/wireguard.h>
#include <wireguard/wireguard_send.h>
-#define foreach_wg_output_error \
- _(NONE, "No error") \
- _(PEER, "Peer error") \
- _(KEYPAIR, "Keypair error") \
- _(TOO_BIG, "packet too big") \
+#define foreach_wg_output_error \
+ _ (NONE, "No error") \
+ _ (PEER, "Peer error") \
+ _ (KEYPAIR, "Keypair error") \
+ _ (NO_BUFFERS, "No buffers") \
+  _ (CRYPTO_ENGINE_ERROR, "Crypto engine error (packet dropped)")
typedef enum
{
@@ -51,18 +52,34 @@ typedef enum
typedef struct
{
- ip4_udp_header_t hdr;
index_t peer;
+ u8 header[sizeof (ip6_udp_header_t)];
+ u8 is_ip4;
} wg_output_tun_trace_t;
+typedef struct
+{
+ index_t peer;
+ u32 next_index;
+} wg_output_tun_post_trace_t;
+
u8 *
format_ip4_udp_header (u8 * s, va_list * args)
{
- ip4_udp_header_t *hdr = va_arg (*args, ip4_udp_header_t *);
+ ip4_udp_header_t *hdr4 = va_arg (*args, ip4_udp_header_t *);
+
+ s = format (s, "%U:%U", format_ip4_header, &hdr4->ip4, format_udp_header,
+ &hdr4->udp);
+ return (s);
+}
- s = format (s, "%U:$U",
- format_ip4_header, &hdr->ip4, format_udp_header, &hdr->udp);
+u8 *
+format_ip6_udp_header (u8 *s, va_list *args)
+{
+ ip6_udp_header_t *hdr6 = va_arg (*args, ip6_udp_header_t *);
+ s = format (s, "%U:%U", format_ip6_header, &hdr6->ip6, format_udp_header,
+ &hdr6->udp);
return (s);
}
@@ -76,50 +93,415 @@ format_wg_output_tun_trace (u8 * s, va_list * args)
wg_output_tun_trace_t *t = va_arg (*args, wg_output_tun_trace_t *);
s = format (s, "peer: %d\n", t->peer);
- s = format (s, " Encrypted packet: %U", format_ip4_udp_header, &t->hdr);
+ s = format (s, " Encrypted packet: ");
+
+ s = t->is_ip4 ? format (s, "%U", format_ip4_udp_header, t->header) :
+ format (s, "%U", format_ip6_udp_header, t->header);
return s;
}
-VLIB_NODE_FN (wg_output_tun_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+/* post node - packet trace format function */
+static u8 *
+format_wg_output_tun_post_trace (u8 *s, va_list *args)
{
- u32 n_left_from;
- u32 *from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u16 nexts[VLIB_FRAME_SIZE], *next;
- u32 thread_index = vm->thread_index;
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- b = bufs;
- next = nexts;
+ wg_output_tun_post_trace_t *t = va_arg (*args, wg_output_tun_post_trace_t *);
- vlib_get_buffers (vm, from, bufs, n_left_from);
+ s = format (s, "peer: %d\n", t->peer);
+ s = format (s, " wg-post: next node index %u", t->next_index);
+ return s;
+}
+
+static_always_inline void
+wg_output_chain_crypto (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vlib_buffer_t *b, vlib_buffer_t *lb, u8 *start,
+ u32 start_len, u16 *n_ch)
+{
+ vnet_crypto_op_chunk_t *ch;
+ vlib_buffer_t *cb = b;
+ u32 n_chunks = 1;
+
+ vec_add2 (ptd->chunks, ch, 1);
+ ch->len = start_len;
+ ch->src = ch->dst = start;
+ cb = vlib_get_buffer (vm, cb->next_buffer);
+
+ while (1)
+ {
+ vec_add2 (ptd->chunks, ch, 1);
+ n_chunks += 1;
+ if (lb == cb)
+ ch->len = cb->current_length - NOISE_AUTHTAG_LEN;
+ else
+ ch->len = cb->current_length;
+
+ ch->src = ch->dst = vlib_buffer_get_current (cb);
+
+ if (!(cb->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+
+ cb = vlib_get_buffer (vm, cb->next_buffer);
+ }
+
+ if (n_ch)
+ *n_ch = n_chunks;
+}
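
The chunk gather reduces to a walk over a buffer chain that emits one in-place (src == dst) chunk per buffer and trims the auth tag from the last one. A sketch on a plain linked list; the demo_* types and the 16-byte tag length are illustrative stand-ins:

#include <stdint.h>

#define DEMO_AUTHTAG_LEN 16

typedef struct demo_buf
{
  struct demo_buf *next; /* NULL on the last buffer of the chain */
  uint8_t *data;
  uint32_t len;
} demo_buf_t;

typedef struct
{
  uint8_t *src; /* in-place encryption: src doubles as dst */
  uint32_t len;
} demo_chunk_t;

/* one chunk per buffer; the auth tag stored in the tail of the last
   buffer is excluded from the ciphertext length, as above */
static uint16_t
demo_chain_chunks (demo_buf_t *b, demo_chunk_t *out, uint16_t max)
{
  uint16_t n = 0;
  for (; b && n < max; b = b->next)
    {
      out[n].src = b->data;
      out[n].len = b->next ? b->len : b->len - DEMO_AUTHTAG_LEN;
      n++;
    }
  return n;
}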
+
+static_always_inline void
+wg_prepare_sync_enc_op (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vlib_buffer_t *b, vlib_buffer_t *lb,
+ vnet_crypto_op_t **crypto_ops, u8 *src, u32 src_len,
+ u8 *dst, u8 *aad, u32 aad_len, u64 nonce,
+ vnet_crypto_key_index_t key_index, u32 bi, u8 *iv)
+{
+ vnet_crypto_op_t _op, *op = &_op;
+ u8 src_[] = {};
+
+ clib_memset (iv, 0, 4);
+ clib_memcpy (iv + 4, &nonce, sizeof (nonce));
+
+ vec_add2_aligned (crypto_ops[0], op, 1, CLIB_CACHE_LINE_BYTES);
+ vnet_crypto_op_init (op, VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC);
+
+ op->tag_len = NOISE_AUTHTAG_LEN;
+ op->tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
+ op->key_index = key_index;
+ op->aad = aad;
+ op->aad_len = aad_len;
+ op->iv = iv;
+ op->user_data = bi;
+
+ if (b != lb)
+ {
+ /* Chained buffers */
+ op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+ op->chunk_index = vec_len (ptd->chunks);
+ wg_output_chain_crypto (vm, ptd, b, lb, src, src_len, &op->n_chunks);
+ }
+ else
+ {
+ op->src = !src ? src_ : src;
+ op->len = src_len;
+ op->dst = dst;
+ }
+}
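
The four zeroed bytes plus the copied counter form the 96-bit ChaCha20-Poly1305 nonce: 32 zero bits followed by the 64-bit little-endian send counter. The same layout in isolation (assuming a little-endian host, as the memcpy above does):

#include <stdint.h>
#include <string.h>

static void
demo_build_iv (uint8_t iv[12], uint64_t counter)
{
  memset (iv, 0, 4);                           /* 32 zero bits */
  memcpy (iv + 4, &counter, sizeof (counter)); /* 64-bit LE counter */
}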
+
+static_always_inline void
+wg_output_process_chained_ops (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_crypto_op_t *ops, vlib_buffer_t *b[],
+ u16 *nexts, vnet_crypto_op_chunk_t *chunks,
+ u16 drop_next)
+{
+ u32 n_fail, n_ops = vec_len (ops);
+ vnet_crypto_op_t *op = ops;
+
+ if (n_ops == 0)
+ return;
+
+ n_fail = n_ops - vnet_crypto_process_chained_ops (vm, op, chunks, n_ops);
+
+ while (n_fail)
+ {
+ ASSERT (op - ops < n_ops);
+
+ if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ {
+ u32 bi = op->user_data;
+ b[bi]->error = node->errors[WG_OUTPUT_ERROR_CRYPTO_ENGINE_ERROR];
+ nexts[bi] = drop_next;
+ n_fail--;
+ }
+ op++;
+ }
+}
+
+static_always_inline void
+wg_output_process_ops (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_crypto_op_t *ops, vlib_buffer_t *b[], u16 *nexts,
+ u16 drop_next)
+{
+ u32 n_fail, n_ops = vec_len (ops);
+ vnet_crypto_op_t *op = ops;
+
+ if (n_ops == 0)
+ return;
+ n_fail = n_ops - vnet_crypto_process_ops (vm, op, n_ops);
+
+ while (n_fail)
+ {
+ ASSERT (op - ops < n_ops);
+
+ if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ {
+ u32 bi = op->user_data;
+ b[bi]->error = node->errors[WG_OUTPUT_ERROR_CRYPTO_ENGINE_ERROR];
+ nexts[bi] = drop_next;
+ n_fail--;
+ }
+ op++;
+ }
+}
+
+static_always_inline void
+wg_output_tun_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
+ u32 key_index, u32 crypto_len,
+ i16 crypto_start_offset, u32 buffer_index,
+ u16 next_node, u8 *iv, u8 *tag, u8 flags)
+{
+ vnet_crypto_async_frame_elt_t *fe;
+ u16 index;
+
+ ASSERT (f->n_elts < VNET_CRYPTO_FRAME_SIZE);
+
+ index = f->n_elts;
+ fe = &f->elts[index];
+ f->n_elts++;
+ fe->key_index = key_index;
+ fe->crypto_total_length = crypto_len;
+ fe->crypto_start_offset = crypto_start_offset;
+ fe->iv = iv;
+ fe->tag = tag;
+ fe->flags = flags;
+ f->buffer_indices[index] = buffer_index;
+ f->next_node_index[index] = next_node;
+}
+
+static_always_inline enum noise_state_crypt
+wg_output_tun_process (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vlib_buffer_t *b, vlib_buffer_t *lb,
+ vnet_crypto_op_t **crypto_ops, noise_remote_t *r,
+ uint32_t *r_idx, uint64_t *nonce, uint8_t *src,
+ size_t srclen, uint8_t *dst, u32 bi, u8 *iv, f64 time)
+{
+ noise_keypair_t *kp;
+ enum noise_state_crypt ret = SC_FAILED;
+
+ if ((kp = r->r_current) == NULL)
+ goto error;
+
+ /* We confirm that our values are within our tolerances. We want:
+ * - a valid keypair
+ * - our keypair to be less than REJECT_AFTER_TIME seconds old
+ * - our receive counter to be less than REJECT_AFTER_MESSAGES
+ * - our send counter to be less than REJECT_AFTER_MESSAGES
+ */
+ if (!kp->kp_valid ||
+ wg_birthdate_has_expired_opt (kp->kp_birthdate, REJECT_AFTER_TIME,
+ time) ||
+ kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES ||
+ ((*nonce = noise_counter_send (&kp->kp_ctr)) > REJECT_AFTER_MESSAGES))
+ goto error;
+
+ /* We encrypt into the same buffer, so the caller must ensure that buf
+ * has NOISE_AUTHTAG_LEN bytes to store the MAC. The nonce and index
+ * are passed back out to the caller through the provided data pointer. */
+ *r_idx = kp->kp_remote_index;
+
+ wg_prepare_sync_enc_op (vm, ptd, b, lb, crypto_ops, src, srclen, dst, NULL,
+ 0, *nonce, kp->kp_send_index, bi, iv);
+
+ /* If our values are still within tolerances, but we are approaching
+ * them, we return SC_KEEP_KEY_FRESH so that the caller establishes a
+ * new keypair. The current keypair can continue to be used until the
+ * tolerances are hit. We notify if:
+ * - the keypair is valid and our send counter has reached
+ * REKEY_AFTER_MESSAGES
+ * - we're the initiator and our keypair is older than
+ * REKEY_AFTER_TIME seconds */
+ ret = SC_KEEP_KEY_FRESH;
+ if ((kp->kp_valid && *nonce >= REKEY_AFTER_MESSAGES) ||
+ (kp->kp_is_initiator && wg_birthdate_has_expired_opt (
+ kp->kp_birthdate, REKEY_AFTER_TIME, time)))
+ goto error;
+
+ ret = SC_OK;
+error:
+ return ret;
+}
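
The reject/rekey checks reduce to a rough three-way classification of the keypair. A sketch using the protocol constants from the WireGuard paper; the demo_* names are stand-ins and the receive-counter check is omitted:

#include <stdbool.h>
#include <stdint.h>

enum demo_state { DEMO_OK, DEMO_KEEP_KEY_FRESH, DEMO_FAILED };

#define DEMO_REJECT_AFTER_TIME 180.0 /* seconds */
#define DEMO_REKEY_AFTER_TIME 120.0
#define DEMO_REJECT_AFTER_MESSAGES (UINT64_MAX - (1ULL << 13))
#define DEMO_REKEY_AFTER_MESSAGES (1ULL << 60)

static enum demo_state
demo_classify (bool kp_valid, bool is_initiator, double kp_age,
               uint64_t send_ctr)
{
  /* hard limits: encrypting any further would be unsafe */
  if (!kp_valid || kp_age > DEMO_REJECT_AFTER_TIME ||
      send_ctr > DEMO_REJECT_AFTER_MESSAGES)
    return DEMO_FAILED;
  /* soft limits: keep using the key but ask for a fresh handshake */
  if (send_ctr >= DEMO_REKEY_AFTER_MESSAGES ||
      (is_initiator && kp_age > DEMO_REKEY_AFTER_TIME))
    return DEMO_KEEP_KEY_FRESH;
  return DEMO_OK;
}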
+
+static_always_inline enum noise_state_crypt
+wg_add_to_async_frame (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vnet_crypto_async_frame_t **async_frame,
+ vlib_buffer_t *b, vlib_buffer_t *lb, u8 *payload,
+ u32 payload_len, u32 bi, u16 next, u16 async_next,
+ noise_remote_t *r, uint32_t *r_idx, uint64_t *nonce,
+ u8 *iv, f64 time)
+{
+ wg_post_data_t *post = wg_post_data (b);
+ u8 flag = 0;
+ u8 *tag;
+ noise_keypair_t *kp;
+
+ post->next_index = next;
+
+ /* crypto */
+ enum noise_state_crypt ret = SC_FAILED;
+
+ if ((kp = r->r_current) == NULL)
+ goto error;
+
+ /* We confirm that our values are within our tolerances. We want:
+ * - a valid keypair
+ * - our keypair to be less than REJECT_AFTER_TIME seconds old
+ * - our receive counter to be less than REJECT_AFTER_MESSAGES
+ * - our send counter to be less than REJECT_AFTER_MESSAGES
+ */
+ if (!kp->kp_valid ||
+ wg_birthdate_has_expired_opt (kp->kp_birthdate, REJECT_AFTER_TIME,
+ time) ||
+ kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES ||
+ ((*nonce = noise_counter_send (&kp->kp_ctr)) > REJECT_AFTER_MESSAGES))
+ goto error;
+
+ /* We encrypt into the same buffer, so the caller must ensure that buf
+ * has NOISE_AUTHTAG_LEN bytes to store the MAC. The nonce and index
+ * are passed back out to the caller through the provided data pointer. */
+ *r_idx = kp->kp_remote_index;
+
+ clib_memset (iv, 0, 4);
+ clib_memcpy (iv + 4, nonce, sizeof (*nonce));
+
+ /* get a frame for this op if we don't yet have one or it's full */
+ if (NULL == *async_frame || vnet_crypto_async_frame_is_full (*async_frame))
+ {
+ *async_frame = vnet_crypto_async_get_frame (
+ vm, VNET_CRYPTO_OP_CHACHA20_POLY1305_TAG16_AAD0_ENC);
+ if (PREDICT_FALSE (NULL == *async_frame))
+ goto error;
+ /* Save the frame to the list we'll submit at the end */
+ vec_add1 (ptd->async_frames, *async_frame);
+ }
+
+ if (b != lb)
+ flag |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+
+ tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
+
+ /* this always succeeds because we know the frame is not full */
+ wg_output_tun_add_to_frame (vm, *async_frame, kp->kp_send_index, payload_len,
+ payload - b->data, bi, async_next, iv, tag,
+ flag);
+
+ /* If our values are still within tolerances, but we are approaching
+ * them, we return SC_KEEP_KEY_FRESH so that the caller establishes a
+ * new keypair. The current keypair can continue to be used until the
+ * tolerances are hit. We notify if:
+ * - the keypair is valid and our send counter has reached
+ * REKEY_AFTER_MESSAGES
+ * - we're the initiator and our keypair is older than
+ * REKEY_AFTER_TIME seconds */
+ ret = SC_KEEP_KEY_FRESH;
+ if ((kp->kp_valid && *nonce >= REKEY_AFTER_MESSAGES) ||
+ (kp->kp_is_initiator && wg_birthdate_has_expired_opt (
+ kp->kp_birthdate, REKEY_AFTER_TIME, time)))
+ goto error;
+
+ ret = SC_OK;
+error:
+ return ret;
+}
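
The frame management is the piece that differs from the synchronous path: work is appended to the current async frame, a fresh frame is opened whenever the current one is missing or full, and every opened frame is remembered for one batch submit after the packet loop. The skeleton of that idiom, with hypothetical demo_* names:

#include <stddef.h>

#define DEMO_FRAME_ELTS 4

typedef struct
{
  int n_elts;
  int work[DEMO_FRAME_ELTS];
} demo_frame_t;

/* append one element, opening a new frame when needed; every opened
   frame is also counted in n_open so the caller can submit them all
   in one pass, as the node does with ptd->async_frames */
static int
demo_frame_add (demo_frame_t **cur, demo_frame_t *pool, int pool_len,
                int *n_open, int w)
{
  if (*cur == NULL || (*cur)->n_elts == DEMO_FRAME_ELTS)
    {
      if (*n_open == pool_len)
        return -1; /* no frame available: fail, like the error path above */
      *cur = &pool[(*n_open)++];
      (*cur)->n_elts = 0;
    }
  (*cur)->work[(*cur)->n_elts++] = w;
  return 0;
}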
+
+static_always_inline void
+wg_calc_checksum (vlib_main_t *vm, vlib_buffer_t *b)
+{
+ int bogus = 0;
+ u8 ip_ver_out = (*((u8 *) vlib_buffer_get_current (b)) >> 4);
+
+ /* IPv6 UDP checksum is mandatory */
+ if (ip_ver_out == 6)
+ {
+ ip6_header_t *ip6 =
+ (ip6_header_t *) ((u8 *) vlib_buffer_get_current (b));
+ udp_header_t *udp = ip6_next_header (ip6);
+ udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ }
+}
+
+/* is_ip4 - inner header flag */
+always_inline uword
+wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u8 is_ip4, u16 async_next_node)
+{
wg_main_t *wmp = &wg_main;
+ wg_per_thread_data_t *ptd =
+ vec_elt_at_index (wmp->per_thread_data, vm->thread_index);
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from = frame->n_vectors;
+ ip4_udp_wg_header_t *hdr4_out = NULL;
+ ip6_udp_wg_header_t *hdr6_out = NULL;
+ message_data_t *message_data_wg = NULL;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ vlib_buffer_t *lb;
+ vnet_crypto_op_t **crypto_ops;
+ u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+ vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE];
+ u32 thread_index = vm->thread_index;
+ u16 n_sync = 0;
+ const u16 drop_next = WG_OUTPUT_NEXT_ERROR;
+ const u8 is_async = wg_op_mode_is_set_ASYNC ();
+ vnet_crypto_async_frame_t *async_frame = NULL;
+ u16 n_async = 0;
+ u16 noop_nexts[VLIB_FRAME_SIZE], *noop_next = noop_nexts, n_noop = 0;
+ u16 err = !0;
+ u32 sync_bi[VLIB_FRAME_SIZE];
+ u32 noop_bi[VLIB_FRAME_SIZE];
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+ vec_reset_length (ptd->crypto_ops);
+ vec_reset_length (ptd->chained_crypto_ops);
+ vec_reset_length (ptd->chunks);
+ vec_reset_length (ptd->async_frames);
+
wg_peer_t *peer = NULL;
+ u32 adj_index = 0;
+ u32 last_adj_index = ~0;
+ index_t peeri = INDEX_INVALID;
+
+ f64 time = clib_time_now (&vm->clib_time) + vm->time_offset;
while (n_left_from > 0)
{
- ip4_udp_header_t *hdr = vlib_buffer_get_current (b[0]);
- u8 *plain_data = (vlib_buffer_get_current (b[0]) +
- sizeof (ip4_udp_header_t));
- u16 plain_data_len =
- clib_net_to_host_u16 (((ip4_header_t *) plain_data)->length);
- index_t peeri;
+ u8 iph_offset = 0;
+ u8 is_ip4_out = 1;
+ u8 *plain_data;
+ u16 plain_data_len;
+ u16 plain_data_len_total;
+ u16 n_bufs;
+ u16 b_space_left_at_beginning;
+ u32 bi = from[b - bufs];
+
+ if (n_left_from > 2)
+ {
+ u8 *p;
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ p = vlib_buffer_get_current (b[1]);
+ CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (vlib_buffer_get_tail (b[1]), CLIB_CACHE_LINE_BYTES,
+ LOAD);
+ }
- next[0] = WG_OUTPUT_NEXT_ERROR;
- peeri =
- wg_peer_get_by_adj_index (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
- peer = wg_peer_get (peeri);
+ noop_next[0] = WG_OUTPUT_NEXT_ERROR;
+ err = WG_OUTPUT_NEXT_ERROR;
- if (!peer || peer->is_dead)
+ adj_index = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
+
+ if (PREDICT_FALSE (last_adj_index != adj_index))
+ {
+ peeri = wg_peer_get_by_adj_index (adj_index);
+ if (peeri == INDEX_INVALID)
+ {
+ b[0]->error = node->errors[WG_OUTPUT_ERROR_PEER];
+ goto out;
+ }
+ peer = wg_peer_get (peeri);
+ }
+
+ if (!peer || wg_peer_is_dead (peer))
{
b[0]->error = node->errors[WG_OUTPUT_ERROR_PEER];
goto out;
}
-
if (PREDICT_FALSE (~0 == peer->output_thread_index))
{
/* this is the first packet to use this peer, claim the peer
@@ -129,9 +511,10 @@ VLIB_NODE_FN (wg_output_tun_node) (vlib_main_t * vm,
wg_peer_assign_thread (thread_index));
}
- if (PREDICT_TRUE (thread_index != peer->output_thread_index))
+ if (PREDICT_FALSE (thread_index != peer->output_thread_index))
{
- next[0] = WG_OUTPUT_NEXT_HANDOFF;
+ noop_next[0] = WG_OUTPUT_NEXT_HANDOFF;
+ err = WG_OUTPUT_NEXT_HANDOFF;
goto next;
}
@@ -141,31 +524,119 @@ VLIB_NODE_FN (wg_output_tun_node) (vlib_main_t * vm,
b[0]->error = node->errors[WG_OUTPUT_ERROR_KEYPAIR];
goto out;
}
- size_t encrypted_packet_len = message_data_len (plain_data_len);
- /*
- * Ensure there is enough space to write the encrypted data
- * into the packet
- */
- if (PREDICT_FALSE (encrypted_packet_len >= WG_DEFAULT_DATA_SIZE) ||
- PREDICT_FALSE ((b[0]->current_data + encrypted_packet_len) >=
- vlib_buffer_get_default_data_size (vm)))
+ lb = b[0];
+ n_bufs = vlib_buffer_chain_linearize (vm, b[0]);
+ if (n_bufs == 0)
{
- b[0]->error = node->errors[WG_OUTPUT_ERROR_TOO_BIG];
+ b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS];
goto out;
}
- message_data_t *encrypted_packet =
- (message_data_t *) wmp->per_thread_data[thread_index].data;
+ if (n_bufs > 1)
+ {
+ /* Find last buffer in the chain */
+ while (lb->flags & VLIB_BUFFER_NEXT_PRESENT)
+ lb = vlib_get_buffer (vm, lb->next_buffer);
+ }
+
+ /* Ensure there is enough free space at the beginning of the first
+ * buffer to write the ethernet header (e.g. IPv6 VxLAN over IPv6
+ * WireGuard will trigger this)
+ */
+ ASSERT ((signed) b[0]->current_data >=
+ (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
+ b_space_left_at_beginning =
+ b[0]->current_data + VLIB_BUFFER_PRE_DATA_SIZE;
+ if (PREDICT_FALSE (b_space_left_at_beginning <
+ sizeof (ethernet_header_t)))
+ {
+ u32 size_diff =
+ sizeof (ethernet_header_t) - b_space_left_at_beginning;
+
+ /* Can only move the buffer when it is unchained and has enough free
+ * space */
+ if (lb == b[0] &&
+ vlib_buffer_space_left_at_end (vm, b[0]) >= size_diff)
+ {
+ vlib_buffer_move (vm, b[0],
+ b[0]->current_data + (signed) size_diff);
+ }
+ else
+ {
+ b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS];
+ goto out;
+ }
+ }
+
+ /*
+ * Ensure there is enough free space at the end of the last buffer to
+ * write auth tag */
+ if (PREDICT_FALSE (vlib_buffer_space_left_at_end (vm, lb) <
+ NOISE_AUTHTAG_LEN))
+ {
+ u32 tmp_bi = 0;
+ if (vlib_buffer_alloc (vm, &tmp_bi, 1) != 1)
+ {
+ b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS];
+ goto out;
+ }
+ lb = vlib_buffer_chain_buffer (vm, lb, tmp_bi);
+ }
+
+ iph_offset = vnet_buffer (b[0])->ip.save_rewrite_length;
+ plain_data = vlib_buffer_get_current (b[0]) + iph_offset;
+ plain_data_len = b[0]->current_length - iph_offset;
+ plain_data_len_total =
+ vlib_buffer_length_in_chain (vm, b[0]) - iph_offset;
+ size_t encrypted_packet_len = message_data_len (plain_data_len_total);
+ vlib_buffer_chain_increase_length (b[0], lb, NOISE_AUTHTAG_LEN);
+ u8 *iv_data = b[0]->pre_data;
+
+ is_ip4_out = ip46_address_is_ip4 (&peer->src.addr);
+ if (is_ip4_out)
+ {
+ hdr4_out = vlib_buffer_get_current (b[0]);
+ message_data_wg = &hdr4_out->wg;
+ }
+ else
+ {
+ hdr6_out = vlib_buffer_get_current (b[0]);
+ message_data_wg = &hdr6_out->wg;
+ }
+
+ if (PREDICT_FALSE (last_adj_index != adj_index))
+ {
+ wg_timers_any_authenticated_packet_sent_opt (peer, time);
+ wg_timers_data_sent_opt (peer, time);
+ wg_timers_any_authenticated_packet_traversal (peer);
+ last_adj_index = adj_index;
+ }
+
+ /* Here we are sure that we can send the packet to the next node */
+ next[0] = WG_OUTPUT_NEXT_INTERFACE_OUTPUT;
+
+ if (lb != b[0])
+ crypto_ops = &ptd->chained_crypto_ops;
+ else
+ crypto_ops = &ptd->crypto_ops;
enum noise_state_crypt state;
- state =
- noise_remote_encrypt (vm,
- &peer->remote,
- &encrypted_packet->receiver_index,
- &encrypted_packet->counter, plain_data,
- plain_data_len,
- encrypted_packet->encrypted_data);
+
+ if (is_async)
+ {
+ state = wg_add_to_async_frame (
+ vm, ptd, &async_frame, b[0], lb, plain_data, plain_data_len_total,
+ bi, next[0], async_next_node, &peer->remote,
+ &message_data_wg->receiver_index, &message_data_wg->counter,
+ iv_data, time);
+ }
+ else
+ {
+ state = wg_output_tun_process (
+ vm, ptd, b[0], lb, crypto_ops, &peer->remote,
+ &message_data_wg->receiver_index, &message_data_wg->counter,
+ plain_data, plain_data_len, plain_data, n_sync, iv_data, time);
+ }
if (PREDICT_FALSE (state == SC_KEEP_KEY_FRESH))
{
@@ -173,27 +644,31 @@ VLIB_NODE_FN (wg_output_tun_node) (vlib_main_t * vm,
}
else if (PREDICT_FALSE (state == SC_FAILED))
{
- //TODO: Maybe wrong
+ // TODO: Maybe wrong
wg_send_handshake_from_mt (peeri, false);
+ wg_peer_update_flags (peeri, WG_PEER_ESTABLISHED, false);
+ noop_next[0] = WG_OUTPUT_NEXT_ERROR;
goto out;
}
- /* Here we are sure that can send packet to next node */
- next[0] = WG_OUTPUT_NEXT_INTERFACE_OUTPUT;
- encrypted_packet->header.type = MESSAGE_DATA;
-
- clib_memcpy (plain_data, (u8 *) encrypted_packet, encrypted_packet_len);
+ err = WG_OUTPUT_NEXT_INTERFACE_OUTPUT;
- hdr->udp.length = clib_host_to_net_u16 (encrypted_packet_len +
- sizeof (udp_header_t));
- b[0]->current_length = (encrypted_packet_len +
- sizeof (ip4_header_t) + sizeof (udp_header_t));
- ip4_header_set_len_w_chksum
- (&hdr->ip4, clib_host_to_net_u16 (b[0]->current_length));
-
- wg_timers_any_authenticated_packet_sent (peer);
- wg_timers_data_sent (peer);
- wg_timers_any_authenticated_packet_traversal (peer);
+ if (is_ip4_out)
+ {
+ hdr4_out->wg.header.type = MESSAGE_DATA;
+ hdr4_out->udp.length = clib_host_to_net_u16 (encrypted_packet_len +
+ sizeof (udp_header_t));
+ ip4_header_set_len_w_chksum (
+ &hdr4_out->ip4, clib_host_to_net_u16 (encrypted_packet_len +
+ sizeof (ip4_udp_header_t)));
+ }
+ else
+ {
+ hdr6_out->wg.header.type = MESSAGE_DATA;
+ hdr6_out->ip6.payload_length = hdr6_out->udp.length =
+ clib_host_to_net_u16 (encrypted_packet_len +
+ sizeof (udp_header_t));
+ }
out:
if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
@@ -201,23 +676,262 @@ VLIB_NODE_FN (wg_output_tun_node) (vlib_main_t * vm,
{
wg_output_tun_trace_t *t =
vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->hdr = *hdr;
+
t->peer = peeri;
+ t->is_ip4 = is_ip4_out;
+ if (hdr4_out)
+ clib_memcpy (t->header, hdr4_out, sizeof (ip4_udp_header_t));
+ else if (hdr6_out)
+ clib_memcpy (t->header, hdr6_out, sizeof (ip6_udp_header_t));
}
+
next:
+ if (PREDICT_FALSE (err != WG_OUTPUT_NEXT_INTERFACE_OUTPUT))
+ {
+ noop_bi[n_noop] = bi;
+ n_noop++;
+ noop_next++;
+ goto next_left;
+ }
+ if (!is_async)
+ {
+ sync_bi[n_sync] = bi;
+ sync_bufs[n_sync] = b[0];
+ n_sync += 1;
+ next += 1;
+ }
+ else
+ {
+ n_async++;
+ }
+ next_left:
n_left_from -= 1;
- next += 1;
b += 1;
}
+ if (n_sync)
+ {
+ /* wg-output-process-ops */
+ wg_output_process_ops (vm, node, ptd->crypto_ops, sync_bufs, nexts,
+ drop_next);
+ wg_output_process_chained_ops (vm, node, ptd->chained_crypto_ops,
+ sync_bufs, nexts, ptd->chunks, drop_next);
+
+ for (int i = 0; i < n_sync; i++)
+ wg_calc_checksum (vm, sync_bufs[i]);
+
+ vlib_buffer_enqueue_to_next (vm, node, sync_bi, nexts, n_sync);
+ }
+ if (n_async)
+ {
+ /* submit all of the open frames */
+ vnet_crypto_async_frame_t **async_frame;
+
+ vec_foreach (async_frame, ptd->async_frames)
+ {
+ if (PREDICT_FALSE (
+ vnet_crypto_async_submit_open_frame (vm, *async_frame) < 0))
+ {
+ u32 n_drop = (*async_frame)->n_elts;
+ u32 *bi = (*async_frame)->buffer_indices;
+ u16 index = n_noop;
+ while (n_drop--)
+ {
+ noop_bi[index] = bi[0];
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi[0]);
+ noop_nexts[index] = drop_next;
+ b->error = node->errors[WG_OUTPUT_ERROR_CRYPTO_ENGINE_ERROR];
+ bi++;
+ index++;
+ }
+ n_noop += (*async_frame)->n_elts;
+
+ vnet_crypto_async_reset_frame (*async_frame);
+ vnet_crypto_async_free_frame (vm, *async_frame);
+ }
+ }
+ }
+ if (n_noop)
+ {
+ vlib_buffer_enqueue_to_next (vm, node, noop_bi, noop_nexts, n_noop);
+ }
+
+ return frame->n_vectors;
+}
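
The node's per-packet bookkeeping is a three-way partition into parallel index arrays: noop (errors and handoffs), synchronous crypto, and asynchronous crypto, each flushed once after the loop. A reduced skeleton of the idiom; the demo_* names and the encoding of cls[] are stand-ins:

#include <stdint.h>

/* cls[i] < 0: error/handoff, 0: inline crypto, > 0: async crypto */
static void
demo_partition (const int8_t cls[], uint32_t n, uint32_t noop[],
                uint32_t *n_noop, uint32_t sync[], uint32_t *n_sync,
                uint32_t *n_async)
{
  for (uint32_t i = 0; i < n; i++)
    {
      if (cls[i] < 0)
        noop[(*n_noop)++] = i;  /* dropped or handed off as-is */
      else if (cls[i] == 0)
        sync[(*n_sync)++] = i;  /* processed inline after the loop */
      else
        (*n_async)++;           /* carried inside an async frame */
    }
}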
+
+always_inline uword
+wg_output_tun_post (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left = frame->n_vectors;
+
+ index_t peeri = ~0;
+
+ vlib_get_buffers (vm, from, b, n_left);
+
+ if (n_left >= 4)
+ {
+ vlib_prefetch_buffer_header (b[0], LOAD);
+ vlib_prefetch_buffer_header (b[1], LOAD);
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ vlib_prefetch_buffer_header (b[3], LOAD);
+ }
+
+ while (n_left > 8)
+ {
+ vlib_prefetch_buffer_header (b[4], LOAD);
+ vlib_prefetch_buffer_header (b[5], LOAD);
+ vlib_prefetch_buffer_header (b[6], LOAD);
+ vlib_prefetch_buffer_header (b[7], LOAD);
+
+ next[0] = (wg_post_data (b[0]))->next_index;
+ next[1] = (wg_post_data (b[1]))->next_index;
+ next[2] = (wg_post_data (b[2]))->next_index;
+ next[3] = (wg_post_data (b[3]))->next_index;
+
+ wg_calc_checksum (vm, b[0]);
+ wg_calc_checksum (vm, b[1]);
+ wg_calc_checksum (vm, b[2]);
+ wg_calc_checksum (vm, b[3]);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ {
+ if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ wg_output_tun_post_trace_t *tr =
+ vlib_add_trace (vm, node, b[0], sizeof (*tr));
+ peeri = wg_peer_get_by_adj_index (
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+ tr->peer = peeri;
+ tr->next_index = next[0];
+ }
+ if (b[1]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ wg_output_tun_post_trace_t *tr =
+ vlib_add_trace (vm, node, b[1], sizeof (*tr));
+ peeri = wg_peer_get_by_adj_index (
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
+ tr->next_index = next[1];
+ }
+ if (b[2]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ wg_output_tun_post_trace_t *tr =
+ vlib_add_trace (vm, node, b[2], sizeof (*tr));
+ peeri = wg_peer_get_by_adj_index (
+ vnet_buffer (b[2])->ip.adj_index[VLIB_TX]);
+ tr->next_index = next[2];
+ }
+ if (b[3]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ wg_output_tun_post_trace_t *tr =
+ vlib_add_trace (vm, node, b[3], sizeof (*tr));
+ peeri = wg_peer_get_by_adj_index (
+ vnet_buffer (b[3])->ip.adj_index[VLIB_TX]);
+ tr->next_index = next[3];
+ }
+ }
+
+ b += 4;
+ next += 4;
+ n_left -= 4;
+ }
+
+ while (n_left > 0)
+ {
+ wg_calc_checksum (vm, b[0]);
+
+ next[0] = (wg_post_data (b[0]))->next_index;
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ wg_output_tun_post_trace_t *tr =
+ vlib_add_trace (vm, node, b[0], sizeof (*tr));
+ peeri = wg_peer_get_by_adj_index (
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+ tr->next_index = next[0];
+ }
+
+ b += 1;
+ next += 1;
+ n_left -= 1;
+ }
+
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
return frame->n_vectors;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (wg_output_tun_node) =
+VLIB_REGISTER_NODE (wg4_output_tun_post_node) = {
+ .name = "wg4-output-tun-post-node",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_output_tun_post_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .sibling_of = "wg4-output-tun",
+ .n_errors = ARRAY_LEN (wg_output_error_strings),
+ .error_strings = wg_output_error_strings,
+};
+
+VLIB_REGISTER_NODE (wg6_output_tun_post_node) = {
+ .name = "wg6-output-tun-post-node",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_output_tun_post_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .sibling_of = "wg6-output-tun",
+ .n_errors = ARRAY_LEN (wg_output_error_strings),
+ .error_strings = wg_output_error_strings,
+};
+
+VLIB_NODE_FN (wg4_output_tun_post_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return wg_output_tun_post (vm, node, from_frame);
+}
+
+VLIB_NODE_FN (wg6_output_tun_post_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return wg_output_tun_post (vm, node, from_frame);
+}
+
+VLIB_NODE_FN (wg4_output_tun_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return wg_output_tun_inline (vm, node, frame, /* is_ip4 */ 1,
+ wg_encrypt_async_next.wg4_post_next);
+}
+
+VLIB_NODE_FN (wg6_output_tun_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return wg_output_tun_inline (vm, node, frame, /* is_ip4 */ 0,
+ wg_encrypt_async_next.wg6_post_next);
+}
+
+VLIB_REGISTER_NODE (wg4_output_tun_node) =
+{
+ .name = "wg4-output-tun",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_output_tun_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (wg_output_error_strings),
+ .error_strings = wg_output_error_strings,
+ .n_next_nodes = WG_OUTPUT_N_NEXT,
+ .next_nodes = {
+ [WG_OUTPUT_NEXT_HANDOFF] = "wg4-output-tun-handoff",
+ [WG_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
+ [WG_OUTPUT_NEXT_ERROR] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (wg6_output_tun_node) =
{
- .name = "wg-output-tun",
+ .name = "wg6-output-tun",
.vector_size = sizeof (u32),
.format_trace = format_wg_output_tun_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
@@ -225,12 +939,11 @@ VLIB_REGISTER_NODE (wg_output_tun_node) =
.error_strings = wg_output_error_strings,
.n_next_nodes = WG_OUTPUT_N_NEXT,
.next_nodes = {
- [WG_OUTPUT_NEXT_HANDOFF] = "wg-output-tun-handoff",
+ [WG_OUTPUT_NEXT_HANDOFF] = "wg6-output-tun-handoff",
[WG_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
[WG_OUTPUT_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/wireguard/wireguard_peer.c b/src/plugins/wireguard/wireguard_peer.c
index f47019b110a..e71db86de0b 100644
--- a/src/plugins/wireguard/wireguard_peer.c
+++ b/src/plugins/wireguard/wireguard_peer.c
@@ -22,8 +22,8 @@
#include <wireguard/wireguard_key.h>
#include <wireguard/wireguard_send.h>
#include <wireguard/wireguard.h>
+#include <vnet/tunnel/tunnel_dp.h>
-static fib_source_t wg_fib_source;
wg_peer_t *wg_peer_pool;
index_t *wg_peer_by_adj_index;
@@ -36,48 +36,20 @@ wg_peer_endpoint_reset (wg_peer_endpoint_t * ep)
}
static void
-wg_peer_endpoint_init (wg_peer_endpoint_t * ep,
- const ip46_address_t * addr, u16 port)
+wg_peer_endpoint_init (wg_peer_endpoint_t *ep, const ip46_address_t *addr,
+ u16 port)
{
ip46_address_copy (&ep->addr, addr);
ep->port = port;
}
static void
-wg_peer_fib_flush (wg_peer_t * peer)
-{
- wg_peer_allowed_ip_t *allowed_ip;
-
- vec_foreach (allowed_ip, peer->allowed_ips)
- {
- fib_table_entry_delete_index (allowed_ip->fib_entry_index, wg_fib_source);
- allowed_ip->fib_entry_index = FIB_NODE_INDEX_INVALID;
- }
-}
-
-static void
-wg_peer_fib_populate (wg_peer_t * peer, u32 fib_index)
-{
- wg_peer_allowed_ip_t *allowed_ip;
-
- vec_foreach (allowed_ip, peer->allowed_ips)
- {
- allowed_ip->fib_entry_index =
- fib_table_entry_path_add (fib_index,
- &allowed_ip->prefix,
- wg_fib_source,
- FIB_ENTRY_FLAG_NONE,
- fib_proto_to_dpo (allowed_ip->
- prefix.fp_proto),
- &peer->dst.addr, peer->wg_sw_if_index, ~0, 1,
- NULL, FIB_ROUTE_PATH_FLAG_NONE);
- }
-}
-
-static void
wg_peer_clear (vlib_main_t * vm, wg_peer_t * peer)
{
+ index_t peeri = peer - wg_peer_pool;
wg_timers_stop (peer);
+ wg_peer_update_flags (peeri, WG_PEER_ESTABLISHED, false);
+ wg_peer_update_flags (peeri, WG_PEER_STATUS_DEAD, true);
for (int i = 0; i < WG_N_TIMERS; i++)
{
peer->timers[i] = ~0;
@@ -91,16 +63,16 @@ wg_peer_clear (vlib_main_t * vm, wg_peer_t * peer)
wg_peer_endpoint_reset (&peer->src);
wg_peer_endpoint_reset (&peer->dst);
- if (INDEX_INVALID != peer->adj_index)
+ adj_index_t *adj_index;
+ vec_foreach (adj_index, peer->adj_indices)
{
- adj_unlock (peer->adj_index);
- wg_peer_by_adj_index[peer->adj_index] = INDEX_INVALID;
- }
- wg_peer_fib_flush (peer);
+ wg_peer_by_adj_index[*adj_index] = INDEX_INVALID;
+ if (adj_is_valid (*adj_index))
+ adj_midchain_delegate_unstack (*adj_index);
+ }
peer->input_thread_index = ~0;
peer->output_thread_index = ~0;
- peer->adj_index = INDEX_INVALID;
peer->timer_wheel = 0;
peer->persistent_keepalive_interval = 0;
peer->timer_handshake_attempts = 0;
@@ -111,107 +83,251 @@ wg_peer_clear (vlib_main_t * vm, wg_peer_t * peer)
peer->new_handshake_interval_tick = 0;
peer->rehandshake_interval_tick = 0;
peer->timer_need_another_keepalive = false;
- peer->is_dead = true;
+ peer->handshake_is_sent = false;
+ vec_free (peer->rewrite);
vec_free (peer->allowed_ips);
+ vec_free (peer->adj_indices);
}
static void
wg_peer_init (vlib_main_t * vm, wg_peer_t * peer)
{
- peer->adj_index = INDEX_INVALID;
+ peer->api_client_by_client_index = hash_create (0, sizeof (u32));
+ peer->api_clients = NULL;
wg_peer_clear (vm, peer);
}
-static u8 *
-wg_peer_build_rewrite (const wg_peer_t * peer)
-{
- // v4 only for now
- ip4_udp_header_t *hdr;
- u8 *rewrite = NULL;
-
- vec_validate (rewrite, sizeof (*hdr) - 1);
- hdr = (ip4_udp_header_t *) rewrite;
-
- hdr->ip4.ip_version_and_header_length = 0x45;
- hdr->ip4.ttl = 64;
- hdr->ip4.src_address = peer->src.addr.ip4;
- hdr->ip4.dst_address = peer->dst.addr.ip4;
- hdr->ip4.protocol = IP_PROTOCOL_UDP;
- hdr->ip4.checksum = ip4_header_checksum (&hdr->ip4);
-
- hdr->udp.src_port = clib_host_to_net_u16 (peer->src.port);
- hdr->udp.dst_port = clib_host_to_net_u16 (peer->dst.port);
- hdr->udp.checksum = 0;
-
- return (rewrite);
-}
-
static void
-wg_peer_adj_stack (wg_peer_t * peer)
+wg_peer_adj_stack (wg_peer_t *peer, adj_index_t ai)
{
ip_adjacency_t *adj;
u32 sw_if_index;
wg_if_t *wgi;
+ fib_protocol_t fib_proto;
- adj = adj_get (peer->adj_index);
+ if (!adj_is_valid (ai))
+ return;
+
+ adj = adj_get (ai);
sw_if_index = adj->rewrite_header.sw_if_index;
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->src.addr);
+ fib_proto = is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
wgi = wg_if_get (wg_if_find_by_sw_if_index (sw_if_index));
if (!wgi)
return;
- if (!vnet_sw_interface_is_admin_up (vnet_get_main (), wgi->sw_if_index))
+ if (!vnet_sw_interface_is_admin_up (vnet_get_main (), wgi->sw_if_index) ||
+ !wg_peer_can_send (peer))
{
- adj_midchain_delegate_unstack (peer->adj_index);
+ adj_midchain_delegate_unstack (ai);
}
else
{
- /* *INDENT-OFF* */
fib_prefix_t dst = {
- .fp_len = 32,
- .fp_proto = FIB_PROTOCOL_IP4,
- .fp_addr = peer->dst.addr,
+ .fp_len = is_ip4 ? 32 : 128,
+ .fp_proto = fib_proto,
+ .fp_addr = peer->dst.addr,
};
- /* *INDENT-ON* */
u32 fib_index;
- fib_index = fib_table_find (FIB_PROTOCOL_IP4, peer->table_id);
+ fib_index = fib_table_find (fib_proto, peer->table_id);
+
+ adj_midchain_delegate_stack (ai, fib_index, &dst);
+ }
+}
+
+static void
+wg_peer_adj_reset_stacking (adj_index_t ai)
+{
+ adj_midchain_delegate_remove (ai);
+}
+
+static void
+wg_peer_66_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b,
+ const void *data)
+{
+ u8 iph_offset = 0;
+ ip6_header_t *ip6_out;
+ ip6_header_t *ip6_in;
+
+ /* Must set locally originated otherwise we're not allowed to
+ fragment the packet later */
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+ ip6_out = vlib_buffer_get_current (b);
+ iph_offset = vnet_buffer (b)->ip.save_rewrite_length;
+ ip6_in = vlib_buffer_get_current (b) + iph_offset;
+
+ ip6_out->ip_version_traffic_class_and_flow_label =
+ ip6_in->ip_version_traffic_class_and_flow_label;
+}
+
+static void
+wg_peer_46_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b,
+ const void *data)
+{
+ u8 iph_offset = 0;
+ ip6_header_t *ip6_out;
+ ip4_header_t *ip4_in;
+
+ /* Must set locally originated otherwise we're not allowed to
+ fragment the packet later */
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+ ip6_out = vlib_buffer_get_current (b);
+ iph_offset = vnet_buffer (b)->ip.save_rewrite_length;
+ ip4_in = vlib_buffer_get_current (b) + iph_offset;
+
+ u32 vtcfl = 0x6 << 28;
+ vtcfl |= ip4_in->tos << 20;
+ vtcfl |= vnet_buffer (b)->ip.flow_hash & 0x000fffff;
- adj_midchain_delegate_stack (peer->adj_index, fib_index, &dst);
+ ip6_out->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (vtcfl);
+}
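
The first IPv6 word packs version (4 bits), traffic class (8 bits) and flow label (20 bits); the 4-to-6 fixup above fills it from the inner ToS byte and the buffer's flow hash. A worked example:

#include <stdint.h>
#include <stdio.h>

static uint32_t
demo_vtcfl (uint8_t inner_tos, uint32_t flow_hash)
{
  uint32_t v = 0x6u << 28;           /* version 6 */
  v |= (uint32_t) inner_tos << 20;   /* traffic class from inner ToS */
  v |= flow_hash & 0x000fffff;       /* 20-bit flow label */
  return v;
}

int
main (void)
{
  /* tos 0xb8 (DSCP EF), hash 0x12345 -> 0x6b812345 */
  printf ("0x%08x\n", demo_vtcfl (0xb8, 0x12345));
  return 0;
}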
+
+static adj_midchain_fixup_t
+wg_peer_get_fixup (wg_peer_t *peer, vnet_link_t lt)
+{
+ if (!ip46_address_is_ip4 (&peer->dst.addr))
+ {
+ if (lt == VNET_LINK_IP4)
+ return (wg_peer_46_fixup);
+ if (lt == VNET_LINK_IP6)
+ return (wg_peer_66_fixup);
}
+ return (NULL);
+}
+
+static void
+wg_peer_disable (vlib_main_t *vm, wg_peer_t *peer)
+{
+ index_t peeri = peer - wg_peer_pool;
+
+ wg_timers_stop (peer);
+ wg_peer_update_flags (peeri, WG_PEER_ESTABLISHED, false);
+
+ for (int i = 0; i < WG_N_TIMERS; i++)
+ {
+ peer->timers[i] = ~0;
+ peer->timers_dispatched[i] = 0;
+ }
+ peer->timer_handshake_attempts = 0;
+
+ peer->last_sent_handshake = vlib_time_now (vm) - (REKEY_TIMEOUT + 1);
+ peer->last_sent_packet = 0;
+ peer->last_received_packet = 0;
+ peer->session_derived = 0;
+ peer->rehandshake_started = 0;
+
+ peer->new_handshake_interval_tick = 0;
+ peer->rehandshake_interval_tick = 0;
+
+ peer->timer_need_another_keepalive = false;
+
+ noise_remote_clear (vm, &peer->remote);
+}
+
+static void
+wg_peer_enable (vlib_main_t *vm, wg_peer_t *peer)
+{
+ index_t peeri = peer - wg_peer_pool;
+ wg_if_t *wg_if;
+ u8 public_key[NOISE_PUBLIC_KEY_LEN];
+
+ wg_if = wg_if_get (wg_if_find_by_sw_if_index (peer->wg_sw_if_index));
+ clib_memcpy (public_key, peer->remote.r_public, NOISE_PUBLIC_KEY_LEN);
+
+ noise_remote_init (vm, &peer->remote, peeri, public_key, wg_if->local_idx);
+
+ wg_timers_send_first_handshake (peer);
}
walk_rc_t
-wg_peer_if_admin_state_change (wg_if_t * wgi, index_t peeri, void *data)
+wg_peer_if_admin_state_change (index_t peeri, void *data)
{
- wg_peer_adj_stack (wg_peer_get (peeri));
+ wg_peer_t *peer;
+ adj_index_t *adj_index;
+ vlib_main_t *vm = vlib_get_main ();
+
+ peer = wg_peer_get (peeri);
+ vec_foreach (adj_index, peer->adj_indices)
+ {
+ wg_peer_adj_stack (peer, *adj_index);
+ }
+
+ if (vnet_sw_interface_is_admin_up (vnet_get_main (), peer->wg_sw_if_index))
+ {
+ wg_peer_enable (vm, peer);
+ }
+ else
+ {
+ wg_peer_disable (vm, peer);
+ }
return (WALK_CONTINUE);
}
walk_rc_t
-wg_peer_if_table_change (wg_if_t * wgi, index_t peeri, void *data)
+wg_peer_if_adj_change (index_t peeri, void *data)
{
- wg_peer_table_bind_ctx_t *ctx = data;
+ adj_index_t *adj_index = data;
+ adj_midchain_fixup_t fixup;
+ ip_adjacency_t *adj;
wg_peer_t *peer;
+ fib_prefix_t *allowed_ip;
+
+ adj = adj_get (*adj_index);
peer = wg_peer_get (peeri);
+ vec_foreach (allowed_ip, peer->allowed_ips)
+ {
+ if (fib_prefix_is_cover_addr_46 (allowed_ip,
+ &adj->sub_type.nbr.next_hop))
+ {
+ vec_add1 (peer->adj_indices, *adj_index);
+
+ vec_validate_init_empty (wg_peer_by_adj_index, *adj_index,
+ INDEX_INVALID);
+ wg_peer_by_adj_index[*adj_index] = peeri;
+
+ fixup = wg_peer_get_fixup (peer, adj_get_link_type (*adj_index));
+ adj_nbr_midchain_update_rewrite (*adj_index, fixup, NULL,
+ ADJ_FLAG_MIDCHAIN_IP_STACK,
+ vec_dup (peer->rewrite));
+
+ wg_peer_adj_stack (peer, *adj_index);
+ return (WALK_STOP);
+ }
+ }
- wg_peer_fib_flush (peer);
- wg_peer_fib_populate (peer, ctx->new_fib_index);
+ return (WALK_CONTINUE);
+}
+adj_walk_rc_t
+wg_peer_adj_walk (adj_index_t ai, void *data)
+{
+ return wg_peer_if_adj_change ((*(index_t *) (data)), &ai) == WALK_CONTINUE ?
+ ADJ_WALK_RC_CONTINUE :
+ ADJ_WALK_RC_STOP;
+}
+
+walk_rc_t
+wg_peer_if_delete (index_t peeri, void *data)
+{
+ wg_peer_remove (peeri);
return (WALK_CONTINUE);
}
static int
-wg_peer_fill (vlib_main_t * vm, wg_peer_t * peer,
- u32 table_id,
- const ip46_address_t * dst,
- u16 port,
+wg_peer_fill (vlib_main_t *vm, wg_peer_t *peer, u32 table_id,
+ const ip46_address_t *dst, u16 port,
u16 persistent_keepalive_interval,
- const fib_prefix_t * allowed_ips, u32 wg_sw_if_index)
+ const fib_prefix_t *allowed_ips, u32 wg_sw_if_index)
{
+ index_t peeri = peer - wg_peer_pool;
wg_peer_endpoint_init (&peer->dst, dst, port);
peer->table_id = table_id;
@@ -219,7 +335,7 @@ wg_peer_fill (vlib_main_t * vm, wg_peer_t * peer,
peer->timer_wheel = &wg_main.timer_wheel;
peer->persistent_keepalive_interval = persistent_keepalive_interval;
peer->last_sent_handshake = vlib_time_now (vm) - (REKEY_TIMEOUT + 1);
- peer->is_dead = false;
+ wg_peer_update_flags (peeri, WG_PEER_STATUS_DEAD, false);
const wg_if_t *wgi = wg_if_get (wg_if_find_by_sw_if_index (wg_sw_if_index));
@@ -229,53 +345,102 @@ wg_peer_fill (vlib_main_t * vm, wg_peer_t * peer,
ip_address_to_46 (&wgi->src_ip, &peer->src.addr);
peer->src.port = wgi->port;
- /*
- * and an adjacency for the endpoint address in the overlay
- * on the wg interface
- */
- peer->rewrite = wg_peer_build_rewrite (peer);
-
- peer->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
- VNET_LINK_IP4,
- &peer->dst.addr, wgi->sw_if_index);
-
- vec_validate_init_empty (wg_peer_by_adj_index,
- peer->adj_index, INDEX_INVALID);
- wg_peer_by_adj_index[peer->adj_index] = peer - wg_peer_pool;
-
- adj_nbr_midchain_update_rewrite (peer->adj_index,
- NULL,
- NULL,
- ADJ_FLAG_MIDCHAIN_IP_STACK,
- vec_dup (peer->rewrite));
- wg_peer_adj_stack (peer);
-
- /*
- * add a route in the overlay to each of the allowed-ips
- */
- u32 ii;
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->dst.addr);
+ peer->rewrite = wg_build_rewrite (&peer->src.addr, peer->src.port,
+ &peer->dst.addr, peer->dst.port, is_ip4);
+ u32 ii;
vec_validate (peer->allowed_ips, vec_len (allowed_ips) - 1);
-
vec_foreach_index (ii, allowed_ips)
{
- peer->allowed_ips[ii].prefix = allowed_ips[ii];
+ peer->allowed_ips[ii] = allowed_ips[ii];
}
- wg_peer_fib_populate (peer,
- fib_table_get_index_for_sw_if_index
- (FIB_PROTOCOL_IP4, peer->wg_sw_if_index));
-
+ fib_protocol_t proto;
+ FOR_EACH_FIB_IP_PROTOCOL (proto)
+ {
+ adj_nbr_walk (wg_sw_if_index, proto, wg_peer_adj_walk, &peeri);
+ }
return (0);
}
+void
+wg_peer_update_flags (index_t peeri, wg_peer_flags flag, bool add_del)
+{
+ wg_peer_t *peer = wg_peer_get (peeri);
+ if ((add_del && (peer->flags & flag)) || (!add_del && !(peer->flags & flag)))
+ {
+ return;
+ }
+
+ peer->flags ^= flag;
+ wg_api_peer_event (peeri, peer->flags);
+}
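
The early return makes the XOR act as an idempotent set/clear, so wg_api_peer_event fires only on a real transition. The same logic in isolation; demo_update_flags is a stand-in:

#include <stdbool.h>

static unsigned
demo_update_flags (unsigned flags, unsigned flag, bool add_del, bool *changed)
{
  /* requested state already holds: nothing to toggle, no event */
  if ((add_del && (flags & flag)) || (!add_del && !(flags & flag)))
    {
      *changed = false;
      return flags;
    }
  *changed = true; /* real transition: here the patch fires the event */
  return flags ^ flag;
}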
+
+void
+wg_peer_update_endpoint (index_t peeri, const ip46_address_t *addr, u16 port)
+{
+ wg_peer_t *peer = wg_peer_get (peeri);
+
+ if (ip46_address_is_equal (&peer->dst.addr, addr) && peer->dst.port == port)
+ return;
+
+ wg_peer_endpoint_init (&peer->dst, addr, port);
+
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->dst.addr);
+ vec_free (peer->rewrite);
+ peer->rewrite = wg_build_rewrite (&peer->src.addr, peer->src.port,
+ &peer->dst.addr, peer->dst.port, is_ip4);
+
+ adj_index_t *adj_index;
+ vec_foreach (adj_index, peer->adj_indices)
+ {
+ if (adj_is_valid (*adj_index))
+ {
+ adj_midchain_fixup_t fixup =
+ wg_peer_get_fixup (peer, adj_get_link_type (*adj_index));
+ adj_nbr_midchain_update_rewrite (*adj_index, fixup, NULL,
+ ADJ_FLAG_MIDCHAIN_IP_STACK,
+ vec_dup (peer->rewrite));
+
+ wg_peer_adj_reset_stacking (*adj_index);
+ wg_peer_adj_stack (peer, *adj_index);
+ }
+ }
+}
+
+typedef struct wg_peer_upd_ep_args_t_
+{
+ index_t peeri;
+ ip46_address_t addr;
+ u16 port;
+} wg_peer_upd_ep_args_t;
+
+static void
+wg_peer_update_endpoint_thread_fn (wg_peer_upd_ep_args_t *args)
+{
+ wg_peer_update_endpoint (args->peeri, &args->addr, args->port);
+}
+
+void
+wg_peer_update_endpoint_from_mt (index_t peeri, const ip46_address_t *addr,
+ u16 port)
+{
+ wg_peer_upd_ep_args_t args = {
+ .peeri = peeri,
+ .port = port,
+ };
+
+ ip46_address_copy (&args.addr, addr);
+ vlib_rpc_call_main_thread (wg_peer_update_endpoint_thread_fn, (u8 *) &args,
+ sizeof (args));
+}
+
int
-wg_peer_add (u32 tun_sw_if_index,
- const u8 public_key[NOISE_PUBLIC_KEY_LEN],
- u32 table_id,
- const ip46_address_t * endpoint,
- const fib_prefix_t * allowed_ips,
- u16 port, u16 persistent_keepalive, u32 * peer_index)
+wg_peer_add (u32 tun_sw_if_index, const u8 public_key[NOISE_PUBLIC_KEY_LEN],
+ u32 table_id, const ip46_address_t *endpoint,
+ const fib_prefix_t *allowed_ips, u16 port,
+ u16 persistent_keepalive, u32 *peer_index)
{
wg_if_t *wg_if;
wg_peer_t *peer;
@@ -290,7 +455,6 @@ wg_peer_add (u32 tun_sw_if_index,
if (!wg_if)
return (VNET_API_ERROR_INVALID_SW_IF_INDEX);
- /* *INDENT-OFF* */
pool_foreach (peer, wg_peer_pool)
{
if (!memcmp (peer->remote.r_public, public_key, NOISE_PUBLIC_KEY_LEN))
@@ -298,12 +462,11 @@ wg_peer_add (u32 tun_sw_if_index,
return (VNET_API_ERROR_ENTRY_ALREADY_EXISTS);
}
}
- /* *INDENT-ON* */
if (pool_elts (wg_peer_pool) > MAX_PEERS)
return (VNET_API_ERROR_LIMIT_EXCEEDED);
- pool_get (wg_peer_pool, peer);
+ pool_get_zero (wg_peer_pool, peer);
wg_peer_init (vm, peer);
@@ -317,13 +480,13 @@ wg_peer_add (u32 tun_sw_if_index,
return (rv);
}
- noise_remote_init (&peer->remote, peer - wg_peer_pool, public_key,
+ noise_remote_init (vm, &peer->remote, peer - wg_peer_pool, public_key,
wg_if->local_idx);
cookie_maker_init (&peer->cookie_maker, public_key);
- if (peer->persistent_keepalive_interval != 0)
+ if (vnet_sw_interface_is_admin_up (vnet_get_main (), tun_sw_if_index))
{
- wg_send_keepalive (vm, peer);
+ wg_timers_send_first_handshake (peer);
}
*peer_index = peer - wg_peer_pool;
@@ -347,9 +510,6 @@ wg_peer_remove (index_t peeri)
wgi = wg_if_get (wg_if_find_by_sw_if_index (peer->wg_sw_if_index));
wg_if_peer_remove (wgi, peeri);
- vnet_feature_enable_disable ("ip4-output", "wg-output-tun",
- peer->wg_sw_if_index, 0, 0, 0);
-
noise_remote_clear (wmp->vlib_main, &peer->remote);
wg_peer_clear (wmp->vlib_main, peer);
pool_put (wg_peer_pool, peer);
@@ -362,13 +522,11 @@ wg_peer_walk (wg_peer_walk_cb_t fn, void *data)
{
index_t peeri;
- /* *INDENT-OFF* */
pool_foreach_index (peeri, wg_peer_pool)
{
if (WALK_STOP == fn(peeri, data))
return peeri;
}
- /* *INDENT-ON* */
return INDEX_INVALID;
}
@@ -377,8 +535,8 @@ format_wg_peer_endpoint (u8 * s, va_list * args)
{
wg_peer_endpoint_t *ep = va_arg (*args, wg_peer_endpoint_t *);
- s = format (s, "%U:%d",
- format_ip46_address, &ep->addr, IP46_TYPE_ANY, ep->port);
+ s = format (s, "%U:%d", format_ip46_address, &ep->addr, IP46_TYPE_ANY,
+ ep->port);
return (s);
}
@@ -387,48 +545,37 @@ u8 *
format_wg_peer (u8 * s, va_list * va)
{
index_t peeri = va_arg (*va, index_t);
- wg_peer_allowed_ip_t *allowed_ip;
+ fib_prefix_t *allowed_ip;
+ adj_index_t *adj_index;
u8 key[NOISE_KEY_LEN_BASE64];
wg_peer_t *peer;
peer = wg_peer_get (peeri);
key_to_base64 (peer->remote.r_public, NOISE_PUBLIC_KEY_LEN, key);
- s = format (s, "[%d] endpoint:[%U->%U] %U keep-alive:%d adj:%d",
- peeri,
- format_wg_peer_endpoint, &peer->src,
- format_wg_peer_endpoint, &peer->dst,
- format_vnet_sw_if_index_name, vnet_get_main (),
- peer->wg_sw_if_index,
- peer->persistent_keepalive_interval, peer->adj_index);
- s = format (s, "\n key:%=s %U",
- key, format_hex_bytes, peer->remote.r_public,
- NOISE_PUBLIC_KEY_LEN);
+ s = format (
+ s,
+ "[%d] endpoint:[%U->%U] %U keep-alive:%d flags: %d, api-clients count: %d",
+ peeri, format_wg_peer_endpoint, &peer->src, format_wg_peer_endpoint,
+ &peer->dst, format_vnet_sw_if_index_name, vnet_get_main (),
+ peer->wg_sw_if_index, peer->persistent_keepalive_interval, peer->flags,
+ pool_elts (peer->api_clients));
+ s = format (s, "\n adj:");
+ vec_foreach (adj_index, peer->adj_indices)
+ {
+ s = format (s, " %d", *adj_index);
+ }
+ s = format (s, "\n key:%=s %U", key, format_hex_bytes,
+ peer->remote.r_public, NOISE_PUBLIC_KEY_LEN);
s = format (s, "\n allowed-ips:");
vec_foreach (allowed_ip, peer->allowed_ips)
{
- s = format (s, " %U", format_fib_prefix, &allowed_ip->prefix);
+ s = format (s, " %U", format_fib_prefix, allowed_ip);
}
return s;
}
-static clib_error_t *
-wg_peer_module_init (vlib_main_t * vm)
-{
- /*
- * use a priority better than interface source, so that
- * if the same subnet is added to the wg interface and is
- * used as an allowed IP, then the wireguard soueced prefix
- * wins and traffic is routed to the endpoint rather than dropped
- */
- wg_fib_source = fib_source_allocate ("wireguard", 0x2, FIB_SOURCE_BH_API);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (wg_peer_module_init);
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/wireguard/wireguard_peer.h b/src/plugins/wireguard/wireguard_peer.h
index b60c669ac0f..613c2640ad1 100644
--- a/src/plugins/wireguard/wireguard_peer.h
+++ b/src/plugins/wireguard/wireguard_peer.h
@@ -17,6 +17,8 @@
#ifndef __included_wg_peer_h__
#define __included_wg_peer_h__
+#include <vlibapi/api_helper_macros.h>
+
#include <vnet/ip/ip.h>
#include <wireguard/wireguard_cookie.h>
@@ -31,13 +33,28 @@ typedef struct ip4_udp_header_t_
udp_header_t udp;
} __clib_packed ip4_udp_header_t;
-u8 *format_ip4_udp_header (u8 * s, va_list * va);
+typedef struct ip4_udp_wg_header_t_
+{
+ ip4_header_t ip4;
+ udp_header_t udp;
+ message_data_t wg;
+} __clib_packed ip4_udp_wg_header_t;
+
+typedef struct ip6_udp_header_t_
+{
+ ip6_header_t ip6;
+ udp_header_t udp;
+} __clib_packed ip6_udp_header_t;
-typedef struct wg_peer_allowed_ip_t_
+typedef struct ip6_udp_wg_header_t_
{
- fib_prefix_t prefix;
- fib_node_index_t fib_entry_index;
-} wg_peer_allowed_ip_t;
+ ip6_header_t ip6;
+ udp_header_t udp;
+ message_data_t wg;
+} __clib_packed ip6_udp_wg_header_t;
+
+u8 *format_ip4_udp_header (u8 * s, va_list * va);
+u8 *format_ip6_udp_header (u8 *s, va_list *va);
typedef struct wg_peer_endpoint_t_
{
@@ -45,6 +62,12 @@ typedef struct wg_peer_endpoint_t_
u16 port;
} wg_peer_endpoint_t;
+typedef enum
+{
+ WG_PEER_STATUS_DEAD = 0x1,
+ WG_PEER_ESTABLISHED = 0x2,
+} wg_peer_flags;
+
typedef struct wg_peer
{
noise_remote_t remote;
@@ -57,17 +80,22 @@ typedef struct wg_peer
wg_peer_endpoint_t dst;
wg_peer_endpoint_t src;
u32 table_id;
- adj_index_t adj_index;
+ adj_index_t *adj_indices;
/* rewrite built from address information */
u8 *rewrite;
/* Vector of allowed-ips */
- wg_peer_allowed_ip_t *allowed_ips;
+ fib_prefix_t *allowed_ips;
/* The WG interface this peer is attached to */
u32 wg_sw_if_index;
+ /* API client registered for events */
+ vpe_client_registration_t *api_clients;
+ uword *api_client_by_client_index;
+ wg_peer_flags flags;
+
/* Timers */
tw_timer_wheel_16t_2w_512sl_t *timer_wheel;
u32 timers[WG_N_TIMERS];
@@ -88,7 +116,8 @@ typedef struct wg_peer
bool timer_need_another_keepalive;
- bool is_dead;
+ /* Has a handshake RPC already been posted to the main thread? */
+ bool handshake_is_sent;
} wg_peer_t;
typedef struct wg_peer_table_bind_ctx_t_
@@ -111,9 +140,23 @@ index_t wg_peer_walk (wg_peer_walk_cb_t fn, void *data);
u8 *format_wg_peer (u8 * s, va_list * va);
-walk_rc_t wg_peer_if_admin_state_change (wg_if_t * wgi, index_t peeri,
- void *data);
-walk_rc_t wg_peer_if_table_change (wg_if_t * wgi, index_t peeri, void *data);
+walk_rc_t wg_peer_if_admin_state_change (index_t peeri, void *data);
+walk_rc_t wg_peer_if_delete (index_t peeri, void *data);
+walk_rc_t wg_peer_if_adj_change (index_t peeri, void *data);
+adj_walk_rc_t wg_peer_adj_walk (adj_index_t ai, void *data);
+
+void wg_api_peer_event (index_t peeri, wg_peer_flags flags);
+void wg_peer_update_flags (index_t peeri, wg_peer_flags flag, bool add_del);
+void wg_peer_update_endpoint (index_t peeri, const ip46_address_t *addr,
+ u16 port);
+void wg_peer_update_endpoint_from_mt (index_t peeri,
+ const ip46_address_t *addr, u16 port);
+
+static inline bool
+wg_peer_is_dead (wg_peer_t *peer)
+{
+ return peer && peer->flags & WG_PEER_STATUS_DEAD;
+}
/*
* Exposed for the data-plane
@@ -130,6 +173,8 @@ wg_peer_get (index_t peeri)
static inline index_t
wg_peer_get_by_adj_index (index_t ai)
{
+ if (ai >= vec_len (wg_peer_by_adj_index))
+ return INDEX_INVALID;
return (wg_peer_by_adj_index[ai]);
}
@@ -145,6 +190,29 @@ wg_peer_assign_thread (u32 thread_id)
1) : thread_id));
}
+static_always_inline bool
+fib_prefix_is_cover_addr_46 (const fib_prefix_t *p1, const ip46_address_t *ip)
+{
+ switch (p1->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_destination_matches_route (&ip4_main, &p1->fp_addr.ip4,
+ &ip->ip4, p1->fp_len) != 0);
+ case FIB_PROTOCOL_IP6:
+ return (ip6_destination_matches_route (&ip6_main, &p1->fp_addr.ip6,
+ &ip->ip6, p1->fp_len) != 0);
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ return (false);
+}
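
Both arms are a masked compare of the destination against the prefix; a self-contained IPv4 analogue on host-order addresses (the real code defers to the ip4/ip6 FIB helpers, and the demo_* name is a stand-in):

#include <stdbool.h>
#include <stdint.h>

static bool
demo_v4_prefix_covers (uint32_t prefix, uint8_t len, uint32_t addr)
{
  uint32_t mask = len ? ~0u << (32 - len) : 0u; /* len 0: default route */
  return ((prefix ^ addr) & mask) == 0;
}

/* e.g. 10.0.0.0/8 covers 10.1.2.10:
   demo_v4_prefix_covers (0x0a000000, 8, 0x0a01020a) == true */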
+
+static inline bool
+wg_peer_can_send (wg_peer_t *peer)
+{
+ return peer && peer->rewrite;
+}
+
#endif // __included_wg_peer_h__
/*
diff --git a/src/plugins/wireguard/wireguard_send.c b/src/plugins/wireguard/wireguard_send.c
index f492e05c175..41b2e7706a1 100755..100644
--- a/src/plugins/wireguard/wireguard_send.c
+++ b/src/plugins/wireguard/wireguard_send.c
@@ -22,11 +22,11 @@
#include <wireguard/wireguard_send.h>
static int
-ip46_enqueue_packet (vlib_main_t * vm, u32 bi0, int is_ip6)
+ip46_enqueue_packet (vlib_main_t *vm, u32 bi0, int is_ip4)
{
vlib_frame_t *f = 0;
u32 lookup_node_index =
- is_ip6 ? ip6_lookup_node.index : ip4_lookup_node.index;
+ is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
f = vlib_get_frame_to_node (vm, lookup_node_index);
/* f can not be NULL here - frame allocation failure causes panic */
@@ -41,25 +41,51 @@ ip46_enqueue_packet (vlib_main_t * vm, u32 bi0, int is_ip6)
}
static void
-wg_buffer_prepend_rewrite (vlib_buffer_t * b0, const wg_peer_t * peer)
+wg_buffer_prepend_rewrite (vlib_main_t *vm, vlib_buffer_t *b0,
+ const u8 *rewrite, u8 is_ip4)
{
- ip4_udp_header_t *hdr;
+ if (is_ip4)
+ {
+ ip4_udp_header_t *hdr4;
+
+ vlib_buffer_advance (b0, -sizeof (*hdr4));
+
+ hdr4 = vlib_buffer_get_current (b0);
+
+ /* copy only ip4 and udp header; wireguard header not needed */
+ clib_memcpy (hdr4, rewrite, sizeof (ip4_udp_header_t));
+
+ hdr4->udp.length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (ip4_header_t));
+ ip4_header_set_len_w_chksum (&hdr4->ip4,
+ clib_host_to_net_u16 (b0->current_length));
+ }
+ else
+ {
+ ip6_udp_header_t *hdr6;
+
+ vlib_buffer_advance (b0, -sizeof (*hdr6));
- vlib_buffer_advance (b0, -sizeof (*hdr));
+ hdr6 = vlib_buffer_get_current (b0);
- hdr = vlib_buffer_get_current (b0);
- clib_memcpy (hdr, peer->rewrite, vec_len (peer->rewrite));
+ /* copy only ip6 and udp header; wireguard header not needed */
+ clib_memcpy (hdr6, rewrite, sizeof (ip6_udp_header_t));
- hdr->udp.length =
- clib_host_to_net_u16 (b0->current_length - sizeof (ip4_header_t));
- ip4_header_set_len_w_chksum (&hdr->ip4,
- clib_host_to_net_u16 (b0->current_length));
+ hdr6->ip6.payload_length = hdr6->udp.length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));
+
+ /* IPv6 UDP checksum is mandatory */
+ int bogus = 0;
+ ip6_header_t *ip6_0 = &(hdr6->ip6);
+ hdr6->udp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6_0, &bogus);
+ ASSERT (bogus == 0);
+ }
}
static bool
-wg_create_buffer (vlib_main_t * vm,
- const wg_peer_t * peer,
- const u8 * packet, u32 packet_len, u32 * bi)
+wg_create_buffer (vlib_main_t *vm, const u8 *rewrite, const u8 *packet,
+ u32 packet_len, u32 *bi, u8 is_ip4)
{
u32 n_buf0 = 0;
vlib_buffer_t *b0;
@@ -75,23 +101,75 @@ wg_create_buffer (vlib_main_t * vm,
b0->current_length = packet_len;
- wg_buffer_prepend_rewrite (b0, peer);
+ wg_buffer_prepend_rewrite (vm, b0, rewrite, is_ip4);
return true;
}
+u8 *
+wg_build_rewrite (ip46_address_t *src_addr, u16 src_port,
+ ip46_address_t *dst_addr, u16 dst_port, u8 is_ip4)
+{
+ if (ip46_address_is_zero (dst_addr) || 0 == dst_port)
+ return NULL;
+
+ u8 *rewrite = NULL;
+ if (is_ip4)
+ {
+ ip4_udp_header_t *hdr;
+
+ /* reserve space for ip4, udp and wireguard headers */
+ vec_validate (rewrite, sizeof (ip4_udp_wg_header_t) - 1);
+ hdr = (ip4_udp_header_t *) rewrite;
+
+ hdr->ip4.ip_version_and_header_length = 0x45;
+ hdr->ip4.ttl = 64;
+ hdr->ip4.src_address = src_addr->ip4;
+ hdr->ip4.dst_address = dst_addr->ip4;
+ hdr->ip4.protocol = IP_PROTOCOL_UDP;
+ hdr->ip4.checksum = ip4_header_checksum (&hdr->ip4);
+
+ hdr->udp.src_port = clib_host_to_net_u16 (src_port);
+ hdr->udp.dst_port = clib_host_to_net_u16 (dst_port);
+ hdr->udp.checksum = 0;
+ }
+ else
+ {
+ ip6_udp_header_t *hdr;
+
+ /* reserve space for ip6, udp and wireguard headers */
+ vec_validate (rewrite, sizeof (ip6_udp_wg_header_t) - 1);
+ hdr = (ip6_udp_header_t *) rewrite;
+
+ hdr->ip6.ip_version_traffic_class_and_flow_label = 0x60;
+ ip6_address_copy (&hdr->ip6.src_address, &src_addr->ip6);
+ ip6_address_copy (&hdr->ip6.dst_address, &dst_addr->ip6);
+ hdr->ip6.protocol = IP_PROTOCOL_UDP;
+ hdr->ip6.hop_limit = 64;
+
+ hdr->udp.src_port = clib_host_to_net_u16 (src_port);
+ hdr->udp.dst_port = clib_host_to_net_u16 (dst_port);
+ hdr->udp.checksum = 0;
+ }
+
+ return (rewrite);
+}
+
bool
wg_send_handshake (vlib_main_t * vm, wg_peer_t * peer, bool is_retry)
{
ASSERT (vm->thread_index == 0);
+ if (!wg_peer_can_send (peer))
+ return false;
+
message_handshake_initiation_t packet;
if (!is_retry)
peer->timer_handshake_attempts = 0;
- if (!wg_birthdate_has_expired (peer->last_sent_handshake,
- REKEY_TIMEOUT) || peer->is_dead)
+ if (!wg_birthdate_has_expired (peer->last_sent_handshake, REKEY_TIMEOUT) ||
+ wg_peer_is_dead (peer))
return true;
if (noise_create_initiation (vm,
@@ -113,11 +191,13 @@ wg_send_handshake (vlib_main_t * vm, wg_peer_t * peer, bool is_retry)
else
return false;
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->dst.addr);
u32 bi0 = 0;
- if (!wg_create_buffer (vm, peer, (u8 *) & packet, sizeof (packet), &bi0))
+ if (!wg_create_buffer (vm, peer->rewrite, (u8 *) &packet, sizeof (packet),
+ &bi0, is_ip4))
return false;
- ip46_enqueue_packet (vm, bi0, false);
+ ip46_enqueue_packet (vm, bi0, is_ip4);
return true;
}
@@ -134,8 +214,11 @@ wg_send_handshake_thread_fn (void *arg)
wg_main_t *wmp = &wg_main;
wg_peer_t *peer = wg_peer_get (a->peer_idx);
 wg_send_handshake (wmp->vlib_main, peer, a->is_retry);
+ __atomic_store_n (&peer->handshake_is_sent, false, __ATOMIC_RELEASE);
return 0;
}
@@ -147,8 +230,18 @@ wg_send_handshake_from_mt (u32 peer_idx, bool is_retry)
.is_retry = is_retry,
};
- vl_api_rpc_call_main_thread (wg_send_handshake_thread_fn,
- (u8 *) & a, sizeof (a));
+ wg_peer_t *peer = wg_peer_get (peer_idx);
+
+ bool handshake =
+ __atomic_load_n (&peer->handshake_is_sent, __ATOMIC_ACQUIRE);
+
+ if (handshake == false)
+ {
+ handshake = true;
+ __atomic_store_n (&peer->handshake_is_sent, handshake, __ATOMIC_RELEASE);
+ vl_api_rpc_call_main_thread (wg_send_handshake_thread_fn, (u8 *) &a,
+ sizeof (a));
+ }
}
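
handshake_is_sent acts as a latch that coalesces handshake RPCs from the worker threads. A C11 sketch of the same idea using a compare-and-swap, a stricter variant that closes the small check-then-set window left open by the separate load and store above; all demo_* names are stand-ins:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool demo_handshake_pending;

/* worker side: only the first caller since the last completion wins
   and actually posts the RPC to the main thread */
static bool
demo_try_post_handshake (void)
{
  bool expected = false;
  return atomic_compare_exchange_strong (&demo_handshake_pending,
                                         &expected, true);
}

/* main-thread side: clear the latch once the handshake has been sent */
static void
demo_handshake_done (void)
{
  atomic_store_explicit (&demo_handshake_pending, false,
                         memory_order_release);
}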
bool
@@ -156,6 +249,9 @@ wg_send_keepalive (vlib_main_t * vm, wg_peer_t * peer)
{
ASSERT (vm->thread_index == 0);
+ if (!wg_peer_can_send (peer))
+ return false;
+
u32 size_of_packet = message_data_len (0);
message_data_t *packet =
(message_data_t *) wg_main.per_thread_data[vm->thread_index].data;
@@ -181,19 +277,22 @@ wg_send_keepalive (vlib_main_t * vm, wg_peer_t * peer)
}
else if (PREDICT_FALSE (state == SC_FAILED))
{
+ wg_peer_update_flags (peer - wg_peer_pool, WG_PEER_ESTABLISHED, false);
ret = false;
goto out;
}
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->dst.addr);
packet->header.type = MESSAGE_DATA;
- if (!wg_create_buffer (vm, peer, (u8 *) packet, size_of_packet, &bi0))
+ if (!wg_create_buffer (vm, peer->rewrite, (u8 *) packet, size_of_packet,
+ &bi0, is_ip4))
{
ret = false;
goto out;
}
- ip46_enqueue_packet (vm, bi0, false);
+ ip46_enqueue_packet (vm, bi0, is_ip4);
wg_timers_any_authenticated_packet_sent (peer);
wg_timers_any_authenticated_packet_traversal (peer);
@@ -207,6 +306,9 @@ wg_send_handshake_response (vlib_main_t * vm, wg_peer_t * peer)
{
message_handshake_response_t packet;
+ if (!wg_peer_can_send (peer))
+ return false;
+
if (noise_create_response (vm,
&peer->remote,
&packet.sender_index,
@@ -223,20 +325,52 @@ wg_send_handshake_response (vlib_main_t * vm, wg_peer_t * peer)
wg_timers_session_derived (peer);
wg_timers_any_authenticated_packet_sent (peer);
wg_timers_any_authenticated_packet_traversal (peer);
- peer->last_sent_handshake = vlib_time_now (vm);
u32 bi0 = 0;
- if (!wg_create_buffer (vm, peer, (u8 *) & packet,
- sizeof (packet), &bi0))
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->dst.addr);
+ if (!wg_create_buffer (vm, peer->rewrite, (u8 *) &packet,
+ sizeof (packet), &bi0, is_ip4))
return false;
- ip46_enqueue_packet (vm, bi0, false);
+ ip46_enqueue_packet (vm, bi0, is_ip4);
+ return true;
}
- else
- return false;
+ return false;
}
- else
+ return false;
+}
+
+bool
+wg_send_handshake_cookie (vlib_main_t *vm, u32 sender_index,
+ cookie_checker_t *cookie_checker,
+ message_macs_t *macs, ip46_address_t *wg_if_addr,
+ u16 wg_if_port, ip46_address_t *remote_addr,
+ u16 remote_port)
+{
+ message_handshake_cookie_t packet;
+ u8 *rewrite;
+
+ packet.header.type = MESSAGE_HANDSHAKE_COOKIE;
+ packet.receiver_index = sender_index;
+
+ cookie_checker_create_payload (vm, cookie_checker, macs, packet.nonce,
+ packet.encrypted_cookie, remote_addr,
+ remote_port);
+
+ u32 bi0 = 0;
+ u8 is_ip4 = ip46_address_is_ip4 (remote_addr);
+ bool ret;
+ rewrite = wg_build_rewrite (wg_if_addr, wg_if_port, remote_addr, remote_port,
+ is_ip4);
+
+ ret = wg_create_buffer (vm, rewrite, (u8 *) &packet, sizeof (packet), &bi0,
+ is_ip4);
+ vec_free (rewrite);
+ if (!ret)
return false;
+
+ ip46_enqueue_packet (vm, bi0, is_ip4);
+
return true;
}
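
The cookie path is deliberately self-contained: it builds a one-shot rewrite,
sends, and frees it, with no peer state required. A sketch of how a receive
path might use it when the node is under load; only wg_send_handshake_cookie
and its parameter types come from this patch, while the helper, its name, the
under_load flag, and the initiation field names (which follow the standard
WireGuard message layout) are assumptions:

static bool
wg_maybe_send_cookie (vlib_main_t *vm, cookie_checker_t *cc,
                      message_handshake_initiation_t *init,
                      ip46_address_t *if_addr, u16 if_port,
                      ip46_address_t *remote_addr, u16 remote_port,
                      bool under_load)
{
  if (!under_load)
    return false; /* process the handshake normally */

  /* Under load: answer with a stateless cookie reply instead of doing
     the expensive handshake work. */
  return wg_send_handshake_cookie (vm, init->sender_index, cc, &init->macs,
                                   if_addr, if_port, remote_addr,
                                   remote_port);
}
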
diff --git a/src/plugins/wireguard/wireguard_send.h b/src/plugins/wireguard/wireguard_send.h
index 9575b84b659..419783a5db2 100755..100644
--- a/src/plugins/wireguard/wireguard_send.h
+++ b/src/plugins/wireguard/wireguard_send.h
@@ -19,10 +19,17 @@
#include <wireguard/wireguard_peer.h>
+u8 *wg_build_rewrite (ip46_address_t *src_addr, u16 src_port,
+ ip46_address_t *dst_addr, u16 dst_port, u8 is_ip4);
bool wg_send_keepalive (vlib_main_t * vm, wg_peer_t * peer);
bool wg_send_handshake (vlib_main_t * vm, wg_peer_t * peer, bool is_retry);
void wg_send_handshake_from_mt (u32 peer_index, bool is_retry);
bool wg_send_handshake_response (vlib_main_t * vm, wg_peer_t * peer);
+bool wg_send_handshake_cookie (vlib_main_t *vm, u32 sender_index,
+ cookie_checker_t *cookie_checker,
+ message_macs_t *macs,
+ ip46_address_t *wg_if_addr, u16 wg_if_port,
+ ip46_address_t *remote_addr, u16 remote_port);
always_inline void
ip4_header_set_len_w_chksum (ip4_header_t * ip4, u16 len)
diff --git a/src/plugins/wireguard/wireguard_timer.c b/src/plugins/wireguard/wireguard_timer.c
index b245b853fb5..237e67c1f06 100644
--- a/src/plugins/wireguard/wireguard_timer.c
+++ b/src/plugins/wireguard/wireguard_timer.c
@@ -26,6 +26,13 @@ get_random_u32_max (u32 max)
return random_u32 (&seed) % max;
}
+static u32
+get_random_u32_max_opt (u32 max, f64 time)
+{
+ u32 seed = (u32) (time * 1e6);
+ return random_u32 (&seed) % max;
+}
+
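
The _opt variant derives its seed purely from the caller-supplied timestamp
(microsecond resolution), so hot paths that already hold the current time
avoid another vlib_time_now call. The trade-off: two calls within the same
microsecond repeat, which is acceptable for timer jitter but nothing
security-relevant. Within this translation unit:

f64 now = vlib_time_now (vlib_get_main ());
u32 j1 = get_random_u32_max_opt (REKEY_TIMEOUT_JITTER, now);
u32 j2 = get_random_u32_max_opt (REKEY_TIMEOUT_JITTER, now);
ASSERT (j1 == j2); /* same timestamp, same seed, same jitter */
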
static void
stop_timer (wg_peer_t * peer, u32 timer_id)
{
@@ -66,7 +73,7 @@ start_timer_thread_fn (void *arg)
return 0;
}
-static void
+static_always_inline void
start_timer_from_mt (u32 peer_idx, u32 timer_id, u32 interval_ticks)
{
wg_timers_args a = {
@@ -191,14 +198,14 @@ wg_expired_zero_key_material (vlib_main_t * vm, wg_peer_t * peer)
return;
}
- if (!peer->is_dead)
+ if (!wg_peer_is_dead (peer))
{
noise_remote_clear (vm, &peer->remote);
}
}
-void
-wg_timers_any_authenticated_packet_traversal (wg_peer_t * peer)
+inline void
+wg_timers_any_authenticated_packet_traversal (wg_peer_t *peer)
{
if (peer->persistent_keepalive_interval)
{
@@ -214,6 +221,12 @@ wg_timers_any_authenticated_packet_sent (wg_peer_t * peer)
peer->last_sent_packet = vlib_time_now (vlib_get_main ());
}
+inline void
+wg_timers_any_authenticated_packet_sent_opt (wg_peer_t *peer, f64 time)
+{
+ peer->last_sent_packet = time;
+}
+
void
wg_timers_handshake_initiated (wg_peer_t * peer)
{
@@ -226,6 +239,16 @@ wg_timers_handshake_initiated (wg_peer_t * peer)
}
void
+wg_timers_send_first_handshake (wg_peer_t *peer)
+{
+ /* zero value is not allowed */

+ peer->new_handshake_interval_tick =
+ get_random_u32_max (REKEY_TIMEOUT_JITTER) + 1;
+ start_timer_from_mt (peer - wg_peer_pool, WG_TIMER_NEW_HANDSHAKE,
+ peer->new_handshake_interval_tick);
+}
+
+void
wg_timers_session_derived (wg_peer_t * peer)
{
peer->session_derived = vlib_time_now (vlib_get_main ());
@@ -246,6 +269,17 @@ wg_timers_data_sent (wg_peer_t * peer)
peer->new_handshake_interval_tick);
}
+inline void
+wg_timers_data_sent_opt (wg_peer_t *peer, f64 time)
+{
+ peer->new_handshake_interval_tick =
+ (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * WHZ +
+ get_random_u32_max_opt (REKEY_TIMEOUT_JITTER, time);
+
+ start_timer_from_mt (peer - wg_peer_pool, WG_TIMER_NEW_HANDSHAKE,
+ peer->new_handshake_interval_tick);
+}
+
/* Should be called after an authenticated data packet is received. */
void
wg_timers_data_received (wg_peer_t * peer)
@@ -275,6 +309,12 @@ wg_timers_any_authenticated_packet_received (wg_peer_t * peer)
peer->last_received_packet = vlib_time_now (vlib_get_main ());
}
+inline void
+wg_timers_any_authenticated_packet_received_opt (wg_peer_t *peer, f64 time)
+{
+ peer->last_received_packet = time;
+}
+
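
The *_opt setters exist so a dataplane node can sample the clock once per
packet (or burst) and reuse that timestamp for every timer touch, instead of
paying for vlib_time_now on each one. A sketch of the intended calling
pattern, assuming the surrounding vm and peer variables:

f64 now = vlib_time_now (vm);
wg_timers_any_authenticated_packet_received_opt (peer, now);
wg_timers_any_authenticated_packet_sent_opt (peer, now);
if (wg_birthdate_has_expired_opt (peer->last_sent_handshake, REKEY_TIMEOUT,
                                  now))
  wg_send_handshake_from_mt (peer - wg_peer_pool, false);
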
static vlib_node_registration_t wg_timer_mngr_node;
static void
@@ -394,14 +434,12 @@ wg_timers_stop (wg_peer_t * peer)
}
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (wg_timer_mngr_node, static) = {
.function = wg_timer_mngr_fn,
.type = VLIB_NODE_TYPE_PROCESS,
.name =
"wg-timer-manager",
};
-/* *INDENT-ON* */
void
wg_feature_init (wg_main_t * wmp)
diff --git a/src/plugins/wireguard/wireguard_timer.h b/src/plugins/wireguard/wireguard_timer.h
index 6b59a39f815..47638bfd74d 100755..100644
--- a/src/plugins/wireguard/wireguard_timer.h
+++ b/src/plugins/wireguard/wireguard_timer.h
@@ -41,11 +41,16 @@ typedef struct wg_peer wg_peer_t;
void wg_timer_wheel_init ();
void wg_timers_stop (wg_peer_t * peer);
void wg_timers_data_sent (wg_peer_t * peer);
+void wg_timers_data_sent_opt (wg_peer_t *peer, f64 time);
void wg_timers_data_received (wg_peer_t * peer);
void wg_timers_any_authenticated_packet_sent (wg_peer_t * peer);
+void wg_timers_any_authenticated_packet_sent_opt (wg_peer_t *peer, f64 time);
void wg_timers_any_authenticated_packet_received (wg_peer_t * peer);
+void wg_timers_any_authenticated_packet_received_opt (wg_peer_t *peer,
+ f64 time);
void wg_timers_handshake_initiated (wg_peer_t * peer);
void wg_timers_handshake_complete (wg_peer_t * peer);
+void wg_timers_send_first_handshake (wg_peer_t *peer);
void wg_timers_session_derived (wg_peer_t * peer);
void wg_timers_any_authenticated_packet_traversal (wg_peer_t * peer);
@@ -53,10 +58,19 @@ void wg_timers_any_authenticated_packet_traversal (wg_peer_t * peer);
static inline bool
wg_birthdate_has_expired (f64 birthday_seconds, f64 expiration_seconds)
{
+ if (birthday_seconds == 0.0)
+ return true;
f64 now_seconds = vlib_time_now (vlib_get_main ());
return (birthday_seconds + expiration_seconds) < now_seconds;
}
+static_always_inline bool
+wg_birthdate_has_expired_opt (f64 birthday_seconds, f64 expiration_seconds,
+ f64 time)
+{
+ return (birthday_seconds + expiration_seconds) < time;
+}
+
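
One behavioural difference worth noting: the plain check now treats a zero
(never-set) birthday as already expired, while the _opt variant keeps the
bare comparison, so with a zero birthday it only reports expiry once uptime
exceeds the window. By construction of the two functions above:

ASSERT (wg_birthdate_has_expired (0.0, REKEY_TIMEOUT));
/* wg_birthdate_has_expired_opt (0.0, REKEY_TIMEOUT, now)
   is equivalent to (now > REKEY_TIMEOUT). */
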
#endif /* __included_wg_timer_h__ */
/*