aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/acl/acl.c211
-rw-r--r--src/plugins/acl/acl_test.c35
-rw-r--r--src/plugins/af_xdp/af_xdp.api90
-rw-r--r--src/plugins/af_xdp/api.c59
-rw-r--r--src/plugins/af_xdp/test_api.c110
-rw-r--r--src/plugins/avf/avf.h3
-rw-r--r--src/plugins/avf/device.c4
-rw-r--r--src/plugins/avf/format.c17
-rw-r--r--src/plugins/avf/input.c2
-rw-r--r--src/plugins/avf/output.c2
-rw-r--r--src/plugins/builtinurl/FEATURE.yaml13
-rw-r--r--src/plugins/builtinurl/builtins.c196
-rw-r--r--src/plugins/builtinurl/builtinurl.api43
-rw-r--r--src/plugins/builtinurl/builtinurl.c137
-rw-r--r--src/plugins/builtinurl/builtinurl.h57
-rw-r--r--src/plugins/builtinurl/builtinurl_test.c66
-rw-r--r--src/plugins/crypto_native/FEATURE.yaml3
-rw-r--r--src/plugins/crypto_native/aes_cbc.c199
-rw-r--r--src/plugins/crypto_native/sha2.c18
-rw-r--r--src/plugins/crypto_openssl/main.c11
-rw-r--r--src/plugins/dev_armada/CMakeLists.txt31
-rw-r--r--src/plugins/dev_armada/README.rst61
-rw-r--r--src/plugins/dev_armada/musdk.h22
-rw-r--r--src/plugins/dev_armada/plugin.c12
-rw-r--r--src/plugins/dev_armada/pp2/counters.c241
-rw-r--r--src/plugins/dev_armada/pp2/format.c198
-rw-r--r--src/plugins/dev_armada/pp2/init.c421
-rw-r--r--src/plugins/dev_armada/pp2/port.c363
-rw-r--r--src/plugins/dev_armada/pp2/pp2.h217
-rw-r--r--src/plugins/dev_armada/pp2/queue.c48
-rw-r--r--src/plugins/dev_armada/pp2/rx.c269
-rw-r--r--src/plugins/dev_armada/pp2/tx.c102
-rw-r--r--src/plugins/dev_ena/ena.c2
-rw-r--r--src/plugins/dev_ena/port.c1
-rw-r--r--src/plugins/dev_ena/rx_node.c9
-rw-r--r--src/plugins/dev_iavf/adminq.c2
-rw-r--r--src/plugins/dev_iavf/counters.c1
-rw-r--r--src/plugins/dev_iavf/format.c1
-rw-r--r--src/plugins/dev_iavf/iavf.c2
-rw-r--r--src/plugins/dev_iavf/port.c60
-rw-r--r--src/plugins/dev_iavf/queue.c1
-rw-r--r--src/plugins/dev_iavf/rx_node.c8
-rw-r--r--src/plugins/dev_iavf/virtchnl.c1
-rw-r--r--src/plugins/dev_iavf/virtchnl_funcs.h4
-rw-r--r--src/plugins/dev_octeon/CMakeLists.txt8
-rw-r--r--src/plugins/dev_octeon/counter.c337
-rw-r--r--src/plugins/dev_octeon/crypto.c1754
-rw-r--r--src/plugins/dev_octeon/crypto.h174
-rw-r--r--src/plugins/dev_octeon/flow.c476
-rw-r--r--src/plugins/dev_octeon/format.c2
-rw-r--r--src/plugins/dev_octeon/init.c125
-rw-r--r--src/plugins/dev_octeon/octeon.h26
-rw-r--r--src/plugins/dev_octeon/port.c206
-rw-r--r--src/plugins/dev_octeon/queue.c1
-rw-r--r--src/plugins/dev_octeon/roc_helper.c14
-rw-r--r--src/plugins/dev_octeon/rx_node.c49
-rw-r--r--src/plugins/dev_octeon/tx_node.c154
-rw-r--r--src/plugins/dhcp/client.c4
-rw-r--r--src/plugins/dhcp/dhcp4_proxy_node.c3
-rw-r--r--src/plugins/dpdk/device/cli.c9
-rw-r--r--src/plugins/dpdk/device/dpdk.h3
-rw-r--r--src/plugins/dpdk/device/dpdk_priv.h36
-rw-r--r--src/plugins/dpdk/device/init.c106
-rw-r--r--src/plugins/dpdk/main.c2
-rw-r--r--src/plugins/flowprobe/flowprobe.c4
-rw-r--r--src/plugins/hs_apps/CMakeLists.txt2
-rw-r--r--src/plugins/hs_apps/echo_client.c14
-rw-r--r--src/plugins/hs_apps/echo_server.c21
-rw-r--r--src/plugins/hs_apps/http_cli.c306
-rw-r--r--src/plugins/hs_apps/http_client.c743
-rw-r--r--src/plugins/hs_apps/http_client_cli.c153
-rw-r--r--src/plugins/hs_apps/http_tps.c195
-rw-r--r--src/plugins/hs_apps/proxy.c846
-rw-r--r--src/plugins/hs_apps/proxy.h51
-rw-r--r--src/plugins/hs_apps/sapi/vpp_echo_common.c4
-rw-r--r--src/plugins/hs_apps/test_builtins.c192
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test.h35
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test_client.c9
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test_protos.c429
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test_server.c57
-rw-r--r--src/plugins/http/CMakeLists.txt5
-rw-r--r--src/plugins/http/http.c1476
-rw-r--r--src/plugins/http/http.h1105
-rw-r--r--src/plugins/http/http_buffer.c2
-rw-r--r--src/plugins/http/http_content_types.h19
-rw-r--r--src/plugins/http/http_header_names.h21
-rw-r--r--src/plugins/http/http_plugin.rst537
-rw-r--r--src/plugins/http/http_status_codes.h27
-rw-r--r--src/plugins/http/http_timer.c18
-rw-r--r--src/plugins/http/http_timer.h36
-rw-r--r--src/plugins/http/test/http_test.c360
-rw-r--r--src/plugins/http_static/builtinurl/json_urls.c65
-rw-r--r--src/plugins/http_static/http_cache.c28
-rw-r--r--src/plugins/http_static/http_cache.h7
-rw-r--r--src/plugins/http_static/http_static.api37
-rw-r--r--src/plugins/http_static/http_static.c35
-rw-r--r--src/plugins/http_static/http_static.h22
-rw-r--r--src/plugins/http_static/http_static_test.c91
-rw-r--r--src/plugins/http_static/static_server.c300
-rw-r--r--src/plugins/ikev2/CMakeLists.txt1
-rw-r--r--src/plugins/ikev2/ikev2.api6
-rw-r--r--src/plugins/ikev2/ikev2.c53
-rw-r--r--src/plugins/ikev2/ikev2_api.c15
-rw-r--r--src/plugins/ikev2/ikev2_crypto.c5
-rw-r--r--src/plugins/ikev2/ikev2_handoff.c165
-rw-r--r--src/plugins/ikev2/ikev2_priv.h6
-rw-r--r--src/plugins/ikev2/ikev2_test.c12
-rw-r--r--src/plugins/linux-cp/lcp_interface.c11
-rw-r--r--src/plugins/mactime/builtins.c1
-rw-r--r--src/plugins/map/ip6_map_t.c5
-rw-r--r--src/plugins/marvell/CMakeLists.txt47
-rw-r--r--src/plugins/marvell/README.rst85
-rw-r--r--src/plugins/marvell/pp2/cli.c135
-rw-r--r--src/plugins/marvell/pp2/format.c200
-rw-r--r--src/plugins/marvell/pp2/input.c392
-rw-r--r--src/plugins/marvell/pp2/output.c121
-rw-r--r--src/plugins/marvell/pp2/pp2.c403
-rw-r--r--src/plugins/marvell/pp2/pp2.h147
-rw-r--r--src/plugins/marvell/pp2/pp2_api.c100
-rw-r--r--src/plugins/marvell/pp2/pp2_test.c144
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_api.c3
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_api.c3
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_in2out.c2
-rw-r--r--src/plugins/nat/pnat/pnat_api.c5
-rw-r--r--src/plugins/netmap/CMakeLists.txt32
-rw-r--r--src/plugins/netmap/FEATURE.yaml12
-rw-r--r--src/plugins/netmap/cli.c236
-rw-r--r--src/plugins/netmap/device.c252
-rw-r--r--src/plugins/netmap/net_netmap.h650
-rw-r--r--src/plugins/netmap/netmap.api56
-rw-r--r--src/plugins/netmap/netmap.c334
-rw-r--r--src/plugins/netmap/netmap.h166
-rw-r--r--src/plugins/netmap/netmap_api.c95
-rw-r--r--src/plugins/netmap/node.c295
-rw-r--r--src/plugins/netmap/plugin.c16
-rw-r--r--src/plugins/npt66/npt66.api13
-rw-r--r--src/plugins/npt66/npt66_node.c71
-rw-r--r--src/plugins/osi/CMakeLists.txt (renamed from src/plugins/builtinurl/CMakeLists.txt)20
-rw-r--r--src/plugins/osi/FEATURE.yaml11
-rw-r--r--src/plugins/osi/node.c335
-rw-r--r--src/plugins/osi/osi.c199
-rw-r--r--src/plugins/osi/osi.h156
-rw-r--r--src/plugins/osi/pg.c106
-rw-r--r--src/plugins/osi/plugin.c23
-rw-r--r--src/plugins/prom/prom.c12
-rw-r--r--src/plugins/pvti/CMakeLists.txt40
-rw-r--r--src/plugins/pvti/FEATURE.yaml8
-rw-r--r--src/plugins/pvti/api.c137
-rw-r--r--src/plugins/pvti/bypass-main.c79
-rw-r--r--src/plugins/pvti/bypass.c202
-rw-r--r--src/plugins/pvti/bypass.h53
-rw-r--r--src/plugins/pvti/input-main.c115
-rw-r--r--src/plugins/pvti/input.c496
-rw-r--r--src/plugins/pvti/input.h87
-rw-r--r--src/plugins/pvti/output-main.c85
-rw-r--r--src/plugins/pvti/output.c543
-rw-r--r--src/plugins/pvti/output.h75
-rw-r--r--src/plugins/pvti/pvti.api111
-rw-r--r--src/plugins/pvti/pvti.c479
-rw-r--r--src/plugins/pvti/pvti.h257
-rw-r--r--src/plugins/pvti/pvti_if.c376
-rw-r--r--src/plugins/pvti/pvti_if.h47
-rw-r--r--src/plugins/quic/quic.c22
-rw-r--r--src/plugins/snort/CMakeLists.txt4
-rw-r--r--src/plugins/snort/cli.c205
-rw-r--r--src/plugins/snort/daq_vpp.c5
-rw-r--r--src/plugins/snort/dequeue.c2
-rw-r--r--src/plugins/snort/enqueue.c2
-rw-r--r--src/plugins/snort/main.c172
-rw-r--r--src/plugins/snort/snort.api226
-rw-r--r--src/plugins/snort/snort.h22
-rw-r--r--src/plugins/snort/snort_api.c405
-rw-r--r--src/plugins/srmpls/CMakeLists.txt30
-rw-r--r--src/plugins/srmpls/FEATURE.yaml9
-rw-r--r--src/plugins/srmpls/dir.dox22
-rw-r--r--src/plugins/srmpls/plugin.c (renamed from src/plugins/marvell/plugin.c)17
-rw-r--r--src/plugins/srmpls/sr_doc.rst215
-rw-r--r--src/plugins/srmpls/sr_mpls.api124
-rw-r--r--src/plugins/srmpls/sr_mpls.h177
-rw-r--r--src/plugins/srmpls/sr_mpls_api.c257
-rw-r--r--src/plugins/srmpls/sr_mpls_policy.c903
-rw-r--r--src/plugins/srmpls/sr_mpls_steering.c897
-rw-r--r--src/plugins/srmpls/sr_mpls_test.c174
-rw-r--r--src/plugins/srtp/srtp.c12
-rw-r--r--src/plugins/tlsmbedtls/tls_mbedtls.c14
-rw-r--r--src/plugins/tlsopenssl/tls_openssl.c19
-rw-r--r--src/plugins/tlspicotls/tls_picotls.c15
-rw-r--r--src/plugins/unittest/fib_test.c51
-rw-r--r--src/plugins/unittest/policer_test.c2
-rw-r--r--src/plugins/unittest/segment_manager_test.c5
-rw-r--r--src/plugins/unittest/session_test.c476
-rw-r--r--src/plugins/unittest/tcp_test.c5
-rw-r--r--src/plugins/unittest/util_test.c30
-rw-r--r--src/plugins/urpf/CMakeLists.txt4
-rw-r--r--src/plugins/urpf/urpf.c27
-rw-r--r--src/plugins/urpf/urpf.h14
-rw-r--r--src/plugins/urpf/urpf_dp.h335
-rw-r--r--src/plugins/wireguard/wireguard_chachapoly.c8
-rw-r--r--src/plugins/wireguard/wireguard_noise.c4
199 files changed, 23149 insertions, 4203 deletions
diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c
index e52e82fcf28..fbd94761027 100644
--- a/src/plugins/acl/acl.c
+++ b/src/plugins/acl/acl.c
@@ -2845,6 +2845,17 @@ acl_set_aclplugin_interface_fn (vlib_main_t * vm,
} \
} while (0)
+#define vec_validate_macip_acl_rules(v, idx) \
+ do \
+ { \
+ if (vec_len (v) < idx + 1) \
+ { \
+ vec_validate (v, idx); \
+ v[idx].is_permit = 0x1; \
+ } \
+ } \
+ while (0)
+
static clib_error_t *
acl_set_aclplugin_acl_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -3062,6 +3073,160 @@ acl_show_aclplugin_macip_interface_fn (vlib_main_t * vm,
return error;
}
+static clib_error_t *
+acl_set_aclplugin_macip_acl_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vl_api_macip_acl_rule_t *rules = 0;
+ int rule_idx = 0;
+ int rv = 0;
+ u32 acl_index = ~0;
+ u32 action = 0;
+ u8 src_mac[6];
+ u8 *tag = 0;
+ u8 mac_mask_all_1[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+ ip_prefix_t src_ip;
+
+ unformat_input_t _line_input, *line_input = &_line_input;
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ vec_validate_macip_acl_rules (rules, rule_idx);
+ if (unformat (line_input, "permit"))
+ {
+ rules[rule_idx].is_permit = 1;
+ }
+ else if (unformat (line_input, "deny"))
+ {
+ rules[rule_idx].is_permit = 0;
+ }
+ else if (unformat (line_input, "action %d", &action))
+ {
+ rules[rule_idx].is_permit = action;
+ }
+ else if (unformat (line_input, "ip %U", unformat_ip_prefix, &src_ip))
+ {
+ ip_prefix_encode2 (&src_ip, &rules[rule_idx].src_prefix);
+ }
+ else if (unformat (line_input, "src"))
+ {
+ /* Everything in MACIP is "source" but allow this verbosity */
+ }
+ else if (unformat (line_input, "mac %U", unformat_mac_address, &src_mac))
+ {
+ memcpy (rules[rule_idx].src_mac, &src_mac,
+ sizeof (rules[rule_idx].src_mac));
+ memcpy (rules[rule_idx].src_mac_mask, &mac_mask_all_1,
+ sizeof (rules[rule_idx].src_mac_mask));
+ }
+ else if (unformat (line_input, "mask %U", unformat_mac_address,
+ &src_mac))
+ {
+ memcpy (rules[rule_idx].src_mac_mask, &src_mac,
+ sizeof (rules[rule_idx].src_mac_mask));
+ }
+ else if (unformat (line_input, "tag %s", &tag))
+ ;
+ else if (unformat (line_input, ","))
+ {
+ rule_idx++;
+ }
+ else
+ break;
+ }
+
+ if (!tag)
+ vec_add (tag, "cli", 4);
+
+ rv = macip_acl_add_list (vec_len (rules), rules, &acl_index, tag);
+ vec_free (rules);
+ vec_free (tag);
+
+ unformat_free (line_input);
+ if (rv)
+ return clib_error_return (0, "Failed to set MACIP ACL rule");
+
+ vlib_cli_output (vm, "ACL index:%u", acl_index);
+ return 0;
+}
+
+static clib_error_t *
+acl_macip_delete_aclplugin_acl_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ int rv;
+ u32 macip_acl_index = ~0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "index %u", &macip_acl_index))
+ {
+ /* operate on this acl index (which must exist) */
+ }
+ else
+ break;
+ }
+
+ if (macip_acl_index == ~0)
+ return (clib_error_return (0, "invalid acl index"));
+
+ rv = macip_acl_del_list (macip_acl_index);
+
+ unformat_free (line_input);
+ if (rv)
+ return (clib_error_return (0, "Failed to delete ACL index"));
+
+ vlib_cli_output (vm, "Deleted ACL index:%u", macip_acl_index);
+ return 0;
+}
+
+static clib_error_t *
+acl_set_aclplugin_macip_interface_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ int rv = 0;
+ u32 sw_if_index = ~0;
+ u32 acl_index = ~0;
+ u32 is_add = 1;
+ unformat_input_t _line_input, *line_input = &_line_input;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vnet_sw_interface,
+ vnet_get_main (), &sw_if_index))
+ ;
+ else if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "acl %u", &acl_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return (clib_error_return (0, "invalid interface"));
+
+ if (acl_index == ~0)
+ return (clib_error_return (0, "invalid acl index"));
+
+ rv = macip_acl_interface_add_del_acl (sw_if_index, is_add, acl_index);
+
+ if (rv)
+ return (clib_error_return (0, "Failed to add acl rule to interface"));
+
+ return 0;
+}
+
static void
acl_plugin_show_acl (acl_main_t * am, u32 acl_index)
{
@@ -3632,6 +3797,38 @@ VLIB_CLI_COMMAND (aclplugin_set_acl_command, static) = {
};
/*?
+ * Create an MACIP Access Control List (ACL)
+ * A MACIP ACL is used to add L2-L3 ACL rules.
+ * A MACIP ACL can be added similar to ACL rules by using following command :
+ *
+ * @cliexcmd{set acl-plugin macip acl <permit|deny|action N>
+ * ip <PREFIX> mac <MAC> mask <int> [tag FOO] {use comma
+ * separated list for multiple rules}}
+ ?*/
+VLIB_CLI_COMMAND (aclplugin_macip_set_acl_command, static) = {
+ .path = "set acl-plugin macip acl ",
+ .short_help = "set acl-plugin macip acl <permit|deny|action N> "
+ "ip <PREFIX> mac <MAC> mask <int> [tag FOO] {use comma "
+ "separated list for multiple rules}",
+ .function = acl_set_aclplugin_macip_acl_fn,
+};
+
+/*?
+ * [un]Apply a MACIP ACL to an interface.
+ * The ACL being applied must already exist.
+ *
+ * @cliexpar
+ * <b><em> set acl-plugin macip interface <interface> <acl INDEX> [del]
+ </b></em>
+ * @cliexend
+ ?*/
+VLIB_CLI_COMMAND (aclplugin_macip_set_interface_command, static) = {
+ .path = "set acl-plugin macip interface",
+ .short_help = "set acl-plugin macip interface <interface> <acl INDEX> [del]",
+ .function = acl_set_aclplugin_macip_interface_fn,
+};
+
+/*?
* Delete an Access Control List (ACL)
* Removes an ACL at the specified index, which must exist but not in use by
* any interface.
@@ -3644,6 +3841,20 @@ VLIB_CLI_COMMAND (aclplugin_delete_acl_command, static) = {
.function = acl_delete_aclplugin_acl_fn,
};
+/*?
+ * Delete a MACIP Access Control List (ACL)
+ * Removes an MACIP ACL at the specified index, which must exist but not in
+ * use by
+ * any interface.
+ *
+ * @cliexcmd{delete acl-plugin macip acl index <idx>}
+ ?*/
+VLIB_CLI_COMMAND (aclplugin_macip_delete_acl_command, static) = {
+ .path = "delete acl-plugin macip acl",
+ .short_help = "delete acl-plugin macip acl index <idx>",
+ .function = acl_macip_delete_aclplugin_acl_fn,
+};
+
static clib_error_t *
acl_plugin_config (vlib_main_t * vm, unformat_input_t * input)
{
diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c
index 8404689dc06..98803a916cb 100644
--- a/src/plugins/acl/acl_test.c
+++ b/src/plugins/acl/acl_test.c
@@ -114,7 +114,7 @@ static void vl_api_acl_interface_list_details_t_handler
int i;
vat_main_t * vam = acl_test_main.vat_main;
u8 *out = 0;
- vl_api_acl_interface_list_details_t_endian(mp);
+ vl_api_acl_interface_list_details_t_endian (mp, 0 /* from network */);
out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input);
out = format(out, " input ");
for(i=0; i<mp->count; i++) {
@@ -141,7 +141,8 @@ static void vl_api_acl_interface_etype_whitelist_details_t_handler
int i;
vat_main_t * vam = acl_test_main.vat_main;
u8 *out = 0;
- vl_api_acl_interface_etype_whitelist_details_t_endian(mp);
+ vl_api_acl_interface_etype_whitelist_details_t_endian (
+ mp, 0 /* from network */);
out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input);
out = format(out, " input ");
for(i=0; i<mp->count; i++) {
@@ -173,15 +174,15 @@ vl_api_acl_rule_t_pretty_format (u8 *out, vl_api_acl_rule_t * a)
inet_ntop(af, &a->src_prefix.address.un, (void *)src, sizeof(src));
inet_ntop(af, &a->dst_prefix.address.un, (void *)dst, sizeof(dst));
- out = format(out, "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport %d-%d tcpflags %d mask %d",
- a->src_prefix.address.af ? "ipv6" : "ipv4", a->is_permit,
- src, a->src_prefix.len,
- dst, a->dst_prefix.len,
- a->proto,
- a->srcport_or_icmptype_first, a->srcport_or_icmptype_last,
- a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last,
- a->tcp_flags_value, a->tcp_flags_mask);
- return(out);
+ out = format (out,
+ "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport "
+ "%d-%d tcpflags %d mask %d",
+ a->src_prefix.address.af ? "ipv6" : "ipv4", a->is_permit, src,
+ a->src_prefix.len, dst, a->dst_prefix.len, a->proto,
+ a->srcport_or_icmptype_first, a->srcport_or_icmptype_last,
+ a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last,
+ a->tcp_flags_value, a->tcp_flags_mask);
+ return (out);
}
@@ -191,9 +192,10 @@ static void vl_api_acl_details_t_handler
{
int i;
vat_main_t * vam = acl_test_main.vat_main;
- vl_api_acl_details_t_endian(mp);
- u8 *out = 0;
- out = format(0, "acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag);
+ vl_api_acl_details_t_endian (mp, 0 /* from network */);
+ u8 *out = 0;
+ out = format (0, "acl_index: %d, count: %d\n tag {%s}\n",
+ mp->acl_index, mp->count, mp->tag);
for(i=0; i<mp->count; i++) {
out = format(out, " ");
out = vl_api_acl_rule_t_pretty_format(out, &mp->r[i]);
@@ -225,8 +227,9 @@ static void vl_api_macip_acl_details_t_handler
{
int i;
vat_main_t * vam = acl_test_main.vat_main;
- vl_api_macip_acl_details_t_endian(mp);
- u8 *out = format(0,"MACIP acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag);
+ vl_api_macip_acl_details_t_endian (mp, 0 /* from network */);
+ u8 *out = format (0, "MACIP acl_index: %d, count: %d\n tag {%s}\n",
+ mp->acl_index, mp->count, mp->tag);
for(i=0; i<mp->count; i++) {
out = format(out, " ");
out = vl_api_macip_acl_rule_t_pretty_format(out, &mp->r[i]);
diff --git a/src/plugins/af_xdp/af_xdp.api b/src/plugins/af_xdp/af_xdp.api
index 4c2908e2037..20aa20b4d7d 100644
--- a/src/plugins/af_xdp/af_xdp.api
+++ b/src/plugins/af_xdp/af_xdp.api
@@ -33,96 +33,6 @@ enumflag af_xdp_flag : u8
/** \brief
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param host_if - Linux netdev interface name
- @param name - new af_xdp interface name (optional)
- @param rxq_num - number of receive queues. 65535 can be used as special value to request all available queues (optional)
- @param rxq_size - receive queue size (optional)
- @param txq_size - transmit queue size (optional)
- @param mode - operation mode (optional)
- @param flags - flags (optional)
- @param prog - eBPF program path (optional)
-*/
-
-define af_xdp_create
-{
- u32 client_index;
- u32 context;
-
- string host_if[64];
- string name[64];
- u16 rxq_num [default=1];
- u16 rxq_size [default=0];
- u16 txq_size [default=0];
- vl_api_af_xdp_mode_t mode [default=0];
- vl_api_af_xdp_flag_t flags [default=0];
- string prog[256];
- option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [zero-copy|no-zero-copy] [no-syscall-lock]";
- option deprecated;
-};
-
-/** \brief
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param host_if - Linux netdev interface name
- @param name - new af_xdp interface name (optional)
- @param rxq_num - number of receive queues. 65535 can be used as special value to request all available queues (optional)
- @param rxq_size - receive queue size (optional)
- @param txq_size - transmit queue size (optional)
- @param mode - operation mode (optional)
- @param flags - flags (optional)
- @param prog - eBPF program path (optional)
- @param namespace - netns of nic (optional)
-*/
-
-define af_xdp_create_v2
-{
- u32 client_index;
- u32 context;
-
- string host_if[64];
- string name[64];
- u16 rxq_num [default=1];
- u16 rxq_size [default=0];
- u16 txq_size [default=0];
- vl_api_af_xdp_mode_t mode [default=0];
- vl_api_af_xdp_flag_t flags [default=0];
- string prog[256];
- string namespace[64];
- option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [netns ns] [zero-copy|no-zero-copy] [no-syscall-lock]";
- option deprecated;
-};
-
-/** \brief
- @param context - sender context, to match reply w/ request
- @param retval - return value for request
- @param sw_if_index - software index for the new af_xdp interface
-*/
-
-define af_xdp_create_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
- option deprecated;
-};
-
-/** \brief
- @param context - sender context, to match reply w/ request
- @param retval - return value for request
- @param sw_if_index - software index for the new af_xdp interface
-*/
-
-define af_xdp_create_v2_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
- option deprecated;
-};
-
-/** \brief
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
@param sw_if_index - interface index
*/
diff --git a/src/plugins/af_xdp/api.c b/src/plugins/af_xdp/api.c
index 3e9a3fe2578..9ead9856ff5 100644
--- a/src/plugins/af_xdp/api.c
+++ b/src/plugins/af_xdp/api.c
@@ -57,65 +57,6 @@ af_xdp_api_flags (vl_api_af_xdp_flag_t flags)
}
static void
-vl_api_af_xdp_create_t_handler (vl_api_af_xdp_create_t * mp)
-{
- vlib_main_t *vm = vlib_get_main ();
- af_xdp_main_t *rm = &af_xdp_main;
- vl_api_af_xdp_create_reply_t *rmp;
- af_xdp_create_if_args_t args;
- int rv;
-
- clib_memset (&args, 0, sizeof (af_xdp_create_if_args_t));
-
- args.linux_ifname = mp->host_if[0] ? (char *) mp->host_if : 0;
- args.name = mp->name[0] ? (char *) mp->name : 0;
- args.prog = mp->prog[0] ? (char *) mp->prog : 0;
- args.mode = af_xdp_api_mode (mp->mode);
- args.flags = af_xdp_api_flags (mp->flags);
- args.rxq_size = ntohs (mp->rxq_size);
- args.txq_size = ntohs (mp->txq_size);
- args.rxq_num = ntohs (mp->rxq_num);
-
- af_xdp_create_if (vm, &args);
- rv = args.rv;
-
- REPLY_MACRO2 (VL_API_AF_XDP_CREATE_REPLY,
- ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
-}
-
-static void
-vl_api_af_xdp_create_v2_t_handler (vl_api_af_xdp_create_v2_t *mp)
-{
- vlib_main_t *vm = vlib_get_main ();
- af_xdp_main_t *rm = &af_xdp_main;
- vl_api_af_xdp_create_v2_reply_t *rmp;
- af_xdp_create_if_args_t args;
- int rv;
-
- clib_memset (&args, 0, sizeof (af_xdp_create_if_args_t));
-
- args.linux_ifname = mp->host_if[0] ? (char *) mp->host_if : 0;
- args.name = mp->name[0] ? (char *) mp->name : 0;
- args.prog = mp->prog[0] ? (char *) mp->prog : 0;
- args.netns = mp->namespace[0] ? (char *) mp->namespace : 0;
- args.mode = af_xdp_api_mode (mp->mode);
- args.flags = af_xdp_api_flags (mp->flags);
- args.rxq_size = ntohs (mp->rxq_size);
- args.txq_size = ntohs (mp->txq_size);
- args.rxq_num = ntohs (mp->rxq_num);
-
- af_xdp_create_if (vm, &args);
- rv = args.rv;
-
- /* clang-format off */
- REPLY_MACRO2 (VL_API_AF_XDP_CREATE_V2_REPLY,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* clang-format on */
-}
-
-static void
vl_api_af_xdp_create_v3_t_handler (vl_api_af_xdp_create_v3_t *mp)
{
vlib_main_t *vm = vlib_get_main ();
diff --git a/src/plugins/af_xdp/test_api.c b/src/plugins/af_xdp/test_api.c
index 581697e341d..5f622adcb04 100644
--- a/src/plugins/af_xdp/test_api.c
+++ b/src/plugins/af_xdp/test_api.c
@@ -58,75 +58,7 @@ api_af_xdp_mode (af_xdp_mode_t mode)
return ~0;
}
-/* af_xdp create API */
-static int
-api_af_xdp_create (vat_main_t * vam)
-{
- vl_api_af_xdp_create_t *mp;
- af_xdp_create_if_args_t args;
- int ret;
-
- if (!unformat_user (vam->input, unformat_af_xdp_create_if_args, &args))
- {
- clib_warning ("unknown input `%U'", format_unformat_error, vam->input);
- return -99;
- }
-
- M (AF_XDP_CREATE, mp);
-
- snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s",
- args.linux_ifname ? : "");
- snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name ? : "");
- mp->rxq_num = clib_host_to_net_u16 (args.rxq_num);
- mp->rxq_size = clib_host_to_net_u16 (args.rxq_size);
- mp->txq_size = clib_host_to_net_u16 (args.txq_size);
- mp->mode = api_af_xdp_mode (args.mode);
- if (args.flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK)
- mp->flags |= AF_XDP_API_FLAGS_NO_SYSCALL_LOCK;
- snprintf ((char *) mp->prog, sizeof (mp->prog), "%s", args.prog ? : "");
-
- S (mp);
- W (ret);
-
- return ret;
-}
-
-/* af_xdp create v2 API */
-static int
-api_af_xdp_create_v2 (vat_main_t *vam)
-{
- vl_api_af_xdp_create_v2_t *mp;
- af_xdp_create_if_args_t args;
- int ret;
-
- if (!unformat_user (vam->input, unformat_af_xdp_create_if_args, &args))
- {
- clib_warning ("unknown input `%U'", format_unformat_error, vam->input);
- return -99;
- }
-
- M (AF_XDP_CREATE, mp);
-
- snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s",
- args.linux_ifname ?: "");
- snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name ?: "");
- snprintf ((char *) mp->namespace, sizeof (mp->namespace), "%s",
- args.netns ?: "");
- mp->rxq_num = clib_host_to_net_u16 (args.rxq_num);
- mp->rxq_size = clib_host_to_net_u16 (args.rxq_size);
- mp->txq_size = clib_host_to_net_u16 (args.txq_size);
- mp->mode = api_af_xdp_mode (args.mode);
- if (args.flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK)
- mp->flags |= AF_XDP_API_FLAGS_NO_SYSCALL_LOCK;
- snprintf ((char *) mp->prog, sizeof (mp->prog), "%s", args.prog ?: "");
-
- S (mp);
- W (ret);
-
- return ret;
-}
-
-/* af_xdp create v2 API */
+/* af_xdp create v3 API */
static int
api_af_xdp_create_v3 (vat_main_t *vam)
{
@@ -140,7 +72,7 @@ api_af_xdp_create_v3 (vat_main_t *vam)
return -99;
}
- M (AF_XDP_CREATE, mp);
+ M (AF_XDP_CREATE_V3, mp);
snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s",
args.linux_ifname ?: "");
@@ -160,45 +92,9 @@ api_af_xdp_create_v3 (vat_main_t *vam)
return ret;
}
-/* af_xdp-create reply handler */
-static void
-vl_api_af_xdp_create_reply_t_handler (vl_api_af_xdp_create_reply_t * mp)
-{
- vat_main_t *vam = af_xdp_test_main.vat_main;
- i32 retval = ntohl (mp->retval);
-
- if (retval == 0)
- {
- fformat (vam->ofp, "created af_xdp with sw_if_index %d\n",
- ntohl (mp->sw_if_index));
- }
-
- vam->retval = retval;
- vam->result_ready = 1;
- vam->regenerate_interface_table = 1;
-}
-
-/* af_xdp-create v2 reply handler */
-static void
-vl_api_af_xdp_create_v2_reply_t_handler (vl_api_af_xdp_create_v2_reply_t *mp)
-{
- vat_main_t *vam = af_xdp_test_main.vat_main;
- i32 retval = ntohl (mp->retval);
-
- if (retval == 0)
- {
- fformat (vam->ofp, "created af_xdp with sw_if_index %d\n",
- ntohl (mp->sw_if_index));
- }
-
- vam->retval = retval;
- vam->result_ready = 1;
- vam->regenerate_interface_table = 1;
-}
-
/* af_xdp-create v3 reply handler */
static void
-vl_api_af_xdp_create_v3_reply_t_handler (vl_api_af_xdp_create_v2_reply_t *mp)
+vl_api_af_xdp_create_v3_reply_t_handler (vl_api_af_xdp_create_v3_reply_t *mp)
{
vat_main_t *vam = af_xdp_test_main.vat_main;
i32 retval = mp->retval;
diff --git a/src/plugins/avf/avf.h b/src/plugins/avf/avf.h
index f6f79cf0e09..774aac0151b 100644
--- a/src/plugins/avf/avf.h
+++ b/src/plugins/avf/avf.h
@@ -180,6 +180,7 @@ typedef struct
u8 int_mode;
u8 buffer_pool_index;
u32 queue_index;
+ u64 total_packets;
} avf_rxq_t;
typedef struct
@@ -198,6 +199,8 @@ typedef struct
avf_tx_desc_t *tmp_descs;
u32 *tmp_bufs;
u32 queue_index;
+ u64 total_packets;
+ u64 no_free_tx_count;
} avf_txq_t;
typedef struct
diff --git a/src/plugins/avf/device.c b/src/plugins/avf/device.c
index 1618800c432..98169f0bcfe 100644
--- a/src/plugins/avf/device.c
+++ b/src/plugins/avf/device.c
@@ -288,6 +288,7 @@ avf_rxq_init (vlib_main_t * vm, avf_device_t * ad, u16 qid, u16 rxq_size)
d->qword[0] = vlib_buffer_get_pa (vm, b);
d++;
}
+ rxq->total_packets = 0;
return 0;
}
@@ -337,6 +338,9 @@ avf_txq_init (vlib_main_t * vm, avf_device_t * ad, u16 qid, u16 txq_size)
vec_validate_aligned (txq->tmp_descs, txq->size, CLIB_CACHE_LINE_BYTES);
vec_validate_aligned (txq->tmp_bufs, txq->size, CLIB_CACHE_LINE_BYTES);
+ txq->total_packets = 0;
+ txq->no_free_tx_count = 0;
+
return 0;
}
diff --git a/src/plugins/avf/format.c b/src/plugins/avf/format.c
index 0a153a093d9..436f5b9fbf2 100644
--- a/src/plugins/avf/format.c
+++ b/src/plugins/avf/format.c
@@ -104,6 +104,7 @@ format_avf_device (u8 * s, va_list * args)
u8 *a = 0;
avf_rxq_t *rxq = vec_elt_at_index (ad->rxqs, 0);
avf_txq_t *txq = vec_elt_at_index (ad->txqs, 0);
+ u32 idx = 0;
s = format (s, "rx: queues %u, desc %u (min %u max %u)", ad->n_rx_queues,
rxq->size, AVF_QUEUE_SZ_MIN, AVF_QUEUE_SZ_MAX);
@@ -114,6 +115,22 @@ format_avf_device (u8 * s, va_list * args)
format_avf_device_flags, ad);
s = format (s, "\n%Ucapability flags: %U", format_white_space, indent,
format_avf_vf_cap_flags, ad->cap_flags);
+ s =
+ format (s, "\n%U Rx Queue: Total Packets", format_white_space, indent + 4);
+ for (idx = 0; idx < ad->n_rx_queues; idx++)
+ {
+ rxq = vec_elt_at_index (ad->rxqs, idx);
+ s = format (s, "\n%U %8u : %llu", format_white_space, indent + 4, idx,
+ rxq->total_packets);
+ }
+ s = format (s, "\n%U Tx Queue: Total Packets\t Total Drops",
+ format_white_space, indent + 4);
+ for (idx = 0; idx < ad->n_tx_queues; idx++)
+ {
+ txq = vec_elt_at_index (ad->txqs, idx);
+ s = format (s, "\n%U %8u : %llu\t %llu", format_white_space, indent + 4,
+ idx, txq->total_packets, txq->no_free_tx_count);
+ }
s = format (s, "\n%Unum-queue-pairs %d max-vectors %u max-mtu %u "
"rss-key-size %u rss-lut-size %u", format_white_space, indent,
diff --git a/src/plugins/avf/input.c b/src/plugins/avf/input.c
index 06007db540d..890259c88ab 100644
--- a/src/plugins/avf/input.c
+++ b/src/plugins/avf/input.c
@@ -539,6 +539,8 @@ done:
else
avf_rxq_refill (vm, node, rxq, 0 /* use_va_dma */ );
+ rxq->total_packets += n_rx_packets;
+
return n_rx_packets;
}
diff --git a/src/plugins/avf/output.c b/src/plugins/avf/output.c
index daa86ae86b2..0952886aaee 100644
--- a/src/plugins/avf/output.c
+++ b/src/plugins/avf/output.c
@@ -510,6 +510,7 @@ retry:
avf_tail_write (txq->qtx_tail, txq->next);
txq->n_enqueued += n_desc;
n_left -= n_enq;
+ txq->total_packets += n_enq;
}
if (n_left)
@@ -522,6 +523,7 @@ retry:
vlib_buffer_free (vm, buffers, n_left);
vlib_error_count (vm, node->node_index,
AVF_TX_ERROR_NO_FREE_SLOTS, n_left);
+ txq->no_free_tx_count += n_left;
}
if (tf->shared_queue)
diff --git a/src/plugins/builtinurl/FEATURE.yaml b/src/plugins/builtinurl/FEATURE.yaml
deleted file mode 100644
index ba8e3c7ea7b..00000000000
--- a/src/plugins/builtinurl/FEATURE.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
----
-name: Builtin URL support for the static http or https server
-maintainer: Dave Barach <dave@barachs.net>
-features:
- - Builtin URLs for the static http/https server
-description: "The (builtinurl) plugin adds a set of URLs to the static http/https server.
- Current URLs, all of which return data in .json fmt:
- <root-url>/version.json - vpp version info
- <root-url>/interface_list.json - list of interfaces
- <root-url>/interface_stats - single interface via HTTP POST
- <root-url>/interface_stats - all intfcs via HTTP GET."
-state: development
-properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/builtinurl/builtins.c b/src/plugins/builtinurl/builtins.c
deleted file mode 100644
index b04e9dd5c7c..00000000000
--- a/src/plugins/builtinurl/builtins.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/vnet.h>
-#include <builtinurl/builtinurl.h>
-#include <http_static/http_static.h>
-#include <vpp/app/version.h>
-
-hss_url_handler_rc_t
-handle_get_version (hss_url_handler_args_t *args)
-{
- u8 *s = 0;
-
- /* Build some json bullshit */
- s = format (s, "{\"vpp_details\": {");
- s = format (s, " \"version\": \"%s\",", VPP_BUILD_VER);
- s = format (s, " \"build_date\": \"%s\"}}\r\n", VPP_BUILD_DATE);
-
- args->data = s;
- args->data_len = vec_len (s);
- args->free_vec_data = 1;
- return HSS_URL_HANDLER_OK;
-}
-
-void
-trim_path_from_request (u8 * s, char *path)
-{
- u8 *cp;
- int trim_length = strlen (path) + 1 /* remove '?' */ ;
-
- /* Get rid of the path and question-mark */
- vec_delete (s, trim_length, 0);
-
- /* Tail trim irrelevant browser info */
- cp = s;
- while ((cp - s) < vec_len (s))
- {
- if (*cp == ' ')
- {
- /*
- * Makes request a vector which happens to look
- * like a c-string.
- */
- *cp = 0;
- vec_set_len (s, cp - s);
- break;
- }
- cp++;
- }
-}
-
-hss_url_handler_rc_t
-handle_get_interface_stats (hss_url_handler_args_t *args)
-{
- u8 *s = 0, *stats = 0;
- uword *p;
- u32 *sw_if_indices = 0;
- vnet_hw_interface_t *hi;
- vnet_sw_interface_t *si;
- char *q = "\"";
- int i;
- int need_comma = 0;
- u8 *format_vnet_sw_interface_cntrs (u8 * s, vnet_interface_main_t * im,
- vnet_sw_interface_t * si, int json);
- vnet_main_t *vnm = vnet_get_main ();
- vnet_interface_main_t *im = &vnm->interface_main;
-
- /* Get stats for a single interface via http POST */
- if (args->reqtype == HTTP_REQ_POST)
- {
- trim_path_from_request (args->request, "interface_stats.json");
-
- /* Find the sw_if_index */
- p = hash_get (im->hw_interface_by_name, args->request);
- if (!p)
- {
- s = format (s, "{\"interface_stats\": {[\n");
- s = format (s, " \"name\": \"%s\",", args->request);
- s = format (s, " \"error\": \"%s\"", "UnknownInterface");
- s = format (s, "]}\n");
- goto out;
- }
-
- vec_add1 (sw_if_indices, p[0]);
- }
- else /* default, HTTP_BUILTIN_METHOD_GET */
- {
- pool_foreach (hi, im->hw_interfaces)
- {
- vec_add1 (sw_if_indices, hi->sw_if_index);
- }
- }
-
- s = format (s, "{%sinterface_stats%s: [\n", q, q);
-
- for (i = 0; i < vec_len (sw_if_indices); i++)
- {
- si = vnet_get_sw_interface (vnm, sw_if_indices[i]);
- if (need_comma)
- s = format (s, ",\n");
-
- need_comma = 1;
-
- s = format (s, "{%sname%s: %s%U%s, ", q, q, q,
- format_vnet_sw_if_index_name, vnm, sw_if_indices[i], q);
-
- stats = format_vnet_sw_interface_cntrs (stats, &vnm->interface_main, si,
- 1 /* want json */ );
- if (vec_len (stats))
- s = format (s, "%v}", stats);
- else
- s = format (s, "%snone%s: %strue%s}", q, q, q, q);
- vec_reset_length (stats);
- }
-
- s = format (s, "]}\n");
-
-out:
- args->data = s;
- args->data_len = vec_len (s);
- args->free_vec_data = 1;
- vec_free (sw_if_indices);
- vec_free (stats);
- return HSS_URL_HANDLER_OK;
-}
-
-hss_url_handler_rc_t
-handle_get_interface_list (hss_url_handler_args_t *args)
-{
- u8 *s = 0;
- int i;
- vnet_main_t *vnm = vnet_get_main ();
- vnet_interface_main_t *im = &vnm->interface_main;
- vnet_hw_interface_t *hi;
- u32 *hw_if_indices = 0;
- int need_comma = 0;
-
- /* Construct vector of active hw_if_indexes ... */
- pool_foreach (hi, im->hw_interfaces)
- {
- /* No point in mentioning "local0"... */
- if (hi - im->hw_interfaces)
- vec_add1 (hw_if_indices, hi - im->hw_interfaces);
- }
-
- /* Build answer */
- s = format (s, "{\"interface_list\": [\n");
- for (i = 0; i < vec_len (hw_if_indices); i++)
- {
- if (need_comma)
- s = format (s, ",\n");
- hi = pool_elt_at_index (im->hw_interfaces, hw_if_indices[i]);
- s = format (s, "\"%v\"", hi->name);
- need_comma = 1;
- }
- s = format (s, "]}\n");
- vec_free (hw_if_indices);
-
- args->data = s;
- args->data_len = vec_len (s);
- args->free_vec_data = 1;
- return HSS_URL_HANDLER_OK;
-}
-
-void
-builtinurl_handler_init (builtinurl_main_t * bm)
-{
-
- bm->register_handler (handle_get_version, "version.json", HTTP_REQ_GET);
- bm->register_handler (handle_get_interface_list, "interface_list.json",
- HTTP_REQ_GET);
- bm->register_handler (handle_get_interface_stats, "interface_stats.json",
- HTTP_REQ_GET);
- bm->register_handler (handle_get_interface_stats, "interface_stats.json",
- HTTP_REQ_POST);
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/builtinurl/builtinurl.api b/src/plugins/builtinurl/builtinurl.api
deleted file mode 100644
index f292fd77a8e..00000000000
--- a/src/plugins/builtinurl/builtinurl.api
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * builtinurl.api - binary API skeleton
- *
- * Copyright (c) <current-year> <your-organization>
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file builtinurl.api
- * @brief VPP control-plane API messages.
- *
- * This file defines VPP control-plane binary API messages which are generally
- * called through a shared memory interface.
- */
-
-/* Version and type recitations */
-
-option version = "1.0.0";
-
-/** @brief API to enable / disable builtinurl on an interface
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param enable_disable - 1 to enable, 0 to disable the feature
- @param sw_if_index - interface handle
-*/
-
-autoreply define builtinurl_enable {
- /* Client identifier, set from api_main.my_client_index */
- u32 client_index;
-
- /* Arbitrary context, so client can match reply to request */
- u32 context;
-};
diff --git a/src/plugins/builtinurl/builtinurl.c b/src/plugins/builtinurl/builtinurl.c
deleted file mode 100644
index 749a2c93b8a..00000000000
--- a/src/plugins/builtinurl/builtinurl.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * builtinurl.c - skeleton vpp engine plug-in
- *
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/vnet.h>
-#include <vnet/plugin/plugin.h>
-#include <builtinurl/builtinurl.h>
-
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-#include <vpp/app/version.h>
-#include <stdbool.h>
-
-/* define message IDs */
-#include <builtinurl/builtinurl.api_enum.h>
-#include <builtinurl/builtinurl.api_types.h>
-
-#define REPLY_MSG_ID_BASE bmp->msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-builtinurl_main_t builtinurl_main;
-
-/* Action function shared between message handler and debug CLI */
-
-int
-builtinurl_enable (builtinurl_main_t * bmp)
-{
- void (*fp) (void *, char *, int);
-
- if (bmp->initialized)
- return 0;
-
- /* Look up the builtin URL registration handler */
- fp = vlib_get_plugin_symbol
- ("http_static_plugin.so", "http_static_server_register_builtin_handler");
-
- /* Most likely, the http_static plugin isn't loaded. Done. */
- if (fp == 0)
- return VNET_API_ERROR_NO_SUCH_TABLE;
-
- bmp->register_handler = fp;
- builtinurl_handler_init (bmp);
- bmp->initialized = 1;
-
- return 0;
-}
-
-static clib_error_t *
-builtinurl_enable_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- builtinurl_main_t *bmp = &builtinurl_main;
-
- int rv;
-
- rv = builtinurl_enable (bmp);
-
- switch (rv)
- {
- case 0:
- break;
-
- case VNET_API_ERROR_NO_SUCH_TABLE:
- return clib_error_return
- (0, "http_static_server_register_builtin_handler undefined");
- break;
-
- default:
- return clib_error_return (0, "builtinurl_enable returned %d", rv);
- }
- return 0;
-}
-
-VLIB_CLI_COMMAND (builtinurl_enable_command, static) =
-{
- .path = "builtinurl enable",
- .short_help = "Turn on builtin http/https GET and POST urls",
- .function = builtinurl_enable_command_fn,
-};
-
-/* API message handler */
-static void vl_api_builtinurl_enable_t_handler
- (vl_api_builtinurl_enable_t * mp)
-{
- vl_api_builtinurl_enable_reply_t *rmp;
- builtinurl_main_t *bmp = &builtinurl_main;
- int rv;
-
- rv = builtinurl_enable (bmp);
-
- REPLY_MACRO (VL_API_BUILTINURL_ENABLE_REPLY);
-}
-
-#include <builtinurl/builtinurl.api.c>
-static clib_error_t *
-builtinurl_init (vlib_main_t * vm)
-{
- builtinurl_main_t *bmp = &builtinurl_main;
-
- bmp->vlib_main = vm;
- bmp->vnet_main = vnet_get_main ();
-
- /* Ask for a correctly-sized block of API message decode slots */
- bmp->msg_id_base = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (builtinurl_init);
-
-VLIB_PLUGIN_REGISTER () =
-{
- .version = VPP_BUILD_VER,
- .description = "vpp built-in URL support",
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/builtinurl/builtinurl.h b/src/plugins/builtinurl/builtinurl.h
deleted file mode 100644
index 91302c1eee5..00000000000
--- a/src/plugins/builtinurl/builtinurl.h
+++ /dev/null
@@ -1,57 +0,0 @@
-
-/*
- * builtinurl.h - built-in URLs for the http static server
- *
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __included_builtinurl_h__
-#define __included_builtinurl_h__
-
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-
-#include <vppinfra/hash.h>
-#include <vppinfra/error.h>
-
-typedef struct
-{
- /* API message ID base */
- u16 msg_id_base;
-
- /* GET / POST handler registration function */
- void (*register_handler) (void *, char *, int);
-
- /* Been there, done that */
- int initialized;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
- ethernet_main_t *ethernet_main;
-} builtinurl_main_t;
-
-extern builtinurl_main_t builtinurl_main;
-
-void builtinurl_handler_init (builtinurl_main_t * bm);
-
-#endif /* __included_builtinurl_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/builtinurl/builtinurl_test.c b/src/plugins/builtinurl/builtinurl_test.c
deleted file mode 100644
index 9edfb81c525..00000000000
--- a/src/plugins/builtinurl/builtinurl_test.c
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * builtinurl.c - skeleton vpp-api-test plug-in
- *
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vat/vat.h>
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-#include <vppinfra/error.h>
-#include <stdbool.h>
-
-uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
-
-/* Declare message IDs */
-#include <builtinurl/builtinurl.api_enum.h>
-#include <builtinurl/builtinurl.api_types.h>
-
-typedef struct
-{
- /* API message ID base */
- u16 msg_id_base;
- vat_main_t *vat_main;
-} builtinurl_test_main_t;
-
-builtinurl_test_main_t builtinurl_test_main;
-
-#define __plugin_msg_base builtinurl_test_main.msg_id_base
-#include <vlibapi/vat_helper_macros.h>
-
-static int
-api_builtinurl_enable (vat_main_t * vam)
-{
- vl_api_builtinurl_enable_t *mp;
- int ret;
-
- /* Construct the API message */
- M (BUILTINURL_ENABLE, mp);
-
- /* send it... */
- S (mp);
-
- /* Wait for a reply... */
- W (ret);
- return ret;
-}
-
-#include <builtinurl/builtinurl.api_test.c>
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/crypto_native/FEATURE.yaml b/src/plugins/crypto_native/FEATURE.yaml
index 06f26d4a8cf..d54816d673f 100644
--- a/src/plugins/crypto_native/FEATURE.yaml
+++ b/src/plugins/crypto_native/FEATURE.yaml
@@ -4,6 +4,9 @@ maintainer: Damjan Marion <damarion@cisco.com>
features:
- CBC(128, 192, 256)
- GCM(128, 192, 256)
+ - CTR(128, 192, 256)
+ - SHA(224, 256)
+ - HMAC-SHA(224, 256)
description: "An implementation of a native crypto-engine"
state: production
diff --git a/src/plugins/crypto_native/aes_cbc.c b/src/plugins/crypto_native/aes_cbc.c
index dd7ca3f1cf1..c981897783f 100644
--- a/src/plugins/crypto_native/aes_cbc.c
+++ b/src/plugins/crypto_native/aes_cbc.c
@@ -25,191 +25,40 @@
#pragma GCC optimize ("O3")
#endif
-#if defined(__VAES__) && defined(__AVX512F__)
-#define u8xN u8x64
-#define u32xN u32x16
-#define u32xN_min_scalar u32x16_min_scalar
-#define u32xN_is_all_zero u32x16_is_all_zero
-#define u32xN_splat u32x16_splat
-#elif defined(__VAES__)
-#define u8xN u8x32
-#define u32xN u32x8
-#define u32xN_min_scalar u32x8_min_scalar
-#define u32xN_is_all_zero u32x8_is_all_zero
-#define u32xN_splat u32x8_splat
-#else
-#define u8xN u8x16
-#define u32xN u32x4
-#define u32xN_min_scalar u32x4_min_scalar
-#define u32xN_is_all_zero u32x4_is_all_zero
-#define u32xN_splat u32x4_splat
-#endif
+#define CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE 256
static_always_inline u32
aes_ops_enc_aes_cbc (vlib_main_t * vm, vnet_crypto_op_t * ops[],
u32 n_ops, aes_key_size_t ks)
{
crypto_native_main_t *cm = &crypto_native_main;
- int rounds = AES_KEY_ROUNDS (ks);
- u8 placeholder[8192];
- u32 i, j, count, n_left = n_ops;
- u32xN placeholder_mask = { };
- u32xN len = { };
- vnet_crypto_key_index_t key_index[4 * N_AES_LANES];
- u8 *src[4 * N_AES_LANES] = {};
- u8 *dst[4 * N_AES_LANES] = {};
- u8xN r[4] = {};
- u8xN k[15][4] = {};
-
- for (i = 0; i < 4 * N_AES_LANES; i++)
- key_index[i] = ~0;
-
-more:
- for (i = 0; i < 4 * N_AES_LANES; i++)
- if (len[i] == 0)
- {
- if (n_left == 0)
- {
- /* no more work to enqueue, so we are enqueueing placeholder buffer */
- src[i] = dst[i] = placeholder;
- len[i] = sizeof (placeholder);
- placeholder_mask[i] = 0;
- }
- else
- {
- u8x16 t = aes_block_load (ops[0]->iv);
- ((u8x16 *) r)[i] = t;
-
- src[i] = ops[0]->src;
- dst[i] = ops[0]->dst;
- len[i] = ops[0]->len;
- placeholder_mask[i] = ~0;
- if (key_index[i] != ops[0]->key_index)
- {
- aes_cbc_key_data_t *kd;
- key_index[i] = ops[0]->key_index;
- kd = (aes_cbc_key_data_t *) cm->key_data[key_index[i]];
- for (j = 0; j < rounds + 1; j++)
- ((u8x16 *) k[j])[i] = kd->encrypt_key[j];
- }
- ops[0]->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
- n_left--;
- ops++;
- }
- }
-
- count = u32xN_min_scalar (len);
-
- ASSERT (count % 16 == 0);
-
- for (i = 0; i < count; i += 16)
+ u32 i, n_left = n_ops;
+ uword key_indices[CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE] = {};
+ u8 *plaintext[CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE] = {};
+ uword oplen[CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE] = {};
+ u8 *iv[CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE] = {};
+ u8 *ciphertext[CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE] = {};
+
+ while (n_left)
{
-#if defined(__VAES__) && defined(__AVX512F__)
- r[0] = u8x64_xor3 (r[0], aes_block_load_x4 (src, i), k[0][0]);
- r[1] = u8x64_xor3 (r[1], aes_block_load_x4 (src + 4, i), k[0][1]);
- r[2] = u8x64_xor3 (r[2], aes_block_load_x4 (src + 8, i), k[0][2]);
- r[3] = u8x64_xor3 (r[3], aes_block_load_x4 (src + 12, i), k[0][3]);
-
- for (j = 1; j < rounds; j++)
+ i = 0;
+ while (n_left && i < CRYPTO_NATIVE_AES_CBC_ENC_VEC_SIZE)
{
- r[0] = aes_enc_round_x4 (r[0], k[j][0]);
- r[1] = aes_enc_round_x4 (r[1], k[j][1]);
- r[2] = aes_enc_round_x4 (r[2], k[j][2]);
- r[3] = aes_enc_round_x4 (r[3], k[j][3]);
+ key_indices[i] = ops[0]->key_index;
+ plaintext[i] = ops[0]->src;
+ ciphertext[i] = ops[0]->dst;
+ oplen[i] = ops[0]->len;
+ iv[i] = ops[0]->iv;
+ ops[0]->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
+
+ ops++;
+ n_left--;
+ i++;
}
- r[0] = aes_enc_last_round_x4 (r[0], k[j][0]);
- r[1] = aes_enc_last_round_x4 (r[1], k[j][1]);
- r[2] = aes_enc_last_round_x4 (r[2], k[j][2]);
- r[3] = aes_enc_last_round_x4 (r[3], k[j][3]);
-
- aes_block_store_x4 (dst, i, r[0]);
- aes_block_store_x4 (dst + 4, i, r[1]);
- aes_block_store_x4 (dst + 8, i, r[2]);
- aes_block_store_x4 (dst + 12, i, r[3]);
-#elif defined(__VAES__)
- r[0] = u8x32_xor3 (r[0], aes_block_load_x2 (src, i), k[0][0]);
- r[1] = u8x32_xor3 (r[1], aes_block_load_x2 (src + 2, i), k[0][1]);
- r[2] = u8x32_xor3 (r[2], aes_block_load_x2 (src + 4, i), k[0][2]);
- r[3] = u8x32_xor3 (r[3], aes_block_load_x2 (src + 6, i), k[0][3]);
-
- for (j = 1; j < rounds; j++)
- {
- r[0] = aes_enc_round_x2 (r[0], k[j][0]);
- r[1] = aes_enc_round_x2 (r[1], k[j][1]);
- r[2] = aes_enc_round_x2 (r[2], k[j][2]);
- r[3] = aes_enc_round_x2 (r[3], k[j][3]);
- }
- r[0] = aes_enc_last_round_x2 (r[0], k[j][0]);
- r[1] = aes_enc_last_round_x2 (r[1], k[j][1]);
- r[2] = aes_enc_last_round_x2 (r[2], k[j][2]);
- r[3] = aes_enc_last_round_x2 (r[3], k[j][3]);
-
- aes_block_store_x2 (dst, i, r[0]);
- aes_block_store_x2 (dst + 2, i, r[1]);
- aes_block_store_x2 (dst + 4, i, r[2]);
- aes_block_store_x2 (dst + 6, i, r[3]);
-#else
-#if __x86_64__
- r[0] = u8x16_xor3 (r[0], aes_block_load (src[0] + i), k[0][0]);
- r[1] = u8x16_xor3 (r[1], aes_block_load (src[1] + i), k[0][1]);
- r[2] = u8x16_xor3 (r[2], aes_block_load (src[2] + i), k[0][2]);
- r[3] = u8x16_xor3 (r[3], aes_block_load (src[3] + i), k[0][3]);
-
- for (j = 1; j < rounds; j++)
- {
- r[0] = aes_enc_round_x1 (r[0], k[j][0]);
- r[1] = aes_enc_round_x1 (r[1], k[j][1]);
- r[2] = aes_enc_round_x1 (r[2], k[j][2]);
- r[3] = aes_enc_round_x1 (r[3], k[j][3]);
- }
-
- r[0] = aes_enc_last_round_x1 (r[0], k[j][0]);
- r[1] = aes_enc_last_round_x1 (r[1], k[j][1]);
- r[2] = aes_enc_last_round_x1 (r[2], k[j][2]);
- r[3] = aes_enc_last_round_x1 (r[3], k[j][3]);
-
- aes_block_store (dst[0] + i, r[0]);
- aes_block_store (dst[1] + i, r[1]);
- aes_block_store (dst[2] + i, r[2]);
- aes_block_store (dst[3] + i, r[3]);
-#else
- r[0] ^= aes_block_load (src[0] + i);
- r[1] ^= aes_block_load (src[1] + i);
- r[2] ^= aes_block_load (src[2] + i);
- r[3] ^= aes_block_load (src[3] + i);
- for (j = 0; j < rounds - 1; j++)
- {
- r[0] = vaesmcq_u8 (vaeseq_u8 (r[0], k[j][0]));
- r[1] = vaesmcq_u8 (vaeseq_u8 (r[1], k[j][1]));
- r[2] = vaesmcq_u8 (vaeseq_u8 (r[2], k[j][2]));
- r[3] = vaesmcq_u8 (vaeseq_u8 (r[3], k[j][3]));
- }
- r[0] = vaeseq_u8 (r[0], k[j][0]) ^ k[rounds][0];
- r[1] = vaeseq_u8 (r[1], k[j][1]) ^ k[rounds][1];
- r[2] = vaeseq_u8 (r[2], k[j][2]) ^ k[rounds][2];
- r[3] = vaeseq_u8 (r[3], k[j][3]) ^ k[rounds][3];
- aes_block_store (dst[0] + i, r[0]);
- aes_block_store (dst[1] + i, r[1]);
- aes_block_store (dst[2] + i, r[2]);
- aes_block_store (dst[3] + i, r[3]);
-#endif
-#endif
+ clib_aes_cbc_encrypt_multi ((aes_cbc_key_data_t **) cm->key_data,
+ key_indices, plaintext, oplen, iv, ks,
+ ciphertext, i);
}
-
- len -= u32xN_splat (count);
-
- for (i = 0; i < 4 * N_AES_LANES; i++)
- {
- src[i] += count;
- dst[i] += count;
- }
-
- if (n_left > 0)
- goto more;
-
- if (!u32xN_is_all_zero (len & placeholder_mask))
- goto more;
-
return n_ops;
}
diff --git a/src/plugins/crypto_native/sha2.c b/src/plugins/crypto_native/sha2.c
index 459ce6d8e79..6787f629104 100644
--- a/src/plugins/crypto_native/sha2.c
+++ b/src/plugins/crypto_native/sha2.c
@@ -118,13 +118,25 @@ sha2_key_add (vnet_crypto_key_t *key, clib_sha2_type_t type)
static int
probe ()
{
-#if defined(__SHA__) && defined(__x86_64__)
+#if defined(__x86_64__)
+
+#if defined(__SHA__) && defined(__AVX512F__)
+ if (clib_cpu_supports_sha () && clib_cpu_supports_avx512f ())
+ return 30;
+#elif defined(__SHA__) && defined(__AVX2__)
+ if (clib_cpu_supports_sha () && clib_cpu_supports_avx2 ())
+ return 20;
+#elif defined(__SHA__)
if (clib_cpu_supports_sha ())
- return 50;
-#elif defined(__ARM_FEATURE_SHA2)
+ return 10;
+#endif
+
+#elif defined(__aarch64__)
+#if defined(__ARM_FEATURE_SHA2)
if (clib_cpu_supports_sha2 ())
return 10;
#endif
+#endif
return -1;
}
diff --git a/src/plugins/crypto_openssl/main.c b/src/plugins/crypto_openssl/main.c
index b070cf336a5..c59b5d34a29 100644
--- a/src/plugins/crypto_openssl/main.c
+++ b/src/plugins/crypto_openssl/main.c
@@ -219,6 +219,17 @@ openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_t *op = ops[i];
int len = 0;
+ if (i + 2 < n_ops)
+ {
+ CLIB_PREFETCH (ops[i + 1]->src, 4 * CLIB_CACHE_PREFETCH_BYTES, LOAD);
+ CLIB_PREFETCH (ops[i + 1]->dst, 4 * CLIB_CACHE_PREFETCH_BYTES,
+ STORE);
+
+ CLIB_PREFETCH (ops[i + 2]->src, 4 * CLIB_CACHE_PREFETCH_BYTES, LOAD);
+ CLIB_PREFETCH (ops[i + 2]->dst, 4 * CLIB_CACHE_PREFETCH_BYTES,
+ STORE);
+ }
+
ctx = ptd->evp_cipher_enc_ctx[op->key_index];
EVP_EncryptInit_ex (ctx, 0, 0, NULL, op->iv);
if (op->aad_len)
diff --git a/src/plugins/dev_armada/CMakeLists.txt b/src/plugins/dev_armada/CMakeLists.txt
new file mode 100644
index 00000000000..e755e7bdd46
--- /dev/null
+++ b/src/plugins/dev_armada/CMakeLists.txt
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2022 Cisco Systems, Inc.
+
+
+find_path(MUSDK_INCLUDE_DIR NAMES mv_std.h)
+find_library(MUSDK_LIB NAMES libmusdk.a)
+
+if(NOT MUSDK_INCLUDE_DIR OR NOT MUSDK_LIB)
+ message(WARNING "Marvell MUSDK not found - dev_armada plugin disabled")
+ return()
+endif()
+
+get_filename_component(MUSDK_LIB_DIR ${MUSDK_LIB} DIRECTORY)
+set(MUSDK_LINK_FLAGS "-Wl,--whole-archive,${MUSDK_LIB_DIR}/libmusdk.a,--no-whole-archive")
+
+add_vpp_plugin(dev_armada
+ SOURCES
+ plugin.c
+ pp2/counters.c
+ pp2/init.c
+ pp2/format.c
+ pp2/port.c
+ pp2/queue.c
+ pp2/rx.c
+ pp2/tx.c
+
+ LINK_FLAGS
+ ${MUSDK_LINK_FLAGS}
+)
+include_directories(${MUSDK_INCLUDE_DIR})
+
diff --git a/src/plugins/dev_armada/README.rst b/src/plugins/dev_armada/README.rst
new file mode 100644
index 00000000000..2c757d04a06
--- /dev/null
+++ b/src/plugins/dev_armada/README.rst
@@ -0,0 +1,61 @@
+Armada device plugin
+=====================
+
+Overview
+--------
+
+This plugin provides native device support for the Marvell PP2 network
+device found in the Marvell Armada family of SoCs.
+It uses Marvell Usermode SDK
+(`MUSDK <https://github.com/MarvellEmbeddedProcessors/musdk-marvell>`__).
+
+Prerequisites
+-------------
+
+The plugin depends on an installed MUSDK and the Marvell-provided Linux in the Marvell SDK.
+The following kernel modules from MUSDK must be loaded for the plugin to work:
+``musdk_cma.ko``
+``mv_pp_uio.ko``
+
+Musdk 18.09.3 compilation steps
+-------------------------------
+
+::
+
+ ./bootstrap
+ ./configure --prefix=/opt/vpp/external/aarch64/ CFLAGS="-Wno-error=unused-result -g -fPIC" --enable-shared=no
+ sed -i -e 's/marvell,mv-pp-uio/generic-uio/' modules/pp2/mv_pp_uio.c
+ sed -i -e 's/O_CREAT/O_CREAT, S_IRUSR | S_IWUSR/' src/lib/file_utils.c
+ make
+ sudo make install
+
+Usage
+-----
+
+Interface Creation and Deletion
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Interfaces are created and deleted using the new vnet dev APIs, CLIs, or
+startup.conf entries.
+
+Sample startup.conf:
+
+::
+
+ devices {
+ dev platform/f2000000.ethernet {
+ port 1 { name ppio1 }
+ }
+
+The device identifier in this example is 'platform/f2000000.ethernet', where
+'platform' is the bus name and 'f2000000.ethernet' is the Linux platform bus
+identifier for the specific PP2 device.
+
+The platform identifier can be found in sysfs:
+
+::
+
+ $ ls /sys/bus/platform/devices | grep ethernet
+ f2000000.ethernet
+
+
diff --git a/src/plugins/dev_armada/musdk.h b/src/plugins/dev_armada/musdk.h
new file mode 100644
index 00000000000..aad2f4a1cef
--- /dev/null
+++ b/src/plugins/dev_armada/musdk.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _MUSDK_H_
+#define _MUSDK_H_
+
+#define MVCONF_DBG_LEVEL 0
+#define MVCONF_PP2_BPOOL_COOKIE_SIZE 32
+#define MVCONF_PP2_BPOOL_DMA_ADDR_SIZE 64
+#define MVCONF_DMA_PHYS_ADDR_T_SIZE 64
+#define MVCONF_SYS_DMA_UIO
+#define MVCONF_TYPES_PUBLIC
+#define MVCONF_DMA_PHYS_ADDR_T_PUBLIC
+
+#include <mv_std.h>
+#include <env/mv_sys_dma.h>
+#include <drivers/mv_pp2.h>
+#include <drivers/mv_pp2_bpool.h>
+#include <drivers/mv_pp2_ppio.h>
+
+#endif /* _MUSDK_H_ */
diff --git a/src/plugins/dev_armada/plugin.c b/src/plugins/dev_armada/plugin.c
new file mode 100644
index 00000000000..1dc465c9a25
--- /dev/null
+++ b/src/plugins/dev_armada/plugin.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Marvell Armada Drivers",
+};
diff --git a/src/plugins/dev_armada/pp2/counters.c b/src/plugins/dev_armada/pp2/counters.c
new file mode 100644
index 00000000000..a041138bc79
--- /dev/null
+++ b/src/plugins/dev_armada/pp2/counters.c
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/bus/platform.h>
+#include <vppinfra/ring.h>
+#include <dev_armada/musdk.h>
+#include <dev_armada/pp2/pp2.h>
+
+VLIB_REGISTER_LOG_CLASS (mvpp2_log, static) = {
+ .class_name = "armada",
+ .subclass_name = "pp2-counters",
+};
+
+typedef enum
+{
+ MVPP2_PORT_CTR_RX_BYTES,
+ MVPP2_PORT_CTR_RX_PACKETS,
+ MVPP2_PORT_CTR_RX_UCAST,
+ MVPP2_PORT_CTR_RX_ERRORS,
+ MVPP2_PORT_CTR_RX_FULLQ_DROPPED,
+ MVPP2_PORT_CTR_RX_BM_DROPPED,
+ MVPP2_PORT_CTR_RX_EARLY_DROPPED,
+ MVPP2_PORT_CTR_RX_FIFO_DROPPED,
+ MVPP2_PORT_CTR_RX_CLS_DROPPED,
+
+ MVPP2_PORT_CTR_TX_BYTES,
+ MVPP2_PORT_CTR_TX_PACKETS,
+ MVPP2_PORT_CTR_TX_UCAST,
+ MVPP2_PORT_CTR_TX_ERRORS,
+} mvpp2_port_counter_id_t;
+
+typedef enum
+{
+ MVPP2_RXQ_CTR_ENQ_DESC,
+ MVPP2_RXQ_CTR_DROP_FULLQ,
+ MVPP2_RXQ_CTR_DROP_EARLY,
+ MVPP2_RXQ_CTR_DROP_BM,
+} mvpp2_rxq_counter_id_t;
+
+typedef enum
+{
+ MVPP2_TXQ_CTR_ENQ_DESC,
+ MVPP2_TXQ_CTR_ENQ_DEC_TO_DDR,
+ MVPP2_TXQ_CTR_ENQ_BUF_TO_DDR,
+ MVPP2_TXQ_CTR_DEQ_DESC,
+} mvpp2_txq_counter_id_t;
+
+static vnet_dev_counter_t mvpp2_port_counters[] = {
+ VNET_DEV_CTR_RX_BYTES (MVPP2_PORT_CTR_RX_BYTES),
+ VNET_DEV_CTR_RX_PACKETS (MVPP2_PORT_CTR_RX_PACKETS),
+ VNET_DEV_CTR_RX_DROPS (MVPP2_PORT_CTR_RX_ERRORS),
+ VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_UCAST, RX, PACKETS, "unicast"),
+ VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_FULLQ_DROPPED, RX, PACKETS,
+ "fullq dropped"),
+ VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_BM_DROPPED, RX, PACKETS,
+ "bm dropped"),
+ VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_EARLY_DROPPED, RX, PACKETS,
+ "early dropped"),
+ VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_FIFO_DROPPED, RX, PACKETS,
+ "fifo dropped"),
+ VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_RX_CLS_DROPPED, RX, PACKETS,
+ "cls dropped"),
+
+ VNET_DEV_CTR_TX_BYTES (MVPP2_PORT_CTR_TX_BYTES),
+ VNET_DEV_CTR_TX_PACKETS (MVPP2_PORT_CTR_TX_PACKETS),
+ VNET_DEV_CTR_TX_DROPS (MVPP2_PORT_CTR_TX_ERRORS),
+ VNET_DEV_CTR_VENDOR (MVPP2_PORT_CTR_TX_UCAST, TX, PACKETS, "unicast"),
+};
+
+static vnet_dev_counter_t mvpp2_rxq_counters[] = {
+ VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_ENQ_DESC, RX, DESCRIPTORS, "enqueued"),
+ VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_DROP_FULLQ, RX, PACKETS, "drop fullQ"),
+ VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_DROP_EARLY, RX, PACKETS, "drop early"),
+ VNET_DEV_CTR_VENDOR (MVPP2_RXQ_CTR_DROP_BM, RX, PACKETS, "drop BM"),
+};
+
+static vnet_dev_counter_t mvpp2_txq_counters[] = {
+ VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_ENQ_DESC, TX, DESCRIPTORS, "enqueued"),
+ VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_DEQ_DESC, TX, PACKETS, "dequeued"),
+ VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_ENQ_BUF_TO_DDR, TX, BUFFERS,
+ "enq to DDR"),
+ VNET_DEV_CTR_VENDOR (MVPP2_TXQ_CTR_ENQ_DEC_TO_DDR, TX, DESCRIPTORS,
+ "enq to DDR"),
+};
+
+void
+mvpp2_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_port_add_counters (vm, port, mvpp2_port_counters,
+ ARRAY_LEN (mvpp2_port_counters));
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ vnet_dev_rx_queue_add_counters (vm, q, mvpp2_rxq_counters,
+ ARRAY_LEN (mvpp2_rxq_counters));
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ vnet_dev_tx_queue_add_counters (vm, q, mvpp2_txq_counters,
+ ARRAY_LEN (mvpp2_txq_counters));
+}
+
+void
+mvpp2_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+ struct pp2_ppio_statistics stats;
+ pp2_ppio_get_statistics (mp->ppio, &stats, 1);
+}
+
+void
+mvpp2_rxq_clear_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *q)
+{
+ mvpp2_port_t *mp = vnet_dev_get_port_data (q->port);
+ struct pp2_ppio_inq_statistics stats;
+ pp2_ppio_inq_get_statistics (mp->ppio, 0, q->queue_id, &stats, 1);
+}
+
+void
+mvpp2_txq_clear_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *q)
+{
+ mvpp2_port_t *mp = vnet_dev_get_port_data (q->port);
+ struct pp2_ppio_inq_statistics stats;
+ pp2_ppio_inq_get_statistics (mp->ppio, 0, q->queue_id, &stats, 1);
+}
+
+vnet_dev_rv_t
+mvpp2_port_get_stats (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+ struct pp2_ppio_statistics stats;
+ pp2_ppio_get_statistics (mp->ppio, &stats, 0);
+
+ foreach_vnet_dev_counter (c, port->counter_main)
+ {
+ switch (c->user_data)
+ {
+ case MVPP2_PORT_CTR_RX_BYTES:
+ vnet_dev_counter_value_update (vm, c, stats.rx_bytes);
+ break;
+ case MVPP2_PORT_CTR_RX_PACKETS:
+ vnet_dev_counter_value_update (vm, c, stats.rx_packets);
+ break;
+ case MVPP2_PORT_CTR_RX_UCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_unicast_packets);
+ break;
+ case MVPP2_PORT_CTR_RX_ERRORS:
+ vnet_dev_counter_value_update (vm, c, stats.rx_errors);
+ break;
+ case MVPP2_PORT_CTR_TX_BYTES:
+ vnet_dev_counter_value_update (vm, c, stats.tx_bytes);
+ break;
+ case MVPP2_PORT_CTR_TX_PACKETS:
+ vnet_dev_counter_value_update (vm, c, stats.tx_packets);
+ break;
+ case MVPP2_PORT_CTR_TX_UCAST:
+ vnet_dev_counter_value_update (vm, c, stats.tx_unicast_packets);
+ break;
+ case MVPP2_PORT_CTR_TX_ERRORS:
+ vnet_dev_counter_value_update (vm, c, stats.tx_errors);
+ break;
+ case MVPP2_PORT_CTR_RX_FULLQ_DROPPED:
+ vnet_dev_counter_value_update (vm, c, stats.rx_fullq_dropped);
+ break;
+ case MVPP2_PORT_CTR_RX_BM_DROPPED:
+ vnet_dev_counter_value_update (vm, c, stats.rx_bm_dropped);
+ break;
+ case MVPP2_PORT_CTR_RX_EARLY_DROPPED:
+ vnet_dev_counter_value_update (vm, c, stats.rx_early_dropped);
+ break;
+ case MVPP2_PORT_CTR_RX_FIFO_DROPPED:
+ vnet_dev_counter_value_update (vm, c, stats.rx_fifo_dropped);
+ break;
+ case MVPP2_PORT_CTR_RX_CLS_DROPPED:
+ vnet_dev_counter_value_update (vm, c, stats.rx_cls_dropped);
+ break;
+
+ default:
+ ASSERT (0);
+ }
+ }
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ struct pp2_ppio_inq_statistics stats;
+ pp2_ppio_inq_get_statistics (mp->ppio, 0, q->queue_id, &stats, 0);
+
+ foreach_vnet_dev_counter (c, q->counter_main)
+ {
+ switch (c->user_data)
+ {
+ case MVPP2_RXQ_CTR_ENQ_DESC:
+ vnet_dev_counter_value_update (vm, c, stats.enq_desc);
+ break;
+ case MVPP2_RXQ_CTR_DROP_BM:
+ vnet_dev_counter_value_update (vm, c, stats.drop_bm);
+ break;
+ case MVPP2_RXQ_CTR_DROP_EARLY:
+ vnet_dev_counter_value_update (vm, c, stats.drop_early);
+ break;
+ case MVPP2_RXQ_CTR_DROP_FULLQ:
+ vnet_dev_counter_value_update (vm, c, stats.drop_fullq);
+ break;
+ default:
+ ASSERT (0);
+ }
+ }
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ struct pp2_ppio_outq_statistics stats;
+ pp2_ppio_outq_get_statistics (mp->ppio, q->queue_id, &stats, 0);
+
+ foreach_vnet_dev_counter (c, q->counter_main)
+ {
+ switch (c->user_data)
+ {
+ case MVPP2_TXQ_CTR_ENQ_DESC:
+ vnet_dev_counter_value_update (vm, c, stats.enq_desc);
+ break;
+ case MVPP2_TXQ_CTR_DEQ_DESC:
+ vnet_dev_counter_value_update (vm, c, stats.deq_desc);
+ break;
+ case MVPP2_TXQ_CTR_ENQ_BUF_TO_DDR:
+ vnet_dev_counter_value_update (vm, c, stats.enq_buf_to_ddr);
+ break;
+ case MVPP2_TXQ_CTR_ENQ_DEC_TO_DDR:
+ vnet_dev_counter_value_update (vm, c, stats.enq_dec_to_ddr);
+ break;
+ default:
+ ASSERT (0);
+ }
+ }
+ }
+
+ return VNET_DEV_OK;
+}
diff --git a/src/plugins/dev_armada/pp2/format.c b/src/plugins/dev_armada/pp2/format.c
new file mode 100644
index 00000000000..42c4114c512
--- /dev/null
+++ b/src/plugins/dev_armada/pp2/format.c
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/bus/platform.h>
+#include <dev_armada/musdk.h>
+#include <dev_armada/pp2/pp2.h>
+
+/* Extract the bit field [first:last] from the 32-bit word stored at byte
+ * 'offset' inside 'start'; used to decode PP2 RX descriptor words. */
+static inline u32
+mrvl_get_u32_bits (void *start, int offset, int first, int last)
+{
+ u32 value = *(u32 *) (((u8 *) start) + offset);
+ /* whole-word case returns early: the mask expression below would compute
+  * 1 << 32, which is undefined behavior for a 32-bit shift */
+ if ((last == 0) && (first == 31))
+ return value;
+ value >>= last;
+ value &= (1 << (first - last + 1)) - 1;
+ return value;
+}
+
+/* Pretty-print a MUSDK pp2_ppio_link_info: duplex, speed (Mbps), admin/link
+ * state and PHY mode. */
+u8 *
+format_pp2_ppio_link_info (u8 *s, va_list *args)
+{
+ struct pp2_ppio_link_info *li = va_arg (*args, struct pp2_ppio_link_info *);
+
+ /* lookup tables indexed directly by the MUSDK enum values;
+  * NOTE(review): an out-of-range value from the driver would index past the
+  * table end - confirm MUSDK only reports the enumerated values */
+ char *port_duplex[] = {
+ [MV_NET_LINK_DUPLEX_HALF] = "half",
+ [MV_NET_LINK_DUPLEX_FULL] = "full",
+ };
+
+ u32 port_speeds[] = {
+ [MV_NET_LINK_SPEED_10] = 10, [MV_NET_LINK_SPEED_100] = 100,
+ [MV_NET_LINK_SPEED_1000] = 1000, [MV_NET_LINK_SPEED_2500] = 2500,
+ [MV_NET_LINK_SPEED_10000] = 10000,
+ };
+
+ char *port_phy_modes[] = {
+ [MV_NET_PHY_MODE_NONE] = "NONE",
+ [MV_NET_PHY_MODE_MII] = "MII",
+ [MV_NET_PHY_MODE_GMII] = "GMII",
+ [MV_NET_PHY_MODE_SGMII] = "SGMII",
+ [MV_NET_PHY_MODE_TBI] = "TBI",
+ [MV_NET_PHY_MODE_REVMII] = "REVMII",
+ [MV_NET_PHY_MODE_RMII] = "RMII",
+ [MV_NET_PHY_MODE_RGMII] = "RGMII",
+ [MV_NET_PHY_MODE_RGMII_ID] = "RGMII_ID",
+ [MV_NET_PHY_MODE_RGMII_RXID] = "RGMII_RXID",
+ [MV_NET_PHY_MODE_RGMII_TXID] = "RGMII_TXID",
+ [MV_NET_PHY_MODE_RTBI] = "RTBI",
+ [MV_NET_PHY_MODE_SMII] = "SMII",
+ [MV_NET_PHY_MODE_XGMII] = "XGMII",
+ [MV_NET_PHY_MODE_MOCA] = "MOCA",
+ [MV_NET_PHY_MODE_QSGMII] = "QSGMII",
+ [MV_NET_PHY_MODE_XAUI] = "XAUI",
+ [MV_NET_PHY_MODE_RXAUI] = "RXAUI",
+ [MV_NET_PHY_MODE_KR] = "KR",
+ };
+
+ s =
+ format (s, "duplex %s speed %d up %d phy_mode %s", port_duplex[li->duplex],
+ port_speeds[li->speed], li->up, port_phy_modes[li->phy_mode]);
+
+ return s;
+}
+
+/* Port format_status callback: prints current link info for the port, or a
+ * placeholder when the ppio is not created yet or the MUSDK query fails. */
+u8 *
+format_mvpp2_port_status (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+ struct pp2_ppio_link_info li = {};
+
+ if (mp->ppio == 0 || pp2_ppio_get_link_info (mp->ppio, &li))
+ return format (s, "link info not available");
+
+ return format (s, "%U", format_pp2_ppio_link_info, &li);
+}
+
+/* Device format_info callback: prints the packet-processor id for this
+ * device. */
+u8 *
+format_mvpp2_dev_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ mvpp2_device_t *md = vnet_dev_get_data (dev);
+
+ /* format () may reallocate the vector and returns the new pointer; the
+  * original discarded the return value, losing the formatted output */
+ s = format (s, "pp_id is %u", md->pp_id);
+ return s;
+}
+
+#define foreach_pp2_rx_desc_field \
+ _ (0x00, 6, 0, l3_offset) \
+ _ (0x00, 12, 8, ip_hdlen) \
+ _ (0x00, 14, 13, ec) \
+ _ (0x00, 15, 15, es) \
+ _ (0x00, 19, 16, pool_id) \
+ _ (0x00, 21, 21, hwf_sync) \
+ _ (0x00, 22, 22, l4_chk_ok) \
+ _ (0x00, 23, 23, ip_frg) \
+ _ (0x00, 24, 24, ipv4_hdr_err) \
+ _ (0x00, 27, 25, l4_info) \
+ _ (0x00, 30, 28, l3_info) \
+ _ (0x00, 31, 31, buf_header) \
+ _ (0x04, 5, 0, lookup_id) \
+ _ (0x04, 8, 6, cpu_code) \
+ _ (0x04, 9, 9, pppoe) \
+ _ (0x04, 11, 10, l3_cast_info) \
+ _ (0x04, 13, 12, l2_cast_info) \
+ _ (0x04, 15, 14, vlan_info) \
+ _ (0x04, 31, 16, byte_count) \
+ _ (0x08, 11, 0, gem_port_id) \
+ _ (0x08, 13, 12, color) \
+ _ (0x08, 14, 14, gop_sop_u) \
+ _ (0x08, 15, 15, key_hash_enable) \
+ _ (0x08, 31, 16, l4chk) \
+ _ (0x0c, 31, 0, timestamp) \
+ _ (0x10, 31, 0, buf_phys_ptr_lo) \
+ _ (0x14, 7, 0, buf_phys_ptr_hi) \
+ _ (0x14, 31, 8, key_hash) \
+ _ (0x18, 31, 0, buf_virt_ptr_lo) \
+ _ (0x1c, 7, 0, buf_virt_ptr_hi) \
+ _ (0x1c, 14, 8, buf_qset_no) \
+ _ (0x1c, 15, 15, buf_type) \
+ _ (0x1c, 21, 16, mod_dscp) \
+ _ (0x1c, 24, 22, mod_pri) \
+ _ (0x1c, 25, 25, mdscp) \
+ _ (0x1c, 26, 26, mpri) \
+ _ (0x1c, 27, 27, mgpid) \
+ _ (0x1c, 31, 29, port_num)
+
+/* Dump every bit field of a PP2 RX descriptor (see
+ * foreach_pp2_rx_desc_field above), wrapping the output at column 72. */
+u8 *
+format_mvpp2_rx_desc (u8 *s, va_list *args)
+
+{
+ struct pp2_ppio_desc *d = va_arg (*args, struct pp2_ppio_desc *);
+ u32 indent = format_get_indent (s);
+ u32 r32;
+
+/* per-field expansion: print name and value, adding hex for values > 9,
+ * then either wrap to a new indented line or append a space */
+#define _(a, b, c, n) \
+ r32 = mrvl_get_u32_bits (d, a, b, c); \
+ if (r32 > 9) \
+ s = format (s, "%s %u (0x%x)", #n, r32, r32); \
+ else \
+ s = format (s, "%s %u", #n, r32); \
+ if (format_get_indent (s) > 72) \
+ s = format (s, "\n%U", format_white_space, indent + 2); \
+ else \
+ s = format (s, " ");
+
+ foreach_pp2_rx_desc_field;
+#undef _
+ return s;
+}
+
+/* Print the named fields of a Marvell DSA tag; fields whose name starts
+ * with '_' (padding/reserved bits) are skipped. */
+u8 *
+format_mv_dsa_tag (u8 *s, va_list *args)
+{
+ mv_dsa_tag_t *tag = va_arg (*args, mv_dsa_tag_t *);
+ u32 cnt = 0;
+
+#define _(b, n) \
+ if (#n[0] != '_') \
+ s = format (s, "%s" #n " %u", cnt++ ? " " : "", tag->n);
+ foreach_mv_dsa_tag_field
+#undef _
+ return s;
+}
+
+/* Packet trace formatter for the RX node: interface (when known), next
+ * node, full RX descriptor dump and, if present, the DSA tag. */
+u8 *
+format_mvpp2_rx_trace (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ vlib_node_t *node = va_arg (*args, vlib_node_t *);
+ mvpp2_rx_trace_t *t = va_arg (*args, mvpp2_rx_trace_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 indent = format_get_indent (s);
+ struct pp2_ppio_desc *d = &t->desc;
+
+ /* CLIB_U32_MAX marks a packet not attributed to a sw interface */
+ if (t->sw_if_index != CLIB_U32_MAX)
+ s = format (s, "pp2: %U (%d) next-node %U", format_vnet_sw_if_index_name,
+ vnm, t->sw_if_index, t->sw_if_index,
+ format_vlib_next_node_name, vm, node->index, t->next_index);
+ else
+ s = format (s, "pp2: next-node %U", format_vlib_next_node_name, vm,
+ node->index, t->next_index);
+
+ s = format (s, "\n%U%U", format_white_space, indent + 2,
+ format_mvpp2_rx_desc, d);
+ /* an all-zero tag means no DSA tag was captured for this packet */
+ if (t->dsa_tag.as_u32)
+ s = format (s, "\n%Udsa tag: %U", format_white_space, indent + 2,
+ format_mv_dsa_tag, &t->dsa_tag);
+
+ return s;
+}
diff --git a/src/plugins/dev_armada/pp2/init.c b/src/plugins/dev_armada/pp2/init.c
new file mode 100644
index 00000000000..4333dbb352f
--- /dev/null
+++ b/src/plugins/dev_armada/pp2/init.c
@@ -0,0 +1,421 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/bus/platform.h>
+#include <vppinfra/ring.h>
+#include <dev_armada/musdk.h>
+#include <dev_armada/pp2/pp2.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+#include <linux/if.h>
+#include <sys/ioctl.h>
+
+#define MV_SYS_DMA_MEM_SZ (2 << 20)
+
+VLIB_REGISTER_LOG_CLASS (mvpp2_log, static) = {
+ .class_name = "armada",
+ .subclass_name = "init",
+};
+
+static int num_pp2_in_use = 0;
+static int dma_mem_initialized = 0;
+static int global_pp2_initialized = 0;
+
+#define _(f, n, s, d) \
+ { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s },
+
+vlib_error_desc_t mvpp2_rx_node_counters[] = { foreach_mvpp2_rx_node_counter };
+vlib_error_desc_t mvpp2_tx_node_counters[] = { foreach_mvpp2_tx_node_counter };
+#undef _
+
+vnet_dev_node_t mvpp2_rx_node = {
+ .error_counters = mvpp2_rx_node_counters,
+ .n_error_counters = ARRAY_LEN (mvpp2_rx_node_counters),
+ .format_trace = format_mvpp2_rx_trace,
+};
+
+vnet_dev_node_t mvpp2_tx_node = {
+ .error_counters = mvpp2_tx_node_counters,
+ .n_error_counters = ARRAY_LEN (mvpp2_tx_node_counters),
+};
+
+/* Bus probe callback: matches the platform device-tree node against the
+ * PP2.2 compatible string; returns a human-readable device description on
+ * match, 0 otherwise. */
+static u8 *
+mvpp2_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info)
+{
+ vnet_dev_bus_platform_device_info_t *di = dev_info;
+
+ if (clib_dt_node_is_compatible (di->node, "marvell,armada-7k-pp22"))
+ return format (0, "Marvell Armada Packet Processor v2.2");
+ return 0;
+}
+/* Tear down process-wide PP2 state (bpools, hifs, pp2 library, DMA memory)
+ * when the last device referencing it goes away; num_pp2_in_use is the
+ * reference count incremented by mvpp2_global_init. */
+static void
+mvpp2_global_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ mvpp2_device_t *md = vnet_dev_get_data (dev);
+ log_debug (dev, "");
+ if (--num_pp2_in_use == 0)
+ {
+ if (global_pp2_initialized)
+ {
+ /* release per-thread buffer pools before the hifs they are used with */
+ for (u32 i = 0; i < ARRAY_LEN (md->thread); i++)
+ if (md->thread[i].bpool)
+ {
+ pp2_bpool_deinit (md->thread[i].bpool);
+ md->thread[i].bpool = 0;
+ }
+ for (u32 i = 0; i < ARRAY_LEN (md->hif); i++)
+ if (md->hif[i])
+ {
+ pp2_hif_deinit (md->hif[i]);
+ md->hif[i] = 0;
+ }
+
+ pp2_deinit ();
+ global_pp2_initialized = 0;
+ }
+ /* DMA memory is destroyed last - bpools/hifs allocate from it */
+ if (dma_mem_initialized)
+ {
+ mv_sys_dma_mem_destroy ();
+ log_debug (0, "mv_sys_dma_mem_destroy()");
+ dma_mem_initialized = 0;
+ }
+ }
+}
+
+/* Per-device deinit callback: drops this device's reference on the shared
+ * global PP2 state. */
+static void
+mvpp2_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ log_debug (dev, "");
+ mvpp2_global_deinit (vm, dev);
+}
+
+/* One-time, process-wide PP2 initialization, reference counted via
+ * num_pp2_in_use: DMA memory region, the MUSDK pp2 library, and one
+ * HIF + buffer pool per VPP thread. Returns VNET_DEV_ERR_INIT_FAILED on
+ * any MUSDK failure; partially created objects are released by
+ * mvpp2_global_deinit when the caller unwinds. */
+static vnet_dev_rv_t
+mvpp2_global_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ mvpp2_device_t *md = vnet_dev_get_data (dev);
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ int mrv;
+ u16 free_hifs, free_bpools;
+ u16 n_threads = vlib_get_n_threads ();
+
+ /* low HIFs/bpools are left for the kernel driver */
+ struct pp2_init_params init_params = {
+ .hif_reserved_map = 0xf,
+ .bm_pool_reserved_map = 0x7,
+ };
+
+ /* already initialized by another device instance */
+ if (num_pp2_in_use++)
+ return rv;
+
+ mrv = mv_sys_dma_mem_init (MV_SYS_DMA_MEM_SZ);
+ if (mrv < 0)
+ {
+ log_err (0, "mv_sys_dma_mem_init failed, err %d", mrv);
+ rv = VNET_DEV_ERR_INIT_FAILED;
+ goto done;
+ }
+
+ dma_mem_initialized = 1;
+ log_debug (0, "mv_sys_dma_mem_init(%u) ok", MV_SYS_DMA_MEM_SZ);
+
+ if ((mrv = pp2_init (&init_params)))
+ {
+ log_err (dev, "pp2_init failed, err %d", mrv);
+ rv = VNET_DEV_ERR_INIT_FAILED;
+ goto done;
+ }
+
+ /* record success so mvpp2_global_deinit knows to call pp2_deinit () and
+  * release bpools/hifs; the original never set this flag, so teardown
+  * skipped all pp2 cleanup */
+ global_pp2_initialized = 1;
+ log_debug (dev, "pp2_init() ok");
+
+ free_hifs = pow2_mask (MVPP2_NUM_HIFS) ^ init_params.hif_reserved_map;
+ free_bpools =
+ pow2_mask (MVPP2_NUM_BPOOLS) ^ init_params.bm_pool_reserved_map;
+
+ if (n_threads > count_set_bits (free_hifs))
+ {
+ log_err (dev, "not enough HIFs (needed %u available %u)", n_threads,
+ count_set_bits (free_hifs));
+ rv = VNET_DEV_ERR_INIT_FAILED;
+ goto done;
+ }
+
+ /* one bpool is consumed per thread below, so availability must be
+  * checked the same way as for HIFs */
+ if (n_threads > count_set_bits (free_bpools))
+ {
+ log_err (dev, "not enough BPools (needed %u available %u)", n_threads,
+ count_set_bits (free_bpools));
+ rv = VNET_DEV_ERR_INIT_FAILED;
+ goto done;
+ }
+
+ for (u32 i = 0; i < n_threads; i++)
+ {
+ char match[16];
+ u8 index;
+ struct pp2_hif_params hif_params = {
+ .match = match,
+ .out_size = 2048,
+ };
+ struct pp2_bpool_params bpool_params = {
+ .match = match,
+ .buff_len = vlib_buffer_get_default_data_size (vm),
+ };
+
+ /* claim the lowest free HIF for this thread */
+ index = get_lowest_set_bit_index (free_hifs);
+ free_hifs ^= 1 << index;
+ snprintf (match, sizeof (match), "hif-%u", index);
+
+ mrv = pp2_hif_init (&hif_params, md->hif + i);
+ if (mrv < 0)
+ {
+ log_err (dev, "pp2_hif_init failed for hif %u thread %u, err %d",
+ index, i, mrv);
+ rv = VNET_DEV_ERR_INIT_FAILED;
+ goto done;
+ }
+ log_debug (dev, "pp2_hif_init(hif %u, thread %u) ok", index, i);
+
+ /* claim the lowest free buffer pool for this thread */
+ index = get_lowest_set_bit_index (free_bpools);
+ free_bpools ^= 1 << index;
+ snprintf (match, sizeof (match), "pool-%u:%u", md->pp_id, index);
+
+ mrv = pp2_bpool_init (&bpool_params, &md->thread[i].bpool);
+ if (mrv < 0)
+ {
+ log_err (dev, "pp2_bpool_init failed for bpool %u thread %u, err %d",
+ index, i, mrv);
+ rv = VNET_DEV_ERR_INIT_FAILED;
+ goto done;
+ }
+ log_debug (dev, "pp2_bpool_init(bpool %u, thread %u) pool-%u:%u ok",
+ index, i, md->thread[i].bpool->pp2_id,
+ md->thread[i].bpool->id);
+ /* pre-fill the buffer-release entries with this thread's pool */
+ for (u32 j = 0; j < ARRAY_LEN (md->thread[0].bre); j++)
+ md->thread[i].bre[j].bpool = md->thread[i].bpool;
+ }
+
+done:
+ return rv;
+}
+
+/* Device init callback: identifies which packet processor (cp0/cp1) this
+ * node belongs to, discovers an optional mv88e6xxx DSA switch in the
+ * device tree, performs global PP2 init, then adds one vnet_dev port per
+ * available ppio. */
+static vnet_dev_rv_t
+mvpp2_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ mvpp2_device_t *md = vnet_dev_get_data (dev);
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ vnet_dev_bus_platform_device_data_t *dd = vnet_dev_get_bus_data (dev);
+ clib_dt_node_t *sc;
+ clib_dt_node_t *sw = 0;
+ int pp_id = -1;
+
+ if (!clib_dt_node_is_compatible (dd->node, "marvell,armada-7k-pp22"))
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ sc = clib_dt_dereference_node (dd->node, "marvell,system-controller");
+
+ /* the system-controller path prefix ("/cp0/" or "/cp1/") determines
+  * which packet processor this device is */
+ if (sc && vec_len (sc->path) > strlen ("/cpX/"))
+ {
+ if (strncmp ((char *) sc->path, "/cp0/", 4) == 0)
+ pp_id = 0;
+ else if (strncmp ((char *) sc->path, "/cp1/", 4) == 0)
+ pp_id = 1;
+ }
+
+ if (pp_id < 0)
+ return VNET_DEV_ERR_UNKNOWN_DEVICE;
+
+ /* look for a supported DSA switch anywhere in the device tree; its port
+  * layout is only logged here, the 'sw' node is used below to mark ports
+  * connected to the switch */
+ foreach_clib_dt_tree_node (n, clib_dt_get_root_node (sc))
+ if (clib_dt_node_is_compatible (n, "marvell,mv88e6190") ||
+ clib_dt_node_is_compatible (n, "marvell,mv88e6393x"))
+ {
+ clib_dt_node_t *ports;
+ sw = n;
+ log_debug (dev, "found mv88e6190 compatible switch at %v", n->path);
+ ports = clib_dt_get_child_node (sw, "ports");
+ foreach_clib_dt_child_node (pn, ports)
+ {
+ u32 reg = CLIB_U32_MAX;
+ char *label = "(no label)";
+ clib_dt_property_t *p;
+ clib_dt_node_t *n;
+
+ p = clib_dt_get_node_property_by_name (pn, "reg");
+ if (p)
+ reg = clib_dt_property_get_u32 (p);
+ p = clib_dt_get_node_property_by_name (pn, "label");
+ if (p)
+ label = clib_dt_property_get_string (p);
+
+ log_debug (dev, "port %u label %s", reg, label);
+
+ n = clib_dt_dereference_node (pn, "phy-handle");
+ if (n)
+ log_debug (dev, " phy is %v", n->path);
+
+ n = clib_dt_dereference_node (pn, "sfp");
+ if (n)
+ log_debug (dev, " sfp is %v", n->path);
+
+ n = clib_dt_dereference_node (pn, "ethernet");
+ if (n)
+ log_debug (dev, " connected to %v", n->path);
+
+ p = clib_dt_get_node_property_by_name (pn, "phy-mode");
+ if (p)
+ log_debug (dev, " phy mode is %s",
+ clib_dt_property_get_string (p));
+ }
+ }
+
+ /* capture the result; the original discarded it and compared only the
+  * bare call, returning rv == VNET_DEV_OK even on failure */
+ if ((rv = mvpp2_global_init (vm, dev)) != VNET_DEV_OK)
+ return rv;
+
+ md->pp_id = pp_id;
+
+ foreach_clib_dt_child_node (cn, dd->node)
+ {
+ clib_dt_property_t *p;
+ char netdev_name[IFNAMSIZ];
+ struct ifreq s = {};
+ u8 ppio_id;
+ int fd, srv;
+
+ p = clib_dt_get_node_property_by_name (cn, "port-id");
+
+ if (!clib_dt_property_is_u32 (p))
+ continue;
+
+ ppio_id = clib_dt_property_get_u32 (p);
+ log_debug (dev, "found port with ppio id %u", ppio_id);
+
+ if (pp2_ppio_available (md->pp_id, ppio_id) == 0)
+ continue;
+
+ if (pp2_netdev_get_ifname (md->pp_id, ppio_id, netdev_name) < 0)
+ {
+ log_warn (dev, "failed to get ifname, skipping port %u ", ppio_id);
+ continue;
+ }
+
+ /* read the MAC address from the kernel netdev via SIOCGIFHWADDR */
+ srv = -1;
+ if ((fd = socket (PF_INET, SOCK_DGRAM, IPPROTO_IP)) >= 0)
+ {
+ strcpy (s.ifr_name, netdev_name);
+ srv = ioctl (fd, SIOCGIFHWADDR, &s);
+ close (fd);
+ }
+
+ if (srv < 0)
+ {
+ log_warn (dev, "unable to get hw address, skipping port %u",
+ ppio_id);
+ continue;
+ }
+
+ log_debug (dev, "adding ppio %u (netdev name %s, hwaddr %U)", ppio_id,
+ netdev_name, format_ethernet_address, s.ifr_addr.sa_data);
+
+ mvpp2_port_t mvpp2_port = {
+ .ppio_id = ppio_id,
+ };
+
+ /* if this port's DT node is referenced by a switch port's 'ethernet'
+  * phandle, it is the DSA CPU port */
+ if (sw)
+ {
+ clib_dt_node_t *ports = clib_dt_get_child_node (sw, "ports");
+ if (ports)
+ foreach_clib_dt_child_node (sp, ports)
+ {
+ clib_dt_node_t *eth;
+
+ eth = clib_dt_dereference_node (sp, "ethernet");
+
+ if (cn != eth)
+ continue;
+
+ mvpp2_port.is_dsa = 1;
+ mvpp2_port.switch_node = sw;
+ mvpp2_port.switch_port_node = sp;
+ log_debug (dev, "port is connected to switch port %v",
+ sp->path);
+ break;
+ }
+ }
+
+ vnet_dev_port_add_args_t port_add_args = {
+ .port = {
+ .attr = {
+ .type = VNET_DEV_PORT_TYPE_ETHERNET,
+ .max_rx_queues = PP2_PPIO_MAX_NUM_INQS,
+ .max_tx_queues = PP2_PPIO_MAX_NUM_OUTQS,
+ .max_supported_rx_frame_size = 9216,
+ .caps.secondary_interfaces = mvpp2_port.is_dsa != 0,
+ },
+ .ops = {
+ .init = mvpp2_port_init,
+ .deinit = mvpp2_port_deinit,
+ .start = mvpp2_port_start,
+ .stop = mvpp2_port_stop,
+ .add_sec_if = mvpp2_port_add_sec_if,
+ .del_sec_if = mvpp2_port_del_sec_if,
+ .config_change = mvpp2_port_cfg_change,
+ .config_change_validate = mvpp2_port_cfg_change_validate,
+ .format_status = format_mvpp2_port_status,
+ .clear_counters = mvpp2_port_clear_counters,
+ },
+ .data_size = sizeof (mvpp2_port_t),
+ .initial_data = &mvpp2_port,
+ .sec_if_args = VNET_DEV_ARGS (
+ VNET_DEV_ARG_UINT32 (MVPP2_SEC_IF_ARG_DSA_SWITCH, "dsa_switch", "DSA source switch ID", .max = 31),
+ VNET_DEV_ARG_UINT32 (MVPP2_SEC_IF_ARG_DSA_PORT, "dsa_port", "DSA source switch port ID", .max = 31)
+ ),
+ },
+ .rx_node = &mvpp2_rx_node,
+ .tx_node = &mvpp2_tx_node,
+ .rx_queue = {
+ .config = {
+ .data_size = sizeof (mvpp2_rxq_t),
+ .default_size = 512,
+ .multiplier = 32,
+ .min_size = 32,
+ .max_size = 4096,
+ .size_is_power_of_two = 1,
+ },
+ .ops = {
+ .clear_counters = mvpp2_rxq_clear_counters,
+ },
+ },
+ .tx_queue = {
+ .config = {
+ .data_size = sizeof (mvpp2_txq_t),
+ .default_size = 512,
+ .multiplier = 32,
+ .min_size = 32,
+ .max_size = 4096,
+ .size_is_power_of_two = 1,
+ },
+ .ops = {
+ .alloc = mvpp2_txq_alloc,
+ .free = mvpp2_txq_free,
+ .clear_counters = mvpp2_txq_clear_counters,
+ },
+ },
+ };
+
+ vnet_dev_set_hw_addr_eth_mac (&port_add_args.port.attr.hw_addr,
+ (u8 *) s.ifr_addr.sa_data);
+
+ vnet_dev_port_add (vm, dev, ppio_id, &port_add_args);
+ }
+
+ if (rv != VNET_DEV_OK)
+ mvpp2_deinit (vm, dev);
+ return rv;
+}
+
+VNET_DEV_REGISTER_DRIVER (pp2) = {
+ .name = "mvpp2",
+ .bus = PLATFORM_BUS_NAME,
+ .device_data_sz = sizeof (mvpp2_device_t),
+ .ops = {
+ .init = mvpp2_init,
+ .deinit = mvpp2_deinit,
+ .probe = mvpp2_probe,
+ .format_info = format_mvpp2_dev_info,
+ },
+};
diff --git a/src/plugins/dev_armada/pp2/port.c b/src/plugins/dev_armada/pp2/port.c
new file mode 100644
index 00000000000..63a212e80c2
--- /dev/null
+++ b/src/plugins/dev_armada/pp2/port.c
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/bus/platform.h>
+#include <vppinfra/ring.h>
+#include <dev_armada/musdk.h>
+#include <dev_armada/pp2/pp2.h>
+
+VLIB_REGISTER_LOG_CLASS (mvpp2_log, static) = {
+ .class_name = "armada",
+ .subclass_name = "pp2-port",
+};
+
+/* Port init callback: creates the MUSDK ppio for this port (single traffic
+ * class, one in-queue backed by the rx thread's bpool, one out-queue per
+ * configured tx queue), verifies link info, pre-builds the descriptor
+ * pointer array and registers counters. */
+vnet_dev_rv_t
+mvpp2_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ mvpp2_device_t *md = vnet_dev_get_data (dev);
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ vnet_dev_rx_queue_t *rxq0 = vnet_dev_get_port_rx_queue_by_id (port, 0);
+ struct pp2_ppio_link_info li;
+ char match[16];
+ int mrv;
+
+ log_debug (port->dev, "");
+
+ snprintf (match, sizeof (match), "ppio-%d:%d", md->pp_id, port->port_id);
+
+ struct pp2_ppio_params ppio_params = {
+ .match = match,
+ /* DSA-tagged CPU ports need the DSA ethernet start header */
+ .type = PP2_PPIO_T_NIC,
+ .eth_start_hdr = mp->is_dsa ? PP2_PPIO_HDR_ETH_DSA : PP2_PPIO_HDR_ETH,
+ .inqs_params = {
+ .num_tcs = 1,
+ .tcs_params[0] = {
+ .pkt_offset = 0,
+ .num_in_qs = 1,
+ .inqs_params = &(struct pp2_ppio_inq_params) { .size = rxq0->size },
+ .pools[0][0] = md->thread[rxq0->rx_thread_index].bpool,
+ },
+ },
+ };
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ struct pp2_ppio_outqs_params *oqs = &ppio_params.outqs_params;
+ oqs->outqs_params[q->queue_id].weight = 1;
+ oqs->outqs_params[q->queue_id].size = q->size;
+ oqs->num_outqs++;
+ }
+
+ mrv = pp2_ppio_init (&ppio_params, &mp->ppio);
+ if (mrv)
+ {
+ rv = VNET_DEV_ERR_INIT_FAILED;
+ log_err (dev, "port %u ppio '%s' init failed, rv %d", port->port_id,
+ match, mrv);
+ goto done;
+ }
+ log_debug (dev, "port %u ppio '%s' init ok", port->port_id, match);
+
+ mrv = pp2_ppio_get_link_info (mp->ppio, &li);
+ if (mrv)
+ {
+ rv = VNET_DEV_ERR_INIT_FAILED;
+ log_err (dev, "failed to get link info for port %u, rv %d",
+ port->port_id, mrv);
+ goto done;
+ }
+
+ log_debug (dev, "port %u %U", port->port_id, format_pp2_ppio_link_info, &li);
+
+ /* pp2_ppio_recv/send take an array of descriptor pointers; build it once */
+ for (u32 i = 0; i < VLIB_FRAME_SIZE; i++)
+ mp->desc_ptrs[i] = mp->descs + i;
+
+ mvpp2_port_add_counters (vm, port);
+
+done:
+ if (rv != VNET_DEV_OK)
+ mvpp2_port_stop (vm, port);
+ return rv;
+}
+
+/* Port deinit callback: releases the ppio created in mvpp2_port_init, if
+ * one exists. */
+void
+mvpp2_port_deinit (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+
+ log_debug (port->dev, "");
+
+ if (mp->ppio == 0)
+ return;
+
+ pp2_ppio_deinit (mp->ppio);
+ mp->ppio = 0;
+}
+
+/* Periodic poll callback (registered in mvpp2_port_start): detects link
+ * state/duplex/speed changes, reports them to the dev framework, and
+ * refreshes port statistics. */
+void
+mvpp2_port_poll (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_port_state_changes_t changes = {};
+ struct pp2_ppio_link_info li;
+ int mrv;
+
+ mrv = pp2_ppio_get_link_info (mp->ppio, &li);
+
+ if (mrv)
+ {
+ log_debug (dev, "pp2_ppio_get_link_info: failed, rv %d", mrv);
+ return;
+ }
+
+ if (mp->last_link_info.up != li.up)
+ {
+ changes.change.link_state = 1;
+ changes.link_state = li.up != 0;
+ log_debug (dev, "link state changed to %u", changes.link_state);
+ }
+
+ if (mp->last_link_info.duplex != li.duplex)
+ {
+ changes.change.link_duplex = 1;
+ changes.full_duplex = li.duplex != 0;
+ log_debug (dev, "link full duplex changed to %u", changes.full_duplex);
+ }
+
+ if (mp->last_link_info.speed != li.speed)
+ {
+ /* maps MUSDK speed enum to the framework's link_speed value;
+  * entries look like Kbps (10 Mbps -> 10000) - TODO confirm units */
+ u32 speeds[] = {
+ [MV_NET_LINK_SPEED_AN] = 0,
+ [MV_NET_LINK_SPEED_10] = 10000,
+ [MV_NET_LINK_SPEED_100] = 100000,
+ [MV_NET_LINK_SPEED_1000] = 1000000,
+ [MV_NET_LINK_SPEED_2500] = 2500000,
+ [MV_NET_LINK_SPEED_10000] = 10000000,
+ };
+
+ if (li.speed < ARRAY_LEN (speeds))
+ {
+ changes.change.link_speed = 1;
+ changes.link_speed = speeds[li.speed];
+ log_debug (dev, "link speed changed to %u", changes.link_speed);
+ }
+ }
+
+ /* only notify (and cache the new info) when something changed */
+ if (changes.change.any)
+ {
+ mp->last_link_info = li;
+ vnet_dev_port_state_change (vm, port, changes);
+ }
+
+ mvpp2_port_get_stats (vm, port);
+}
+
+/* Port start callback: enables the ppio and registers the 0.5 s link/stats
+ * poll. */
+vnet_dev_rv_t
+mvpp2_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+ int mrv;
+
+ log_debug (port->dev, "");
+
+ mrv = pp2_ppio_enable (mp->ppio);
+ if (mrv)
+ {
+ log_err (port->dev, "pp2_ppio_enable() failed, rv %d", mrv);
+ return VNET_DEV_ERR_NOT_READY;
+ }
+
+ mp->is_enabled = 1;
+
+ vnet_dev_poll_port_add (vm, port, 0.5, mvpp2_port_poll);
+
+ return VNET_DEV_OK;
+}
+
+/* Port stop callback: unregisters the poll, disables the ppio and reports
+ * link down. Safe to call when the port was never started (is_enabled 0),
+ * which mvpp2_port_init relies on for its failure path. */
+void
+mvpp2_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ int rv;
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+
+ log_debug (port->dev, "");
+
+ if (mp->is_enabled)
+ {
+ vnet_dev_poll_port_remove (vm, port, mvpp2_port_poll);
+
+ rv = pp2_ppio_disable (mp->ppio);
+ if (rv)
+ log_err (port->dev, "pp2_ppio_disable() failed, rv %d", rv);
+
+ vnet_dev_port_state_change (vm, port,
+ (vnet_dev_port_state_changes_t){
+ .change.link_state = 1,
+ .change.link_speed = 1,
+ .link_speed = 0,
+ .link_state = 0,
+ });
+ mp->is_enabled = 0;
+ }
+}
+
+/* Add a DSA secondary interface on a DSA-capable port: parses the
+ * dsa_switch/dsa_port args, stores the precomputed FROM_CPU tag in the
+ * interface's user_data, and registers the (switch, port) pair in the
+ * valid-source bitmap / sec-if lookup table used by the rx path. */
+vnet_dev_rv_t
+mvpp2_port_add_sec_if (vlib_main_t *vm, vnet_dev_port_t *port, void *p)
+{
+ vnet_dev_port_interface_t *sif = p;
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+ u32 port_id = CLIB_U32_MAX, switch_id = 0, index;
+
+ if (mp->is_dsa == 0)
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ foreach_vnet_dev_args (a, sif)
+ {
+ switch (a->id)
+ {
+ case MVPP2_SEC_IF_ARG_DSA_PORT:
+ if (a->val_set)
+ port_id = vnet_dev_arg_get_uint32 (a);
+ break;
+ case MVPP2_SEC_IF_ARG_DSA_SWITCH:
+ /* optional; defaults to switch 0 */
+ switch_id = vnet_dev_arg_get_uint32 (a);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (port_id == CLIB_U32_MAX)
+ {
+ log_err (port->dev, "missing dsa_port argument");
+ return VNET_DEV_ERR_INVALID_ARG;
+ }
+
+ log_debug (port->dev, "switch %u port %u", switch_id, port_id);
+
+ mv_dsa_tag_t tag = {
+ .tag_type = MV_DSA_TAG_TYPE_FROM_CPU,
+ .src_port_or_lag = port_id,
+ .src_dev = switch_id,
+ };
+
+ /* index packs switch id and port id into one key, matching the 5-bit
+  * src_port_or_lag field width of the DSA tag */
+ index = switch_id << 5 | port_id;
+
+ sif->user_data = tag.as_u32;
+ uword_bitmap_set_bits_at_index (mp->valid_dsa_src_bitmap, index, 1);
+ mp->dsa_to_sec_if[index] = sif->index;
+ return VNET_DEV_OK;
+}
+
+/* Remove a DSA secondary interface: recovers the (switch, port) key from
+ * the tag cached in user_data and clears its valid-source bit.
+ * NOTE(review): the dsa_to_sec_if[] entry is left stale - presumably
+ * lookups are gated by the bitmap; confirm in the rx path. */
+vnet_dev_rv_t
+mvpp2_port_del_sec_if (vlib_main_t *vm, vnet_dev_port_t *port, void *p)
+{
+ vnet_dev_port_interface_t *sif = p;
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+ mv_dsa_tag_t tag = { .as_u32 = sif->user_data };
+ u32 index = tag.src_dev << 5 | tag.src_port_or_lag;
+
+ log_debug (port->dev, "switch %u port %u", tag.src_dev, tag.src_port_or_lag);
+
+ uword_bitmap_clear_bits_at_index (mp->valid_dsa_src_bitmap, index, 1);
+ return VNET_DEV_OK;
+}
+
+/* Validate that a requested port configuration change is one of the kinds
+ * mvpp2_port_cfg_change can apply. */
+vnet_dev_rv_t
+mvpp2_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ return VNET_DEV_OK;
+
+ default:
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+}
+
+/* Apply a validated runtime configuration change (promiscuous mode, or
+ * primary/secondary MAC address updates) to the underlying ppio. Returns
+ * VNET_DEV_ERR_INTERNAL when the MUSDK call fails. */
+vnet_dev_rv_t
+mvpp2_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ eth_addr_t addr;
+ int mrv;
+
+ switch (req->type)
+ {
+
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ mrv = pp2_ppio_set_promisc (mp->ppio, req->promisc);
+ if (mrv)
+ {
+ log_err (port->dev, "pp2_ppio_set_promisc: failed, rv %d", mrv);
+ rv = VNET_DEV_ERR_INTERNAL;
+ }
+ else
+ log_debug (port->dev, "pp2_ppio_set_promisc: promisc %u",
+ req->promisc);
+ break;
+
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ clib_memcpy (&addr, req->addr.eth_mac, sizeof (addr));
+ mrv = pp2_ppio_set_mac_addr (mp->ppio, addr);
+ if (mrv)
+ {
+ log_err (port->dev, "pp2_ppio_set_mac_addr: failed, rv %d", mrv);
+ rv = VNET_DEV_ERR_INTERNAL;
+ }
+ else
+ /* message fixed: this path sets the primary address ("added" was
+  * copy-pasted from the add-secondary case) */
+ log_debug (port->dev, "pp2_ppio_set_mac_addr: %U set",
+ format_ethernet_address, &addr);
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ clib_memcpy (&addr, req->addr.eth_mac, sizeof (addr));
+ mrv = pp2_ppio_add_mac_addr (mp->ppio, addr);
+ if (mrv)
+ {
+ log_err (port->dev, "pp2_ppio_add_mac_addr: failed, rv %d", mrv);
+ rv = VNET_DEV_ERR_INTERNAL;
+ }
+ else
+ log_debug (port->dev, "pp2_ppio_add_mac_addr: %U added",
+ format_ethernet_address, &addr);
+ break;
+
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ clib_memcpy (&addr, req->addr.eth_mac, sizeof (addr));
+ mrv = pp2_ppio_remove_mac_addr (mp->ppio, addr);
+ if (mrv)
+ {
+ log_err (port->dev, "pp2_ppio_remove_mac_addr: failed, rv %d", mrv);
+ rv = VNET_DEV_ERR_INTERNAL;
+ }
+ else
+ /* message fixed: this path removes the address ("added" was wrong) */
+ log_debug (port->dev, "pp2_ppio_remove_mac_addr: %U removed",
+ format_ethernet_address, &addr);
+ break;
+
+ default:
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ return rv;
+}
diff --git a/src/plugins/dev_armada/pp2/pp2.h b/src/plugins/dev_armada/pp2/pp2.h
new file mode 100644
index 00000000000..160bfd20c5c
--- /dev/null
+++ b/src/plugins/dev_armada/pp2/pp2.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _PP2_H_
+#define _PP2_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/devicetree.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+
+#define MVCONF_DBG_LEVEL 0
+#define MVCONF_PP2_BPOOL_COOKIE_SIZE 32
+#define MVCONF_PP2_BPOOL_DMA_ADDR_SIZE 64
+#define MVCONF_DMA_PHYS_ADDR_T_SIZE 64
+#define MVCONF_SYS_DMA_UIO
+#define MVCONF_TYPES_PUBLIC
+#define MVCONF_DMA_PHYS_ADDR_T_PUBLIC
+
+#include "mv_std.h"
+#include "env/mv_sys_dma.h"
+#include "drivers/mv_pp2.h"
+#include <drivers/mv_pp2_bpool.h>
+#include <drivers/mv_pp2_ppio.h>
+
+#define MVPP2_NUM_HIFS 9
+#define MVPP2_NUM_BPOOLS 16
+#define MVPP2_MAX_THREADS 4
+#define MRVL_PP2_BUFF_BATCH_SZ 32
+#define MV_DSA_N_SRC 32
+
+#define foreach_mv_dsa_tag_field \
+ _ (12, vid) \
+ _ (1, _zero13) \
+ _ (3, pri) \
+ _ (1, cfi_dei) \
+ _ (1, _unused17) \
+ _ (1, src_is_lag) \
+ _ (5, src_port_or_lag) \
+ _ (5, src_dev) \
+ _ (1, src_tagged) \
+ _ (2, tag_type)
+
+typedef enum
+{
+ MV_DSA_TAG_TYPE_TO_CPU = 0,
+ MV_DSA_TAG_TYPE_FROM_CPU = 1,
+ MV_DSA_TAG_TYPE_TO_SNIFFER = 2,
+ MV_DSA_TAG_TYPE_FORWARD = 3
+} mv_dsa_tag_type_t;
+
+typedef enum
+{
+ MVPP2_SEC_IF_ARG_DSA_SWITCH,
+ MVPP2_SEC_IF_ARG_DSA_PORT
+} mvpp2_sec_if_args_t;
+
+typedef union
+{
+ struct
+ {
+#define _(b, n) u32 (n) : (b);
+ foreach_mv_dsa_tag_field
+#undef _
+ };
+ u32 as_u32;
+} mv_dsa_tag_t;
+
+STATIC_ASSERT_SIZEOF (mv_dsa_tag_t, 4);
+
+/* Read a DSA tag from packet memory; the tag is big-endian on the wire and
+ * is accessed through an unaligned-safe u32u pointer. */
+static_always_inline mv_dsa_tag_t
+mv_dsa_tag_read (void *p)
+{
+ return (mv_dsa_tag_t){ .as_u32 = clib_net_to_host_u32 (*(u32u *) p) };
+}
+
+/* Write a DSA tag into packet memory in network byte order.
+ * NOTE(review): unlike mv_dsa_tag_read this does not use an unaligned
+ * pointer type - confirm callers only pass suitably aligned addresses. */
+static_always_inline void
+mv_dsa_tag_write (void *p, mv_dsa_tag_t tag)
+{
+ ((mv_dsa_tag_t *) p)->as_u32 = clib_host_to_net_u32 (tag.as_u32);
+}
+
+/* Per packet-processor device state shared by all its ports. */
+typedef struct
+{
+ /* packet processor id (0 for cp0, 1 for cp1) */
+ u8 pp_id;
+ /* one MUSDK host interface per VPP thread, claimed in mvpp2_global_init */
+ struct pp2_hif *hif[MVPP2_NUM_HIFS];
+ /* per-thread buffer pool plus a batch of pre-filled release entries;
+  * NOTE(review): array is sized by MVPP2_NUM_BPOOLS (16) although it is
+  * indexed by thread like hif[] - confirm the intended bound */
+ struct
+ {
+ struct pp2_bpool *bpool;
+ struct buff_release_entry bre[MRVL_PP2_BUFF_BATCH_SZ];
+ } thread[MVPP2_NUM_BPOOLS];
+
+} mvpp2_device_t;
+
+typedef struct
+{
+ u8 is_enabled : 1;
+ u8 is_dsa : 1;
+ struct pp2_ppio *ppio;
+ u8 ppio_id;
+ struct pp2_ppio_link_info last_link_info;
+ clib_dt_node_t *switch_node;
+ clib_dt_node_t *switch_port_node;
+
+ struct pp2_ppio_desc descs[VLIB_FRAME_SIZE];
+ struct pp2_ppio_desc *desc_ptrs[VLIB_FRAME_SIZE];
+ uword valid_dsa_src_bitmap[1024 / uword_bits];
+ u16 dsa_to_sec_if[1024];
+} mvpp2_port_t;
+
+typedef struct
+{
+ u16 next;
+ u16 n_enq;
+ u32 *buffers;
+} mvpp2_txq_t;
+
+typedef struct
+{
+} mvpp2_rxq_t;
+
+typedef struct
+{
+ struct pp2_ppio_desc desc;
+ u32 sw_if_index;
+ u16 next_index;
+ mv_dsa_tag_t dsa_tag;
+} mvpp2_rx_trace_t;
+
+/* counters.c */
+void mvpp2_port_add_counters (vlib_main_t *, vnet_dev_port_t *);
+void mvpp2_port_clear_counters (vlib_main_t *, vnet_dev_port_t *);
+void mvpp2_rxq_clear_counters (vlib_main_t *, vnet_dev_rx_queue_t *);
+void mvpp2_txq_clear_counters (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t mvpp2_port_get_stats (vlib_main_t *, vnet_dev_port_t *);
+
+/* format.c */
+format_function_t format_pp2_ppio_link_info;
+format_function_t format_mvpp2_port_status;
+format_function_t format_mvpp2_dev_info;
+format_function_t format_mvpp2_rx_trace;
+format_function_t format_mvpp2_rx_desc;
+format_function_t format_mv_dsa_tag;
+
+/* port.c */
+vnet_dev_port_op_t mvpp2_port_init;
+vnet_dev_port_op_no_rv_t mvpp2_port_deinit;
+vnet_dev_port_op_t mvpp2_port_start;
+vnet_dev_port_op_no_rv_t mvpp2_port_stop;
+vnet_dev_port_op_with_ptr_t mvpp2_port_add_sec_if;
+vnet_dev_port_op_with_ptr_t mvpp2_port_del_sec_if;
+vnet_dev_rv_t mvpp2_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t
+mvpp2_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+
+/* queue.c */
+vnet_dev_tx_queue_op_t mvpp2_txq_alloc;
+vnet_dev_tx_queue_op_no_rv_t mvpp2_txq_free;
+
+/* inline funcs */
+
+#define log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, mvpp2_log.class, "%U" f, \
+ format_vnet_dev_log, (dev), \
+ clib_string_skip_prefix (__func__, "mvpp2_"), ##__VA_ARGS__)
+#define log_info(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_INFO, mvpp2_log.class, "%U" f, \
+ format_vnet_dev_log, (dev), 0, ##__VA_ARGS__)
+#define log_notice(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_NOTICE, mvpp2_log.class, "%U" f, \
+ format_vnet_dev_log, (dev), 0, ##__VA_ARGS__)
+#define log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, mvpp2_log.class, "%U" f, \
+ format_vnet_dev_log, (dev), 0, ##__VA_ARGS__)
+#define log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, mvpp2_log.class, "%U" f, format_vnet_dev_log, \
+ (dev), 0, ##__VA_ARGS__)
+
+#define foreach_mvpp2_tx_node_counter \
+ _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots") \
+ _ (PPIO_SEND, ppio_semd, ERROR, "pp2_ppio_send errors") \
+ _ (PPIO_GET_NUM_OUTQ_DONE, ppio_get_num_outq_done, ERROR, \
+ "pp2_ppio_get_num_outq_done errors")
+
+typedef enum
+{
+#define _(f, n, s, d) MVPP2_TX_NODE_CTR_##f,
+ foreach_mvpp2_tx_node_counter
+#undef _
+} mvpp2_tx_node_counter_t;
+
+#define foreach_mvpp2_rx_node_counter \
+ _ (PPIO_RECV, ppio_recv, ERROR, "pp2_ppio_recv error") \
+ _ (BPOOL_GET_NUM_BUFFS, bpool_get_num_bufs, ERROR, \
+ "pp2_bpool_get_num_buffs error") \
+ _ (BPOOL_PUT_BUFFS, bpool_put_buffs, ERROR, "pp2_bpool_put_buffs error") \
+ _ (BUFFER_ALLOC, buffer_alloc, ERROR, "buffer alloc error") \
+ _ (UNKNOWN_DSA_SRC, unknown_dsa_src, ERROR, "unknown DSA source") \
+ _ (MAC_CE, mac_ce, ERROR, "MAC error (CRC error)") \
+ _ (MAC_OR, mac_or, ERROR, "overrun error") \
+ _ (MAC_RSVD, mac_rsvd, ERROR, "unknown MAC error") \
+ _ (MAC_RE, mac_re, ERROR, "resource error") \
+ _ (IP_HDR, ip_hdr, ERROR, "ip4 header error")
+
+typedef enum
+{
+#define _(f, n, s, d) MVPP2_RX_NODE_CTR_##f,
+ foreach_mvpp2_rx_node_counter
+#undef _
+} mvpp2_rx_node_counter_t;
+
+#endif /* _PP2_H_ */
diff --git a/src/plugins/dev_armada/pp2/queue.c b/src/plugins/dev_armada/pp2/queue.c
new file mode 100644
index 00000000000..05015414816
--- /dev/null
+++ b/src/plugins/dev_armada/pp2/queue.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/bus/platform.h>
+#include <vppinfra/ring.h>
+#include <dev_armada/musdk.h>
+#include <dev_armada/pp2/pp2.h>
+
+VLIB_REGISTER_LOG_CLASS (mvpp2_log, static) = {
+ .class_name = "armada",
+ .subclass_name = "pp2-queue",
+};
+
+vnet_dev_rv_t
+mvpp2_txq_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ mvpp2_txq_t *mtq = vnet_dev_get_tx_queue_data (txq);
+ log_debug (txq->port->dev, "");
+
+ ASSERT (mtq->buffers == 0);
+ if (mtq->buffers == 0)
+ {
+ u32 sz = sizeof (u32) * txq->size;
+ mtq->buffers = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES);
+ clib_memset (mtq->buffers, 0, sz);
+ }
+
+ return rv;
+}
+
+void
+mvpp2_txq_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ mvpp2_txq_t *mtq = vnet_dev_get_tx_queue_data (txq);
+
+ log_debug (txq->port->dev, "");
+ if (mtq->buffers)
+ {
+ clib_mem_free (mtq->buffers);
+ mtq->buffers = 0;
+ }
+}
diff --git a/src/plugins/dev_armada/pp2/rx.c b/src/plugins/dev_armada/pp2/rx.c
new file mode 100644
index 00000000000..5b0e8d35000
--- /dev/null
+++ b/src/plugins/dev_armada/pp2/rx.c
@@ -0,0 +1,269 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2024 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/vector/mask_compare.h>
+#include <vppinfra/vector/compress.h>
+
+#include <dev_armada/pp2/pp2.h>
+
+static_always_inline vlib_buffer_t *
+desc_to_vlib_buffer (vlib_main_t *vm, struct pp2_ppio_desc *d)
+{
+ return vlib_get_buffer (vm, pp2_ppio_inq_desc_get_cookie (d));
+}
+
+static_always_inline u64
+mrvl_pp2_rx_one_if (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_dev_rx_queue_t *rxq,
+ vnet_dev_rx_queue_if_rt_data_t *if_rt_data,
+ struct pp2_ppio_desc **desc_ptrs, u32 n_desc,
+ i32 current_data, i32 len_adj, mv_dsa_tag_t tag)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u64 n_rx_bytes = 0;
+ vlib_buffer_t *b0, *b1;
+ u32 n_trace, n_left = n_desc;
+ u32 buffer_indices[VLIB_FRAME_SIZE], *bi = buffer_indices;
+ struct pp2_ppio_desc **dp = desc_ptrs;
+ u32 next_index = if_rt_data->next_index;
+ vlib_buffer_template_t bt = if_rt_data->buffer_template;
+ u32 sw_if_index = if_rt_data->sw_if_index;
+
+ bt.current_data = current_data;
+
+ for (; n_left >= 4; dp += 2, bi += 2, n_left -= 2)
+ {
+ clib_prefetch_store (desc_to_vlib_buffer (vm, dp[2]));
+ clib_prefetch_store (desc_to_vlib_buffer (vm, dp[3]));
+ b0 = desc_to_vlib_buffer (vm, dp[0]);
+ b1 = desc_to_vlib_buffer (vm, dp[1]);
+ bi[0] = pp2_ppio_inq_desc_get_cookie (dp[0]);
+ bi[1] = pp2_ppio_inq_desc_get_cookie (dp[1]);
+ b0->template = bt;
+ b1->template = bt;
+
+ n_rx_bytes += b0->current_length =
+ pp2_ppio_inq_desc_get_pkt_len (dp[0]) + len_adj;
+ n_rx_bytes += b1->current_length =
+ pp2_ppio_inq_desc_get_pkt_len (dp[1]) + len_adj;
+ }
+
+ for (; n_left; dp++, bi++, n_left--)
+ {
+ b0 = desc_to_vlib_buffer (vm, dp[0]);
+ bi[0] = pp2_ppio_inq_desc_get_cookie (dp[0]);
+ b0->template = bt;
+
+ n_rx_bytes += b0->current_length =
+ pp2_ppio_inq_desc_get_pkt_len (dp[0]) + len_adj;
+ }
+
+ /* trace */
+ n_trace = vlib_get_trace_count (vm, node);
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ for (u32 i = 0; i < n_desc && n_trace > 0; i++)
+ {
+ vlib_buffer_t *b = desc_to_vlib_buffer (vm, desc_ptrs[i]);
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b,
+ /* follow_chain */ 0)))
+ {
+ mvpp2_rx_trace_t *tr;
+ tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+ tr->desc = *desc_ptrs[i];
+ tr->next_index = next_index;
+ tr->sw_if_index = sw_if_index;
+ tr->dsa_tag = tag;
+ n_trace--;
+ }
+ }
+ vlib_set_trace_count (vm, node, n_trace);
+ }
+ vlib_buffer_enqueue_to_single_next (vm, node, buffer_indices, next_index,
+ n_desc);
+
+ vlib_increment_combined_counter (
+ vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ vm->thread_index, sw_if_index, n_desc, n_rx_bytes);
+
+ return n_rx_bytes;
+}
+
+static_always_inline uword
+mrvl_pp2_rx_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+ mv_dsa_tag_t dsa_tags[VLIB_FRAME_SIZE];
+ u16 n_desc = VLIB_FRAME_SIZE;
+ vlib_buffer_t *b;
+ u32 i;
+
+ if (PREDICT_FALSE (
+ pp2_ppio_recv (mp->ppio, 0, rxq->queue_id, mp->descs, &n_desc)))
+ {
+ vlib_error_count (vm, node->node_index, MVPP2_RX_NODE_CTR_PPIO_RECV, 1);
+ return 0;
+ }
+
+ if (mp->is_dsa)
+ {
+ for (i = 0; i < n_desc; i++)
+ {
+ b = desc_to_vlib_buffer (vm, mp->descs + i);
+ u8 *start = b->data;
+ mv_dsa_tag_t tag = mv_dsa_tag_read (start + 14);
+ dsa_tags[i] = tag;
+ clib_memmove (start + 6, start + 2, 12);
+ }
+
+ vlib_frame_bitmap_t avail_bmp = {};
+ vlib_frame_bitmap_init (avail_bmp, n_desc);
+ u32 n_avail = n_desc;
+
+ while (n_avail)
+ {
+ vlib_frame_bitmap_t selected_bmp = {};
+ struct pp2_ppio_desc *sel_descs[VLIB_FRAME_SIZE];
+ mv_dsa_tag_t tag;
+ u32 n_sel, index;
+
+ tag = dsa_tags[vlib_frame_bitmap_find_first_set (avail_bmp)];
+ index = tag.src_dev << 5 | tag.src_port_or_lag;
+
+ clib_mask_compare_u32 (tag.as_u32, (u32 *) dsa_tags, selected_bmp,
+ n_desc);
+ n_sel = vlib_frame_bitmap_count_set_bits (selected_bmp);
+ n_avail -= n_sel;
+
+ if (uword_bitmap_is_bit_set (mp->valid_dsa_src_bitmap, index))
+ {
+ clib_compress_u64 ((uword *) sel_descs, (uword *) mp->desc_ptrs,
+ selected_bmp, n_desc);
+ mrvl_pp2_rx_one_if (vm, node, rxq,
+ vnet_dev_get_rx_queue_sec_if_rt_data (
+ rxq, mp->dsa_to_sec_if[index]),
+ sel_descs, n_sel, 6, -4, tag);
+ }
+ else
+ {
+ u32 n_free = 0, buffer_indices[VLIB_FRAME_SIZE];
+
+ foreach_vlib_frame_bitmap_set_bit_index (i, selected_bmp)
+ buffer_indices[n_free++] =
+ pp2_ppio_inq_desc_get_cookie (mp->descs + i);
+
+ u32 n_trace = vlib_get_trace_count (vm, node);
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ foreach_vlib_frame_bitmap_set_bit_index (i, selected_bmp)
+ {
+ vlib_buffer_t *b =
+ desc_to_vlib_buffer (vm, mp->descs + i);
+
+ if (PREDICT_TRUE (vlib_trace_buffer (
+ vm, node, VNET_DEV_ETH_RX_PORT_NEXT_DROP, b,
+ /* follow_chain */ 0)))
+ {
+ mvpp2_rx_trace_t *tr;
+ tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+ tr->desc = mp->descs[i];
+ tr->next_index = VNET_DEV_ETH_RX_PORT_NEXT_DROP;
+ tr->sw_if_index = CLIB_U32_MAX;
+ tr->dsa_tag = dsa_tags[i];
+ n_trace--;
+ }
+ if (n_trace == 0)
+ break;
+ }
+ vlib_set_trace_count (vm, node, n_trace);
+ }
+
+ vlib_buffer_free (vm, buffer_indices, n_free);
+ vlib_error_count (vm, node->node_index,
+ MVPP2_RX_NODE_CTR_UNKNOWN_DSA_SRC, 1);
+ }
+ }
+ }
+ else
+ {
+ mrvl_pp2_rx_one_if (vm, node, rxq,
+ vnet_dev_get_rx_queue_if_rt_data (rxq),
+ mp->desc_ptrs, n_desc, 2, 0, (mv_dsa_tag_t){});
+ }
+
+ return n_desc;
+}
+
+static_always_inline void
+mrvl_pp2_rx_refill (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+ mvpp2_device_t *md = vnet_dev_get_data (dev);
+ u32 thread_index = vm->thread_index;
+ struct pp2_hif *hif = md->hif[thread_index];
+ struct pp2_bpool *bpool = md->thread[thread_index].bpool;
+ struct buff_release_entry *bre = md->thread[thread_index].bre;
+ u32 n_bufs, *bi;
+
+ if (PREDICT_FALSE (pp2_bpool_get_num_buffs (bpool, &n_bufs)))
+ {
+ vlib_error_count (vm, node->node_index,
+ MVPP2_RX_NODE_CTR_BPOOL_GET_NUM_BUFFS, 1);
+ return;
+ }
+
+ n_bufs = rxq->size - n_bufs;
+ while (n_bufs >= MRVL_PP2_BUFF_BATCH_SZ)
+ {
+ u16 n_alloc, i;
+ struct buff_release_entry *e = bre;
+ u32 buffer_indices[MRVL_PP2_BUFF_BATCH_SZ];
+
+ n_alloc = vlib_buffer_alloc (vm, buffer_indices, MRVL_PP2_BUFF_BATCH_SZ);
+
+ if (PREDICT_FALSE (n_alloc == 0))
+ {
+ vlib_error_count (vm, node->node_index,
+ MVPP2_RX_NODE_CTR_BUFFER_ALLOC, 1);
+ return;
+ }
+
+ for (i = n_alloc, bi = buffer_indices; i--; e++, bi++)
+ {
+
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi[0]);
+ e->buff.addr = vlib_buffer_get_pa (vm, b) - 64;
+ e->buff.cookie = bi[0];
+ }
+
+ if (PREDICT_FALSE (pp2_bpool_put_buffs (hif, bre, &n_alloc)))
+ {
+ vlib_error_count (vm, node->node_index,
+ MVPP2_RX_NODE_CTR_BPOOL_PUT_BUFFS, 1);
+ vlib_buffer_free (vm, buffer_indices, n_alloc);
+ return;
+ }
+
+ n_bufs -= n_alloc;
+ }
+}
+
+VNET_DEV_NODE_FN (mvpp2_rx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ u32 n_rx = 0;
+ foreach_vnet_dev_rx_queue_runtime (rxq, node)
+ {
+ n_rx += mrvl_pp2_rx_inline (vm, node, frame, rxq);
+ mrvl_pp2_rx_refill (vm, node, rxq);
+ }
+ return n_rx;
+}
diff --git a/src/plugins/dev_armada/pp2/tx.c b/src/plugins/dev_armada/pp2/tx.c
new file mode 100644
index 00000000000..583eec71d60
--- /dev/null
+++ b/src/plugins/dev_armada/pp2/tx.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2024 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <dev_armada/pp2/pp2.h>
+
+VNET_DEV_NODE_FN (mvpp2_tx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node);
+ vnet_dev_instance_t *ins = vnet_dev_get_dev_instance (rt->dev_instance);
+ vnet_dev_tx_queue_t *txq = rt->tx_queue;
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+ mvpp2_txq_t *mtq = vnet_dev_get_tx_queue_data (txq);
+ mvpp2_port_t *mp = vnet_dev_get_port_data (port);
+ mvpp2_device_t *md = vnet_dev_get_data (dev);
+ u8 qid = txq->queue_id;
+ u32 *buffers = vlib_frame_vector_args (frame);
+ u32 n_vectors = frame->n_vectors, n_left;
+ u16 n_sent;
+ struct pp2_ppio *ppio = mp->ppio;
+ struct pp2_hif *hif = md->hif[vm->thread_index];
+ struct pp2_ppio_desc descs[VLIB_FRAME_SIZE], *d = descs;
+ u16 sz = txq->size;
+ u16 mask = sz - 1;
+ i16 len_adj = 0;
+
+ if (ins->is_primary_if == 0)
+ {
+ vnet_dev_port_interface_t *sif =
+ vnet_dev_port_get_sec_if_by_index (port, ins->sec_if_index);
+
+ mv_dsa_tag_t tag = { .as_u32 = sif->user_data };
+
+ for (u32 i = 0; i < n_vectors; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffers[i]);
+ u8 *start = vlib_buffer_get_current (b);
+ clib_memmove (start - 4, start, 12);
+ mv_dsa_tag_write (start + 8, tag);
+ }
+ len_adj = 4;
+ }
+
+ if (mtq->n_enq)
+ {
+ u16 n_done = 0;
+ if (PREDICT_FALSE (pp2_ppio_get_num_outq_done (ppio, hif, qid, &n_done)))
+ vlib_error_count (vm, node->node_index,
+ MVPP2_TX_NODE_CTR_PPIO_GET_NUM_OUTQ_DONE, 1);
+
+ if (n_done)
+ {
+ vlib_buffer_free_from_ring (
+ vm, mtq->buffers, (mtq->next - mtq->n_enq) & mask, sz, n_done);
+ mtq->n_enq -= n_done;
+ }
+ }
+
+ n_sent = clib_min (n_vectors, sz - mtq->n_enq);
+
+ for (d = descs, n_left = n_sent; n_left; d++, buffers++, n_left--)
+ {
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, buffers[0]);
+ u64 paddr = vlib_buffer_get_pa (vm, b0);
+
+ pp2_ppio_outq_desc_reset (d);
+ pp2_ppio_outq_desc_set_phys_addr (d, paddr + b0->current_data - len_adj);
+ pp2_ppio_outq_desc_set_pkt_offset (d, 0);
+ pp2_ppio_outq_desc_set_pkt_len (d, b0->current_length + len_adj);
+ }
+
+ buffers = vlib_frame_vector_args (frame);
+
+ if (pp2_ppio_send (ppio, hif, qid, descs, &n_sent))
+ {
+ n_sent = 0;
+ vlib_error_count (vm, node->node_index, MVPP2_TX_NODE_CTR_PPIO_SEND, 1);
+ }
+ else if (n_sent)
+ {
+ vlib_buffer_copy_indices_to_ring (mtq->buffers, buffers,
+ mtq->next & mask, sz, n_sent);
+ mtq->next += n_sent;
+ mtq->n_enq += n_sent;
+ }
+
+ /* free unsent buffers */
+ if (PREDICT_FALSE (n_sent != n_vectors))
+ {
+ vlib_buffer_free (vm, buffers + n_sent, n_vectors - n_sent);
+ vlib_error_count (vm, node->node_index, MVPP2_TX_NODE_CTR_NO_FREE_SLOTS,
+ n_vectors - n_sent);
+ }
+
+ return n_sent;
+}
diff --git a/src/plugins/dev_ena/ena.c b/src/plugins/dev_ena/ena.c
index ead090839c7..ed5c47ed505 100644
--- a/src/plugins/dev_ena/ena.c
+++ b/src/plugins/dev_ena/ena.c
@@ -4,7 +4,7 @@
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
+#include <vnet/dev/bus/pci.h>
#include <dev_ena/ena.h>
#include <dev_ena/ena_inlines.h>
#include <vnet/ethernet/ethernet.h>
diff --git a/src/plugins/dev_ena/port.c b/src/plugins/dev_ena/port.c
index 2b26fefc5e3..95d8ff3a08c 100644
--- a/src/plugins/dev_ena/port.c
+++ b/src/plugins/dev_ena/port.c
@@ -4,7 +4,6 @@
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
#include <dev_ena/ena.h>
#include <dev_ena/ena_inlines.h>
#include <vnet/ethernet/ethernet.h>
diff --git a/src/plugins/dev_ena/rx_node.c b/src/plugins/dev_ena/rx_node.c
index 41fc5b8c943..51c6dbce84c 100644
--- a/src/plugins/dev_ena/rx_node.c
+++ b/src/plugins/dev_ena/rx_node.c
@@ -251,7 +251,6 @@ ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vnet_dev_rx_queue_t *rxq)
{
ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq);
- vnet_dev_port_t *port = rxq->port;
vnet_main_t *vnm = vnet_get_main ();
vlib_buffer_t *buffers[VLIB_FRAME_SIZE], **b;
ena_rx_cdesc_status_t statuses[VLIB_FRAME_SIZE + 8];
@@ -260,13 +259,13 @@ ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u16 *csi;
uword n_rx_packets = 0, n_rx_bytes = 0;
vlib_frame_bitmap_t head_bmp = {};
- u32 sw_if_index = port->intf.sw_if_index;
- u32 hw_if_index = port->intf.hw_if_index;
+ u32 sw_if_index = vnet_dev_get_rx_queue_if_sw_if_index (rxq);
+ u32 hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq);
u32 n_trace, n_deq, n_left;
u32 cq_next = q->cq_next;
- u32 next_index = rxq->next_index;
+ u32 next_index = vnet_dev_get_rx_queue_if_next_index (rxq);
vlib_frame_t *next_frame;
- vlib_buffer_template_t bt = rxq->buffer_template;
+ vlib_buffer_template_t bt = vnet_dev_get_rx_queue_if_buffer_template (rxq);
u32 *bi;
int maybe_chained;
diff --git a/src/plugins/dev_iavf/adminq.c b/src/plugins/dev_iavf/adminq.c
index c12dc8aa2f6..2072c697033 100644
--- a/src/plugins/dev_iavf/adminq.c
+++ b/src/plugins/dev_iavf/adminq.c
@@ -5,7 +5,7 @@
#include <ctype.h>
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
+#include <vnet/dev/bus/pci.h>
#include <vnet/dev/counters.h>
#include <dev_iavf/iavf.h>
#include <dev_iavf/iavf_regs.h>
diff --git a/src/plugins/dev_iavf/counters.c b/src/plugins/dev_iavf/counters.c
index 6dcd01141f0..3ab463edb9a 100644
--- a/src/plugins/dev_iavf/counters.c
+++ b/src/plugins/dev_iavf/counters.c
@@ -4,7 +4,6 @@
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
#include <vnet/dev/counters.h>
#include <dev_iavf/iavf.h>
#include <dev_iavf/virtchnl.h>
diff --git a/src/plugins/dev_iavf/format.c b/src/plugins/dev_iavf/format.c
index 9a3dde47ee9..b4a29e4e20a 100644
--- a/src/plugins/dev_iavf/format.c
+++ b/src/plugins/dev_iavf/format.c
@@ -4,7 +4,6 @@
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
#include <vnet/dev/counters.h>
#include <dev_iavf/iavf.h>
#include <dev_iavf/virtchnl.h>
diff --git a/src/plugins/dev_iavf/iavf.c b/src/plugins/dev_iavf/iavf.c
index d1c2b9edc63..f13440f4161 100644
--- a/src/plugins/dev_iavf/iavf.c
+++ b/src/plugins/dev_iavf/iavf.c
@@ -4,7 +4,7 @@
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
+#include <vnet/dev/bus/pci.h>
#include <vnet/dev/counters.h>
#include <vppinfra/ring.h>
#include <dev_iavf/iavf.h>
diff --git a/src/plugins/dev_iavf/port.c b/src/plugins/dev_iavf/port.c
index 982436d9b45..a0530822688 100644
--- a/src/plugins/dev_iavf/port.c
+++ b/src/plugins/dev_iavf/port.c
@@ -4,7 +4,7 @@
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
+#include <vnet/dev/bus/pci.h>
#include <vnet/dev/counters.h>
#include <dev_iavf/iavf.h>
#include <dev_iavf/iavf_regs.h>
@@ -42,29 +42,35 @@ iavf_port_vlan_strip_disable (vlib_main_t *vm, vnet_dev_port_t *port)
vnet_dev_t *dev = port->dev;
iavf_port_t *ap = vnet_dev_get_port_data (port);
virtchnl_vlan_caps_t vc;
- vnet_dev_rv_t rv;
+ vnet_dev_rv_t rv = VNET_DEV_ERR_NOT_SUPPORTED;
u32 outer, inner;
const u32 mask = VIRTCHNL_VLAN_ETHERTYPE_8100;
- if ((ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) == 0)
- return iavf_vc_op_disable_vlan_stripping (vm, dev);
+ if (ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2)
+ {
+ if ((rv = iavf_vc_op_get_offload_vlan_v2_caps (vm, dev, &vc)))
+ return rv;
- if ((rv = iavf_vc_op_get_offload_vlan_v2_caps (vm, dev, &vc)))
- return rv;
+ outer = vc.offloads.stripping_support.outer;
+ inner = vc.offloads.stripping_support.inner;
- outer = vc.offloads.stripping_support.outer;
- inner = vc.offloads.stripping_support.inner;
+ outer = outer & VIRTCHNL_VLAN_TOGGLE ? outer & mask : 0;
+ inner = inner & VIRTCHNL_VLAN_TOGGLE ? inner & mask : 0;
- outer = outer & VIRTCHNL_VLAN_TOGGLE ? outer & mask : 0;
- inner = inner & VIRTCHNL_VLAN_TOGGLE ? inner & mask : 0;
+ virtchnl_vlan_setting_t vs = {
+ .vport_id = ap->vsi_id,
+ .outer_ethertype_setting = outer,
+ .inner_ethertype_setting = inner,
+ };
- virtchnl_vlan_setting_t vs = {
- .vport_id = ap->vsi_id,
- .outer_ethertype_setting = outer,
- .inner_ethertype_setting = inner,
- };
+ if ((rv = iavf_vc_op_disable_vlan_stripping_v2 (vm, dev, &vs)))
+ return rv;
+ }
- return iavf_vc_op_disable_vlan_stripping_v2 (vm, dev, &vs);
+ if (ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+ return iavf_vc_op_disable_vlan_stripping (vm, dev);
+
+ return rv;
}
vnet_dev_rv_t
@@ -85,7 +91,7 @@ iavf_port_init_rss (vlib_main_t *vm, vnet_dev_port_t *port)
.key_len = keylen,
};
- clib_memcpy (key->key, default_rss_key, sizeof (default_rss_key));
+ clib_memcpy (key->key, default_rss_key, keylen);
return iavf_vc_op_config_rss_key (vm, dev, key);
}
@@ -257,7 +263,7 @@ avf_msix_n_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 line)
iavf_reg_write (ad, IAVF_VFINT_DYN_CTLN (line), dyn_ctln_enabled.as_u32);
vlib_node_set_interrupt_pending (vlib_get_main_by_index (line),
- port->intf.rx_node_index);
+ vnet_dev_get_port_rx_node_index (port));
}
vnet_dev_rv_t
@@ -275,7 +281,12 @@ iavf_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
u64_bit_set (&ap->intr_mode_per_rxq_bitmap, q->queue_id, 1);
if ((rv = iavf_port_vlan_strip_disable (vm, port)))
- return rv;
+ {
+ if (rv == VNET_DEV_ERR_NOT_SUPPORTED)
+ log_warn (port->dev, "device doesn't support vlan stripping");
+ else
+ return rv;
+ }
if ((rv = iavf_port_init_rss (vm, port)))
return rv;
@@ -414,17 +425,20 @@ iavf_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port,
int is_primary)
{
iavf_port_t *ap = vnet_dev_get_port_data (port);
- virtchnl_ether_addr_list_t al = {
+ u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_ether_addr_list_t, list, 1)];
+ virtchnl_ether_addr_list_t *al = (virtchnl_ether_addr_list_t *) buffer;
+
+ *al = (virtchnl_ether_addr_list_t){
.vsi_id = ap->vsi_id,
.num_elements = 1,
.list[0].primary = is_primary ? 1 : 0,
.list[0].extra = is_primary ? 0 : 1,
};
- clib_memcpy (al.list[0].addr, addr, sizeof (al.list[0].addr));
+ clib_memcpy (al->list[0].addr, addr, sizeof (al->list[0].addr));
- return is_add ? iavf_vc_op_add_eth_addr (vm, port->dev, &al) :
- iavf_vc_op_del_eth_addr (vm, port->dev, &al);
+ return is_add ? iavf_vc_op_add_eth_addr (vm, port->dev, al) :
+ iavf_vc_op_del_eth_addr (vm, port->dev, al);
}
static vnet_dev_rv_t
diff --git a/src/plugins/dev_iavf/queue.c b/src/plugins/dev_iavf/queue.c
index 113c0dbdfc7..51bf69a458a 100644
--- a/src/plugins/dev_iavf/queue.c
+++ b/src/plugins/dev_iavf/queue.c
@@ -4,7 +4,6 @@
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
#include <vnet/dev/counters.h>
#include <vppinfra/ring.h>
#include <dev_iavf/iavf.h>
diff --git a/src/plugins/dev_iavf/rx_node.c b/src/plugins/dev_iavf/rx_node.c
index ee6d7e8def0..bf650f9bfb9 100644
--- a/src/plugins/dev_iavf/rx_node.c
+++ b/src/plugins/dev_iavf/rx_node.c
@@ -249,14 +249,14 @@ iavf_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 thr_idx = vlib_get_thread_index ();
iavf_rt_data_t *rtd = vnet_dev_get_rt_temp_space (vm);
iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
- vlib_buffer_template_t bt = rxq->buffer_template;
+ vlib_buffer_template_t bt = vnet_dev_get_rx_queue_if_buffer_template (rxq);
u32 n_trace, n_rx_packets = 0, n_rx_bytes = 0;
u16 n_tail_desc = 0;
u64 or_qw1 = 0;
u32 *bi, *to_next, n_left_to_next;
- u32 next_index = rxq->next_index;
- u32 sw_if_index = port->intf.sw_if_index;
- u32 hw_if_index = port->intf.hw_if_index;
+ u32 next_index = vnet_dev_get_rx_queue_if_next_index (rxq);
+ u32 sw_if_index = vnet_dev_get_rx_queue_if_sw_if_index (rxq);
+ u32 hw_if_index = vnet_dev_get_rx_queue_if_hw_if_index (rxq);
u16 next = arq->next;
u16 size = rxq->size;
u16 mask = size - 1;
diff --git a/src/plugins/dev_iavf/virtchnl.c b/src/plugins/dev_iavf/virtchnl.c
index eca48106ce3..7e7715262c2 100644
--- a/src/plugins/dev_iavf/virtchnl.c
+++ b/src/plugins/dev_iavf/virtchnl.c
@@ -4,7 +4,6 @@
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
#include <vnet/dev/counters.h>
#include <dev_iavf/iavf.h>
#include <dev_iavf/virtchnl.h>
diff --git a/src/plugins/dev_iavf/virtchnl_funcs.h b/src/plugins/dev_iavf/virtchnl_funcs.h
index e7f3901e0ee..0d4ab2835f4 100644
--- a/src/plugins/dev_iavf/virtchnl_funcs.h
+++ b/src/plugins/dev_iavf/virtchnl_funcs.h
@@ -9,6 +9,10 @@
#include <vnet/dev/dev.h>
#include <dev_iavf/iavf.h>
+/* The "+ 1" deliberately over-sizes the message by one trailing array
+ * element, because the driver expects that extra slot. Reusing this same
+ * macro keeps the buffer in port.c and the message sizes computed here
+ * in agreement. */
#define VIRTCHNL_MSG_SZ(s, e, n) STRUCT_OFFSET_OF (s, e[(n) + 1])
typedef struct
diff --git a/src/plugins/dev_octeon/CMakeLists.txt b/src/plugins/dev_octeon/CMakeLists.txt
index e8abf1a3389..6109de57a7d 100644
--- a/src/plugins/dev_octeon/CMakeLists.txt
+++ b/src/plugins/dev_octeon/CMakeLists.txt
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright(c) 2022 Cisco Systems, Inc.
-if (NOT VPP_PLATFORM_NAME STREQUAL "octeon10")
+if (NOT VPP_PLATFORM_NAME STREQUAL "octeon10" AND NOT VPP_PLATFORM_NAME STREQUAL "octeon9")
return()
endif()
@@ -21,6 +21,10 @@ endif()
include_directories (${OCTEON_ROC_DIR}/)
+if (VPP_PLATFORM_NAME STREQUAL "octeon9")
+ add_compile_definitions(PLATFORM_OCTEON9)
+endif()
+
add_vpp_plugin(dev_octeon
SOURCES
init.c
@@ -31,6 +35,8 @@ add_vpp_plugin(dev_octeon
rx_node.c
tx_node.c
flow.c
+ counter.c
+ crypto.c
MULTIARCH_SOURCES
rx_node.c
diff --git a/src/plugins/dev_octeon/counter.c b/src/plugins/dev_octeon/counter.c
new file mode 100644
index 00000000000..6f57c1ee649
--- /dev/null
+++ b/src/plugins/dev_octeon/counter.c
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2024 Marvell.
+ * SPDX-License-Identifier: Apache-2.0
+ * https://spdx.org/licenses/Apache-2.0.html
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <dev_octeon/octeon.h>
+#include <dev_octeon/common.h>
+
+VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
+ .class_name = "oct",
+ .subclass_name = "counters",
+};
+
+typedef enum
+{
+ OCT_PORT_CTR_RX_BYTES,
+ OCT_PORT_CTR_TX_BYTES,
+ OCT_PORT_CTR_RX_PACKETS,
+ OCT_PORT_CTR_TX_PACKETS,
+ OCT_PORT_CTR_RX_DROPS,
+ OCT_PORT_CTR_TX_DROPS,
+ OCT_PORT_CTR_RX_DROP_BYTES,
+ OCT_PORT_CTR_RX_UCAST,
+ OCT_PORT_CTR_TX_UCAST,
+ OCT_PORT_CTR_RX_MCAST,
+ OCT_PORT_CTR_TX_MCAST,
+ OCT_PORT_CTR_RX_BCAST,
+ OCT_PORT_CTR_TX_BCAST,
+ OCT_PORT_CTR_RX_FCS,
+ OCT_PORT_CTR_RX_ERR,
+ OCT_PORT_CTR_RX_DROP_MCAST,
+ OCT_PORT_CTR_RX_DROP_BCAST,
+ OCT_PORT_CTR_RX_DROP_L3_MCAST,
+ OCT_PORT_CTR_RX_DROP_L3_BCAST,
+} oct_port_counter_id_t;
+
+vnet_dev_counter_t oct_port_counters[] = {
+ VNET_DEV_CTR_RX_BYTES (OCT_PORT_CTR_RX_BYTES),
+ VNET_DEV_CTR_RX_PACKETS (OCT_PORT_CTR_RX_PACKETS),
+ VNET_DEV_CTR_RX_DROPS (OCT_PORT_CTR_RX_DROPS),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_BYTES, RX, BYTES, "drop bytes"),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_UCAST, RX, PACKETS, "unicast"),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_MCAST, RX, PACKETS, "multicast"),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_BCAST, RX, PACKETS, "broadcast"),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_FCS, RX, PACKETS, "fcs"),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_ERR, RX, PACKETS, "error"),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_MCAST, RX, PACKETS,
+ "drop multicast"),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_BCAST, RX, PACKETS,
+ "drop broadcast"),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_L3_MCAST, RX, PACKETS,
+ "drop L3 multicast"),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_RX_DROP_L3_BCAST, RX, PACKETS,
+ "drop L3 broadcast"),
+
+ VNET_DEV_CTR_TX_BYTES (OCT_PORT_CTR_TX_BYTES),
+ VNET_DEV_CTR_TX_PACKETS (OCT_PORT_CTR_TX_PACKETS),
+ VNET_DEV_CTR_TX_DROPS (OCT_PORT_CTR_TX_DROPS),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_UCAST, TX, PACKETS, "unicast"),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_MCAST, TX, PACKETS, "multicast"),
+ VNET_DEV_CTR_VENDOR (OCT_PORT_CTR_TX_BCAST, TX, PACKETS, "broadcast"),
+};
+
+typedef enum
+{
+ OCT_RXQ_CTR_BYTES,
+ OCT_RXQ_CTR_PKTS,
+ OCT_RXQ_CTR_DROPS,
+ OCT_RXQ_CTR_DROP_BYTES,
+ OCT_RXQ_CTR_ERR,
+} oct_rxq_counter_id_t;
+
+vnet_dev_counter_t oct_rxq_counters[] = {
+ VNET_DEV_CTR_RX_BYTES (OCT_RXQ_CTR_BYTES),
+ VNET_DEV_CTR_RX_PACKETS (OCT_RXQ_CTR_PKTS),
+ VNET_DEV_CTR_RX_DROPS (OCT_RXQ_CTR_DROPS),
+ VNET_DEV_CTR_VENDOR (OCT_RXQ_CTR_DROP_BYTES, RX, BYTES, "drop bytes"),
+ VNET_DEV_CTR_VENDOR (OCT_RXQ_CTR_ERR, RX, PACKETS, "error"),
+};
+
+typedef enum
+{
+ OCT_TXQ_CTR_BYTES,
+ OCT_TXQ_CTR_PKTS,
+ OCT_TXQ_CTR_DROPS,
+ OCT_TXQ_CTR_DROP_BYTES,
+} oct_txq_counter_id_t;
+
+vnet_dev_counter_t oct_txq_counters[] = {
+ VNET_DEV_CTR_TX_BYTES (OCT_TXQ_CTR_BYTES),
+ VNET_DEV_CTR_TX_PACKETS (OCT_TXQ_CTR_PKTS),
+ VNET_DEV_CTR_TX_DROPS (OCT_TXQ_CTR_DROPS),
+ VNET_DEV_CTR_VENDOR (OCT_TXQ_CTR_DROP_BYTES, TX, BYTES, "drop bytes"),
+};
+
+static vnet_dev_rv_t
+oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...)
+{
+ u8 *s = 0;
+ va_list va;
+
+ va_start (va, fmt);
+ s = va_format (s, fmt, &va);
+ va_end (va);
+
+ log_err (dev, "%v - ROC error %s (%d)", s, roc_error_msg_get (rv), rv);
+
+ vec_free (s);
+ return VNET_DEV_ERR_INTERNAL;
+}
+
+void
+oct_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_port_add_counters (vm, port, oct_port_counters,
+ ARRAY_LEN (oct_port_counters));
+
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ {
+ vnet_dev_rx_queue_add_counters (vm, rxq, oct_rxq_counters,
+ ARRAY_LEN (oct_rxq_counters));
+ }
+
+ foreach_vnet_dev_port_tx_queue (txq, port)
+ {
+ vnet_dev_tx_queue_add_counters (vm, txq, oct_txq_counters,
+ ARRAY_LEN (oct_txq_counters));
+ }
+}
+
+vnet_dev_rv_t
+oct_port_get_stats (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+ struct roc_nix_stats stats;
+
+ if ((rrv = roc_nix_stats_get (nix, &stats)))
+ return oct_roc_err (dev, rrv, "roc_nix_stats_get() failed");
+
+ foreach_vnet_dev_counter (c, port->counter_main)
+ {
+ switch (c->user_data)
+ {
+ case OCT_PORT_CTR_RX_BYTES:
+ vnet_dev_counter_value_update (vm, c, stats.rx_octs);
+ break;
+ case OCT_PORT_CTR_TX_BYTES:
+ vnet_dev_counter_value_update (vm, c, stats.tx_octs);
+ break;
+ case OCT_PORT_CTR_RX_PACKETS:
+ vnet_dev_counter_value_update (
+ vm, c, stats.rx_ucast + stats.rx_bcast + stats.rx_mcast);
+ break;
+ case OCT_PORT_CTR_TX_PACKETS:
+ vnet_dev_counter_value_update (
+ vm, c, stats.tx_ucast + stats.tx_bcast + stats.tx_mcast);
+ break;
+ case OCT_PORT_CTR_RX_DROPS:
+ vnet_dev_counter_value_update (vm, c, stats.rx_drop);
+ break;
+ case OCT_PORT_CTR_TX_DROPS:
+ vnet_dev_counter_value_update (vm, c, stats.tx_drop);
+ break;
+ case OCT_PORT_CTR_RX_DROP_BYTES:
+ vnet_dev_counter_value_update (vm, c, stats.rx_drop_octs);
+ break;
+ case OCT_PORT_CTR_RX_UCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_ucast);
+ break;
+ case OCT_PORT_CTR_TX_UCAST:
+ vnet_dev_counter_value_update (vm, c, stats.tx_ucast);
+ break;
+ case OCT_PORT_CTR_RX_MCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_mcast);
+ break;
+ case OCT_PORT_CTR_TX_MCAST:
+ vnet_dev_counter_value_update (vm, c, stats.tx_mcast);
+ break;
+ case OCT_PORT_CTR_RX_BCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_bcast);
+ break;
+ case OCT_PORT_CTR_TX_BCAST:
+ vnet_dev_counter_value_update (vm, c, stats.tx_bcast);
+ break;
+ case OCT_PORT_CTR_RX_FCS:
+ vnet_dev_counter_value_update (vm, c, stats.rx_fcs);
+ break;
+ case OCT_PORT_CTR_RX_ERR:
+ vnet_dev_counter_value_update (vm, c, stats.rx_err);
+ break;
+ case OCT_PORT_CTR_RX_DROP_MCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_drop_mcast);
+ break;
+ case OCT_PORT_CTR_RX_DROP_BCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_drop_bcast);
+ break;
+ case OCT_PORT_CTR_RX_DROP_L3_MCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_drop_l3_mcast);
+ break;
+ case OCT_PORT_CTR_RX_DROP_L3_BCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_drop_l3_bcast);
+ break;
+ default:
+ ASSERT (0);
+ }
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+oct_rxq_get_stats (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_rx_queue_t *rxq)
+{
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ struct roc_nix_stats_queue qstats;
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+
+ if ((rrv = roc_nix_stats_queue_get (nix, crq->rq.qid, 1, &qstats)))
+ return oct_roc_err (dev, rrv, "roc_nix_stats_queue_get() failed");
+
+ foreach_vnet_dev_counter (c, rxq->counter_main)
+ {
+ switch (c->user_data)
+ {
+ case OCT_RXQ_CTR_BYTES:
+ vnet_dev_counter_value_update (vm, c, qstats.rx_octs);
+ break;
+ case OCT_RXQ_CTR_PKTS:
+ vnet_dev_counter_value_update (vm, c, qstats.rx_pkts);
+ break;
+ case OCT_RXQ_CTR_DROPS:
+ vnet_dev_counter_value_update (vm, c, qstats.rx_drop_pkts);
+ break;
+ case OCT_RXQ_CTR_DROP_BYTES:
+ vnet_dev_counter_value_update (vm, c, qstats.rx_drop_octs);
+ break;
+ case OCT_RXQ_CTR_ERR:
+ vnet_dev_counter_value_update (vm, c, qstats.rx_error_pkts);
+ break;
+ default:
+ ASSERT (0);
+ }
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+oct_txq_get_stats (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ struct roc_nix_stats_queue qstats;
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+
+ if ((rrv = roc_nix_stats_queue_get (nix, ctq->sq.qid, 0, &qstats)))
+ return oct_roc_err (dev, rrv, "roc_nix_stats_queue_get() failed");
+
+ foreach_vnet_dev_counter (c, txq->counter_main)
+ {
+ switch (c->user_data)
+ {
+ case OCT_TXQ_CTR_BYTES:
+ vnet_dev_counter_value_update (vm, c, qstats.tx_octs);
+ break;
+ case OCT_TXQ_CTR_PKTS:
+ vnet_dev_counter_value_update (vm, c, qstats.tx_pkts);
+ break;
+ case OCT_TXQ_CTR_DROPS:
+ vnet_dev_counter_value_update (vm, c, qstats.tx_drop_pkts);
+ break;
+ case OCT_TXQ_CTR_DROP_BYTES:
+ vnet_dev_counter_value_update (vm, c, qstats.tx_drop_octs);
+ break;
+ default:
+ ASSERT (0);
+ }
+ }
+
+ return VNET_DEV_OK;
+}
+
+void
+oct_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+
+ if ((rrv = roc_nix_stats_reset (nix)))
+ oct_roc_err (dev, rrv, "roc_nix_stats_reset() failed");
+}
+
+void
+oct_rxq_clear_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_t *dev = rxq->port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+
+ if ((rrv = roc_nix_stats_queue_reset (nix, crq->rq.qid, 1)))
+ oct_roc_err (dev, rrv,
+ "roc_nix_stats_queue_reset() failed for rx queue %u",
+ rxq->queue_id);
+}
+
+void
+oct_txq_clear_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_t *dev = txq->port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+
+ if ((rrv = roc_nix_stats_queue_reset (nix, ctq->sq.qid, 0)))
+ oct_roc_err (dev, rrv,
+ "roc_nix_stats_queue_reset() failed for tx queue %u",
+ txq->queue_id);
+}
diff --git a/src/plugins/dev_octeon/crypto.c b/src/plugins/dev_octeon/crypto.c
new file mode 100644
index 00000000000..8796704edf4
--- /dev/null
+++ b/src/plugins/dev_octeon/crypto.c
@@ -0,0 +1,1754 @@
+/*
+ * Copyright (c) 2024 Marvell.
+ * SPDX-License-Identifier: Apache-2.0
+ * https://spdx.org/licenses/Apache-2.0.html
+ */
+
+#include <vnet/dev/dev.h>
+#include <vnet/devices/devices.h>
+#include <dev_octeon/octeon.h>
+#include <dev_octeon/crypto.h>
+#include <base/roc_api.h>
+#include <common.h>
+
+oct_crypto_main_t oct_crypto_main;
+oct_crypto_dev_t oct_crypto_dev;
+
+VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
+ .class_name = "octeon",
+ .subclass_name = "crypto",
+};
+
+/* Link a vnet crypto key index and an OCTEON session both ways:
+ * the per-type key table entry points at the session, and the session
+ * records the key index it was created for.  'type' indexes
+ * ocm->keys[] (encrypt vs decrypt op type). */
+static_always_inline void
+oct_map_keyindex_to_session (oct_crypto_sess_t *sess, u32 key_index, u8 type)
+{
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+  oct_crypto_key_t *ckey;
+
+  ckey = vec_elt_at_index (ocm->keys[type], key_index);
+
+  ckey->sess = sess;
+  sess->key_index = key_index;
+}
+
+/* Allocate one zeroed, cache-line-aligned oct_crypto_sess_t via the
+ * platform allocator.  Returns NULL on failure (logged against the
+ * per-op-type crypto device).  Caller owns the memory and releases it
+ * with oct_crypto_session_free (). */
+static_always_inline oct_crypto_sess_t *
+oct_crypto_session_alloc (vlib_main_t *vm, u8 type)
+{
+  extern oct_plt_init_param_t oct_plt_init_param;
+  oct_crypto_sess_t *addr = NULL;
+  oct_crypto_main_t *ocm;
+  oct_crypto_dev_t *ocd;
+  u32 size;
+
+  ocm = &oct_crypto_main;
+  /* ocd is used only for error logging here */
+  ocd = ocm->crypto_dev[type];
+
+  size = sizeof (oct_crypto_sess_t);
+
+  addr = oct_plt_init_param.oct_plt_zmalloc (size, CLIB_CACHE_LINE_BYTES);
+  if (addr == NULL)
+    {
+      log_err (ocd->dev, "Failed to allocate crypto session memory");
+      return NULL;
+    }
+
+  return addr;
+}
+
+/* Create (or share) a session for 'key_index' and map the index to it.
+ *
+ * For a LINK key (cipher+auth pair) no new session is allocated: the
+ * session already created for the underlying crypto or integ key is
+ * reused, so both indices resolve to one session.  For a simple key a
+ * fresh session is allocated.  Returns 0 on success, -1 on failure
+ * (no valid sub-key, or allocation failure). */
+static_always_inline i32
+oct_crypto_session_create (vlib_main_t *vm, vnet_crypto_key_index_t key_index,
+			   int op_type)
+{
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+  oct_crypto_sess_t *session;
+  vnet_crypto_key_t *key;
+  oct_crypto_key_t *ckey;
+
+  key = vnet_crypto_get_key (key_index);
+
+  if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
+    {
+      /*
+       * Read crypto or integ key session. And map link key index to same.
+       */
+      if (key->index_crypto != UINT32_MAX)
+	{
+	  ckey = vec_elt_at_index (ocm->keys[op_type], key->index_crypto);
+	  session = ckey->sess;
+	}
+      else if (key->index_integ != UINT32_MAX)
+	{
+	  ckey = vec_elt_at_index (ocm->keys[op_type], key->index_integ);
+	  session = ckey->sess;
+	}
+      else
+	return -1;
+    }
+  else
+    {
+      session = oct_crypto_session_alloc (vm, op_type);
+      if (session == NULL)
+	return -1;
+    }
+
+  oct_map_keyindex_to_session (session, key_index, op_type);
+  return 0;
+}
+
+/* Handle VNET_CRYPTO_KEY_OP_DEL: free the encrypt and decrypt sessions
+ * mapped to 'key_index' and clear the table entries.  If a link key
+ * index was aliased to the same session (session's recorded key_index
+ * differs from ours), that alias entry is reset first so it does not
+ * dangle after the free. */
+void
+oct_crypto_key_del_handler (vlib_main_t *vm, vnet_crypto_key_index_t key_index)
+{
+  extern oct_plt_init_param_t oct_plt_init_param;
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+  oct_crypto_key_t *ckey_linked;
+  oct_crypto_key_t *ckey;
+
+  vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index);
+
+  ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index);
+  if (ckey->sess)
+    {
+      /*
+       * If in case link algo is pointing to same session, reset the pointer.
+       */
+      if (ckey->sess->key_index != key_index)
+	{
+	  ckey_linked = vec_elt_at_index (
+	    ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], ckey->sess->key_index);
+	  ckey_linked->sess = NULL;
+	}
+      oct_plt_init_param.oct_plt_free (ckey->sess);
+      ckey->sess = NULL;
+    }
+
+  ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index);
+  if (ckey->sess)
+    {
+      /*
+       * If in case link algo is pointing to same session, reset the pointer.
+       */
+      if (ckey->sess->key_index != key_index)
+	{
+	  ckey_linked = vec_elt_at_index (
+	    ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], ckey->sess->key_index);
+	  ckey_linked->sess = NULL;
+	}
+
+      oct_plt_init_param.oct_plt_free (ckey->sess);
+      ckey->sess = NULL;
+    }
+}
+
+/* Handle VNET_CRYPTO_KEY_OP_ADD: grow the encrypt and decrypt key
+ * tables to cover 'key_index' and create a session for each direction
+ * if one does not already exist.  Failures are logged and abort the
+ * remainder of the add (no error is returned to the caller). */
+void
+oct_crypto_key_add_handler (vlib_main_t *vm, vnet_crypto_key_index_t key_index)
+{
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+  oct_crypto_key_t *ckey;
+  oct_crypto_dev_t *ocd = &oct_crypto_dev;
+
+  vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index);
+  ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_ENCRYPT], key_index);
+  if (ckey->sess == NULL)
+    {
+      if (oct_crypto_session_create (vm, key_index,
+				     VNET_CRYPTO_OP_TYPE_ENCRYPT))
+	{
+	  log_err (ocd->dev, "Unable to create crypto session");
+	  return;
+	}
+    }
+
+  vec_validate (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index);
+  ckey = vec_elt_at_index (ocm->keys[VNET_CRYPTO_OP_TYPE_DECRYPT], key_index);
+  if (ckey->sess == NULL)
+    {
+      if (oct_crypto_session_create (vm, key_index,
+				     VNET_CRYPTO_OP_TYPE_DECRYPT))
+	{
+	  log_err (ocd->dev, "Unable to create crypto session");
+	  return;
+	}
+    }
+}
+
+/* vnet crypto key-op dispatcher: DEL frees sessions, anything else is
+ * treated as an add.  'started' is set on the first successful add
+ * path; presumably consumed by the dequeue/polling logic — verify
+ * against the rest of this file. */
+void
+oct_crypto_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop,
+			vnet_crypto_key_index_t idx)
+{
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+
+  if (kop == VNET_CRYPTO_KEY_OP_DEL)
+    {
+      oct_crypto_key_del_handler (vm, idx);
+      return;
+    }
+  oct_crypto_key_add_handler (vm, idx);
+
+  ocm->started = 1;
+}
+
+/* Release a session allocated by oct_crypto_session_alloc () back to
+ * the platform allocator. */
+static_always_inline void
+oct_crypto_session_free (vlib_main_t *vm, oct_crypto_sess_t *sess)
+{
+  extern oct_plt_init_param_t oct_plt_init_param;
+
+  oct_plt_init_param.oct_plt_free (sess);
+  return;
+}
+
+#ifdef PLATFORM_OCTEON9
+/* OCTEON9 path: submit one CPT instruction through the LMTLINE,
+ * retrying until the LDEOR status reports the store completed
+ * (status 0 means the LMTST was aborted and must be replayed). */
+static inline void
+oct_cpt_inst_submit (struct cpt_inst_s *inst, uint64_t lmtline,
+		     uint64_t io_addr)
+{
+  uint64_t lmt_status;
+
+  do
+    {
+      /* Copy CPT command to LMTLINE */
+      roc_lmt_mov64 ((void *) lmtline, inst);
+
+      /*
+       * Make sure compiler does not reorder memcpy and ldeor.
+       * LMTST transactions are always flushed from the write
+       * buffer immediately, a DMB is not required to push out
+       * LMTSTs.
+       */
+      asm volatile ("dmb oshst" : : : "memory");
+      lmt_status = roc_lmt_submit_ldeor (io_addr);
+    }
+  while (lmt_status == 0);
+}
+#endif
+
+/* Submit 'n_left' CPT instructions to the crypto device.
+ *
+ * OCTEON9: one LMTST per instruction via oct_cpt_inst_submit ().
+ * CN10K: batch up to OCT_MAX_LMT_SZ instructions per STEORL, copying
+ * each instruction into its own LMT line first.  NOTE(review): the
+ * line-address setup and copy loops use a literal 16 — presumably
+ * equal to OCT_MAX_LMT_SZ; confirm and prefer the macro. */
+static_always_inline void
+oct_crypto_burst_submit (oct_crypto_dev_t *crypto_dev, struct cpt_inst_s *inst,
+			 u32 n_left)
+{
+  u64 lmt_base;
+  u64 io_addr;
+  u32 count;
+
+#ifdef PLATFORM_OCTEON9
+  lmt_base = crypto_dev->lf.lmt_base;
+  io_addr = crypto_dev->lf.io_addr;
+
+  for (count = 0; count < n_left; count++)
+    oct_cpt_inst_submit (inst + count, lmt_base, io_addr);
+#else
+  u64 *lmt_line[OCT_MAX_LMT_SZ];
+  u64 lmt_arg, core_lmt_id;
+
+  lmt_base = crypto_dev->lmtline.lmt_base;
+  io_addr = crypto_dev->lmtline.io_addr;
+
+  ROC_LMT_CPT_BASE_ID_GET (lmt_base, core_lmt_id);
+
+  /* Pre-compute the per-slot LMT line addresses */
+  for (count = 0; count < 16; count++)
+    {
+      lmt_line[count] = OCT_CPT_LMT_GET_LINE_ADDR (lmt_base, count);
+    }
+
+  /* Full batches of OCT_MAX_LMT_SZ instructions */
+  while (n_left > OCT_MAX_LMT_SZ)
+    {
+
+      /*
+       * Add a memory barrier so that LMTLINEs from the previous iteration
+       * can be reused for a subsequent transfer.
+       */
+      asm volatile ("dmb oshst" ::: "memory");
+
+      lmt_arg = ROC_CN10K_CPT_LMT_ARG | (u64) core_lmt_id;
+
+      for (count = 0; count < 16; count++)
+	{
+	  roc_lmt_mov_seg ((void *) lmt_line[count], inst + count,
+			   CPT_LMT_SIZE_COPY);
+	}
+
+      /* Set number of LMTSTs, excluding the first */
+      lmt_arg |= (OCT_MAX_LMT_SZ - 1) << 12;
+
+      roc_lmt_submit_steorl (lmt_arg, io_addr);
+
+      inst += OCT_MAX_LMT_SZ;
+      n_left -= OCT_MAX_LMT_SZ;
+    }
+
+  /* Trailing partial batch */
+  if (n_left > 0)
+    {
+      /*
+       * Add a memory barrier so that LMTLINEs from the previous iteration
+       * can be reused for a subsequent transfer.
+       */
+      asm volatile ("dmb oshst" ::: "memory");
+
+      lmt_arg = ROC_CN10K_CPT_LMT_ARG | (u64) core_lmt_id;
+
+      for (count = 0; count < n_left; count++)
+	{
+	  roc_lmt_mov_seg ((void *) lmt_line[count], inst + count,
+			   CPT_LMT_SIZE_COPY);
+	}
+
+      /* Set number of LMTSTs, excluding the first */
+      lmt_arg |= (n_left - 1) << 12;
+
+      roc_lmt_submit_steorl (lmt_arg, io_addr);
+    }
+#endif
+}
+
+/* Populate SG1-format scatter/gather components (4 entries per
+ * roc_sglist_comp, big-endian len/ptr fields) from an iovec, starting
+ * at component index 'i'.  Skips 'from_offset' bytes, copies at most
+ * *psize bytes, and optionally splices 'extra_buf' into the stream at
+ * 'extra_offset' (used to inject AAD between passthrough and payload).
+ * On return *psize holds the bytes NOT yet described (0 when the iovec
+ * covered everything); returns the next free component index. */
+static_always_inline uint32_t
+oct_crypto_fill_sg_comp_from_iov (struct roc_sglist_comp *list, uint32_t i,
+				  struct roc_se_iov_ptr *from,
+				  uint32_t from_offset, uint32_t *psize,
+				  struct roc_se_buf_ptr *extra_buf,
+				  uint32_t extra_offset)
+{
+  uint32_t extra_len = extra_buf ? extra_buf->size : 0;
+  uint32_t size = *psize;
+  int32_t j;
+
+  for (j = 0; j < from->buf_cnt; j++)
+    {
+      struct roc_sglist_comp *to = &list[i >> 2];
+      uint32_t buf_sz = from->bufs[j].size;
+      void *vaddr = from->bufs[j].vaddr;
+      uint64_t e_vaddr;
+      uint32_t e_len;
+
+      if (PREDICT_FALSE (from_offset))
+	{
+	  /* Still consuming the initial skip region */
+	  if (from_offset >= buf_sz)
+	    {
+	      from_offset -= buf_sz;
+	      continue;
+	    }
+	  e_vaddr = (uint64_t) vaddr + from_offset;
+	  e_len = clib_min ((buf_sz - from_offset), size);
+	  from_offset = 0;
+	}
+      else
+	{
+	  e_vaddr = (uint64_t) vaddr;
+	  e_len = clib_min (buf_sz, size);
+	}
+
+      /* SG1 components carry big-endian length and pointer */
+      to->u.s.len[i % 4] = clib_host_to_net_u16 (e_len);
+      to->ptr[i % 4] = clib_host_to_net_u64 (e_vaddr);
+
+      if (extra_len && (e_len >= extra_offset))
+	{
+	  /* Break the data at given offset */
+	  uint32_t next_len = e_len - extra_offset;
+	  uint64_t next_vaddr = e_vaddr + extra_offset;
+
+	  if (!extra_offset)
+	    {
+	      /* Extra buf goes first: reuse this slot for it */
+	      i--;
+	    }
+	  else
+	    {
+	      e_len = extra_offset;
+	      size -= e_len;
+	      to->u.s.len[i % 4] = clib_host_to_net_u16 (e_len);
+	    }
+
+	  extra_len = clib_min (extra_len, size);
+	  /* Insert extra data ptr */
+	  if (extra_len)
+	    {
+	      i++;
+	      to = &list[i >> 2];
+	      to->u.s.len[i % 4] = clib_host_to_net_u16 (extra_len);
+	      to->ptr[i % 4] =
+		clib_host_to_net_u64 ((uint64_t) extra_buf->vaddr);
+	      size -= extra_len;
+	    }
+
+	  next_len = clib_min (next_len, size);
+	  /* insert the rest of the data */
+	  if (next_len)
+	    {
+	      i++;
+	      to = &list[i >> 2];
+	      to->u.s.len[i % 4] = clib_host_to_net_u16 (next_len);
+	      to->ptr[i % 4] = clib_host_to_net_u64 (next_vaddr);
+	      size -= next_len;
+	    }
+	  extra_len = 0;
+	}
+      else
+	{
+	  size -= e_len;
+	}
+      if (extra_offset)
+	extra_offset -= size;
+      i++;
+
+      if (PREDICT_FALSE (!size))
+	break;
+    }
+
+  *psize = size;
+  return (uint32_t) i;
+}
+
+/* SG2-format twin of oct_crypto_fill_sg_comp_from_iov (): 3 native-
+ * endian entries per roc_sg2list_comp plus a valid_segs count.  Same
+ * contract — skip 'from_offset', describe at most *psize bytes,
+ * optionally splice 'extra_buf' at 'extra_offset'; *psize returns the
+ * undescribed remainder and the next free index is returned. */
+static_always_inline u32
+oct_crypto_fill_sg2_comp_from_iov (struct roc_sg2list_comp *list, u32 i,
+				   struct roc_se_iov_ptr *from,
+				   u32 from_offset, u32 *psize,
+				   struct roc_se_buf_ptr *extra_buf,
+				   u32 extra_offset)
+{
+  u32 extra_len = extra_buf ? extra_buf->size : 0;
+  u32 size = *psize, buf_sz, e_len, next_len;
+  struct roc_sg2list_comp *to;
+  u64 e_vaddr, next_vaddr;
+  void *vaddr;
+  i32 j;
+
+  for (j = 0; j < from->buf_cnt; j++)
+    {
+      to = &list[i / 3];
+      buf_sz = from->bufs[j].size;
+      vaddr = from->bufs[j].vaddr;
+
+      if (PREDICT_FALSE (from_offset))
+	{
+	  /* Still consuming the initial skip region */
+	  if (from_offset >= buf_sz)
+	    {
+	      from_offset -= buf_sz;
+	      continue;
+	    }
+	  e_vaddr = (u64) vaddr + from_offset;
+	  e_len = clib_min ((buf_sz - from_offset), size);
+	  from_offset = 0;
+	}
+      else
+	{
+	  e_vaddr = (u64) vaddr;
+	  e_len = clib_min (buf_sz, size);
+	}
+
+      /* SG2 entries are native-endian; valid_segs tracks fill level */
+      to->u.s.len[i % 3] = (e_len);
+      to->ptr[i % 3] = (e_vaddr);
+      to->u.s.valid_segs = (i % 3) + 1;
+
+      if (extra_len && (e_len >= extra_offset))
+	{
+	  /* Break the data at given offset */
+	  next_len = e_len - extra_offset;
+	  next_vaddr = e_vaddr + extra_offset;
+
+	  if (!extra_offset)
+	    i--;
+	  else
+	    {
+	      e_len = extra_offset;
+	      size -= e_len;
+	      to->u.s.len[i % 3] = (e_len);
+	    }
+
+	  extra_len = clib_min (extra_len, size);
+	  /* Insert extra data ptr */
+	  if (extra_len)
+	    {
+	      i++;
+	      to = &list[i / 3];
+	      to->u.s.len[i % 3] = (extra_len);
+	      to->ptr[i % 3] = ((u64) extra_buf->vaddr);
+	      to->u.s.valid_segs = (i % 3) + 1;
+	      size -= extra_len;
+	    }
+
+	  next_len = clib_min (next_len, size);
+	  /* insert the rest of the data */
+	  if (next_len)
+	    {
+	      i++;
+	      to = &list[i / 3];
+	      to->u.s.len[i % 3] = (next_len);
+	      to->ptr[i % 3] = (next_vaddr);
+	      to->u.s.valid_segs = (i % 3) + 1;
+	      size -= next_len;
+	    }
+	  extra_len = 0;
+	}
+      else
+	size -= e_len;
+
+      if (extra_offset)
+	extra_offset -= size;
+
+      i++;
+
+      if (PREDICT_FALSE (!size))
+	break;
+    }
+
+  *psize = size;
+  return (u32) i;
+}
+
+/* Append one whole buffer to an SG1 list at index 'i' (big-endian
+ * fields); returns the next free index. */
+static_always_inline uint32_t
+oct_crypto_fill_sg_comp_from_buf (struct roc_sglist_comp *list, uint32_t i,
+				  struct roc_se_buf_ptr *from)
+{
+  struct roc_sglist_comp *to = &list[i >> 2];
+
+  to->u.s.len[i % 4] = clib_host_to_net_u16 (from->size);
+  to->ptr[i % 4] = clib_host_to_net_u64 ((uint64_t) from->vaddr);
+  return ++i;
+}
+
+/* Append one (addr, size) region to an SG1 list at index 'i'
+ * (big-endian fields); returns the next free index. */
+static_always_inline uint32_t
+oct_crypto_fill_sg_comp (struct roc_sglist_comp *list, uint32_t i,
+			 uint64_t dma_addr, uint32_t size)
+{
+  struct roc_sglist_comp *to = &list[i >> 2];
+
+  to->u.s.len[i % 4] = clib_host_to_net_u16 (size);
+  to->ptr[i % 4] = clib_host_to_net_u64 (dma_addr);
+  return ++i;
+}
+
+/* Append one (addr, size) region to an SG2 list at 'index'
+ * (native-endian, updates valid_segs); returns the next free index. */
+static_always_inline u32
+oct_crypto_fill_sg2_comp (struct roc_sg2list_comp *list, u32 index,
+			  u64 dma_addr, u32 size)
+{
+  struct roc_sg2list_comp *to = &list[index / 3];
+
+  to->u.s.len[index % 3] = (size);
+  to->ptr[index % 3] = (dma_addr);
+  to->u.s.valid_segs = (index % 3) + 1;
+  return ++index;
+}
+
+/* Append one whole buffer to an SG2 list at 'index' (native-endian,
+ * updates valid_segs); returns the next free index. */
+static_always_inline u32
+oct_crypto_fill_sg2_comp_from_buf (struct roc_sg2list_comp *list, u32 index,
+				   struct roc_se_buf_ptr *from)
+{
+  struct roc_sg2list_comp *to = &list[index / 3];
+
+  to->u.s.len[index % 3] = (from->size);
+  to->ptr[index % 3] = ((u64) from->vaddr);
+  to->u.s.valid_segs = (index % 3) + 1;
+  return ++index;
+}
+
+/* Build the SG1 (OCTEON9-style) DPTR layout for a flexi-crypto CPT
+ * instruction: the meta buffer receives the offset-control word + IV,
+ * an SG header, the input gather list and the output scatter list.
+ * Gather covers [offset ctrl|IV], payload (with optional spliced AAD)
+ * and, for decrypt, the MAC to verify; scatter mirrors it for output
+ * with the MAC appended on encrypt.  Sets inst->dptr and w4.dlen.
+ * Returns 0 on success, -1 if the described data exceeds buffer or
+ * component limits.  Several parameters (pack_iv, pdcp_alg_type,
+ * pdcp_flag) are accepted but not used in this path. */
+static_always_inline int __attribute__ ((unused))
+oct_crypto_sg_inst_prep (struct roc_se_fc_params *params,
+			 struct cpt_inst_s *inst, uint64_t offset_ctrl,
+			 const uint8_t *iv_s, int iv_len, uint8_t pack_iv,
+			 uint8_t pdcp_alg_type, int32_t inputlen,
+			 int32_t outputlen, uint32_t passthrough_len,
+			 uint32_t req_flags, int pdcp_flag, int decrypt)
+{
+  struct roc_sglist_comp *gather_comp, *scatter_comp;
+  void *m_vaddr = params->meta_buf.vaddr;
+  struct roc_se_buf_ptr *aad_buf = NULL;
+  uint32_t mac_len = 0, aad_len = 0;
+  struct roc_se_ctx *se_ctx;
+  uint32_t i, g_size_bytes;
+  uint64_t *offset_vaddr;
+  uint32_t s_size_bytes;
+  uint8_t *in_buffer;
+  uint32_t size;
+  uint8_t *iv_d;
+  int ret = 0;
+
+  se_ctx = params->ctx;
+  mac_len = se_ctx->mac_len;
+
+  if (PREDICT_FALSE (req_flags & ROC_SE_VALID_AAD_BUF))
+    {
+      /* We don't support both AAD and auth data separately */
+      aad_len = params->aad_buf.size;
+      aad_buf = &params->aad_buf;
+    }
+
+  /* save space for iv */
+  offset_vaddr = m_vaddr;
+
+  m_vaddr =
+    (uint8_t *) m_vaddr + ROC_SE_OFF_CTRL_LEN + PLT_ALIGN_CEIL (iv_len, 8);
+
+  inst->w4.s.opcode_major |= (uint64_t) ROC_DMA_MODE_SG;
+
+  /* iv offset is 0 */
+  *offset_vaddr = offset_ctrl;
+
+  iv_d = ((uint8_t *) offset_vaddr + ROC_SE_OFF_CTRL_LEN);
+
+  if (PREDICT_TRUE (iv_len))
+    memcpy (iv_d, iv_s, iv_len);
+
+  /* DPTR has SG list */
+
+  /* TODO Add error check if space will be sufficient */
+  gather_comp = (struct roc_sglist_comp *) ((uint8_t *) m_vaddr + 8);
+
+  /*
+   * Input Gather List
+   */
+  i = 0;
+
+  /* Offset control word followed by iv */
+
+  i = oct_crypto_fill_sg_comp (gather_comp, i, (uint64_t) offset_vaddr,
+			       ROC_SE_OFF_CTRL_LEN + iv_len);
+
+  /* Add input data */
+  if (decrypt && (req_flags & ROC_SE_VALID_MAC_BUF))
+    {
+      /* Payload excludes the trailing MAC, which is gathered separately */
+      size = inputlen - iv_len - mac_len;
+
+      if (PREDICT_TRUE (size))
+	{
+	  uint32_t aad_offset = aad_len ? passthrough_len : 0;
+	  i = oct_crypto_fill_sg_comp_from_iov (
+	    gather_comp, i, params->src_iov, 0, &size, aad_buf, aad_offset);
+	  if (PREDICT_FALSE (size))
+	    {
+	      clib_warning ("Cryptodev: Insufficient buffer"
+			    " space, size %d needed",
+			    size);
+	      return -1;
+	    }
+	}
+
+      if (mac_len)
+	i =
+	  oct_crypto_fill_sg_comp_from_buf (gather_comp, i, &params->mac_buf);
+    }
+  else
+    {
+      /* input data */
+      size = inputlen - iv_len;
+      if (size)
+	{
+	  uint32_t aad_offset = aad_len ? passthrough_len : 0;
+	  i = oct_crypto_fill_sg_comp_from_iov (
+	    gather_comp, i, params->src_iov, 0, &size, aad_buf, aad_offset);
+	  if (PREDICT_FALSE (size))
+	    {
+	      clib_warning ("Cryptodev: Insufficient buffer space,"
+			    " size %d needed",
+			    size);
+	      return -1;
+	    }
+	}
+    }
+
+  /* SG header: word 0/1 reserved, word 2 = gather count (big-endian) */
+  in_buffer = m_vaddr;
+  ((uint16_t *) in_buffer)[0] = 0;
+  ((uint16_t *) in_buffer)[1] = 0;
+  ((uint16_t *) in_buffer)[2] = clib_host_to_net_u16 (i);
+
+  g_size_bytes = ((i + 3) / 4) * sizeof (struct roc_sglist_comp);
+  /*
+   * Output Scatter List
+   */
+
+  i = 0;
+  scatter_comp =
+    (struct roc_sglist_comp *) ((uint8_t *) gather_comp + g_size_bytes);
+
+  i = oct_crypto_fill_sg_comp (
+    scatter_comp, i, (uint64_t) offset_vaddr + ROC_SE_OFF_CTRL_LEN, iv_len);
+
+  /* Add output data */
+  if ((!decrypt) && (req_flags & ROC_SE_VALID_MAC_BUF))
+    {
+      size = outputlen - iv_len - mac_len;
+      if (size)
+	{
+
+	  uint32_t aad_offset = aad_len ? passthrough_len : 0;
+
+	  i = oct_crypto_fill_sg_comp_from_iov (
+	    scatter_comp, i, params->dst_iov, 0, &size, aad_buf, aad_offset);
+	  if (PREDICT_FALSE (size))
+	    {
+	      clib_warning ("Cryptodev: Insufficient buffer space,"
+			    " size %d needed",
+			    size);
+	      return -1;
+	    }
+	}
+
+      /* mac data */
+      if (mac_len)
+	i =
+	  oct_crypto_fill_sg_comp_from_buf (scatter_comp, i, &params->mac_buf);
+    }
+  else
+    {
+      /* Output including mac */
+      size = outputlen - iv_len;
+
+      if (size)
+	{
+	  uint32_t aad_offset = aad_len ? passthrough_len : 0;
+
+	  i = oct_crypto_fill_sg_comp_from_iov (
+	    scatter_comp, i, params->dst_iov, 0, &size, aad_buf, aad_offset);
+
+	  if (PREDICT_FALSE (size))
+	    {
+	      clib_warning ("Cryptodev: Insufficient buffer space,"
+			    " size %d needed",
+			    size);
+	      return -1;
+	    }
+	}
+    }
+  /* SG header word 3 = scatter count (big-endian) */
+  ((uint16_t *) in_buffer)[3] = clib_host_to_net_u16 (i);
+  s_size_bytes = ((i + 3) / 4) * sizeof (struct roc_sglist_comp);
+
+  size = g_size_bytes + s_size_bytes + ROC_SG_LIST_HDR_SIZE;
+
+  /* This is DPTR len in case of SG mode */
+  inst->w4.s.dlen = size;
+
+  if (PREDICT_FALSE (size > ROC_SG_MAX_DLEN_SIZE))
+    {
+      clib_warning (
+	"Cryptodev: Exceeds max supported components. Reduce segments");
+      ret = -1;
+    }
+
+  inst->dptr = (uint64_t) in_buffer;
+  return ret;
+}
+
+/* Build the SG2 (CN10K-style) gather/scatter layout for a flexi-crypto
+ * CPT instruction.  Same data plan as oct_crypto_sg_inst_prep () —
+ * [offset ctrl|IV] + payload (+AAD splice) + MAC — but the lists are
+ * addressed via w5 (dptr/gather_sz) and w6 (rptr/scatter_sz) instead
+ * of an in-memory SG header.  w4.dlen is set to the plain-data length
+ * (inputlen + ROC_SE_OFF_CTRL_LEN).  Returns 0 on success, -1 when a
+ * list would exceed the 4-bit size fields or the buffers are too
+ * small.  pack_iv/pdcp_alg_type/pdcp_flag are unused in this path. */
+static_always_inline int __attribute__ ((unused))
+oct_crypto_sg2_inst_prep (struct roc_se_fc_params *params,
+			  struct cpt_inst_s *inst, u64 offset_ctrl,
+			  const u8 *iv_s, int iv_len, u8 pack_iv,
+			  u8 pdcp_alg_type, i32 inputlen, i32 outputlen,
+			  u32 passthrough_len, u32 req_flags, int pdcp_flag,
+			  int decrypt)
+{
+  u32 mac_len = 0, aad_len = 0, size, index, g_size_bytes;
+  struct roc_sg2list_comp *gather_comp, *scatter_comp;
+  void *m_vaddr = params->meta_buf.vaddr;
+  struct roc_se_buf_ptr *aad_buf = NULL;
+  union cpt_inst_w5 cpt_inst_w5;
+  union cpt_inst_w6 cpt_inst_w6;
+  u16 scatter_sz, gather_sz;
+  struct roc_se_ctx *se_ctx;
+  u64 *offset_vaddr;
+  int ret = 0;
+  u8 *iv_d;
+
+  se_ctx = params->ctx;
+  mac_len = se_ctx->mac_len;
+
+  if (PREDICT_FALSE (req_flags & ROC_SE_VALID_AAD_BUF))
+    {
+      /* We don't support both AAD and auth data separately */
+      aad_len = params->aad_buf.size;
+      aad_buf = &params->aad_buf;
+    }
+
+  /* save space for iv */
+  offset_vaddr = m_vaddr;
+
+  m_vaddr = (u8 *) m_vaddr + ROC_SE_OFF_CTRL_LEN + PLT_ALIGN_CEIL (iv_len, 8);
+
+  inst->w4.s.opcode_major |= (u64) ROC_DMA_MODE_SG;
+
+  /* This is DPTR len in case of SG mode */
+  inst->w4.s.dlen = inputlen + ROC_SE_OFF_CTRL_LEN;
+
+  /* iv offset is 0 */
+  *offset_vaddr = offset_ctrl;
+  iv_d = ((u8 *) offset_vaddr + ROC_SE_OFF_CTRL_LEN);
+
+  if (PREDICT_TRUE (iv_len))
+    clib_memcpy (iv_d, iv_s, iv_len);
+
+  /* DPTR has SG list */
+
+  gather_comp = (struct roc_sg2list_comp *) ((u8 *) m_vaddr);
+
+  /*
+   * Input Gather List
+   */
+  index = 0;
+
+  /* Offset control word followed by iv */
+
+  index = oct_crypto_fill_sg2_comp (gather_comp, index, (u64) offset_vaddr,
+				    ROC_SE_OFF_CTRL_LEN + iv_len);
+
+  /* Add input data */
+  if (decrypt && (req_flags & ROC_SE_VALID_MAC_BUF))
+    {
+      /* Payload excludes the trailing MAC, which is gathered separately */
+      size = inputlen - iv_len - mac_len;
+      if (size)
+	{
+	  /* input data only */
+	  u32 aad_offset = aad_len ? passthrough_len : 0;
+
+	  index = oct_crypto_fill_sg2_comp_from_iov (gather_comp, index,
+						     params->src_iov, 0, &size,
+						     aad_buf, aad_offset);
+
+	  if (PREDICT_FALSE (size))
+	    {
+	      clib_warning ("Cryptodev: Insufficient buffer"
+			    " space, size %d needed",
+			    size);
+	      return -1;
+	    }
+	}
+
+      /* mac data */
+      if (mac_len)
+	index = oct_crypto_fill_sg2_comp_from_buf (gather_comp, index,
+						   &params->mac_buf);
+    }
+  else
+    {
+      /* input data */
+      size = inputlen - iv_len;
+      if (size)
+	{
+	  u32 aad_offset = aad_len ? passthrough_len : 0;
+
+	  index = oct_crypto_fill_sg2_comp_from_iov (gather_comp, index,
+						     params->src_iov, 0, &size,
+						     aad_buf, aad_offset);
+	  if (PREDICT_FALSE (size))
+	    {
+	      clib_warning ("Cryptodev: Insufficient buffer space,"
+			    " size %d needed",
+			    size);
+	      return -1;
+	    }
+	}
+    }
+
+  gather_sz = (index + 2) / 3;
+  g_size_bytes = gather_sz * sizeof (struct roc_sg2list_comp);
+
+  /*
+   * Output Scatter List
+   */
+
+  index = 0;
+  scatter_comp =
+    (struct roc_sg2list_comp *) ((u8 *) gather_comp + g_size_bytes);
+
+  index = oct_crypto_fill_sg2_comp (
+    scatter_comp, index, (u64) offset_vaddr + ROC_SE_OFF_CTRL_LEN, iv_len);
+
+  /* Add output data */
+  if ((!decrypt) && (req_flags & ROC_SE_VALID_MAC_BUF))
+    {
+      size = outputlen - iv_len - mac_len;
+      if (size)
+	{
+
+	  u32 aad_offset = aad_len ? passthrough_len : 0;
+
+	  index = oct_crypto_fill_sg2_comp_from_iov (scatter_comp, index,
+						     params->dst_iov, 0, &size,
+						     aad_buf, aad_offset);
+	  if (PREDICT_FALSE (size))
+	    {
+	      clib_warning ("Cryptodev: Insufficient buffer space,"
+			    " size %d needed",
+			    size);
+	      return -1;
+	    }
+	}
+
+      /* mac data */
+      if (mac_len)
+	index = oct_crypto_fill_sg2_comp_from_buf (scatter_comp, index,
+						   &params->mac_buf);
+    }
+  else
+    {
+      /* Output including mac */
+      size = outputlen - iv_len;
+      if (size)
+	{
+	  u32 aad_offset = aad_len ? passthrough_len : 0;
+
+	  index = oct_crypto_fill_sg2_comp_from_iov (scatter_comp, index,
+						     params->dst_iov, 0, &size,
+						     aad_buf, aad_offset);
+
+	  if (PREDICT_FALSE (size))
+	    {
+	      clib_warning ("Cryptodev: Insufficient buffer space,"
+			    " size %d needed",
+			    size);
+	      return -1;
+	    }
+	}
+    }
+
+  scatter_sz = (index + 2) / 3;
+
+  cpt_inst_w5.s.gather_sz = gather_sz;
+  cpt_inst_w6.s.scatter_sz = scatter_sz;
+
+  cpt_inst_w5.s.dptr = (u64) gather_comp;
+  cpt_inst_w6.s.rptr = (u64) scatter_comp;
+
+  inst->w5.u64 = cpt_inst_w5.u64;
+  inst->w6.u64 = cpt_inst_w6.u64;
+
+  /* gather_sz/scatter_sz must fit in 4 bits each */
+  if (PREDICT_FALSE ((scatter_sz >> 4) || (gather_sz >> 4)))
+    {
+      clib_warning (
+	"Cryptodev: Exceeds max supported components. Reduce segments");
+      ret = -1;
+    }
+
+  return ret;
+}
+
+/* Prepare a flexi-crypto (FC major opcode) CPT instruction for a
+ * cipher+HMAC or AEAD request.
+ *
+ * d_offs packs encr/auth offsets and d_lens packs encr/auth lengths
+ * (extracted with the ROC_SE_* macros).  When an AAD buffer is present
+ * the data below encr_offset becomes passthrough and AAD is spliced in
+ * by the SG prep helpers.  Computes input/output lengths (decrypt
+ * consumes the MAC, encrypt produces it), validates that the offsets
+ * fit the offset-control word fields, then delegates the DPTR layout
+ * to the platform-specific SG prep.  Returns 0 on success, -1 on any
+ * failure.  NOTE(review): iv_len is fixed at 16 here — holds for
+ * AES-CBC/CTR/GCM paths wired up below; confirm if other ciphers are
+ * ever routed through this function. */
+static_always_inline int
+oct_crypto_cpt_hmac_prep (u32 flags, u64 d_offs, u64 d_lens,
+			  struct roc_se_fc_params *fc_params,
+			  struct cpt_inst_s *inst, u8 is_decrypt)
+{
+  u32 encr_data_len, auth_data_len, aad_len = 0;
+  i32 inputlen, outputlen, enc_dlen, auth_dlen;
+  u32 encr_offset, auth_offset, iv_offset = 0;
+  union cpt_inst_w4 cpt_inst_w4;
+  u32 cipher_type;
+  struct roc_se_ctx *se_ctx;
+  u32 passthrough_len = 0;
+  const u8 *src = NULL;
+  u64 offset_ctrl;
+  u8 iv_len = 16;
+  u8 op_minor;
+  u32 mac_len;
+  int ret;
+
+  encr_offset = ROC_SE_ENCR_OFFSET (d_offs);
+  auth_offset = ROC_SE_AUTH_OFFSET (d_offs);
+  encr_data_len = ROC_SE_ENCR_DLEN (d_lens);
+  auth_data_len = ROC_SE_AUTH_DLEN (d_lens);
+
+  if (PREDICT_FALSE (flags & ROC_SE_VALID_AAD_BUF))
+    {
+      /* We don't support both AAD and auth data separately */
+      auth_data_len = 0;
+      auth_offset = 0;
+      aad_len = fc_params->aad_buf.size;
+    }
+
+  se_ctx = fc_params->ctx;
+  cipher_type = se_ctx->enc_cipher;
+  mac_len = se_ctx->mac_len;
+  cpt_inst_w4.u64 = se_ctx->template_w4.u64;
+  op_minor = cpt_inst_w4.s.opcode_minor;
+
+  if (PREDICT_FALSE (flags & ROC_SE_VALID_AAD_BUF))
+    {
+      /*
+       * When AAD is given, data above encr_offset is pass through
+       * Since AAD is given as separate pointer and not as offset,
+       * this is a special case as we need to fragment input data
+       * into passthrough + encr_data and then insert AAD in between.
+       */
+      passthrough_len = encr_offset;
+      auth_offset = passthrough_len + iv_len;
+      encr_offset = passthrough_len + aad_len + iv_len;
+      auth_data_len = aad_len + encr_data_len;
+    }
+  else
+    {
+      /* Offsets are relative to data after the leading IV */
+      encr_offset += iv_len;
+      auth_offset += iv_len;
+    }
+
+  auth_dlen = auth_offset + auth_data_len;
+  enc_dlen = encr_data_len + encr_offset;
+
+  cpt_inst_w4.s.opcode_major = ROC_SE_MAJOR_OP_FC;
+
+  if (is_decrypt)
+    {
+      cpt_inst_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_DECRYPT;
+
+      /* Input carries the MAC to verify; output drops it */
+      if (auth_dlen > enc_dlen)
+	{
+	  inputlen = auth_dlen + mac_len;
+	  outputlen = auth_dlen;
+	}
+      else
+	{
+	  inputlen = enc_dlen + mac_len;
+	  outputlen = enc_dlen;
+	}
+    }
+  else
+    {
+      cpt_inst_w4.s.opcode_minor |= ROC_SE_FC_MINOR_OP_ENCRYPT;
+
+      /* Round up to 16 bytes alignment */
+      if (PREDICT_FALSE (encr_data_len & 0xf))
+	{
+	  if (PREDICT_TRUE (cipher_type == ROC_SE_AES_CBC) ||
+	      (cipher_type == ROC_SE_DES3_CBC))
+	    enc_dlen = PLT_ALIGN_CEIL (encr_data_len, 8) + encr_offset;
+	}
+
+      /*
+       * auth_dlen is larger than enc_dlen in Authentication cases
+       * like AES GMAC Authentication
+       */
+      if (PREDICT_FALSE (auth_dlen > enc_dlen))
+	{
+	  inputlen = auth_dlen;
+	  outputlen = auth_dlen + mac_len;
+	}
+      else
+	{
+	  inputlen = enc_dlen;
+	  outputlen = enc_dlen + mac_len;
+	}
+    }
+
+  if (op_minor & ROC_SE_FC_MINOR_OP_HMAC_FIRST)
+    outputlen = enc_dlen;
+
+  cpt_inst_w4.s.param1 = encr_data_len;
+  cpt_inst_w4.s.param2 = auth_data_len;
+
+  /* Offset-control fields are 16/8/8 bits wide — reject overflow */
+  if (PREDICT_FALSE ((encr_offset >> 16) || (iv_offset >> 8) ||
+		     (auth_offset >> 8)))
+    {
+      clib_warning ("Cryptodev: Offset not supported");
+      clib_warning (
+	"Cryptodev: enc_offset: %d, iv_offset : %d, auth_offset: %d",
+	encr_offset, iv_offset, auth_offset);
+      return -1;
+    }
+
+  offset_ctrl = clib_host_to_net_u64 (
+    ((u64) encr_offset << 16) | ((u64) iv_offset << 8) | ((u64) auth_offset));
+
+  src = fc_params->iv_buf;
+
+  inst->w4.u64 = cpt_inst_w4.u64;
+
+#ifdef PLATFORM_OCTEON9
+  ret = oct_crypto_sg_inst_prep (fc_params, inst, offset_ctrl, src, iv_len, 0,
+				 0, inputlen, outputlen, passthrough_len,
+				 flags, 0, is_decrypt);
+#else
+  ret = oct_crypto_sg2_inst_prep (fc_params, inst, offset_ctrl, src, iv_len, 0,
+				  0, inputlen, outputlen, passthrough_len,
+				  flags, 0, is_decrypt);
+#endif
+
+  if (PREDICT_FALSE (ret))
+    return -1;
+
+  return 0;
+}
+
+/* Translate one async-frame element into roc_se_fc_params and finish
+ * the CPT instruction via oct_crypto_cpt_hmac_prep ().
+ *
+ * AEAD path: IV word 3 is forced to host-to-net 1 (GCM counter block
+ * initial value), AAD and tag buffers are wired in.  Linked
+ * cipher+auth path: packs both offset/length pairs and uses the digest
+ * unless HMAC-first is set in the session template.  The source iovec
+ * is built by walking the vlib buffer chain starting at 'payload'
+ * (first segment shortened by adj_len); dst_iov aliases src_iov, i.e.
+ * the operation is in-place.  NOTE(review): 'src' is a stack array —
+ * fc_params must be fully consumed by cpt_hmac_prep before return,
+ * and chains longer than SRC_IOV_SIZE entries are not bounds-checked
+ * here; presumably guarded by the caller — confirm. */
+static_always_inline int
+oct_crypto_fill_fc_params (oct_crypto_sess_t *sess, struct cpt_inst_s *inst,
+			   const bool is_aead, u8 aad_length, u8 *payload,
+			   vnet_crypto_async_frame_elt_t *elts, void *mdata,
+			   u32 cipher_data_length, u32 cipher_data_offset,
+			   u32 auth_data_length, u32 auth_data_offset,
+			   vlib_buffer_t *b, u16 adj_len)
+{
+  struct roc_se_fc_params fc_params = { 0 };
+  struct roc_se_ctx *ctx = &sess->cpt_ctx;
+  u64 d_offs = 0, d_lens = 0;
+  vlib_buffer_t *buffer = b;
+  u32 flags = 0, index = 0;
+  u8 op_minor = 0, cpt_op;
+  char src[SRC_IOV_SIZE];
+  u32 *iv_buf;
+
+  cpt_op = sess->cpt_op;
+
+  if (is_aead)
+    {
+      flags |= ROC_SE_VALID_IV_BUF;
+      iv_buf = (u32 *) elts->iv;
+      /* GCM: initial counter value in the last IV word */
+      iv_buf[3] = clib_host_to_net_u32 (0x1);
+      fc_params.iv_buf = elts->iv;
+
+      d_offs = cipher_data_offset;
+      d_offs = d_offs << 16;
+
+      d_lens = cipher_data_length;
+      d_lens = d_lens << 32;
+
+      fc_params.aad_buf.vaddr = elts->aad;
+      fc_params.aad_buf.size = aad_length;
+      flags |= ROC_SE_VALID_AAD_BUF;
+
+      if (sess->cpt_ctx.mac_len)
+	{
+	  flags |= ROC_SE_VALID_MAC_BUF;
+	  fc_params.mac_buf.size = sess->cpt_ctx.mac_len;
+	  fc_params.mac_buf.vaddr = elts->tag;
+	}
+    }
+  else
+    {
+      op_minor = ctx->template_w4.s.opcode_minor;
+
+      flags |= ROC_SE_VALID_IV_BUF;
+
+      fc_params.iv_buf = elts->iv;
+
+      d_offs = cipher_data_offset;
+      d_offs = (d_offs << 16) | auth_data_offset;
+
+      d_lens = cipher_data_length;
+      d_lens = (d_lens << 32) | auth_data_length;
+
+      if (PREDICT_TRUE (sess->cpt_ctx.mac_len))
+	{
+	  if (!(op_minor & ROC_SE_FC_MINOR_OP_HMAC_FIRST))
+	    {
+	      flags |= ROC_SE_VALID_MAC_BUF;
+	      fc_params.mac_buf.size = sess->cpt_ctx.mac_len;
+	      fc_params.mac_buf.vaddr = elts->digest;
+	    }
+	}
+    }
+
+  fc_params.ctx = &sess->cpt_ctx;
+
+  fc_params.src_iov = (void *) src;
+
+  /* First segment: crypto payload start, minus already-consumed bytes */
+  fc_params.src_iov->bufs[index].vaddr = payload;
+  fc_params.src_iov->bufs[index].size = b->current_length - adj_len;
+  index++;
+
+  /* Follow the vlib buffer chain for the remaining segments */
+  while (buffer->flags & VLIB_BUFFER_NEXT_PRESENT)
+    {
+      buffer = vlib_get_buffer (vlib_get_main (), buffer->next_buffer);
+      fc_params.src_iov->bufs[index].vaddr =
+	buffer->data + buffer->current_data;
+      fc_params.src_iov->bufs[index].size = buffer->current_length;
+      index++;
+    }
+
+  fc_params.src_iov->buf_cnt = index;
+
+  /* In-place operation: destination aliases the source iovec */
+  fc_params.dst_iov = (void *) src;
+
+  fc_params.meta_buf.vaddr = mdata;
+  fc_params.meta_buf.size = OCT_SCATTER_GATHER_BUFFER_SIZE;
+
+  return oct_crypto_cpt_hmac_prep (flags, d_offs, d_lens, &fc_params, inst,
+				   cpt_op);
+}
+
+/* Build CPT instruction word 7: context pointer (the session's
+ * flexi-crypto context) and the IE engine-group number from the
+ * CPT device. */
+static_always_inline u64
+oct_cpt_inst_w7_get (oct_crypto_sess_t *sess, struct roc_cpt *roc_cpt)
+{
+  union cpt_inst_w7 inst_w7;
+
+  inst_w7.u64 = 0;
+  inst_w7.s.cptr = (u64) &sess->cpt_ctx.se_ctx.fctx;
+  /* Set the engine group */
+  inst_w7.s.egrp = roc_cpt->eng_grp[CPT_ENG_TYPE_IE];
+
+  return inst_w7.u64;
+}
+
+/* Program a session for a linked (cipher + auth) vnet key: map the
+ * vnet async algorithm to ROC cipher/auth types and digest length,
+ * set cipher-then-auth (encrypt) or auth-then-cipher (decrypt)
+ * ordering, then install both sub-keys into the SE context.
+ * Returns 0 on success, -1 on unknown algorithm or ROC key-set
+ * failure.  NOTE(review): iv_length is fixed at 16 even for the
+ * 3DES-CBC entries (8-byte block cipher) — confirm intended. */
+static_always_inline i32
+oct_crypto_link_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess,
+				u32 key_index, u8 type)
+{
+  vnet_crypto_key_t *crypto_key, *auth_key;
+  roc_se_cipher_type enc_type = 0;
+  roc_se_auth_type auth_type = 0;
+  vnet_crypto_key_t *key;
+  u32 digest_len = ~0;
+  i32 rv = 0;
+
+  key = vnet_crypto_get_key (key_index);
+
+  switch (key->async_alg)
+    {
+    case VNET_CRYPTO_ALG_AES_128_CBC_SHA1_TAG12:
+    case VNET_CRYPTO_ALG_AES_192_CBC_SHA1_TAG12:
+    case VNET_CRYPTO_ALG_AES_256_CBC_SHA1_TAG12:
+      enc_type = ROC_SE_AES_CBC;
+      auth_type = ROC_SE_SHA1_TYPE;
+      digest_len = 12;
+      break;
+    case VNET_CRYPTO_ALG_AES_128_CBC_SHA224_TAG14:
+    case VNET_CRYPTO_ALG_AES_192_CBC_SHA224_TAG14:
+    case VNET_CRYPTO_ALG_AES_256_CBC_SHA224_TAG14:
+      enc_type = ROC_SE_AES_CBC;
+      auth_type = ROC_SE_SHA2_SHA224;
+      digest_len = 14;
+      break;
+    case VNET_CRYPTO_ALG_AES_128_CBC_SHA256_TAG16:
+    case VNET_CRYPTO_ALG_AES_192_CBC_SHA256_TAG16:
+    case VNET_CRYPTO_ALG_AES_256_CBC_SHA256_TAG16:
+      enc_type = ROC_SE_AES_CBC;
+      auth_type = ROC_SE_SHA2_SHA256;
+      digest_len = 16;
+      break;
+    case VNET_CRYPTO_ALG_AES_128_CBC_SHA384_TAG24:
+    case VNET_CRYPTO_ALG_AES_192_CBC_SHA384_TAG24:
+    case VNET_CRYPTO_ALG_AES_256_CBC_SHA384_TAG24:
+      enc_type = ROC_SE_AES_CBC;
+      auth_type = ROC_SE_SHA2_SHA384;
+      digest_len = 24;
+      break;
+    case VNET_CRYPTO_ALG_AES_128_CBC_SHA512_TAG32:
+    case VNET_CRYPTO_ALG_AES_192_CBC_SHA512_TAG32:
+    case VNET_CRYPTO_ALG_AES_256_CBC_SHA512_TAG32:
+      enc_type = ROC_SE_AES_CBC;
+      auth_type = ROC_SE_SHA2_SHA512;
+      digest_len = 32;
+      break;
+    case VNET_CRYPTO_ALG_AES_128_CTR_SHA1_TAG12:
+    case VNET_CRYPTO_ALG_AES_192_CTR_SHA1_TAG12:
+    case VNET_CRYPTO_ALG_AES_256_CTR_SHA1_TAG12:
+      enc_type = ROC_SE_AES_CTR;
+      auth_type = ROC_SE_SHA1_TYPE;
+      digest_len = 12;
+      break;
+    case VNET_CRYPTO_ALG_3DES_CBC_MD5_TAG12:
+      enc_type = ROC_SE_DES3_CBC;
+      auth_type = ROC_SE_MD5_TYPE;
+      digest_len = 12;
+      break;
+    case VNET_CRYPTO_ALG_3DES_CBC_SHA1_TAG12:
+      enc_type = ROC_SE_DES3_CBC;
+      auth_type = ROC_SE_SHA1_TYPE;
+      digest_len = 12;
+      break;
+    case VNET_CRYPTO_ALG_3DES_CBC_SHA224_TAG14:
+      enc_type = ROC_SE_DES3_CBC;
+      auth_type = ROC_SE_SHA2_SHA224;
+      digest_len = 14;
+      break;
+    case VNET_CRYPTO_ALG_3DES_CBC_SHA256_TAG16:
+      enc_type = ROC_SE_DES3_CBC;
+      auth_type = ROC_SE_SHA2_SHA256;
+      digest_len = 16;
+      break;
+    case VNET_CRYPTO_ALG_3DES_CBC_SHA384_TAG24:
+      enc_type = ROC_SE_DES3_CBC;
+      auth_type = ROC_SE_SHA2_SHA384;
+      digest_len = 24;
+      break;
+    case VNET_CRYPTO_ALG_3DES_CBC_SHA512_TAG32:
+      enc_type = ROC_SE_DES3_CBC;
+      auth_type = ROC_SE_SHA2_SHA512;
+      digest_len = 32;
+      break;
+    default:
+      clib_warning (
+	"Cryptodev: Undefined link algo %u specified. Key index %u",
+	key->async_alg, key_index);
+      return -1;
+    }
+
+  if (type == VNET_CRYPTO_OP_TYPE_ENCRYPT)
+    sess->cpt_ctx.ciph_then_auth = true;
+  else
+    sess->cpt_ctx.auth_then_ciph = true;
+
+  sess->iv_length = 16;
+  sess->cpt_op = type;
+
+  crypto_key = vnet_crypto_get_key (key->index_crypto);
+  rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, crypto_key->data,
+			    vec_len (crypto_key->data));
+  if (rv)
+    {
+      clib_warning ("Cryptodev: Error in setting cipher key for enc type %u",
+		    enc_type);
+      return -1;
+    }
+
+  auth_key = vnet_crypto_get_key (key->index_integ);
+
+  rv = roc_se_auth_key_set (&sess->cpt_ctx, auth_type, auth_key->data,
+			    vec_len (auth_key->data), digest_len);
+  if (rv)
+    {
+      clib_warning ("Cryptodev: Error in setting auth key for auth type %u",
+		    auth_type);
+      return -1;
+    }
+
+  return 0;
+}
+
+/* Program a session for an AEAD vnet key.  Only AES-GCM is supported:
+ * 16-byte IV field, 16-byte tag.  Installs the cipher key and makes
+ * the auth-key call with a NULL key (auth_type stays 0 for GCM; the
+ * call still sets the digest length in the SE context).  Returns 0 on
+ * success, -1 on unsupported algorithm or ROC key-set failure. */
+static_always_inline i32
+oct_crypto_aead_session_update (vlib_main_t *vm, oct_crypto_sess_t *sess,
+				u32 key_index, u8 type)
+{
+  vnet_crypto_key_t *key = vnet_crypto_get_key (key_index);
+  roc_se_cipher_type enc_type = 0;
+  roc_se_auth_type auth_type = 0;
+  u32 digest_len = ~0;
+  i32 rv = 0;
+
+  switch (key->async_alg)
+    {
+    case VNET_CRYPTO_ALG_AES_128_GCM:
+    case VNET_CRYPTO_ALG_AES_192_GCM:
+    case VNET_CRYPTO_ALG_AES_256_GCM:
+      enc_type = ROC_SE_AES_GCM;
+      sess->aes_gcm = 1;
+      sess->iv_offset = 0;
+      sess->iv_length = 16;
+      sess->cpt_ctx.mac_len = 16;
+      sess->cpt_op = type;
+      digest_len = 16;
+      break;
+    default:
+      clib_warning (
+	"Cryptodev: Undefined cipher algo %u specified. Key index %u",
+	key->async_alg, key_index);
+      return -1;
+    }
+
+  rv = roc_se_ciph_key_set (&sess->cpt_ctx, enc_type, key->data,
+			    vec_len (key->data));
+  if (rv)
+    {
+      clib_warning ("Cryptodev: Error in setting cipher key for enc type %u",
+		    enc_type);
+      return -1;
+    }
+
+  rv = roc_se_auth_key_set (&sess->cpt_ctx, auth_type, NULL, 0, digest_len);
+  if (rv)
+    {
+      clib_warning ("Cryptodev: Error in setting auth key for auth type %u",
+		    auth_type);
+      return -1;
+    }
+
+  return 0;
+}
+
+/**
+ * Lazily initialise a crypto session on first use.
+ *
+ * Dispatches to linked (cipher+auth) or AEAD session programming based on
+ * the key type, binds the session to the crypto device selected for this
+ * op type, and precomputes CPT instruction word 7.
+ *
+ * @return 0 on success; -1 on failure (the session is freed).
+ */
+static_always_inline i32
+oct_crypto_session_init (vlib_main_t *vm, oct_crypto_sess_t *session,
+			 vnet_crypto_key_index_t key_index, int op_type)
+{
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+  vnet_crypto_key_t *key;
+  oct_crypto_dev_t *ocd;
+  i32 rv = 0;
+
+  /* One device slot per op type (see OCT_MAX_N_CPT_DEV). */
+  ocd = ocm->crypto_dev[op_type];
+
+  key = vnet_crypto_get_key (key_index);
+
+  if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
+    rv = oct_crypto_link_session_update (vm, session, key_index, op_type);
+  else
+    rv = oct_crypto_aead_session_update (vm, session, key_index, op_type);
+
+  if (rv)
+    {
+      oct_crypto_session_free (vm, session);
+      return -1;
+    }
+
+  session->crypto_dev = ocd;
+
+  /* Word 7 encodes engine-group/context info; constant per session. */
+  session->cpt_inst_w7 =
+    oct_cpt_inst_w7_get (session, session->crypto_dev->roc_cpt);
+
+  session->initialised = 1;
+
+  return 0;
+}
+
+/**
+ * Flag elements [index, n_elts) of a frame with the given failure status.
+ * If the failure happened before anything was submitted (index == 0) the
+ * whole frame is marked as not processed.
+ */
+static_always_inline void
+oct_crypto_update_frame_error_status (vnet_crypto_async_frame_t *f, u32 index,
+				      vnet_crypto_op_status_t s)
+{
+  u32 elt = index;
+
+  while (elt < f->n_elts)
+    f->elts[elt++].status = s;
+
+  if (index == 0)
+    f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
+}
+
+/**
+ * Build and submit one CPT instruction per frame element, then record the
+ * frame as inflight on this thread's pending queue.
+ *
+ * @param vm       vlib main
+ * @param frame    async crypto frame to process
+ * @param is_aead  1 for AEAD (GCM), 0 for linked cipher+auth
+ * @param aad_len  AAD length in bytes (AEAD only)
+ * @param type     VNET_CRYPTO_OP_TYPE_* (also selects the key table)
+ * @return 0 on success; -1 on failure (remaining elements get an
+ *         engine-error status via oct_crypto_update_frame_error_status).
+ */
+static_always_inline int
+oct_crypto_enqueue_enc_dec (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
+			    const u8 is_aead, u8 aad_len, const u8 type)
+{
+  u32 i, enq_tail, enc_auth_len, buffer_index, nb_infl_allowed;
+  struct cpt_inst_s inst[VNET_CRYPTO_FRAME_SIZE];
+  u32 crypto_start_offset, integ_start_offset;
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+  vnet_crypto_async_frame_elt_t *elts;
+  oct_crypto_dev_t *crypto_dev = NULL;
+  oct_crypto_inflight_req_t *infl_req;
+  oct_crypto_pending_queue_t *pend_q;
+  u64 dptr_start_ptr, curr_ptr;
+  oct_crypto_sess_t *sess;
+  u32 crypto_total_length;
+  oct_crypto_key_t *key;
+  vlib_buffer_t *buffer;
+  u16 adj_len;
+  int ret;
+
+  /* GCM packets having 8 bytes of aad and 8 bytes of iv */
+  u8 aad_iv = 8 + 8;
+
+  pend_q = &ocm->pend_q[vlib_get_thread_index ()];
+
+  enq_tail = pend_q->enq_tail;
+
+  /* One inflight slot holds a full frame; refuse if the ring is full. */
+  nb_infl_allowed = pend_q->n_desc - pend_q->n_crypto_inflight;
+  if (PREDICT_FALSE (nb_infl_allowed == 0))
+    {
+      oct_crypto_update_frame_error_status (
+	frame, 0, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+      return -1;
+    }
+
+  infl_req = &pend_q->req_queue[enq_tail];
+  infl_req->frame = frame;
+
+  for (i = 0; i < frame->n_elts; i++)
+    {
+      elts = &frame->elts[i];
+      buffer_index = frame->buffer_indices[i];
+      key = vec_elt_at_index (ocm->keys[type], elts->key_index);
+
+      if (PREDICT_FALSE (!key->sess))
+	{
+	  oct_crypto_update_frame_error_status (
+	    frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+	  return -1;
+	}
+
+      sess = key->sess;
+
+      /* Sessions are programmed lazily on the first packet. */
+      if (PREDICT_FALSE (!sess->initialised))
+	oct_crypto_session_init (vm, sess, elts->key_index, type);
+
+      crypto_dev = sess->crypto_dev;
+
+      clib_memset (inst + i, 0, sizeof (struct cpt_inst_s));
+
+      buffer = vlib_get_buffer (vm, buffer_index);
+
+      if (is_aead)
+	{
+	  /* DPTR starts aad_iv bytes before the cipher payload so the
+	   * engine sees AAD + IV inline; offsets below are relative to
+	   * that DPTR. */
+	  dptr_start_ptr =
+	    (u64) (buffer->data + (elts->crypto_start_offset - aad_iv));
+	  curr_ptr = (u64) (buffer->data + buffer->current_data);
+	  adj_len = (u16) (dptr_start_ptr - curr_ptr);
+
+	  crypto_total_length = elts->crypto_total_length;
+	  crypto_start_offset = aad_iv;
+	  integ_start_offset = 0;
+
+	  ret = oct_crypto_fill_fc_params (
+	    sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts,
+	    (oct_crypto_scatter_gather_t *) (infl_req->sg_data) + i,
+	    crypto_total_length /* cipher_len */,
+	    crypto_start_offset /* cipher_offset */, 0 /* auth_len */,
+	    integ_start_offset /* auth_off */, buffer, adj_len);
+	  if (PREDICT_FALSE (ret < 0))
+	    {
+	      oct_crypto_update_frame_error_status (
+		frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+	      return -1;
+	    }
+	}
+      else
+	{
+	  /* Linked algos: DPTR starts at the integrity region; cipher
+	   * offset is expressed relative to it. */
+	  dptr_start_ptr = (u64) (buffer->data + elts->integ_start_offset);
+
+	  enc_auth_len = elts->crypto_total_length + elts->integ_length_adj;
+
+	  curr_ptr = (u64) (buffer->data + buffer->current_data);
+	  adj_len = (u16) (dptr_start_ptr - curr_ptr);
+
+	  crypto_total_length = elts->crypto_total_length;
+	  crypto_start_offset =
+	    elts->crypto_start_offset - elts->integ_start_offset;
+	  integ_start_offset = 0;
+
+	  ret = oct_crypto_fill_fc_params (
+	    sess, inst + i, is_aead, aad_len, (u8 *) dptr_start_ptr, elts,
+	    (oct_crypto_scatter_gather_t *) (infl_req->sg_data) + i,
+	    crypto_total_length /* cipher_len */,
+	    crypto_start_offset /* cipher_offset */,
+	    enc_auth_len /* auth_len */, integ_start_offset /* auth_off */,
+	    buffer, adj_len);
+	  if (PREDICT_FALSE (ret < 0))
+	    {
+	      oct_crypto_update_frame_error_status (
+		frame, i, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+	      return -1;
+	    }
+	}
+
+      inst[i].w7.u64 = sess->cpt_inst_w7;
+      /* Completion status for element i lands in infl_req->res[i]. */
+      inst[i].res_addr = (u64) &infl_req->res[i];
+    }
+
+  oct_crypto_burst_submit (crypto_dev, inst, frame->n_elts);
+
+  infl_req->elts = frame->n_elts;
+  OCT_MOD_INC (pend_q->enq_tail, pend_q->n_desc);
+  pend_q->n_crypto_inflight++;
+
+  return 0;
+}
+
+/** Enqueue a frame of linked (cipher+auth) encrypt operations. */
+int
+oct_crypto_enqueue_linked_alg_enc (vlib_main_t *vm,
+				   vnet_crypto_async_frame_t *frame)
+{
+  /* Linked algorithms are not AEAD and carry no AAD. */
+  return oct_crypto_enqueue_enc_dec (vm, frame, /* is_aead */ 0,
+				     /* aad_len */ 0,
+				     VNET_CRYPTO_OP_TYPE_ENCRYPT);
+}
+
+/** Enqueue a frame of linked (cipher+auth) decrypt operations. */
+int
+oct_crypto_enqueue_linked_alg_dec (vlib_main_t *vm,
+				   vnet_crypto_async_frame_t *frame)
+{
+  /* Linked algorithms are not AEAD and carry no AAD. */
+  return oct_crypto_enqueue_enc_dec (vm, frame, /* is_aead */ 0,
+				     /* aad_len */ 0,
+				     VNET_CRYPTO_OP_TYPE_DECRYPT);
+}
+
+/** Enqueue an AEAD encrypt frame with the given AAD length. */
+int
+oct_crypto_enqueue_aead_aad_enc (vlib_main_t *vm,
+				 vnet_crypto_async_frame_t *frame, u8 aad_len)
+{
+  const u8 is_aead = 1;
+
+  return oct_crypto_enqueue_enc_dec (vm, frame, is_aead, aad_len,
+				     VNET_CRYPTO_OP_TYPE_ENCRYPT);
+}
+
+/** Enqueue an AEAD decrypt frame with the given AAD length. */
+static_always_inline int
+oct_crypto_enqueue_aead_aad_dec (vlib_main_t *vm,
+				 vnet_crypto_async_frame_t *frame, u8 aad_len)
+{
+  const u8 is_aead = 1;
+
+  return oct_crypto_enqueue_enc_dec (vm, frame, is_aead, aad_len,
+				     VNET_CRYPTO_OP_TYPE_DECRYPT);
+}
+
+/** AEAD encrypt entry point for algorithms with 8-byte AAD. */
+int
+oct_crypto_enqueue_aead_aad_8_enc (vlib_main_t *vm,
+				   vnet_crypto_async_frame_t *frame)
+{
+  return oct_crypto_enqueue_aead_aad_enc (vm, frame, /* aad_len */ 8);
+}
+
+/** AEAD encrypt entry point for algorithms with 12-byte AAD. */
+int
+oct_crypto_enqueue_aead_aad_12_enc (vlib_main_t *vm,
+				    vnet_crypto_async_frame_t *frame)
+{
+  return oct_crypto_enqueue_aead_aad_enc (vm, frame, /* aad_len */ 12);
+}
+
+/** AEAD decrypt entry point for algorithms with 8-byte AAD. */
+int
+oct_crypto_enqueue_aead_aad_8_dec (vlib_main_t *vm,
+				   vnet_crypto_async_frame_t *frame)
+{
+  return oct_crypto_enqueue_aead_aad_dec (vm, frame, /* aad_len */ 8);
+}
+
+/** AEAD decrypt entry point for algorithms with 12-byte AAD. */
+int
+oct_crypto_enqueue_aead_aad_12_dec (vlib_main_t *vm,
+				    vnet_crypto_async_frame_t *frame)
+{
+  return oct_crypto_enqueue_aead_aad_dec (vm, frame, /* aad_len */ 12);
+}
+
+/**
+ * Poll the head inflight request of this thread's pending queue and, when
+ * every element has completed, return its frame to the crypto framework.
+ *
+ * Completion is checked per element; deq_elts remembers how far we got so
+ * a later poll resumes where this one stopped.  Returns NULL while the
+ * head frame is still (partially) in flight.
+ *
+ * @param[out] nb_elts_processed   number of elements in the returned frame
+ * @param[out] enqueue_thread_idx  thread that enqueued the frame
+ */
+vnet_crypto_async_frame_t *
+oct_crypto_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
+			  u32 *enqueue_thread_idx)
+{
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+  u32 deq_head, status = VNET_CRYPTO_OP_STATUS_COMPLETED;
+  vnet_crypto_async_frame_elt_t *fe = NULL;
+  oct_crypto_inflight_req_t *infl_req;
+  oct_crypto_pending_queue_t *pend_q;
+  vnet_crypto_async_frame_t *frame;
+  volatile union cpt_res_s *res;
+  int i;
+
+  pend_q = &ocm->pend_q[vlib_get_thread_index ()];
+
+  if (!pend_q->n_crypto_inflight)
+    return NULL;
+
+  deq_head = pend_q->deq_head;
+  infl_req = &pend_q->req_queue[deq_head];
+  frame = infl_req->frame;
+
+  fe = frame->elts;
+
+  /* Resume scanning from the first element not yet seen complete. */
+  for (i = infl_req->deq_elts; i < infl_req->elts; ++i)
+    {
+      res = &infl_req->res[i];
+
+      /* Hardware has not finished this element yet; try again later. */
+      if (PREDICT_FALSE (res->cn10k.compcode == CPT_COMP_NOT_DONE))
+	return NULL;
+
+      if (PREDICT_FALSE (res->cn10k.uc_compcode))
+	{
+	  if (res->cn10k.uc_compcode == ROC_SE_ERR_GC_ICV_MISCOMPARE)
+	    status = fe[i].status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
+	  else
+	    status = fe[i].status = VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR;
+	}
+
+      infl_req->deq_elts++;
+    }
+
+  /* Reset result words so the slot's next use starts from NOT_DONE. */
+  clib_memset ((void *) infl_req->res, 0,
+	       sizeof (union cpt_res_s) * VNET_CRYPTO_FRAME_SIZE);
+
+  OCT_MOD_INC (pend_q->deq_head, pend_q->n_desc);
+  pend_q->n_crypto_inflight--;
+
+  frame->state = status == VNET_CRYPTO_OP_STATUS_COMPLETED ?
+		   VNET_CRYPTO_FRAME_STATE_SUCCESS :
+		   VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
+
+  *nb_elts_processed = frame->n_elts;
+  *enqueue_thread_idx = frame->enqueue_thread_index;
+
+  infl_req->deq_elts = 0;
+  infl_req->elts = 0;
+
+  return frame;
+}
+
+/**
+ * Register the OCT cryptodev engine with vnet crypto: one enqueue handler
+ * per supported AEAD and linked algorithm (expanded from the foreach_*
+ * macros in crypto.h), plus the shared dequeue and key handlers.
+ */
+int
+oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  u32 engine_index;
+
+  engine_index = vnet_crypto_register_engine (vm, "oct_cryptodev", 100,
+					      "OCT Cryptodev Engine");
+
+/* (n, k, t, a) = algo, key len, tag len, aad len */
+#define _(n, k, t, a)                                                         \
+  vnet_crypto_register_enqueue_handler (                                      \
+    vm, engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC,             \
+    oct_crypto_enqueue_aead_aad_##a##_enc);                                   \
+  vnet_crypto_register_enqueue_handler (                                      \
+    vm, engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC,             \
+    oct_crypto_enqueue_aead_aad_##a##_dec);
+  foreach_oct_crypto_aead_async_alg
+#undef _
+
+/* (c, h, k, d) = cipher, hash, key len, digest len */
+#define _(c, h, k, d)                                                         \
+  vnet_crypto_register_enqueue_handler (                                      \
+    vm, engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC,                \
+    oct_crypto_enqueue_linked_alg_enc);                                       \
+  vnet_crypto_register_enqueue_handler (                                      \
+    vm, engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC,                \
+    oct_crypto_enqueue_linked_alg_dec);
+  foreach_oct_crypto_link_async_alg;
+#undef _
+
+  vnet_crypto_register_dequeue_handler (vm, engine_index,
+					oct_crypto_frame_dequeue);
+
+  vnet_crypto_register_key_handler (vm, engine_index, oct_crypto_key_handler);
+
+  return 0;
+}
+
+/**
+ * Allocate the per-thread pending queues and their inflight request rings
+ * (including per-request scatter-gather buffers).
+ *
+ * @return 0 on success; -1 on allocation failure, with everything
+ *         allocated so far released.
+ */
+int
+oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev)
+{
+  oct_crypto_main_t *ocm = &oct_crypto_main;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  extern oct_plt_init_param_t oct_plt_init_param;
+  oct_crypto_inflight_req_t *infl_req_queue;
+  u32 n_inflight_req;
+  int i, j;
+
+  /* One pending queue per vlib thread. */
+  ocm->pend_q = oct_plt_init_param.oct_plt_zmalloc (
+    tm->n_vlib_mains * sizeof (oct_crypto_pending_queue_t),
+    CLIB_CACHE_LINE_BYTES);
+  if (ocm->pend_q == NULL)
+    {
+      log_err (dev, "Failed to allocate memory for crypto pending queue");
+      return -1;
+    }
+
+  /*
+   * Each pending queue will get number of cpt desc / number of cores.
+   * And that desc count is shared across inflight entries.
+   */
+  n_inflight_req =
+    (OCT_CPT_LF_MAX_NB_DESC / tm->n_vlib_mains) / VNET_CRYPTO_FRAME_SIZE;
+
+  for (i = 0; i < tm->n_vlib_mains; ++i)
+    {
+      ocm->pend_q[i].n_desc = n_inflight_req;
+
+      ocm->pend_q[i].req_queue = oct_plt_init_param.oct_plt_zmalloc (
+	ocm->pend_q[i].n_desc * sizeof (oct_crypto_inflight_req_t),
+	CLIB_CACHE_LINE_BYTES);
+      if (ocm->pend_q[i].req_queue == NULL)
+	{
+	  log_err (dev,
+		   "Failed to allocate memory for crypto inflight request");
+	  goto free;
+	}
+
+      /* Valid ring indices are 0 .. n_desc - 1.  The previous '<=' bound
+       * wrote one oct_crypto_inflight_req_t past the end of req_queue. */
+      for (j = 0; j < ocm->pend_q[i].n_desc; ++j)
+	{
+	  infl_req_queue = &ocm->pend_q[i].req_queue[j];
+
+	  infl_req_queue->sg_data = oct_plt_init_param.oct_plt_zmalloc (
+	    OCT_SCATTER_GATHER_BUFFER_SIZE * VNET_CRYPTO_FRAME_SIZE,
+	    CLIB_CACHE_LINE_BYTES);
+	  if (infl_req_queue->sg_data == NULL)
+	    {
+	      log_err (dev, "Failed to allocate crypto scatter gather memory");
+	      goto free;
+	    }
+	}
+    }
+  return 0;
+free:
+  /*
+   * Unwind all allocations made so far.  Rings come from zmalloc, so any
+   * sg_data pointer never populated is NULL and is simply skipped; scan
+   * the full descriptor range of every queue.  (Previously the failing
+   * 'j' was reused across queues, leaking sg_data of earlier queues and
+   * reading past the ring end.)
+   */
+  for (; i >= 0; i--)
+    {
+      if (ocm->pend_q[i].req_queue == NULL)
+	continue;
+      for (j = ocm->pend_q[i].n_desc - 1; j >= 0; j--)
+	{
+	  infl_req_queue = &ocm->pend_q[i].req_queue[j];
+
+	  if (infl_req_queue->sg_data == NULL)
+	    continue;
+
+	  oct_plt_init_param.oct_plt_free (infl_req_queue->sg_data);
+	}
+      oct_plt_init_param.oct_plt_free (ocm->pend_q[i].req_queue);
+    }
+  oct_plt_init_param.oct_plt_free (ocm->pend_q);
+  /* Avoid leaving a dangling pointer for later callers. */
+  ocm->pend_q = NULL;
+
+  return -1;
+}
diff --git a/src/plugins/dev_octeon/crypto.h b/src/plugins/dev_octeon/crypto.h
new file mode 100644
index 00000000000..8d17980a55f
--- /dev/null
+++ b/src/plugins/dev_octeon/crypto.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2024 Marvell.
+ * SPDX-License-Identifier: Apache-2.0
+ * https://spdx.org/licenses/Apache-2.0.html
+ */
+
+#ifndef _CRYPTO_H_
+#define _CRYPTO_H_
+#include <vnet/crypto/crypto.h>
+#include <vnet/ip/ip.h>
+
+#define OCT_MAX_N_CPT_DEV 2
+
+#define OCT_CPT_LF_MAX_NB_DESC 128000
+
+/* CRYPTO_ID, KEY_LENGTH_IN_BYTES, TAG_LEN, AAD_LEN */
+#define foreach_oct_crypto_aead_async_alg \
+ _ (AES_128_GCM, 16, 16, 8) \
+ _ (AES_128_GCM, 16, 16, 12) \
+ _ (AES_192_GCM, 24, 16, 8) \
+ _ (AES_192_GCM, 24, 16, 12) \
+ _ (AES_256_GCM, 32, 16, 8) \
+ _ (AES_256_GCM, 32, 16, 12)
+
+/* CRYPTO_ID, INTEG_ID, KEY_LENGTH_IN_BYTES, DIGEST_LEN */
+#define foreach_oct_crypto_link_async_alg \
+ _ (AES_128_CBC, SHA1, 16, 12) \
+ _ (AES_192_CBC, SHA1, 24, 12) \
+ _ (AES_256_CBC, SHA1, 32, 12) \
+ _ (AES_128_CBC, SHA256, 16, 16) \
+ _ (AES_192_CBC, SHA256, 24, 16) \
+ _ (AES_256_CBC, SHA256, 32, 16) \
+ _ (AES_128_CBC, SHA384, 16, 24) \
+ _ (AES_192_CBC, SHA384, 24, 24) \
+ _ (AES_256_CBC, SHA384, 32, 24) \
+ _ (AES_128_CBC, SHA512, 16, 32) \
+ _ (AES_192_CBC, SHA512, 24, 32) \
+ _ (AES_256_CBC, SHA512, 32, 32) \
+ _ (3DES_CBC, MD5, 24, 12) \
+ _ (3DES_CBC, SHA1, 24, 12) \
+ _ (3DES_CBC, SHA256, 24, 16) \
+ _ (3DES_CBC, SHA384, 24, 24) \
+ _ (3DES_CBC, SHA512, 24, 32) \
+ _ (AES_128_CTR, SHA1, 16, 12) \
+ _ (AES_192_CTR, SHA1, 24, 12) \
+ _ (AES_256_CTR, SHA1, 32, 12)
+
+#define OCT_MOD_INC(i, l) ((i) == (l - 1) ? (i) = 0 : (i)++)
+
+#define OCT_SCATTER_GATHER_BUFFER_SIZE 1024
+
+#define CPT_LMT_SIZE_COPY (sizeof (struct cpt_inst_s) / 16)
+#define OCT_MAX_LMT_SZ 16
+
+#define SRC_IOV_SIZE \
+ (sizeof (struct roc_se_iov_ptr) + \
+ (sizeof (struct roc_se_buf_ptr) * ROC_MAX_SG_CNT))
+
+#define OCT_CPT_LMT_GET_LINE_ADDR(lmt_addr, lmt_num) \
+ (void *) ((u64) (lmt_addr) + ((u64) (lmt_num) << ROC_LMT_LINE_SIZE_LOG2))
+
+/** Per-CPT crypto device: ROC handle, LMT line and logical function. */
+typedef struct
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  /** ROC CPT device handle */
+  struct roc_cpt *roc_cpt;
+  /** LMT line used for instruction submission */
+  struct roc_cpt_lmtline lmtline;
+  /** CPT logical function */
+  struct roc_cpt_lf lf;
+  /** Owning vnet device */
+  vnet_dev_t *dev;
+} oct_crypto_dev_t;
+
+/** Crypto session state, programmed lazily on first use. */
+typedef struct
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  /** CPT opcode */
+  u16 cpt_op : 4;
+  /** Flag for AES GCM */
+  u16 aes_gcm : 1;
+  /** IV length in bytes */
+  u8 iv_length;
+  /** Auth IV length in bytes */
+  u8 auth_iv_length;
+  /** IV offset in bytes */
+  u16 iv_offset;
+  /** Auth IV offset in bytes */
+  u16 auth_iv_offset;
+  /** CPT inst word 7 */
+  u64 cpt_inst_w7;
+  /* initialise as part of first packet */
+  u8 initialised;
+  /* store link key index in case of linked algo */
+  vnet_crypto_key_index_t key_index;
+  /** Device this session is bound to */
+  oct_crypto_dev_t *crypto_dev;
+  /** ROC security engine context */
+  struct roc_se_ctx cpt_ctx;
+} oct_crypto_sess_t;
+
+/** Per-key state: the session derived from the key and its device. */
+typedef struct
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  oct_crypto_sess_t *sess;
+  oct_crypto_dev_t *crypto_dev;
+} oct_crypto_key_t;
+
+/** Fixed-size scratch area for one element's scatter-gather list. */
+typedef struct oct_crypto_scatter_gather
+{
+  u8 buf[OCT_SCATTER_GATHER_BUFFER_SIZE];
+} oct_crypto_scatter_gather_t;
+
+/** One inflight frame: per-element results plus dequeue progress. */
+typedef struct
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+  /** Result data of all entries in the frame */
+  volatile union cpt_res_s res[VNET_CRYPTO_FRAME_SIZE];
+  /** Scatter gather data */
+  void *sg_data;
+  /** Frame pointer */
+  vnet_crypto_async_frame_t *frame;
+  /** Number of async elements in frame */
+  u16 elts;
+  /** Next read entry in frame, when dequeue */
+  u16 deq_elts;
+} oct_crypto_inflight_req_t;
+
+/** Per-thread ring of inflight requests (one slot per frame). */
+typedef struct
+{
+  /** Array of pending request */
+  oct_crypto_inflight_req_t *req_queue;
+  /** Number of inflight operations in queue */
+  u32 n_crypto_inflight;
+  /** Tail of queue to be used for enqueue */
+  u16 enq_tail;
+  /** Head of queue to be used for dequeue */
+  u16 deq_head;
+  /** Number of descriptors */
+  u16 n_desc;
+} oct_crypto_pending_queue_t;
+
+/** Global crypto engine state. */
+typedef struct
+{
+  /** One device slot per op type (encrypt/decrypt) */
+  oct_crypto_dev_t *crypto_dev[OCT_MAX_N_CPT_DEV];
+  /** Key tables, one vector per async op type */
+  oct_crypto_key_t *keys[VNET_CRYPTO_ASYNC_OP_N_TYPES];
+  /** Per-thread pending queues */
+  oct_crypto_pending_queue_t *pend_q;
+  /** Number of CPT devices found */
+  int n_cpt;
+  /** Engine started flag */
+  u8 started;
+} oct_crypto_main_t;
+
+extern oct_crypto_main_t oct_crypto_main;
+
+void oct_crypto_key_del_handler (vlib_main_t *vm,
+ vnet_crypto_key_index_t key_index);
+
+void oct_crypto_key_add_handler (vlib_main_t *vm,
+ vnet_crypto_key_index_t key_index);
+
+void oct_crypto_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop,
+ vnet_crypto_key_index_t idx);
+
+int oct_crypto_enqueue_linked_alg_enc (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame);
+int oct_crypto_enqueue_linked_alg_dec (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame);
+int oct_crypto_enqueue_aead_aad_8_enc (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame);
+int oct_crypto_enqueue_aead_aad_12_enc (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame);
+int oct_crypto_enqueue_aead_aad_8_dec (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame);
+int oct_crypto_enqueue_aead_aad_12_dec (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame);
+vnet_crypto_async_frame_t *oct_crypto_frame_dequeue (vlib_main_t *vm,
+ u32 *nb_elts_processed,
+ u32 *enqueue_thread_idx);
+int oct_init_crypto_engine_handlers (vlib_main_t *vm, vnet_dev_t *dev);
+int oct_conf_sw_queue (vlib_main_t *vm, vnet_dev_t *dev);
+#endif /* _CRYPTO_H_ */
diff --git a/src/plugins/dev_octeon/flow.c b/src/plugins/dev_octeon/flow.c
index 1c367a036ab..5bef25f5369 100644
--- a/src/plugins/dev_octeon/flow.c
+++ b/src/plugins/dev_octeon/flow.c
@@ -46,6 +46,8 @@ VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
(f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
(f->type == VNET_FLOW_TYPE_IP4_GTPU))
+#define FLOW_IS_GENERIC_TYPE(f) (f->type == VNET_FLOW_TYPE_GENERIC)
+
#define OCT_FLOW_UNSUPPORTED_ACTIONS(f) \
((f->actions == VNET_FLOW_ACTION_BUFFER_ADVANCE) || \
(f->actions == VNET_FLOW_ACTION_REDIRECT_TO_NODE))
@@ -71,6 +73,9 @@ VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
_ (62, FLOW_KEY_TYPE_L3_DST, "l3-dst-only") \
_ (63, FLOW_KEY_TYPE_L3_SRC, "l3-src-only")
+#define GTPU_PORT 2152
+#define VXLAN_PORT 4789
+
typedef struct
{
u16 src_port;
@@ -87,6 +92,27 @@ typedef struct
u32 teid;
} gtpu_header_t;
+typedef struct
+{
+ u8 layer;
+ u16 nxt_proto;
+ vnet_dev_port_t *port;
+ struct roc_npc_item_info *items;
+ struct
+ {
+ u8 *spec;
+ u8 *mask;
+ u16 off;
+ } oct_drv;
+ struct
+ {
+ u8 *spec;
+ u8 *mask;
+ u16 off;
+ u16 len;
+ } generic;
+} oct_flow_parse_state;
+
static void
oct_flow_convert_rss_types (u64 *key, u64 rss_types)
{
@@ -163,6 +189,14 @@ oct_flow_rule_create (vnet_dev_port_t *port, struct roc_npc_action *actions,
npc = &oct_port->npc;
+ for (int i = 0; item_info[i].type != ROC_NPC_ITEM_TYPE_END; i++)
+ {
+ log_debug (port->dev, "Flow[%d] Item[%d] type %d spec 0x%U mask 0x%U",
+ flow->index, i, item_info[i].type, format_hex_bytes,
+ item_info[i].spec, item_info[i].size, format_hex_bytes,
+ item_info[i].mask, item_info[i].size);
+ }
+
npc_flow =
roc_npc_flow_create (npc, &attr, item_info, actions, npc->pf_func, &rv);
if (rv)
@@ -183,6 +217,320 @@ oct_flow_rule_create (vnet_dev_port_t *port, struct roc_npc_action *actions,
return VNET_DEV_OK;
}
+/**
+ * Parse the Ethernet header (and any VLAN tags) at the current offset of
+ * the generic byte pattern into ROC NPC items.
+ *
+ * Spec/mask bytes for ETH and IPv6 items must be re-laid-out into the
+ * driver's roc_npc_flow_item_* shapes, so they are copied into the
+ * oct_drv scratch area; other items point straight into the pattern.
+ * On return pst->nxt_proto holds the EtherType following L2.
+ *
+ * @return always 0 (L2 parsing itself cannot fail).
+ */
+static int
+oct_parse_l2 (oct_flow_parse_state *pst)
+{
+  struct roc_npc_flow_item_eth *eth_spec =
+    (struct roc_npc_flow_item_eth *) &pst->oct_drv.spec[pst->oct_drv.off];
+  struct roc_npc_flow_item_eth *eth_mask =
+    (struct roc_npc_flow_item_eth *) &pst->oct_drv.mask[pst->oct_drv.off];
+  ethernet_header_t *eth_hdr_mask =
+    (ethernet_header_t *) &pst->generic.mask[pst->generic.off];
+  ethernet_header_t *eth_hdr =
+    (ethernet_header_t *) &pst->generic.spec[pst->generic.off];
+  u16 tpid, etype;
+
+  tpid = etype = clib_net_to_host_u16 (eth_hdr->type);
+  clib_memcpy_fast (eth_spec, eth_hdr, sizeof (ethernet_header_t));
+  clib_memcpy_fast (eth_mask, eth_hdr_mask, sizeof (ethernet_header_t));
+  eth_spec->has_vlan = 0;
+
+  pst->items[pst->layer].spec = (void *) eth_spec;
+  pst->items[pst->layer].mask = (void *) eth_mask;
+  pst->items[pst->layer].size = sizeof (ethernet_header_t);
+  pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_ETH;
+  pst->generic.off += sizeof (ethernet_header_t);
+  pst->oct_drv.off += sizeof (struct roc_npc_flow_item_eth);
+  pst->layer++;
+
+  /* Parse VLAN Tags if any */
+  struct roc_npc_flow_item_vlan *vlan_spec =
+    (struct roc_npc_flow_item_vlan *) &pst->oct_drv.spec[pst->oct_drv.off];
+  struct roc_npc_flow_item_vlan *vlan_mask =
+    (struct roc_npc_flow_item_vlan *) &pst->oct_drv.mask[pst->oct_drv.off];
+  ethernet_vlan_header_t *vlan_hdr, *vlan_hdr_mask;
+  u8 vlan_cnt = 0;
+
+  /* Follow 802.1Q / 802.1ad tags until a non-VLAN EtherType appears or
+   * the pattern is exhausted. */
+  while (tpid == ETHERNET_TYPE_DOT1AD || tpid == ETHERNET_TYPE_VLAN)
+    {
+      if (pst->generic.off >= pst->generic.len)
+	break;
+
+      vlan_hdr =
+	(ethernet_vlan_header_t *) &pst->generic.spec[pst->generic.off];
+      vlan_hdr_mask =
+	(ethernet_vlan_header_t *) &pst->generic.mask[pst->generic.off];
+      tpid = etype = clib_net_to_host_u16 (vlan_hdr->type);
+      clib_memcpy (&vlan_spec[vlan_cnt], vlan_hdr,
+		   sizeof (ethernet_vlan_header_t));
+      clib_memcpy (&vlan_mask[vlan_cnt], vlan_hdr_mask,
+		   sizeof (ethernet_vlan_header_t));
+      pst->items[pst->layer].spec = (void *) &vlan_spec[vlan_cnt];
+      pst->items[pst->layer].mask = (void *) &vlan_mask[vlan_cnt];
+      pst->items[pst->layer].size = sizeof (ethernet_vlan_header_t);
+      pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_VLAN;
+      pst->generic.off += sizeof (ethernet_vlan_header_t);
+      pst->oct_drv.off += sizeof (struct roc_npc_flow_item_vlan);
+      pst->layer++;
+      vlan_cnt++;
+    }
+
+  /* Inner most vlan tag */
+  if (vlan_cnt)
+    vlan_spec[vlan_cnt - 1].has_more_vlan = 0;
+
+  pst->nxt_proto = etype;
+  return 0;
+}
+
+/**
+ * Parse the L3 portion (MPLS label stack, IPv4, or IPv6 with extension
+ * headers) of the generic byte pattern into ROC NPC items.
+ *
+ * On return pst->nxt_proto holds the L4 protocol number (or 0 when there
+ * is nothing further to parse, e.g. after MPLS).
+ *
+ * @return 0 on success, -1 on an unsupported L3 EtherType.
+ */
+static int
+oct_parse_l3 (oct_flow_parse_state *pst)
+{
+
+  /* Nothing left to parse, or previous layer terminated the pattern. */
+  if (pst->generic.off >= pst->generic.len || pst->nxt_proto == 0)
+    return 0;
+
+  if (pst->nxt_proto == ETHERNET_TYPE_MPLS)
+    {
+      /* Emit one 4-byte MPLS item per label until the bottom-of-stack
+       * bit (bit 0 of the third byte) is set. */
+      int label_stack_bottom = 0;
+      do
+	{
+
+	  u8 *mpls_spec = &pst->generic.spec[pst->generic.off];
+	  u8 *mpls_mask = &pst->generic.mask[pst->generic.off];
+
+	  label_stack_bottom = mpls_spec[2] & 1;
+	  pst->items[pst->layer].spec = (void *) mpls_spec;
+	  pst->items[pst->layer].mask = (void *) mpls_mask;
+	  pst->items[pst->layer].size = sizeof (u32);
+	  pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_MPLS;
+	  pst->generic.off += sizeof (u32);
+	  pst->layer++;
+	}
+      while (label_stack_bottom);
+
+      pst->nxt_proto = 0;
+      return 0;
+    }
+  else if (pst->nxt_proto == ETHERNET_TYPE_IP4)
+    {
+      ip4_header_t *ip4_spec =
+	(ip4_header_t *) &pst->generic.spec[pst->generic.off];
+      ip4_header_t *ip4_mask =
+	(ip4_header_t *) &pst->generic.mask[pst->generic.off];
+      pst->items[pst->layer].spec = (void *) ip4_spec;
+      pst->items[pst->layer].mask = (void *) ip4_mask;
+      pst->items[pst->layer].size = sizeof (ip4_header_t);
+      pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV4;
+      pst->generic.off += sizeof (ip4_header_t);
+      pst->layer++;
+      pst->nxt_proto = ip4_spec->protocol;
+    }
+  else if (pst->nxt_proto == ETHERNET_TYPE_IP6)
+    {
+      /* IPv6 spec/mask are copied into the driver scratch area because
+       * the ROC item layout differs from the on-wire header. */
+      struct roc_npc_flow_item_ipv6 *ip6_spec =
+	(struct roc_npc_flow_item_ipv6 *) &pst->oct_drv.spec[pst->oct_drv.off];
+      struct roc_npc_flow_item_ipv6 *ip6_mask =
+	(struct roc_npc_flow_item_ipv6 *) &pst->oct_drv.mask[pst->oct_drv.off];
+      ip6_header_t *ip6_hdr_mask =
+	(ip6_header_t *) &pst->generic.mask[pst->generic.off];
+      ip6_header_t *ip6_hdr =
+	(ip6_header_t *) &pst->generic.spec[pst->generic.off];
+      u8 nxt_hdr = ip6_hdr->protocol;
+
+      clib_memcpy (ip6_spec, ip6_hdr, sizeof (ip6_header_t));
+      clib_memcpy (ip6_mask, ip6_hdr_mask, sizeof (ip6_header_t));
+      pst->items[pst->layer].spec = (void *) ip6_spec;
+      pst->items[pst->layer].mask = (void *) ip6_mask;
+      pst->items[pst->layer].size = sizeof (ip6_header_t);
+      pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6;
+      pst->generic.off += sizeof (ip6_header_t);
+      pst->oct_drv.off += sizeof (struct roc_npc_flow_item_ipv6);
+      pst->layer++;
+
+      /* Walk chained extension headers (hop-by-hop, destination
+       * options, routing). */
+      while (nxt_hdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS ||
+	     nxt_hdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS ||
+	     nxt_hdr == IP_PROTOCOL_IPV6_ROUTE)
+	{
+	  if (pst->generic.off >= pst->generic.len)
+	    return 0;
+
+	  ip6_ext_header_t *ip6_ext_spec =
+	    (ip6_ext_header_t *) &pst->generic.spec[pst->generic.off];
+	  ip6_ext_header_t *ip6_ext_mask =
+	    (ip6_ext_header_t *) &pst->generic.mask[pst->generic.off];
+	  nxt_hdr = ip6_ext_spec->next_hdr;
+
+	  pst->items[pst->layer].spec = (void *) ip6_ext_spec;
+	  pst->items[pst->layer].mask = (void *) ip6_ext_mask;
+	  pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6_EXT;
+	  pst->generic.off += ip6_ext_header_len (ip6_ext_spec);
+	  pst->layer++;
+	}
+
+      if (pst->generic.off >= pst->generic.len)
+	return 0;
+
+      /* Fragment header has a fixed size and its own item type. */
+      if (nxt_hdr == IP_PROTOCOL_IPV6_FRAGMENTATION)
+	{
+	  ip6_frag_hdr_t *ip6_ext_frag_spec =
+	    (ip6_frag_hdr_t *) &pst->generic.spec[pst->generic.off];
+	  ip6_frag_hdr_t *ip6_ext_frag_mask =
+	    (ip6_frag_hdr_t *) &pst->generic.mask[pst->generic.off];
+
+	  pst->items[pst->layer].spec = (void *) ip6_ext_frag_spec;
+	  pst->items[pst->layer].mask = (void *) ip6_ext_frag_mask;
+	  pst->items[pst->layer].size = sizeof (ip6_frag_hdr_t);
+	  pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_IPV6_FRAG_EXT;
+	  pst->generic.off += sizeof (ip6_frag_hdr_t);
+	  pst->layer++;
+	}
+
+      pst->nxt_proto = nxt_hdr;
+    }
+  /* Unsupported L3. */
+  else
+    return -1;
+
+  return 0;
+}
+
+/**
+ * Parse the L4 header of the generic byte pattern into a ROC NPC item.
+ *
+ * The _() macro expands to one "match protocol number, emit item, advance
+ * offset, return 0" clause per supported L4 protocol.
+ *
+ * @return 0 on success (or nothing to parse), -1 on unsupported L4.
+ */
+static int
+oct_parse_l4 (oct_flow_parse_state *pst)
+{
+
+  if (pst->generic.off >= pst->generic.len || pst->nxt_proto == 0)
+    return 0;
+
+#define _(protocol_t, protocol_value, ltype)                                  \
+  if (pst->nxt_proto == protocol_value)                                       \
+                                                                              \
+    {                                                                         \
+                                                                              \
+      protocol_t *spec = (protocol_t *) &pst->generic.spec[pst->generic.off]; \
+      protocol_t *mask = (protocol_t *) &pst->generic.mask[pst->generic.off]; \
+      pst->items[pst->layer].spec = spec;                                     \
+      pst->items[pst->layer].mask = mask;                                     \
+                                                                              \
+      pst->items[pst->layer].size = sizeof (protocol_t);                      \
+                                                                              \
+      pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_##ltype;                \
+      pst->generic.off += sizeof (protocol_t);                                \
+      pst->layer++;                                                           \
+      return 0;                                                               \
+    }
+
+  _ (esp_header_t, IP_PROTOCOL_IPSEC_ESP, ESP)
+  _ (udp_header_t, IP_PROTOCOL_UDP, UDP)
+  _ (tcp_header_t, IP_PROTOCOL_TCP, TCP)
+  _ (sctp_header_t, IP_PROTOCOL_SCTP, SCTP)
+  _ (icmp46_header_t, IP_PROTOCOL_ICMP, ICMP)
+  _ (icmp46_header_t, IP_PROTOCOL_ICMP6, ICMP)
+  _ (igmp_header_t, IP_PROTOCOL_IGMP, IGMP)
+  _ (gre_header_t, IP_PROTOCOL_GRE, GRE)
+
+  /* Unsupported L4. */
+  return -1;
+}
+
+/**
+ * Parse an optional tunnel (GRE, GTPU, or VXLAN) following the outermost
+ * L4 item, plus the encapsulated inner headers where applicable.
+ *
+ * On entry pst->items[pst->layer - 1] is the L4 item just parsed.
+ *
+ * @return 0 on success (including "no tunnel present"), -1 when an inner
+ *         header cannot be parsed.
+ */
+static int
+oct_parse_tunnel (oct_flow_parse_state *pst)
+{
+  if (pst->generic.off >= pst->generic.len)
+    return 0;
+
+  if (pst->items[pst->layer - 1].type == ROC_NPC_ITEM_TYPE_GRE)
+    {
+      /* GRE: payload protocol comes from the GRE header itself. */
+      gre_header_t *gre_hdr = (gre_header_t *) pst->items[pst->layer - 1].spec;
+      pst->nxt_proto = clib_net_to_host_u16 (gre_hdr->protocol);
+      goto parse_l3;
+    }
+
+  else if (pst->items[pst->layer - 1].type == ROC_NPC_ITEM_TYPE_UDP)
+    {
+      udp_header_t *udp_h = (udp_header_t *) pst->items[pst->layer - 1].spec;
+      u16 dport = clib_net_to_host_u16 (udp_h->dst_port);
+
+      if (dport == GTPU_PORT)
+	{
+	  gtpu_header_t *gtpu_spec =
+	    (gtpu_header_t *) &pst->generic.spec[pst->generic.off];
+	  gtpu_header_t *gtpu_mask =
+	    (gtpu_header_t *) &pst->generic.mask[pst->generic.off];
+	  pst->items[pst->layer].spec = (void *) gtpu_spec;
+	  pst->items[pst->layer].mask = (void *) gtpu_mask;
+	  pst->items[pst->layer].size = sizeof (gtpu_header_t);
+	  pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_GTPU;
+	  pst->generic.off += sizeof (gtpu_header_t);
+	  pst->layer++;
+	  pst->nxt_proto = 0;
+	  return 0;
+	}
+      else if (dport == VXLAN_PORT)
+	{
+	  vxlan_header_t *vxlan_spec =
+	    (vxlan_header_t *) &pst->generic.spec[pst->generic.off];
+	  /* BUGFIX: the mask must come from generic.mask; it previously
+	   * aliased generic.spec, so the VXLAN item's mask was silently
+	   * identical to its spec. */
+	  vxlan_header_t *vxlan_mask =
+	    (vxlan_header_t *) &pst->generic.mask[pst->generic.off];
+	  pst->items[pst->layer].spec = (void *) vxlan_spec;
+	  pst->items[pst->layer].mask = (void *) vxlan_mask;
+	  pst->items[pst->layer].size = sizeof (vxlan_header_t);
+	  pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_VXLAN;
+	  pst->generic.off += sizeof (vxlan_header_t);
+	  pst->layer++;
+	  pst->nxt_proto = 0;
+	  goto parse_l2;
+	}
+      /* NOTE(review): a UDP destination port that is neither GTPU nor
+       * VXLAN falls through and the remaining bytes are parsed as an
+       * inner L2 header below — confirm this is intended. */
+    }
+  /* No supported Tunnel detected. */
+  else
+    {
+      log_err (pst->port->dev,
+	       "Partially parsed till offset %u, not able to parse further",
+	       pst->generic.off);
+      return 0;
+    }
+parse_l2:
+  if (oct_parse_l2 (pst))
+    return -1;
+parse_l3:
+  if (oct_parse_l3 (pst))
+    return -1;
+
+  return oct_parse_l4 (pst);
+}
+
+/**
+ * Translate a generic byte-pattern flow into ROC NPC items by walking it
+ * header by header: L2, L3, L4, then an optional tunnel (which parses the
+ * inner headers).  The whole pattern must be consumed.
+ */
+static vnet_dev_rv_t
+oct_flow_generic_pattern_parse (oct_flow_parse_state *pst)
+{
+  /* Short-circuit order matters: each stage advances pst before the
+   * next one runs. */
+  if (oct_parse_l2 (pst) || oct_parse_l3 (pst) || oct_parse_l4 (pst) ||
+      oct_parse_tunnel (pst))
+    return VNET_DEV_ERR_NOT_SUPPORTED;
+
+  /* Leftover bytes mean the pattern could not be fully expressed. */
+  if (pst->generic.off < pst->generic.len)
+    {
+      log_err (pst->port->dev,
+	       "Partially parsed till offset %u, not able to parse further",
+	       pst->generic.off);
+      return VNET_DEV_ERR_NOT_SUPPORTED;
+    }
+
+  pst->items[pst->layer].type = ROC_NPC_ITEM_TYPE_END;
+  return VNET_DEV_OK;
+}
+
static vnet_dev_rv_t
oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
uword *private_data)
@@ -190,12 +538,22 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
struct roc_npc_item_info item_info[ROC_NPC_ITEM_TYPE_END] = {};
struct roc_npc_action actions[ROC_NPC_ITEM_TYPE_END] = {};
oct_port_t *oct_port = vnet_dev_get_port_data (port);
+ ethernet_header_t eth_spec = {}, eth_mask = {};
+ sctp_header_t sctp_spec = {}, sctp_mask = {};
+ gtpu_header_t gtpu_spec = {}, gtpu_mask = {};
+ ip4_header_t ip4_spec = {}, ip4_mask = {};
+ ip6_header_t ip6_spec = {}, ip6_mask = {};
+ udp_header_t udp_spec = {}, udp_mask = {};
+ tcp_header_t tcp_spec = {}, tcp_mask = {};
+ esp_header_t esp_spec = {}, esp_mask = {};
u16 l4_src_port = 0, l4_dst_port = 0;
u16 l4_src_mask = 0, l4_dst_mask = 0;
struct roc_npc_action_rss rss_conf = {};
struct roc_npc_action_queue conf = {};
struct roc_npc_action_mark mark = {};
struct roc_npc *npc = &oct_port->npc;
+ u8 *flow_spec = 0, *flow_mask = 0;
+ u8 *drv_spec = 0, *drv_mask = 0;
vnet_dev_rv_t rv = VNET_DEV_OK;
int layer = 0, index = 0;
u16 *queues = NULL;
@@ -203,11 +561,52 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
u8 proto = 0;
u16 action = 0;
+ if (FLOW_IS_GENERIC_TYPE (flow))
+ {
+ unformat_input_t input;
+ int rc;
+
+ unformat_init_string (
+ &input, (const char *) flow->generic.pattern.spec,
+ strlen ((const char *) flow->generic.pattern.spec));
+ unformat_user (&input, unformat_hex_string, &flow_spec);
+ unformat_free (&input);
+
+ unformat_init_string (
+ &input, (const char *) flow->generic.pattern.mask,
+ strlen ((const char *) flow->generic.pattern.mask));
+ unformat_user (&input, unformat_hex_string, &flow_mask);
+ unformat_free (&input);
+
+ vec_validate (drv_spec, 1024);
+ vec_validate (drv_mask, 1024);
+ oct_flow_parse_state pst = {
+ .nxt_proto = 0,
+ .port = port,
+ .items = item_info,
+ .oct_drv = { .spec = drv_spec, .mask = drv_mask },
+ .generic = { .spec = flow_spec,
+ .mask = flow_mask,
+ .len = vec_len (flow_spec) },
+ };
+
+ rc = oct_flow_generic_pattern_parse (&pst);
+ if (rc)
+ {
+ vec_free (flow_spec);
+ vec_free (flow_mask);
+ vec_free (drv_spec);
+ vec_free (drv_mask);
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ goto parse_flow_actions;
+ }
+
if (FLOW_IS_ETHERNET_CLASS (flow))
{
- ethernet_header_t eth_spec = { .type = clib_host_to_net_u16 (
- flow->ethernet.eth_hdr.type) },
- eth_mask = { .type = 0xFFFF };
+ eth_spec.type = clib_host_to_net_u16 (flow->ethernet.eth_hdr.type);
+ eth_mask.type = 0xFFFF;
item_info[layer].spec = (void *) &eth_spec;
item_info[layer].mask = (void *) &eth_mask;
@@ -220,10 +619,11 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
{
vnet_flow_ip4_t *ip4_hdr = &flow->ip4;
proto = ip4_hdr->protocol.prot;
- ip4_header_t ip4_spec = { .src_address = ip4_hdr->src_addr.addr,
- .dst_address = ip4_hdr->dst_addr.addr },
- ip4_mask = { .src_address = ip4_hdr->src_addr.mask,
- .dst_address = ip4_hdr->dst_addr.mask };
+
+ ip4_spec.src_address = ip4_hdr->src_addr.addr;
+ ip4_spec.dst_address = ip4_hdr->dst_addr.addr;
+ ip4_mask.src_address = ip4_hdr->src_addr.mask;
+ ip4_mask.dst_address = ip4_hdr->dst_addr.mask;
item_info[layer].spec = (void *) &ip4_spec;
item_info[layer].mask = (void *) &ip4_mask;
@@ -245,10 +645,11 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
{
vnet_flow_ip6_t *ip6_hdr = &flow->ip6;
proto = ip6_hdr->protocol.prot;
- ip6_header_t ip6_spec = { .src_address = ip6_hdr->src_addr.addr,
- .dst_address = ip6_hdr->dst_addr.addr },
- ip6_mask = { .src_address = ip6_hdr->src_addr.mask,
- .dst_address = ip6_hdr->dst_addr.mask };
+
+ ip6_spec.src_address = ip6_hdr->src_addr.addr;
+ ip6_spec.dst_address = ip6_hdr->dst_addr.addr;
+ ip6_mask.src_address = ip6_hdr->src_addr.mask;
+ ip6_mask.dst_address = ip6_hdr->dst_addr.mask;
item_info[layer].spec = (void *) &ip6_spec;
item_info[layer].mask = (void *) &ip6_mask;
@@ -273,16 +674,15 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
switch (proto)
{
case IP_PROTOCOL_UDP:
- item_info[layer].type = ROC_NPC_ITEM_TYPE_UDP;
-
- udp_header_t udp_spec = { .src_port = l4_src_port,
- .dst_port = l4_dst_port },
- udp_mask = { .src_port = l4_src_mask,
- .dst_port = l4_dst_mask };
+ udp_spec.src_port = l4_src_port;
+ udp_spec.dst_port = l4_dst_port;
+ udp_mask.src_port = l4_src_mask;
+ udp_mask.dst_port = l4_dst_mask;
item_info[layer].spec = (void *) &udp_spec;
item_info[layer].mask = (void *) &udp_mask;
item_info[layer].size = sizeof (udp_header_t);
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_UDP;
layer++;
if (FLOW_IS_L4_TUNNEL_TYPE (flow))
@@ -290,14 +690,13 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
switch (flow->type)
{
case VNET_FLOW_TYPE_IP4_GTPU:
- item_info[layer].type = ROC_NPC_ITEM_TYPE_GTPU;
- gtpu_header_t gtpu_spec = { .teid = clib_host_to_net_u32 (
- flow->ip4_gtpu.teid) },
- gtpu_mask = { .teid = 0XFFFFFFFF };
+ gtpu_spec.teid = clib_host_to_net_u32 (flow->ip4_gtpu.teid);
+ gtpu_mask.teid = 0XFFFFFFFF;
item_info[layer].spec = (void *) &gtpu_spec;
item_info[layer].mask = (void *) &gtpu_mask;
item_info[layer].size = sizeof (gtpu_header_t);
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_GTPU;
layer++;
break;
@@ -309,42 +708,39 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
break;
case IP_PROTOCOL_TCP:
- item_info[layer].type = ROC_NPC_ITEM_TYPE_TCP;
-
- tcp_header_t tcp_spec = { .src_port = l4_src_port,
- .dst_port = l4_dst_port },
- tcp_mask = { .src_port = l4_src_mask,
- .dst_port = l4_dst_mask };
+ tcp_spec.src_port = l4_src_port;
+ tcp_spec.dst_port = l4_dst_port;
+ tcp_mask.src_port = l4_src_mask;
+ tcp_mask.dst_port = l4_dst_mask;
item_info[layer].spec = (void *) &tcp_spec;
item_info[layer].mask = (void *) &tcp_mask;
item_info[layer].size = sizeof (tcp_header_t);
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_TCP;
layer++;
break;
case IP_PROTOCOL_SCTP:
- item_info[layer].type = ROC_NPC_ITEM_TYPE_SCTP;
-
- sctp_header_t sctp_spec = { .src_port = l4_src_port,
- .dst_port = l4_dst_port },
- sctp_mask = { .src_port = l4_src_mask,
- .dst_port = l4_dst_mask };
+ sctp_spec.src_port = l4_src_port;
+ sctp_spec.dst_port = l4_dst_port;
+ sctp_mask.src_port = l4_src_mask;
+ sctp_mask.dst_port = l4_dst_mask;
item_info[layer].spec = (void *) &sctp_spec;
item_info[layer].mask = (void *) &sctp_mask;
item_info[layer].size = sizeof (sctp_header_t);
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_SCTP;
layer++;
break;
case IP_PROTOCOL_IPSEC_ESP:
- item_info[layer].type = ROC_NPC_ITEM_TYPE_ESP;
- esp_header_t esp_spec = { .spi = clib_host_to_net_u32 (
- flow->ip4_ipsec_esp.spi) },
- esp_mask = { .spi = 0xFFFFFFFF };
+ esp_spec.spi = clib_host_to_net_u32 (flow->ip4_ipsec_esp.spi);
+ esp_mask.spi = 0xFFFFFFFF;
item_info[layer].spec = (void *) &esp_spec;
item_info[layer].mask = (void *) &esp_mask;
item_info[layer].size = sizeof (u32);
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_ESP;
layer++;
break;
@@ -357,6 +753,7 @@ oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
end_item_info:
item_info[layer].type = ROC_NPC_ITEM_TYPE_END;
+parse_flow_actions:
if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
{
conf.index = flow->redirect_queue;
@@ -422,6 +819,11 @@ end_item_info:
if (queues)
clib_mem_free (queues);
+ vec_free (flow_spec);
+ vec_free (flow_mask);
+ vec_free (drv_spec);
+ vec_free (drv_mask);
+
return rv;
}
diff --git a/src/plugins/dev_octeon/format.c b/src/plugins/dev_octeon/format.c
index e624b84f54e..d0f53013d99 100644
--- a/src/plugins/dev_octeon/format.c
+++ b/src/plugins/dev_octeon/format.c
@@ -25,7 +25,7 @@ format_oct_nix_rx_cqe_desc (u8 *s, va_list *args)
typeof (d->sg0) *sg0 = &d->sg0;
typeof (d->sg0) *sg1 = &d->sg1;
- s = format (s, "hdr: cqe_type %u nude %u q %u tag 0x%x", h->cqe_type,
+ s = format (s, "hdr: cqe_type %u nude %u qid %u tag 0x%x", h->cqe_type,
h->node, h->q, h->tag);
s = format (s, "\n%Uparse:", format_white_space, indent);
#define _(n, f) s = format (s, " " #n " " f, p->n)
diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c
index 8c5ed95b062..99cadddfc24 100644
--- a/src/plugins/dev_octeon/init.c
+++ b/src/plugins/dev_octeon/init.c
@@ -4,12 +4,13 @@
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
+#include <vnet/dev/bus/pci.h>
#include <vnet/dev/counters.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
#include <dev_octeon/octeon.h>
+#include <dev_octeon/crypto.h>
#include <base/roc_api.h>
#include <common.h>
@@ -51,9 +52,12 @@ static struct
}
_ (0xa063, RVU_PF, "Marvell Octeon Resource Virtualization Unit PF"),
- _ (0xa0f8, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"),
+ _ (0xa064, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"),
+ _ (0xa0f8, LBK_VF, "Marvell Octeon Loopback Unit VF"),
_ (0xa0f7, SDP_VF, "Marvell Octeon System DPI Packet Interface Unit VF"),
- _ (0xa0f3, CPT_VF, "Marvell Octeon Cryptographic Accelerator Unit VF"),
+ _ (0xa0f3, O10K_CPT_VF,
+ "Marvell Octeon-10 Cryptographic Accelerator Unit VF"),
+ _ (0xa0fe, O9K_CPT_VF, "Marvell Octeon-9 Cryptographic Accelerator Unit VF"),
#undef _
};
@@ -109,6 +113,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev)
.reta_sz = ROC_NIX_RSS_RETA_SZ_256,
.max_sqb_count = 512,
.pci_dev = &cd->plt_pci_dev,
+ .hw_vlan_ins = true,
};
if ((rrv = roc_nix_dev_init (cd->nix)))
@@ -130,6 +135,9 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev)
.rx_offloads = {
.ip4_cksum = 1,
},
+ .tx_offloads = {
+ .ip4_cksum = 1,
+ },
},
.ops = {
.init = oct_port_init,
@@ -140,6 +148,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev)
.config_change_validate = oct_port_cfg_change_validate,
.format_status = format_oct_port_status,
.format_flow = format_oct_port_flow,
+ .clear_counters = oct_port_clear_counters,
},
.data_size = sizeof (oct_port_t),
.initial_data = &oct_port,
@@ -158,6 +167,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev)
.alloc = oct_rx_queue_alloc,
.free = oct_rx_queue_free,
.format_info = format_oct_rxq_info,
+ .clear_counters = oct_rxq_clear_counters,
},
},
.tx_queue = {
@@ -172,6 +182,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev)
.alloc = oct_tx_queue_alloc,
.free = oct_tx_queue_free,
.format_info = format_oct_txq_info,
+ .clear_counters = oct_txq_clear_counters,
},
},
};
@@ -183,17 +194,113 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev)
return vnet_dev_port_add (vm, dev, 0, &port_add_args);
}
+static int
+oct_conf_cpt (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd,
+ int nb_lf)
+{
+ struct roc_cpt *roc_cpt = ocd->roc_cpt;
+ int rrv;
+
+ if ((rrv = roc_cpt_eng_grp_add (roc_cpt, CPT_ENG_TYPE_SE)) < 0)
+ {
+ log_err (dev, "Could not add CPT SE engines");
+ return cnx_return_roc_err (dev, rrv, "roc_cpt_eng_grp_add");
+ }
+ if ((rrv = roc_cpt_eng_grp_add (roc_cpt, CPT_ENG_TYPE_IE)) < 0)
+ {
+ log_err (dev, "Could not add CPT IE engines");
+ return cnx_return_roc_err (dev, rrv, "roc_cpt_eng_grp_add");
+ }
+ if (roc_cpt->eng_grp[CPT_ENG_TYPE_IE] != ROC_CPT_DFLT_ENG_GRP_SE_IE)
+ {
+ log_err (dev, "Invalid CPT IE engine group configuration");
+ return -1;
+ }
+ if (roc_cpt->eng_grp[CPT_ENG_TYPE_SE] != ROC_CPT_DFLT_ENG_GRP_SE)
+ {
+ log_err (dev, "Invalid CPT SE engine group configuration");
+ return -1;
+ }
+ if ((rrv = roc_cpt_dev_configure (roc_cpt, nb_lf, false, 0)) < 0)
+ {
+ log_err (dev, "could not configure crypto device %U",
+ format_vlib_pci_addr, roc_cpt->pci_dev->addr);
+ return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_configure");
+ }
+ return 0;
+}
+
+static vnet_dev_rv_t
+oct_conf_cpt_queue (vlib_main_t *vm, vnet_dev_t *dev, oct_crypto_dev_t *ocd)
+{
+  struct roc_cpt *roc_cpt = ocd->roc_cpt;
+  struct roc_cpt_lmtline *cpt_lmtline;
+  struct roc_cpt_lf *cpt_lf;
+  int rrv;
+
+  cpt_lf = &ocd->lf;
+  cpt_lmtline = &ocd->lmtline;
+
+  cpt_lf->nb_desc = OCT_CPT_LF_MAX_NB_DESC;
+  cpt_lf->lf_id = 0;
+  if ((rrv = roc_cpt_lf_init (roc_cpt, cpt_lf)) < 0)
+    return cnx_return_roc_err (dev, rrv, "roc_cpt_lf_init");
+
+  roc_cpt_iq_enable (cpt_lf);
+
+  /* Note parenthesization: assign the return value, then compare — the
+   * previous form assigned the boolean (ret < 0) to rrv, losing the code. */
+  if ((rrv = roc_cpt_lmtline_init (roc_cpt, cpt_lmtline, 0)) < 0)
+    return cnx_return_roc_err (dev, rrv, "roc_cpt_lmtline_init");
+
+  return 0;
+}
+
static vnet_dev_rv_t
oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev)
{
+ oct_crypto_main_t *ocm = &oct_crypto_main;
+ extern oct_plt_init_param_t oct_plt_init_param;
oct_device_t *cd = vnet_dev_get_data (dev);
+ oct_crypto_dev_t *ocd = NULL;
int rrv;
- struct roc_cpt cpt = {
- .pci_dev = &cd->plt_pci_dev,
- };
- if ((rrv = roc_cpt_dev_init (&cpt)))
+ if (ocm->n_cpt == OCT_MAX_N_CPT_DEV || ocm->started)
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ ocd = oct_plt_init_param.oct_plt_zmalloc (sizeof (oct_crypto_dev_t),
+ CLIB_CACHE_LINE_BYTES);
+
+ ocd->roc_cpt = oct_plt_init_param.oct_plt_zmalloc (sizeof (struct roc_cpt),
+ CLIB_CACHE_LINE_BYTES);
+ ocd->roc_cpt->pci_dev = &cd->plt_pci_dev;
+
+ ocd->dev = dev;
+
+ if ((rrv = roc_cpt_dev_init (ocd->roc_cpt)))
return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_init");
+
+ if ((rrv = oct_conf_cpt (vm, dev, ocd, 1)))
+ return rrv;
+
+ if ((rrv = oct_conf_cpt_queue (vm, dev, ocd)))
+ return rrv;
+
+ if (!ocm->n_cpt)
+ {
+ /*
+ * Initialize s/w queues, which are common across multiple
+ * crypto devices
+ */
+ oct_conf_sw_queue (vm, dev);
+
+ ocm->crypto_dev[0] = ocd;
+ }
+
+ ocm->crypto_dev[1] = ocd;
+
+ oct_init_crypto_engine_handlers (vm, dev);
+
+ ocm->n_cpt++;
+
return VNET_DEV_OK;
}
@@ -244,10 +351,12 @@ oct_init (vlib_main_t *vm, vnet_dev_t *dev)
{
case OCT_DEVICE_TYPE_RVU_PF:
case OCT_DEVICE_TYPE_RVU_VF:
+ case OCT_DEVICE_TYPE_LBK_VF:
case OCT_DEVICE_TYPE_SDP_VF:
return oct_init_nix (vm, dev);
- case OCT_DEVICE_TYPE_CPT_VF:
+ case OCT_DEVICE_TYPE_O10K_CPT_VF:
+ case OCT_DEVICE_TYPE_O9K_CPT_VF:
return oct_init_cpt (vm, dev);
default:
diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h
index 92ec953ed23..ccf8f62880d 100644
--- a/src/plugins/dev_octeon/octeon.h
+++ b/src/plugins/dev_octeon/octeon.h
@@ -12,6 +12,12 @@
#include <vnet/flow/flow.h>
#include <vnet/udp/udp.h>
#include <vnet/ipsec/esp.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/icmp46_packet.h>
+#include <vnet/ip/igmp_packet.h>
+#include <vnet/gre/packet.h>
+#include <vxlan/vxlan.h>
#include <base/roc_api.h>
#include <dev_octeon/hw_defs.h>
@@ -22,8 +28,10 @@ typedef enum
OCT_DEVICE_TYPE_UNKNOWN = 0,
OCT_DEVICE_TYPE_RVU_PF,
OCT_DEVICE_TYPE_RVU_VF,
+ OCT_DEVICE_TYPE_LBK_VF,
OCT_DEVICE_TYPE_SDP_VF,
- OCT_DEVICE_TYPE_CPT_VF,
+ OCT_DEVICE_TYPE_O10K_CPT_VF,
+ OCT_DEVICE_TYPE_O9K_CPT_VF,
} __clib_packed oct_device_type_t;
typedef struct
@@ -34,7 +42,6 @@ typedef struct
u8 full_duplex : 1;
u32 speed;
struct plt_pci_device plt_pci_dev;
- struct roc_cpt cpt;
struct roc_nix *nix;
} oct_device_t;
@@ -95,7 +102,6 @@ typedef struct
u64 aura_handle;
u64 io_addr;
void *lmt_addr;
-
oct_npa_batch_alloc_cl128_t *ba_buffer;
u8 ba_first_cl;
u8 ba_num_cl;
@@ -140,6 +146,17 @@ vnet_dev_rv_t oct_flow_validate_params (vlib_main_t *, vnet_dev_port_t *,
vnet_dev_rv_t oct_flow_query (vlib_main_t *, vnet_dev_port_t *, u32, uword,
u64 *);
+/* counter.c */
+void oct_port_add_counters (vlib_main_t *, vnet_dev_port_t *);
+void oct_port_clear_counters (vlib_main_t *, vnet_dev_port_t *);
+void oct_rxq_clear_counters (vlib_main_t *, vnet_dev_rx_queue_t *);
+void oct_txq_clear_counters (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t oct_port_get_stats (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t oct_rxq_get_stats (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_rx_queue_t *);
+vnet_dev_rv_t oct_txq_get_stats (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_tx_queue_t *);
+
#define log_debug(dev, f, ...) \
vlib_log (VLIB_LOG_LEVEL_DEBUG, oct_log.class, "%U: " f, \
format_vnet_dev_addr, (dev), ##__VA_ARGS__)
@@ -162,7 +179,8 @@ vnet_dev_rv_t oct_flow_query (vlib_main_t *, vnet_dev_port_t *, u32, uword,
_ (AURA_BATCH_ALLOC_ISSUE_FAIL, aura_batch_alloc_issue_fail, ERROR, \
"aura batch alloc issue failed") \
_ (AURA_BATCH_ALLOC_NOT_READY, aura_batch_alloc_not_ready, ERROR, \
- "aura batch alloc not ready")
+ "aura batch alloc not ready") \
+ _ (MTU_EXCEEDED, mtu_exceeded, ERROR, "mtu exceeded")
typedef enum
{
diff --git a/src/plugins/dev_octeon/port.c b/src/plugins/dev_octeon/port.c
index d5f78301adf..528683fa3c7 100644
--- a/src/plugins/dev_octeon/port.c
+++ b/src/plugins/dev_octeon/port.c
@@ -4,7 +4,6 @@
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
#include <vnet/dev/counters.h>
#include <dev_octeon/octeon.h>
#include <dev_octeon/common.h>
@@ -54,11 +53,83 @@ oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...)
}
vnet_dev_rv_t
+oct_port_pause_flow_control_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ struct roc_nix_fc_cfg fc_cfg;
+ struct roc_nix_sq *sq;
+ struct roc_nix_cq *cq;
+ struct roc_nix_rq *rq;
+ int rrv;
+
+ /* pause flow control is not supported on SDP/LBK devices */
+ if (roc_nix_is_sdp (nix) || roc_nix_is_lbk (nix))
+ {
+ log_notice (dev,
+ "pause flow control is not supported on SDP/LBK devices");
+ return VNET_DEV_OK;
+ }
+
+ fc_cfg.type = ROC_NIX_FC_RXCHAN_CFG;
+ fc_cfg.rxchan_cfg.enable = true;
+ rrv = roc_nix_fc_config_set (nix, &fc_cfg);
+ if (rrv)
+ return oct_roc_err (dev, rrv, "roc_nix_fc_config_set failed");
+
+ memset (&fc_cfg, 0, sizeof (struct roc_nix_fc_cfg));
+ fc_cfg.type = ROC_NIX_FC_RQ_CFG;
+ fc_cfg.rq_cfg.enable = true;
+ fc_cfg.rq_cfg.tc = 0;
+
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ {
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+
+ rq = &crq->rq;
+ cq = &crq->cq;
+
+ fc_cfg.rq_cfg.rq = rq->qid;
+ fc_cfg.rq_cfg.cq_drop = cq->drop_thresh;
+
+ rrv = roc_nix_fc_config_set (nix, &fc_cfg);
+ if (rrv)
+ return oct_roc_err (dev, rrv, "roc_nix_fc_config_set failed");
+ }
+
+ memset (&fc_cfg, 0, sizeof (struct roc_nix_fc_cfg));
+ fc_cfg.type = ROC_NIX_FC_TM_CFG;
+ fc_cfg.tm_cfg.tc = 0;
+ fc_cfg.tm_cfg.enable = true;
+
+ foreach_vnet_dev_port_tx_queue (txq, port)
+ {
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+
+ sq = &ctq->sq;
+
+ fc_cfg.tm_cfg.sq = sq->qid;
+ rrv = roc_nix_fc_config_set (nix, &fc_cfg);
+ if (rrv)
+ return oct_roc_err (dev, rrv, "roc_nix_fc_config_set failed");
+ }
+
+ /* By default, enable pause flow control */
+ rrv = roc_nix_fc_mode_set (nix, ROC_NIX_FC_FULL);
+ if (rrv)
+ return oct_roc_err (dev, rrv, "roc_nix_fc_mode_set failed");
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
{
vnet_dev_t *dev = port->dev;
oct_device_t *cd = vnet_dev_get_data (dev);
oct_port_t *cp = vnet_dev_get_port_data (port);
+ u8 mac_addr[PLT_ETHER_ADDR_LEN];
struct roc_nix *nix = cd->nix;
vnet_dev_rv_t rv;
int rrv;
@@ -76,6 +147,22 @@ oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
}
cp->lf_allocated = 1;
+ if (!roc_nix_is_vf_or_sdp (nix))
+ {
+ if ((rrv = roc_nix_npc_mac_addr_get (nix, mac_addr)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (dev, rrv, "roc_nix_npc_mac_addr_get failed");
+ }
+
+ /* Sync MAC address to CGX/RPM table */
+ if ((rrv = roc_nix_mac_addr_set (nix, mac_addr)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (dev, rrv, "roc_nix_mac_addr_set failed");
+ }
+ }
+
if ((rrv = roc_nix_tm_init (nix)))
{
oct_port_deinit (vm, port);
@@ -124,6 +211,21 @@ oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
return rv;
}
+ oct_port_add_counters (vm, port);
+
+ if ((rrv = roc_nix_mac_mtu_set (nix, port->max_rx_frame_size)))
+ {
+ rv = oct_roc_err (dev, rrv, "roc_nix_mac_mtu_set() failed");
+ return rv;
+ }
+
+ /* Configure pause frame flow control*/
+ if ((rv = oct_port_pause_flow_control_init (vm, port)))
+ {
+ oct_port_deinit (vm, port);
+ return rv;
+ }
+
return VNET_DEV_OK;
}
@@ -172,7 +274,22 @@ oct_port_poll (vlib_main_t *vm, vnet_dev_port_t *port)
vnet_dev_port_state_changes_t changes = {};
int rrv;
- if (roc_nix_is_lbk (nix))
+ if (oct_port_get_stats (vm, port))
+ return;
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ if (oct_rxq_get_stats (vm, port, q))
+ return;
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ if (oct_txq_get_stats (vm, port, q))
+ return;
+ }
+
+ if (roc_nix_is_lbk (nix) || roc_nix_is_sdp (nix))
{
link_info.status = 1;
link_info.full_duplex = 1;
@@ -203,7 +320,8 @@ oct_port_poll (vlib_main_t *vm, vnet_dev_port_t *port)
if (cd->speed != link_info.speed)
{
changes.change.link_speed = 1;
- changes.link_speed = link_info.speed;
+ /* Convert to Kbps */
+ changes.link_speed = link_info.speed * 1000;
cd->speed = link_info.speed;
}
@@ -327,12 +445,6 @@ oct_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
ctq->n_enq = 0;
}
- if ((rrv = roc_nix_mac_mtu_set (nix, 9200)))
- {
- rv = oct_roc_err (dev, rrv, "roc_nix_mac_mtu_set() failed");
- goto done;
- }
-
if ((rrv = roc_nix_npc_rx_ena_dis (nix, true)))
{
rv = oct_roc_err (dev, rrv, "roc_nix_npc_rx_ena_dis() failed");
@@ -376,6 +488,18 @@ oct_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
foreach_vnet_dev_port_tx_queue (q, port)
oct_txq_stop (vm, q);
+
+ vnet_dev_port_state_change (vm, port,
+ (vnet_dev_port_state_changes_t){
+ .change.link_state = 1,
+ .change.link_speed = 1,
+ .link_speed = 0,
+ .link_state = 0,
+ });
+
+ /* Update the device status */
+ cd->status = 0;
+ cd->speed = 0;
}
vnet_dev_rv_t
@@ -385,7 +509,7 @@ oct_validate_config_promisc_mode (vnet_dev_port_t *port, int enable)
oct_device_t *cd = vnet_dev_get_data (dev);
struct roc_nix *nix = cd->nix;
- if (roc_nix_is_vf_or_sdp (nix))
+ if (roc_nix_is_sdp (nix) || roc_nix_is_lbk (nix))
return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
return VNET_DEV_OK;
@@ -405,6 +529,9 @@ oct_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enable)
return oct_roc_err (dev, rv, "roc_nix_npc_promisc_ena_dis failed");
}
+ if (!roc_nix_is_pf (nix))
+ return VNET_DEV_OK;
+
rv = roc_nix_mac_promisc_mode_enable (nix, enable);
if (rv)
{
@@ -416,6 +543,61 @@ oct_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enable)
return VNET_DEV_OK;
}
+static vnet_dev_rv_t
+oct_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_hw_addr_t *addr, int is_add,
+ int is_primary)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ i32 rrv;
+
+ if (is_primary)
+ {
+ if (is_add)
+ {
+ /* Update mac address at NPC */
+ rrv = roc_nix_npc_mac_addr_set (nix, (u8 *) addr);
+ if (rrv)
+ rv = oct_roc_err (dev, rrv, "roc_nix_npc_mac_addr_set() failed");
+
+ /* Update mac address at CGX for PFs only */
+ if (!roc_nix_is_vf_or_sdp (nix))
+ {
+ rrv = roc_nix_mac_addr_set (nix, (u8 *) addr);
+ if (rrv)
+ {
+ /* Rollback to previous mac address */
+ roc_nix_npc_mac_addr_set (nix,
+ (u8 *) &port->primary_hw_addr);
+ rv = oct_roc_err (dev, rrv, "roc_nix_mac_addr_set() failed");
+ }
+ }
+ }
+ }
+
+ return rv;
+}
+
+vnet_dev_rv_t
+oct_op_config_max_rx_len (vlib_main_t *vm, vnet_dev_port_t *port,
+ u32 rx_frame_size)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ i32 rrv;
+
+ rrv = roc_nix_mac_max_rx_len_set (nix, rx_frame_size);
+ if (rrv)
+ rv = oct_roc_err (dev, rrv, "roc_nix_mac_max_rx_len_set() failed");
+
+ return rv;
+}
+
vnet_dev_rv_t
oct_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
vnet_dev_port_cfg_change_req_t *req)
@@ -465,6 +647,9 @@ oct_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
break;
case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ rv = oct_port_add_del_eth_addr (vm, port, &req->addr,
+ /* is_add */ 1,
+ /* is_primary */ 1);
break;
case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
@@ -474,6 +659,7 @@ oct_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
break;
case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ rv = oct_op_config_max_rx_len (vm, port, req->max_rx_frame_size);
break;
case VNET_DEV_PORT_CFG_ADD_RX_FLOW:
diff --git a/src/plugins/dev_octeon/queue.c b/src/plugins/dev_octeon/queue.c
index d6ae794fb8d..58d391b8508 100644
--- a/src/plugins/dev_octeon/queue.c
+++ b/src/plugins/dev_octeon/queue.c
@@ -4,7 +4,6 @@
#include <vnet/vnet.h>
#include <vnet/dev/dev.h>
-#include <vnet/dev/pci.h>
#include <vnet/dev/counters.h>
#include <dev_octeon/octeon.h>
#include <vnet/ethernet/ethernet.h>
diff --git a/src/plugins/dev_octeon/roc_helper.c b/src/plugins/dev_octeon/roc_helper.c
index f10c2cb578b..c1166b654cf 100644
--- a/src/plugins/dev_octeon/roc_helper.c
+++ b/src/plugins/dev_octeon/roc_helper.c
@@ -49,6 +49,12 @@ oct_plt_get_thread_index (void)
return __os_thread_index;
}
+static u64
+oct_plt_get_cache_line_size (void)
+{
+ return CLIB_CACHE_LINE_BYTES;
+}
+
static void
oct_drv_physmem_free (vlib_main_t *vm, void *mem)
{
@@ -69,13 +75,12 @@ oct_drv_physmem_alloc (vlib_main_t *vm, u32 size, u32 align)
if (align)
{
- /* Force cache line alloc in case alignment is less than cache line */
- align = align < CLIB_CACHE_LINE_BYTES ? CLIB_CACHE_LINE_BYTES : align;
+ /* Force ROC align alloc in case alignment is less than ROC align */
+ align = align < ROC_ALIGN ? ROC_ALIGN : align;
mem = vlib_physmem_alloc_aligned_on_numa (vm, size, align, 0);
}
else
- mem =
- vlib_physmem_alloc_aligned_on_numa (vm, size, CLIB_CACHE_LINE_BYTES, 0);
+ mem = vlib_physmem_alloc_aligned_on_numa (vm, size, ROC_ALIGN, 0);
if (!mem)
return NULL;
@@ -178,4 +183,5 @@ oct_plt_init_param_t oct_plt_init_param = {
.oct_plt_spinlock_unlock = oct_plt_spinlock_unlock,
.oct_plt_spinlock_trylock = oct_plt_spinlock_trylock,
.oct_plt_get_thread_index = oct_plt_get_thread_index,
+ .oct_plt_get_cache_line_size = oct_plt_get_cache_line_size,
};
diff --git a/src/plugins/dev_octeon/rx_node.c b/src/plugins/dev_octeon/rx_node.c
index 997f1356199..b057c4d7047 100644
--- a/src/plugins/dev_octeon/rx_node.c
+++ b/src/plugins/dev_octeon/rx_node.c
@@ -104,7 +104,9 @@ oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx,
{
oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
vlib_buffer_template_t bt = rxq->buffer_template;
- u32 n_left;
+ u32 b0_err_flags = 0, b1_err_flags = 0;
+ u32 b2_err_flags = 0, b3_err_flags = 0;
+ u32 n_left, err_flags = 0;
oct_nix_rx_cqe_desc_t *d = ctx->next_desc;
vlib_buffer_t *b[4];
@@ -145,6 +147,13 @@ oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx,
oct_rx_attach_tail (vm, ctx, b[2], d + 2);
oct_rx_attach_tail (vm, ctx, b[3], d + 3);
}
+
+ b0_err_flags = (d[0].parse.w[0] >> 20) & 0xFFF;
+ b1_err_flags = (d[1].parse.w[0] >> 20) & 0xFFF;
+ b2_err_flags = (d[2].parse.w[0] >> 20) & 0xFFF;
+ b3_err_flags = (d[3].parse.w[0] >> 20) & 0xFFF;
+
+ err_flags |= b0_err_flags | b1_err_flags | b2_err_flags | b3_err_flags;
}
for (; n_left; d += 1, n_left -= 1, ctx->to_next += 1)
@@ -157,14 +166,51 @@ oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx,
ctx->n_segs += 1;
if (d[0].sg0.segs > 1)
oct_rx_attach_tail (vm, ctx, b[0], d + 0);
+
+ err_flags |= ((d[0].parse.w[0] >> 20) & 0xFFF);
}
plt_write64 ((crq->cq.wdata | n), crq->cq.door);
ctx->n_rx_pkts += n;
ctx->n_left_to_next -= n;
+ if (err_flags)
+ ctx->parse_w0_or = (err_flags << 20);
+
return n;
}
+#ifdef PLATFORM_OCTEON9
+static_always_inline u32
+oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill)
+{
+ u32 n_alloc, n_free;
+ u32 buffer_indices[n_refill];
+ vlib_buffer_t *buffers[n_refill];
+ u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ u64 aura = roc_npa_aura_handle_to_aura (crq->aura_handle);
+ const uint64_t addr =
+ roc_npa_aura_handle_to_base (crq->aura_handle) + NPA_LF_AURA_OP_FREE0;
+
+ if (n_refill < 256)
+ return 0;
+
+ n_alloc = vlib_buffer_alloc (vm, buffer_indices, n_refill);
+ if (PREDICT_FALSE (n_alloc < n_refill))
+ goto alloc_fail;
+
+ vlib_get_buffers (vm, buffer_indices, (vlib_buffer_t **) buffers, n_alloc);
+
+ for (n_free = 0; n_free < n_alloc; n_free++)
+ roc_store_pair ((u64) buffers[n_free], aura, addr);
+
+ return n_alloc;
+
+alloc_fail:
+ vlib_buffer_unalloc_to_pool (vm, buffer_indices, n_alloc, bpi);
+ return 0;
+}
+#else
static_always_inline void
oct_rxq_refill_batch (vlib_main_t *vm, u64 lmt_id, u64 addr,
oct_npa_lf_aura_batch_free_line_t *lines, u32 *bi,
@@ -260,6 +306,7 @@ oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill)
return n_enq;
}
+#endif
static_always_inline void
oct_rx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c
index 0dbf8759d35..f42f18d989b 100644
--- a/src/plugins/dev_octeon/tx_node.c
+++ b/src/plugins/dev_octeon/tx_node.c
@@ -22,13 +22,54 @@ typedef struct
u32 n_tx_bytes;
u32 n_drop;
vlib_buffer_t *drop[VLIB_FRAME_SIZE];
+ u32 n_exd_mtu;
+ vlib_buffer_t *exd_mtu[VLIB_FRAME_SIZE];
u32 batch_alloc_not_ready;
u32 batch_alloc_issue_fail;
+ int max_pkt_len;
u16 lmt_id;
u64 lmt_ioaddr;
lmt_line_t *lmt_lines;
} oct_tx_ctx_t;
+#ifdef PLATFORM_OCTEON9
+static_always_inline u32
+oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ u16 off = ctq->hdr_off;
+ u64 ah = ctq->aura_handle;
+ u32 n_freed = 0, n;
+
+ ah = ctq->aura_handle;
+
+ if ((n = roc_npa_aura_op_available (ah)) >= 32)
+ {
+ u64 buffers[n];
+ u32 bi[n];
+
+ n_freed = roc_npa_aura_op_bulk_alloc (ah, buffers, n, 0, 1);
+ vlib_get_buffer_indices_with_offset (vm, (void **) &buffers, bi, n_freed,
+ off);
+ vlib_buffer_free_no_next (vm, bi, n_freed);
+ }
+
+ return n_freed;
+}
+
+static_always_inline void
+oct_lmt_copy (void *lmt_addr, u64 io_addr, void *desc, u64 dwords)
+{
+ u64 lmt_status;
+
+ do
+ {
+ roc_lmt_mov_seg (lmt_addr, desc, dwords);
+ lmt_status = roc_lmt_submit_ldeor (io_addr);
+ }
+ while (lmt_status == 0);
+}
+#else
static_always_inline u32
oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
{
@@ -130,10 +171,12 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
return n_freed;
}
+#endif
static_always_inline u8
oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
- lmt_line_t *line, u32 flags, int simple, int trace)
+ lmt_line_t *line, u32 flags, int simple, int trace, u32 *n,
+ u8 *dpl)
{
u8 n_dwords = 2;
u32 total_len = 0;
@@ -148,6 +191,17 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
},
};
+ if (PREDICT_FALSE (vlib_buffer_length_in_chain (vm, b) > ctx->max_pkt_len))
+ {
+ ctx->exd_mtu[ctx->n_exd_mtu++] = b;
+ return 0;
+ }
+
+#ifdef PLATFORM_OCTEON9
+ /* Override line for Octeon9 */
+ line = ctx->lmt_lines;
+#endif
+
if (!simple && flags & VLIB_BUFFER_NEXT_PRESENT)
{
u8 n_tail_segs = 0;
@@ -159,7 +213,7 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
tail_segs[n_tail_segs++] = t;
if (n_tail_segs > 5)
{
- ctx->drop[ctx->n_drop++] = t;
+ ctx->drop[ctx->n_drop++] = b;
return 0;
}
}
@@ -201,19 +255,18 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
{
d.hdr_w1.ol3type = NIX_SENDL3TYPE_IP4_CKSUM;
- d.hdr_w1.ol3ptr = vnet_buffer (b)->l3_hdr_offset;
- d.hdr_w1.ol4ptr =
- vnet_buffer (b)->l3_hdr_offset + sizeof (ip4_header_t);
+ d.hdr_w1.ol3ptr = vnet_buffer (b)->l3_hdr_offset - b->current_data;
+ d.hdr_w1.ol4ptr = d.hdr_w1.ol3ptr + sizeof (ip4_header_t);
}
if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
{
d.hdr_w1.ol4type = NIX_SENDL4TYPE_UDP_CKSUM;
- d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset;
+ d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset - b->current_data;
}
else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
{
d.hdr_w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
- d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset;
+ d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset - b->current_data;
}
}
@@ -228,8 +281,15 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
t->sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
}
+#ifdef PLATFORM_OCTEON9
+ oct_lmt_copy (line, ctx->lmt_ioaddr, &d, n_dwords);
+#else
for (u32 i = 0; i < n_dwords; i++)
line->dwords[i] = d.as_u128[i];
+#endif
+
+ *dpl = n_dwords;
+ *n = *n + 1;
return n_dwords;
}
@@ -239,8 +299,9 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
vlib_buffer_t **b, u32 n_pkts, int trace)
{
u8 dwords_per_line[16], *dpl = dwords_per_line;
- u64 lmt_arg, ioaddr, n_lines;
- u32 n_left, or_flags_16 = 0;
+ u64 __attribute__ ((unused)) lmt_arg, ioaddr, n_lines;
+ u32 __attribute__ ((unused)) or_flags_16 = 0;
+ u32 n_left, n = 0;
const u32 not_simple_flags =
VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD;
lmt_line_t *l = ctx->lmt_lines;
@@ -248,7 +309,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
/* Data Store Memory Barrier - outer shareable domain */
asm volatile("dmb oshst" ::: "memory");
- for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8, l += 8)
+ for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8)
{
u32 f0, f1, f2, f3, f4, f5, f6, f7, or_f = 0;
vlib_prefetch_buffer_header (b[8], LOAD);
@@ -269,49 +330,56 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
if ((or_f & not_simple_flags) == 0)
{
int simple = 1;
- oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace);
- oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[13], LOAD);
- oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace);
- oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[14], LOAD);
- oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace);
- oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[15], LOAD);
- oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace);
- oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace);
- dpl[0] = dpl[1] = dpl[2] = dpl[3] = 2;
- dpl[4] = dpl[5] = dpl[6] = dpl[7] = 2;
+ oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]);
}
else
{
int simple = 0;
- dpl[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace);
- dpl[1] = oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[13], LOAD);
- dpl[2] = oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace);
- dpl[3] = oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[14], LOAD);
- dpl[4] = oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace);
- dpl[5] = oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[15], LOAD);
- dpl[6] = oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace);
- dpl[7] = oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]);
}
- dpl += 8;
+ dpl += n;
+ l += n;
+ n = 0;
}
- for (; n_left > 0; n_left -= 1, b += 1, l += 1)
+ for (; n_left > 0; n_left -= 1, b += 1)
{
u32 f0 = b[0]->flags;
- dpl++[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace);
+ oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace, &n, &dpl[n]);
or_flags_16 |= f0;
+ dpl += n;
+ l += n;
+ n = 0;
}
lmt_arg = ctx->lmt_id;
ioaddr = ctx->lmt_ioaddr;
- n_lines = n_pkts;
+ n_lines = dpl - dwords_per_line;
+
+ if (PREDICT_FALSE (!n_lines))
+ return n_pkts;
+#ifndef PLATFORM_OCTEON9
if (PREDICT_FALSE (or_flags_16 & VLIB_BUFFER_NEXT_PRESENT))
{
dpl = dwords_per_line;
@@ -340,6 +408,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
}
roc_lmt_submit_steorl (lmt_arg, ioaddr);
+#endif
return n_pkts;
}
@@ -350,11 +419,17 @@ VNET_DEV_NODE_FN (oct_tx_node)
vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node);
vnet_dev_tx_queue_t *txq = rt->tx_queue;
oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_t *dev = txq->port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
u32 node_index = node->node_index;
u32 *from = vlib_frame_vector_args (frame);
u32 n, n_enq, n_left, n_pkts = frame->n_vectors;
vlib_buffer_t *buffers[VLIB_FRAME_SIZE + 8], **b = buffers;
+#ifdef PLATFORM_OCTEON9
+ u64 lmt_id = 0;
+#else
u64 lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2;
+#endif
oct_tx_ctx_t ctx = {
.node = node,
@@ -363,6 +438,7 @@ VNET_DEV_NODE_FN (oct_tx_node)
.sq = ctq->sq.qid,
.sizem1 = 1,
},
+ .max_pkt_len = roc_nix_max_pkt_len (cd->nix),
.lmt_id = lmt_id,
.lmt_ioaddr = ctq->io_addr,
.lmt_lines = ctq->lmt_addr + (lmt_id << ROC_LMT_LINE_SIZE_LOG2),
@@ -396,7 +472,7 @@ VNET_DEV_NODE_FN (oct_tx_node)
n += oct_tx_enq16 (vm, &ctx, txq, b, n_left, /* trace */ 0);
}
- ctq->n_enq = n_enq + n;
+ ctq->n_enq = n_enq + n - ctx.n_drop - ctx.n_exd_mtu;
if (n < n_pkts)
{
@@ -411,6 +487,10 @@ VNET_DEV_NODE_FN (oct_tx_node)
vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_CHAIN_TOO_LONG,
ctx.n_drop);
+ if (PREDICT_FALSE (ctx.n_exd_mtu))
+ vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_MTU_EXCEEDED,
+ ctx.n_exd_mtu);
+
if (ctx.batch_alloc_not_ready)
vlib_error_count (vm, node_index,
OCT_TX_NODE_CTR_AURA_BATCH_ALLOC_NOT_READY,
@@ -431,5 +511,13 @@ VNET_DEV_NODE_FN (oct_tx_node)
n_pkts -= ctx.n_drop;
}
+ if (PREDICT_FALSE (ctx.n_exd_mtu))
+ {
+ u32 bi[VLIB_FRAME_SIZE];
+ vlib_get_buffer_indices (vm, ctx.exd_mtu, bi, ctx.n_exd_mtu);
+ vlib_buffer_free (vm, bi, ctx.n_exd_mtu);
+ n_pkts -= ctx.n_exd_mtu;
+ }
+
return n_pkts;
}
diff --git a/src/plugins/dhcp/client.c b/src/plugins/dhcp/client.c
index 8fa67c616b2..d81d2935577 100644
--- a/src/plugins/dhcp/client.c
+++ b/src/plugins/dhcp/client.c
@@ -1153,7 +1153,9 @@ dhcp_client_set_command_fn (vlib_main_t * vm,
a->is_add = is_add;
a->sw_if_index = sw_if_index;
a->hostname = hostname;
- a->client_identifier = format (0, "vpp 1.1%c", 0);
+ a->client_identifier =
+ format (0, "%U", format_ethernet_address,
+ vnet_sw_interface_get_hw_address (vnet_get_main (), sw_if_index));
a->set_broadcast_flag = set_broadcast_flag;
/*
diff --git a/src/plugins/dhcp/dhcp4_proxy_node.c b/src/plugins/dhcp/dhcp4_proxy_node.c
index 2b49d49bb7f..740ae8043e0 100644
--- a/src/plugins/dhcp/dhcp4_proxy_node.c
+++ b/src/plugins/dhcp/dhcp4_proxy_node.c
@@ -321,7 +321,8 @@ dhcp_proxy_to_server_input (vlib_main_t * vm,
o->length += id_len + 5;
}
- len = o->length + 3;
+ /* 2 bytes for option header 82+len */
+ len = o->length + 2;
b0->current_length += len;
/* Fix IP header length and checksum */
old_l0 = ip0->length;
diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c
index c838800deb4..77f9a27f97b 100644
--- a/src/plugins/dpdk/device/cli.c
+++ b/src/plugins/dpdk/device/cli.c
@@ -89,12 +89,18 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
clib_error_t *err = 0;
- u32 pipe_max_size;
int fds[2];
u8 *s = 0;
int n, n_try;
FILE *f;
+ /*
+ * XXX: Pipes on FreeBSD grow dynamically up to 64KB (FreeBSD 15), don't
+ * manually tweak this value on FreeBSD at the moment.
+ */
+#ifdef __linux__
+ u32 pipe_max_size;
+
err = clib_sysfs_read ("/proc/sys/fs/pipe-max-size", "%u", &pipe_max_size);
if (err)
@@ -112,6 +118,7 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input,
err = clib_error_return_unix (0, "fcntl(F_SETPIPE_SZ)");
goto error;
}
+#endif /* __linux__ */
if (fcntl (fds[0], F_SETFL, O_NONBLOCK) == -1)
{
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index 88a4d9ff618..2440439989f 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -210,6 +210,8 @@ typedef struct
struct rte_eth_stats last_stats;
struct rte_eth_xstat *xstats;
f64 time_last_stats_update;
+ vlib_simple_counter_main_t xstats_counters;
+ u32 *xstats_symlinks;
/* mac address */
u8 *default_mac_address;
@@ -240,6 +242,7 @@ typedef struct
_ (num_rx_desc) \
_ (num_tx_desc) \
_ (max_lro_pkt_size) \
+ _ (disable_rxq_int) \
_ (rss_fn)
typedef enum
diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h
index cb7b185c112..794953da55e 100644
--- a/src/plugins/dpdk/device/dpdk_priv.h
+++ b/src/plugins/dpdk/device/dpdk_priv.h
@@ -47,28 +47,36 @@ dpdk_device_flag_set (dpdk_device_t *xd, __typeof__ (xd->flags) flag, int val)
xd->flags = val ? xd->flags | flag : xd->flags & ~flag;
}
+void dpdk_counters_xstats_init (dpdk_device_t *xd);
+
static inline void
-dpdk_get_xstats (dpdk_device_t * xd)
+dpdk_get_xstats (dpdk_device_t *xd, u32 thread_index)
{
- int len, ret;
-
+ int ret;
+ int i;
if (!(xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP))
return;
- len = rte_eth_xstats_get (xd->port_id, NULL, 0);
- if (len < 0)
- return;
-
- vec_validate (xd->xstats, len - 1);
-
- ret = rte_eth_xstats_get (xd->port_id, xd->xstats, len);
- if (ret < 0 || ret > len)
+ ret = rte_eth_xstats_get (xd->port_id, xd->xstats, vec_len (xd->xstats));
+ if (ret < 0)
{
- vec_set_len (xd->xstats, 0);
+ dpdk_log_warn ("rte_eth_xstats_get(%d) failed: %d", xd->port_id, ret);
+ return;
+ }
+ else if (ret != vec_len (xd->xstats))
+ {
+ dpdk_log_warn (
+ "rte_eth_xstats_get(%d) returned %d/%d stats. Resetting counters.",
+ xd->port_id, ret, vec_len (xd->xstats));
+ dpdk_counters_xstats_init (xd);
return;
}
- vec_set_len (xd->xstats, len);
+ vec_foreach_index (i, xd->xstats)
+ {
+ vlib_set_simple_counter (&xd->xstats_counters, thread_index, i,
+ xd->xstats[i].value);
+ }
}
#define DPDK_UPDATE_COUNTER(vnm, tidx, xd, stat, cnt) \
@@ -107,7 +115,7 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now)
DPDK_UPDATE_COUNTER (vnm, thread_index, xd, ierrors,
VNET_INTERFACE_COUNTER_RX_ERROR);
- dpdk_get_xstats (xd);
+ dpdk_get_xstats (xd, thread_index);
}
#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index 2d038b907bf..ec9e6045de7 100644
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -30,7 +30,7 @@
#include <dpdk/cryptodev/cryptodev.h>
#include <vlib/pci/pci.h>
#include <vlib/vmbus/vmbus.h>
-
+#include <vlib/stats/stats.h>
#include <rte_ring.h>
#include <rte_vect.h>
@@ -226,6 +226,75 @@ dpdk_find_startup_config (struct rte_eth_dev_info *di)
return &dm->conf->default_devconf;
}
+/*
+ * Initialise the xstats counters for a device
+ */
+void
+dpdk_counters_xstats_init (dpdk_device_t *xd)
+{
+ int len, ret, i;
+ struct rte_eth_xstat_name *xstats_names = 0;
+
+ if (vec_len (xd->xstats_symlinks) > 0)
+ {
+ /* xstats already initialized. Reset counters */
+ vec_foreach_index (i, xd->xstats_symlinks)
+ {
+ vlib_stats_remove_entry (xd->xstats_symlinks[i]);
+ }
+ }
+ else
+ {
+ xd->xstats_counters.stat_segment_name =
+ (char *) format (0, "/if/xstats/%d%c", xd->sw_if_index, 0);
+ xd->xstats_counters.counters = 0;
+ }
+
+ len = rte_eth_xstats_get_names (xd->port_id, 0, 0);
+ if (len < 0)
+ {
+ dpdk_log_err ("[%u] rte_eth_xstats_get_names failed: %d. DPDK xstats "
+ "not configured.",
+ xd->port_id, len);
+ return;
+ }
+
+ vlib_validate_simple_counter (&xd->xstats_counters, len);
+ vlib_zero_simple_counter (&xd->xstats_counters, len);
+
+ vec_validate (xstats_names, len - 1);
+ vec_validate (xd->xstats, len - 1);
+ vec_validate (xd->xstats_symlinks, len - 1);
+
+ ret = rte_eth_xstats_get_names (xd->port_id, xstats_names, len);
+ if (ret >= 0 && ret <= len)
+ {
+ vec_foreach_index (i, xstats_names)
+ {
+ /* There is a bug in the ENA driver where the xstats names are not
+ * unique. */
+ xd->xstats_symlinks[i] = vlib_stats_add_symlink (
+ xd->xstats_counters.stats_entry_index, i, "/interfaces/%U/%s%c",
+ format_vnet_sw_if_index_name, vnet_get_main (), xd->sw_if_index,
+ xstats_names[i].name, 0);
+ if (xd->xstats_symlinks[i] == STAT_SEGMENT_INDEX_INVALID)
+ {
+ xd->xstats_symlinks[i] = vlib_stats_add_symlink (
+ xd->xstats_counters.stats_entry_index, i,
+ "/interfaces/%U/%s_%d%c", format_vnet_sw_if_index_name,
+ vnet_get_main (), xd->sw_if_index, xstats_names[i].name, i, 0);
+ }
+ }
+ }
+ else
+ {
+ dpdk_log_err ("[%u] rte_eth_xstats_get_names failed: %d. DPDK xstats "
+ "not configured.",
+ xd->port_id, ret);
+ }
+ vec_free (xstats_names);
+}
+
static clib_error_t *
dpdk_lib_init (dpdk_main_t * dm)
{
@@ -434,6 +503,14 @@ dpdk_lib_init (dpdk_main_t * dm)
else if (dr && dr->n_tx_desc)
xd->conf.n_tx_desc = dr->n_tx_desc;
+ if (xd->conf.n_tx_desc > di.tx_desc_lim.nb_max)
+ {
+ dpdk_log_warn ("[%u] Configured number of TX descriptors (%u) is "
+ "bigger than maximum supported (%u)",
+ port_id, xd->conf.n_tx_desc, di.tx_desc_lim.nb_max);
+ xd->conf.n_tx_desc = di.tx_desc_lim.nb_max;
+ }
+
dpdk_log_debug (
"[%u] n_rx_queues: %u n_tx_queues: %u n_rx_desc: %u n_tx_desc: %u",
port_id, xd->conf.n_rx_queues, xd->conf.n_tx_queues,
@@ -519,6 +596,9 @@ dpdk_lib_init (dpdk_main_t * dm)
if (devconf->max_lro_pkt_size)
xd->conf.max_lro_pkt_size = devconf->max_lro_pkt_size;
+ if (devconf->disable_rxq_int)
+ xd->conf.enable_rxq_int = 0;
+
dpdk_device_setup (xd);
/* rss queues should be configured after dpdk_device_setup() */
@@ -532,6 +612,7 @@ dpdk_lib_init (dpdk_main_t * dm)
if (vec_len (xd->errors))
dpdk_log_err ("[%u] setup failed Errors:\n %U", port_id,
format_dpdk_device_errors, xd);
+ dpdk_counters_xstats_init (xd);
}
for (int i = 0; i < vec_len (dm->devices); i++)
@@ -659,7 +740,8 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
;
/* Cisco VIC */
else if (d->vendor_id == 0x1137 &&
- (d->device_id == 0x0043 || d->device_id == 0x0071))
+ (d->device_id == 0x0043 || d->device_id == 0x0071 ||
+ d->device_id == 0x02b7))
;
/* Chelsio T4/T5 */
else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000)
@@ -936,6 +1018,10 @@ dpdk_device_config (dpdk_config_main_t *conf, void *addr,
if (error)
break;
}
+ else if (unformat (input, "no-rx-interrupts"))
+ {
+ devconf->disable_rxq_int = 1;
+ }
else if (unformat (input, "tso on"))
{
devconf->tso = DPDK_DEVICE_TSO_ON;
@@ -1045,19 +1131,21 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
dpdk_main_t *dm = &dpdk_main;
clib_error_t *error = 0;
dpdk_config_main_t *conf = &dpdk_config_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
dpdk_device_config_t *devconf;
vlib_pci_addr_t pci_addr = { 0 };
vlib_vmbus_addr_t vmbus_addr = { 0 };
unformat_input_t sub_input;
+#ifdef __linux
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
uword default_hugepage_sz, x;
+ u8 file_prefix = 0;
+#endif /* __linux__ */
u8 *s, *tmp = 0;
int ret, i;
int num_whitelisted = 0;
int eal_no_hugetlb = 0;
u8 no_pci = 0;
u8 no_vmbus = 0;
- u8 file_prefix = 0;
u8 *socket_mem = 0;
u32 vendor, device, domain, bus, func;
void *fmt_func;
@@ -1217,6 +1305,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
}
foreach_eal_double_hyphen_predicate_arg
#undef _
+#ifdef __linux__
#define _(a) \
else if (unformat(input, #a " %s", &s)) \
{ \
@@ -1232,6 +1321,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
}
foreach_eal_double_hyphen_arg
#undef _
+#endif /* __linux__ */
#define _(a,b) \
else if (unformat(input, #a " %s", &s)) \
{ \
@@ -1258,6 +1348,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
{
vec_add1 (conf->eal_init_args, (u8 *) "--in-memory");
+#ifdef __linux__
+ /*
+ * FreeBSD performs huge page prealloc through a dedicated kernel mode
+ * this process is only required on Linux.
+ */
default_hugepage_sz = clib_mem_get_default_hugepage_size ();
clib_bitmap_foreach (x, tm->cpu_socket_bitmap)
@@ -1272,6 +1367,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
if ((e = clib_sysfs_prealloc_hugepages(x, 0, n_pages)))
clib_error_report (e);
}
+#endif /* __linux__ */
}
/* on/off dpdk's telemetry thread */
@@ -1280,6 +1376,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
vec_add1 (conf->eal_init_args, (u8 *) "--no-telemetry");
}
+#ifdef __linux__
if (!file_prefix)
{
tmp = format (0, "--file-prefix%c", 0);
@@ -1287,6 +1384,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
tmp = format (0, "vpp%c", 0);
vec_add1 (conf->eal_init_args, tmp);
}
+#endif
if (no_pci == 0 && geteuid () == 0)
dpdk_bind_devices_to_uio (conf);
diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c
index 9781d0ed7f0..437cfbd230e 100644
--- a/src/plugins/dpdk/main.c
+++ b/src/plugins/dpdk/main.c
@@ -50,7 +50,7 @@ rte_delay_us_override (unsigned us)
{
/* Only suspend for the admin_down_process */
vlib_process_t *proc = vlib_get_current_process (vm);
- if (!(proc->flags & VLIB_PROCESS_IS_RUNNING) ||
+ if (proc->state != VLIB_PROCESS_STATE_RUNNING ||
(proc->node_runtime.node_index !=
admin_up_down_process_node.index))
return 0;
diff --git a/src/plugins/flowprobe/flowprobe.c b/src/plugins/flowprobe/flowprobe.c
index 58a7cfe22f1..ee0a8eb8a31 100644
--- a/src/plugins/flowprobe/flowprobe.c
+++ b/src/plugins/flowprobe/flowprobe.c
@@ -48,7 +48,7 @@ uword flowprobe_walker_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
VNET_FEATURE_INIT (flowprobe_input_ip4_unicast, static) = {
.arc_name = "ip4-unicast",
.node_name = "flowprobe-input-ip4",
- .runs_before = VNET_FEATURES ("ip4-lookup"),
+ .runs_before = VNET_FEATURES ("ip4-lookup", "ip4-inacl"),
};
VNET_FEATURE_INIT (flowprobe_input_ip4_multicast, static) = {
.arc_name = "ip4-multicast",
@@ -58,7 +58,7 @@ VNET_FEATURE_INIT (flowprobe_input_ip4_multicast, static) = {
VNET_FEATURE_INIT (flowprobe_input_ip6_unicast, static) = {
.arc_name = "ip6-unicast",
.node_name = "flowprobe-input-ip6",
- .runs_before = VNET_FEATURES ("ip6-lookup"),
+ .runs_before = VNET_FEATURES ("ip6-lookup", "ip6-inacl"),
};
VNET_FEATURE_INIT (flowprobe_input_ip6_multicast, static) = {
.arc_name = "ip6-multicast",
diff --git a/src/plugins/hs_apps/CMakeLists.txt b/src/plugins/hs_apps/CMakeLists.txt
index 179c9c7a4c4..eae100949d4 100644
--- a/src/plugins/hs_apps/CMakeLists.txt
+++ b/src/plugins/hs_apps/CMakeLists.txt
@@ -21,8 +21,10 @@ add_vpp_plugin(hs_apps
hs_apps.c
http_cli.c
http_client_cli.c
+ http_client.c
http_tps.c
proxy.c
+ test_builtins.c
)
##############################################################################
diff --git a/src/plugins/hs_apps/echo_client.c b/src/plugins/hs_apps/echo_client.c
index d1443e75e80..d5edffbd02e 100644
--- a/src/plugins/hs_apps/echo_client.c
+++ b/src/plugins/hs_apps/echo_client.c
@@ -429,8 +429,11 @@ ec_init (vlib_main_t *vm)
ecm->app_is_init = 1;
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
vlib_worker_thread_barrier_sync (vm);
- vnet_session_enable_disable (vm, 1 /* turn on session and transports */);
+ vnet_session_enable_disable (vm, &args);
/* Turn on the builtin client input nodes */
foreach_vlib_main ()
@@ -943,15 +946,16 @@ ec_connect_rpc (void *args)
a->api_context = ci;
if (needs_crypto)
{
- session_endpoint_alloc_ext_cfg (&a->sep_ext,
- TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
- a->sep_ext.ext_cfg->crypto.ckpair_index = ecm->ckpair_index;
+ transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg (
+ &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO,
+ sizeof (transport_endpt_crypto_cfg_t));
+ ext_cfg->crypto.ckpair_index = ecm->ckpair_index;
}
rv = vnet_connect (a);
if (needs_crypto)
- clib_mem_free (a->sep_ext.ext_cfg);
+ session_endpoint_free_ext_cfgs (&a->sep_ext);
if (rv)
{
diff --git a/src/plugins/hs_apps/echo_server.c b/src/plugins/hs_apps/echo_server.c
index 0243252434a..b981e775b57 100644
--- a/src/plugins/hs_apps/echo_server.c
+++ b/src/plugins/hs_apps/echo_server.c
@@ -591,6 +591,7 @@ echo_server_listen ()
i32 rv;
echo_server_main_t *esm = &echo_server_main;
vnet_listen_args_t _args = {}, *args = &_args;
+ int needs_crypto;
if ((rv = parse_uri (esm->server_uri, &args->sep_ext)))
{
@@ -598,11 +599,14 @@ echo_server_listen ()
}
args->app_index = esm->app_index;
args->sep_ext.port = hs_make_data_port (args->sep_ext.port);
- if (echo_client_transport_needs_crypto (args->sep_ext.transport_proto))
+ needs_crypto =
+ echo_client_transport_needs_crypto (args->sep_ext.transport_proto);
+ if (needs_crypto)
{
- session_endpoint_alloc_ext_cfg (&args->sep_ext,
- TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
- args->sep_ext.ext_cfg->crypto.ckpair_index = esm->ckpair_index;
+ transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg (
+ &args->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO,
+ sizeof (transport_endpt_crypto_cfg_t));
+ ext_cfg->crypto.ckpair_index = esm->ckpair_index;
}
if (args->sep_ext.transport_proto == TRANSPORT_PROTO_UDP)
@@ -612,8 +616,8 @@ echo_server_listen ()
rv = vnet_listen (args);
esm->listener_handle = args->handle;
- if (args->sep_ext.ext_cfg)
- clib_mem_free (args->sep_ext.ext_cfg);
+ if (needs_crypto)
+ session_endpoint_free_ext_cfgs (&args->sep_ext);
return rv;
}
@@ -736,7 +740,10 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
goto cleanup;
}
- vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
+ vnet_session_enable_disable (vm, &args);
if (!server_uri_set)
{
diff --git a/src/plugins/hs_apps/http_cli.c b/src/plugins/hs_apps/http_cli.c
index 5d4d49c0fba..dfa90f9eced 100644
--- a/src/plugins/hs_apps/http_cli.c
+++ b/src/plugins/hs_apps/http_cli.c
@@ -17,12 +17,29 @@
#include <vnet/session/application_interface.h>
#include <vnet/session/session.h>
#include <http/http.h>
+#include <http/http_header_names.h>
+#include <http/http_content_types.h>
+
+#define HCS_DEBUG 0
+
+#if HCS_DEBUG
+#define HCS_DBG(_fmt, _args...) clib_warning (_fmt, ##_args)
+#else
+#define HCS_DBG(_fmt, _args...)
+#endif
+
+typedef struct
+{
+ u32 handle;
+ u8 *uri;
+} hcs_uri_map_t;
typedef struct
{
u32 hs_index;
u32 thread_index;
u64 node_index;
+ u8 plain_text;
u8 *buf;
} hcs_cli_args_t;
@@ -34,6 +51,7 @@ typedef struct
u8 *tx_buf;
u32 tx_offset;
u32 vpp_session_index;
+ http_header_t *resp_headers;
} hcs_session_t;
typedef struct
@@ -50,6 +68,16 @@ typedef struct
u32 fifo_size;
u8 *uri;
vlib_main_t *vlib_main;
+
+ /* hash table to store uri -> uri map pool index */
+ uword *index_by_uri;
+
+ /* pool of uri maps */
+ hcs_uri_map_t *uri_map_pool;
+
+ /* for appns */
+ u8 *appns_id;
+ u64 appns_secret;
} hcs_main_t;
static hcs_main_t hcs_main;
@@ -143,26 +171,48 @@ start_send_data (hcs_session_t *hs, http_status_code_t status)
{
http_msg_t msg;
session_t *ts;
+ u8 *headers_buf = 0;
int rv;
+ if (vec_len (hs->resp_headers))
+ {
+ headers_buf = http_serialize_headers (hs->resp_headers);
+ vec_free (hs->resp_headers);
+ msg.data.headers_offset = 0;
+ msg.data.headers_len = vec_len (headers_buf);
+ }
+ else
+ {
+ msg.data.headers_offset = 0;
+ msg.data.headers_len = 0;
+ }
+
msg.type = HTTP_MSG_REPLY;
msg.code = status;
- msg.content_type = HTTP_CONTENT_TEXT_HTML;
msg.data.type = HTTP_MSG_DATA_INLINE;
- msg.data.len = vec_len (hs->tx_buf);
+ msg.data.body_len = vec_len (hs->tx_buf);
+ msg.data.body_offset = msg.data.headers_len;
+ msg.data.len = msg.data.body_len + msg.data.headers_len;
ts = session_get (hs->vpp_session_index, hs->thread_index);
rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
ASSERT (rv == sizeof (msg));
- if (!msg.data.len)
+ if (msg.data.headers_len)
+ {
+ rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (headers_buf), headers_buf);
+ ASSERT (rv == msg.data.headers_len);
+ vec_free (headers_buf);
+ }
+
+ if (!msg.data.body_len)
goto done;
rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (hs->tx_buf), hs->tx_buf);
if (rv != vec_len (hs->tx_buf))
{
- hs->tx_offset = rv;
+ hs->tx_offset = (rv > 0) ? rv : 0;
svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
}
else
@@ -173,7 +223,7 @@ start_send_data (hcs_session_t *hs, http_status_code_t status)
done:
if (svm_fifo_set_event (ts->tx_fifo))
- session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+ session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
}
static void
@@ -181,6 +231,7 @@ send_data_to_http (void *rpc_args)
{
hcs_cli_args_t *args = (hcs_cli_args_t *) rpc_args;
hcs_session_t *hs;
+ http_content_type_t type = HTTP_CONTENT_TEXT_HTML;
hs = hcs_session_get (args->thread_index, args->hs_index);
if (!hs)
@@ -190,6 +241,13 @@ send_data_to_http (void *rpc_args)
}
hs->tx_buf = args->buf;
+ if (args->plain_text)
+ type = HTTP_CONTENT_TEXT_PLAIN;
+
+ http_add_header (&hs->resp_headers,
+ http_header_name_token (HTTP_HEADER_CONTENT_TYPE),
+ http_content_type_token (type));
+
start_send_data (hs, HTTP_STATUS_OK);
cleanup:
@@ -218,17 +276,9 @@ hcs_cli_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
{
if (request[i] == '/')
request[i] = ' ';
- else if (request[i] == ' ')
- {
- /* vlib_cli_input is vector-based, no need for a NULL */
- vec_set_len (request, i);
- break;
- }
i++;
}
-
- /* Generate the html header */
- html = format (0, html_header_template, request /* title */ );
+ HCS_DBG ("%v", request);
/* Run the command */
unformat_init_vector (&input, vec_dup (request));
@@ -236,9 +286,17 @@ hcs_cli_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
unformat_free (&input);
request = 0;
- /* Generate the html page */
- html = format (html, "%v", reply);
- html = format (html, html_footer);
+ if (args->plain_text)
+ {
+ html = format (0, "%v", reply);
+ }
+ else
+ {
+ /* Generate the html page */
+ html = format (0, html_header_template, request /* title */);
+ html = format (html, "%v", reply);
+ html = format (html, html_footer);
+ }
/* Send it */
rpc_args = clib_mem_alloc (sizeof (*args));
@@ -308,9 +366,11 @@ hcs_ts_rx_callback (session_t *ts)
hcs_cli_args_t args = {};
hcs_session_t *hs;
http_msg_t msg;
- int rv;
+ int rv, is_encoded = 0;
hs = hcs_session_get (ts->thread_index, ts->opaque);
+ hs->tx_buf = 0;
+ hs->resp_headers = 0;
/* Read the http message header */
rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
@@ -318,16 +378,66 @@ hcs_ts_rx_callback (session_t *ts)
if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET)
{
- hs->tx_buf = 0;
+ http_add_header (&hs->resp_headers,
+ http_header_name_token (HTTP_HEADER_ALLOW),
+ http_token_lit ("GET"));
start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
- return 0;
+ goto done;
+ }
+
+ if (msg.data.target_path_len == 0 ||
+ msg.data.target_form != HTTP_TARGET_ORIGIN_FORM)
+ {
+ start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
+ goto done;
}
/* send the command to a new/recycled vlib process */
- vec_validate (args.buf, msg.data.len - 1);
- rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, args.buf);
- ASSERT (rv == msg.data.len);
- vec_set_len (args.buf, rv);
+ vec_validate (args.buf, msg.data.target_path_len - 1);
+ rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset,
+ msg.data.target_path_len, args.buf);
+ ASSERT (rv == msg.data.target_path_len);
+ HCS_DBG ("%v", args.buf);
+ if (http_validate_abs_path_syntax (args.buf, &is_encoded))
+ {
+ start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
+ vec_free (args.buf);
+ goto done;
+ }
+ if (is_encoded)
+ {
+ u8 *decoded = http_percent_decode (args.buf, vec_len (args.buf));
+ vec_free (args.buf);
+ args.buf = decoded;
+ }
+
+ if (msg.data.headers_len)
+ {
+ u8 *headers = 0;
+ http_header_table_t *ht;
+ vec_validate (headers, msg.data.headers_len - 1);
+ rv = svm_fifo_peek (ts->rx_fifo, msg.data.headers_offset,
+ msg.data.headers_len, headers);
+ ASSERT (rv == msg.data.headers_len);
+ if (http_parse_headers (headers, &ht))
+ {
+ start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
+ vec_free (args.buf);
+ vec_free (headers);
+ goto done;
+ }
+ const char *accept_value =
+ http_get_header (ht, http_header_name_str (HTTP_HEADER_ACCEPT));
+ if (accept_value)
+ {
+ HCS_DBG ("client accept: %s", accept_value);
+ /* just for testing purpose, we don't care about precedence */
+ if (strstr (accept_value, "text/plain"))
+ args.plain_text = 1;
+ }
+ http_free_header_table (ht);
+ vec_free (headers);
+ }
args.hs_index = hs->session_index;
args.thread_index = ts->thread_index;
@@ -338,6 +448,9 @@ hcs_ts_rx_callback (session_t *ts)
sizeof (args));
else
alloc_cli_process (&args);
+
+done:
+ svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.len);
return 0;
}
@@ -372,7 +485,7 @@ hcs_ts_tx_callback (session_t *ts)
}
if (svm_fifo_set_event (ts->tx_fifo))
- session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+ session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
return 0;
}
@@ -488,6 +601,11 @@ hcs_attach ()
hcm->fifo_size ? hcm->fifo_size : 32 << 10;
a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos;
+ if (hcm->appns_id)
+ {
+ a->namespace_id = hcm->appns_id;
+ a->options[APP_OPTIONS_NAMESPACE_SECRET] = hcm->appns_secret;
+ }
if (vnet_application_attach (a))
{
@@ -522,15 +640,15 @@ hcs_listen ()
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
hcs_main_t *hcm = &hcs_main;
vnet_listen_args_t _a, *a = &_a;
- char *uri = "tcp://0.0.0.0/80";
u8 need_crypto;
int rv;
+ char *uri;
clib_memset (a, 0, sizeof (*a));
a->app_index = hcm->app_index;
- if (hcm->uri)
- uri = (char *) hcm->uri;
+ uri = (char *) hcm->uri;
+ ASSERT (uri);
if (parse_uri (uri, &sep))
return -1;
@@ -542,15 +660,24 @@ hcs_listen ()
if (need_crypto)
{
- session_endpoint_alloc_ext_cfg (&a->sep_ext,
- TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
- a->sep_ext.ext_cfg->crypto.ckpair_index = hcm->ckpair_index;
+ transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg (
+ &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO,
+ sizeof (transport_endpt_crypto_cfg_t));
+ ext_cfg->crypto.ckpair_index = hcm->ckpair_index;
}
rv = vnet_listen (a);
+ if (rv == 0)
+ {
+ hcs_uri_map_t *map;
+ pool_get_zero (hcm->uri_map_pool, map);
+ map->uri = vec_dup (uri);
+ map->handle = a->handle;
+ hash_set_mem (hcm->index_by_uri, map->uri, map - hcm->uri_map_pool);
+ }
if (need_crypto)
- clib_mem_free (a->sep_ext.ext_cfg);
+ session_endpoint_free_ext_cfgs (&a->sep_ext);
return rv;
}
@@ -567,6 +694,43 @@ hcs_detach ()
}
static int
+hcs_unlisten ()
+{
+ hcs_main_t *hcm = &hcs_main;
+ vnet_unlisten_args_t _a, *a = &_a;
+ char *uri;
+ int rv = 0;
+ uword *value;
+
+ clib_memset (a, 0, sizeof (*a));
+ a->app_index = hcm->app_index;
+
+ uri = (char *) hcm->uri;
+ ASSERT (uri);
+
+ value = hash_get_mem (hcm->index_by_uri, uri);
+ if (value)
+ {
+ hcs_uri_map_t *map = pool_elt_at_index (hcm->uri_map_pool, *value);
+
+ a->handle = map->handle;
+ rv = vnet_unlisten (a);
+ if (rv == 0)
+ {
+ hash_unset_mem (hcm->index_by_uri, uri);
+ vec_free (map->uri);
+ pool_put (hcm->uri_map_pool, map);
+ if (pool_elts (hcm->uri_map_pool) == 0)
+ hcs_detach ();
+ }
+ }
+ else
+ return -1;
+
+ return rv;
+}
+
+static int
hcs_create (vlib_main_t *vm)
{
vlib_thread_main_t *vtm = vlib_get_thread_main ();
@@ -599,6 +763,8 @@ hcs_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
hcs_main_t *hcm = &hcs_main;
u64 seg_size;
int rv;
+ u32 listener_add = ~0;
+ clib_error_t *error = 0;
hcm->prealloc_fifos = 0;
hcm->private_segment_size = 0;
@@ -617,13 +783,32 @@ hcs_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
hcm->private_segment_size = seg_size;
else if (unformat (line_input, "fifo-size %d", &hcm->fifo_size))
hcm->fifo_size <<= 10;
- else if (unformat (line_input, "uri %s", &hcm->uri))
+ else if (unformat (line_input, "uri %_%v%_", &hcm->uri))
;
+ else if (unformat (line_input, "appns %_%v%_", &hcm->appns_id))
+ ;
+ else if (unformat (line_input, "secret %lu", &hcm->appns_secret))
+ ;
+ else if (unformat (line_input, "listener"))
+ {
+ if (unformat (line_input, "add"))
+ listener_add = 1;
+ else if (unformat (line_input, "del"))
+ listener_add = 0;
+ else
+ {
+ unformat_free (line_input);
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
else
{
unformat_free (line_input);
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
}
}
@@ -631,10 +816,43 @@ hcs_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
start_server:
+ if (hcm->uri == 0)
+ hcm->uri = format (0, "tcp://0.0.0.0/80");
+
if (hcm->app_index != (u32) ~0)
- return clib_error_return (0, "test http server is already running");
+ {
+ if (hcm->appns_id && (listener_add != ~0))
+ {
+ error = clib_error_return (
+ 0, "appns must not be specified for listener add/del");
+ goto done;
+ }
+ if (listener_add == 1)
+ {
+ if (hcs_listen ())
+ error =
+ clib_error_return (0, "failed to start listening %v", hcm->uri);
+ goto done;
+ }
+ else if (listener_add == 0)
+ {
+ rv = hcs_unlisten ();
+ if (rv != 0)
+ error = clib_error_return (
+ 0, "failed to stop listening %v, rv = %d", hcm->uri, rv);
+ goto done;
+ }
+ else
+ {
+ error = clib_error_return (0, "test http server is already running");
+ goto done;
+ }
+ }
- vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
+ vnet_session_enable_disable (vm, &args);
rv = hcs_create (vm);
switch (rv)
@@ -642,16 +860,23 @@ start_server:
case 0:
break;
default:
- return clib_error_return (0, "server_create returned %d", rv);
+ {
+ error = clib_error_return (0, "server_create returned %d", rv);
+ goto done;
+ }
}
- return 0;
+done:
+ vec_free (hcm->appns_id);
+ vec_free (hcm->uri);
+ return error;
}
VLIB_CLI_COMMAND (hcs_create_command, static) = {
.path = "http cli server",
.short_help = "http cli server [uri <uri>] [fifo-size <nbytes>] "
- "[private-segment-size <nMG>] [prealloc-fifos <n>]",
+ "[private-segment-size <nMG>] [prealloc-fifos <n>] "
+ "[listener <add|del>] [appns <app-ns> secret <appns-secret>]",
.function = hcs_create_command_fn,
};
@@ -662,6 +887,7 @@ hcs_main_init (vlib_main_t *vm)
hcs->app_index = ~0;
hcs->vlib_main = vm;
+ hcs->index_by_uri = hash_create_vec (0, sizeof (u8), sizeof (uword));
return 0;
}
diff --git a/src/plugins/hs_apps/http_client.c b/src/plugins/hs_apps/http_client.c
new file mode 100644
index 00000000000..05a87ec7de8
--- /dev/null
+++ b/src/plugins/hs_apps/http_client.c
@@ -0,0 +1,743 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+#include <vnet/session/session.h>
+#include <http/http.h>
+#include <http/http_header_names.h>
+#include <http/http_content_types.h>
+#include <http/http_status_codes.h>
+#include <vppinfra/unix.h>
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 session_index;
+ u32 thread_index;
+ u32 vpp_session_index;
+ u64 to_recv;
+ u8 is_closed;
+} hc_session_t;
+
+typedef struct
+{
+ hc_session_t *sessions;
+ u32 thread_index;
+ vlib_main_t *vlib_main;
+} hc_worker_t;
+
+typedef struct
+{
+ u32 app_index;
+ u32 cli_node_index;
+ u8 attached;
+ u8 *uri;
+ session_endpoint_cfg_t connect_sep;
+ u8 *target;
+ u8 *headers_buf;
+ u8 *data;
+ u64 data_offset;
+ hc_worker_t *wrk;
+ u8 *resp_headers;
+ u8 *http_response;
+ u8 *response_status;
+ http_header_ht_t *custom_header;
+ u8 is_file;
+ u8 use_ptr;
+ u8 *filename;
+ bool verbose;
+ f64 timeout;
+ http_req_method_t req_method;
+} hc_main_t;
+
+typedef enum
+{
+ HC_CONNECT_FAILED = 1,
+ HC_TRANSPORT_CLOSED,
+ HC_REPLY_RECEIVED,
+} hc_cli_signal_t;
+
+static hc_main_t hc_main;
+
+static inline hc_worker_t *
+hc_worker_get (u32 thread_index)
+{
+ return &hc_main.wrk[thread_index];
+}
+
+static inline hc_session_t *
+hc_session_get (u32 session_index, u32 thread_index)
+{
+ hc_worker_t *wrk = hc_worker_get (thread_index);
+ wrk->vlib_main = vlib_get_main_by_index (thread_index);
+ return pool_elt_at_index (wrk->sessions, session_index);
+}
+
+static void
+hc_ho_session_free (u32 hs_index)
+{
+ hc_worker_t *wrk = hc_worker_get (0);
+ pool_put_index (wrk->sessions, hs_index);
+}
+
+static hc_session_t *
+hc_session_alloc (hc_worker_t *wrk)
+{
+ hc_session_t *s;
+
+ pool_get_zero (wrk->sessions, s);
+ s->session_index = s - wrk->sessions;
+ s->thread_index = wrk->thread_index;
+
+ return s;
+}
+
+static int
+hc_session_connected_callback (u32 app_index, u32 hc_session_index,
+ session_t *s, session_error_t err)
+{
+ hc_main_t *hcm = &hc_main;
+ hc_session_t *hc_session, *new_hc_session;
+ hc_worker_t *wrk;
+ http_msg_t msg;
+ u64 to_send;
+ u32 n_enq;
+ u8 n_segs;
+ int rv;
+ http_header_ht_t *header;
+ http_header_t *req_headers = 0;
+ u32 new_hc_index;
+
+ HTTP_DBG (1, "ho hc_index: %d", hc_session_index);
+
+ if (err)
+ {
+ clib_warning ("hc_session_index[%d] connected error: %U",
+ hc_session_index, format_session_error, err);
+ vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index,
+ HC_CONNECT_FAILED, 0);
+ return -1;
+ }
+
+ hc_session = hc_session_get (hc_session_index, 0);
+ wrk = hc_worker_get (s->thread_index);
+ new_hc_session = hc_session_alloc (wrk);
+ new_hc_index = new_hc_session->session_index;
+ clib_memcpy_fast (new_hc_session, hc_session, sizeof (*hc_session));
+ hc_session->vpp_session_index = s->session_index;
+
+ new_hc_session->session_index = new_hc_index;
+ new_hc_session->thread_index = s->thread_index;
+ new_hc_session->vpp_session_index = s->session_index;
+ HTTP_DBG (1, "new hc_index: %d", new_hc_session->session_index);
+ s->opaque = new_hc_index;
+
+ if (hcm->req_method == HTTP_REQ_POST)
+ {
+ if (hcm->is_file)
+ http_add_header (
+ &req_headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE),
+ http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM));
+ else
+ http_add_header (
+ &req_headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE),
+ http_content_type_token (HTTP_CONTENT_APP_X_WWW_FORM_URLENCODED));
+ }
+
+ vec_foreach (header, hcm->custom_header)
+ http_add_header (&req_headers, (const char *) header->name,
+ vec_len (header->name), (const char *) header->value,
+ vec_len (header->value));
+
+ hcm->headers_buf = http_serialize_headers (req_headers);
+ vec_free (req_headers);
+
+ msg.method_type = hcm->req_method;
+ if (hcm->req_method == HTTP_REQ_POST)
+ msg.data.body_len = vec_len (hcm->data);
+ else
+ msg.data.body_len = 0;
+
+ msg.type = HTTP_MSG_REQUEST;
+ /* request target */
+ msg.data.target_form = HTTP_TARGET_ORIGIN_FORM;
+ msg.data.target_path_len = vec_len (hcm->target);
+ /* custom headers */
+ msg.data.headers_len = vec_len (hcm->headers_buf);
+ /* total length */
+ msg.data.len =
+ msg.data.target_path_len + msg.data.headers_len + msg.data.body_len;
+
+ if (hcm->use_ptr)
+ {
+ uword target = pointer_to_uword (hcm->target);
+ uword headers = pointer_to_uword (hcm->headers_buf);
+ uword body = pointer_to_uword (hcm->data);
+ msg.data.type = HTTP_MSG_DATA_PTR;
+ svm_fifo_seg_t segs[4] = {
+ { (u8 *) &msg, sizeof (msg) },
+ { (u8 *) &target, sizeof (target) },
+ { (u8 *) &headers, sizeof (headers) },
+ { (u8 *) &body, sizeof (body) },
+ };
+
+ n_segs = (hcm->req_method == HTTP_REQ_GET) ? 3 : 4;
+ rv = svm_fifo_enqueue_segments (s->tx_fifo, segs, n_segs,
+ 0 /* allow partial */);
+ if (hcm->req_method == HTTP_REQ_POST)
+ ASSERT (rv == (sizeof (msg) + sizeof (target) + sizeof (headers) +
+ sizeof (body)));
+ else
+ ASSERT (rv == (sizeof (msg) + sizeof (target) + sizeof (headers)));
+ goto done;
+ }
+
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.target_path_offset = 0;
+ msg.data.headers_offset = msg.data.target_path_len;
+ msg.data.body_offset = msg.data.headers_offset + msg.data.headers_len;
+
+ rv = svm_fifo_enqueue (s->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ rv = svm_fifo_enqueue (s->tx_fifo, vec_len (hcm->target), hcm->target);
+ ASSERT (rv == vec_len (hcm->target));
+
+ rv = svm_fifo_enqueue (s->tx_fifo, vec_len (hcm->headers_buf),
+ hcm->headers_buf);
+ ASSERT (rv == msg.data.headers_len);
+
+ if (hcm->req_method == HTTP_REQ_POST)
+ {
+ to_send = vec_len (hcm->data);
+ n_enq = clib_min (svm_fifo_size (s->tx_fifo), to_send);
+
+ rv = svm_fifo_enqueue (s->tx_fifo, n_enq, hcm->data);
+ if (rv < to_send)
+ {
+ hcm->data_offset = (rv > 0) ? rv : 0;
+ svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ }
+ }
+
+done:
+ if (svm_fifo_set_event (s->tx_fifo))
+ session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX);
+
+ return 0;
+}
+
+static void
+hc_session_disconnect_callback (session_t *s)
+{
+ hc_main_t *hcm = &hc_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+ int rv;
+
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ if ((rv = vnet_disconnect_session (a)))
+ clib_warning ("warning: disconnect returned: %U", format_session_error,
+ rv);
+}
+
+static void
+hc_session_transport_closed_callback (session_t *s)
+{
+ hc_main_t *hcm = &hc_main;
+ vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index,
+ HC_TRANSPORT_CLOSED, 0);
+}
+
+static void
+hc_ho_cleanup_callback (session_t *ts)
+{
+ HTTP_DBG (1, "ho hc_index: %d:", ts->opaque);
+ hc_ho_session_free (ts->opaque);
+}
+
+static void
+hc_session_reset_callback (session_t *s)
+{
+ hc_main_t *hcm = &hc_main;
+ hc_session_t *hc_session;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+ int rv;
+
+ hc_session = hc_session_get (s->opaque, s->thread_index);
+ hc_session->is_closed = 1;
+
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ if ((rv = vnet_disconnect_session (a)))
+ clib_warning ("warning: disconnect returned: %U", format_session_error,
+ rv);
+}
+
+static int
+hc_rx_callback (session_t *s)
+{
+ hc_main_t *hcm = &hc_main;
+ hc_session_t *hc_session;
+ http_msg_t msg;
+ int rv;
+
+ hc_session = hc_session_get (s->opaque, s->thread_index);
+
+ if (hc_session->is_closed)
+ {
+ clib_warning ("hc_session_index[%d] is closed", s->opaque);
+ return -1;
+ }
+
+ if (hc_session->to_recv == 0)
+ {
+ rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.type != HTTP_MSG_REPLY)
+ {
+ clib_warning ("unexpected msg type %d", msg.type);
+ return -1;
+ }
+
+ if (msg.data.headers_len)
+ {
+ http_header_table_t *ht;
+ vec_validate (hcm->resp_headers, msg.data.headers_len - 1);
+ rv = svm_fifo_peek (s->rx_fifo, msg.data.headers_offset,
+ msg.data.headers_len, hcm->resp_headers);
+
+ ASSERT (rv == msg.data.headers_len);
+ HTTP_DBG (1, (char *) hcm->resp_headers);
+
+ if (http_parse_headers (hcm->resp_headers, &ht))
+ {
+ clib_warning ("invalid headers received");
+ return -1;
+ }
+ http_free_header_table (ht);
+
+ hcm->response_status =
+ format (0, "%U", format_http_status_code, msg.code);
+ }
+
+ if (msg.data.body_len == 0)
+ {
+ svm_fifo_dequeue_drop_all (s->rx_fifo);
+ goto done;
+ }
+
+ /* drop everything up to body */
+ svm_fifo_dequeue_drop (s->rx_fifo, msg.data.body_offset);
+ hc_session->to_recv = msg.data.body_len;
+ if (msg.code != HTTP_STATUS_OK && hc_session->to_recv == 0)
+ {
+ goto done;
+ }
+ vec_validate (hcm->http_response, msg.data.body_len - 1);
+ vec_reset_length (hcm->http_response);
+ }
+
+ u32 max_deq = svm_fifo_max_dequeue (s->rx_fifo);
+
+ u32 n_deq = clib_min (hc_session->to_recv, max_deq);
+ u32 curr = vec_len (hcm->http_response);
+ rv = svm_fifo_dequeue (s->rx_fifo, n_deq, hcm->http_response + curr);
+ if (rv < 0)
+ {
+ clib_warning ("app dequeue(n=%d) failed; rv = %d", n_deq, rv);
+ return -1;
+ }
+
+ ASSERT (rv == n_deq);
+ vec_set_len (hcm->http_response, curr + n_deq);
+ ASSERT (hc_session->to_recv >= rv);
+ hc_session->to_recv -= rv;
+
+done:
+ if (hc_session->to_recv == 0)
+ {
+ hc_session_disconnect_callback (s);
+ vlib_process_signal_event_mt (hcm->wrk->vlib_main, hcm->cli_node_index,
+ HC_REPLY_RECEIVED, 0);
+ }
+
+ return 0;
+}
+
+static int
+hc_tx_callback (session_t *s)
+{
+ hc_main_t *hcm = &hc_main;
+ u64 to_send;
+ int rv;
+
+ to_send = vec_len (hcm->data) - hcm->data_offset;
+ rv = svm_fifo_enqueue (s->tx_fifo, to_send, hcm->data + hcm->data_offset);
+
+ if (rv <= 0)
+ {
+ svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ return 0;
+ }
+
+ if (rv < to_send)
+ {
+ hcm->data_offset += rv;
+ svm_fifo_add_want_deq_ntf (s->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ }
+
+ if (svm_fifo_set_event (s->tx_fifo))
+ session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX);
+
+ return 0;
+}
+
+static session_cb_vft_t hc_session_cb_vft = {
+ .session_connected_callback = hc_session_connected_callback,
+ .session_disconnect_callback = hc_session_disconnect_callback,
+ .session_transport_closed_callback = hc_session_transport_closed_callback,
+ .session_reset_callback = hc_session_reset_callback,
+ .builtin_app_rx_callback = hc_rx_callback,
+ .builtin_app_tx_callback = hc_tx_callback,
+ .half_open_cleanup_callback = hc_ho_cleanup_callback,
+};
+
+static clib_error_t *
+hc_attach ()
+{
+ hc_main_t *hcm = &hc_main;
+ vnet_app_attach_args_t _a, *a = &_a;
+ u64 options[18];
+ int rv;
+
+ clib_memset (a, 0, sizeof (*a));
+ clib_memset (options, 0, sizeof (options));
+
+ a->api_client_index = APP_INVALID_INDEX;
+ a->name = format (0, "http_client");
+ a->session_cb_vft = &hc_session_cb_vft;
+ a->options = options;
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+
+ if ((rv = vnet_application_attach (a)))
+ return clib_error_return (0, "attach returned: %U", format_session_error,
+ rv);
+
+ hcm->app_index = a->app_index;
+ vec_free (a->name);
+ hcm->attached = 1;
+
+ return 0;
+}
+
+static int
+hc_connect_rpc (void *rpc_args)
+{
+ vnet_connect_args_t *a = rpc_args;
+ int rv;
+
+ rv = vnet_connect (a);
+ if (rv > 0)
+ clib_warning (0, "connect returned: %U", format_session_error, rv);
+
+ vec_free (a);
+ return rv;
+}
+
+static void
+hc_connect ()
+{
+ hc_main_t *hcm = &hc_main;
+ vnet_connect_args_t *a = 0;
+ hc_worker_t *wrk;
+ hc_session_t *hc_session;
+
+ vec_validate (a, 0);
+ clib_memset (a, 0, sizeof (a[0]));
+
+ clib_memcpy (&a->sep_ext, &hcm->connect_sep, sizeof (hcm->connect_sep));
+ a->app_index = hcm->app_index;
+
+ /* allocate http session on main thread */
+ wrk = hc_worker_get (0);
+ hc_session = hc_session_alloc (wrk);
+ a->api_context = hc_session->session_index;
+
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (), hc_connect_rpc,
+ a);
+}
+
+static clib_error_t *
+hc_run (vlib_main_t *vm)
+{
+ hc_main_t *hcm = &hc_main;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ u32 num_threads;
+ hc_worker_t *wrk;
+ uword event_type, *event_data = 0;
+ clib_error_t *err;
+ FILE *file_ptr;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (hcm->wrk, num_threads - 1);
+ vec_foreach (wrk, hcm->wrk)
+ wrk->thread_index = wrk - hcm->wrk;
+
+ if ((err = hc_attach ()))
+ return clib_error_return (0, "http client attach: %U", format_clib_error,
+ err);
+
+ hc_connect ();
+
+ vlib_process_wait_for_event_or_clock (vm, hcm->timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0:
+ err = clib_error_return (0, "error: timeout");
+ break;
+ case HC_CONNECT_FAILED:
+ err = clib_error_return (0, "error: failed to connect");
+ break;
+ case HC_TRANSPORT_CLOSED:
+ err = clib_error_return (0, "error: transport closed");
+ break;
+ case HC_REPLY_RECEIVED:
+ if (hcm->filename)
+ {
+ file_ptr =
+ fopen ((char *) format (0, "/tmp/%v", hcm->filename), "w");
+ if (file_ptr == NULL)
+ {
+ vlib_cli_output (vm, "couldn't open file %v", hcm->filename);
+ }
+ else
+ {
+ fprintf (file_ptr, "< %s\n< %s\n< %s", hcm->response_status,
+ hcm->resp_headers, hcm->http_response);
+ fclose (file_ptr);
+ vlib_cli_output (vm, "file saved (/tmp/%v)", hcm->filename);
+ }
+ }
+ if (hcm->verbose)
+ vlib_cli_output (vm, "< %v\n< %v", hcm->response_status,
+ hcm->resp_headers);
+ vlib_cli_output (vm, "<\n%v", hcm->http_response);
+
+ break;
+ default:
+ err = clib_error_return (0, "error: unexpected event %d", event_type);
+ break;
+ }
+
+ vec_free (event_data);
+ return err;
+}
+
+static int
+hc_detach ()
+{
+ hc_main_t *hcm = &hc_main;
+ vnet_app_detach_args_t _da, *da = &_da;
+ int rv;
+
+ if (!hcm->attached)
+ return 0;
+
+ da->app_index = hcm->app_index;
+ da->api_client_index = APP_INVALID_INDEX;
+ rv = vnet_application_detach (da);
+ hcm->attached = 0;
+ hcm->app_index = APP_INVALID_INDEX;
+
+ return rv;
+}
+
+static void
+hcc_worker_cleanup (hc_worker_t *wrk)
+{
+ pool_free (wrk->sessions);
+}
+
+static void
+hc_cleanup ()
+{
+ hc_main_t *hcm = &hc_main;
+ hc_worker_t *wrk;
+ http_header_ht_t *header;
+
+ vec_foreach (wrk, hcm->wrk)
+ hcc_worker_cleanup (wrk);
+
+ vec_free (hcm->uri);
+ vec_free (hcm->target);
+ vec_free (hcm->headers_buf);
+ vec_free (hcm->data);
+ vec_free (hcm->resp_headers);
+ vec_free (hcm->http_response);
+ vec_free (hcm->response_status);
+ vec_free (hcm->wrk);
+ vec_free (hcm->filename);
+ vec_foreach (header, hcm->custom_header)
+ {
+ vec_free (header->name);
+ vec_free (header->value);
+ }
+ vec_free (hcm->custom_header);
+}
+
+static clib_error_t *
+hc_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ hc_main_t *hcm = &hc_main;
+ clib_error_t *err = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *path = 0;
+ u8 *file_data;
+ http_header_ht_t new_header;
+ u8 *name;
+ u8 *value;
+ int rv;
+ hcm->timeout = 10;
+
+ if (hcm->attached)
+ return clib_error_return (0, "failed: already running!");
+
+ hcm->use_ptr = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, "expected required arguments");
+
+ hcm->req_method =
+ (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) &&
+ unformat (line_input, "post") ?
+ HTTP_REQ_POST :
+ HTTP_REQ_GET;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "uri %s", &hcm->uri))
+ ;
+ else if (unformat (line_input, "data %v", &hcm->data))
+ hcm->is_file = 0;
+ else if (unformat (line_input, "target %s", &hcm->target))
+ ;
+ else if (unformat (line_input, "file %s", &path))
+ hcm->is_file = 1;
+ else if (unformat (line_input, "use-ptr"))
+ hcm->use_ptr = 1;
+ else if (unformat (line_input, "save-to %s", &hcm->filename))
+ {
+ if (strstr ((char *) hcm->filename, "..") ||
+ strchr ((char *) hcm->filename, '/'))
+ {
+ err = clib_error_return (
+ 0, "illegal characters in filename '%v'", hcm->filename);
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "header %v:%v", &name, &value))
+ {
+ new_header.name = name;
+ new_header.value = value;
+ vec_add1 (hcm->custom_header, new_header);
+ }
+ else if (unformat (line_input, "verbose"))
+ hcm->verbose = true;
+ else if (unformat (line_input, "timeout %f", &hcm->timeout))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!hcm->uri)
+ {
+ err = clib_error_return (0, "URI not defined");
+ goto done;
+ }
+ if (!hcm->target)
+ {
+ err = clib_error_return (0, "target not defined");
+ goto done;
+ }
+ if (!hcm->data && hcm->req_method == HTTP_REQ_POST)
+ {
+ if (path)
+ {
+ err = clib_file_contents ((char *) path, &file_data);
+ if (err)
+ goto done;
+ hcm->data = file_data;
+ }
+ else
+ {
+ err = clib_error_return (0, "data not defined");
+ goto done;
+ }
+ }
+
+ if ((rv = parse_uri ((char *) hcm->uri, &hcm->connect_sep)))
+ {
+ err =
+ clib_error_return (0, "URI parse error: %U", format_session_error, rv);
+ goto done;
+ }
+
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
+ vlib_worker_thread_barrier_sync (vm);
+ vnet_session_enable_disable (vm, &args);
+ vlib_worker_thread_barrier_release (vm);
+
+ hcm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
+
+ err = hc_run (vm);
+
+ if ((rv = hc_detach ()))
+ {
+ /* don't override last error */
+ if (!err)
+ err = clib_error_return (0, "detach returned: %U",
+ format_session_error, rv);
+ else
+ clib_warning ("warning: detach returned: %U", format_session_error,
+ rv);
+ }
+
+done:
+ vec_free (path);
+ hc_cleanup ();
+ unformat_free (line_input);
+ return err;
+}
+
+VLIB_CLI_COMMAND (hc_command, static) = {
+ .path = "http client",
+ .short_help = "[post] uri http://<ip-addr> target <origin-form> "
+ "[data <form-urlencoded> | file <file-path>] [use-ptr] "
+ "[save-to <filename>] [header <Key:Value>] [verbose] "
+ "[timeout <seconds> (default = 10)]",
+ .function = hc_command_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+hc_main_init ()
+{
+ hc_main_t *hcm = &hc_main;
+ hcm->app_index = APP_INVALID_INDEX;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (hc_main_init);
diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c
index 085a2b69bf7..861af7f03e2 100644
--- a/src/plugins/hs_apps/http_client_cli.c
+++ b/src/plugins/hs_apps/http_client_cli.c
@@ -13,11 +13,12 @@
* limitations under the License.
*/
-#include <vnet/session/application.h>
#include <vnet/session/application_interface.h>
#include <vnet/session/session.h>
#include <http/http.h>
-#include <hs_apps/http_cli.h>
+#include <http/http_header_names.h>
+#include <http/http_content_types.h>
+#include <http/http_status_codes.h>
#define HCC_DEBUG 0
@@ -34,14 +35,14 @@ typedef struct
u32 thread_index;
u32 rx_offset;
u32 vpp_session_index;
- u32 to_recv;
+ u64 to_recv;
u8 is_closed;
+ http_header_t *req_headers;
} hcc_session_t;
typedef struct
{
hcc_session_t *sessions;
- u8 *rx_buf;
u32 thread_index;
} hcc_worker_t;
@@ -68,6 +69,8 @@ typedef struct
typedef enum
{
HCC_REPLY_RECEIVED = 100,
+ HCC_TRANSPORT_CLOSED,
+ HCC_CONNECT_FAILED,
} hcc_cli_signal_t;
static hcc_main_t hcc_main;
@@ -96,10 +99,10 @@ hcc_session_get (u32 hs_index, u32 thread_index)
}
static void
-hcc_session_free (u32 thread_index, hcc_session_t *hs)
+hcc_ho_session_free (u32 hs_index)
{
- hcc_worker_t *wrk = hcc_worker_get (thread_index);
- pool_put (wrk->sessions, hs);
+ hcc_worker_t *wrk = hcc_worker_get (0);
+ pool_put_index (wrk->sessions, hs_index);
}
static int
@@ -128,43 +131,68 @@ hcc_ts_connected_callback (u32 app_index, u32 hc_index, session_t *as,
hcc_session_t *hs, *new_hs;
hcc_worker_t *wrk;
http_msg_t msg;
+ u8 *headers_buf;
+ u32 new_hs_index;
int rv;
- HCC_DBG ("hc_index: %d", hc_index);
+ HCC_DBG ("ho hc_index: %d", hc_index);
if (err)
{
clib_warning ("connected error: hc_index(%d): %U", hc_index,
format_session_error, err);
+ vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index,
+ HCC_CONNECT_FAILED, 0);
return -1;
}
- /* TODO delete half open session once the support is added in http layer */
hs = hcc_session_get (hc_index, 0);
wrk = hcc_worker_get (as->thread_index);
new_hs = hcc_session_alloc (wrk);
+ new_hs_index = new_hs->session_index;
clib_memcpy_fast (new_hs, hs, sizeof (*hs));
-
- hs->vpp_session_index = as->session_index;
+ new_hs->session_index = new_hs_index;
+ new_hs->thread_index = as->thread_index;
+ new_hs->vpp_session_index = as->session_index;
+ HCC_DBG ("new hc_index: %d", new_hs->session_index);
+ as->opaque = new_hs_index;
+
+ http_add_header (&new_hs->req_headers,
+ http_header_name_token (HTTP_HEADER_ACCEPT),
+ http_content_type_token (HTTP_CONTENT_TEXT_HTML));
+ headers_buf = http_serialize_headers (new_hs->req_headers);
+ vec_free (new_hs->req_headers);
msg.type = HTTP_MSG_REQUEST;
msg.method_type = HTTP_REQ_GET;
- msg.content_type = HTTP_CONTENT_TEXT_HTML;
+ /* request target */
+ msg.data.target_form = HTTP_TARGET_ORIGIN_FORM;
+ msg.data.target_path_offset = 0;
+ msg.data.target_path_len = vec_len (hcm->http_query);
+ /* custom headers */
+ msg.data.headers_offset = msg.data.target_path_len;
+ msg.data.headers_len = vec_len (headers_buf);
+ /* request body */
+ msg.data.body_len = 0;
+ /* data type and total length */
msg.data.type = HTTP_MSG_DATA_INLINE;
- msg.data.len = vec_len (hcm->http_query);
+ msg.data.len =
+ msg.data.target_path_len + msg.data.headers_len + msg.data.body_len;
- svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) },
- { hcm->http_query, vec_len (hcm->http_query) } };
+ svm_fifo_seg_t segs[3] = { { (u8 *) &msg, sizeof (msg) },
+ { hcm->http_query, vec_len (hcm->http_query) },
+ { headers_buf, vec_len (headers_buf) } };
- rv = svm_fifo_enqueue_segments (as->tx_fifo, segs, 2, 0 /* allow partial */);
- if (rv < 0 || rv != sizeof (msg) + vec_len (hcm->http_query))
+ rv = svm_fifo_enqueue_segments (as->tx_fifo, segs, 3, 0 /* allow partial */);
+ vec_free (headers_buf);
+ if (rv < 0 || rv != sizeof (msg) + msg.data.len)
{
clib_warning ("failed app enqueue");
return -1;
}
if (svm_fifo_set_event (as->tx_fifo))
- session_send_io_evt_to_thread (as->tx_fifo, SESSION_IO_EVT_TX);
+ session_program_tx_io_evt (as->handle, SESSION_IO_EVT_TX);
return 0;
}
@@ -219,23 +247,32 @@ hcc_ts_rx_callback (session_t *ts)
if (hs->to_recv == 0)
{
+ /* read the http message header */
rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
ASSERT (rv == sizeof (msg));
- if (msg.type != HTTP_MSG_REPLY || msg.code != HTTP_STATUS_OK)
+ if (msg.type != HTTP_MSG_REPLY)
{
clib_warning ("unexpected msg type %d", msg.type);
return 0;
}
- vec_validate (hcm->http_response, msg.data.len - 1);
+ /* drop everything up to body */
+ svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.body_offset);
+ hs->to_recv = msg.data.body_len;
+ if (msg.code != HTTP_STATUS_OK && hs->to_recv == 0)
+ {
+ hcm->http_response = format (0, "request failed, response code: %U",
+ format_http_status_code, msg.code);
+ goto done;
+ }
+ vec_validate (hcm->http_response, msg.data.body_len - 1);
vec_reset_length (hcm->http_response);
- hs->to_recv = msg.data.len;
}
u32 max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
u32 n_deq = clib_min (hs->to_recv, max_deq);
- u32 curr = vec_len (hcm->http_response);
+ u64 curr = vec_len (hcm->http_response);
rv = svm_fifo_dequeue (ts->rx_fifo, n_deq, hcm->http_response + curr);
if (rv < 0)
{
@@ -249,10 +286,12 @@ hcc_ts_rx_callback (session_t *ts)
vec_set_len (hcm->http_response, curr + n_deq);
ASSERT (hs->to_recv >= rv);
hs->to_recv -= rv;
- HCC_DBG ("app rcvd %d, remains %d", rv, hs->to_recv);
+ HCC_DBG ("app rcvd %d, remains %llu", rv, hs->to_recv);
+done:
if (hs->to_recv == 0)
{
+ HCC_DBG ("all data received, going to disconnect");
hcc_session_disconnect (ts);
vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index,
HCC_REPLY_RECEIVED, 0);
@@ -262,15 +301,21 @@ hcc_ts_rx_callback (session_t *ts)
}
static void
-hcc_ts_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf)
+hcc_ts_transport_closed (session_t *s)
{
- hcc_session_t *hs;
+ hcc_main_t *hcm = &hcc_main;
- hs = hcc_session_get (s->thread_index, s->opaque);
- if (!hs)
- return;
+ HCC_DBG ("transport closed");
- hcc_session_free (s->thread_index, hs);
+ vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index,
+ HCC_TRANSPORT_CLOSED, 0);
+}
+
+static void
+hcc_ho_cleanup_callback (session_t *ts)
+{
+ HCC_DBG ("ho hc_index: %d:", ts->opaque);
+ hcc_ho_session_free (ts->opaque);
}
static session_cb_vft_t hcc_session_cb_vft = {
@@ -280,7 +325,8 @@ static session_cb_vft_t hcc_session_cb_vft = {
.builtin_app_rx_callback = hcc_ts_rx_callback,
.builtin_app_tx_callback = hcc_ts_tx_callback,
.session_reset_callback = hcc_ts_reset_callback,
- .session_cleanup_callback = hcc_ts_cleanup_callback,
+ .session_transport_closed_callback = hcc_ts_transport_closed,
+ .half_open_cleanup_callback = hcc_ho_cleanup_callback,
};
static clib_error_t *
@@ -335,6 +381,7 @@ hcc_connect_rpc (void *rpc_args)
if (rv)
clib_warning (0, "connect returned: %U", format_session_error, rv);
+ session_endpoint_free_ext_cfgs (&a->sep_ext);
vec_free (a);
return rv;
}
@@ -353,6 +400,7 @@ hcc_connect ()
hcc_main_t *hcm = &hcc_main;
hcc_worker_t *wrk;
hcc_session_t *hs;
+ transport_endpt_ext_cfg_t *ext_cfg;
vec_validate (a, 0);
clib_memset (a, 0, sizeof (a[0]));
@@ -360,6 +408,11 @@ hcc_connect ()
clib_memcpy (&a->sep_ext, &hcm->connect_sep, sizeof (hcm->connect_sep));
a->app_index = hcm->app_index;
+ /* set http (response) timeout to 10 seconds */
+ ext_cfg = session_endpoint_add_ext_cfg (
+ &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque));
+ ext_cfg->opaque = 10;
+
/* allocate http session on main thread */
wrk = hcc_worker_get (0);
hs = hcc_session_alloc (wrk);
@@ -380,7 +433,7 @@ hcc_run (vlib_main_t *vm, int print_output)
hcc_worker_t *wrk;
num_threads = 1 /* main thread */ + vtm->n_threads;
- vec_validate (hcm->wrk, num_threads);
+ vec_validate (hcm->wrk, num_threads - 1);
vec_foreach (wrk, hcm->wrk)
{
wrk->thread_index = wrk - hcm->wrk;
@@ -409,7 +462,12 @@ hcc_run (vlib_main_t *vm, int print_output)
case HCC_REPLY_RECEIVED:
if (print_output)
vlib_cli_output (vm, "%v", hcm->http_response);
- vec_free (hcm->http_response);
+ break;
+ case HCC_TRANSPORT_CLOSED:
+ err = clib_error_return (0, "error, transport closed");
+ break;
+ case HCC_CONNECT_FAILED:
+ err = clib_error_return (0, "failed to connect");
break;
default:
err = clib_error_return (0, "unexpected event %d", event_type);
@@ -440,6 +498,28 @@ hcc_detach ()
return rv;
}
+static void
+hcc_worker_cleanup (hcc_worker_t *wrk)
+{
+ pool_free (wrk->sessions);
+}
+
+static void
+hcc_cleanup ()
+{
+ hcc_main_t *hcm = &hcc_main;
+ hcc_worker_t *wrk;
+
+ vec_foreach (wrk, hcm->wrk)
+ hcc_worker_cleanup (wrk);
+
+ vec_free (hcm->uri);
+ vec_free (hcm->http_query);
+ vec_free (hcm->http_response);
+ vec_free (hcm->appns_id);
+ vec_free (hcm->wrk);
+}
+
static clib_error_t *
hcc_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
@@ -489,7 +569,6 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
- vec_free (hcm->appns_id);
hcm->appns_id = appns_id;
hcm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
@@ -505,8 +584,11 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
}
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
vlib_worker_thread_barrier_sync (vm);
- vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */);
+ vnet_session_enable_disable (vm, &args);
vlib_worker_thread_barrier_release (vm);
err = hcc_run (vm, print_output);
@@ -520,8 +602,7 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
done:
- vec_free (hcm->uri);
- vec_free (hcm->http_query);
+ hcc_cleanup ();
unformat_free (line_input);
return err;
}
diff --git a/src/plugins/hs_apps/http_tps.c b/src/plugins/hs_apps/http_tps.c
index 920f7ea731f..a40a31caf63 100644
--- a/src/plugins/hs_apps/http_tps.c
+++ b/src/plugins/hs_apps/http_tps.c
@@ -17,6 +17,10 @@
#include <vnet/session/application_interface.h>
#include <vnet/session/session.h>
#include <http/http.h>
+#include <http/http_header_names.h>
+#include <http/http_content_types.h>
+
+#define HTS_RX_BUF_SIZE (64 << 10)
typedef struct
{
@@ -26,6 +30,8 @@ typedef struct
u64 data_len;
u64 data_offset;
u32 vpp_session_index;
+ u64 left_recv;
+ u64 total_recv;
union
{
/** threshold after which connection is closed */
@@ -34,6 +40,8 @@ typedef struct
u32 close_rate;
};
u8 *uri;
+ u8 *rx_buf;
+ http_header_t *resp_headers;
} hts_session_t;
typedef struct hts_listen_cfg_
@@ -102,6 +110,8 @@ hts_session_free (hts_session_t *hs)
if (htm->debug_level > 0)
clib_warning ("Freeing session %u", hs->session_index);
+ vec_free (hs->rx_buf);
+
if (CLIB_DEBUG)
clib_memset (hs, 0xfa, sizeof (*hs));
@@ -151,7 +161,7 @@ hts_session_tx_zc (hts_session_t *hs, session_t *ts)
svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
if (svm_fifo_set_event (ts->tx_fifo))
- session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+ session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
}
static void
@@ -198,7 +208,7 @@ hts_session_tx_no_zc (hts_session_t *hs, session_t *ts)
svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
if (svm_fifo_set_event (ts->tx_fifo))
- session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+ session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
}
static inline void
@@ -223,22 +233,46 @@ hts_start_send_data (hts_session_t *hs, http_status_code_t status)
{
http_msg_t msg;
session_t *ts;
+ u8 *headers_buf = 0;
+ u32 n_segs = 1;
+ svm_fifo_seg_t seg[2];
int rv;
+ if (vec_len (hs->resp_headers))
+ {
+ headers_buf = http_serialize_headers (hs->resp_headers);
+ vec_free (hs->resp_headers);
+ msg.data.headers_offset = 0;
+ msg.data.headers_len = vec_len (headers_buf);
+ seg[1].data = headers_buf;
+ seg[1].len = msg.data.headers_len;
+ n_segs = 2;
+ }
+ else
+ {
+ msg.data.headers_offset = 0;
+ msg.data.headers_len = 0;
+ }
+
msg.type = HTTP_MSG_REPLY;
msg.code = status;
- msg.content_type = HTTP_CONTENT_APP_OCTET_STREAM;
msg.data.type = HTTP_MSG_DATA_INLINE;
- msg.data.len = hs->data_len;
+ msg.data.body_len = hs->data_len;
+ msg.data.body_offset = msg.data.headers_len;
+ msg.data.len = msg.data.body_len + msg.data.headers_len;
+ seg[0].data = (u8 *) &msg;
+ seg[0].len = sizeof (msg);
ts = session_get (hs->vpp_session_index, hs->thread_index);
- rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
- ASSERT (rv == sizeof (msg));
+ rv = svm_fifo_enqueue_segments (ts->tx_fifo, seg, n_segs,
+ 0 /* allow partial */);
+ vec_free (headers_buf);
+ ASSERT (rv == (sizeof (msg) + msg.data.headers_len));
- if (!msg.data.len)
+ if (!msg.data.body_len)
{
if (svm_fifo_set_event (ts->tx_fifo))
- session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+ session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
return;
}
@@ -246,7 +280,7 @@ hts_start_send_data (hts_session_t *hs, http_status_code_t status)
}
static int
-try_test_file (hts_session_t *hs, u8 *request)
+try_test_file (hts_session_t *hs, u8 *target)
{
char *test_str = "test_file";
hts_main_t *htm = &hts_main;
@@ -254,10 +288,10 @@ try_test_file (hts_session_t *hs, u8 *request)
uword file_size;
int rc = 0;
- if (memcmp (request, test_str, clib_strnlen (test_str, 9)))
+ if (memcmp (target, test_str, clib_strnlen (test_str, 9)))
return -1;
- unformat_init_vector (&input, vec_dup (request));
+ unformat_init_vector (&input, vec_dup (target));
if (!unformat (&input, "test_file_%U", unformat_memory_size, &file_size))
{
rc = -1;
@@ -286,6 +320,10 @@ try_test_file (hts_session_t *hs, u8 *request)
}
}
+ http_add_header (&hs->resp_headers,
+ http_header_name_token (HTTP_HEADER_CONTENT_TYPE),
+ http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM));
+
hts_start_send_data (hs, HTTP_STATUS_OK);
done:
@@ -294,39 +332,121 @@ done:
return rc;
}
+static inline void
+hts_session_rx_body (hts_session_t *hs, session_t *ts)
+{
+ hts_main_t *htm = &hts_main;
+ u32 n_deq;
+ int rv;
+
+ n_deq = svm_fifo_max_dequeue (ts->rx_fifo);
+ if (!htm->no_zc)
+ {
+ svm_fifo_dequeue_drop_all (ts->rx_fifo);
+ }
+ else
+ {
+ n_deq = clib_min (n_deq, HTS_RX_BUF_SIZE);
+ rv = svm_fifo_dequeue (ts->rx_fifo, n_deq, hs->rx_buf);
+ ASSERT (rv == n_deq);
+ }
+ hs->left_recv -= n_deq;
+
+ if (hs->close_threshold > 0)
+ {
+ if ((f64) (hs->total_recv - hs->left_recv) / hs->total_recv >
+ hs->close_threshold)
+ hts_disconnect_transport (hs);
+ }
+
+ if (hs->left_recv == 0)
+ {
+ hts_start_send_data (hs, HTTP_STATUS_OK);
+ vec_free (hs->rx_buf);
+ }
+}
+
static int
hts_ts_rx_callback (session_t *ts)
{
+ hts_main_t *htm = &hts_main;
hts_session_t *hs;
- u8 *request = 0;
+ u8 *target = 0;
http_msg_t msg;
int rv;
hs = hts_session_get (ts->thread_index, ts->opaque);
- /* Read the http message header */
- rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
- ASSERT (rv == sizeof (msg));
-
- if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET)
+ if (hs->left_recv == 0)
{
- hts_start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
- goto done;
- }
+ hs->data_len = 0;
+ hs->resp_headers = 0;
+ hs->rx_buf = 0;
- if (!msg.data.len)
- {
- hts_start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
- goto done;
- }
+ /* Read the http message header */
+ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.type != HTTP_MSG_REQUEST)
+ {
+ hts_start_send_data (hs, HTTP_STATUS_INTERNAL_ERROR);
+ goto done;
+ }
+ if (msg.method_type != HTTP_REQ_GET && msg.method_type != HTTP_REQ_POST)
+ {
+ http_add_header (&hs->resp_headers,
+ http_header_name_token (HTTP_HEADER_ALLOW),
+ http_token_lit ("GET, POST"));
+ hts_start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
+ goto done;
+ }
- vec_validate (request, msg.data.len - 1);
- rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, request);
+ if (msg.data.target_path_len == 0 ||
+ msg.data.target_form != HTTP_TARGET_ORIGIN_FORM)
+ {
+ hts_start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
+ goto done;
+ }
- if (try_test_file (hs, request))
- hts_start_send_data (hs, HTTP_STATUS_NOT_FOUND);
+ vec_validate (target, msg.data.target_path_len - 1);
+ rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset,
+ msg.data.target_path_len, target);
+ ASSERT (rv == msg.data.target_path_len);
-done:
+ if (htm->debug_level)
+ clib_warning ("%s request target: %v",
+ msg.method_type == HTTP_REQ_GET ? "GET" : "POST",
+ target);
+
+ if (msg.method_type == HTTP_REQ_GET)
+ {
+ if (try_test_file (hs, target))
+ hts_start_send_data (hs, HTTP_STATUS_NOT_FOUND);
+ vec_free (target);
+ }
+ else
+ {
+ vec_free (target);
+ if (!msg.data.body_len)
+ {
+ hts_start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
+ goto done;
+ }
+ /* drop everything up to body */
+ svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.body_offset);
+ hs->left_recv = msg.data.body_len;
+ hs->total_recv = msg.data.body_len;
+ if (htm->no_zc)
+ vec_validate (hs->rx_buf, HTS_RX_BUF_SIZE - 1);
+ hts_session_rx_body (hs, ts);
+ return 0;
+ }
+
+ done:
+ svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.len);
+ }
+ else
+ hts_session_rx_body (hs, ts);
return 0;
}
@@ -354,6 +474,7 @@ hts_ts_accept_callback (session_t *ts)
hs = hts_session_alloc (ts->thread_index);
hs->vpp_session_index = ts->session_index;
+ hs->left_recv = 0;
ts->opaque = hs->session_index;
ts->session_state = SESSION_STATE_READY;
@@ -520,15 +641,16 @@ hts_start_listen (hts_main_t *htm, session_endpoint_cfg_t *sep, u8 *uri,
if (need_crypto)
{
- session_endpoint_alloc_ext_cfg (&a->sep_ext,
- TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
- a->sep_ext.ext_cfg->crypto.ckpair_index = htm->ckpair_index;
+ transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg (
+ &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO,
+ sizeof (transport_endpt_crypto_cfg_t));
+ ext_cfg->crypto.ckpair_index = htm->ckpair_index;
}
rv = vnet_listen (a);
if (need_crypto)
- clib_mem_free (a->sep_ext.ext_cfg);
+ session_endpoint_free_ext_cfgs (&a->sep_ext);
if (rv)
return rv;
@@ -717,7 +839,10 @@ start_server:
if (htm->app_index == (u32) ~0)
{
- vnet_session_enable_disable (vm, 1 /* is_enable */);
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
+ vnet_session_enable_disable (vm, &args);
if (hts_create (vm))
{
diff --git a/src/plugins/hs_apps/proxy.c b/src/plugins/hs_apps/proxy.c
index e8fedf921a5..7079da5eb94 100644
--- a/src/plugins/hs_apps/proxy.c
+++ b/src/plugins/hs_apps/proxy.c
@@ -19,50 +19,145 @@
#include <vnet/session/application_interface.h>
#include <hs_apps/proxy.h>
#include <vnet/tcp/tcp.h>
+#include <http/http.h>
+#include <http/http_header_names.h>
proxy_main_t proxy_main;
#define TCP_MSS 1460
-typedef struct
+static proxy_session_side_ctx_t *
+proxy_session_side_ctx_alloc (proxy_worker_t *wrk)
{
- session_endpoint_cfg_t sep;
- u32 app_index;
- u32 api_context;
-} proxy_connect_args_t;
+ proxy_session_side_ctx_t *ctx;
+
+ pool_get_zero (wrk->ctx_pool, ctx);
+ ctx->sc_index = ctx - wrk->ctx_pool;
+ ctx->ps_index = ~0;
+
+ return ctx;
+}
static void
-proxy_cb_fn (void *data, u32 data_len)
+proxy_session_side_ctx_free (proxy_worker_t *wrk,
+ proxy_session_side_ctx_t *ctx)
{
- proxy_connect_args_t *pa = (proxy_connect_args_t *) data;
- vnet_connect_args_t a;
+ pool_put (wrk->ctx_pool, ctx);
+}
- clib_memset (&a, 0, sizeof (a));
- a.api_context = pa->api_context;
- a.app_index = pa->app_index;
- clib_memcpy (&a.sep_ext, &pa->sep, sizeof (pa->sep));
- vnet_connect (&a);
- if (a.sep_ext.ext_cfg)
- clib_mem_free (a.sep_ext.ext_cfg);
+static proxy_session_side_ctx_t *
+proxy_session_side_ctx_get (proxy_worker_t *wrk, u32 ctx_index)
+{
+ return pool_elt_at_index (wrk->ctx_pool, ctx_index);
}
static void
-proxy_call_main_thread (vnet_connect_args_t * a)
+proxy_send_http_resp (session_t *s, http_status_code_t sc,
+ http_header_t *resp_headers)
{
- if (vlib_get_thread_index () == 0)
+ http_msg_t msg;
+ int rv;
+ u8 *headers_buf = 0;
+
+ if (vec_len (resp_headers))
{
- vnet_connect (a);
- if (a->sep_ext.ext_cfg)
- clib_mem_free (a->sep_ext.ext_cfg);
+ headers_buf = http_serialize_headers (resp_headers);
+ msg.data.len = msg.data.headers_len = vec_len (headers_buf);
}
else
+ msg.data.len = msg.data.headers_len = 0;
+
+ msg.type = HTTP_MSG_REPLY;
+ msg.code = sc;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.headers_offset = 0;
+ msg.data.body_len = 0;
+ msg.data.body_offset = 0;
+ rv = svm_fifo_enqueue (s->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+ if (msg.data.headers_len)
+ {
+ rv = svm_fifo_enqueue (s->tx_fifo, vec_len (headers_buf), headers_buf);
+ ASSERT (rv == vec_len (headers_buf));
+ vec_free (headers_buf);
+ }
+
+ if (svm_fifo_set_event (s->tx_fifo))
+ session_program_tx_io_evt (s->handle, SESSION_IO_EVT_TX);
+}
+
+static void
+proxy_do_connect (vnet_connect_args_t *a)
+{
+ ASSERT (session_vlib_thread_is_cl_thread ());
+ vnet_connect (a);
+ session_endpoint_free_ext_cfgs (&a->sep_ext);
+}
+
+static void
+proxy_handle_connects_rpc (void *args)
+{
+ u32 thread_index = pointer_to_uword (args), n_connects = 0, n_pending;
+ proxy_worker_t *wrk;
+ u32 max_connects;
+
+ wrk = proxy_worker_get (thread_index);
+
+ clib_spinlock_lock (&wrk->pending_connects_lock);
+
+ n_pending = clib_fifo_elts (wrk->pending_connects);
+ max_connects = clib_min (32, n_pending);
+ vec_validate (wrk->burst_connects, max_connects);
+
+ while (n_connects < max_connects)
+ clib_fifo_sub1 (wrk->pending_connects, wrk->burst_connects[n_connects++]);
+
+ clib_spinlock_unlock (&wrk->pending_connects_lock);
+
+ /* Do connects without locking pending_connects */
+ n_connects = 0;
+ while (n_connects < max_connects)
{
- proxy_connect_args_t args;
- args.api_context = a->api_context;
- args.app_index = a->app_index;
- clib_memcpy (&args.sep, &a->sep_ext, sizeof (a->sep_ext));
- vl_api_rpc_call_main_thread (proxy_cb_fn, (u8 *) & args, sizeof (args));
+ proxy_do_connect (&wrk->burst_connects[n_connects]);
+ n_connects += 1;
}
+
+ /* More work to do, program rpc */
+ if (max_connects < n_pending)
+ session_send_rpc_evt_to_thread_force (
+ transport_cl_thread (), proxy_handle_connects_rpc,
+ uword_to_pointer ((uword) thread_index, void *));
+}
+
+static void
+proxy_program_connect (vnet_connect_args_t *a)
+{
+ u32 connects_thread = transport_cl_thread (), thread_index, n_pending;
+ proxy_worker_t *wrk;
+
+ thread_index = vlib_get_thread_index ();
+
+ /* If already on first worker, handle request */
+ if (thread_index == connects_thread)
+ {
+ proxy_do_connect (a);
+ return;
+ }
+
+ /* If not on first worker, queue request */
+ wrk = proxy_worker_get (thread_index);
+
+ clib_spinlock_lock (&wrk->pending_connects_lock);
+
+ clib_fifo_add1 (wrk->pending_connects, *a);
+ n_pending = clib_fifo_elts (wrk->pending_connects);
+
+ clib_spinlock_unlock (&wrk->pending_connects_lock);
+
+ if (n_pending == 1)
+ session_send_rpc_evt_to_thread_force (
+ connects_thread, proxy_handle_connects_rpc,
+ uword_to_pointer ((uword) thread_index, void *));
}
static proxy_session_t *
@@ -85,16 +180,6 @@ proxy_session_get (u32 ps_index)
return pool_elt_at_index (pm->sessions, ps_index);
}
-static inline proxy_session_t *
-proxy_session_get_if_valid (u32 ps_index)
-{
- proxy_main_t *pm = &proxy_main;
-
- if (pool_is_free_index (pm->sessions, ps_index))
- return 0;
- return pool_elt_at_index (pm->sessions, ps_index);
-}
-
static void
proxy_session_free (proxy_session_t *ps)
{
@@ -115,7 +200,7 @@ proxy_session_postponed_free_rpc (void *arg)
clib_spinlock_lock_if_init (&pm->sessions_lock);
ps = proxy_session_get (ps_index);
- segment_manager_dealloc_fifos (ps->server_rx_fifo, ps->server_tx_fifo);
+ segment_manager_dealloc_fifos (ps->po.rx_fifo, ps->po.tx_fifo);
proxy_session_free (ps);
clib_spinlock_unlock_if_init (&pm->sessions_lock);
@@ -126,54 +211,79 @@ proxy_session_postponed_free_rpc (void *arg)
static void
proxy_session_postponed_free (proxy_session_t *ps)
{
- session_send_rpc_evt_to_thread (ps->po_thread_index,
+ /* Passive open session handle has been invalidated so we don't have thread
+ * index at this point */
+ session_send_rpc_evt_to_thread (ps->po.rx_fifo->master_thread_index,
proxy_session_postponed_free_rpc,
uword_to_pointer (ps->ps_index, void *));
}
static void
+proxy_session_close_po (proxy_session_t *ps)
+{
+ vnet_disconnect_args_t _a = {}, *a = &_a;
+ proxy_main_t *pm = &proxy_main;
+
+ ASSERT (!vlib_num_workers () ||
+ CLIB_SPINLOCK_IS_LOCKED (&pm->sessions_lock));
+
+ a->handle = ps->po.session_handle;
+ a->app_index = pm->server_app_index;
+ vnet_disconnect_session (a);
+
+ ps->po_disconnected = 1;
+}
+
+static void
+proxy_session_close_ao (proxy_session_t *ps)
+{
+ vnet_disconnect_args_t _a = {}, *a = &_a;
+ proxy_main_t *pm = &proxy_main;
+
+ ASSERT (!vlib_num_workers () ||
+ CLIB_SPINLOCK_IS_LOCKED (&pm->sessions_lock));
+
+ a->handle = ps->ao.session_handle;
+ a->app_index = pm->active_open_app_index;
+ vnet_disconnect_session (a);
+
+ ps->ao_disconnected = 1;
+}
+
+static void
proxy_try_close_session (session_t * s, int is_active_open)
{
proxy_main_t *pm = &proxy_main;
- proxy_session_t *ps = 0;
- vnet_disconnect_args_t _a, *a = &_a;
+ proxy_session_side_ctx_t *sc;
+ proxy_session_t *ps;
+ proxy_worker_t *wrk;
+
+ wrk = proxy_worker_get (s->thread_index);
+ sc = proxy_session_side_ctx_get (wrk, s->opaque);
clib_spinlock_lock_if_init (&pm->sessions_lock);
- ps = proxy_session_get (s->opaque);
+ ps = proxy_session_get (sc->ps_index);
if (is_active_open)
{
- a->handle = ps->vpp_active_open_handle;
- a->app_index = pm->active_open_app_index;
- vnet_disconnect_session (a);
- ps->ao_disconnected = 1;
+ proxy_session_close_ao (ps);
if (!ps->po_disconnected)
{
- ASSERT (ps->vpp_server_handle != SESSION_INVALID_HANDLE);
- a->handle = ps->vpp_server_handle;
- a->app_index = pm->server_app_index;
- vnet_disconnect_session (a);
- ps->po_disconnected = 1;
+ ASSERT (ps->po.session_handle != SESSION_INVALID_HANDLE);
+ proxy_session_close_po (ps);
}
}
else
{
- a->handle = ps->vpp_server_handle;
- a->app_index = pm->server_app_index;
- vnet_disconnect_session (a);
- ps->po_disconnected = 1;
+ proxy_session_close_po (ps);
if (!ps->ao_disconnected && !ps->active_open_establishing)
{
/* Proxy session closed before active open */
- if (ps->vpp_active_open_handle != SESSION_INVALID_HANDLE)
- {
- a->handle = ps->vpp_active_open_handle;
- a->app_index = pm->active_open_app_index;
- vnet_disconnect_session (a);
- }
+ if (ps->ao.session_handle != SESSION_INVALID_HANDLE)
+ proxy_session_close_ao (ps);
ps->ao_disconnected = 1;
}
}
@@ -181,29 +291,63 @@ proxy_try_close_session (session_t * s, int is_active_open)
}
static void
+proxy_try_side_ctx_cleanup (session_t *s)
+{
+ proxy_main_t *pm = &proxy_main;
+ proxy_session_t *ps;
+ proxy_session_side_ctx_t *sc;
+ proxy_worker_t *wrk;
+
+ wrk = proxy_worker_get (s->thread_index);
+ sc = proxy_session_side_ctx_get (wrk, s->opaque);
+ if (sc->state == PROXY_SC_S_CREATED)
+ return;
+
+ clib_spinlock_lock_if_init (&pm->sessions_lock);
+
+ ps = proxy_session_get (sc->ps_index);
+
+ if (!ps->po_disconnected)
+ proxy_session_close_po (ps);
+
+ if (!ps->ao_disconnected)
+ proxy_session_close_ao (ps);
+
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+}
+
+static void
proxy_try_delete_session (session_t * s, u8 is_active_open)
{
proxy_main_t *pm = &proxy_main;
proxy_session_t *ps = 0;
+ proxy_session_side_ctx_t *sc;
+ proxy_worker_t *wrk;
+ u32 ps_index;
+
+ wrk = proxy_worker_get (s->thread_index);
+ sc = proxy_session_side_ctx_get (wrk, s->opaque);
+ ps_index = sc->ps_index;
+
+ proxy_session_side_ctx_free (wrk, sc);
clib_spinlock_lock_if_init (&pm->sessions_lock);
- ps = proxy_session_get (s->opaque);
+ ps = proxy_session_get (ps_index);
if (is_active_open)
{
- ps->vpp_active_open_handle = SESSION_INVALID_HANDLE;
+ ps->ao.session_handle = SESSION_INVALID_HANDLE;
/* Revert master thread index change on connect notification */
- ps->server_rx_fifo->master_thread_index = ps->po_thread_index;
+ ps->po.rx_fifo->master_thread_index =
+ ps->po.tx_fifo->master_thread_index;
/* Passive open already cleaned up */
- if (ps->vpp_server_handle == SESSION_INVALID_HANDLE)
+ if (ps->po.session_handle == SESSION_INVALID_HANDLE)
{
- ASSERT (s->rx_fifo->refcnt == 1);
-
/* The two sides of the proxy on different threads */
- if (ps->po_thread_index != s->thread_index)
+ if (ps->po.tx_fifo->master_thread_index != s->thread_index)
{
/* This is not the right thread to delete the fifos */
s->rx_fifo = 0;
@@ -211,14 +355,17 @@ proxy_try_delete_session (session_t * s, u8 is_active_open)
proxy_session_postponed_free (ps);
}
else
- proxy_session_free (ps);
+ {
+ ASSERT (s->rx_fifo->refcnt == 1);
+ proxy_session_free (ps);
+ }
}
}
else
{
- ps->vpp_server_handle = SESSION_INVALID_HANDLE;
+ ps->po.session_handle = SESSION_INVALID_HANDLE;
- if (ps->vpp_active_open_handle == SESSION_INVALID_HANDLE)
+ if (ps->ao.session_handle == SESSION_INVALID_HANDLE)
{
if (!ps->active_open_establishing)
proxy_session_free (ps);
@@ -275,16 +422,26 @@ static int
proxy_accept_callback (session_t * s)
{
proxy_main_t *pm = &proxy_main;
+ proxy_session_side_ctx_t *sc;
proxy_session_t *ps;
+ proxy_worker_t *wrk;
+ transport_proto_t tp = session_get_transport_proto (s);
+
+ wrk = proxy_worker_get (s->thread_index);
+ sc = proxy_session_side_ctx_alloc (wrk);
+ s->opaque = sc->sc_index;
clib_spinlock_lock_if_init (&pm->sessions_lock);
ps = proxy_session_alloc ();
- ps->vpp_server_handle = session_handle (s);
- ps->vpp_active_open_handle = SESSION_INVALID_HANDLE;
- ps->po_thread_index = s->thread_index;
- s->opaque = ps->ps_index;
+ ps->po.session_handle = session_handle (s);
+ ps->po.rx_fifo = s->rx_fifo;
+ ps->po.tx_fifo = s->tx_fifo;
+
+ ps->ao.session_handle = SESSION_INVALID_HANDLE;
+ sc->ps_index = ps->ps_index;
+ sc->is_http = tp == TRANSPORT_PROTO_HTTP ? 1 : 0;
clib_spinlock_unlock_if_init (&pm->sessions_lock);
@@ -325,92 +482,167 @@ proxy_transport_needs_crypto (transport_proto_t proto)
return proto == TRANSPORT_PROTO_TLS;
}
-static int
-proxy_rx_callback (session_t * s)
+static void
+proxy_session_start_connect (proxy_session_side_ctx_t *sc, session_t *s)
{
+ int actual_transfer __attribute__ ((unused));
+ vnet_connect_args_t _a = {}, *a = &_a;
proxy_main_t *pm = &proxy_main;
- u32 thread_index = vlib_get_thread_index ();
- svm_fifo_t *ao_tx_fifo;
+ u32 max_dequeue, ps_index;
proxy_session_t *ps;
-
- ASSERT (s->thread_index == thread_index);
+ transport_proto_t tp = session_get_transport_proto (s);
clib_spinlock_lock_if_init (&pm->sessions_lock);
- ps = proxy_session_get (s->opaque);
+ ps = proxy_session_get (sc->ps_index);
- if (PREDICT_TRUE (ps->vpp_active_open_handle != SESSION_INVALID_HANDLE))
+ /* maybe we were already here */
+ if (ps->active_open_establishing)
{
clib_spinlock_unlock_if_init (&pm->sessions_lock);
+ return;
+ }
- ao_tx_fifo = s->rx_fifo;
+ ps->active_open_establishing = 1;
+ ps_index = ps->ps_index;
- /*
- * Send event for active open tx fifo
- */
- if (svm_fifo_set_event (ao_tx_fifo))
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+
+ if (tp == TRANSPORT_PROTO_HTTP)
+ {
+ http_msg_t msg;
+ u8 *target_buf = 0;
+ http_uri_t target_uri;
+ http_header_t *resp_headers = 0;
+ session_endpoint_cfg_t target_sep = SESSION_ENDPOINT_CFG_NULL;
+ int rv;
+
+ rv = svm_fifo_dequeue (s->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.type != HTTP_MSG_REQUEST)
+ {
+ proxy_send_http_resp (s, HTTP_STATUS_INTERNAL_ERROR, 0);
+ return;
+ }
+ if (msg.method_type != HTTP_REQ_CONNECT)
{
- u32 ao_thread_index = ao_tx_fifo->master_thread_index;
- u32 ao_session_index = ao_tx_fifo->shr->master_session_index;
- if (session_send_io_evt_to_thread_custom (&ao_session_index,
- ao_thread_index,
- SESSION_IO_EVT_TX))
- clib_warning ("failed to enqueue tx evt");
+ http_add_header (&resp_headers,
+ http_header_name_token (HTTP_HEADER_ALLOW),
+ http_token_lit ("CONNECT"));
+ proxy_send_http_resp (s, HTTP_STATUS_METHOD_NOT_ALLOWED,
+ resp_headers);
+ vec_free (resp_headers);
+ return;
}
- if (svm_fifo_max_enqueue (ao_tx_fifo) <= TCP_MSS)
- svm_fifo_add_want_deq_ntf (ao_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ if (msg.data.target_form != HTTP_TARGET_AUTHORITY_FORM ||
+ msg.data.target_path_len == 0)
+ {
+ proxy_send_http_resp (s, HTTP_STATUS_BAD_REQUEST, 0);
+ return;
+ }
+
+ /* read target uri */
+ target_buf = vec_new (u8, msg.data.target_path_len);
+ rv = svm_fifo_peek (s->rx_fifo, msg.data.target_path_offset,
+ msg.data.target_path_len, target_buf);
+ ASSERT (rv == msg.data.target_path_len);
+ svm_fifo_dequeue_drop (s->rx_fifo, msg.data.len);
+ rv = http_parse_authority_form_target (target_buf, &target_uri);
+ vec_free (target_buf);
+ if (rv)
+ {
+ proxy_send_http_resp (s, HTTP_STATUS_BAD_REQUEST, 0);
+ return;
+ }
+ target_sep.is_ip4 = target_uri.is_ip4;
+ target_sep.ip = target_uri.ip;
+ target_sep.port = target_uri.port;
+ target_sep.transport_proto = TRANSPORT_PROTO_TCP;
+ clib_memcpy (&a->sep_ext, &target_sep, sizeof (target_sep));
}
else
{
- vnet_connect_args_t _a, *a = &_a;
- svm_fifo_t *tx_fifo, *rx_fifo;
- u32 max_dequeue, ps_index;
- int actual_transfer __attribute__ ((unused));
+ max_dequeue = svm_fifo_max_dequeue_cons (s->rx_fifo);
+ if (PREDICT_FALSE (max_dequeue == 0))
+ return;
- rx_fifo = s->rx_fifo;
- tx_fifo = s->tx_fifo;
+ max_dequeue = clib_min (pm->rcv_buffer_size, max_dequeue);
+ actual_transfer =
+ svm_fifo_peek (s->rx_fifo, 0 /* relative_offset */, max_dequeue,
+ pm->rx_buf[s->thread_index]);
- ASSERT (rx_fifo->master_thread_index == thread_index);
- ASSERT (tx_fifo->master_thread_index == thread_index);
+ /* Expectation is that here actual data just received is parsed and based
+ * on its contents, the destination and parameters of the connect to the
+ * upstream are decided
+ */
- max_dequeue = svm_fifo_max_dequeue_cons (s->rx_fifo);
+ clib_memcpy (&a->sep_ext, &pm->client_sep, sizeof (pm->client_sep));
+ }
- if (PREDICT_FALSE (max_dequeue == 0))
- {
- clib_spinlock_unlock_if_init (&pm->sessions_lock);
- return 0;
- }
+ a->api_context = ps_index;
+ a->app_index = pm->active_open_app_index;
- max_dequeue = clib_min (pm->rcv_buffer_size, max_dequeue);
- actual_transfer = svm_fifo_peek (rx_fifo, 0 /* relative_offset */ ,
- max_dequeue, pm->rx_buf[thread_index]);
+ if (proxy_transport_needs_crypto (a->sep.transport_proto))
+ {
+ transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg (
+ &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO,
+ sizeof (transport_endpt_crypto_cfg_t));
+ ext_cfg->crypto.ckpair_index = pm->ckpair_index;
+ }
- /* $$$ your message in this space: parse url, etc. */
+ proxy_program_connect (a);
+}
- clib_memset (a, 0, sizeof (*a));
+static int
+proxy_rx_callback (session_t *s)
+{
+ proxy_session_side_ctx_t *sc;
+ svm_fifo_t *ao_tx_fifo;
+ proxy_session_t *ps;
+ proxy_worker_t *wrk;
- ps->server_rx_fifo = rx_fifo;
- ps->server_tx_fifo = tx_fifo;
- ps->active_open_establishing = 1;
- ps_index = ps->ps_index;
+ ASSERT (s->thread_index == vlib_get_thread_index ());
- clib_spinlock_unlock_if_init (&pm->sessions_lock);
+ wrk = proxy_worker_get (s->thread_index);
+ sc = proxy_session_side_ctx_get (wrk, s->opaque);
- clib_memcpy (&a->sep_ext, &pm->client_sep, sizeof (pm->client_sep));
- a->api_context = ps_index;
- a->app_index = pm->active_open_app_index;
+ if (PREDICT_FALSE (sc->state < PROXY_SC_S_ESTABLISHED))
+ {
+ proxy_main_t *pm = &proxy_main;
- if (proxy_transport_needs_crypto (a->sep.transport_proto))
+ if (sc->state == PROXY_SC_S_CREATED)
{
- session_endpoint_alloc_ext_cfg (&a->sep_ext,
- TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
- a->sep_ext.ext_cfg->crypto.ckpair_index = pm->ckpair_index;
+ proxy_session_start_connect (sc, s);
+ sc->state = PROXY_SC_S_CONNECTING;
+ return 0;
}
- proxy_call_main_thread (a);
+ clib_spinlock_lock_if_init (&pm->sessions_lock);
+
+ ps = proxy_session_get (sc->ps_index);
+ sc->pair = ps->ao;
+
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+
+ if (sc->pair.session_handle == SESSION_INVALID_HANDLE)
+ return 0;
+
+ sc->state = PROXY_SC_S_ESTABLISHED;
}
+ ao_tx_fifo = s->rx_fifo;
+
+ /*
+ * Send event for active open tx fifo
+ */
+ if (svm_fifo_set_event (ao_tx_fifo))
+ session_program_tx_io_evt (sc->pair.session_handle, SESSION_IO_EVT_TX);
+
+ if (svm_fifo_max_enqueue (ao_tx_fifo) <= TCP_MSS)
+ svm_fifo_add_want_deq_ntf (ao_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+
return 0;
}
@@ -418,20 +650,20 @@ static void
proxy_force_ack (void *handlep)
{
transport_connection_t *tc;
- session_t *ao_s;
+ session_t *s;
- ao_s = session_get_from_handle (pointer_to_uword (handlep));
- if (session_get_transport_proto (ao_s) != TRANSPORT_PROTO_TCP)
+ s = session_get_from_handle (pointer_to_uword (handlep));
+ if (session_get_transport_proto (s) != TRANSPORT_PROTO_TCP)
return;
- tc = session_get_transport (ao_s);
+ tc = session_get_transport (s);
tcp_send_ack ((tcp_connection_t *) tc);
}
static int
proxy_tx_callback (session_t * proxy_s)
{
- proxy_main_t *pm = &proxy_main;
- proxy_session_t *ps;
+ proxy_session_side_ctx_t *sc;
+ proxy_worker_t *wrk;
u32 min_free;
min_free = clib_min (svm_fifo_size (proxy_s->tx_fifo) >> 3, 128 << 10);
@@ -441,21 +673,17 @@ proxy_tx_callback (session_t * proxy_s)
return 0;
}
- clib_spinlock_lock_if_init (&pm->sessions_lock);
-
- ps = proxy_session_get (proxy_s->opaque);
-
- if (ps->vpp_active_open_handle == SESSION_INVALID_HANDLE)
- goto unlock;
+ wrk = proxy_worker_get (proxy_s->thread_index);
+ sc = proxy_session_side_ctx_get (wrk, proxy_s->opaque);
+ if (sc->state < PROXY_SC_S_ESTABLISHED)
+ return 0;
/* Force ack on active open side to update rcv wnd. Make sure it's done on
* the right thread */
- void *arg = uword_to_pointer (ps->vpp_active_open_handle, void *);
- session_send_rpc_evt_to_thread (ps->server_rx_fifo->master_thread_index,
- proxy_force_ack, arg);
-
-unlock:
- clib_spinlock_unlock_if_init (&pm->sessions_lock);
+ void *arg = uword_to_pointer (sc->pair.session_handle, void *);
+ session_send_rpc_evt_to_thread (
+ session_thread_from_handle (sc->pair.session_handle), proxy_force_ack,
+ arg);
return 0;
}
@@ -464,7 +692,10 @@ static void
proxy_cleanup_callback (session_t * s, session_cleanup_ntf_t ntf)
{
if (ntf == SESSION_CLEANUP_TRANSPORT)
- return;
+ {
+ proxy_try_side_ctx_cleanup (s);
+ return;
+ }
proxy_try_delete_session (s, 0 /* is_active_open */ );
}
@@ -490,10 +721,17 @@ active_open_alloc_session_fifos (session_t *s)
clib_spinlock_lock_if_init (&pm->sessions_lock);
+ /* Active open opaque is pointing at proxy session */
ps = proxy_session_get (s->opaque);
- txf = ps->server_rx_fifo;
- rxf = ps->server_tx_fifo;
+ if (ps->po_disconnected)
+ {
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+ return SESSION_E_ALLOC;
+ }
+
+ txf = ps->po.rx_fifo;
+ rxf = ps->po.tx_fifo;
/*
* Reset the active-open tx-fifo master indices so the active-open session
@@ -524,31 +762,43 @@ active_open_connected_callback (u32 app_index, u32 opaque,
{
proxy_main_t *pm = &proxy_main;
proxy_session_t *ps;
- u8 thread_index = vlib_get_thread_index ();
-
- /*
- * Setup proxy session handle.
- */
- clib_spinlock_lock_if_init (&pm->sessions_lock);
-
- ps = proxy_session_get (opaque);
+ proxy_worker_t *wrk;
+ proxy_session_side_ctx_t *sc;
+ session_t *po_s;
+ transport_proto_t tp;
/* Connection failed */
if (err)
{
- vnet_disconnect_args_t _a, *a = &_a;
+ clib_spinlock_lock_if_init (&pm->sessions_lock);
- a->handle = ps->vpp_server_handle;
- a->app_index = pm->server_app_index;
- vnet_disconnect_session (a);
- ps->po_disconnected = 1;
- }
- else
- {
- ps->vpp_active_open_handle = session_handle (s);
- ps->active_open_establishing = 0;
+ ps = proxy_session_get (opaque);
+ po_s = session_get_from_handle (ps->po.session_handle);
+ tp = session_get_transport_proto (po_s);
+ if (tp == TRANSPORT_PROTO_HTTP)
+ {
+ proxy_send_http_resp (po_s, HTTP_STATUS_BAD_GATEWAY, 0);
+ }
+ ps->ao_disconnected = 1;
+ proxy_session_close_po (ps);
+
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+
+ return 0;
}
+ wrk = proxy_worker_get (s->thread_index);
+
+ clib_spinlock_lock_if_init (&pm->sessions_lock);
+
+ ps = proxy_session_get (opaque);
+
+ ps->ao.rx_fifo = s->rx_fifo;
+ ps->ao.tx_fifo = s->tx_fifo;
+ ps->ao.session_handle = session_handle (s);
+
+ ps->active_open_establishing = 0;
+
/* Passive open session was already closed! */
if (ps->po_disconnected)
{
@@ -558,21 +808,136 @@ active_open_connected_callback (u32 app_index, u32 opaque,
return -1;
}
- s->opaque = opaque;
+ po_s = session_get_from_handle (ps->po.session_handle);
+ tp = session_get_transport_proto (po_s);
+
+ sc = proxy_session_side_ctx_alloc (wrk);
+ sc->pair = ps->po;
+ sc->ps_index = ps->ps_index;
clib_spinlock_unlock_if_init (&pm->sessions_lock);
- /*
- * Send event for active open tx fifo
- */
- ASSERT (s->thread_index == thread_index);
- if (svm_fifo_set_event (s->tx_fifo))
- session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX);
+ sc->state = PROXY_SC_S_ESTABLISHED;
+ s->opaque = sc->sc_index;
+ sc->is_http = tp == TRANSPORT_PROTO_HTTP ? 1 : 0;
+
+ if (tp == TRANSPORT_PROTO_HTTP)
+ {
+ proxy_send_http_resp (po_s, HTTP_STATUS_OK, 0);
+ }
+ else
+ {
+ /*
+ * Send event for active open tx fifo
+ */
+ ASSERT (s->thread_index == vlib_get_thread_index ());
+ if (svm_fifo_set_event (s->tx_fifo))
+ session_program_tx_io_evt (session_handle (s), SESSION_IO_EVT_TX);
+ }
return 0;
}
static void
+active_open_migrate_po_fixup_rpc (void *arg)
+{
+ u32 ps_index = pointer_to_uword (arg);
+ proxy_session_side_ctx_t *po_sc;
+ proxy_main_t *pm = &proxy_main;
+ session_handle_t po_sh;
+ proxy_worker_t *wrk;
+ proxy_session_t *ps;
+ session_t *po_s;
+
+ wrk = proxy_worker_get (vlib_get_thread_index ());
+
+ clib_spinlock_lock_if_init (&pm->sessions_lock);
+
+ ps = proxy_session_get (ps_index);
+
+ po_s = session_get_from_handle (ps->po.session_handle);
+ po_s->rx_fifo = ps->po.rx_fifo;
+ po_s->tx_fifo = ps->po.tx_fifo;
+
+ po_sc = proxy_session_side_ctx_get (wrk, po_s->opaque);
+ po_sc->pair = ps->ao;
+ po_sh = ps->po.session_handle;
+
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+
+ session_program_tx_io_evt (po_sh, SESSION_IO_EVT_TX);
+}
+
+static void
+active_open_migrate_rpc (void *arg)
+{
+ u32 ps_index = pointer_to_uword (arg);
+ proxy_main_t *pm = &proxy_main;
+ proxy_session_side_ctx_t *sc;
+ proxy_worker_t *wrk;
+ proxy_session_t *ps;
+ session_t *s;
+
+ wrk = proxy_worker_get (vlib_get_thread_index ());
+ sc = proxy_session_side_ctx_alloc (wrk);
+
+ clib_spinlock_lock_if_init (&pm->sessions_lock);
+
+ ps = proxy_session_get (ps_index);
+ sc->ps_index = ps->ps_index;
+
+ s = session_get_from_handle (ps->ao.session_handle);
+ s->opaque = sc->sc_index;
+ s->flags &= ~SESSION_F_IS_MIGRATING;
+
+ /* Fixup passive open session because of migration and zc */
+ ps->ao.rx_fifo = ps->po.tx_fifo = s->rx_fifo;
+ ps->ao.tx_fifo = ps->po.rx_fifo = s->tx_fifo;
+
+ ps->po.tx_fifo->shr->master_session_index =
+ session_index_from_handle (ps->po.session_handle);
+ ps->po.tx_fifo->master_thread_index =
+ session_thread_from_handle (ps->po.session_handle);
+
+ sc->pair = ps->po;
+
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+
+ session_send_rpc_evt_to_thread (
+ session_thread_from_handle (sc->pair.session_handle),
+ active_open_migrate_po_fixup_rpc, uword_to_pointer (sc->ps_index, void *));
+}
+
+static void
+active_open_migrate_callback (session_t *s, session_handle_t new_sh)
+{
+ proxy_main_t *pm = &proxy_main;
+ proxy_session_side_ctx_t *sc;
+ proxy_session_t *ps;
+ proxy_worker_t *wrk;
+
+ wrk = proxy_worker_get (s->thread_index);
+ sc = proxy_session_side_ctx_get (wrk, s->opaque);
+
+ /* NOTE: this is just an example. ZC makes this migration rather
+ * tedious. Probably better approaches could be found */
+ clib_spinlock_lock_if_init (&pm->sessions_lock);
+
+ ps = proxy_session_get (sc->ps_index);
+ ps->ao.session_handle = new_sh;
+ ps->ao.rx_fifo = 0;
+ ps->ao.tx_fifo = 0;
+
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+
+ session_send_rpc_evt_to_thread (session_thread_from_handle (new_sh),
+ active_open_migrate_rpc,
+ uword_to_pointer (sc->ps_index, void *));
+
+ proxy_session_side_ctx_free (wrk, sc);
+}
+
+static void
active_open_reset_callback (session_t * s)
{
proxy_try_close_session (s, 1 /* is_active_open */ );
@@ -618,10 +983,8 @@ active_open_rx_callback (session_t * s)
static int
active_open_tx_callback (session_t * ao_s)
{
- proxy_main_t *pm = &proxy_main;
- transport_connection_t *tc;
- proxy_session_t *ps;
- session_t *proxy_s;
+ proxy_session_side_ctx_t *sc;
+ proxy_worker_t *wrk;
u32 min_free;
min_free = clib_min (svm_fifo_size (ao_s->tx_fifo) >> 3, 128 << 10);
@@ -631,23 +994,27 @@ active_open_tx_callback (session_t * ao_s)
return 0;
}
- clib_spinlock_lock_if_init (&pm->sessions_lock);
-
- ps = proxy_session_get_if_valid (ao_s->opaque);
- if (!ps)
- goto unlock;
-
- if (ps->vpp_server_handle == ~0)
- goto unlock;
+ wrk = proxy_worker_get (ao_s->thread_index);
+ sc = proxy_session_side_ctx_get (wrk, ao_s->opaque);
- proxy_s = session_get_from_handle (ps->vpp_server_handle);
-
- /* Force ack on proxy side to update rcv wnd */
- tc = session_get_transport (proxy_s);
- tcp_send_ack ((tcp_connection_t *) tc);
+ if (sc->state < PROXY_SC_S_ESTABLISHED)
+ return 0;
-unlock:
- clib_spinlock_unlock_if_init (&pm->sessions_lock);
+ if (sc->is_http)
+ {
+ /* notify HTTP transport */
+ session_t *po = session_get_from_handle (sc->pair.session_handle);
+ session_send_io_evt_to_thread_custom (
+ &po->session_index, po->thread_index, SESSION_IO_EVT_RX);
+ }
+ else
+ {
+ /* Force ack on proxy side to update rcv wnd */
+ void *arg = uword_to_pointer (sc->pair.session_handle, void *);
+ session_send_rpc_evt_to_thread (
+ session_thread_from_handle (sc->pair.session_handle), proxy_force_ack,
+ arg);
+ }
return 0;
}
@@ -664,6 +1031,7 @@ active_open_cleanup_callback (session_t * s, session_cleanup_ntf_t ntf)
static session_cb_vft_t active_open_clients = {
.session_reset_callback = active_open_reset_callback,
.session_connected_callback = active_open_connected_callback,
+ .session_migrate_callback = active_open_migrate_callback,
.session_accept_callback = active_open_create_callback,
.session_disconnect_callback = active_open_disconnect_callback,
.session_cleanup_callback = active_open_cleanup_callback,
@@ -756,22 +1124,26 @@ proxy_server_listen ()
{
proxy_main_t *pm = &proxy_main;
vnet_listen_args_t _a, *a = &_a;
- int rv;
+ int rv, need_crypto;
clib_memset (a, 0, sizeof (*a));
a->app_index = pm->server_app_index;
clib_memcpy (&a->sep_ext, &pm->server_sep, sizeof (pm->server_sep));
- if (proxy_transport_needs_crypto (a->sep.transport_proto))
+ /* Make sure listener is marked connected for transports like udp */
+ a->sep_ext.transport_flags = TRANSPORT_CFG_F_CONNECTED;
+ need_crypto = proxy_transport_needs_crypto (a->sep.transport_proto);
+ if (need_crypto)
{
- session_endpoint_alloc_ext_cfg (&a->sep_ext,
- TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
- a->sep_ext.ext_cfg->crypto.ckpair_index = pm->ckpair_index;
+ transport_endpt_ext_cfg_t *ext_cfg = session_endpoint_add_ext_cfg (
+ &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO,
+ sizeof (transport_endpt_crypto_cfg_t));
+ ext_cfg->crypto.ckpair_index = pm->ckpair_index;
}
rv = vnet_listen (a);
- if (a->sep_ext.ext_cfg)
- clib_mem_free (a->sep_ext.ext_cfg);
+ if (need_crypto)
+ session_endpoint_free_ext_cfgs (&a->sep_ext);
return rv;
}
@@ -797,15 +1169,25 @@ proxy_server_create (vlib_main_t * vm)
{
vlib_thread_main_t *vtm = vlib_get_thread_main ();
proxy_main_t *pm = &proxy_main;
+ proxy_worker_t *wrk;
u32 num_threads;
int i;
+ if (vlib_num_workers ())
+ clib_spinlock_init (&pm->sessions_lock);
+
num_threads = 1 /* main thread */ + vtm->n_threads;
vec_validate (pm->rx_buf, num_threads - 1);
for (i = 0; i < num_threads; i++)
vec_validate (pm->rx_buf[i], pm->rcv_buffer_size);
+ vec_validate (pm->workers, vlib_num_workers ());
+ vec_foreach (wrk, pm->workers)
+ {
+ clib_spinlock_init (&wrk->pending_connects_lock);
+ }
+
proxy_server_add_ckpair ();
if (proxy_server_attach ())
@@ -813,11 +1195,6 @@ proxy_server_create (vlib_main_t * vm)
clib_warning ("failed to attach server app");
return -1;
}
- if (proxy_server_listen ())
- {
- clib_warning ("failed to start listening");
- return -1;
- }
if (active_open_attach ())
{
clib_warning ("failed to attach active open app");
@@ -849,9 +1226,6 @@ proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
pm->private_segment_count = 0;
pm->segment_size = 512 << 20;
- if (vlib_num_workers ())
- clib_spinlock_init (&pm->sessions_lock);
-
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -897,35 +1271,45 @@ proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
default_server_uri);
server_uri = format (0, "%s%c", default_server_uri, 0);
}
- if (!client_uri)
- {
- clib_warning ("No client-uri provided, Using default: %s",
- default_client_uri);
- client_uri = format (0, "%s%c", default_client_uri, 0);
- }
-
if (parse_uri ((char *) server_uri, &pm->server_sep))
{
error = clib_error_return (0, "Invalid server uri %v", server_uri);
goto done;
}
- if (parse_uri ((char *) client_uri, &pm->client_sep))
+
+ /* http proxy get target within request */
+ if (pm->server_sep.transport_proto != TRANSPORT_PROTO_HTTP)
{
- error = clib_error_return (0, "Invalid client uri %v", client_uri);
- goto done;
+ if (!client_uri)
+ {
+ clib_warning ("No client-uri provided, Using default: %s",
+ default_client_uri);
+ client_uri = format (0, "%s%c", default_client_uri, 0);
+ }
+ if (parse_uri ((char *) client_uri, &pm->client_sep))
+ {
+ error = clib_error_return (0, "Invalid client uri %v", client_uri);
+ goto done;
+ }
}
- vnet_session_enable_disable (vm, 1 /* turn on session and transport */ );
-
- rv = proxy_server_create (vm);
- switch (rv)
+ if (pm->server_app_index == APP_INVALID_INDEX)
{
- case 0:
- break;
- default:
- error = clib_error_return (0, "server_create returned %d", rv);
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
+ vnet_session_enable_disable (vm, &args);
+ rv = proxy_server_create (vm);
+ if (rv)
+ {
+ error = clib_error_return (0, "server_create returned %d", rv);
+ goto done;
+ }
}
+ if (proxy_server_listen ())
+ error = clib_error_return (0, "failed to start listening");
+
done:
unformat_free (line_input);
vec_free (client_uri);
@@ -933,14 +1317,13 @@ done:
return error;
}
-VLIB_CLI_COMMAND (proxy_create_command, static) =
-{
+VLIB_CLI_COMMAND (proxy_create_command, static) = {
.path = "test proxy server",
- .short_help = "test proxy server [server-uri <tcp://ip/port>]"
- "[client-uri <tcp://ip/port>][fifo-size <nn>[k|m]]"
- "[max-fifo-size <nn>[k|m]][high-watermark <nn>]"
- "[low-watermark <nn>][rcv-buf-size <nn>][prealloc-fifos <nn>]"
- "[private-segment-size <mem>][private-segment-count <nn>]",
+ .short_help = "test proxy server [server-uri <proto://ip/port>]"
+ "[client-uri <tcp://ip/port>][fifo-size <nn>[k|m]]"
+ "[max-fifo-size <nn>[k|m]][high-watermark <nn>]"
+ "[low-watermark <nn>][rcv-buf-size <nn>][prealloc-fifos <nn>]"
+ "[private-segment-size <mem>][private-segment-count <nn>]",
.function = proxy_server_create_command_fn,
};
@@ -950,6 +1333,7 @@ proxy_main_init (vlib_main_t * vm)
proxy_main_t *pm = &proxy_main;
pm->server_client_index = ~0;
pm->active_open_client_index = ~0;
+ pm->server_app_index = APP_INVALID_INDEX;
return 0;
}
diff --git a/src/plugins/hs_apps/proxy.h b/src/plugins/hs_apps/proxy.h
index 26f4de2f729..789e5613520 100644
--- a/src/plugins/hs_apps/proxy.h
+++ b/src/plugins/hs_apps/proxy.h
@@ -26,23 +26,57 @@
#include <vnet/session/session.h>
#include <vnet/session/application_interface.h>
+#define foreach_proxy_session_side_state \
+ _ (CREATED, "created") \
+ _ (CONNECTING, "connecting") \
+  _ (ESTABLISHED, "established")                                              \
+ _ (CLOSED, "closed")
+
+typedef enum proxy_session_side_state_
+{
+#define _(sym, str) PROXY_SC_S_##sym,
+ foreach_proxy_session_side_state
+#undef _
+} proxy_session_side_state_t;
+typedef struct proxy_session_side_
+{
+ session_handle_t session_handle;
+ svm_fifo_t *rx_fifo;
+ svm_fifo_t *tx_fifo;
+} proxy_session_side_t;
+
+typedef struct proxy_session_side_ctx_
+{
+ proxy_session_side_t pair;
+ proxy_session_side_state_t state;
+ u32 sc_index;
+ u32 ps_index;
+ u8 is_http;
+} proxy_session_side_ctx_t;
+
typedef struct
{
- svm_fifo_t *server_rx_fifo;
- svm_fifo_t *server_tx_fifo;
+ proxy_session_side_t po; /**< passive open side */
+ proxy_session_side_t ao; /**< active open side */
- session_handle_t vpp_server_handle;
- session_handle_t vpp_active_open_handle;
volatile int active_open_establishing;
volatile int po_disconnected;
volatile int ao_disconnected;
u32 ps_index;
- u32 po_thread_index;
} proxy_session_t;
+typedef struct proxy_worker_
+{
+ proxy_session_side_ctx_t *ctx_pool;
+ clib_spinlock_t pending_connects_lock;
+ vnet_connect_args_t *pending_connects;
+ vnet_connect_args_t *burst_connects;
+} proxy_worker_t;
+
typedef struct
{
+ proxy_worker_t *workers; /**< per-thread data */
proxy_session_t *sessions; /**< session pool, shared */
clib_spinlock_t sessions_lock; /**< lock for session pool */
u8 **rx_buf; /**< intermediate rx buffers */
@@ -75,6 +109,13 @@ typedef struct
extern proxy_main_t proxy_main;
+static inline proxy_worker_t *
+proxy_worker_get (u32 thread_index)
+{
+ proxy_main_t *pm = &proxy_main;
+ return vec_elt_at_index (pm->workers, thread_index);
+}
+
#endif /* __included_proxy_h__ */
/*
diff --git a/src/plugins/hs_apps/sapi/vpp_echo_common.c b/src/plugins/hs_apps/sapi/vpp_echo_common.c
index 5ce04d1b75b..09ba583cf78 100644
--- a/src/plugins/hs_apps/sapi/vpp_echo_common.c
+++ b/src/plugins/hs_apps/sapi/vpp_echo_common.c
@@ -330,8 +330,8 @@ format_transport_proto (u8 * s, va_list * args)
case TRANSPORT_PROTO_UDP:
s = format (s, "UDP");
break;
- case TRANSPORT_PROTO_NONE:
- s = format (s, "NONE");
+ case TRANSPORT_PROTO_CT:
+ s = format (s, "CT");
break;
case TRANSPORT_PROTO_TLS:
s = format (s, "TLS");
diff --git a/src/plugins/hs_apps/test_builtins.c b/src/plugins/hs_apps/test_builtins.c
new file mode 100644
index 00000000000..c314e71b5df
--- /dev/null
+++ b/src/plugins/hs_apps/test_builtins.c
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#include <http_static/http_static.h>
+#include <vppinfra/tw_timer_2t_1w_2048sl.h>
+
+typedef struct
+{
+ u32 stop_timer_handle;
+ hss_session_handle_t sh;
+} tw_timer_elt_t;
+
+typedef struct tb_main_
+{
+ tw_timer_elt_t *delayed_resps;
+ tw_timer_wheel_2t_1w_2048sl_t tw;
+ hss_session_send_fn send_data;
+ u8 *test_data;
+} tb_main_t;
+
+static tb_main_t tb_main;
+
+static uword
+test_builtins_timer_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ vlib_frame_t *f)
+{
+ tb_main_t *tbm = &tb_main;
+ f64 now, timeout = 1.0;
+ uword *event_data = 0;
+ uword __clib_unused event_type;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ now = vlib_time_now (vm);
+ event_type = vlib_process_get_events (vm, (uword **) &event_data);
+
+ /* expire timers */
+ tw_timer_expire_timers_2t_1w_2048sl (&tbm->tw, now);
+
+ vec_reset_length (event_data);
+ }
+ return 0;
+}
+
+VLIB_REGISTER_NODE (test_builtins_timer_process_node) = {
+ .function = test_builtins_timer_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "test-builtins-timer-process",
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+static void
+send_data_to_hss (hss_session_handle_t sh, u8 *data, u8 free_vec_data)
+{
+ tb_main_t *tbm = &tb_main;
+ hss_url_handler_args_t args = {};
+
+ args.sh = sh;
+ args.data = data;
+ args.data_len = vec_len (data);
+ args.ct = HTTP_CONTENT_TEXT_PLAIN;
+ args.sc = HTTP_STATUS_OK;
+ args.free_vec_data = free_vec_data;
+
+ tbm->send_data (&args);
+}
+
+static hss_url_handler_rc_t
+handle_get_test1 (hss_url_handler_args_t *args)
+{
+ u8 *data;
+
+ clib_warning ("get request on test1");
+ data = format (0, "hello");
+ send_data_to_hss (args->sh, data, 1);
+
+ return HSS_URL_HANDLER_ASYNC;
+}
+
+static hss_url_handler_rc_t
+handle_get_test2 (hss_url_handler_args_t *args)
+{
+ u8 *data;
+
+ clib_warning ("get request on test2");
+ data = format (0, "some data");
+ send_data_to_hss (args->sh, data, 1);
+
+ return HSS_URL_HANDLER_ASYNC;
+}
+
+static void
+delayed_resp_cb (u32 *expired_timers)
+{
+ tb_main_t *tbm = &tb_main;
+ int i;
+ u32 pool_index;
+ tw_timer_elt_t *e;
+ u8 *data;
+
+ for (i = 0; i < vec_len (expired_timers); i++)
+ {
+ pool_index = expired_timers[i] & 0x7FFFFFFF;
+ e = pool_elt_at_index (tbm->delayed_resps, pool_index);
+ clib_warning ("sending delayed data");
+ data = format (0, "delayed data");
+ send_data_to_hss (e->sh, data, 1);
+ pool_put (tbm->delayed_resps, e);
+ }
+}
+
+static hss_url_handler_rc_t
+handle_get_test_delayed (hss_url_handler_args_t *args)
+{
+ tb_main_t *tbm = &tb_main;
+ tw_timer_elt_t *e;
+
+ clib_warning ("get request on test_delayed");
+ pool_get (tbm->delayed_resps, e);
+ e->sh = args->sh;
+ e->stop_timer_handle =
+ tw_timer_start_2t_1w_2048sl (&tbm->tw, e - tbm->delayed_resps, 0, 5);
+
+ return HSS_URL_HANDLER_ASYNC;
+}
+
+static hss_url_handler_rc_t
+handle_post_test3 (hss_url_handler_args_t *args)
+{
+ send_data_to_hss (args->sh, 0, 0);
+ return HSS_URL_HANDLER_ASYNC;
+}
+
+static hss_url_handler_rc_t
+handle_get_64bytes (hss_url_handler_args_t *args)
+{
+ tb_main_t *tbm = &tb_main;
+ send_data_to_hss (args->sh, tbm->test_data, 0);
+ return HSS_URL_HANDLER_ASYNC;
+}
+
+static void
+test_builtins_init (vlib_main_t *vm)
+{
+ tb_main_t *tbm = &tb_main;
+ hss_register_url_fn fp;
+ vlib_node_t *n;
+
+ fp = vlib_get_plugin_symbol ("http_static_plugin.so",
+ "hss_register_url_handler");
+
+ if (fp == 0)
+ {
+ clib_warning ("http_static_plugin.so not loaded...");
+ return;
+ }
+
+ tbm->test_data = format (
+ 0, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
+
+ (*fp) (handle_get_test1, "test1", HTTP_REQ_GET);
+ (*fp) (handle_get_test2, "test2", HTTP_REQ_GET);
+ (*fp) (handle_get_test_delayed, "test_delayed", HTTP_REQ_GET);
+ (*fp) (handle_post_test3, "test3", HTTP_REQ_POST);
+ (*fp) (handle_get_64bytes, "64B", HTTP_REQ_GET);
+
+ tbm->send_data =
+ vlib_get_plugin_symbol ("http_static_plugin.so", "hss_session_send_data");
+
+ tw_timer_wheel_init_2t_1w_2048sl (&tbm->tw, delayed_resp_cb, 1.0, ~0);
+
+ vlib_node_set_state (vm, test_builtins_timer_process_node.index,
+ VLIB_NODE_STATE_POLLING);
+ n = vlib_get_node (vm, test_builtins_timer_process_node.index);
+ vlib_start_process (vm, n->runtime_index);
+}
+
+static clib_error_t *
+test_builtins_enable_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ test_builtins_init (vm);
+ return 0;
+}
+
+VLIB_CLI_COMMAND (test_builtins_enable_command, static) = {
+ .path = "test-url-handler enable",
+ .short_help = "test-url-handler enable",
+ .function = test_builtins_enable_command_fn,
+};
diff --git a/src/plugins/hs_apps/vcl/vcl_test.h b/src/plugins/hs_apps/vcl/vcl_test.h
index 0ce27ef84e2..11667fb144a 100644
--- a/src/plugins/hs_apps/vcl/vcl_test.h
+++ b/src/plugins/hs_apps/vcl/vcl_test.h
@@ -124,7 +124,7 @@ typedef struct
typedef struct
{
- const vcl_test_proto_vft_t *protos[VPPCOM_PROTO_SRTP + 1];
+ const vcl_test_proto_vft_t *protos[VPPCOM_PROTO_HTTP + 1];
uint32_t ckpair_index;
hs_test_cfg_t cfg;
vcl_test_wrk_t *wrk;
@@ -420,6 +420,39 @@ vcl_test_write (vcl_test_session_t *ts, void *buf, uint32_t nbytes)
return (tx_bytes);
}
+static inline int
+vcl_test_write_ds (vcl_test_session_t *ts)
+{
+ vcl_test_stats_t *stats = &ts->stats;
+ int tx_bytes;
+
+ do
+ {
+ stats->tx_xacts++;
+ if (ts->ds[1].len)
+ tx_bytes = vppcom_session_write_segments (ts->fd, ts->ds, 2);
+ else
+ tx_bytes = vppcom_session_write_segments (ts->fd, ts->ds, 1);
+
+ if (tx_bytes < 0)
+ errno = -tx_bytes;
+ if ((tx_bytes == 0) ||
+ ((tx_bytes < 0) && ((errno == EAGAIN) || (errno == EWOULDBLOCK))))
+ stats->rx_eagain++;
+ }
+ while ((tx_bytes == 0) ||
+ ((tx_bytes < 0) && ((errno == EAGAIN) || (errno == EWOULDBLOCK))));
+
+ if (tx_bytes < 0)
+ {
+ vterr ("vppcom_session_write_segments()", -errno);
+ }
+ else
+ stats->tx_bytes += tx_bytes;
+
+ return (tx_bytes);
+}
+
static inline void
dump_help (void)
{
diff --git a/src/plugins/hs_apps/vcl/vcl_test_client.c b/src/plugins/hs_apps/vcl/vcl_test_client.c
index a4a10b562ff..8bac1f00b9d 100644
--- a/src/plugins/hs_apps/vcl/vcl_test_client.c
+++ b/src/plugins/hs_apps/vcl/vcl_test_client.c
@@ -419,13 +419,8 @@ vtc_worker_run_select (vcl_test_client_worker_t *wrk)
if (vcm->incremental_stats)
vtc_inc_stats_check (ts);
}
- if ((!check_rx && ts->stats.tx_bytes >= ts->cfg.total_bytes) ||
- (check_rx && ts->stats.rx_bytes >= ts->cfg.total_bytes))
- {
- clock_gettime (CLOCK_REALTIME, &ts->stats.stop);
- ts->is_done = 1;
- n_active_sessions--;
- }
+ if (vtc_session_check_is_done (ts, check_rx))
+ n_active_sessions -= 1;
}
}
diff --git a/src/plugins/hs_apps/vcl/vcl_test_protos.c b/src/plugins/hs_apps/vcl/vcl_test_protos.c
index cd1ac2b24f4..9c81c5f17a1 100644
--- a/src/plugins/hs_apps/vcl/vcl_test_protos.c
+++ b/src/plugins/hs_apps/vcl/vcl_test_protos.c
@@ -14,6 +14,23 @@
*/
#include <hs_apps/vcl/vcl_test.h>
+#include <http/http.h>
+#include <http/http_header_names.h>
+#include <http/http_content_types.h>
+
+typedef enum vcl_test_http_state_
+{
+ VCL_TEST_HTTP_IDLE = 0,
+ VCL_TEST_HTTP_IN_PROGRESS,
+ VCL_TEST_HTTP_COMPLETED,
+} vcl_test_http_state_t;
+
+typedef struct vcl_test_http_ctx_t
+{
+ u8 is_server;
+ vcl_test_http_state_t test_state;
+ u64 rem_data;
+} vcl_test_http_ctx_t;
static int
vt_tcp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
@@ -978,6 +995,418 @@ static const vcl_test_proto_vft_t vcl_test_srtp = {
VCL_TEST_REGISTER_PROTO (VPPCOM_PROTO_SRTP, vcl_test_srtp);
+static void
+vt_http_session_init (vcl_test_session_t *ts, u8 is_server)
+{
+ vcl_test_http_ctx_t *http_ctx;
+
+ http_ctx = malloc (sizeof (vcl_test_http_ctx_t));
+ memset (http_ctx, 0, sizeof (*http_ctx));
+ http_ctx->is_server = is_server;
+ ts->opaque = http_ctx;
+}
+
+static inline void
+vt_http_send_reply_msg (vcl_test_session_t *ts, http_status_code_t status)
+{
+ http_msg_t msg;
+ int rv = 0;
+
+ memset (&msg, 0, sizeof (http_msg_t));
+ msg.type = HTTP_MSG_REPLY;
+ msg.code = status;
+
+ vppcom_data_segment_t segs[1] = { { (u8 *) &msg, sizeof (msg) } };
+
+ do
+ {
+ rv = vppcom_session_write_segments (ts->fd, segs, 1);
+
+ if (rv < 0)
+ {
+ errno = -rv;
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ continue;
+
+ vterr ("vppcom_session_write()", -errno);
+ break;
+ }
+ }
+ while (rv <= 0);
+}
+
+static inline int
+vt_process_http_server_read_msg (vcl_test_session_t *ts, void *buf,
+ uint32_t nbytes)
+{
+ http_msg_t msg;
+ u8 *target_path = 0;
+ vcl_test_http_ctx_t *vcl_test_http_ctx = (vcl_test_http_ctx_t *) ts->opaque;
+ vcl_test_stats_t *stats = &ts->stats;
+ int rv = 0;
+
+ do
+ {
+ stats->rx_xacts++;
+ rv = vppcom_session_read (ts->fd, buf, nbytes);
+
+ if (rv <= 0)
+ {
+ errno = -rv;
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ {
+ stats->rx_eagain++;
+ continue;
+ }
+
+ vterr ("vppcom_session_read()", -errno);
+ return 0;
+ }
+
+ if (PREDICT_TRUE (vcl_test_http_ctx->test_state ==
+ VCL_TEST_HTTP_IN_PROGRESS))
+ {
+ vcl_test_http_ctx->rem_data -= rv;
+
+ if (vcl_test_http_ctx->rem_data == 0)
+ {
+ vcl_test_http_ctx->test_state = VCL_TEST_HTTP_COMPLETED;
+ vt_http_send_reply_msg (ts, HTTP_STATUS_OK);
+ }
+ }
+ else if (PREDICT_FALSE (vcl_test_http_ctx->test_state ==
+ VCL_TEST_HTTP_IDLE))
+ {
+ msg = *(http_msg_t *) buf;
+
+ /* verify that we have received http post request from client */
+ if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_POST)
+ {
+ vt_http_send_reply_msg (ts, HTTP_STATUS_METHOD_NOT_ALLOWED);
+ vterr ("error! only POST requests allowed from client", 0);
+ return 0;
+ }
+
+ if (msg.data.target_form != HTTP_TARGET_ORIGIN_FORM)
+ {
+ vt_http_send_reply_msg (ts, HTTP_STATUS_BAD_REQUEST);
+ vterr ("error! http target not in origin form", 0);
+ return 0;
+ }
+
+ /* validate target path syntax */
+ if (msg.data.target_path_len)
+ {
+ vec_validate (target_path, msg.data.target_path_len - 1);
+ memcpy (target_path,
+ buf + sizeof (msg) + msg.data.target_path_offset - 1,
+ msg.data.target_path_len + 1);
+ if (http_validate_abs_path_syntax (target_path, 0))
+ {
+ vt_http_send_reply_msg (ts, HTTP_STATUS_BAD_REQUEST);
+ vterr ("error! target path is not absolute", 0);
+ vec_free (target_path);
+ return 0;
+ }
+ vec_free (target_path);
+ }
+
+ /* read body */
+ if (msg.data.body_len)
+ {
+ vcl_test_http_ctx->rem_data = msg.data.body_len;
+ /* | <http_msg_t> | <target> | <headers> | <body> | */
+ vcl_test_http_ctx->rem_data -=
+ (rv - sizeof (msg) - msg.data.body_offset);
+ vcl_test_http_ctx->test_state = VCL_TEST_HTTP_IN_PROGRESS;
+ }
+ }
+
+ if (rv < nbytes)
+ stats->rx_incomp++;
+ }
+ while (rv <= 0);
+
+ stats->rx_bytes += rv;
+ return (rv);
+}
+
+static inline int
+vt_process_http_client_read_msg (vcl_test_session_t *ts, void *buf,
+ uint32_t nbytes)
+{
+ http_msg_t msg;
+ int rv = 0;
+
+ do
+ {
+ rv = vppcom_session_read (ts->fd, buf, nbytes);
+
+ if (rv < 0)
+ {
+ errno = -rv;
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ continue;
+
+ vterr ("vppcom_session_read()", -errno);
+ break;
+ }
+ }
+ while (!rv);
+
+ msg = *(http_msg_t *) buf;
+
+ if (msg.type == HTTP_MSG_REPLY && msg.code == HTTP_STATUS_OK)
+ vtinf ("received 200 OK from server");
+ else
+ vterr ("received unexpected reply from server", 0);
+
+ return (rv);
+}
+
+static inline int
+vt_process_http_client_write_msg (vcl_test_session_t *ts, void *buf,
+ uint32_t nbytes)
+{
+ http_msg_t msg;
+ http_header_t *req_headers = 0;
+ u8 *headers_buf = 0;
+ u8 *target;
+ vcl_test_http_ctx_t *vcl_test_http_ctx = (vcl_test_http_ctx_t *) ts->opaque;
+ vcl_test_stats_t *stats = &ts->stats;
+ int rv = 0;
+
+ if (PREDICT_TRUE (vcl_test_http_ctx->test_state ==
+ VCL_TEST_HTTP_IN_PROGRESS))
+ {
+ do
+ {
+ rv = vppcom_session_write (
+ ts->fd, buf, clib_min (nbytes, vcl_test_http_ctx->rem_data));
+
+ if (rv <= 0)
+ {
+ errno = -rv;
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ {
+ stats->tx_eagain++;
+ continue;
+ }
+
+ vterr ("vppcom_session_write()", -errno);
+ return 0;
+ }
+
+ vcl_test_http_ctx->rem_data -= rv;
+
+ if (vcl_test_http_ctx->rem_data == 0)
+ {
+ vcl_test_http_ctx->test_state = VCL_TEST_HTTP_COMPLETED;
+ vtinf ("client finished sending %ld bytes of data",
+ ts->cfg.total_bytes);
+ }
+
+ if (rv < nbytes)
+ stats->tx_incomp++;
+ }
+ while (rv <= 0);
+ }
+
+ else if (PREDICT_FALSE (vcl_test_http_ctx->test_state == VCL_TEST_HTTP_IDLE))
+ {
+ http_add_header (
+ &req_headers, http_header_name_token (HTTP_HEADER_CONTENT_TYPE),
+ http_content_type_token (HTTP_CONTENT_APP_OCTET_STREAM));
+ headers_buf = http_serialize_headers (req_headers);
+ vec_free (req_headers);
+
+ memset (&msg, 0, sizeof (http_msg_t));
+ msg.type = HTTP_MSG_REQUEST;
+ msg.method_type = HTTP_REQ_POST;
+
+ /* target */
+ msg.data.target_form = HTTP_TARGET_ORIGIN_FORM;
+ target = (u8 *) "/vcl_test_http\0";
+ msg.data.target_path_len = strlen ((char *) target);
+
+ /* headers */
+ msg.data.headers_offset = msg.data.target_path_len;
+ msg.data.headers_len = vec_len (headers_buf);
+
+ /* body */
+ msg.data.body_offset = msg.data.headers_offset + msg.data.headers_len;
+ msg.data.body_len = ts->cfg.total_bytes;
+
+ msg.data.len =
+ msg.data.target_path_len + msg.data.headers_len + msg.data.body_len;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+
+ vppcom_data_segment_t segs[3] = { { (u8 *) &msg, sizeof (msg) },
+ { target, strlen ((char *) target) },
+ { headers_buf,
+ vec_len (headers_buf) } };
+
+ do
+ {
+ rv = vppcom_session_write_segments (ts->fd, segs, 3);
+
+ if (rv <= 0)
+ {
+ errno = -rv;
+ if (errno == EAGAIN || errno == EWOULDBLOCK)
+ {
+ stats->tx_eagain++;
+ continue;
+ }
+
+ vterr ("vppcom_session_write_segments()", -errno);
+ vec_free (headers_buf);
+ return 0;
+ }
+ }
+ while (rv <= 0);
+
+ vcl_test_http_ctx->test_state = VCL_TEST_HTTP_IN_PROGRESS;
+ vcl_test_http_ctx->rem_data = ts->cfg.total_bytes;
+ vec_free (headers_buf);
+ }
+
+ stats->tx_bytes += rv;
+ return (rv);
+}
+
+static inline int
+vt_process_http_server_write_msg (vcl_test_session_t *ts, void *buf,
+ uint32_t nbytes)
+{
+ return 0;
+}
+
+static inline int
+vt_http_read (vcl_test_session_t *ts, void *buf, uint32_t nbytes)
+{
+ vcl_test_http_ctx_t *vcl_test_http_ctx = (vcl_test_http_ctx_t *) ts->opaque;
+
+ if (vcl_test_http_ctx->is_server)
+ return vt_process_http_server_read_msg (ts, buf, nbytes);
+ else
+ return vt_process_http_client_read_msg (ts, buf, nbytes);
+}
+
+static inline int
+vt_http_write (vcl_test_session_t *ts, void *buf, uint32_t nbytes)
+{
+ vcl_test_http_ctx_t *vcl_test_http_ctx = (vcl_test_http_ctx_t *) ts->opaque;
+
+ if (vcl_test_http_ctx->is_server)
+ return vt_process_http_server_write_msg (ts, buf, nbytes);
+ else
+ return vt_process_http_client_write_msg (ts, buf, nbytes);
+}
+
+static int
+vt_http_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
+{
+ uint32_t flags, flen;
+ int rv;
+
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_HTTP, ts->noblk_connect);
+ if (ts->fd < 0)
+ {
+ vterr ("vppcom_session_create()", ts->fd);
+ return ts->fd;
+ }
+
+ rv = vppcom_session_connect (ts->fd, endpt);
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
+ {
+ vterr ("vppcom_session_connect()", rv);
+ return rv;
+ }
+
+ ts->read = vt_http_read;
+ ts->write = vt_http_write;
+
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+ }
+
+ vt_http_session_init (ts, 0 /* is_server */);
+
+ return 0;
+}
+
+static int
+vt_http_listen (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
+{
+ int rv;
+
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_HTTP, 1 /* is_nonblocking */);
+ if (ts->fd < 0)
+ {
+ vterr ("vppcom_session_create()", ts->fd);
+ return ts->fd;
+ }
+
+ rv = vppcom_session_bind (ts->fd, endpt);
+ if (rv < 0)
+ {
+ vterr ("vppcom_session_bind()", rv);
+ return rv;
+ }
+
+ rv = vppcom_session_listen (ts->fd, 10);
+ if (rv < 0)
+ {
+ vterr ("vppcom_session_listen()", rv);
+ return rv;
+ }
+
+ return 0;
+}
+
+static int
+vt_http_accept (int listen_fd, vcl_test_session_t *ts)
+{
+ int client_fd;
+
+ client_fd = vppcom_session_accept (listen_fd, &ts->endpt, 0);
+ if (client_fd < 0)
+ {
+ vterr ("vppcom_session_accept()", client_fd);
+ return client_fd;
+ }
+
+ ts->fd = client_fd;
+ ts->is_open = 1;
+ ts->read = vt_http_read;
+ ts->write = vt_http_write;
+
+ vt_http_session_init (ts, 1 /* is_server */);
+
+ return 0;
+}
+
+static int
+vt_http_close (vcl_test_session_t *ts)
+{
+ free (ts->opaque);
+ return 0;
+}
+
+static const vcl_test_proto_vft_t vcl_test_http = {
+ .open = vt_http_connect,
+ .listen = vt_http_listen,
+ .accept = vt_http_accept,
+ .close = vt_http_close,
+};
+
+VCL_TEST_REGISTER_PROTO (VPPCOM_PROTO_HTTP, vcl_test_http);
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/hs_apps/vcl/vcl_test_server.c b/src/plugins/hs_apps/vcl/vcl_test_server.c
index d17a2089ba7..008539f2585 100644
--- a/src/plugins/hs_apps/vcl/vcl_test_server.c
+++ b/src/plugins/hs_apps/vcl/vcl_test_server.c
@@ -282,11 +282,7 @@ vts_server_process_rx (vcl_test_session_t *conn, int rx_bytes)
if (conn->cfg.test == HS_TEST_TYPE_BI)
{
if (vsm->use_ds)
- {
- (void) vcl_test_write (conn, conn->ds[0].data, conn->ds[0].len);
- if (conn->ds[1].len)
- (void) vcl_test_write (conn, conn->ds[1].data, conn->ds[1].len);
- }
+ (void) vcl_test_write_ds (conn);
else
(void) vcl_test_write (conn, conn->rxbuf, rx_bytes);
}
@@ -420,36 +416,41 @@ static void
vcl_test_init_endpoint_addr (vcl_test_server_main_t * vsm)
{
struct sockaddr_storage *servaddr = &vsm->servaddr;
- memset (servaddr, 0, sizeof (*servaddr));
if (vsm->server_cfg.address_ip6)
{
struct sockaddr_in6 *server_addr = (struct sockaddr_in6 *) servaddr;
- server_addr->sin6_family = AF_INET6;
- server_addr->sin6_addr = in6addr_any;
- server_addr->sin6_port = htons (vsm->server_cfg.port);
+ vsm->server_cfg.endpt.is_ip4 = 0;
+ vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin6_addr;
+ vsm->server_cfg.endpt.port = htons (vsm->server_cfg.port);
}
else
{
struct sockaddr_in *server_addr = (struct sockaddr_in *) servaddr;
- server_addr->sin_family = AF_INET;
- server_addr->sin_addr.s_addr = htonl (INADDR_ANY);
- server_addr->sin_port = htons (vsm->server_cfg.port);
+ vsm->server_cfg.endpt.is_ip4 = 1;
+ vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin_addr;
+ vsm->server_cfg.endpt.port = htons (vsm->server_cfg.port);
}
+}
+
+static void
+vcl_test_clear_endpoint_addr (vcl_test_server_main_t *vsm)
+{
+ struct sockaddr_storage *servaddr = &vsm->servaddr;
+
+ memset (&vsm->servaddr, 0, sizeof (vsm->servaddr));
if (vsm->server_cfg.address_ip6)
{
struct sockaddr_in6 *server_addr = (struct sockaddr_in6 *) servaddr;
- vsm->server_cfg.endpt.is_ip4 = 0;
- vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin6_addr;
- vsm->server_cfg.endpt.port = (uint16_t) server_addr->sin6_port;
+ server_addr->sin6_family = AF_INET6;
+ server_addr->sin6_addr = in6addr_any;
}
else
{
struct sockaddr_in *server_addr = (struct sockaddr_in *) servaddr;
- vsm->server_cfg.endpt.is_ip4 = 1;
- vsm->server_cfg.endpt.ip = (uint8_t *) &server_addr->sin_addr;
- vsm->server_cfg.endpt.port = (uint16_t) server_addr->sin_port;
+ server_addr->sin_family = AF_INET;
+ server_addr->sin_addr.s_addr = htonl (INADDR_ANY);
}
}
@@ -460,9 +461,10 @@ vcl_test_server_process_opts (vcl_test_server_main_t * vsm, int argc,
int v, c;
vsm->server_cfg.proto = VPPCOM_PROTO_TCP;
+ vcl_test_clear_endpoint_addr (vsm);
opterr = 0;
- while ((c = getopt (argc, argv, "6DLsw:hp:S")) != -1)
+ while ((c = getopt (argc, argv, "6DLsw:hp:SB:")) != -1)
switch (c)
{
case '6':
@@ -473,7 +475,22 @@ vcl_test_server_process_opts (vcl_test_server_main_t * vsm, int argc,
if (vppcom_unformat_proto (&vsm->server_cfg.proto, optarg))
vtwrn ("Invalid vppcom protocol %s, defaulting to TCP", optarg);
break;
-
+ case 'B':
+ if (vsm->server_cfg.address_ip6)
+ {
+ if (inet_pton (
+ AF_INET6, optarg,
+ &((struct sockaddr_in6 *) &vsm->servaddr)->sin6_addr) != 1)
+ vtwrn ("couldn't parse ipv6 addr %s", optarg);
+ }
+ else
+ {
+ if (inet_pton (
+ AF_INET, optarg,
+ &((struct sockaddr_in *) &vsm->servaddr)->sin_addr) != 1)
+ vtwrn ("couldn't parse ipv4 addr %s", optarg);
+ }
+ break;
case 'D':
vsm->server_cfg.proto = VPPCOM_PROTO_UDP;
break;
diff --git a/src/plugins/http/CMakeLists.txt b/src/plugins/http/CMakeLists.txt
index d9cd84a3955..075b8d6817b 100644
--- a/src/plugins/http/CMakeLists.txt
+++ b/src/plugins/http/CMakeLists.txt
@@ -17,3 +17,8 @@ add_vpp_plugin(http
http_buffer.c
http_timer.c
)
+
+add_vpp_plugin(http_unittest
+ SOURCES
+ test/http_test.c
+)
diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c
index 0fa113c8155..6659de9689f 100644
--- a/src/plugins/http/http.c
+++ b/src/plugins/http/http.c
@@ -16,11 +16,11 @@
#include <http/http.h>
#include <vnet/session/session.h>
#include <http/http_timer.h>
+#include <http/http_status_codes.h>
static http_main_t http_main;
#define HTTP_FIFO_THRESH (16 << 10)
-#define CONTENT_LEN_STR "Content-Length: "
/* HTTP state machine result */
typedef enum http_sm_result_t_
@@ -30,24 +30,12 @@ typedef enum http_sm_result_t_
HTTP_SM_ERROR = -1,
} http_sm_result_t;
-const char *http_status_code_str[] = {
-#define _(c, s, str) str,
- foreach_http_status_code
-#undef _
-};
-
-const char *http_content_type_str[] = {
-#define _(s, ext, str) str,
- foreach_http_content_type
-#undef _
-};
-
const http_buffer_type_t msg_to_buf_type[] = {
[HTTP_MSG_DATA_INLINE] = HTTP_BUFFER_FIFO,
[HTTP_MSG_DATA_PTR] = HTTP_BUFFER_PTR,
};
-u8 *
+static u8 *
format_http_state (u8 *s, va_list *va)
{
http_state_t state = va_arg (*va, http_state_t);
@@ -83,6 +71,24 @@ format_http_state (u8 *s, va_list *va)
} \
while (0)
+static inline int
+http_state_is_tx_valid (http_conn_t *hc)
+{
+ http_state_t state = hc->http_state;
+ return (state == HTTP_STATE_APP_IO_MORE_DATA ||
+ state == HTTP_STATE_WAIT_APP_REPLY ||
+ state == HTTP_STATE_WAIT_APP_METHOD);
+}
+
+static inline int
+http_state_is_rx_valid (http_conn_t *hc)
+{
+ http_state_t state = hc->http_state;
+ return (state == HTTP_STATE_WAIT_SERVER_REPLY ||
+ state == HTTP_STATE_CLIENT_IO_MORE_DATA ||
+ state == HTTP_STATE_WAIT_CLIENT_METHOD);
+}
+
static inline http_worker_t *
http_worker_get (u32 thread_index)
{
@@ -111,6 +117,15 @@ http_conn_get_w_thread (u32 hc_index, u32 thread_index)
return pool_elt_at_index (wrk->conn_pool, hc_index);
}
+static inline http_conn_t *
+http_conn_get_w_thread_if_valid (u32 hc_index, u32 thread_index)
+{
+ http_worker_t *wrk = http_worker_get (thread_index);
+ if (pool_is_free_index (wrk->conn_pool, hc_index))
+ return 0;
+ return pool_elt_at_index (wrk->conn_pool, hc_index);
+}
+
void
http_conn_free (http_conn_t *hc)
{
@@ -118,6 +133,35 @@ http_conn_free (http_conn_t *hc)
pool_put (wrk->conn_pool, hc);
}
+static inline http_conn_t *
+http_ho_conn_get (u32 ho_hc_index)
+{
+ http_main_t *hm = &http_main;
+ return pool_elt_at_index (hm->ho_conn_pool, ho_hc_index);
+}
+
+void
+http_ho_conn_free (http_conn_t *ho_hc)
+{
+ http_main_t *hm = &http_main;
+ pool_put (hm->ho_conn_pool, ho_hc);
+}
+
+static inline u32
+http_ho_conn_alloc (void)
+{
+ http_main_t *hm = &http_main;
+ http_conn_t *hc;
+
+ pool_get_aligned_safe (hm->ho_conn_pool, hc, CLIB_CACHE_LINE_BYTES);
+ clib_memset (hc, 0, sizeof (*hc));
+ hc->h_hc_index = hc - hm->ho_conn_pool;
+ hc->h_pa_session_handle = SESSION_INVALID_HANDLE;
+ hc->h_tc_session_handle = SESSION_INVALID_HANDLE;
+ hc->timeout = HTTP_CONN_TIMEOUT;
+ return hc->h_hc_index;
+}
+
static u32
http_listener_alloc (void)
{
@@ -126,6 +170,7 @@ http_listener_alloc (void)
pool_get_zero (hm->listener_pool, lhc);
lhc->c_c_index = lhc - hm->listener_pool;
+ lhc->timeout = HTTP_CONN_TIMEOUT;
return lhc->c_c_index;
}
@@ -140,6 +185,7 @@ http_listener_free (http_conn_t *lhc)
{
http_main_t *hm = &http_main;
+ vec_free (lhc->app_name);
if (CLIB_DEBUG)
memset (lhc, 0xfc, sizeof (*lhc));
pool_put (hm->listener_pool, lhc);
@@ -160,20 +206,47 @@ http_disconnect_transport (http_conn_t *hc)
}
static void
+http_conn_invalidate_timer_cb (u32 hs_handle)
+{
+ http_conn_t *hc;
+
+ hc =
+ http_conn_get_w_thread_if_valid (hs_handle & 0x00FFFFFF, hs_handle >> 24);
+
+ HTTP_DBG (1, "hc [%u]%x", hs_handle >> 24, hs_handle & 0x00FFFFFF);
+ if (!hc)
+ {
+ HTTP_DBG (1, "already deleted");
+ return;
+ }
+
+ hc->timer_handle = HTTP_TIMER_HANDLE_INVALID;
+ hc->pending_timer = 1;
+}
+
+static void
http_conn_timeout_cb (void *hc_handlep)
{
http_conn_t *hc;
uword hs_handle;
hs_handle = pointer_to_uword (hc_handlep);
- hc = http_conn_get_w_thread (hs_handle & 0x00FFFFFF, hs_handle >> 24);
+ hc =
+ http_conn_get_w_thread_if_valid (hs_handle & 0x00FFFFFF, hs_handle >> 24);
- HTTP_DBG (1, "terminate thread %d index %d hs %llx", hs_handle >> 24,
- hs_handle & 0x00FFFFFF, hc);
+ HTTP_DBG (1, "hc [%u]%x", hs_handle >> 24, hs_handle & 0x00FFFFFF);
if (!hc)
- return;
+ {
+ HTTP_DBG (1, "already deleted");
+ return;
+ }
+
+ if (!hc->pending_timer)
+ {
+ HTTP_DBG (1, "timer not pending");
+ return;
+ }
- hc->timer_handle = ~0;
session_transport_closing_notify (&hc->connection);
http_disconnect_transport (hc);
}
@@ -193,6 +266,7 @@ http_ts_accept_callback (session_t *ts)
hc_index = http_conn_alloc_w_thread (ts->thread_index);
hc = http_conn_get_w_thread (hc_index, ts->thread_index);
clib_memcpy_fast (hc, lhc, sizeof (*lhc));
+ hc->timer_handle = HTTP_TIMER_HANDLE_INVALID;
hc->c_thread_index = ts->thread_index;
hc->h_hc_index = hc_index;
@@ -225,6 +299,7 @@ http_ts_accept_callback (session_t *ts)
if ((rv = app_worker_init_accepted (as)))
{
HTTP_DBG (1, "failed to allocate fifos");
+ hc->h_pa_session_handle = SESSION_INVALID_HANDLE;
session_free (as);
return rv;
}
@@ -266,20 +341,25 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts,
app_worker_t *app_wrk;
int rv;
+ ho_hc = http_ho_conn_get (ho_hc_index);
+ ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING);
+
if (err)
{
- clib_warning ("ERROR: %d", err);
+ clib_warning ("half-open hc index %d, error: %U", ho_hc_index,
+ format_session_error, err);
+ app_wrk = app_worker_get_if_valid (ho_hc->h_pa_wrk_index);
+ if (app_wrk)
+ app_worker_connect_notify (app_wrk, 0, err, ho_hc->h_pa_app_api_ctx);
return 0;
}
new_hc_index = http_conn_alloc_w_thread (ts->thread_index);
hc = http_conn_get_w_thread (new_hc_index, ts->thread_index);
- ho_hc = http_conn_get_w_thread (ho_hc_index, 0);
-
- ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING);
clib_memcpy_fast (hc, ho_hc, sizeof (*hc));
+ hc->timer_handle = HTTP_TIMER_HANDLE_INVALID;
hc->c_thread_index = ts->thread_index;
hc->h_tc_session_handle = session_handle (ts);
hc->c_c_index = new_hc_index;
@@ -301,8 +381,8 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts,
as->session_type = session_type_from_proto_and_ip (
TRANSPORT_PROTO_HTTP, session_type_is_ip4 (ts->session_type));
- HTTP_DBG (1, "half-open hc index %d, hc index %d", ho_hc_index,
- new_hc_index);
+ HTTP_DBG (1, "half-open hc index %x, hc [%u]%x", ho_hc_index,
+ ts->thread_index, new_hc_index);
app_wrk = app_worker_get (hc->h_pa_wrk_index);
if (!app_wrk)
@@ -359,47 +439,54 @@ http_ts_reset_callback (session_t *ts)
*/
static const char *http_error_template = "HTTP/1.1 %s\r\n"
"Date: %U GMT\r\n"
- "Content-Type: text/html\r\n"
"Connection: close\r\n"
- "Pragma: no-cache\r\n"
"Content-Length: 0\r\n\r\n";
-static const char *http_redirect_template = "HTTP/1.1 %s\r\n";
-
/**
* http response boilerplate
*/
static const char *http_response_template = "HTTP/1.1 %s\r\n"
"Date: %U GMT\r\n"
- "Expires: %U GMT\r\n"
- "Server: VPP Static\r\n"
- "Content-Type: %s\r\n"
- "Content-Length: %lu\r\n\r\n";
+ "Server: %v\r\n";
+
+static const char *content_len_template = "Content-Length: %llu\r\n";
+
+/**
+ * http request boilerplate
+ */
+static const char *http_get_request_template = "GET %s HTTP/1.1\r\n"
+ "Host: %v\r\n"
+ "User-Agent: %v\r\n"
+ "%s";
-static const char *http_request_template = "GET %s HTTP/1.1\r\n"
- "User-Agent: VPP HTTP client\r\n"
- "Accept: */*\r\n";
+static const char *http_post_request_template = "POST %s HTTP/1.1\r\n"
+ "Host: %v\r\n"
+ "User-Agent: %v\r\n"
+ "Content-Length: %llu\r\n"
+ "%s";
static u32
-http_send_data (http_conn_t *hc, u8 *data, u32 length, u32 offset)
+http_send_data (http_conn_t *hc, u8 *data, u32 length)
{
const u32 max_burst = 64 << 10;
session_t *ts;
u32 to_send;
- int sent;
+ int rv;
ts = session_get_from_handle (hc->h_tc_session_handle);
- to_send = clib_min (length - offset, max_burst);
- sent = svm_fifo_enqueue (ts->tx_fifo, to_send, data + offset);
-
- if (sent <= 0)
- return offset;
+ to_send = clib_min (length, max_burst);
+ rv = svm_fifo_enqueue (ts->tx_fifo, to_send, data);
+ if (rv <= 0)
+ {
+ clib_warning ("svm_fifo_enqueue failed, rv %d", rv);
+ return 0;
+ }
if (svm_fifo_set_event (ts->tx_fifo))
- session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+ session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
- return (offset + sent);
+ return rv;
}
static void
@@ -415,37 +502,70 @@ http_send_error (http_conn_t *hc, http_status_code_t ec)
now = clib_timebase_now (&hm->timebase);
data = format (0, http_error_template, http_status_code_str[ec],
format_clib_timebase_time, now);
- http_send_data (hc, data, vec_len (data), 0);
+ HTTP_DBG (3, "%v", data);
+ http_send_data (hc, data, vec_len (data));
vec_free (data);
}
static int
http_read_message (http_conn_t *hc)
{
- u32 max_deq, cursize;
+ u32 max_deq;
session_t *ts;
int n_read;
ts = session_get_from_handle (hc->h_tc_session_handle);
- cursize = vec_len (hc->rx_buf);
max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
if (PREDICT_FALSE (max_deq == 0))
return -1;
- vec_validate (hc->rx_buf, cursize + max_deq - 1);
- n_read = svm_fifo_dequeue (ts->rx_fifo, max_deq, hc->rx_buf + cursize);
+ vec_validate (hc->rx_buf, max_deq - 1);
+ n_read = svm_fifo_peek (ts->rx_fifo, 0, max_deq, hc->rx_buf);
ASSERT (n_read == max_deq);
+ HTTP_DBG (1, "read %u bytes from rx_fifo", n_read);
+
+ return 0;
+}
+
+static void
+http_read_message_drop (http_conn_t *hc, u32 len)
+{
+ session_t *ts;
+
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+ svm_fifo_dequeue_drop (ts->rx_fifo, len);
+ vec_reset_length (hc->rx_buf);
if (svm_fifo_is_empty (ts->rx_fifo))
svm_fifo_unset_event (ts->rx_fifo);
+}
- vec_set_len (hc->rx_buf, cursize + n_read);
- return 0;
+static void
+http_read_message_drop_all (http_conn_t *hc)
+{
+ session_t *ts;
+
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+ svm_fifo_dequeue_drop_all (ts->rx_fifo);
+ vec_reset_length (hc->rx_buf);
+
+ if (svm_fifo_is_empty (ts->rx_fifo))
+ svm_fifo_unset_event (ts->rx_fifo);
}
-static int
-v_find_index (u8 *vec, u32 offset, char *str)
+/**
+ * @brief Find the first occurrence of the string in the vector.
+ *
+ * @param vec The vector to be scanned.
+ * @param offset Search offset in the vector.
+ * @param num Maximum number of characters to be searched if non-zero.
+ * @param str The string to be searched.
+ *
+ * @return @c -1 if the string is not found within the vector; index otherwise.
+ */
+static inline int
+v_find_index (u8 *vec, u32 offset, u32 num, char *str)
{
int start_index = offset;
u32 slen = (u32) strnlen_s_inline (str, 16);
@@ -456,7 +576,15 @@ v_find_index (u8 *vec, u32 offset, char *str)
if (vlen <= slen)
return -1;
- for (; start_index < (vlen - slen); start_index++)
+ int end_index = vlen - slen;
+ if (num)
+ {
+ if (num < slen)
+ return -1;
+ end_index = clib_min (end_index, offset + num - slen);
+ }
+
+ for (; start_index <= end_index; start_index++)
{
if (!memcmp (vec + start_index, str, slen))
return start_index;
@@ -465,50 +593,445 @@ v_find_index (u8 *vec, u32 offset, char *str)
return -1;
}
+static void
+http_identify_optional_query (http_conn_t *hc)
+{
+ int i;
+ for (i = hc->target_path_offset;
+ i < (hc->target_path_offset + hc->target_path_len); i++)
+ {
+ if (hc->rx_buf[i] == '?')
+ {
+ hc->target_query_offset = i + 1;
+ hc->target_query_len = hc->target_path_offset + hc->target_path_len -
+ hc->target_query_offset;
+ hc->target_path_len = hc->target_path_len - hc->target_query_len - 1;
+ break;
+ }
+ }
+}
+
+static int
+http_get_target_form (http_conn_t *hc)
+{
+ int i;
+
+ /* "*" */
+ if ((hc->rx_buf[hc->target_path_offset] == '*') &&
+ (hc->target_path_len == 1))
+ {
+ hc->target_form = HTTP_TARGET_ASTERISK_FORM;
+ return 0;
+ }
+
+ /* 1*( "/" segment ) [ "?" query ] */
+ if (hc->rx_buf[hc->target_path_offset] == '/')
+ {
+ /* drop leading slash */
+ hc->target_path_len--;
+ hc->target_path_offset++;
+ hc->target_form = HTTP_TARGET_ORIGIN_FORM;
+ http_identify_optional_query (hc);
+ return 0;
+ }
+
+ /* scheme "://" host [ ":" port ] *( "/" segment ) [ "?" query ] */
+ i = v_find_index (hc->rx_buf, hc->target_path_offset, hc->target_path_len,
+ "://");
+ if (i > 0)
+ {
+ hc->target_form = HTTP_TARGET_ABSOLUTE_FORM;
+ http_identify_optional_query (hc);
+ return 0;
+ }
+
+ /* host ":" port */
+ for (i = hc->target_path_offset;
+ i < (hc->target_path_offset + hc->target_path_len); i++)
+ {
+ if ((hc->rx_buf[i] == ':') && (isdigit (hc->rx_buf[i + 1])))
+ {
+ hc->target_form = HTTP_TARGET_AUTHORITY_FORM;
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
static int
-http_parse_header (http_conn_t *hc, int *content_length)
+http_parse_request_line (http_conn_t *hc, http_status_code_t *ec)
{
- unformat_input_t input;
- int i, len;
- u8 *line;
+ int i, target_len;
+ u32 next_line_offset, method_offset;
+
+ /* request-line = method SP request-target SP HTTP-version CRLF */
+ i = v_find_index (hc->rx_buf, 8, 0, "\r\n");
+ if (i < 0)
+ {
+ clib_warning ("request line incomplete");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ HTTP_DBG (2, "request line length: %d", i);
+ hc->control_data_len = i + 2;
+ next_line_offset = hc->control_data_len;
+
+ /* there should be at least one more CRLF */
+ if (vec_len (hc->rx_buf) < (next_line_offset + 2))
+ {
+ clib_warning ("malformed message, too short");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
- i = v_find_index (hc->rx_buf, hc->rx_buf_offset, CONTENT_LEN_STR);
+ /*
+ * RFC9112 2.2:
+ * In the interest of robustness, a server that is expecting to receive and
+ * parse a request-line SHOULD ignore at least one empty line (CRLF)
+ * received prior to the request-line.
+ */
+ method_offset = hc->rx_buf[0] == '\r' && hc->rx_buf[1] == '\n' ? 2 : 0;
+ /* parse method */
+ if (!memcmp (hc->rx_buf + method_offset, "GET ", 4))
+ {
+ HTTP_DBG (0, "GET method");
+ hc->method = HTTP_REQ_GET;
+ hc->target_path_offset = method_offset + 4;
+ }
+ else if (!memcmp (hc->rx_buf + method_offset, "POST ", 5))
+ {
+ HTTP_DBG (0, "POST method");
+ hc->method = HTTP_REQ_POST;
+ hc->target_path_offset = method_offset + 5;
+ }
+ else if (!memcmp (hc->rx_buf + method_offset, "CONNECT ", 8))
+ {
+ HTTP_DBG (0, "CONNECT method");
+ hc->method = HTTP_REQ_CONNECT;
+ hc->target_path_offset = method_offset + 8;
+ hc->is_tunnel = 1;
+ }
+ else
+ {
+ if (hc->rx_buf[method_offset] - 'A' <= 'Z' - 'A')
+ {
+ clib_warning ("method not implemented: %8v", hc->rx_buf);
+ *ec = HTTP_STATUS_NOT_IMPLEMENTED;
+ return -1;
+ }
+ else
+ {
+ clib_warning ("not method name: %8v", hc->rx_buf);
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ }
+
+ /* find version */
+ i = v_find_index (hc->rx_buf, next_line_offset - 11, 11, " HTTP/");
if (i < 0)
{
- clib_warning ("cannot find '%s' in the header!", CONTENT_LEN_STR);
+ clib_warning ("HTTP version not present");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ /* verify major version */
+ if (isdigit (hc->rx_buf[i + 6]))
+ {
+ if (hc->rx_buf[i + 6] != '1')
+ {
+ clib_warning ("HTTP major version '%c' not supported",
+ hc->rx_buf[i + 6]);
+ *ec = HTTP_STATUS_HTTP_VERSION_NOT_SUPPORTED;
+ return -1;
+ }
+ }
+ else
+ {
+ clib_warning ("HTTP major version '%c' is not digit", hc->rx_buf[i + 6]);
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+
+ /* parse request-target */
+ HTTP_DBG (2, "http at %d", i);
+ target_len = i - hc->target_path_offset;
+ HTTP_DBG (2, "target_len %d", target_len);
+ if (target_len < 1)
+ {
+ clib_warning ("request-target not present");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ hc->target_path_len = target_len;
+ hc->target_query_offset = 0;
+ hc->target_query_len = 0;
+ if (http_get_target_form (hc))
+ {
+ clib_warning ("invalid target");
+ *ec = HTTP_STATUS_BAD_REQUEST;
return -1;
}
+ HTTP_DBG (2, "request-target path length: %u", hc->target_path_len);
+ HTTP_DBG (2, "request-target path offset: %u", hc->target_path_offset);
+ HTTP_DBG (2, "request-target query length: %u", hc->target_query_len);
+ HTTP_DBG (2, "request-target query offset: %u", hc->target_query_offset);
+
+ /* set buffer offset to nex line start */
+ hc->rx_buf_offset = next_line_offset;
+
+ return 0;
+}
+
+#define expect_char(c) \
+ if (*p++ != c) \
+ { \
+ clib_warning ("unexpected character"); \
+ return -1; \
+ }
- hc->rx_buf_offset = i;
+#define parse_int(val, mul) \
+ do \
+ { \
+ if (!isdigit (*p)) \
+ { \
+ clib_warning ("expected digit"); \
+ return -1; \
+ } \
+ val += mul * (*p++ - '0'); \
+ } \
+ while (0)
- i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "\n");
+static int
+http_parse_status_line (http_conn_t *hc)
+{
+ int i;
+ u32 next_line_offset;
+ u8 *p, *end;
+ u16 status_code = 0;
+
+ i = v_find_index (hc->rx_buf, 0, 0, "\r\n");
+ /* status-line = HTTP-version SP status-code SP [ reason-phrase ] CRLF */
if (i < 0)
{
- clib_warning ("end of line missing; incomplete data");
+ clib_warning ("status line incomplete");
+ return -1;
+ }
+ HTTP_DBG (2, "status line length: %d", i);
+ if (i < 12)
+ {
+ clib_warning ("status line too short (%d)", i);
+ return -1;
+ }
+ hc->control_data_len = i + 2;
+ next_line_offset = hc->control_data_len;
+ p = hc->rx_buf;
+ end = hc->rx_buf + i;
+
+ /* there should be at least one more CRLF */
+ if (vec_len (hc->rx_buf) < (next_line_offset + 2))
+ {
+ clib_warning ("malformed message, too short");
+ return -1;
+ }
+
+ /* parse version */
+ expect_char ('H');
+ expect_char ('T');
+ expect_char ('T');
+ expect_char ('P');
+ expect_char ('/');
+ expect_char ('1');
+ expect_char ('.');
+ if (!isdigit (*p++))
+ {
+ clib_warning ("invalid HTTP minor version");
+ return -1;
+ }
+
+ /* skip space(s) */
+ if (*p != ' ')
+ {
+ clib_warning ("no space after HTTP version");
+ return -1;
+ }
+ do
+ {
+ p++;
+ if (p == end)
+ {
+ clib_warning ("no status code");
+ return -1;
+ }
+ }
+ while (*p == ' ');
+
+ /* parse status code */
+ if ((end - p) < 3)
+ {
+ clib_warning ("not enough characters for status code");
return -1;
}
+ parse_int (status_code, 100);
+ parse_int (status_code, 10);
+ parse_int (status_code, 1);
+ if (status_code < 100 || status_code > 599)
+ {
+ clib_warning ("invalid status code %d", status_code);
+ return -1;
+ }
+ hc->status_code = status_code;
+ HTTP_DBG (0, "status code: %d", hc->status_code);
- len = i - hc->rx_buf_offset;
- line = vec_new (u8, len);
- clib_memcpy (line, hc->rx_buf + hc->rx_buf_offset, len);
+ /* set buffer offset to nex line start */
+ hc->rx_buf_offset = next_line_offset;
+
+ return 0;
+}
+
+static int
+http_identify_headers (http_conn_t *hc, http_status_code_t *ec)
+{
+ int i;
- unformat_init_vector (&input, line);
- if (!unformat (&input, CONTENT_LEN_STR "%d", content_length))
+ /* check if we have any header */
+ if ((hc->rx_buf[hc->rx_buf_offset] == '\r') &&
+ (hc->rx_buf[hc->rx_buf_offset + 1] == '\n'))
{
- clib_warning ("failed to unformat content length!");
+ /* just another CRLF -> no headers */
+ HTTP_DBG (2, "no headers");
+ hc->headers_len = 0;
+ hc->control_data_len += 2;
+ return 0;
+ }
+
+ /* find empty line indicating end of header section */
+ i = v_find_index (hc->rx_buf, hc->rx_buf_offset, 0, "\r\n\r\n");
+ if (i < 0)
+ {
+ clib_warning ("cannot find header section end");
+ *ec = HTTP_STATUS_BAD_REQUEST;
return -1;
}
- unformat_free (&input);
+ hc->headers_offset = hc->rx_buf_offset;
+ hc->headers_len = i - hc->rx_buf_offset + 2;
+ hc->control_data_len += (hc->headers_len + 2);
+ HTTP_DBG (2, "headers length: %u", hc->headers_len);
+ HTTP_DBG (2, "headers offset: %u", hc->headers_offset);
+
+ return 0;
+}
+
+static int
+http_identify_message_body (http_conn_t *hc, http_status_code_t *ec)
+{
+ int i, value_len;
+ u8 *end, *p, *value_start;
+ u64 body_len = 0, digit;
+
+ hc->body_len = 0;
+
+ if (hc->headers_len == 0)
+ {
+ HTTP_DBG (2, "no header, no message-body");
+ return 0;
+ }
+ if (hc->is_tunnel)
+ {
+ HTTP_DBG (2, "tunnel, no message-body");
+ return 0;
+ }
+
+ /* TODO check for chunked transfer coding */
- /* skip rest of the header */
- hc->rx_buf_offset += len;
- i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "<html>");
+ /* try to find Content-Length header */
+ i = v_find_index (hc->rx_buf, hc->headers_offset, hc->headers_len,
+ "Content-Length:");
if (i < 0)
{
- clib_warning ("<html> tag not found");
+ HTTP_DBG (2, "Content-Length header not present, no message-body");
+ return 0;
+ }
+ hc->rx_buf_offset = i + 15;
+
+ i = v_find_index (hc->rx_buf, hc->rx_buf_offset, hc->headers_len, "\r\n");
+ if (i < 0)
+ {
+ clib_warning ("end of line missing");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ value_len = i - hc->rx_buf_offset;
+ if (value_len < 1)
+ {
+ clib_warning ("invalid header, content length value missing");
+ *ec = HTTP_STATUS_BAD_REQUEST;
return -1;
}
- hc->rx_buf_offset = i;
+
+ end = hc->rx_buf + hc->rx_buf_offset + value_len;
+ p = hc->rx_buf + hc->rx_buf_offset;
+ /* skip leading whitespace */
+ while (1)
+ {
+ if (p == end)
+ {
+ clib_warning ("value not found");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ else if (*p != ' ' && *p != '\t')
+ {
+ break;
+ }
+ p++;
+ value_len--;
+ }
+ value_start = p;
+ /* skip trailing whitespace */
+ p = value_start + value_len - 1;
+ while (*p == ' ' || *p == '\t')
+ {
+ p--;
+ value_len--;
+ }
+
+ if (value_len < 1)
+ {
+ clib_warning ("value not found");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+
+ p = value_start;
+ for (i = 0; i < value_len; i++)
+ {
+ /* check for digit */
+ if (!isdigit (*p))
+ {
+ clib_warning ("expected digit");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ digit = *p - '0';
+ u64 new_body_len = body_len * 10 + digit;
+ /* check for overflow */
+ if (new_body_len < body_len)
+ {
+ clib_warning ("too big number, overflow");
+ *ec = HTTP_STATUS_BAD_REQUEST;
+ return -1;
+ }
+ body_len = new_body_len;
+ p++;
+ }
+
+ hc->body_len = body_len;
+
+ hc->body_offset = hc->headers_offset + hc->headers_len + 2;
+ HTTP_DBG (2, "body length: %llu", hc->body_len);
+ HTTP_DBG (2, "body offset: %u", hc->body_offset);
return 0;
}
@@ -516,92 +1039,95 @@ http_parse_header (http_conn_t *hc, int *content_length)
static http_sm_result_t
http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp)
{
- int i, rv, content_length;
+ int rv;
http_msg_t msg = {};
app_worker_t *app_wrk;
session_t *as;
+ u32 len, max_enq, body_sent;
http_status_code_t ec;
+ http_main_t *hm = &http_main;
rv = http_read_message (hc);
/* Nothing yet, wait for data or timer expire */
if (rv)
- return HTTP_SM_STOP;
+ {
+ HTTP_DBG (1, "no data to deq");
+ return HTTP_SM_STOP;
+ }
+
+ HTTP_DBG (3, "%v", hc->rx_buf);
if (vec_len (hc->rx_buf) < 8)
{
- ec = HTTP_STATUS_BAD_REQUEST;
+ clib_warning ("response buffer too short");
goto error;
}
- if ((i = v_find_index (hc->rx_buf, 0, "200 OK")) >= 0)
- {
- msg.type = HTTP_MSG_REPLY;
- msg.content_type = HTTP_CONTENT_TEXT_HTML;
- msg.code = HTTP_STATUS_OK;
- msg.data.type = HTTP_MSG_DATA_INLINE;
- msg.data.len = 0;
+ rv = http_parse_status_line (hc);
+ if (rv)
+ goto error;
- rv = http_parse_header (hc, &content_length);
- if (rv)
- {
- clib_warning ("failed to parse http reply");
- session_transport_closing_notify (&hc->connection);
- http_disconnect_transport (hc);
- return -1;
- }
- msg.data.len = content_length;
- u32 dlen = vec_len (hc->rx_buf) - hc->rx_buf_offset;
- as = session_get_from_handle (hc->h_pa_session_handle);
- svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) },
- { &hc->rx_buf[hc->rx_buf_offset], dlen } };
-
- rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2,
- 0 /* allow partial */);
- if (rv < 0)
- {
- clib_warning ("error enqueue");
- return HTTP_SM_ERROR;
- }
+ rv = http_identify_headers (hc, &ec);
+ if (rv)
+ goto error;
- hc->rx_buf_offset += dlen;
- hc->to_recv = content_length - dlen;
+ rv = http_identify_message_body (hc, &ec);
+ if (rv)
+ goto error;
- if (hc->rx_buf_offset == vec_len (hc->rx_buf))
- {
- vec_reset_length (hc->rx_buf);
- hc->rx_buf_offset = 0;
- }
+ /* send at least "control data" which is necessary minimum,
+ * if there is some space send also portion of body */
+ as = session_get_from_handle (hc->h_pa_session_handle);
+ max_enq = svm_fifo_max_enqueue (as->rx_fifo);
+ max_enq -= sizeof (msg);
+ if (max_enq < hc->control_data_len)
+ {
+ clib_warning ("not enough room for control data in app's rx fifo");
+ goto error;
+ }
+ len = clib_min (max_enq, vec_len (hc->rx_buf));
+
+ msg.type = HTTP_MSG_REPLY;
+ msg.code = hm->sc_by_u16[hc->status_code];
+ msg.data.headers_offset = hc->headers_offset;
+ msg.data.headers_len = hc->headers_len;
+ msg.data.body_offset = hc->body_offset;
+ msg.data.body_len = hc->body_len;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = len;
- if (hc->to_recv == 0)
- {
- hc->rx_buf_offset = 0;
- vec_reset_length (hc->rx_buf);
- http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD);
- }
- else
- {
- http_state_change (hc, HTTP_STATE_CLIENT_IO_MORE_DATA);
- }
+ svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) },
+ { hc->rx_buf, len } };
- app_wrk = app_worker_get_if_valid (as->app_wrk_index);
- if (app_wrk)
- app_worker_rx_notify (app_wrk, as);
- return HTTP_SM_STOP;
+ rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */);
+ ASSERT (rv == (sizeof (msg) + len));
+
+ http_read_message_drop (hc, len);
+
+ body_sent = len - hc->control_data_len;
+ hc->to_recv = hc->body_len - body_sent;
+ if (hc->to_recv == 0)
+ {
+ /* all sent, we are done */
+ http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD);
}
else
{
- HTTP_DBG (0, "Unknown http method %v", hc->rx_buf);
- ec = HTTP_STATUS_METHOD_NOT_ALLOWED;
- goto error;
+ /* stream rest of the response body */
+ http_state_change (hc, HTTP_STATE_CLIENT_IO_MORE_DATA);
}
-error:
+ app_wrk = app_worker_get_if_valid (as->app_wrk_index);
+ if (app_wrk)
+ app_worker_rx_notify (app_wrk, as);
+ return HTTP_SM_STOP;
- http_send_error (hc, ec);
+error:
+ http_read_message_drop_all (hc);
session_transport_closing_notify (&hc->connection);
+ session_transport_closed_notify (&hc->connection);
http_disconnect_transport (hc);
-
return HTTP_SM_ERROR;
}
@@ -612,9 +1138,9 @@ http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp)
app_worker_t *app_wrk;
http_msg_t msg;
session_t *as;
- int i, rv;
- u32 len;
- u8 *buf;
+ int rv;
+ u32 len, max_enq, body_sent;
+ u64 max_deq;
rv = http_read_message (hc);
@@ -622,64 +1148,76 @@ http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp)
if (rv)
return HTTP_SM_STOP;
+ HTTP_DBG (3, "%v", hc->rx_buf);
+
if (vec_len (hc->rx_buf) < 8)
{
ec = HTTP_STATUS_BAD_REQUEST;
goto error;
}
- if ((i = v_find_index (hc->rx_buf, 0, "GET ")) >= 0)
- {
- hc->method = HTTP_REQ_GET;
- hc->rx_buf_offset = i + 5;
+ rv = http_parse_request_line (hc, &ec);
+ if (rv)
+ goto error;
- i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "HTTP");
- if (i < 0)
- {
- ec = HTTP_STATUS_BAD_REQUEST;
- goto error;
- }
+ rv = http_identify_headers (hc, &ec);
+ if (rv)
+ goto error;
- HTTP_DBG (0, "GET method %v", hc->rx_buf);
- len = i - hc->rx_buf_offset - 1;
- }
- else if ((i = v_find_index (hc->rx_buf, 0, "POST ")) >= 0)
- {
- hc->method = HTTP_REQ_POST;
- hc->rx_buf_offset = i + 6;
- len = vec_len (hc->rx_buf) - hc->rx_buf_offset - 1;
- HTTP_DBG (0, "POST method %v", hc->rx_buf);
- }
- else
+ rv = http_identify_message_body (hc, &ec);
+ if (rv)
+ goto error;
+
+ /* send at least "control data" which is necessary minimum,
+ * if there is some space send also portion of body */
+ as = session_get_from_handle (hc->h_pa_session_handle);
+ max_enq = svm_fifo_max_enqueue (as->rx_fifo);
+ if (max_enq < hc->control_data_len)
{
- HTTP_DBG (0, "Unknown http method %v", hc->rx_buf);
- ec = HTTP_STATUS_METHOD_NOT_ALLOWED;
+ clib_warning ("not enough room for control data in app's rx fifo");
+ ec = HTTP_STATUS_INTERNAL_ERROR;
goto error;
}
-
- buf = &hc->rx_buf[hc->rx_buf_offset];
+ /* do not dequeue more than one HTTP request, we do not support pipelining */
+ max_deq =
+ clib_min (hc->control_data_len + hc->body_len, vec_len (hc->rx_buf));
+ len = clib_min (max_enq, max_deq);
msg.type = HTTP_MSG_REQUEST;
msg.method_type = hc->method;
- msg.content_type = HTTP_CONTENT_TEXT_HTML;
msg.data.type = HTTP_MSG_DATA_INLINE;
msg.data.len = len;
+ msg.data.target_form = hc->target_form;
+ msg.data.target_path_offset = hc->target_path_offset;
+ msg.data.target_path_len = hc->target_path_len;
+ msg.data.target_query_offset = hc->target_query_offset;
+ msg.data.target_query_len = hc->target_query_len;
+ msg.data.headers_offset = hc->headers_offset;
+ msg.data.headers_len = hc->headers_len;
+ msg.data.body_offset = hc->body_offset;
+ msg.data.body_len = hc->body_len;
+
+ svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) },
+ { hc->rx_buf, len } };
- svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, { buf, len } };
-
- as = session_get_from_handle (hc->h_pa_session_handle);
rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */);
- if (rv < 0 || rv != sizeof (msg) + len)
+ ASSERT (rv == (sizeof (msg) + len));
+
+ body_sent = len - hc->control_data_len;
+ hc->to_recv = hc->body_len - body_sent;
+ if (hc->to_recv == 0)
{
- clib_warning ("failed app enqueue");
- /* This should not happen as we only handle 1 request per session,
- * and fifo is allocated, but going forward we should consider
- * rescheduling */
- return HTTP_SM_ERROR;
+ /* drop everything, we do not support pipelining */
+ http_read_message_drop_all (hc);
+ /* all sent, we are done */
+ http_state_change (hc, HTTP_STATE_WAIT_APP_REPLY);
+ }
+ else
+ {
+ http_read_message_drop (hc, len);
+ /* stream rest of the response body */
+ http_state_change (hc, HTTP_STATE_CLIENT_IO_MORE_DATA);
}
-
- vec_free (hc->rx_buf);
- http_state_change (hc, HTTP_STATE_WAIT_APP_REPLY);
app_wrk = app_worker_get_if_valid (as->app_wrk_index);
if (app_wrk)
@@ -688,7 +1226,7 @@ http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp)
return HTTP_SM_STOP;
error:
-
+ http_read_message_drop_all (hc);
http_send_error (hc, ec);
session_transport_closing_notify (&hc->connection);
http_disconnect_transport (hc);
@@ -700,13 +1238,14 @@ static http_sm_result_t
http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp)
{
http_main_t *hm = &http_main;
- u8 *header;
- u32 offset;
+ u8 *response;
+ u32 sent;
f64 now;
session_t *as;
http_status_code_t sc;
http_msg_t msg;
int rv;
+ http_sm_result_t sm_result = HTTP_SM_ERROR;
as = session_get_from_handle (hc->h_pa_session_handle);
@@ -727,60 +1266,97 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp)
goto error;
}
- http_buffer_init (&hc->tx_buf, msg_to_buf_type[msg.data.type], as->tx_fifo,
- msg.data.len);
+ if (msg.code >= HTTP_N_STATUS)
+ {
+ clib_warning ("unsupported status code: %d", msg.code);
+ return HTTP_SM_ERROR;
+ }
/*
- * Add headers. For now:
+ * Add "protocol layer" headers:
* - current time
- * - expiration time
- * - content type
+ * - server name
* - data length
*/
now = clib_timebase_now (&hm->timebase);
+ response = format (0, http_response_template, http_status_code_str[msg.code],
+ /* Date */
+ format_clib_timebase_time, now,
+ /* Server */
+ hc->app_name);
+
+ /* RFC9110 9.3.6: A server MUST NOT send Content-Length header field in a
+ * 2xx (Successful) response to CONNECT. */
+ if (hc->is_tunnel && http_status_code_str[msg.code][0] == '2')
+ {
+ ASSERT (msg.data.body_len == 0);
+ hc->state = HTTP_CONN_STATE_TUNNEL;
+ /* cleanup some stuff we don't need anymore in tunnel mode */
+ http_conn_timer_stop (hc);
+ vec_free (hc->rx_buf);
+ http_buffer_free (&hc->tx_buf);
+ }
+ else
+ response = format (response, content_len_template, msg.data.body_len);
- switch (msg.code)
- {
- case HTTP_STATUS_OK:
- header =
- format (0, http_response_template, http_status_code_str[msg.code],
- /* Date */
- format_clib_timebase_time, now,
- /* Expires */
- format_clib_timebase_time, now + 600.0,
- /* Content type */
- http_content_type_str[msg.content_type],
- /* Length */
- msg.data.len);
- break;
- case HTTP_STATUS_MOVED:
- header =
- format (0, http_redirect_template, http_status_code_str[msg.code]);
- /* Location: http(s)://new-place already queued up as data */
- break;
- default:
- return HTTP_SM_ERROR;
+ /* Add headers from app (if any) */
+ if (msg.data.headers_len)
+ {
+ HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len);
+ if (msg.data.type == HTTP_MSG_DATA_PTR)
+ {
+ uword app_headers_ptr;
+ rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr),
+ (u8 *) &app_headers_ptr);
+ ASSERT (rv == sizeof (app_headers_ptr));
+ vec_append (response, uword_to_pointer (app_headers_ptr, u8 *));
+ }
+ else
+ {
+ u32 orig_len = vec_len (response);
+ vec_resize (response, msg.data.headers_len);
+ u8 *p = response + orig_len;
+ rv = svm_fifo_dequeue (as->tx_fifo, msg.data.headers_len, p);
+ ASSERT (rv == msg.data.headers_len);
+ }
}
+ else
+ {
+ /* No headers from app */
+ response = format (response, "\r\n");
+ }
+ HTTP_DBG (3, "%v", response);
- offset = http_send_data (hc, header, vec_len (header), 0);
- if (offset != vec_len (header))
+ sent = http_send_data (hc, response, vec_len (response));
+ if (sent != vec_len (response))
{
- clib_warning ("couldn't send response header!");
+ clib_warning ("sending status-line and headers failed!");
sc = HTTP_STATUS_INTERNAL_ERROR;
- vec_free (header);
+ vec_free (response);
goto error;
}
- vec_free (header);
+ vec_free (response);
- /* Start sending the actual data */
- http_state_change (hc, HTTP_STATE_APP_IO_MORE_DATA);
+ if (msg.data.body_len)
+ {
+ /* Start sending the actual data */
+ http_buffer_init (&hc->tx_buf, msg_to_buf_type[msg.data.type],
+ as->tx_fifo, msg.data.body_len);
+ http_state_change (hc, HTTP_STATE_APP_IO_MORE_DATA);
+ sm_result = HTTP_SM_CONTINUE;
+ }
+ else
+ {
+ /* No response body, we are done */
+ http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+ sm_result = HTTP_SM_STOP;
+ }
- ASSERT (sp->max_burst_size >= offset);
- sp->max_burst_size -= offset;
- return HTTP_SM_CONTINUE;
+ ASSERT (sp->max_burst_size >= sent);
+ sp->max_burst_size -= sent;
+ return sm_result;
error:
- clib_warning ("unexpected msg type from app %u", msg.type);
http_send_error (hc, sc);
http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
session_transport_closing_notify (&hc->connection);
@@ -793,9 +1369,11 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp)
{
http_msg_t msg;
session_t *as;
- u8 *buf = 0, *request;
- u32 offset;
+ u8 *target_buff = 0, *request = 0, *target;
+ u32 sent;
int rv;
+ http_sm_result_t sm_result = HTTP_SM_ERROR;
+ http_state_t next_state;
as = session_get_from_handle (hc->h_pa_session_handle);
@@ -814,29 +1392,131 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp)
goto error;
}
- vec_validate (buf, msg.data.len - 1);
- rv = svm_fifo_dequeue (as->tx_fifo, msg.data.len, buf);
- ASSERT (rv == msg.data.len);
+ /* read request target */
+ if (msg.data.type == HTTP_MSG_DATA_PTR)
+ {
+ uword target_ptr;
+ rv = svm_fifo_dequeue (as->tx_fifo, sizeof (target_ptr),
+ (u8 *) &target_ptr);
+ ASSERT (rv == sizeof (target_ptr));
+ target = uword_to_pointer (target_ptr, u8 *);
+ }
+ else
+ {
+ vec_validate (target_buff, msg.data.target_path_len - 1);
+ rv =
+ svm_fifo_dequeue (as->tx_fifo, msg.data.target_path_len, target_buff);
+ ASSERT (rv == msg.data.target_path_len);
+ target = target_buff;
+ }
- request = format (0, http_request_template, buf);
- offset = http_send_data (hc, request, vec_len (request), 0);
- if (offset != vec_len (request))
+ /* currently we support only GET and POST method */
+ if (msg.method_type == HTTP_REQ_GET)
+ {
+ if (msg.data.body_len)
+ {
+ clib_warning ("GET request shouldn't include data");
+ goto error;
+ }
+ /*
+ * Add "protocol layer" headers:
+ * - host
+ * - user agent
+ */
+ request = format (0, http_get_request_template,
+ /* target */
+ target,
+ /* Host */
+ hc->host,
+ /* User-Agent */
+ hc->app_name,
+ /* Any headers from app? */
+ msg.data.headers_len ? "" : "\r\n");
+
+ next_state = HTTP_STATE_WAIT_SERVER_REPLY;
+ sm_result = HTTP_SM_STOP;
+ }
+ else if (msg.method_type == HTTP_REQ_POST)
{
- clib_warning ("sending request failed!");
+ if (!msg.data.body_len)
+ {
+ clib_warning ("POST request should include data");
+ goto error;
+ }
+ /*
+ * Add "protocol layer" headers:
+ * - host
+ * - user agent
+ * - content length
+ */
+ request = format (0, http_post_request_template,
+ /* target */
+ target,
+ /* Host */
+ hc->host,
+ /* User-Agent */
+ hc->app_name,
+ /* Content-Length */
+ msg.data.body_len,
+ /* Any headers from app? */
+ msg.data.headers_len ? "" : "\r\n");
+
+ http_buffer_init (&hc->tx_buf, msg_to_buf_type[msg.data.type],
+ as->tx_fifo, msg.data.body_len);
+
+ next_state = HTTP_STATE_APP_IO_MORE_DATA;
+ sm_result = HTTP_SM_CONTINUE;
+ }
+ else
+ {
+ clib_warning ("unsupported method %d", msg.method_type);
goto error;
}
- http_state_change (hc, HTTP_STATE_WAIT_SERVER_REPLY);
+ /* Add headers from app (if any) */
+ if (msg.data.headers_len)
+ {
+ HTTP_DBG (0, "got headers from app, len %d", msg.data.headers_len);
+ if (msg.data.type == HTTP_MSG_DATA_PTR)
+ {
+ uword app_headers_ptr;
+ rv = svm_fifo_dequeue (as->tx_fifo, sizeof (app_headers_ptr),
+ (u8 *) &app_headers_ptr);
+ ASSERT (rv == sizeof (app_headers_ptr));
+ vec_append (request, uword_to_pointer (app_headers_ptr, u8 *));
+ }
+ else
+ {
+ u32 orig_len = vec_len (request);
+ vec_resize (request, msg.data.headers_len);
+ u8 *p = request + orig_len;
+ rv = svm_fifo_dequeue (as->tx_fifo, msg.data.headers_len, p);
+ ASSERT (rv == msg.data.headers_len);
+ }
+ }
+ HTTP_DBG (3, "%v", request);
- vec_free (buf);
- vec_free (request);
+ sent = http_send_data (hc, request, vec_len (request));
+ if (sent != vec_len (request))
+ {
+ clib_warning ("sending request-line and headers failed!");
+ sm_result = HTTP_SM_ERROR;
+ goto error;
+ }
- return HTTP_SM_STOP;
+ http_state_change (hc, next_state);
+ goto done;
error:
+ svm_fifo_dequeue_drop_all (as->tx_fifo);
session_transport_closing_notify (&hc->connection);
+ session_transport_closed_notify (&hc->connection);
http_disconnect_transport (hc);
- return HTTP_SM_ERROR;
+
+done:
+ vec_free (target_buff);
+ vec_free (request);
+ return sm_result;
}
static http_sm_result_t
@@ -891,7 +1571,14 @@ http_state_client_io_more_data (http_conn_t *hc, transport_send_params_t *sp)
return HTTP_SM_ERROR;
}
hc->to_recv -= rv;
- HTTP_DBG (1, "drained %d from ts; remains %d", rv, hc->to_recv);
+ HTTP_DBG (1, "drained %d from ts; remains %lu", rv, hc->to_recv);
+
+ /* Finished transaction:
+ * server back to HTTP_STATE_WAIT_APP_REPLY
+ * client to HTTP_STATE_WAIT_APP_METHOD */
+ if (hc->to_recv == 0)
+ http_state_change (hc, hc->is_server ? HTTP_STATE_WAIT_APP_REPLY :
+ HTTP_STATE_WAIT_APP_METHOD);
app_wrk = app_worker_get_if_valid (as->app_wrk_index);
if (app_wrk)
@@ -929,7 +1616,7 @@ http_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp)
if (!http_buffer_is_drained (hb))
{
if (sent && svm_fifo_set_event (ts->tx_fifo))
- session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+ session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH)
{
@@ -943,10 +1630,13 @@ http_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp)
else
{
if (sent && svm_fifo_set_event (ts->tx_fifo))
- session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX_FLUSH);
+ session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX_FLUSH);
- /* Finished transaction, back to HTTP_STATE_WAIT_METHOD */
- http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+ /* Finished transaction:
+ * server back to HTTP_STATE_WAIT_METHOD
+ * client to HTTP_STATE_WAIT_SERVER_REPLY */
+ http_state_change (hc, hc->is_server ? HTTP_STATE_WAIT_CLIENT_METHOD :
+ HTTP_STATE_WAIT_SERVER_REPLY);
http_buffer_free (&hc->tx_buf);
}
@@ -987,23 +1677,75 @@ http_req_run_state_machine (http_conn_t *hc, transport_send_params_t *sp)
}
/* Tunnel receive path: relay raw bytes from the transport session's rx fifo
 * into the app session's rx fifo, with no HTTP framing or parsing.
 * Called when the connection is in HTTP_CONN_STATE_TUNNEL.
 *
 * @param ts  transport session the data arrived on
 * @param hc  http connection that owns @a ts
 * @return always 0
 */
static int
http_tunnel_rx (session_t *ts, http_conn_t *hc)
{
  u32 max_deq, max_enq, max_read, n_segs = 2;
  svm_fifo_seg_t segs[n_segs]; /* up to 2 segments to cover fifo wrap */
  int n_written = 0;
  session_t *as;
  app_worker_t *app_wrk;

  HTTP_DBG (1, "tunnel received data from client");

  as = session_get_from_handle (hc->h_pa_session_handle);

  /* Nothing pending on the transport side */
  max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
  if (PREDICT_FALSE (max_deq == 0))
    {
      HTTP_DBG (1, "max_deq == 0");
      return 0;
    }
  /* App rx fifo full: request a dequeue notification so the relay resumes
   * once the app drains some data */
  max_enq = svm_fifo_max_enqueue (as->rx_fifo);
  if (max_enq == 0)
    {
      HTTP_DBG (1, "app's rx fifo full");
      svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
      return 0;
    }
  /* Move as much as both fifos allow, gathering segments to avoid an
   * intermediate copy buffer */
  max_read = clib_min (max_enq, max_deq);
  svm_fifo_segments (ts->rx_fifo, 0, segs, &n_segs, max_read);
  n_written = svm_fifo_enqueue_segments (as->rx_fifo, segs, n_segs, 0);
  ASSERT (n_written > 0);
  HTTP_DBG (1, "transfered %u bytes", n_written);
  svm_fifo_dequeue_drop (ts->rx_fifo, n_written);
  /* Wake the app; if transport data remains, reschedule ourselves */
  app_wrk = app_worker_get_if_valid (as->app_wrk_index);
  if (app_wrk)
    app_worker_rx_notify (app_wrk, as);
  if (svm_fifo_max_dequeue_cons (ts->rx_fifo))
    session_program_rx_io_evt (session_handle (ts));

  return 0;
}
+
+static int
http_ts_rx_callback (session_t *ts)
{
http_conn_t *hc;
+ HTTP_DBG (1, "hc [%u]%x", ts->thread_index, ts->opaque);
+
hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
- if (!hc)
+
+ if (hc->state == HTTP_CONN_STATE_CLOSED)
{
- clib_warning ("http connection not found (ts %d)", ts->opaque);
- return -1;
+ HTTP_DBG (1, "conn closed");
+ svm_fifo_dequeue_drop_all (ts->tx_fifo);
+ return 0;
}
- if (hc->state == HTTP_CONN_STATE_CLOSED)
+ if (hc->state == HTTP_CONN_STATE_TUNNEL)
+ return http_tunnel_rx (ts, hc);
+
+ if (!http_state_is_rx_valid (hc))
{
+ if (hc->state != HTTP_CONN_STATE_CLOSED)
+ clib_warning ("app data req state '%U' session state %u",
+ format_http_state, hc->http_state, hc->state);
svm_fifo_dequeue_drop_all (ts->tx_fifo);
return 0;
}
+ HTTP_DBG (1, "run state machine");
http_req_run_state_machine (hc, 0);
if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED)
@@ -1020,6 +1762,7 @@ http_ts_builtin_tx_callback (session_t *ts)
http_conn_t *hc;
hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+ HTTP_DBG (1, "transport connection reschedule");
transport_connection_reschedule (&hc->connection);
return 0;
@@ -1034,21 +1777,36 @@ http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf)
return;
hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
- if (!hc)
- {
- clib_warning ("no http connection for %u", ts->session_index);
- return;
- }
+
+ HTTP_DBG (1, "going to free hc [%u]%x", ts->thread_index, ts->opaque);
vec_free (hc->rx_buf);
http_buffer_free (&hc->tx_buf);
- http_conn_timer_stop (hc);
+
+ if (hc->pending_timer == 0)
+ http_conn_timer_stop (hc);
session_transport_delete_notify (&hc->connection);
+
+ if (!hc->is_server)
+ {
+ vec_free (hc->app_name);
+ vec_free (hc->host);
+ }
http_conn_free (hc);
}
/* Transport half-open session cleanup notification: propagate the deletion
 * to the app layer and free our half-open connection state.
 * ts->opaque carries the http half-open connection index. */
static void
http_ts_ho_cleanup_callback (session_t *ts)
{
  http_conn_t *ho_hc;
  HTTP_DBG (1, "half open: %x", ts->opaque);
  ho_hc = http_ho_conn_get (ts->opaque);
  session_half_open_delete_notify (&ho_hc->connection);
  http_ho_conn_free (ho_hc);
}
+
int
http_add_segment_callback (u32 client_index, u64 segment_handle)
{
@@ -1068,6 +1826,7 @@ static session_cb_vft_t http_app_cb_vft = {
.session_connected_callback = http_ts_connected_callback,
.session_reset_callback = http_ts_reset_callback,
.session_cleanup_callback = http_ts_cleanup_callback,
+ .half_open_cleanup_callback = http_ts_ho_cleanup_callback,
.add_segment_callback = http_add_segment_callback,
.del_segment_callback = http_del_segment_callback,
.builtin_app_rx_callback = http_ts_rx_callback,
@@ -1090,8 +1849,6 @@ http_transport_enable (vlib_main_t *vm, u8 is_en)
return 0;
}
- vec_validate (hm->wrk, vlib_num_workers ());
-
clib_memset (a, 0, sizeof (*a));
clib_memset (options, 0, sizeof (options));
@@ -1113,10 +1870,16 @@ http_transport_enable (vlib_main_t *vm, u8 is_en)
hm->app_index = a->app_index;
vec_free (a->name);
+ if (hm->is_init)
+ return 0;
+
+ vec_validate (hm->wrk, vlib_num_workers ());
+
clib_timebase_init (&hm->timebase, 0 /* GMT */, CLIB_TIMEBASE_DAYLIGHT_NONE,
&vm->clib_time /* share the system clock */);
- http_timers_init (vm, http_conn_timeout_cb);
+ http_timers_init (vm, http_conn_timeout_cb, http_conn_invalidate_timer_cb);
+ hm->is_init = 1;
return 0;
}
@@ -1131,6 +1894,8 @@ http_transport_connect (transport_endpoint_cfg_t *tep)
http_conn_t *hc;
int error;
u32 hc_index;
+ session_t *ho;
+ transport_endpt_ext_cfg_t *ext_cfg;
app_worker_t *app_wrk = app_worker_get (sep->app_wrk_index);
clib_memset (cargs, 0, sizeof (*cargs));
@@ -1140,18 +1905,48 @@ http_transport_connect (transport_endpoint_cfg_t *tep)
app = application_get (app_wrk->app_index);
cargs->sep_ext.ns_index = app->ns_index;
- hc_index = http_conn_alloc_w_thread (0 /* ts->thread_index */);
- hc = http_conn_get_w_thread (hc_index, 0);
+ hc_index = http_ho_conn_alloc ();
+ hc = http_ho_conn_get (hc_index);
hc->h_pa_wrk_index = sep->app_wrk_index;
hc->h_pa_app_api_ctx = sep->opaque;
hc->state = HTTP_CONN_STATE_CONNECTING;
cargs->api_context = hc_index;
+ ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_HTTP);
+ if (ext_cfg)
+ {
+ HTTP_DBG (1, "app set timeout %u", ext_cfg->opaque);
+ hc->timeout = ext_cfg->opaque;
+ }
+
+ hc->is_server = 0;
+
+ if (vec_len (app->name))
+ hc->app_name = vec_dup (app->name);
+ else
+ hc->app_name = format (0, "VPP HTTP client");
+
+ if (sep->is_ip4)
+ hc->host = format (0, "%U:%d", format_ip4_address, &sep->ip.ip4,
+ clib_net_to_host_u16 (sep->port));
+ else
+ hc->host = format (0, "%U:%d", format_ip6_address, &sep->ip.ip6,
+ clib_net_to_host_u16 (sep->port));
+
HTTP_DBG (1, "hc ho_index %x", hc_index);
if ((error = vnet_connect (cargs)))
return error;
+ ho = session_alloc_for_half_open (&hc->connection);
+ ho->app_wrk_index = app_wrk->wrk_index;
+ ho->ho_index = app_worker_add_half_open (app_wrk, session_handle (ho));
+ ho->opaque = sep->opaque;
+ ho->session_type =
+ session_type_from_proto_and_ip (TRANSPORT_PROTO_HTTP, sep->is_ip4);
+ hc->h_tc_session_handle = cargs->sh;
+ hc->c_s_index = ho->session_index;
+
return 0;
}
@@ -1163,11 +1958,12 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
http_main_t *hm = &http_main;
session_endpoint_cfg_t *sep;
app_worker_t *app_wrk;
- transport_proto_t tp;
+ transport_proto_t tp = TRANSPORT_PROTO_TCP;
app_listener_t *al;
application_t *app;
http_conn_t *lhc;
u32 lhc_index;
+ transport_endpt_ext_cfg_t *ext_cfg;
sep = (session_endpoint_cfg_t *) tep;
@@ -1177,7 +1973,13 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
args->app_index = hm->app_index;
args->sep_ext = *sep;
args->sep_ext.ns_index = app->ns_index;
- tp = sep->ext_cfg ? TRANSPORT_PROTO_TLS : TRANSPORT_PROTO_TCP;
+
+ ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+ if (ext_cfg)
+ {
+ HTTP_DBG (1, "app set tls");
+ tp = TRANSPORT_PROTO_TLS;
+ }
args->sep_ext.transport_proto = tp;
if (vnet_listen (args))
@@ -1186,6 +1988,13 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
lhc_index = http_listener_alloc ();
lhc = http_listener_get (lhc_index);
+ ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_HTTP);
+ if (ext_cfg && ext_cfg->opaque)
+ {
+ HTTP_DBG (1, "app set timeout %u", ext_cfg->opaque);
+ lhc->timeout = ext_cfg->opaque;
+ }
+
/* Grab transport connection listener and link to http listener */
lhc->h_tc_session_handle = args->handle;
al = app_listener_get_w_handle (lhc->h_tc_session_handle);
@@ -1199,6 +2008,13 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
lhc->c_s_index = app_listener_index;
lhc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
+ lhc->is_server = 1;
+
+ if (vec_len (app->name))
+ lhc->app_name = vec_dup (app->name);
+ else
+ lhc->app_name = format (0, "VPP server app");
+
return lhc_index;
}
@@ -1230,7 +2046,7 @@ http_transport_close (u32 hc_index, u32 thread_index)
session_t *as;
http_conn_t *hc;
- HTTP_DBG (1, "App disconnecting %x", hc_index);
+ HTTP_DBG (1, "App disconnecting [%u]%x", thread_index, hc_index);
hc = http_conn_get_w_thread (hc_index, thread_index);
if (hc->state == HTTP_CONN_STATE_CONNECTING)
@@ -1239,7 +2055,11 @@ http_transport_close (u32 hc_index, u32 thread_index)
http_disconnect_transport (hc);
return;
}
-
+ else if (hc->state == HTTP_CONN_STATE_CLOSED)
+ {
+ HTTP_DBG (1, "nothing to do, already closed");
+ return;
+ }
as = session_get_from_handle (hc->h_pa_session_handle);
/* Nothing more to send, confirm close */
@@ -1270,27 +2090,84 @@ http_transport_get_listener (u32 listener_index)
}
/* Tunnel transmit path: relay raw bytes from the app session's tx fifo to
 * the transport session's tx fifo (no HTTP framing), bounded by the pacer
 * burst budget in @a sp.  Deschedules the connection and asks for a dequeue
 * notification when the transport tx fifo is nearly full.
 *
 * @return pacer credit: bytes moved divided by TRANSPORT_PACER_MIN_MSS
 *         (at least 1 when anything was written), 0 otherwise
 */
static int
http_tunnel_tx (http_conn_t *hc, session_t *as, transport_send_params_t *sp)
{
  u32 max_deq, max_enq, max_read, n_segs = 2;
  svm_fifo_seg_t segs[n_segs]; /* up to 2 segments to cover fifo wrap */
  session_t *ts;
  int n_written = 0;

  HTTP_DBG (1, "tunnel received data from target");

  ts = session_get_from_handle (hc->h_tc_session_handle);

  /* Nothing to send from the app */
  max_deq = svm_fifo_max_dequeue_cons (as->tx_fifo);
  if (PREDICT_FALSE (max_deq == 0))
    {
      HTTP_DBG (1, "max_deq == 0");
      goto check_fifo;
    }
  /* Transport tx fifo full; fall through to the almost-full handling */
  max_enq = svm_fifo_max_enqueue_prod (ts->tx_fifo);
  if (max_enq == 0)
    {
      HTTP_DBG (1, "ts tx fifo full");
      goto check_fifo;
    }
  /* Bound the copy by both fifos and the pacer's burst budget */
  max_read = clib_min (max_enq, max_deq);
  max_read = clib_min (max_read, sp->max_burst_size);
  svm_fifo_segments (as->tx_fifo, 0, segs, &n_segs, max_read);
  n_written = svm_fifo_enqueue_segments (ts->tx_fifo, segs, n_segs, 0);
  ASSERT (n_written > 0);
  HTTP_DBG (1, "transfered %u bytes", n_written);
  /* Account the moved bytes against the send-params budget before dropping
   * them from the app fifo */
  sp->bytes_dequeued += n_written;
  sp->max_burst_size -= n_written;
  svm_fifo_dequeue_drop (as->tx_fifo, n_written);
  if (svm_fifo_set_event (ts->tx_fifo))
    session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);

check_fifo:
  /* Deschedule and wait for deq notification if ts fifo is almost full */
  if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH)
    {
      svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
      transport_connection_deschedule (&hc->connection);
      sp->flags |= TRANSPORT_SND_F_DESCHED;
    }

  return n_written > 0 ? clib_max (n_written / TRANSPORT_PACER_MIN_MSS, 1) : 0;
}
+
+static int
http_app_tx_callback (void *session, transport_send_params_t *sp)
{
session_t *as = (session_t *) session;
u32 max_burst_sz, sent;
http_conn_t *hc;
- HTTP_DBG (1, "app session conn index %x", as->connection_index);
+ HTTP_DBG (1, "hc [%u]%x", as->thread_index, as->connection_index);
hc = http_conn_get_w_thread (as->connection_index, as->thread_index);
+
+ max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS;
+ sp->max_burst_size = max_burst_sz;
+
+ if (hc->state == HTTP_CONN_STATE_TUNNEL)
+ return http_tunnel_tx (hc, as, sp);
+
if (!http_state_is_tx_valid (hc))
{
if (hc->state != HTTP_CONN_STATE_CLOSED)
- clib_warning ("app data req state '%U' session state %u",
- format_http_state, hc->http_state, hc->state);
+ {
+ clib_warning ("hc [%u]%x invalid tx state http state "
+ "'%U', session state %u",
+ as->thread_index, as->connection_index,
+ format_http_state, hc->http_state, hc->state);
+ }
svm_fifo_dequeue_drop_all (as->tx_fifo);
return 0;
}
- max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS;
- sp->max_burst_size = max_burst_sz;
-
+ HTTP_DBG (1, "run state machine");
http_req_run_state_machine (hc, sp);
if (hc->state == HTTP_CONN_STATE_APP_CLOSED)
@@ -1304,6 +2181,19 @@ http_app_tx_callback (void *session, transport_send_params_t *sp)
return sent > 0 ? clib_max (sent / TRANSPORT_PACER_MIN_MSS, 1) : 0;
}
+static int
+http_app_rx_evt_cb (transport_connection_t *tc)
+{
+ http_conn_t *hc = (http_conn_t *) tc;
+ HTTP_DBG (1, "hc [%u]%x", vlib_get_thread_index (), hc->h_hc_index);
+ session_t *ts = session_get_from_handle (hc->h_tc_session_handle);
+
+ if (hc->state == HTTP_CONN_STATE_TUNNEL)
+ return http_tunnel_rx (ts, hc);
+
+ return 0;
+}
+
static void
http_transport_get_endpoint (u32 hc_index, u32 thread_index,
transport_endpoint_t *tep, u8 is_lcl)
@@ -1361,6 +2251,9 @@ format_http_conn_state (u8 *s, va_list *args)
case HTTP_CONN_STATE_ESTABLISHED:
s = format (s, "ESTABLISHED");
break;
+ case HTTP_CONN_STATE_TUNNEL:
+ s = format (s, "TUNNEL");
+ break;
case HTTP_CONN_STATE_TRANSPORT_CLOSED:
s = format (s, "TRANSPORT_CLOSED");
break;
@@ -1412,18 +2305,61 @@ format_http_transport_listener (u8 *s, va_list *args)
return s;
}
/* format_half_open hook of the transport vft: one-line summary of an http
 * half-open connection and the underlying transport half-open session it
 * maps to.  va_args must stay in (index, thread_index, verbose) order, as
 * supplied by the session layer. */
static u8 *
format_http_transport_half_open (u8 *s, va_list *args)
{
  u32 ho_index = va_arg (*args, u32);
  u32 __clib_unused thread_index = va_arg (*args, u32);
  u32 __clib_unused verbose = va_arg (*args, u32);
  http_conn_t *ho_hc;
  session_t *tcp_ho;

  ho_hc = http_ho_conn_get (ho_index);
  tcp_ho = session_get_from_handle (ho_hc->h_tc_session_handle);

  s = format (s, "[%d:%d][H] half-open app_wrk %u ts %d:%d",
	      ho_hc->c_thread_index, ho_hc->c_s_index, ho_hc->h_pa_wrk_index,
	      tcp_ho->thread_index, tcp_ho->session_index);
  return s;
}
+
+static transport_connection_t *
+http_transport_get_ho (u32 ho_hc_index)
+{
+ http_conn_t *ho_hc;
+
+ HTTP_DBG (1, "half open: %x", ho_hc_index);
+ ho_hc = http_ho_conn_get (ho_hc_index);
+ return &ho_hc->connection;
+}
+
+static void
+http_transport_cleanup_ho (u32 ho_hc_index)
+{
+ http_conn_t *ho_hc;
+
+ HTTP_DBG (1, "half open: %x", ho_hc_index);
+ ho_hc = http_ho_conn_get (ho_hc_index);
+ session_cleanup_half_open (ho_hc->h_tc_session_handle);
+ http_ho_conn_free (ho_hc);
+}
+
static const transport_proto_vft_t http_proto = {
.enable = http_transport_enable,
.connect = http_transport_connect,
.start_listen = http_start_listen,
.stop_listen = http_stop_listen,
.close = http_transport_close,
+ .cleanup_ho = http_transport_cleanup_ho,
.custom_tx = http_app_tx_callback,
+ .app_rx_evt = http_app_rx_evt_cb,
.get_connection = http_transport_get_connection,
.get_listener = http_transport_get_listener,
+ .get_half_open = http_transport_get_ho,
.get_transport_endpoint = http_transport_get_endpoint,
.format_connection = format_http_transport_connection,
.format_listener = format_http_transport_listener,
+ .format_half_open = format_http_transport_half_open,
.transport_options = {
.name = "http",
.short_name = "H",
@@ -1436,6 +2372,7 @@ static clib_error_t *
http_transport_init (vlib_main_t *vm)
{
http_main_t *hm = &http_main;
+ int i;
transport_register_protocol (TRANSPORT_PROTO_HTTP, &http_proto,
FIB_PROTOCOL_IP4, ~0);
@@ -1447,7 +2384,26 @@ http_transport_init (vlib_main_t *vm)
hm->first_seg_size = 32 << 20;
hm->fifo_size = 512 << 10;
- return 0;
+ /* Setup u16 to http_status_code_t map */
+ /* Unrecognized status code is equivalent to the x00 status */
+ vec_validate (hm->sc_by_u16, 599);
+ for (i = 100; i < 200; i++)
+ hm->sc_by_u16[i] = HTTP_STATUS_CONTINUE;
+ for (i = 200; i < 300; i++)
+ hm->sc_by_u16[i] = HTTP_STATUS_OK;
+ for (i = 300; i < 400; i++)
+ hm->sc_by_u16[i] = HTTP_STATUS_MULTIPLE_CHOICES;
+ for (i = 400; i < 500; i++)
+ hm->sc_by_u16[i] = HTTP_STATUS_BAD_REQUEST;
+ for (i = 500; i < 600; i++)
+ hm->sc_by_u16[i] = HTTP_STATUS_INTERNAL_ERROR;
+
+ /* Registered status codes */
+#define _(c, s, str) hm->sc_by_u16[c] = HTTP_STATUS_##s;
+ foreach_http_status_code
+#undef _
+
+ return 0;
}
VLIB_INIT_FUNCTION (http_transport_init);
diff --git a/src/plugins/http/http.h b/src/plugins/http/http.h
index dbae5ac4611..a117f374efa 100644
--- a/src/plugins/http/http.h
+++ b/src/plugins/http/http.h
@@ -16,6 +16,8 @@
#ifndef SRC_PLUGINS_HTTP_HTTP_H_
#define SRC_PLUGINS_HTTP_HTTP_H_
+#include <ctype.h>
+
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
@@ -49,11 +51,20 @@ typedef struct http_conn_id_
STATIC_ASSERT (sizeof (http_conn_id_t) <= TRANSPORT_CONN_ID_LEN,
"ctx id must be less than TRANSPORT_CONN_ID_LEN");
/* Non-owning reference to a length-delimited byte range (need not be
 * NUL-terminated). */
typedef struct
{
  char *base; /* start of the token */
  uword len;  /* number of bytes at base */
} http_token_t;

/* Expand a string literal into the "base, len" argument pair expected by
 * token APIs; sizeof (s) - 1 excludes the terminating NUL. */
#define http_token_lit(s) (s), sizeof (s) - 1
+
typedef enum http_conn_state_
{
HTTP_CONN_STATE_LISTEN,
HTTP_CONN_STATE_CONNECTING,
HTTP_CONN_STATE_ESTABLISHED,
+ HTTP_CONN_STATE_TUNNEL,
HTTP_CONN_STATE_TRANSPORT_CLOSED,
HTTP_CONN_STATE_APP_CLOSED,
HTTP_CONN_STATE_CLOSED
@@ -75,6 +86,7 @@ typedef enum http_req_method_
{
HTTP_REQ_GET = 0,
HTTP_REQ_POST,
+ HTTP_REQ_CONNECT,
} http_req_method_t;
typedef enum http_msg_type_
@@ -83,86 +95,96 @@ typedef enum http_msg_type_
HTTP_MSG_REPLY
} http_msg_type_t;
/* Forms of the HTTP request-target (RFC 9112 section 3.2). */
typedef enum http_target_form_
{
  HTTP_TARGET_ORIGIN_FORM,    /* absolute path + optional query, e.g. "/a?b" */
  HTTP_TARGET_ABSOLUTE_FORM,  /* full URI, e.g. "http://host/path" */
  HTTP_TARGET_AUTHORITY_FORM, /* "host:port", used by CONNECT */
  HTTP_TARGET_ASTERISK_FORM   /* "*", used by server-wide OPTIONS */
} http_target_form_t;
+
/* File-extension to MIME content-type table, expanded with a user-supplied
 * _ (symbol, extension, type) x-macro.
 * NOTE: the AUDO_* symbol names are historical misspellings kept for source
 * compatibility; the MIME strings are the authoritative values.  The ".mid"
 * entry previously emitted the invalid type "audo/midi"; the registered
 * type for MIDI is "audio/midi". */
#define foreach_http_content_type                                             \
  _ (APP_7Z, ".7z", "application/x-7z-compressed")                            \
  _ (APP_DOC, ".doc", "application/msword")                                   \
  _ (APP_DOCX, ".docx",                                                       \
     "application/vnd.openxmlformats-"                                        \
     "officedocument.wordprocessingml.document")                              \
  _ (APP_EPUB, ".epub", "application/epub+zip")                               \
  _ (APP_FONT, ".eot", "application/vnd.ms-fontobject")                       \
  _ (APP_JAR, ".jar", "application/java-archive")                             \
  _ (APP_JSON, ".json", "application/json")                                   \
  _ (APP_JSON_LD, ".jsonld", "application/ld+json")                           \
  _ (APP_MPKG, ".mpkg", "application/vnd.apple.installer+xml")                \
  _ (APP_ODP, ".odp", "application/vnd.oasis.opendocument.presentation")      \
  _ (APP_ODS, ".ods", "application/vnd.oasis.opendocument.spreadsheet")       \
  _ (APP_ODT, ".odt", "application/vnd.oasis.opendocument.text")              \
  _ (APP_OGX, ".ogx", "application/ogg")                                      \
  _ (APP_PDF, ".pdf", "application/pdf")                                      \
  _ (APP_PHP, ".php", "application/x-httpd-php")                              \
  _ (APP_PPT, ".ppt", "application/vnd.ms-powerpoint")                        \
  _ (APP_PPTX, ".pptx", "application/vnd.ms-powerpoint")                      \
  _ (APP_RAR, ".rar", "application/vnd.rar")                                  \
  _ (APP_RTF, ".rtf", "application/rtf")                                      \
  _ (APP_SH, ".sh", "application/x-sh")                                       \
  _ (APP_TAR, ".tar", "application/x-tar")                                    \
  _ (APP_VSD, ".vsd", "application/vnd.visio")                                \
  _ (APP_XHTML, ".xhtml", "application/xhtml+xml")                            \
  _ (APP_XLS, ".xls", "application/vnd.ms-excel")                             \
  _ (APP_XML, ".xml", "application/xml")                                      \
  _ (APP_XSLX, ".xlsx",                                                       \
     "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")     \
  _ (APP_XUL, ".xul", "application/vnd.mozilla.xul+xml")                      \
  _ (APP_X_WWW_FORM_URLENCODED, ".invalid",                                   \
     "application/x-www-form-urlencoded")                                     \
  _ (APP_ZIP, ".zip", "application/zip")                                      \
  _ (AUDIO_AAC, ".aac", "audio/aac")                                          \
  _ (AUDIO_CD, ".cda", "application/x-cdf")                                   \
  _ (AUDIO_WAV, ".wav", "audio/wav")                                          \
  _ (AUDIO_WEBA, ".weba", "audio/webm")                                       \
  _ (AUDO_MIDI, ".midi", "audio/midi")                                        \
  _ (AUDO_MID, ".mid", "audio/midi")                                          \
  _ (AUDO_MP3, ".mp3", "audio/mpeg")                                          \
  _ (AUDO_OGA, ".oga", "audio/ogg")                                           \
  _ (AUDO_OPUS, ".opus", "audio/opus")                                        \
  _ (APP_OCTET_STREAM, ".bin", "application/octet-stream")                    \
  _ (BZIP2, ".bz2", "application/x-bzip2")                                    \
  _ (BZIP, ".bz", "application/x-bzip")                                       \
  _ (FONT_OTF, ".otf", "font/otf")                                            \
  _ (FONT_TTF, ".ttf", "font/ttf")                                            \
  _ (FONT_WOFF2, ".woff2", "font/woff2")                                      \
  _ (FONT_WOFF, ".woff", "font/woff")                                         \
  _ (GZIP, ".gz", "application/gzip")                                         \
  _ (IMAGE_AVIF, ".avif", "image/avif")                                       \
  _ (IMAGE_BMP, ".bmp", "image/bmp")                                          \
  _ (IMAGE_GIF, ".gif", "image/gif")                                          \
  _ (IMAGE_ICON, ".ico", "image/vnd.microsoft.icon")                          \
  _ (IMAGE_JPEG, ".jpeg", "image/jpeg")                                       \
  _ (IMAGE_JPG, ".jpg", "image/jpeg")                                         \
  _ (IMAGE_PNG, ".png", "image/png")                                          \
  _ (IMAGE_SVG, ".svg", "image/svg+xml")                                      \
  _ (IMAGE_TIFF, ".tiff", "image/tiff")                                       \
  _ (IMAGE_TIF, ".tif", "image/tiff")                                         \
  _ (IMAGE_WEBP, ".webp", "image/webp")                                       \
  _ (SCRIPT_CSH, ".csh", "application/x-csh")                                 \
  _ (TEXT_ABIWORD, ".abw", "application/x-abiword")                           \
  _ (TEXT_ARCHIVE, ".arc", "application/x-freearc")                           \
  _ (TEXT_AZW, ".azw", "application/vnd.amazon.ebook")                        \
  _ (TEXT_CALENDAR, ".ics", "text/calendar")                                  \
  _ (TEXT_CSS, ".css", "text/css")                                            \
  _ (TEXT_CSV, ".csv", "text/csv")                                            \
  _ (TEXT_HTM, ".htm", "text/html")                                           \
  _ (TEXT_HTML, ".html", "text/html")                                         \
  _ (TEXT_JS, ".js", "text/javascript")                                       \
  _ (TEXT_MJS, ".mjs", "text/javascript")                                     \
  _ (TEXT_PLAIN, ".txt", "text/plain")                                        \
  _ (VIDEO_3GP2, ".3g2", "video/3gpp2")                                       \
  _ (VIDEO_3GP, ".3gp", "video/3gpp")                                         \
  _ (VIDEO_AVI, ".avi", "video/x-msvideo")                                    \
  _ (VIDEO_MP4, ".mp4", "video/mp4")                                          \
  _ (VIDEO_MPEG, ".mpeg", "video/mpeg")                                       \
  _ (VIDEO_OGG, ".ogv", "video/ogg")                                          \
  _ (VIDEO_TS, ".ts", "video/mp2t")                                           \
  _ (VIDEO_WEBM, ".webm", "video/webm")
typedef enum http_content_type_
{
@@ -172,12 +194,50 @@ typedef enum http_content_type_
} http_content_type_t;
/* Registered HTTP status codes, expanded with a user-supplied
 * _ (numeric code, symbol, reason-phrase string) x-macro.  Reason phrases
 * follow RFC 9110 (the 422 phrase previously carried a spurious
 * underscore).  NON_UTHORITATIVE_INFORMATION keeps its historical
 * misspelling so existing users of the generated
 * HTTP_STATUS_NON_UTHORITATIVE_INFORMATION symbol keep compiling. */
#define foreach_http_status_code                                              \
  _ (100, CONTINUE, "100 Continue")                                           \
  _ (101, SWITCHING_PROTOCOLS, "101 Switching Protocols")                     \
  _ (200, OK, "200 OK")                                                       \
  _ (201, CREATED, "201 Created")                                             \
  _ (202, ACCEPTED, "202 Accepted")                                           \
  _ (203, NON_UTHORITATIVE_INFORMATION, "203 Non-Authoritative Information")  \
  _ (204, NO_CONTENT, "204 No Content")                                       \
  _ (205, RESET_CONTENT, "205 Reset Content")                                 \
  _ (206, PARTIAL_CONTENT, "206 Partial Content")                             \
  _ (300, MULTIPLE_CHOICES, "300 Multiple Choices")                           \
  _ (301, MOVED, "301 Moved Permanently")                                     \
  _ (302, FOUND, "302 Found")                                                 \
  _ (303, SEE_OTHER, "303 See Other")                                         \
  _ (304, NOT_MODIFIED, "304 Not Modified")                                   \
  _ (305, USE_PROXY, "305 Use Proxy")                                         \
  _ (307, TEMPORARY_REDIRECT, "307 Temporary Redirect")                       \
  _ (308, PERMANENT_REDIRECT, "308 Permanent Redirect")                       \
  _ (400, BAD_REQUEST, "400 Bad Request")                                     \
  _ (401, UNAUTHORIZED, "401 Unauthorized")                                   \
  _ (402, PAYMENT_REQUIRED, "402 Payment Required")                           \
  _ (403, FORBIDDEN, "403 Forbidden")                                         \
  _ (404, NOT_FOUND, "404 Not Found")                                         \
  _ (405, METHOD_NOT_ALLOWED, "405 Method Not Allowed")                       \
  _ (406, NOT_ACCEPTABLE, "406 Not Acceptable")                               \
  _ (407, PROXY_AUTHENTICATION_REQUIRED, "407 Proxy Authentication Required") \
  _ (408, REQUEST_TIMEOUT, "408 Request Timeout")                             \
  _ (409, CONFLICT, "409 Conflict")                                           \
  _ (410, GONE, "410 Gone")                                                   \
  _ (411, LENGTH_REQUIRED, "411 Length Required")                             \
  _ (412, PRECONDITION_FAILED, "412 Precondition Failed")                     \
  _ (413, CONTENT_TOO_LARGE, "413 Content Too Large")                         \
  _ (414, URI_TOO_LONG, "414 URI Too Long")                                   \
  _ (415, UNSUPPORTED_MEDIA_TYPE, "415 Unsupported Media Type")               \
  _ (416, RANGE_NOT_SATISFIABLE, "416 Range Not Satisfiable")                 \
  _ (417, EXPECTATION_FAILED, "417 Expectation Failed")                       \
  _ (421, MISDIRECTED_REQUEST, "421 Misdirected Request")                     \
  _ (422, UNPROCESSABLE_CONTENT, "422 Unprocessable Content")                 \
  _ (426, UPGRADE_REQUIRED, "426 Upgrade Required")                           \
  _ (500, INTERNAL_ERROR, "500 Internal Server Error")                        \
  _ (501, NOT_IMPLEMENTED, "501 Not Implemented")                             \
  _ (502, BAD_GATEWAY, "502 Bad Gateway")                                     \
  _ (503, SERVICE_UNAVAILABLE, "503 Service Unavailable")                     \
  _ (504, GATEWAY_TIMEOUT, "504 Gateway Timeout")                             \
  _ (505, HTTP_VERSION_NOT_SUPPORTED, "505 HTTP Version Not Supported")
typedef enum http_status_code_
{
@@ -187,6 +247,101 @@ typedef enum http_status_code_
HTTP_N_STATUS
} http_status_code_t;
+#define foreach_http_header_name \
+ _ (ACCEPT, "Accept") \
+ _ (ACCEPT_CHARSET, "Accept-Charset") \
+ _ (ACCEPT_ENCODING, "Accept-Encoding") \
+ _ (ACCEPT_LANGUAGE, "Accept-Language") \
+ _ (ACCEPT_RANGES, "Accept-Ranges") \
+ _ (ACCESS_CONTROL_ALLOW_CREDENTIALS, "Access-Control-Allow-Credentials") \
+ _ (ACCESS_CONTROL_ALLOW_HEADERS, "Access-Control-Allow-Headers") \
+ _ (ACCESS_CONTROL_ALLOW_METHODS, "Access-Control-Allow-Methods") \
+ _ (ACCESS_CONTROL_ALLOW_ORIGIN, "Access-Control-Allow-Origin") \
+ _ (ACCESS_CONTROL_EXPOSE_HEADERS, "Access-Control-Expose-Headers") \
+ _ (ACCESS_CONTROL_MAX_AGE, "Access-Control-Max-Age") \
+ _ (ACCESS_CONTROL_REQUEST_HEADERS, "Access-Control-Request-Headers") \
+ _ (ACCESS_CONTROL_REQUEST_METHOD, "Access-Control-Request-Method") \
+ _ (AGE, "Age") \
+ _ (ALLOW, "Allow") \
+ _ (ALPN, "ALPN") \
+ _ (ALT_SVC, "Alt-Svc") \
+ _ (ALT_USED, "Alt-Used") \
+ _ (ALTERNATES, "Alternates") \
+ _ (AUTHENTICATION_CONTROL, "Authentication-Control") \
+ _ (AUTHENTICATION_INFO, "Authentication-Info") \
+ _ (AUTHORIZATION, "Authorization") \
+ _ (CACHE_CONTROL, "Cache-Control") \
+ _ (CACHE_STATUS, "Cache-Status") \
+ _ (CAPSULE_PROTOCOL, "Capsule-Protocol") \
+ _ (CDN_CACHE_CONTROL, "CDN-Cache-Control") \
+ _ (CDN_LOOP, "CDN-Loop") \
+ _ (CLIENT_CERT, "Client-Cert") \
+ _ (CLIENT_CERT_CHAIN, "Client-Cert-Chain") \
+ _ (CLOSE, "Close") \
+ _ (CONNECTION, "Connection") \
+ _ (CONTENT_DIGEST, "Content-Digest") \
+ _ (CONTENT_DISPOSITION, "Content-Disposition") \
+ _ (CONTENT_ENCODING, "Content-Encoding") \
+ _ (CONTENT_LANGUAGE, "Content-Language") \
+ _ (CONTENT_LENGTH, "Content-Length") \
+ _ (CONTENT_LOCATION, "Content-Location") \
+ _ (CONTENT_RANGE, "Content-Range") \
+ _ (CONTENT_TYPE, "Content-Type") \
+ _ (COOKIE, "Cookie") \
+ _ (DATE, "Date") \
+ _ (DIGEST, "Digest") \
+ _ (DPOP, "DPoP") \
+ _ (DPOP_NONCE, "DPoP-Nonce") \
+ _ (EARLY_DATA, "Early-Data") \
+ _ (ETAG, "ETag") \
+ _ (EXPECT, "Expect") \
+ _ (EXPIRES, "Expires") \
+ _ (FORWARDED, "Forwarded") \
+ _ (FROM, "From") \
+ _ (HOST, "Host") \
+ _ (IF_MATCH, "If-Match") \
+ _ (IF_MODIFIED_SINCE, "If-Modified-Since") \
+ _ (IF_NONE_MATCH, "If-None-Match") \
+ _ (IF_RANGE, "If-Range") \
+ _ (IF_UNMODIFIED_SINCE, "If-Unmodified-Since") \
+ _ (KEEP_ALIVE, "Keep-Alive") \
+ _ (LAST_MODIFIED, "Last-Modified") \
+ _ (LINK, "Link") \
+ _ (LOCATION, "Location") \
+ _ (MAX_FORWARDS, "Max-Forwards") \
+ _ (ORIGIN, "Origin") \
+ _ (PRIORITY, "Priority") \
+ _ (PROXY_AUTHENTICATE, "Proxy-Authenticate") \
+ _ (PROXY_AUTHENTICATION_INFO, "Proxy-Authentication-Info") \
+ _ (PROXY_AUTHORIZATION, "Proxy-Authorization") \
+ _ (PROXY_STATUS, "Proxy-Status") \
+ _ (RANGE, "Range") \
+ _ (REFERER, "Referer") \
+ _ (REPR_DIGEST, "Repr-Digest") \
+ _ (SET_COOKIE, "Set-Cookie") \
+ _ (SIGNATURE, "Signature") \
+ _ (SIGNATURE_INPUT, "Signature-Input") \
+ _ (STRICT_TRANSPORT_SECURITY, "Strict-Transport-Security") \
+ _ (RETRY_AFTER, "Retry-After") \
+ _ (SERVER, "Server") \
+ _ (TE, "TE") \
+ _ (TRAILER, "Trailer") \
+ _ (TRANSFER_ENCODING, "Transfer-Encoding") \
+ _ (UPGRADE, "Upgrade") \
+ _ (USER_AGENT, "User-Agent") \
+ _ (VARY, "Vary") \
+ _ (VIA, "Via") \
+ _ (WANT_CONTENT_DIGEST, "Want-Content-Digest") \
+ _ (WANT_REPR_DIGEST, "Want-Repr-Digest") \
+ _ (WWW_AUTHENTICATE, "WWW-Authenticate")
+
+typedef enum http_header_name_
+{
+#define _(sym, str) HTTP_HEADER_##sym,
+ foreach_http_header_name
+#undef _
+} http_header_name_t;
+
typedef enum http_msg_data_type_
{
HTTP_MSG_DATA_INLINE,
@@ -197,6 +352,15 @@ typedef struct http_msg_data_
{
http_msg_data_type_t type;
u64 len;
+ http_target_form_t target_form;
+ u32 target_path_offset;
+ u32 target_path_len;
+ u32 target_query_offset;
+ u32 target_query_len;
+ u32 headers_offset;
+ u32 headers_len;
+ u32 body_offset;
+ u64 body_len;
u8 data[0];
} http_msg_data_t;
@@ -208,7 +372,6 @@ typedef struct http_msg_
http_req_method_t method_type;
http_status_code_t code;
};
- http_content_type_t content_type;
http_msg_data_t data;
} http_msg_t;
@@ -227,6 +390,11 @@ typedef struct http_tc_
http_conn_state_t state;
u32 timer_handle;
+ u32 timeout;
+ u8 pending_timer;
+ u8 *app_name;
+ u8 *host;
+ u8 is_server;
/*
* Current request
@@ -236,8 +404,20 @@ typedef struct http_tc_
u8 *rx_buf;
u32 rx_buf_offset;
http_buffer_t tx_buf;
- u32 to_recv;
+ u64 to_recv;
u32 bytes_dequeued;
+ u32 control_data_len; /* start line + headers + empty line */
+ http_target_form_t target_form;
+ u32 target_path_offset;
+ u32 target_path_len;
+ u32 target_query_offset;
+ u32 target_query_len;
+ u32 headers_offset;
+ u32 headers_len;
+ u32 body_offset;
+ u64 body_len;
+ u16 status_code;
+ u8 is_tunnel;
} http_conn_t;
typedef struct http_worker_
@@ -249,14 +429,17 @@ typedef struct http_main_
{
http_worker_t *wrk;
http_conn_t *listener_pool;
+ http_conn_t *ho_conn_pool;
u32 app_index;
clib_timebase_t timebase;
+ u16 *sc_by_u16;
/*
* Runtime config
*/
u8 debug_level;
+ u8 is_init;
/*
* Config
@@ -266,14 +449,764 @@ typedef struct http_main_
u32 fifo_size;
} http_main_t;
-static inline int
-http_state_is_tx_valid (http_conn_t *hc)
+/* Validate an HTTP request-target component (path or query) against the
+ * allowed character set (RFC3986 pchar / query rules). Accepts pct-encoded
+ * triplets ("%" HEXDIG HEXDIG) and reports via *is_encoded (optional)
+ * whether any percent-encoding was seen.
+ * Returns 0 on success, -1 on the first invalid character. */
+always_inline int
+_validate_target_syntax (u8 *target, u32 len, int is_query, int *is_encoded)
+{
+  int encoded = 0;
+  u32 i;
+
+  /* 256-bit bitmap of allowed ASCII bytes, indexed by character value */
+  static uword valid_chars[4] = {
+    /* !$&'()*+,-./0123456789:;= */
+    0x2fffffd200000000,
+    /* @ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ */
+    0x47fffffe87ffffff,
+    0x0000000000000000,
+    0x0000000000000000,
+  };
+
+  for (i = 0; i < len; i++)
+    {
+      if (clib_bitmap_get_no_check (valid_chars, target[i]))
+	continue;
+      /* target was already split after first question mark,
+       * for query it is valid character */
+      if (is_query && target[i] == '?')
+	continue;
+      /* pct-encoded = "%" HEXDIG HEXDIG */
+      if (target[i] == '%')
+	{
+	  if ((i + 2) >= len)
+	    return -1;
+	  if (!isxdigit (target[i + 1]) || !isxdigit (target[i + 2]))
+	    return -1;
+	  i += 2;
+	  encoded = 1;
+	  continue;
+	}
+      clib_warning ("invalid character %d", target[i]);
+      return -1;
+    }
+  if (is_encoded)
+    *is_encoded = encoded;
+  return 0;
+}
+
+/**
+ * An "absolute-path" rule validation (RFC9110 section 4.1).
+ *
+ * @param path Vector of target path to validate.
+ * @param is_encoded Return flag that indicates if percent-encoded (optional).
+ *
+ * @return @c 0 on success.
+ */
+always_inline int
+http_validate_abs_path_syntax (u8 *path, int *is_encoded)
+{
+ return _validate_target_syntax (path, vec_len (path), 0, is_encoded);
+}
+
+/**
+ * A "query" rule validation (RFC3986 section 2.1).
+ *
+ * @param query Vector of target query to validate.
+ * @param is_encoded Return flag that indicates if percent-encoded (optional).
+ *
+ * @return @c 0 on success.
+ */
+always_inline int
+http_validate_query_syntax (u8 *query, int *is_encoded)
+{
+ return _validate_target_syntax (query, vec_len (query), 1, is_encoded);
+}
+
+#define htoi(x) (isdigit (x) ? (x - '0') : (tolower (x) - 'a' + 10))
+
+/**
+ * Decode percent-encoded data.
+ *
+ * @param src Data to decode.
+ * @param len Length of data to decode.
+ *
+ * @return New vector with decoded data.
+ *
+ * The caller is always responsible to free the returned vector.
+ */
+always_inline u8 *
+http_percent_decode (u8 *src, u32 len)
 {
-  http_state_t state = hc->http_state;
-  return (state == HTTP_STATE_APP_IO_MORE_DATA ||
-	  state == HTTP_STATE_CLIENT_IO_MORE_DATA ||
-	  state == HTTP_STATE_WAIT_APP_REPLY ||
-	  state == HTTP_STATE_WAIT_APP_METHOD);
+  u32 i;
+  u8 *decoded_uri = 0;
+
+  for (i = 0; i < len; i++)
+    {
+      if (src[i] == '%')
+	{
+	  /* NOTE(review): src[i + 1] / src[i + 2] are read without a bounds
+	   * check — assumes input was already validated (e.g. with
+	   * _validate_target_syntax) so '%' is always followed by two hex
+	   * digits; confirm all callers guarantee this */
+	  u8 c = (htoi (src[i + 1]) << 4) | htoi (src[i + 2]);
+	  vec_add1 (decoded_uri, c);
+	  i += 2;
+	}
+      else
+	vec_add1 (decoded_uri, src[i]);
+    }
+  return decoded_uri;
+}
+
+/**
+ * Remove dot segments from path (RFC3986 section 5.2.4)
+ *
+ * @param path Path to sanitize.
+ *
+ * @return New vector with sanitized path.
+ *
+ * The caller is always responsible to free the returned vector.
+ */
+always_inline u8 *
+http_path_remove_dot_segments (u8 *path)
+{
+  u32 *segments = 0, *segments_len = 0, segment_len;
+  u8 *new_path = 0;
+  int i, ii;
+
+  if (!path)
+    return vec_new (u8, 0);
+
+  /* NOTE(review): a non-NULL zero-length vector would make
+   * vec_len (path) - 1 wrap (unsigned) in the loop bound below — confirm
+   * callers never pass an empty vector */
+  segments = vec_new (u32, 1);
+  /* first segment */
+  segments[0] = 0;
+  /* find all segments; segments[i] is the offset of segment i's first byte */
+  for (i = 1; i < (vec_len (path) - 1); i++)
+    {
+      if (path[i] == '/')
+	vec_add1 (segments, i + 1);
+    }
+  /* dummy tail */
+  vec_add1 (segments, vec_len (path));
+
+  /* scan all segments for "." and ".."; lengths below include the '/'
+   * separator, hence 2 for "./" and 3 for "../" */
+  segments_len = vec_new (u32, vec_len (segments) - 1);
+  for (i = 0; i < vec_len (segments_len); i++)
+    {
+      segment_len = segments[i + 1] - segments[i];
+      if (segment_len == 2 && path[segments[i]] == '.')
+	segment_len = 0;
+      else if (segment_len == 3 && path[segments[i]] == '.' &&
+	       path[segments[i] + 1] == '.')
+	{
+	  segment_len = 0;
+	  /* remove parent (if any): zero out the closest preceding segment
+	   * that is still alive */
+	  for (ii = i - 1; ii >= 0; ii--)
+	    {
+	      if (segments_len[ii])
+		{
+		  segments_len[ii] = 0;
+		  break;
+		}
+	    }
+	}
+      segments_len[i] = segment_len;
+    }
+
+  /* we might end with empty path, so return at least empty vector */
+  new_path = vec_new (u8, 0);
+  /* append all valid segments */
+  for (i = 0; i < vec_len (segments_len); i++)
+    {
+      if (segments_len[i])
+	vec_add (new_path, path + segments[i], segments_len[i]);
+    }
+  vec_free (segments);
+  vec_free (segments_len);
+  return new_path;
+}
+
+/* Parse one header field name up to the ':' delimiter (RFC9112 section 5).
+ * On success advances *pos past the ':' and returns the name via
+ * field_name_start / field_name_len. Returns 0 on success, -1 on error. */
+always_inline int
+_parse_field_name (u8 **pos, u8 *end, u8 **field_name_start,
+		   u32 *field_name_len)
+{
+  u32 name_len = 0;
+  u8 *p;
+
+  /* 256-bit bitmap of tchar bytes (token characters, RFC9110 sec. 5.6.2) */
+  static uword tchar[4] = {
+    /* !#$%'*+-.0123456789 */
+    0x03ff6cba00000000,
+    /* ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~ */
+    0x57ffffffc7fffffe,
+    0x0000000000000000,
+    0x0000000000000000,
+  };
+
+  p = *pos;
+
+  *field_name_start = p;
+  while (p != end)
+    {
+      if (clib_bitmap_get_no_check (tchar, *p))
+	{
+	  name_len++;
+	  p++;
+	}
+      else if (*p == ':')
+	{
+	  if (name_len == 0)
+	    {
+	      clib_warning ("empty field name");
+	      return -1;
+	    }
+	  *field_name_len = name_len;
+	  p++;
+	  *pos = p;
+	  return 0;
+	}
+      else
+	{
+	  clib_warning ("invalid character %d", *p);
+	  return -1;
+	}
+    }
+  clib_warning ("field name end not found");
+  return -1;
+}
+
+/* Parse one header field value terminated by CRLF (RFC9112 section 5).
+ * Optional leading and trailing whitespace (SP / HTAB) is stripped.
+ * On success advances *pos past the terminating CRLF and returns the value
+ * via field_value_start / field_value_len. Returns 0 on success, -1 on
+ * malformed input. */
+always_inline int
+_parse_field_value (u8 **pos, u8 *end, u8 **field_value_start,
+		    u32 *field_value_len)
+{
+  u32 value_len = 0;
+  u8 *p;
+
+  p = *pos;
+
+  /* skip leading whitespace */
+  while (1)
+    {
+      if (p == end)
+	{
+	  clib_warning ("field value not found");
+	  return -1;
+	}
+      else if (*p != ' ' && *p != '\t')
+	{
+	  break;
+	}
+      p++;
+    }
+
+  *field_value_start = p;
+  while (p != end)
+    {
+      if (*p == '\r')
+	{
+	  /* '*p' is read again after the increment below, so a CR must be
+	   * followed by at least one more byte (the LF); the previous
+	   * '(end - p) < 1' bound could never trigger inside this loop and
+	   * allowed a read one past 'end' when CR was the last byte */
+	  if ((end - p) < 2)
+	    {
+	      clib_warning ("incorrect field line end");
+	      return -1;
+	    }
+	  p++;
+	  if (*p == '\n')
+	    {
+	      if (value_len == 0)
+		{
+		  clib_warning ("empty field value");
+		  return -1;
+		}
+	      p++;
+	      *pos = p;
+	      /* skip trailing whitespace */
+	      p = *field_value_start + value_len - 1;
+	      while (*p == ' ' || *p == '\t')
+		{
+		  p--;
+		  value_len--;
+		}
+	      *field_value_len = value_len;
+	      return 0;
+	    }
+	  clib_warning ("CR without LF");
+	  return -1;
+	}
+      /* reject control characters other than HTAB; bytes >= 0x80
+       * (obs-text) fall through and are accepted */
+      if (*p < ' ' && *p != '\t')
+	{
+	  clib_warning ("invalid character %d", *p);
+	  return -1;
+	}
+      p++;
+      value_len++;
+    }
+
+  clib_warning ("field value end not found");
+  return -1;
+}
+
+typedef struct
+{
+ u8 *name;
+ u8 *value;
+} http_header_ht_t;
+
+typedef struct
+{
+ http_token_t name;
+ http_token_t value;
+} http_header_t;
+
+typedef struct
+{
+ http_header_ht_t *headers;
+ uword *value_by_name;
+} http_header_table_t;
+
+/**
+ * Free header table's memory.
+ *
+ * @param ht Header table to free.
+ */
+always_inline void
+http_free_header_table (http_header_table_t *ht)
+{
+ http_header_ht_t *header;
+ vec_foreach (header, ht->headers)
+ {
+ vec_free (header->name);
+ vec_free (header->value);
+ }
+ vec_free (ht->headers);
+ hash_free (ht->value_by_name);
+ clib_mem_free (ht);
+}
+
+/**
+ * Parse headers in given vector.
+ *
+ * @param headers Vector to parse.
+ * @param [out] header_table Parsed headers in case of success.
+ *
+ * @return @c 0 on success.
+ *
+ * The caller is responsible to free the returned @c header_table
+ * using @c http_free_header_table .
+ */
+always_inline int
+http_parse_headers (u8 *headers, http_header_table_t **header_table)
+{
+  u8 *pos, *end, *name_start, *value_start, *name;
+  u32 name_len, value_len;
+  int rv;
+  http_header_ht_t *header;
+  http_header_table_t *ht;
+  uword *p;
+
+  end = headers + vec_len (headers);
+  pos = headers;
+
+  ht = clib_mem_alloc (sizeof (*ht));
+  ht->value_by_name = hash_create_string (0, sizeof (uword));
+  ht->headers = 0;
+  do
+    {
+      rv = _parse_field_name (&pos, end, &name_start, &name_len);
+      if (rv != 0)
+	{
+	  http_free_header_table (ht);
+	  return rv;
+	}
+      rv = _parse_field_value (&pos, end, &value_start, &value_len);
+      if (rv != 0)
+	{
+	  http_free_header_table (ht);
+	  return rv;
+	}
+      /* NUL-terminated copy of the name is used as the hash table key */
+      name = vec_new (u8, name_len);
+      clib_memcpy (name, name_start, name_len);
+      vec_terminate_c_string (name);
+      /* check if header is repeated */
+      p = hash_get_mem (ht->value_by_name, name);
+      if (p)
+	{
+	  /* if yes combine values, comma-separated (RFC9110 section 5.2) */
+	  header = vec_elt_at_index (ht->headers, p[0]);
+	  vec_pop (header->value); /* drop null byte */
+	  header->value = format (header->value, ", %U%c", format_ascii_bytes,
+				  value_start, value_len, 0);
+	  vec_free (name);
+	  continue;
+	}
+      /* or create new record; vec_add2's third argument is an element
+       * count — 'sizeof (*header)' here added 16 uninitialized elements
+       * per header, which http_free_header_table later tried to free */
+      vec_add2 (ht->headers, header, 1);
+      header->name = name;
+      header->value = vec_new (u8, value_len);
+      clib_memcpy (header->value, value_start, value_len);
+      vec_terminate_c_string (header->value);
+      hash_set_mem (ht->value_by_name, header->name, header - ht->headers);
+    }
+  while (pos != end);
+
+  *header_table = ht;
+
+  return 0;
+}
+
+/**
+ * Try to find given header name in header table.
+ *
+ * @param header_table Header table to search.
+ * @param name Header name to match.
+ *
+ * @return Header's value in case of success, @c 0 otherwise.
+ */
+always_inline const char *
+http_get_header (http_header_table_t *header_table, const char *name)
+{
+ uword *p;
+ http_header_ht_t *header;
+
+ p = hash_get_mem (header_table->value_by_name, name);
+ if (p)
+ {
+ header = vec_elt_at_index (header_table->headers, p[0]);
+ return (const char *) header->value;
+ }
+
+ return 0;
+}
+
+/**
+ * Add header to the list.
+ *
+ * @param headers Header list.
+ * @param name Pointer to header's name buffer.
+ * @param name_len Length of the name.
+ * @param value Pointer to header's value buffer.
+ * @param value_len Length of the value.
+ *
+ * @note Headers added at protocol layer: Date, Server, Content-Length
+ */
+always_inline void
+http_add_header (http_header_t **headers, const char *name, uword name_len,
+ const char *value, uword value_len)
+{
+ http_header_t *header;
+ vec_add2 (*headers, header, 1);
+ header->name.base = (char *) name;
+ header->name.len = name_len;
+ header->value.base = (char *) value;
+ header->value.len = value_len;
+}
+
+/**
+ * Serialize the header list.
+ *
+ * @param headers Header list to serialize.
+ *
+ * @return New vector with serialized headers.
+ *
+ * The caller is always responsible to free the returned vector.
+ */
+always_inline u8 *
+http_serialize_headers (http_header_t *headers)
+{
+ u8 *headers_buf = 0, *dst;
+ u32 headers_buf_len = 2;
+ http_header_t *header;
+
+ vec_foreach (header, headers)
+ headers_buf_len += header->name.len + header->value.len + 4;
+
+ vec_validate (headers_buf, headers_buf_len - 1);
+ dst = headers_buf;
+
+ vec_foreach (header, headers)
+ {
+ clib_memcpy (dst, header->name.base, header->name.len);
+ dst += header->name.len;
+ *dst++ = ':';
+ *dst++ = ' ';
+ clib_memcpy (dst, header->value.base, header->value.len);
+ dst += header->value.len;
+ *dst++ = '\r';
+ *dst++ = '\n';
+ }
+ *dst++ = '\r';
+ *dst = '\n';
+ return headers_buf;
+}
+
+typedef struct
+{
+ ip46_address_t ip;
+ u16 port;
+ u8 is_ip4;
+} http_uri_t;
+
+always_inline int
+http_parse_authority_form_target (u8 *target, http_uri_t *authority)
+{
+ unformat_input_t input;
+ u32 port;
+ int rv = 0;
+
+ unformat_init_vector (&input, vec_dup (target));
+ if (unformat (&input, "[%U]:%d", unformat_ip6_address, &authority->ip.ip6,
+ &port))
+ {
+ authority->port = clib_host_to_net_u16 (port);
+ authority->is_ip4 = 0;
+ }
+ else if (unformat (&input, "%U:%d", unformat_ip4_address, &authority->ip.ip4,
+ &port))
+ {
+ authority->port = clib_host_to_net_u16 (port);
+ authority->is_ip4 = 1;
+ }
+ /* TODO reg-name resolution */
+ else
+ {
+ clib_warning ("unsupported format '%v'", target);
+ rv = -1;
+ }
+ unformat_free (&input);
+ return rv;
+}
+
+always_inline u8 *
+http_serialize_authority_form_target (http_uri_t *authority)
+{
+ u8 *s;
+
+ if (authority->is_ip4)
+ s = format (0, "%U:%d", format_ip4_address, &authority->ip.ip4,
+ clib_net_to_host_u16 (authority->port));
+ else
+ s = format (0, "[%U]:%d", format_ip6_address, &authority->ip.ip6,
+ clib_net_to_host_u16 (authority->port));
+
+ return s;
+}
+
+typedef enum http_url_scheme_
+{
+ HTTP_URL_SCHEME_HTTP,
+ HTTP_URL_SCHEME_HTTPS,
+} http_url_scheme_t;
+
+typedef struct
+{
+ http_url_scheme_t scheme;
+ u16 port;
+ u32 host_offset;
+ u32 host_len;
+ u32 path_offset;
+ u32 path_len;
+ u8 host_is_ip6;
+} http_url_t;
+
+/* Parse a decimal port number, advancing *pos past the digits consumed.
+ * Stops at the first non-digit character (left at *pos for the caller);
+ * rejects inputs that do not start with a digit and values above 65535.
+ * On success stores the port in network byte order and returns 0,
+ * otherwise returns -1. */
+always_inline int
+_parse_port (u8 **pos, u8 *end, u16 *port)
+{
+  u32 value = 0;
+  u8 *p = *pos;
+
+  if (!isdigit (*p))
+    return -1;
+  value = *p - '0';
+  p++;
+
+  while (p != end)
+    {
+      if (!isdigit (*p))
+	break;
+      value = value * 10 + *p - '0';
+      /* range-checked after every digit, so 'value' (u32) cannot overflow */
+      if (value > CLIB_U16_MAX)
+	return -1;
+      p++;
+    }
+  *pos = p;
+  *port = clib_host_to_net_u16 ((u16) value);
+  return 0;
+}
+
+/**
+ * An "absolute-form" URL parsing.
+ *
+ * @param url Vector of target URL to validate.
+ * @param parsed Parsed URL metadata in case of success.
+ *
+ * @return @c 0 on success.
+ */
+always_inline int
+http_parse_absolute_form (u8 *url, http_url_t *parsed)
+{
+  u8 *token_start, *token_end, *end;
+  int is_encoded = 0;
+
+  /* 256-bit bitmap of bytes allowed in a reg-name host */
+  static uword valid_chars[4] = {
+    /* -.0123456789 */
+    0x03ff600000000000,
+    /* ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz */
+    0x07fffffe07fffffe,
+    0x0000000000000000,
+    0x0000000000000000,
+  };
+
+  /* NOTE(review): shortest valid absolute-form is "http://h" (8 bytes);
+   * the 9-byte minimum below rejects it — confirm this is intended */
+  if (vec_len (url) < 9)
+    {
+      clib_warning ("uri too short");
+      return -1;
+    }
+
+  clib_memset (parsed, 0, sizeof (*parsed));
+
+  end = url + vec_len (url);
+
+  /* parse scheme */
+  if (!memcmp (url, "http://", 7))
+    {
+      parsed->scheme = HTTP_URL_SCHEME_HTTP;
+      parsed->port = clib_host_to_net_u16 (80);
+      parsed->host_offset = 7;
+    }
+  else if (!memcmp (url, "https://", 8))
+    {
+      parsed->scheme = HTTP_URL_SCHEME_HTTPS;
+      parsed->port = clib_host_to_net_u16 (443);
+      parsed->host_offset = 8;
+    }
+  else
+    {
+      clib_warning ("invalid scheme");
+      return -1;
+    }
+  token_start = url + parsed->host_offset;
+
+  /* parse host */
+  if (*token_start == '[')
+    /* IPv6 address */
+    {
+      parsed->host_is_ip6 = 1;
+      parsed->host_offset++;
+      token_end = ++token_start;
+      while (1)
+	{
+	  if (token_end == end)
+	    {
+	      clib_warning ("invalid host, IPv6 addr not terminated with ']'");
+	      return -1;
+	    }
+	  else if (*token_end == ']')
+	    {
+	      parsed->host_len = token_end - token_start;
+	      token_start = token_end + 1;
+	      break;
+	    }
+	  else if (*token_end != ':' && *token_end != '.' &&
+		   !isxdigit (*token_end))
+	    {
+	      clib_warning ("invalid character '%u'", *token_end);
+	      return -1;
+	    }
+	  token_end++;
+	}
+    }
+  else
+    {
+      /* reg-name or IPv4 literal; stops at port or path delimiter */
+      token_end = token_start;
+      while (token_end != end && *token_end != ':' && *token_end != '/')
+	{
+	  if (!clib_bitmap_get_no_check (valid_chars, *token_end))
+	    {
+	      clib_warning ("invalid character '%u'", *token_end);
+	      return -1;
+	    }
+	  token_end++;
+	}
+      parsed->host_len = token_end - token_start;
+      token_start = token_end;
+    }
+
+  if (!parsed->host_len)
+    {
+      clib_warning ("zero length host");
+      return -1;
+    }
+
+  /* parse port, if any */
+  if (token_start != end && *token_start == ':')
+    {
+      token_end = ++token_start;
+      if (_parse_port (&token_end, end, &parsed->port))
+	{
+	  clib_warning ("invalid port");
+	  return -1;
+	}
+      token_start = token_end;
+    }
+
+  if (token_start == end)
+    return 0;
+
+  /* NOTE(review): assumes the byte after the authority is '/' — a target
+   * like "http://host?query" would silently drop the '?' here; confirm
+   * upstream validation */
+  token_start++; /* drop leading slash */
+  parsed->path_offset = token_start - url;
+  parsed->path_len = end - token_start;
+
+  if (parsed->path_len)
+    return _validate_target_syntax (token_start, parsed->path_len, 0,
+				    &is_encoded);
+
+  return 0;
+}
+
+/**
+ * Parse target host and port of UDP tunnel over HTTP.
+ *
+ * @param path Path in format "{target_host}/{target_port}/".
+ * @param path_len Length of given path.
+ * @param parsed Parsed target in case of success.
+ *
+ * @return @c 0 on success.
+ *
+ * @note Only IPv4 literals and IPv6 literals supported.
+ */
+always_inline int
+http_parse_masque_host_port (u8 *path, u32 path_len, http_uri_t *parsed)
+{
+  u8 *p, *end, *decoded_host;
+  u32 host_len;
+  unformat_input_t input;
+
+  p = path;
+  end = path + path_len;
+  clib_memset (parsed, 0, sizeof (*parsed));
+
+  /* target_host is everything up to the first '/' */
+  while (p != end && *p != '/')
+    p++;
+
+  host_len = p - path;
+  /* reject empty host, missing '/' separator and missing port */
+  if (!host_len || (host_len == path_len) || (host_len + 1 == path_len))
+    return -1;
+  /* host may be percent-encoded (e.g. ':' in an IPv6 literal) */
+  decoded_host = http_percent_decode (path, host_len);
+  /* unformat_init_vector takes ownership of decoded_host;
+   * unformat_free releases it on both paths below */
+  unformat_init_vector (&input, decoded_host);
+  if (unformat (&input, "%U", unformat_ip4_address, &parsed->ip.ip4))
+    parsed->is_ip4 = 1;
+  else if (unformat (&input, "%U", unformat_ip6_address, &parsed->ip.ip6))
+    parsed->is_ip4 = 0;
+  else
+    {
+      unformat_free (&input);
+      clib_warning ("unsupported target_host format");
+      return -1;
+    }
+  unformat_free (&input);
+
+  p++; /* skip the '/' after target_host */
+  if (_parse_port (&p, end, &parsed->port))
+    {
+      clib_warning ("invalid port");
+      return -1;
+    }
+
+  /* target_port must be followed by a trailing '/' */
+  if (p == end || *p != '/')
+    return -1;
+
+  return 0;
 }
#endif /* SRC_PLUGINS_HTTP_HTTP_H_ */
diff --git a/src/plugins/http/http_buffer.c b/src/plugins/http/http_buffer.c
index f3dc308dbf8..bc1b8c08630 100644
--- a/src/plugins/http/http_buffer.c
+++ b/src/plugins/http/http_buffer.c
@@ -173,7 +173,7 @@ buf_ptr_drain (http_buffer_t *hb, u32 len)
bf->segs[1].data += len;
bf->segs[0].len -= len;
- HTTP_DBG (1, "drained %u left %u", len, bf->segs[1].len);
+ HTTP_DBG (1, "drained %u left %u", len, bf->segs[0].len);
if (!bf->segs[0].len)
{
diff --git a/src/plugins/http/http_content_types.h b/src/plugins/http/http_content_types.h
new file mode 100644
index 00000000000..ddc02566db7
--- /dev/null
+++ b/src/plugins/http/http_content_types.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP_CONTENT_TYPES_H_
+#define SRC_PLUGINS_HTTP_HTTP_CONTENT_TYPES_H_
+
+#include <http/http.h>
+
+static http_token_t http_content_types[] = {
+#define _(s, ext, str) { http_token_lit (str) },
+ foreach_http_content_type
+#undef _
+};
+
+#define http_content_type_token(e) \
+ http_content_types[e].base, http_content_types[e].len
+
+#endif /* SRC_PLUGINS_HTTP_HTTP_CONTENT_TYPES_H_ */
diff --git a/src/plugins/http/http_header_names.h b/src/plugins/http/http_header_names.h
new file mode 100644
index 00000000000..99acac786db
--- /dev/null
+++ b/src/plugins/http/http_header_names.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP_HEADER_NAMES_H_
+#define SRC_PLUGINS_HTTP_HTTP_HEADER_NAMES_H_
+
+#include <http/http.h>
+
+static http_token_t http_header_names[] = {
+#define _(sym, str) { http_token_lit (str) },
+ foreach_http_header_name
+#undef _
+};
+
+#define http_header_name_token(e) \
+ http_header_names[e].base, http_header_names[e].len
+
+#define http_header_name_str(e) http_header_names[e].base
+
+#endif /* SRC_PLUGINS_HTTP_HTTP_HEADER_NAMES_H_ */
diff --git a/src/plugins/http/http_plugin.rst b/src/plugins/http/http_plugin.rst
new file mode 100644
index 00000000000..f86c796bd83
--- /dev/null
+++ b/src/plugins/http/http_plugin.rst
@@ -0,0 +1,537 @@
+.. _http_plugin:
+
+.. toctree::
+
+HTTP Plugin
+===========
+
+Overview
+--------
+
+This plugin adds the HTTP protocol to VPP's Host Stack.
+As a result parsing and serializing of HTTP/1 requests or responses are available for internal VPP applications.
+
+Usage
+-----
+
+The plugin exposes following inline functions: ``http_validate_abs_path_syntax``, ``http_validate_query_syntax``,
+``http_percent_decode``, ``http_path_remove_dot_segments``, ``http_parse_headers``, ``http_get_header``,
+``http_free_header_table``, ``http_add_header``, ``http_serialize_headers``, ``http_parse_authority_form_target``,
+``http_serialize_authority_form_target``, ``http_parse_absolute_form``, ``http_parse_masque_host_port``.
+
+It relies on the hoststack constructs and uses ``http_msg_data_t`` data structure for passing metadata to/from applications.
+
+Server application
+^^^^^^^^^^^^^^^^^^
+
+Server application sets ``TRANSPORT_PROTO_HTTP`` as ``transport_proto`` in session endpoint configuration when registering to listen.
+
+Receiving data
+""""""""""""""
+
+HTTP plugin sends message header with metadata for parsing, in form of offset and length, followed by all data bytes as received from transport.
+
+Application will get pre-parsed following items:
+
+* HTTP method
+* target form
+* target path offset and length
+* target query offset and length
+* header section offset and length
+* body offset and length
+
+The example below reads HTTP message header in ``builtin_app_rx_callback``, which is first step application should do:
+
+.. code-block:: C
+
+ #include <http/http.h>
+ http_msg_t msg;
+ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+As next step application might validate message and method type, for example application only expects to receive GET requests:
+
+.. code-block:: C
+
+ if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET)
+ {
+ /* your error handling */
+ }
+
+Now application can start reading HTTP data. First let's read the target path:
+
+.. code-block:: C
+
+ u8 *target_path;
+ vec_validate (target_path, msg.data.target_path_len - 1);
+ rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset, msg.data.target_path_len, target_path);
+ ASSERT (rv == msg.data.target_path_len);
+
+Application might also want to know target form which is stored in ``msg.data.target_form``, you can read more about target forms in RFC9112 section 3.2.
+In case of origin form HTTP plugin always sets ``target_path_offset`` after leading slash character.
+
+The example below validates the "absolute-path" rule, as described in RFC9110 section 4.1, for a target in origin form. Additionally, the application can find out whether percent encoding is used and decode the path:
+
+.. code-block:: C
+
+ int is_encoded = 0;
+ if (msg.data.target_form == HTTP_TARGET_ORIGIN_FORM)
+ {
+ if (http_validate_abs_path_syntax (target_path, &is_encoded))
+ {
+ /* your error handling */
+ }
+ if (is_encoded)
+ {
+ u8 *decoded = http_percent_decode (target_path, vec_len (target_path));
+ vec_free (target_path);
+ target_path = decoded;
+ }
+ }
+
+More on topic when to decode in RFC3986 section 2.4.
+
+When application serves static files, it is highly recommended to sanitize target path by removing dot segments (you don't want to risk path traversal attack):
+
+.. code-block:: C
+
+ u8 *sanitized_path;
+ sanitized_path = http_path_remove_dot_segments (target_path);
+
+Let's move to target query which is optional. Percent encoding might be used too, but we skip it for brevity:
+
+.. code-block:: C
+
+ u8 *target_query = 0;
+ if (msg.data.target_query_len)
+ {
+ vec_validate (target_query, msg.data.target_query_len - 1);
+ rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_query_offset,
+ msg.data.target_query_len, target_query);
+ ASSERT (rv == msg.data.target_query_len);
+ if (http_validate_query_syntax (target_query, 0))
+ {
+ /* your error handling */
+ }
+ }
+
+And now for something completely different, headers.
+Headers are parsed using a generic algorithm, independent of the individual header names.
+When header is repeated, its combined value consists of all values separated by comma, concatenated in order as received.
+Following example shows how to parse headers:
+
+.. code-block:: C
+
+ #include <http/http_header_names.h>
+ if (msg.data.headers_len)
+ {
+ u8 *headers = 0;
+ http_header_table_t *ht;
+ vec_validate (headers, msg.data.headers_len - 1);
+ rv = svm_fifo_peek (ts->rx_fifo, msg.data.headers_offset,
+ msg.data.headers_len, headers);
+ ASSERT (rv == msg.data.headers_len);
+ if (http_parse_headers (headers, &ht))
+ {
+ /* your error handling */
+ }
+ /* get Accept header */
+ const char *accept_value = http_get_header (ht, http_header_name_str (HTTP_HEADER_ACCEPT));
+ if (accept_value)
+ {
+ /* do something interesting */
+ }
+ http_free_header_table (ht);
+ vec_free (headers);
+ }
+
+Finally application reads body (if any), which might be received in multiple pieces (depends on size), so we might need some state machine in ``builtin_app_rx_callback``.
+We will add following members to our session context structure:
+
+.. code-block:: C
+
+ typedef struct
+ {
+ /* ... */
+ u64 to_recv;
+ u8 *resp_body;
+ } session_ctx_t;
+
+First we prepare vector for response body, do it only once when you are reading metadata:
+
+.. code-block:: C
+
+ /* drop everything up to body */
+ svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.body_offset);
+ ctx->to_recv = msg.data.body_len;
+ /* prepare vector for response body */
+ vec_validate (ctx->resp_body, msg.data.body_len - 1);
+ vec_reset_length (ctx->resp_body);
+
+Now we can start reading body content, following block of code could be executed multiple times:
+
+.. code-block:: C
+
+ /* dequeue */
+ u32 n_deq = svm_fifo_max_dequeue (ts->rx_fifo);
+ /* current offset */
+ u64 curr = vec_len (ctx->resp_body);
+ rv = svm_fifo_dequeue (ts->rx_fifo, n_deq, ctx->resp_body + curr);
+ ASSERT (rv == n_deq);
+ /* update length of the vector */
+ vec_set_len (ctx->resp_body, curr + n_deq);
+ /* update number of remaining bytes to receive */
+ ctx->to_recv -= rv;
+ /* check if all data received */
+ if (ctx->to_recv == 0)
+ {
+ /* we are done */
+ /* send 200 OK response */
+ }
+
+Sending data
+""""""""""""""
+
+When server application sends response back to HTTP layer it starts with message metadata, followed by optional serialized headers and finally body (if any).
+
+Application should set following items:
+
+* Status code
+* target form
+* header section offset and length
+* body offset and length
+
+Application could pass headers back to HTTP layer. Header list is created dynamically as vector of ``http_header_t``,
+where we store only pointers to buffers (zero copy).
+Well known header names are predefined.
+The list is serialized just before you send buffer to HTTP layer.
+
+.. note::
+ Following headers are added at protocol layer and **MUST NOT** be set by application: Date, Server, Content-Length
+
+Following example shows how to create headers section:
+
+.. code-block:: C
+
+ #include <http/http.h>
+ #include <http/http_header_names.h>
+ #include <http/http_content_types.h>
+ http_header_t *resp_headers = 0;
+ u8 *headers_buf = 0;
+ http_add_header (resp_headers,
+ http_header_name_token (HTTP_HEADER_CONTENT_TYPE),
+ http_content_type_token (HTTP_CONTENT_TEXT_HTML));
+ http_add_header (resp_headers,
+ http_header_name_token (HTTP_HEADER_CACHE_CONTROL),
+ http_token_lit ("max-age=600"));
+ http_add_header (resp_headers,
+ http_header_name_token (HTTP_HEADER_LOCATION),
+ (const char *) redirect, vec_len (redirect));
+ headers_buf = http_serialize_headers (resp_headers);
+
+The example below show how to create and send response HTTP message metadata:
+
+.. code-block:: C
+
+ http_msg_t msg;
+ msg.type = HTTP_MSG_REPLY;
+   msg.code = HTTP_STATUS_MOVED;
+ msg.data.headers_offset = 0;
+ msg.data.headers_len = vec_len (headers_buf);
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.body_len = vec_len (tx_buf);
+ msg.data.body_offset = msg.data.headers_len;
+ msg.data.len = msg.data.body_len + msg.data.headers_len;
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+Next you will send your serialized headers:
+
+.. code-block:: C
+
+ rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (headers_buf), headers_buf);
+ ASSERT (rv == msg.data.headers_len);
+ vec_free (headers_buf);
+
+Finally application sends response body:
+
+.. code-block:: C
+
+ rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (tx_buf), tx_buf);
+  if (rv != vec_len (tx_buf))
+ {
+ hs->tx_offset = rv;
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ }
+ else
+ {
+ vec_free (tx_buf);
+ }
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
+
+The examples above show how to send body and headers by copy; alternatively you could pass them as pointers:
+
+.. code-block:: C
+
+ msg.data.type = HTTP_MSG_DATA_PTR;
+ /* code omitted for brevity */
+ if (msg.data.headers_len)
+ {
+ uword headers = pointer_to_uword (headers_buf);
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (headers), (u8 *) &headers);
+ ASSERT (rv == sizeof (headers));
+ }
+ uword data = pointer_to_uword (tx_buf);
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (data), (u8 *) &data);
+ ASSERT (rv == sizeof (data));
+
+In this case you need to free data when you receive next request or when session is closed.
+
+
+Client application
+^^^^^^^^^^^^^^^^^^
+
+Client application opens connection with vnet URI where transport protocol is set to ``http``.
+
+Sending data
+""""""""""""""
+
+HTTP request is sent when connection is successfully established in ``session_connected_callback``.
+
+When client application sends message to HTTP layer it starts with message metadata, followed by request target, optional headers and body (if any) buffers.
+
+Application should set following items:
+
+* HTTP method
+* target form, offset and length
+* header section offset and length
+* body offset and length
+
+Application could pass headers to HTTP layer. Header list is created dynamically as vector of ``http_header_t``,
+where we store only pointers to buffers (zero copy).
+Well known header names are predefined.
+The list is serialized just before you send buffer to HTTP layer.
+
+.. note::
+ Following headers are added at protocol layer and **MUST NOT** be set by application: Host, User-Agent
+
+
+The example below shows how to create headers section:
+
+.. code-block:: C
+
+ #include <http/http.h>
+ #include <http/http_header_names.h>
+ #include <http/http_content_types.h>
+ http_header_t *req_headers = 0;
+ u8 *headers_buf = 0;
+ http_add_header (req_headers,
+ http_header_name_token (HTTP_HEADER_ACCEPT),
+ http_content_type_token (HTTP_CONTENT_TEXT_HTML));
+ headers_buf = http_serialize_headers (req_headers);
+  vec_free (req_headers);
+
+Following example shows how to set message metadata:
+
+.. code-block:: C
+
+ http_msg_t msg;
+ msg.type = HTTP_MSG_REQUEST;
+ msg.method_type = HTTP_REQ_GET;
+ msg.data.headers_offset = 0;
+ /* request target */
+ msg.data.target_form = HTTP_TARGET_ORIGIN_FORM;
+ msg.data.target_path_offset = 0;
+ msg.data.target_path_len = vec_len (target);
+ /* custom headers */
+ msg.data.headers_offset = msg.data.target_path_len;
+ msg.data.headers_len = vec_len (headers_buf);
+ /* no request body because we are doing GET request */
+ msg.data.body_len = 0;
+ /* data type and total length */
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = msg.data.target_path_len + msg.data.headers_len + msg.data.body_len;
+
+Finally application sends everything to HTTP layer:
+
+.. code-block:: C
+
+ svm_fifo_seg_t segs[3] = { { (u8 *) &msg, sizeof (msg) }, /* message metadata */
+ { target, vec_len (target) }, /* request target */
+ { headers_buf, vec_len (headers_buf) } }; /* serialized headers */
+ rv = svm_fifo_enqueue_segments (as->tx_fifo, segs, 3, 0 /* allow partial */);
+ vec_free (headers_buf);
+ if (rv < 0 || rv != sizeof (msg) + msg.data.len)
+ {
+ clib_warning ("failed app enqueue");
+ return -1;
+ }
+ if (svm_fifo_set_event (as->tx_fifo))
+ session_program_tx_io_evt (as->handle, SESSION_IO_EVT_TX);
+
+The examples above show how to send buffers by copy; alternatively you could pass them as pointers:
+
+.. code-block:: C
+
+ msg.data.type = HTTP_MSG_DATA_PTR;
+ msg.method_type = HTTP_REQ_POST;
+ msg.data.body_len = vec_len (data);
+ /* code omitted for brevity */
+  uword target_ptr = pointer_to_uword (target);
+  uword headers = pointer_to_uword (headers_buf);
+  uword body = pointer_to_uword (data);
+  svm_fifo_seg_t segs[4] = {
+    { (u8 *) &msg, sizeof (msg) },
+    { (u8 *) &target_ptr, sizeof (target_ptr) },
+    { (u8 *) &headers, sizeof (headers) },
+    { (u8 *) &body, sizeof (body) },
+  };
+  rv = svm_fifo_enqueue_segments (s->tx_fifo, segs, 4, 0 /* allow partial */);
+  ASSERT (rv == (sizeof (msg) + sizeof (target_ptr) + sizeof (headers) + sizeof (body)));
+
+In this case you need to free data when you receive response or when session is closed.
+
+Receiving data
+""""""""""""""
+
+HTTP plugin sends message header with metadata for parsing, in form of offset and length, followed by all data bytes as received from transport.
+
+Application will get the following pre-parsed items:
+
+* status code
+* header section offset and length
+* body offset and length
+
+The example below reads HTTP message header in ``builtin_app_rx_callback``, which is first step application should do:
+
+.. code-block:: C
+
+ #include <http/http.h>
+ http_msg_t msg;
+ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+As next step application might validate message type and status code:
+
+.. code-block:: C
+
+ if (msg.type != HTTP_MSG_REPLY)
+ {
+ /* your error handling */
+ }
+ if (msg.code != HTTP_STATUS_OK)
+ {
+ /* your error handling */
+      /* of course you can continue with steps below */
+ /* you might be interested in some headers or body content (if any) */
+ }
+
+Headers are parsed using a generic algorithm, independent of the individual header names.
+When header is repeated, its combined value consists of all values separated by comma, concatenated in order as received.
+Following example shows how to parse headers:
+
+.. code-block:: C
+
+ #include <http/http_header_names.h>
+ if (msg.data.headers_len)
+ {
+ u8 *headers = 0;
+ http_header_table_t *ht;
+ vec_validate (headers, msg.data.headers_len - 1);
+ rv = svm_fifo_peek (ts->rx_fifo, msg.data.headers_offset,
+ msg.data.headers_len, headers);
+ ASSERT (rv == msg.data.headers_len);
+ if (http_parse_headers (headers, &ht))
+ {
+ /* your error handling */
+ }
+ /* get Content-Type header */
+ const char *content_type = http_get_header (ht, http_header_name_str (HTTP_HEADER_CONTENT_TYPE));
+ if (content_type)
+ {
+ /* do something interesting */
+ }
+ http_free_header_table (ht);
+ vec_free (headers);
+ }
+
+Finally application reads body, which might be received in multiple pieces (depends on size), so we might need some state machine in ``builtin_app_rx_callback``.
+We will add following members to our session context structure:
+
+.. code-block:: C
+
+ typedef struct
+ {
+ /* ... */
+ u64 to_recv;
+ u8 *resp_body;
+ } session_ctx_t;
+
+First we prepare vector for response body, do it only once when you are reading metadata:
+
+.. code-block:: C
+
+ /* drop everything up to body */
+ svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.body_offset);
+ ctx->to_recv = msg.data.body_len;
+ /* prepare vector for response body */
+ vec_validate (ctx->resp_body, msg.data.body_len - 1);
+ vec_reset_length (ctx->resp_body);
+
+Now we can start reading body content, following block of code could be executed multiple times:
+
+.. code-block:: C
+
+ /* dequeue */
+ u32 max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
+  u32 n_deq = clib_min (ctx->to_recv, max_deq);
+ /* current offset */
+ u64 curr = vec_len (ctx->resp_body);
+ rv = svm_fifo_dequeue (ts->rx_fifo, n_deq, ctx->resp_body + curr);
+ if (rv < 0 || rv != n_deq)
+ {
+ /* your error handling */
+ }
+ /* update length of the vector */
+ vec_set_len (ctx->resp_body, curr + n_deq);
+ /* update number of remaining bytes to receive */
+  ASSERT (ctx->to_recv >= rv);
+ ctx->to_recv -= rv;
+ /* check if all data received */
+ if (ctx->to_recv == 0)
+ {
+ /* we are done */
+ /* close the session if you don't want to send another request */
+ /* and update state machine... */
+ }
+
+HTTP timeout
+^^^^^^^^^^^^
+
+HTTP plugin sets session inactivity timeout by default to 60 seconds.
+Client and server applications can pass custom timeout value (in seconds) using extended configuration when doing connect or start listening respectively.
+You just need to add extended configuration to session endpoint configuration which is part of ``vnet_connect_args_t`` and ``vnet_listen_args_t``.
+HTTP plugin uses the ``opaque`` member of ``transport_endpt_ext_cfg_t``; an unsigned 32-bit integer seems to be sufficient (allowing the timeout to be set up to 136 years).
+
+The example below sets HTTP session timeout to 30 seconds (server application):
+
+.. code-block:: C
+
+ vnet_listen_args_t _a, *a = &_a;
+ session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
+ transport_endpt_ext_cfg_t *ext_cfg;
+ int rv;
+ clib_memset (a, 0, sizeof (*a));
+ clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
+ /* add new extended config entry */
+ ext_cfg = session_endpoint_add_ext_cfg (
+ &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque));
+ /* your custom timeout value in seconds */
+ ext_cfg->opaque = 30;
+ /* rest of the settings omitted for brevity */
+ rv = vnet_listen (a);
+ /* don't forget to free extended config */
+ session_endpoint_free_ext_cfgs (&a->sep_ext);
+ /* ... */
diff --git a/src/plugins/http/http_status_codes.h b/src/plugins/http/http_status_codes.h
new file mode 100644
index 00000000000..100095c8f42
--- /dev/null
+++ b/src/plugins/http/http_status_codes.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP_STATUS_CODES_H_
+#define SRC_PLUGINS_HTTP_HTTP_STATUS_CODES_H_
+
+#include <http/http.h>
+
+static const char *http_status_code_str[] = {
+#define _(c, s, str) str,
+ foreach_http_status_code
+#undef _
+};
+
+static inline u8 *
+format_http_status_code (u8 *s, va_list *va)
+{
+ http_status_code_t status_code = va_arg (*va, http_status_code_t);
+ if (status_code < HTTP_N_STATUS)
+ s = format (s, "%s", http_status_code_str[status_code]);
+ else
+ s = format (s, "invalid status code %d", status_code);
+ return s;
+}
+
+#endif /* SRC_PLUGINS_HTTP_HTTP_STATUS_CODES_H_ */
diff --git a/src/plugins/http/http_timer.c b/src/plugins/http/http_timer.c
index 42fe69076fe..580f31657a9 100644
--- a/src/plugins/http/http_timer.c
+++ b/src/plugins/http/http_timer.c
@@ -29,7 +29,15 @@ http_timer_process_expired_cb (u32 *expired_timers)
{
/* Get session handle. The first bit is the timer id */
hs_handle = expired_timers[i] & 0x7FFFFFFF;
- session_send_rpc_evt_to_thread (hs_handle >> 24, twc->cb_fn,
+ twc->invalidate_cb (hs_handle);
+ }
+ for (i = 0; i < vec_len (expired_timers); i++)
+ {
+ /* Get session handle. The first bit is the timer id */
+ hs_handle = expired_timers[i] & 0x7FFFFFFF;
+ HTTP_DBG (1, "rpc to hc [%u]%x", hs_handle >> 24,
+ hs_handle & 0x00FFFFFF);
+ session_send_rpc_evt_to_thread (hs_handle >> 24, twc->rpc_cb,
uword_to_pointer (hs_handle, void *));
}
}
@@ -66,15 +74,19 @@ VLIB_REGISTER_NODE (http_timer_process_node) = {
};
void
-http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *cb_fn)
+http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *rpc_cb,
+ http_conn_invalidate_timer_fn *invalidate_cb)
{
http_tw_ctx_t *twc = &http_tw_ctx;
vlib_node_t *n;
+ ASSERT (twc->tw.timers == 0);
+
tw_timer_wheel_init_2t_1w_2048sl (&twc->tw, http_timer_process_expired_cb,
1.0 /* timer interval */, ~0);
clib_spinlock_init (&twc->tw_lock);
- twc->cb_fn = cb_fn;
+ twc->rpc_cb = rpc_cb;
+ twc->invalidate_cb = invalidate_cb;
vlib_node_set_state (vm, http_timer_process_node.index,
VLIB_NODE_STATE_POLLING);
diff --git a/src/plugins/http/http_timer.h b/src/plugins/http/http_timer.h
index eec5a4595fe..43d20d004d8 100644
--- a/src/plugins/http/http_timer.h
+++ b/src/plugins/http/http_timer.h
@@ -19,34 +19,37 @@
#include <http/http.h>
#include <vppinfra/tw_timer_2t_1w_2048sl.h>
-#define HTTP_CONN_TIMEOUT 60
+#define HTTP_CONN_TIMEOUT 60
+#define HTTP_TIMER_HANDLE_INVALID ((u32) ~0)
typedef void (http_conn_timeout_fn) (void *);
+typedef void (http_conn_invalidate_timer_fn) (u32 hs_handle);
typedef struct http_tw_ctx_
{
tw_timer_wheel_2t_1w_2048sl_t tw;
clib_spinlock_t tw_lock;
- http_conn_timeout_fn *cb_fn;
+ http_conn_timeout_fn *rpc_cb;
+ http_conn_invalidate_timer_fn *invalidate_cb;
} http_tw_ctx_t;
extern http_tw_ctx_t http_tw_ctx;
-void http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *cb_fn);
+void http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *rpc_cb,
+ http_conn_invalidate_timer_fn *invalidate_cb);
static inline void
http_conn_timer_start (http_conn_t *hc)
{
http_tw_ctx_t *twc = &http_tw_ctx;
u32 hs_handle;
- u64 timeout;
- timeout = HTTP_CONN_TIMEOUT;
+ ASSERT (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID);
hs_handle = hc->c_thread_index << 24 | hc->c_c_index;
clib_spinlock_lock (&twc->tw_lock);
hc->timer_handle =
- tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, timeout);
+ tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, hc->timeout);
clib_spinlock_unlock (&twc->tw_lock);
}
@@ -55,12 +58,13 @@ http_conn_timer_stop (http_conn_t *hc)
{
http_tw_ctx_t *twc = &http_tw_ctx;
- if (hc->timer_handle == ~0)
+ hc->pending_timer = 0;
+ if (hc->timer_handle == HTTP_TIMER_HANDLE_INVALID)
return;
clib_spinlock_lock (&twc->tw_lock);
tw_timer_stop_2t_1w_2048sl (&twc->tw, hc->timer_handle);
- hc->timer_handle = ~0;
+ hc->timer_handle = HTTP_TIMER_HANDLE_INVALID;
clib_spinlock_unlock (&twc->tw_lock);
}
@@ -68,15 +72,17 @@ static inline void
http_conn_timer_update (http_conn_t *hc)
{
http_tw_ctx_t *twc = &http_tw_ctx;
- u64 timeout;
-
- if (hc->timer_handle == ~0)
- return;
-
- timeout = HTTP_CONN_TIMEOUT;
+ u32 hs_handle;
clib_spinlock_lock (&twc->tw_lock);
- tw_timer_update_2t_1w_2048sl (&twc->tw, hc->timer_handle, timeout);
+ if (hc->timer_handle != HTTP_TIMER_HANDLE_INVALID)
+ tw_timer_update_2t_1w_2048sl (&twc->tw, hc->timer_handle, hc->timeout);
+ else
+ {
+ hs_handle = hc->c_thread_index << 24 | hc->c_c_index;
+ hc->timer_handle =
+ tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, hc->timeout);
+ }
clib_spinlock_unlock (&twc->tw_lock);
}
diff --git a/src/plugins/http/test/http_test.c b/src/plugins/http/test/http_test.c
new file mode 100644
index 00000000000..d4ac8f46f29
--- /dev/null
+++ b/src/plugins/http/test/http_test.c
@@ -0,0 +1,360 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <http/http.h>
+
+#define HTTP_TEST_I(_cond, _comment, _args...) \
+ ({ \
+ int _evald = (_cond); \
+ if (!(_evald)) \
+ { \
+ vlib_cli_output (vm, "FAIL:%d: " _comment "\n", __LINE__, ##_args); \
+ } \
+ else \
+ { \
+ vlib_cli_output (vm, "PASS:%d: " _comment "\n", __LINE__, ##_args); \
+ } \
+ _evald; \
+ })
+
+#define HTTP_TEST(_cond, _comment, _args...) \
+ { \
+ if (!HTTP_TEST_I (_cond, _comment, ##_args)) \
+ { \
+ return 1; \
+ } \
+ }
+
+static int
+http_test_authority_form (vlib_main_t *vm)
+{
+ u8 *target = 0, *formated_target = 0;
+ http_uri_t authority;
+ int rv;
+
+ target = format (0, "10.10.2.45:20");
+ rv = http_parse_authority_form_target (target, &authority);
+ HTTP_TEST ((rv == 0), "'%v' should be valid", target);
+ formated_target = http_serialize_authority_form_target (&authority);
+ rv = vec_cmp (target, formated_target);
+ HTTP_TEST ((rv == 0), "'%v' should match '%v'", target, formated_target);
+ vec_free (target);
+ vec_free (formated_target);
+
+ target = format (0, "[dead:beef::1234]:443");
+ rv = http_parse_authority_form_target (target, &authority);
+ HTTP_TEST ((rv == 0), "'%v' should be valid", target);
+ formated_target = http_serialize_authority_form_target (&authority);
+ rv = vec_cmp (target, formated_target);
+ HTTP_TEST ((rv == 0), "'%v' should match '%v'", target, formated_target);
+ vec_free (target);
+ vec_free (formated_target);
+
+ target = format (0, "example.com:80");
+ rv = http_parse_authority_form_target (target, &authority);
+ HTTP_TEST ((rv != 0), "'%v' reg-name not supported", target);
+ vec_free (target);
+
+ target = format (0, "10.10.2.45");
+ rv = http_parse_authority_form_target (target, &authority);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", target);
+ vec_free (target);
+
+ target = format (0, "1000.10.2.45:20");
+ rv = http_parse_authority_form_target (target, &authority);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", target);
+ vec_free (target);
+
+ target = format (0, "[xyz0::1234]:443");
+ rv = http_parse_authority_form_target (target, &authority);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", target);
+ vec_free (target);
+
+ return 0;
+}
+
+static int
+http_test_absolute_form (vlib_main_t *vm)
+{
+ u8 *url = 0;
+ http_url_t parsed_url;
+ int rv;
+
+ url = format (0, "https://example.org/.well-known/masque/udp/1.2.3.4/123/");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv == 0), "'%v' should be valid", url);
+ HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTPS),
+ "scheme should be https");
+ HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0",
+ parsed_url.host_is_ip6);
+ HTTP_TEST ((parsed_url.host_offset == strlen ("https://")),
+ "host_offset=%u should be %u", parsed_url.host_offset,
+ strlen ("https://"));
+ HTTP_TEST ((parsed_url.host_len == strlen ("example.org")),
+ "host_len=%u should be %u", parsed_url.host_len,
+ strlen ("example.org"));
+ HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 443),
+ "port=%u should be 443", clib_net_to_host_u16 (parsed_url.port));
+ HTTP_TEST ((parsed_url.path_offset == strlen ("https://example.org/")),
+ "path_offset=%u should be %u", parsed_url.path_offset,
+ strlen ("https://example.org/"));
+ HTTP_TEST (
+ (parsed_url.path_len == strlen (".well-known/masque/udp/1.2.3.4/123/")),
+ "path_len=%u should be %u", parsed_url.path_len,
+ strlen (".well-known/masque/udp/1.2.3.4/123/"));
+ vec_free (url);
+
+ url = format (0, "http://vpp-example.org");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv == 0), "'%v' should be valid", url);
+ HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP),
+ "scheme should be http");
+ HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0",
+ parsed_url.host_is_ip6);
+ HTTP_TEST ((parsed_url.host_offset == strlen ("http://")),
+ "host_offset=%u should be %u", parsed_url.host_offset,
+ strlen ("http://"));
+ HTTP_TEST ((parsed_url.host_len == strlen ("vpp-example.org")),
+ "host_len=%u should be %u", parsed_url.host_len,
+ strlen ("vpp-example.org"));
+ HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 80),
+ "port=%u should be 80", clib_net_to_host_u16 (parsed_url.port));
+ HTTP_TEST ((parsed_url.path_len == 0), "path_len=%u should be 0",
+ parsed_url.path_len);
+ vec_free (url);
+
+ url = format (0, "http://1.2.3.4:8080/abcd");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv == 0), "'%v' should be valid", url);
+ HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP),
+ "scheme should be http");
+ HTTP_TEST ((parsed_url.host_is_ip6 == 0), "host_is_ip6=%u should be 0",
+ parsed_url.host_is_ip6);
+ HTTP_TEST ((parsed_url.host_offset == strlen ("http://")),
+ "host_offset=%u should be %u", parsed_url.host_offset,
+ strlen ("http://"));
+ HTTP_TEST ((parsed_url.host_len == strlen ("1.2.3.4")),
+ "host_len=%u should be %u", parsed_url.host_len,
+ strlen ("1.2.3.4"));
+ HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 8080),
+ "port=%u should be 8080", clib_net_to_host_u16 (parsed_url.port));
+ HTTP_TEST ((parsed_url.path_offset == strlen ("http://1.2.3.4:8080/")),
+ "path_offset=%u should be %u", parsed_url.path_offset,
+ strlen ("http://1.2.3.4:8080/"));
+ HTTP_TEST ((parsed_url.path_len == strlen ("abcd")),
+ "path_len=%u should be %u", parsed_url.path_len, strlen ("abcd"));
+ vec_free (url);
+
+ url = format (0, "https://[dead:beef::1234]/abcd");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv == 0), "'%v' should be valid", url);
+ HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTPS),
+ "scheme should be https");
+ HTTP_TEST ((parsed_url.host_is_ip6 == 1), "host_is_ip6=%u should be 1",
+ parsed_url.host_is_ip6);
+ HTTP_TEST ((parsed_url.host_offset == strlen ("https://[")),
+ "host_offset=%u should be %u", parsed_url.host_offset,
+ strlen ("https://["));
+ HTTP_TEST ((parsed_url.host_len == strlen ("dead:beef::1234")),
+ "host_len=%u should be %u", parsed_url.host_len,
+ strlen ("dead:beef::1234"));
+ HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 443),
+ "port=%u should be 443", clib_net_to_host_u16 (parsed_url.port));
+ HTTP_TEST ((parsed_url.path_offset == strlen ("https://[dead:beef::1234]/")),
+ "path_offset=%u should be %u", parsed_url.path_offset,
+ strlen ("https://[dead:beef::1234]/"));
+ HTTP_TEST ((parsed_url.path_len == strlen ("abcd")),
+ "path_len=%u should be %u", parsed_url.path_len, strlen ("abcd"));
+ vec_free (url);
+
+ url = format (0, "http://[::ffff:192.0.2.128]:8080/");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv == 0), "'%v' should be valid", url);
+ HTTP_TEST ((parsed_url.scheme == HTTP_URL_SCHEME_HTTP),
+ "scheme should be http");
+ HTTP_TEST ((parsed_url.host_is_ip6 == 1), "host_is_ip6=%u should be 1",
+ parsed_url.host_is_ip6);
+ HTTP_TEST ((parsed_url.host_offset == strlen ("http://[")),
+ "host_offset=%u should be %u", parsed_url.host_offset,
+ strlen ("http://["));
+ HTTP_TEST ((parsed_url.host_len == strlen ("::ffff:192.0.2.128")),
+ "host_len=%u should be %u", parsed_url.host_len,
+ strlen ("::ffff:192.0.2.128"));
+ HTTP_TEST ((clib_net_to_host_u16 (parsed_url.port) == 8080),
+ "port=%u should be 8080", clib_net_to_host_u16 (parsed_url.port));
+ HTTP_TEST ((parsed_url.path_len == 0), "path_len=%u should be 0",
+ parsed_url.path_len);
+ vec_free (url);
+
+ url = format (0, "http://[dead:beef::1234/abc");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", url);
+ vec_free (url);
+
+ url = format (0, "http://[dead|beef::1234]/abc");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", url);
+ vec_free (url);
+
+ url = format (0, "http:example.org:8080/abcd");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", url);
+ vec_free (url);
+
+ url = format (0, "htt://example.org:8080/abcd");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", url);
+ vec_free (url);
+
+ url = format (0, "http://");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", url);
+ vec_free (url);
+
+ url = format (0, "http:///abcd");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", url);
+ vec_free (url);
+
+ url = format (0, "http://example.org:808080/abcd");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", url);
+ vec_free (url);
+
+ url = format (0, "http://example.org/a%%3Xbcd");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", url);
+ vec_free (url);
+
+ url = format (0, "http://example.org/a%%3");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", url);
+ vec_free (url);
+
+ url = format (0, "http://example.org/a[b]cd");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", url);
+ vec_free (url);
+
+ url = format (0, "http://exa[m]ple.org/abcd");
+ rv = http_parse_absolute_form (url, &parsed_url);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", url);
+ vec_free (url);
+
+ return 0;
+}
+
+static int
+http_test_parse_masque_host_port (vlib_main_t *vm)
+{
+ u8 *path = 0;
+ http_uri_t target;
+ int rv;
+
+ path = format (0, "10.10.2.45/443/");
+ rv = http_parse_masque_host_port (path, vec_len (path), &target);
+ HTTP_TEST ((rv == 0), "'%v' should be valid", path);
+ HTTP_TEST ((target.is_ip4 == 1), "is_ip4=%d should be 1", target.is_ip4);
+ HTTP_TEST ((clib_net_to_host_u16 (target.port) == 443),
+ "port=%u should be 443", clib_net_to_host_u16 (target.port));
+ HTTP_TEST ((target.ip.ip4.data[0] == 10 && target.ip.ip4.data[1] == 10 &&
+ target.ip.ip4.data[2] == 2 && target.ip.ip4.data[3] == 45),
+ "target.ip=%U should be 10.10.2.45", format_ip4_address,
+ &target.ip.ip4);
+ vec_free (path);
+
+ path = format (0, "dead%%3Abeef%%3A%%3A1234/80/");
+ rv = http_parse_masque_host_port (path, vec_len (path), &target);
+ HTTP_TEST ((rv == 0), "'%v' should be valid", path);
+ HTTP_TEST ((target.is_ip4 == 0), "is_ip4=%d should be 0", target.is_ip4);
+ HTTP_TEST ((clib_net_to_host_u16 (target.port) == 80),
+ "port=%u should be 80", clib_net_to_host_u16 (target.port));
+ HTTP_TEST ((clib_net_to_host_u16 (target.ip.ip6.as_u16[0]) == 0xdead &&
+ clib_net_to_host_u16 (target.ip.ip6.as_u16[1]) == 0xbeef &&
+ target.ip.ip6.as_u16[2] == 0 && target.ip.ip6.as_u16[3] == 0 &&
+ target.ip.ip6.as_u16[4] == 0 && target.ip.ip6.as_u16[5] == 0 &&
+ target.ip.ip6.as_u16[6] == 0 &&
+ clib_net_to_host_u16 (target.ip.ip6.as_u16[7]) == 0x1234),
+ "target.ip=%U should be dead:beef::1234", format_ip6_address,
+ &target.ip.ip6);
+ vec_free (path);
+
+ path = format (0, "example.com/443/");
+ rv = http_parse_masque_host_port (path, vec_len (path), &target);
+ HTTP_TEST ((rv != 0), "'%v' reg-name not supported", path);
+ vec_free (path);
+
+ path = format (0, "10.10.2.45/443443/");
+ rv = http_parse_masque_host_port (path, vec_len (path), &target);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", path);
+ vec_free (path);
+
+ path = format (0, "/443/");
+ rv = http_parse_masque_host_port (path, vec_len (path), &target);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", path);
+ vec_free (path);
+
+ path = format (0, "10.10.2.45/");
+ rv = http_parse_masque_host_port (path, vec_len (path), &target);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", path);
+ vec_free (path);
+
+ path = format (0, "10.10.2.45");
+ rv = http_parse_masque_host_port (path, vec_len (path), &target);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", path);
+ vec_free (path);
+
+ path = format (0, "10.10.2.45/443");
+ rv = http_parse_masque_host_port (path, vec_len (path), &target);
+ HTTP_TEST ((rv != 0), "'%v' should be invalid", path);
+ vec_free (path);
+
+ return 0;
+}
+
+static clib_error_t *
+test_http_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ int res = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "authority-form"))
+ res = http_test_authority_form (vm);
+ else if (unformat (input, "absolute-form"))
+ res = http_test_absolute_form (vm);
+ else if (unformat (input, "parse-masque-host-port"))
+ res = http_test_parse_masque_host_port (vm);
+ else if (unformat (input, "all"))
+ {
+ if ((res = http_test_authority_form (vm)))
+ goto done;
+ if ((res = http_test_absolute_form (vm)))
+ goto done;
+ if ((res = http_test_parse_masque_host_port (vm)))
+ goto done;
+ }
+ else
+ break;
+ }
+
+done:
+ if (res)
+ return clib_error_return (0, "HTTP unit test failed");
+ return 0;
+}
+
+VLIB_CLI_COMMAND (test_http_command) = {
+ .path = "test http",
+ .short_help = "http unit tests",
+ .function = test_http_command_fn,
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "HTTP - Unit Test",
+ .default_disabled = 1,
+};
diff --git a/src/plugins/http_static/builtinurl/json_urls.c b/src/plugins/http_static/builtinurl/json_urls.c
index 808893aac79..19c5245e4b2 100644
--- a/src/plugins/http_static/builtinurl/json_urls.c
+++ b/src/plugins/http_static/builtinurl/json_urls.c
@@ -20,77 +20,68 @@ hss_url_handler_rc_t
handle_get_version (hss_url_handler_args_t *args)
{
u8 *s = 0;
+ unformat_input_t input;
+ int verbose = 0;
+
+ if (args->query)
+ {
+ unformat_init_vector (&input, args->query);
+ if (unformat (&input, "verbose="))
+ {
+ if (unformat (&input, "true"))
+ verbose = 1;
+ }
+ }
s = format (s, "{\"vpp_details\": {");
s = format (s, " \"version\": \"%s\",", VPP_BUILD_VER);
+ if (verbose)
+ {
+ s = format (s, " \"build_by\": \"%s\",", VPP_BUILD_USER);
+ s = format (s, " \"build_host\": \"%s\",", VPP_BUILD_HOST);
+ s = format (s, " \"build_dir\": \"%s\",", VPP_BUILD_TOPDIR);
+ }
s = format (s, " \"build_date\": \"%s\"}}\r\n", VPP_BUILD_DATE);
args->data = s;
args->data_len = vec_len (s);
+ args->ct = HTTP_CONTENT_APP_JSON;
args->free_vec_data = 1;
return HSS_URL_HANDLER_OK;
}
-void
-trim_path_from_request (u8 *s, char *path)
-{
- u8 *cp;
- int trim_length = strlen (path) + 1 /* remove '?' */;
-
- /* Get rid of the path and question-mark */
- vec_delete (s, trim_length, 0);
-
- /* Tail trim irrelevant browser info */
- cp = s;
- while ((cp - s) < vec_len (s))
- {
- if (*cp == ' ')
- {
- /*
- * Makes request a vector which happens to look
- * like a c-string.
- */
- *cp = 0;
- vec_set_len (s, cp - s);
- break;
- }
- cp++;
- }
-}
-
hss_url_handler_rc_t
handle_get_interface_stats (hss_url_handler_args_t *args)
{
u8 *s = 0, *stats = 0;
- uword *p;
- u32 *sw_if_indices = 0;
+ u32 sw_if_index, *sw_if_indices = 0;
vnet_hw_interface_t *hi;
vnet_sw_interface_t *si;
char *q = "\"";
int i;
int need_comma = 0;
+ unformat_input_t input;
u8 *format_vnet_sw_interface_cntrs (u8 * s, vnet_interface_main_t * im,
vnet_sw_interface_t * si, int json);
vnet_main_t *vnm = vnet_get_main ();
vnet_interface_main_t *im = &vnm->interface_main;
/* Get stats for a single interface via http POST */
- if (args->reqtype == HTTP_REQ_POST)
+ if (args->req_type == HTTP_REQ_POST)
{
- trim_path_from_request (args->request, "interface_stats.json");
-
+ unformat_init_vector (&input, args->req_data);
/* Find the sw_if_index */
- p = hash_get (im->hw_interface_by_name, args->request);
- if (!p)
+ if (!unformat (&input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
{
s = format (s, "{\"interface_stats\": {[\n");
- s = format (s, " \"name\": \"%s\",", args->request);
+ s = format (s, " \"name\": \"%s\",", args->req_data);
s = format (s, " \"error\": \"%s\"", "UnknownInterface");
s = format (s, "]}\n");
goto out;
}
- vec_add1 (sw_if_indices, p[0]);
+ vec_add1 (sw_if_indices, sw_if_index);
}
else /* default, HTTP_BUILTIN_METHOD_GET */
{
@@ -127,6 +118,7 @@ handle_get_interface_stats (hss_url_handler_args_t *args)
out:
args->data = s;
args->data_len = vec_len (s);
+ args->ct = HTTP_CONTENT_APP_JSON;
args->free_vec_data = 1;
vec_free (sw_if_indices);
vec_free (stats);
@@ -167,6 +159,7 @@ handle_get_interface_list (hss_url_handler_args_t *args)
args->data = s;
args->data_len = vec_len (s);
+ args->ct = HTTP_CONTENT_APP_JSON;
args->free_vec_data = 1;
return HSS_URL_HANDLER_OK;
}
diff --git a/src/plugins/http_static/http_cache.c b/src/plugins/http_static/http_cache.c
index 8b9751b7f78..2e63e335d47 100644
--- a/src/plugins/http_static/http_cache.c
+++ b/src/plugins/http_static/http_cache.c
@@ -17,6 +17,8 @@
#include <vppinfra/bihash_template.c>
#include <vppinfra/unix.h>
#include <vlib/vlib.h>
+#include <sys/stat.h>
+#include <vppinfra/time_range.h>
static void
hss_cache_lock (hss_cache_t *hc)
@@ -153,7 +155,7 @@ lru_update (hss_cache_t *hc, hss_cache_entry_t *ep, f64 now)
static void
hss_cache_attach_entry (hss_cache_t *hc, u32 ce_index, u8 **data,
- u64 *data_len)
+ u64 *data_len, u8 **last_modified)
{
hss_cache_entry_t *ce;
@@ -162,6 +164,7 @@ hss_cache_attach_entry (hss_cache_t *hc, u32 ce_index, u8 **data,
ce->inuse++;
*data = ce->data;
*data_len = vec_len (ce->data);
+ *last_modified = ce->last_modified;
/* Update the cache entry, mark it in-use */
lru_update (hc, ce, vlib_time_now (vlib_get_main ()));
@@ -209,16 +212,15 @@ hss_cache_lookup (hss_cache_t *hc, u8 *path)
u32
hss_cache_lookup_and_attach (hss_cache_t *hc, u8 *path, u8 **data,
- u64 *data_len)
+ u64 *data_len, u8 **last_modified)
{
u32 ce_index;
-
/* Make sure nobody removes the entry while we look it up */
hss_cache_lock (hc);
ce_index = hss_cache_lookup (hc, path);
if (ce_index != ~0)
- hss_cache_attach_entry (hc, ce_index, data, data_len);
+ hss_cache_attach_entry (hc, ce_index, data, data_len, last_modified);
hss_cache_unlock (hc);
@@ -260,6 +262,7 @@ hss_cache_do_evictions (hss_cache_t *hc)
hc->cache_evictions++;
vec_free (ce->filename);
vec_free (ce->data);
+ vec_free (ce->last_modified);
if (hc->debug_level > 1)
clib_warning ("pool put index %d", ce - hc->cache_pool);
@@ -271,13 +274,15 @@ hss_cache_do_evictions (hss_cache_t *hc)
}
u32
-hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data, u64 *data_len)
+hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data, u64 *data_len,
+ u8 **last_modified)
{
BVT (clib_bihash_kv) kv;
hss_cache_entry_t *ce;
clib_error_t *error;
u8 *file_data;
u32 ce_index;
+ struct stat dm;
hss_cache_lock (hc);
@@ -298,11 +303,17 @@ hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data, u64 *data_len)
pool_get_zero (hc->cache_pool, ce);
ce->filename = vec_dup (path);
ce->data = file_data;
+ if (stat ((char *) path, &dm) == 0)
+ {
+ ce->last_modified =
+ format (0, "%U GMT", format_clib_timebase_time, (f64) dm.st_mtime);
+ }
/* Attach cache entry without additional lock */
ce->inuse++;
*data = file_data;
*data_len = vec_len (file_data);
+ *last_modified = ce->last_modified;
lru_add (hc, ce, vlib_time_now (vlib_get_main ()));
hc->cache_size += vec_len (ce->data);
@@ -364,6 +375,7 @@ hss_cache_clear (hss_cache_t *hc)
hc->cache_evictions++;
vec_free (ce->filename);
vec_free (ce->data);
+ vec_free (ce->last_modified);
if (hc->debug_level > 1)
clib_warning ("pool put index %d", ce - hc->cache_pool);
pool_put (hc->cache_pool, ce);
@@ -421,19 +433,19 @@ format_hss_cache (u8 *s, va_list *args)
{
s = format (s, "cache size %lld bytes, limit %lld bytes, evictions %lld",
hc->cache_size, hc->cache_limit, hc->cache_evictions);
- return 0;
+ return s;
}
vm = vlib_get_main ();
now = vlib_time_now (vm);
- s = format (s, "%U", format_hss_cache_entry, 0 /* header */, now);
+ s = format (s, "%U\n", format_hss_cache_entry, 0 /* header */, now);
for (index = hc->first_index; index != ~0;)
{
ce = pool_elt_at_index (hc->cache_pool, index);
index = ce->next_index;
- s = format (s, "%U", format_hss_cache_entry, ce, now);
+ s = format (s, "%U\n", format_hss_cache_entry, ce, now);
}
s = format (s, "%40s%12lld", "Total Size", hc->cache_size);
diff --git a/src/plugins/http_static/http_cache.h b/src/plugins/http_static/http_cache.h
index a89ed5e7e94..21f71a924d5 100644
--- a/src/plugins/http_static/http_cache.h
+++ b/src/plugins/http_static/http_cache.h
@@ -22,6 +22,9 @@ typedef struct hss_cache_entry_
{
/** Name of the file */
u8 *filename;
+ /** Last modified date, format:
+ * <day-name>, <day> <month> <year> <hour>:<minute>:<second> GMT */
+ u8 *last_modified;
/** Contents of the file, as a u8 * vector */
u8 *data;
/** Last time the cache entry was used */
@@ -58,9 +61,9 @@ typedef struct hss_cache_
} hss_cache_t;
u32 hss_cache_lookup_and_attach (hss_cache_t *hc, u8 *path, u8 **data,
- u64 *data_len);
+ u64 *data_len, u8 **last_modified);
u32 hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data,
- u64 *data_len);
+ u64 *data_len, u8 **last_modified);
void hss_cache_detach_entry (hss_cache_t *hc, u32 ce_index);
u32 hss_cache_clear (hss_cache_t *hc);
void hss_cache_init (hss_cache_t *hc, uword cache_size, u8 debug_level);
diff --git a/src/plugins/http_static/http_static.api b/src/plugins/http_static/http_static.api
index 4d6d8bfe9b5..dd4f513a420 100644
--- a/src/plugins/http_static/http_static.api
+++ b/src/plugins/http_static/http_static.api
@@ -2,7 +2,8 @@
/** \file
This file defines static http server control-plane API messages
*/
-option version = "2.1.0";
+
+option version = "2.2.0";
/** \brief Configure and enable the static http server
@param client_index - opaque cookie to identify the sender
@@ -16,6 +17,39 @@ option version = "2.1.0";
*/
autoreply define http_static_enable {
+ option deprecated;
+
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+ /* Typical options */
+ u32 fifo_size;
+ u32 cache_size_limit;
+ /* Unusual options */
+ u32 prealloc_fifos;
+ u32 private_segment_size;
+
+ /* Root of the html path */
+ string www_root[256];
+ /* The bind URI */
+ string uri[256];
+};
+
+/** \brief Configure and enable the static http server
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param fifo_size - size (in bytes) of the session FIFOs
+ @param cache_size_limit - size (in bytes) of the in-memory file data cache
+ @param max_age - how long a response is considered fresh (in seconds)
+ @param prealloc_fifos - number of preallocated fifos (usually 0)
+ @param private_segment_size - fifo segment size (usually 0)
+ @param www_root - html root path
+ @param uri - bind URI, defaults to "tcp://0.0.0.0/80"
+*/
+
+autoreply define http_static_enable_v2 {
/* Client identifier, set from api_main.my_client_index */
u32 client_index;
@@ -24,6 +58,7 @@ autoreply define http_static_enable {
/* Typical options */
u32 fifo_size;
u32 cache_size_limit;
+ u32 max_age [default=600];
/* Unusual options */
u32 prealloc_fifos;
u32 private_segment_size;
diff --git a/src/plugins/http_static/http_static.c b/src/plugins/http_static/http_static.c
index 8f8fe37b7c1..967b8474af8 100644
--- a/src/plugins/http_static/http_static.c
+++ b/src/plugins/http_static/http_static.c
@@ -66,7 +66,7 @@ hss_register_url_handler (hss_url_handler_fn fp, const char *url,
*/
static int
hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos,
- u32 private_segment_size, u8 *www_root, u8 *uri)
+ u32 private_segment_size, u8 *www_root, u8 *uri, u32 max_age)
{
hss_main_t *hsm = &hss_main;
int rv;
@@ -77,6 +77,7 @@ hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos,
hsm->private_segment_size = private_segment_size;
hsm->www_root = format (0, "%s%c", www_root, 0);
hsm->uri = format (0, "%s%c", uri, 0);
+ hsm->max_age = max_age;
if (vec_len (hsm->www_root) < 2)
return VNET_API_ERROR_INVALID_VALUE;
@@ -84,7 +85,10 @@ hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos,
if (hsm->app_index != ~0)
return VNET_API_ERROR_APP_ALREADY_ATTACHED;
- vnet_session_enable_disable (hsm->vlib_main, 1 /* turn on TCP, etc. */);
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
+ vnet_session_enable_disable (hsm->vlib_main, &args);
rv = hss_create (hsm->vlib_main);
switch (rv)
@@ -110,14 +114,33 @@ static void vl_api_http_static_enable_t_handler
mp->uri[ARRAY_LEN (mp->uri) - 1] = 0;
mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0;
- rv =
- hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit),
- ntohl (mp->prealloc_fifos),
- ntohl (mp->private_segment_size), mp->www_root, mp->uri);
+ rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit),
+ ntohl (mp->prealloc_fifos),
+ ntohl (mp->private_segment_size), mp->www_root, mp->uri,
+ HSS_DEFAULT_MAX_AGE);
REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_REPLY);
}
+/* API message handler */
+static void
+vl_api_http_static_enable_v2_t_handler (vl_api_http_static_enable_v2_t *mp)
+{
+ vl_api_http_static_enable_v2_reply_t *rmp;
+ hss_main_t *hsm = &hss_main;
+ int rv;
+
+ mp->uri[ARRAY_LEN (mp->uri) - 1] = 0;
+ mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0;
+
+ rv = hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit),
+ ntohl (mp->prealloc_fifos),
+ ntohl (mp->private_segment_size), mp->www_root, mp->uri,
+ ntohl (mp->max_age));
+
+ REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_V2_REPLY);
+}
+
#include <http_static/http_static.api.c>
static clib_error_t *
hss_api_init (vlib_main_t *vm)
diff --git a/src/plugins/http_static/http_static.h b/src/plugins/http_static/http_static.h
index 2850d356b74..fac24db4ec9 100644
--- a/src/plugins/http_static/http_static.h
+++ b/src/plugins/http_static/http_static.h
@@ -23,6 +23,8 @@
#include <vppinfra/error.h>
#include <http_static/http_cache.h>
+#define HSS_DEFAULT_MAX_AGE 600
+
/** @file http_static.h
* Static http server definitions
*/
@@ -45,13 +47,15 @@ typedef struct
/** Data length */
u64 data_len;
/** Current data send offset */
- u32 data_offset;
+ u64 data_offset;
/** Need to free data in detach_cache_entry */
int free_data;
/** File cache pool index */
u32 cache_pool_index;
- /** Content type, e.g. text, text/javascript, etc. */
- http_content_type_t content_type;
+ /** Response header list */
+ http_header_t *resp_headers;
+ /** Serialized headers to send */
+ u8 *headers_buf;
} hss_session_t;
typedef struct hss_session_handle_
@@ -79,8 +83,9 @@ typedef struct hss_url_handler_args_
/* Request args */
struct
{
- u8 *request;
- http_req_method_t reqtype;
+ u8 *query;
+ u8 *req_data;
+ http_req_method_t req_type;
};
/* Reply args */
@@ -90,6 +95,7 @@ typedef struct hss_url_handler_args_
uword data_len;
u8 free_vec_data;
http_status_code_t sc;
+ http_content_type_t ct;
};
};
} hss_url_handler_args_t;
@@ -152,6 +158,12 @@ typedef struct
u8 enable_url_handlers;
/** Max cache size before LRU occurs */
u64 cache_size;
+ /** How long a response is considered fresh (in seconds) */
+ u32 max_age;
+ /** Formatted max_age: "max-age=xyz" */
+ u8 *max_age_formatted;
+ /** Timeout during which client connection will stay open */
+ u32 keepalive_timeout;
/** hash table of file extensions to mime types string indices */
uword *mime_type_indices_by_file_extensions;
diff --git a/src/plugins/http_static/http_static_test.c b/src/plugins/http_static/http_static_test.c
index 3503a1b0812..f701c8b9ee7 100644
--- a/src/plugins/http_static/http_static_test.c
+++ b/src/plugins/http_static/http_static_test.c
@@ -18,6 +18,7 @@
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
#include <vppinfra/error.h>
+#include <http_static/http_static.h>
uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
@@ -126,6 +127,96 @@ api_http_static_enable (vat_main_t * vam)
return ret;
}
+static int
+api_http_static_enable_v2 (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_http_static_enable_v2_t *mp;
+ u64 tmp;
+ u8 *www_root = 0;
+ u8 *uri = 0;
+ u32 prealloc_fifos = 0;
+ u32 private_segment_size = 0;
+ u32 fifo_size = 8 << 10;
+ u32 cache_size_limit = 1 << 20;
+ u32 max_age = HSS_DEFAULT_MAX_AGE;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "www-root %s", &www_root))
+ ;
+ else if (unformat (line_input, "prealloc-fifos %d", &prealloc_fifos))
+ ;
+ else if (unformat (line_input, "private-segment-size %U",
+ unformat_memory_size, &tmp))
+ {
+ if (tmp >= 0x100000000ULL)
+ {
+ errmsg ("private segment size %llu, too large", tmp);
+ return -99;
+ }
+ private_segment_size = (u32) tmp;
+ }
+ else if (unformat (line_input, "fifo-size %U", unformat_memory_size,
+ &tmp))
+ {
+ if (tmp >= 0x100000000ULL)
+ {
+ errmsg ("fifo-size %llu, too large", tmp);
+ return -99;
+ }
+ fifo_size = (u32) tmp;
+ }
+ else if (unformat (line_input, "cache-size %U", unformat_memory_size,
+ &tmp))
+ {
+ if (tmp < (128ULL << 10))
+ {
+ errmsg ("cache-size must be at least 128kb");
+ return -99;
+ }
+ cache_size_limit = (u32) tmp;
+ }
+ else if (unformat (line_input, "max-age %d", &max_age))
+ ;
+ else if (unformat (line_input, "uri %s", &uri))
+ ;
+ else
+ {
+ errmsg ("unknown input `%U'", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (www_root == 0)
+ {
+ errmsg ("Must specify www-root");
+ return -99;
+ }
+
+ if (uri == 0)
+ uri = format (0, "tcp://0.0.0.0/80%c", 0);
+
+ /* Construct the API message */
+ M (HTTP_STATIC_ENABLE_V2, mp);
+ strncpy_s ((char *) mp->www_root, 256, (const char *) www_root, 256);
+ strncpy_s ((char *) mp->uri, 256, (const char *) uri, 256);
+ mp->fifo_size = ntohl (fifo_size);
+ mp->cache_size_limit = ntohl (cache_size_limit);
+ mp->prealloc_fifos = ntohl (prealloc_fifos);
+ mp->private_segment_size = ntohl (private_segment_size);
+ mp->max_age = ntohl (max_age);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
#include <http_static/http_static.api_test.c>
/*
diff --git a/src/plugins/http_static/static_server.c b/src/plugins/http_static/static_server.c
index 040cdca9d7a..d1ece75ce37 100644
--- a/src/plugins/http_static/static_server.c
+++ b/src/plugins/http_static/static_server.c
@@ -19,6 +19,9 @@
#include <sys/stat.h>
#include <unistd.h>
+#include <http/http_header_names.h>
+#include <http/http_content_types.h>
+
/** @file static_server.c
* Static http server, sufficient to serve .html / .css / .js content.
*/
@@ -55,8 +58,6 @@ hss_session_free (hss_session_t *hs)
{
hss_main_t *hsm = &hss_main;
- pool_put (hsm->sessions[hs->thread_index], hs);
-
if (CLIB_DEBUG)
{
u32 save_thread_index;
@@ -65,6 +66,8 @@ hss_session_free (hss_session_t *hs)
memset (hs, 0xfa, sizeof (*hs));
hs->thread_index = save_thread_index;
}
+
+ pool_put (hsm->sessions[hs->thread_index], hs);
}
/** \brief Disconnect a session
@@ -83,48 +86,84 @@ start_send_data (hss_session_t *hs, http_status_code_t status)
{
http_msg_t msg;
session_t *ts;
+ u8 *headers_buf = 0;
+ u32 n_enq;
+ u64 to_send;
int rv;
ts = session_get (hs->vpp_session_index, hs->thread_index);
+ if (vec_len (hs->resp_headers))
+ {
+ headers_buf = http_serialize_headers (hs->resp_headers);
+ vec_free (hs->resp_headers);
+ msg.data.headers_offset = 0;
+ msg.data.headers_len = vec_len (headers_buf);
+ }
+ else
+ {
+ msg.data.headers_offset = 0;
+ msg.data.headers_len = 0;
+ }
+
msg.type = HTTP_MSG_REPLY;
msg.code = status;
- msg.content_type = hs->content_type;
- msg.data.len = hs->data_len;
+ msg.data.body_len = hs->data_len;
+ msg.data.len = msg.data.body_len + msg.data.headers_len;
- if (hs->data_len > hss_main.use_ptr_thresh)
+ if (msg.data.len > hss_main.use_ptr_thresh)
{
msg.data.type = HTTP_MSG_DATA_PTR;
rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
ASSERT (rv == sizeof (msg));
+ if (msg.data.headers_len)
+ {
+ hs->headers_buf = headers_buf;
+ uword headers = pointer_to_uword (hs->headers_buf);
+ rv =
+ svm_fifo_enqueue (ts->tx_fifo, sizeof (headers), (u8 *) &headers);
+ ASSERT (rv == sizeof (headers));
+ }
+
uword data = pointer_to_uword (hs->data);
rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (data), (u8 *) &data);
- ASSERT (rv == sizeof (sizeof (data)));
+ ASSERT (rv == sizeof (data));
goto done;
}
msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.body_offset = msg.data.headers_len;
rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
ASSERT (rv == sizeof (msg));
- if (!msg.data.len)
+ if (msg.data.headers_len)
+ {
+ rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (headers_buf), headers_buf);
+ ASSERT (rv == msg.data.headers_len);
+ vec_free (headers_buf);
+ }
+
+ if (!msg.data.body_len)
goto done;
- rv = svm_fifo_enqueue (ts->tx_fifo, hs->data_len, hs->data);
+ to_send = hs->data_len;
+ n_enq = clib_min (svm_fifo_size (ts->tx_fifo), to_send);
+
+ rv = svm_fifo_enqueue (ts->tx_fifo, n_enq, hs->data);
- if (rv != hs->data_len)
+ if (rv < to_send)
{
- hs->data_offset = rv;
+ hs->data_offset = (rv > 0) ? rv : 0;
svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
}
done:
if (svm_fifo_set_event (ts->tx_fifo))
- session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+ session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
}
__clib_export void
@@ -142,6 +181,15 @@ hss_session_send_data (hss_url_handler_args_t *args)
hs->data = args->data;
hs->data_len = args->data_len;
hs->free_data = args->free_vec_data;
+
+ /* Set content type only if we have some response data */
+ if (hs->data_len)
+ {
+ http_add_header (&hs->resp_headers,
+ http_header_name_token (HTTP_HEADER_CONTENT_TYPE),
+ http_content_type_token (args->ct));
+ }
+
start_send_data (hs, args->sc);
}
@@ -212,30 +260,27 @@ content_type_from_request (u8 *request)
static int
try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
- u8 *request)
+ u8 *target_path, u8 *target_query, u8 *data)
{
http_status_code_t sc = HTTP_STATUS_OK;
hss_url_handler_args_t args = {};
uword *p, *url_table;
- http_content_type_t type;
int rv;
- if (!hsm->enable_url_handlers || !request)
+ if (!hsm->enable_url_handlers || !target_path)
return -1;
/* zero-length? try "index.html" */
- if (vec_len (request) == 0)
+ if (vec_len (target_path) == 0)
{
- request = format (request, "index.html");
+ target_path = format (target_path, "index.html");
}
- type = content_type_from_request (request);
-
/* Look for built-in GET / POST handlers */
url_table =
(rt == HTTP_REQ_GET) ? hsm->get_url_handlers : hsm->post_url_handlers;
- p = hash_get_mem (url_table, request);
+ p = hash_get_mem (url_table, target_path);
if (!p)
return -1;
@@ -244,10 +289,12 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
hs->cache_pool_index = ~0;
if (hsm->debug_level > 0)
- clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST", request);
+ clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST",
+ target_path);
- args.reqtype = rt;
- args.request = request;
+ args.req_type = rt;
+ args.query = target_query;
+ args.req_data = data;
args.sh.thread_index = hs->thread_index;
args.sh.session_index = hs->session_index;
@@ -260,18 +307,25 @@ try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
if (rv == HSS_URL_HANDLER_ERROR)
{
clib_warning ("builtin handler %llx hit on %s '%s' but failed!", p[0],
- (rt == HTTP_REQ_GET) ? "GET" : "POST", request);
- sc = HTTP_STATUS_NOT_FOUND;
+ (rt == HTTP_REQ_GET) ? "GET" : "POST", target_path);
+ sc = HTTP_STATUS_BAD_GATEWAY;
}
hs->data = args.data;
hs->data_len = args.data_len;
hs->free_data = args.free_vec_data;
- hs->content_type = type;
+
+ /* Set content type only if we have some response data */
+ if (hs->data_len)
+ {
+ http_add_header (&hs->resp_headers,
+ http_header_name_token (HTTP_HEADER_CONTENT_TYPE),
+ http_content_type_token (args.ct));
+ }
start_send_data (hs, sc);
- if (!hs->data)
+ if (!hs->data_len)
hss_session_disconnect_transport (hs);
return 0;
@@ -335,18 +389,20 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path)
}
redirect =
- format (0,
- "Location: http%s://%U%s%s\r\n\r\n",
- proto == TRANSPORT_PROTO_TLS ? "s" : "", format_ip46_address,
- &endpt.ip, endpt.is_ip4, print_port ? port_str : (u8 *) "", path);
+ format (0, "http%s://%U%s%s", proto == TRANSPORT_PROTO_TLS ? "s" : "",
+ format_ip46_address, &endpt.ip, endpt.is_ip4,
+ print_port ? port_str : (u8 *) "", path);
if (hsm->debug_level > 0)
clib_warning ("redirect: %s", redirect);
vec_free (port_str);
- hs->data = redirect;
- hs->data_len = vec_len (redirect);
+ http_add_header (&hs->resp_headers,
+ http_header_name_token (HTTP_HEADER_LOCATION),
+ (const char *) redirect, vec_len (redirect));
+ hs->data = redirect; /* TODO: find better way */
+ hs->data_len = 0;
hs->free_data = 1;
return HTTP_STATUS_MOVED;
@@ -354,29 +410,28 @@ try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path)
static int
try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
- u8 *request)
+ u8 *target)
{
http_status_code_t sc = HTTP_STATUS_OK;
- u8 *path;
+ u8 *path, *sanitized_path;
u32 ce_index;
http_content_type_t type;
+ u8 *last_modified;
/* Feature not enabled */
if (!hsm->www_root)
return -1;
- type = content_type_from_request (request);
+ /* Remove dot segments to prevent path traversal */
+ sanitized_path = http_path_remove_dot_segments (target);
/*
* Construct the file to open
- * Browsers are capable of sporadically including a leading '/'
*/
- if (!request)
+ if (!target)
path = format (0, "%s%c", hsm->www_root, 0);
- else if (request[0] == '/')
- path = format (0, "%s%s%c", hsm->www_root, request, 0);
else
- path = format (0, "%s/%s%c", hsm->www_root, request, 0);
+ path = format (0, "%s/%s%c", hsm->www_root, sanitized_path, 0);
if (hsm->debug_level > 0)
clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST", path);
@@ -386,8 +441,8 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
hs->data_offset = 0;
- ce_index =
- hss_cache_lookup_and_attach (&hsm->cache, path, &hs->data, &hs->data_len);
+ ce_index = hss_cache_lookup_and_attach (&hsm->cache, path, &hs->data,
+ &hs->data_len, &last_modified);
if (ce_index == ~0)
{
if (!file_path_is_valid (path))
@@ -406,8 +461,8 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
sc = try_index_file (hsm, hs, path);
goto done;
}
- ce_index =
- hss_cache_add_and_attach (&hsm->cache, path, &hs->data, &hs->data_len);
+ ce_index = hss_cache_add_and_attach (&hsm->cache, path, &hs->data,
+ &hs->data_len, &last_modified);
if (ce_index == ~0)
{
sc = HTTP_STATUS_INTERNAL_ERROR;
@@ -418,43 +473,61 @@ try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
hs->path = path;
hs->cache_pool_index = ce_index;
-done:
+ /* Set following headers only for happy path:
+ * Content-Type
+ * Cache-Control max-age
+ */
+ type = content_type_from_request (target);
+ http_add_header (&hs->resp_headers,
+ http_header_name_token (HTTP_HEADER_CONTENT_TYPE),
+ http_content_type_token (type));
+ http_add_header (
+ &hs->resp_headers, http_header_name_token (HTTP_HEADER_CACHE_CONTROL),
+ (const char *) hsm->max_age_formatted, vec_len (hsm->max_age_formatted));
+ http_add_header (&hs->resp_headers,
+ http_header_name_token (HTTP_HEADER_LAST_MODIFIED),
+ (const char *) last_modified, vec_len (last_modified));
- hs->content_type = type;
+done:
+ vec_free (sanitized_path);
start_send_data (hs, sc);
- if (!hs->data)
+ if (!hs->data_len)
hss_session_disconnect_transport (hs);
return 0;
}
-static int
-handle_request (hss_session_t *hs, http_req_method_t rt, u8 *request)
+static void
+handle_request (hss_session_t *hs, http_req_method_t rt, u8 *target_path,
+ u8 *target_query, u8 *data)
{
hss_main_t *hsm = &hss_main;
- if (!try_url_handler (hsm, hs, rt, request))
- return 0;
+ if (!try_url_handler (hsm, hs, rt, target_path, target_query, data))
+ return;
- if (!try_file_handler (hsm, hs, rt, request))
- return 0;
+ if (!try_file_handler (hsm, hs, rt, target_path))
+ return;
/* Handler did not find anything return 404 */
start_send_data (hs, HTTP_STATUS_NOT_FOUND);
hss_session_disconnect_transport (hs);
-
- return 0;
}
static int
hss_ts_rx_callback (session_t *ts)
{
hss_session_t *hs;
- u8 *request = 0;
+ u8 *target_path = 0, *target_query = 0, *data = 0;
http_msg_t msg;
int rv;
hs = hss_session_get (ts->thread_index, ts->opaque);
+ if (hs->free_data)
+ vec_free (hs->data);
+ hs->data = 0;
+ hs->resp_headers = 0;
+ vec_free (hs->headers_buf);
/* Read the http message header */
rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
@@ -463,26 +536,66 @@ hss_ts_rx_callback (session_t *ts)
if (msg.type != HTTP_MSG_REQUEST ||
(msg.method_type != HTTP_REQ_GET && msg.method_type != HTTP_REQ_POST))
{
- hs->data = 0;
+ http_add_header (&hs->resp_headers,
+ http_header_name_token (HTTP_HEADER_ALLOW),
+ http_token_lit ("GET, POST"));
start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
- return 0;
+ goto done;
}
- /* Read request */
- if (msg.data.len)
+ if (msg.data.target_form != HTTP_TARGET_ORIGIN_FORM)
{
- vec_validate (request, msg.data.len - 1);
- rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, request);
- ASSERT (rv == msg.data.len);
- /* request must be a proper C-string in addition to a vector */
- vec_add1 (request, 0);
+ start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
+ goto done;
}
- /* Find and send data */
- handle_request (hs, msg.method_type, request);
+ /* Read target path */
+ if (msg.data.target_path_len)
+ {
+ vec_validate (target_path, msg.data.target_path_len - 1);
+ rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_path_offset,
+ msg.data.target_path_len, target_path);
+ ASSERT (rv == msg.data.target_path_len);
+ if (http_validate_abs_path_syntax (target_path, 0))
+ {
+ start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
+ goto done;
+ }
+ /* Target path must be a proper C-string in addition to a vector */
+ vec_add1 (target_path, 0);
+ }
- vec_free (request);
+ /* Read target query */
+ if (msg.data.target_query_len)
+ {
+ vec_validate (target_query, msg.data.target_query_len - 1);
+ rv = svm_fifo_peek (ts->rx_fifo, msg.data.target_query_offset,
+ msg.data.target_query_len, target_query);
+ ASSERT (rv == msg.data.target_query_len);
+ if (http_validate_query_syntax (target_query, 0))
+ {
+ start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
+ goto done;
+ }
+ }
+ /* Read body */
+ if (msg.data.body_len)
+ {
+ vec_validate (data, msg.data.body_len - 1);
+ rv = svm_fifo_peek (ts->rx_fifo, msg.data.body_offset, msg.data.body_len,
+ data);
+ ASSERT (rv == msg.data.body_len);
+ }
+
+ /* Find and send data */
+ handle_request (hs, msg.method_type, target_path, target_query, data);
+
+done:
+ vec_free (target_path);
+ vec_free (target_query);
+ vec_free (data);
+ svm_fifo_dequeue_drop (ts->rx_fifo, msg.data.len);
return 0;
}
@@ -490,7 +603,8 @@ static int
hss_ts_tx_callback (session_t *ts)
{
hss_session_t *hs;
- u32 to_send;
+ u32 n_enq;
+ u64 to_send;
int rv;
hs = hss_session_get (ts->thread_index, ts->opaque);
@@ -498,7 +612,9 @@ hss_ts_tx_callback (session_t *ts)
return 0;
to_send = hs->data_len - hs->data_offset;
- rv = svm_fifo_enqueue (ts->tx_fifo, to_send, hs->data + hs->data_offset);
+ n_enq = clib_min (svm_fifo_size (ts->tx_fifo), to_send);
+
+ rv = svm_fifo_enqueue (ts->tx_fifo, n_enq, hs->data + hs->data_offset);
if (rv <= 0)
{
@@ -513,7 +629,7 @@ hss_ts_tx_callback (session_t *ts)
}
if (svm_fifo_set_event (ts->tx_fifo))
- session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+ session_program_tx_io_evt (ts->handle, SESSION_IO_EVT_TX);
return 0;
}
@@ -607,6 +723,7 @@ hss_ts_cleanup (session_t *s, session_cleanup_ntf_t ntf)
hs->data = 0;
hs->data_offset = 0;
hs->free_data = 0;
+ vec_free (hs->headers_buf);
vec_free (hs->path);
hss_session_free (hs);
@@ -630,7 +747,7 @@ hss_attach ()
hss_main_t *hsm = &hss_main;
u64 options[APP_OPTIONS_N_OPTIONS];
vnet_app_attach_args_t _a, *a = &_a;
- u32 segment_size = 128 << 20;
+ u64 segment_size = 128 << 20;
clib_memset (a, 0, sizeof (*a));
clib_memset (options, 0, sizeof (options));
@@ -687,6 +804,7 @@ hss_listen (void)
vnet_listen_args_t _a, *a = &_a;
char *uri = "tcp://0.0.0.0/80";
u8 need_crypto;
+ transport_endpt_ext_cfg_t *ext_cfg;
int rv;
clib_memset (a, 0, sizeof (*a));
@@ -703,17 +821,21 @@ hss_listen (void)
sep.transport_proto = TRANSPORT_PROTO_HTTP;
clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
+ ext_cfg = session_endpoint_add_ext_cfg (
+ &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_HTTP, sizeof (ext_cfg->opaque));
+ ext_cfg->opaque = hsm->keepalive_timeout;
+
if (need_crypto)
{
- session_endpoint_alloc_ext_cfg (&a->sep_ext,
- TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
- a->sep_ext.ext_cfg->crypto.ckpair_index = hsm->ckpair_index;
+ ext_cfg = session_endpoint_add_ext_cfg (
+ &a->sep_ext, TRANSPORT_ENDPT_EXT_CFG_CRYPTO,
+ sizeof (transport_endpt_crypto_cfg_t));
+ ext_cfg->crypto.ckpair_index = hsm->ckpair_index;
}
rv = vnet_listen (a);
- if (need_crypto)
- clib_mem_free (a->sep_ext.ext_cfg);
+ session_endpoint_free_ext_cfgs (&a->sep_ext);
return rv;
}
@@ -757,6 +879,8 @@ hss_create (vlib_main_t *vm)
if (hsm->enable_url_handlers)
hss_url_handlers_init (hsm);
+ hsm->max_age_formatted = format (0, "max-age=%d", hsm->max_age);
+
return 0;
}
@@ -777,6 +901,8 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
hsm->private_segment_size = 0;
hsm->fifo_size = 0;
hsm->cache_size = 10 << 20;
+ hsm->max_age = HSS_DEFAULT_MAX_AGE;
+ hsm->keepalive_timeout = 60;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -801,6 +927,9 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "debug %d", &hsm->debug_level))
;
+ else if (unformat (line_input, "keepalive-timeout %d",
+ &hsm->keepalive_timeout))
+ ;
else if (unformat (line_input, "debug"))
hsm->debug_level = 1;
else if (unformat (line_input, "ptr-thresh %U", unformat_memory_size,
@@ -808,6 +937,8 @@ hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "url-handlers"))
hsm->enable_url_handlers = 1;
+ else if (unformat (line_input, "max-age %d", &hsm->max_age))
+ ;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -836,7 +967,10 @@ no_input:
goto done;
}
- vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
+ vnet_session_enable_disable (vm, &args);
if ((rv = hss_create (vm)))
{
@@ -859,14 +993,16 @@ done:
* http static server www-root /tmp/www uri tcp://0.0.0.0/80 cache-size 2m
* @cliend
* @cliexcmd{http static server www-root <path> [prealloc-fios <nn>]
- * [private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]}
+ * [private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]
+ * [keepalive-timeout <nn>]}
?*/
VLIB_CLI_COMMAND (hss_create_command, static) = {
.path = "http static server",
.short_help =
"http static server www-root <path> [prealloc-fifos <nn>]\n"
- "[private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]\n"
- "[ptr-thresh <nn>] [url-handlers] [debug [nn]]\n",
+ "[private-segment-size <nnMG>] [fifo-size <nbytes>] [max-age <nseconds>]\n"
+ "[uri <uri>] [ptr-thresh <nn>] [url-handlers] [debug [nn]]\n"
+ "[keepalive-timeout <nn>]\n",
.function = hss_create_command_fn,
};
@@ -876,7 +1012,7 @@ format_hss_session (u8 *s, va_list *args)
hss_session_t *hs = va_arg (*args, hss_session_t *);
int __clib_unused verbose = va_arg (*args, int);
- s = format (s, "\n path %s, data length %u, data_offset %u",
+ s = format (s, "\n path %s, data length %llu, data_offset %llu",
hs->path ? hs->path : (u8 *) "[none]", hs->data_len,
hs->data_offset);
return s;
diff --git a/src/plugins/ikev2/CMakeLists.txt b/src/plugins/ikev2/CMakeLists.txt
index 568271ed7d9..dd2b49d6651 100644
--- a/src/plugins/ikev2/CMakeLists.txt
+++ b/src/plugins/ikev2/CMakeLists.txt
@@ -27,6 +27,7 @@ add_vpp_plugin(ikev2
ikev2_crypto.c
ikev2_format.c
ikev2_payload.c
+ ikev2_handoff.c
API_FILES
ikev2_types.api
diff --git a/src/plugins/ikev2/ikev2.api b/src/plugins/ikev2/ikev2.api
index de276e7f3ea..e2ff8fb8268 100644
--- a/src/plugins/ikev2/ikev2.api
+++ b/src/plugins/ikev2/ikev2.api
@@ -658,6 +658,12 @@ counters ikev2 {
units "packets";
description "IKE AUTH SA requests received";
};
+ handoff {
+ severity info;
+ type counter64;
+ units "packets";
+ description "IKE packets handoff";
+ };
};
paths {
"/err/ikev2-ip4" "ike";
diff --git a/src/plugins/ikev2/ikev2.c b/src/plugins/ikev2/ikev2.c
index 9bea2c96d12..f66469a24d1 100644
--- a/src/plugins/ikev2/ikev2.c
+++ b/src/plugins/ikev2/ikev2.c
@@ -97,6 +97,7 @@ format_ikev2_gen_sa_error (u8 * s, va_list * args)
typedef enum
{
IKEV2_NEXT_IP4_LOOKUP,
+ IKEV2_NEXT_IP4_HANDOFF,
IKEV2_NEXT_IP4_ERROR_DROP,
IKEV2_IP4_N_NEXT,
} ikev2_ip4_next_t;
@@ -104,6 +105,7 @@ typedef enum
typedef enum
{
IKEV2_NEXT_IP6_LOOKUP,
+ IKEV2_NEXT_IP6_HANDOFF,
IKEV2_NEXT_IP6_ERROR_DROP,
IKEV2_IP6_N_NEXT,
} ikev2_ip6_next_t;
@@ -3187,6 +3189,7 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
ikev2_main_per_thread_data_t *ptd = ikev2_get_per_thread_data ();
+ u32 thread_index = vm->thread_index;
ikev2_stats_t _stats, *stats = &_stats;
int res;
@@ -3213,6 +3216,14 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
int ip_hdr_sz = 0;
int is_req = 0;
+ if (PREDICT_TRUE (thread_index != km->handoff_thread))
+ {
+ vlib_node_increment_counter (vm, node->node_index,
+ IKEV2_ERROR_HANDOFF, 1);
+
+ next[0] = is_ip4 ? IKEV2_NEXT_IP4_HANDOFF : IKEV2_NEXT_IP6_HANDOFF;
+ goto out;
+ }
if (natt)
{
u8 *ptr = vlib_buffer_get_current (b0);
@@ -3723,6 +3734,8 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
ikev2_delete_sa (ptd, sa0);
}
+
+ out:
if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
&& (b0->flags & VLIB_BUFFER_IS_TRACED)))
{
@@ -3775,6 +3788,7 @@ VLIB_REGISTER_NODE (ikev2_node_ip4,static) = {
.n_next_nodes = IKEV2_IP4_N_NEXT,
.next_nodes = {
[IKEV2_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IKEV2_NEXT_IP4_HANDOFF] = "ikev2-ip4-handoff",
[IKEV2_NEXT_IP4_ERROR_DROP] = "error-drop",
},
};
@@ -3792,6 +3806,7 @@ VLIB_REGISTER_NODE (ikev2_node_ip4_natt,static) = {
.n_next_nodes = IKEV2_IP4_N_NEXT,
.next_nodes = {
[IKEV2_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IKEV2_NEXT_IP4_HANDOFF] = "ikev2-ip4-natt-handoff",
[IKEV2_NEXT_IP4_ERROR_DROP] = "error-drop",
},
};
@@ -3809,6 +3824,7 @@ VLIB_REGISTER_NODE (ikev2_node_ip6,static) = {
.n_next_nodes = IKEV2_IP6_N_NEXT,
.next_nodes = {
[IKEV2_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IKEV2_NEXT_IP6_HANDOFF] = "ikev2-ip6-handoff",
[IKEV2_NEXT_IP6_ERROR_DROP] = "error-drop",
},
};
@@ -5126,6 +5142,8 @@ ikev2_init (vlib_main_t * vm)
km->liveness_period = IKEV2_LIVENESS_PERIOD_CHECK;
km->liveness_max_retries = IKEV2_LIVENESS_RETRIES;
+ km->handoff_thread = vlib_num_workers () ? 1 : 0;
+
return 0;
}
@@ -5133,6 +5151,31 @@ VLIB_INIT_FUNCTION (ikev2_init) = {
.runs_after = VLIB_INITS ("ipsec_init", "ipsec_punt_init"),
};
+static clib_error_t *
+ikev2_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ ikev2_main_t *km = &ikev2_main;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "handoff-thread %d", &km->handoff_thread))
+ {
+ if (km->handoff_thread > vlib_num_workers ())
+ {
+ return clib_error_return (0, "wrong handoff-thread %d",
+ km->handoff_thread);
+ }
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+ }
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (ikev2_config, "ikev2");
+
static u8
ikev2_mngr_process_child_sa (ikev2_sa_t * sa, ikev2_child_sa_t * csa,
u8 del_old_ids)
@@ -5447,6 +5490,7 @@ ikev2_send_informational_request (ikev2_sa_t * sa)
}
dp = sa->dst_port ? sa->dst_port : ikev2_get_port (sa);
+
ikev2_send_ike (km->vlib_main, src, dst, bi0, len, ikev2_get_port (sa), dp,
sa->sw_if_index);
}
@@ -5625,6 +5669,15 @@ ikev2_lazy_init (ikev2_main_t *km)
if (!km->dns_resolve_name_ptr)
ikev2_log_error ("cannot load symbols from dns plugin");
+ km->handoff_ip4_fq_index =
+ vlib_frame_queue_main_init (ikev2_node_ip4.index, 0);
+
+ km->handoff_ip4_natt_fq_index =
+ vlib_frame_queue_main_init (ikev2_node_ip4_natt.index, 0);
+
+ km->handoff_ip6_fq_index =
+ vlib_frame_queue_main_init (ikev2_node_ip6.index, 0);
+
/* wake up ikev2 process */
vlib_process_signal_event (vlib_get_first_main (),
ikev2_mngr_process_node.index, 0, 0);
diff --git a/src/plugins/ikev2/ikev2_api.c b/src/plugins/ikev2/ikev2_api.c
index a3e71668126..e09bde3cbe2 100644
--- a/src/plugins/ikev2/ikev2_api.c
+++ b/src/plugins/ikev2/ikev2_api.c
@@ -173,7 +173,7 @@ send_profile (ikev2_profile_t * profile, vl_api_registration_t * reg,
rmp->profile.lifetime_jitter = profile->lifetime_jitter;
rmp->profile.handover = profile->handover;
- vl_api_ikev2_profile_t_endian (&rmp->profile);
+ vl_api_ikev2_profile_t_endian (&rmp->profile, 1 /* to network */);
vl_api_send_msg (reg, (u8 *) rmp);
}
@@ -291,7 +291,7 @@ send_sa (ikev2_sa_t * sa, vl_api_ikev2_sa_dump_t * mp, u32 api_sa_index)
ikev2_copy_stats (&rsa->stats, &sa->stats);
- vl_api_ikev2_sa_t_endian(rsa);
+ vl_api_ikev2_sa_t_endian (rsa, 1 /* to network */);
});
}
@@ -382,7 +382,7 @@ send_sa_v2 (ikev2_sa_t *sa, vl_api_ikev2_sa_v2_dump_t *mp, u32 api_sa_index)
ikev2_copy_stats (&rsa->stats, &sa->stats);
- vl_api_ikev2_sa_v2_t_endian (rsa);
+ vl_api_ikev2_sa_v2_t_endian (rsa, 1 /* to network */);
});
}
@@ -476,7 +476,7 @@ send_sa_v3 (ikev2_sa_t *sa, vl_api_ikev2_sa_v3_dump_t *mp, u32 api_sa_index)
ikev2_copy_stats (&rsa->stats, &sa->stats);
- vl_api_ikev2_sa_v3_t_endian (rsa);
+ vl_api_ikev2_sa_v3_t_endian (rsa, 1 /* to network */);
});
}
@@ -549,7 +549,7 @@ send_child_sa (ikev2_child_sa_t * child,
k->sk_ar_len);
}
- vl_api_ikev2_child_sa_t_endian (&rmp->child_sa);
+ vl_api_ikev2_child_sa_t_endian (&rmp->child_sa, 1 /* to network */);
});
}
@@ -577,6 +577,7 @@ vl_api_ikev2_child_sa_dump_t_handler (vl_api_ikev2_child_sa_dump_t * mp)
vec_foreach (child, sa->childs)
{
u32 child_sa_index = child - sa->childs;
+ sai = ikev2_encode_sa_index (sai, tkm - im->per_thread_data);
send_child_sa (child, mp, child_sa_index, sai);
}
}
@@ -628,7 +629,7 @@ send_child_sa_v2 (ikev2_child_sa_t *child, vl_api_ikev2_child_sa_v2_dump_t *mp,
clib_memcpy (&k->sk_ar, child->sk_ar, k->sk_ar_len);
}
- vl_api_ikev2_child_sa_v2_t_endian (&rmp->child_sa);
+ vl_api_ikev2_child_sa_v2_t_endian (&rmp->child_sa, 1 /* to network */);
});
}
@@ -700,7 +701,7 @@ static void
rmp->ts.sa_index = api_sa_index;
rmp->ts.child_sa_index = child_sa_index;
cp_ts (&rmp->ts, ts, mp->is_initiator);
- vl_api_ikev2_ts_t_endian (&rmp->ts);
+ vl_api_ikev2_ts_t_endian (&rmp->ts, 1 /* to network */);
});
}
}
diff --git a/src/plugins/ikev2/ikev2_crypto.c b/src/plugins/ikev2/ikev2_crypto.c
index 3d4ad0a28ed..58167e2322e 100644
--- a/src/plugins/ikev2/ikev2_crypto.c
+++ b/src/plugins/ikev2/ikev2_crypto.c
@@ -481,15 +481,14 @@ ikev2_encrypt_data (ikev2_main_per_thread_data_t * ptd, ikev2_sa_t * sa,
int
BN_bn2binpad (const BIGNUM * a, unsigned char *to, int tolen)
{
- int r = BN_bn2bin (a, to);
+ int r = BN_num_bytes (a);
ASSERT (tolen >= r);
int pad = tolen - r;
if (pad)
{
- vec_insert (to, pad, 0);
clib_memset (to, 0, pad);
- vec_dec_len (to, pad);
}
+ BN_bn2bin (a, to + pad);
return tolen;
}
#endif
diff --git a/src/plugins/ikev2/ikev2_handoff.c b/src/plugins/ikev2/ikev2_handoff.c
new file mode 100644
index 00000000000..8f55985bce8
--- /dev/null
+++ b/src/plugins/ikev2/ikev2_handoff.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <ikev2/ikev2_priv.h>
+
+extern ikev2_main_t ikev2_main;
+
+#define foreach_ikev2_handoff_error _ (CONGESTION_DROP, "congestion drop")
+
+typedef enum
+{
+#define _(sym, str) IKEV2_HANDOFF_ERROR_##sym,
+ foreach_ikev2_handoff_error
+#undef _
+ IKEV2_HANDOFF_N_ERROR,
+} ikev2_handoff_error_t;
+
+static char *ikev2_handoff_error_strings[] = {
+#define _(sym, string) string,
+ foreach_ikev2_handoff_error
+#undef _
+};
+
+typedef struct ikev2_handoff_trace_t_
+{
+ u32 current_worker_index;
+ u32 next_worker_index;
+} ikev2_handoff_trace_t;
+
+u8 *
+format_ikev2_handoff_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+
+ ikev2_handoff_trace_t *t = va_arg (*args, ikev2_handoff_trace_t *);
+ s = format (s, "ikev2 handoff %d to %d", t->current_worker_index,
+ t->next_worker_index);
+ return s;
+}
+
+static_always_inline uword
+ikev2_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u32 fq_index)
+{
+ ikev2_main_t *km = &ikev2_main;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u16 thread_indices[VLIB_FRAME_SIZE], *ti;
+ u32 n_enq, n_left_from, *from;
+ u32 this_thread;
+
+ this_thread = vm->thread_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+
+ b = bufs;
+ ti = thread_indices;
+
+ while (n_left_from > 0)
+ {
+ ti[0] = km->handoff_thread;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ikev2_handoff_trace_t *t =
+ vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->current_worker_index = this_thread;
+ t->next_worker_index = ti[0];
+ }
+ n_left_from--;
+ ti++;
+ b++;
+ }
+
+ n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
+ thread_indices, frame->n_vectors, 1);
+
+ if (n_enq < frame->n_vectors)
+ vlib_node_increment_counter (vm, node->node_index,
+ IKEV2_HANDOFF_ERROR_CONGESTION_DROP,
+ frame->n_vectors - n_enq);
+ return n_enq;
+}
+
+/* Do worker handoff based on the ikev2's thread_index */
+VLIB_NODE_FN (ikev2_ip4_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ ikev2_main_t *km = &ikev2_main;
+
+ return ikev2_handoff_inline (vm, node, from_frame, km->handoff_ip4_fq_index);
+}
+
+VLIB_NODE_FN (ikev2_ip4_natt_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ ikev2_main_t *km = &ikev2_main;
+
+ return ikev2_handoff_inline (vm, node, from_frame,
+ km->handoff_ip4_natt_fq_index);
+}
+
+VLIB_NODE_FN (ikev2_ip6_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ ikev2_main_t *km = &ikev2_main;
+
+ return ikev2_handoff_inline (vm, node, from_frame, km->handoff_ip6_fq_index);
+}
+
+VLIB_REGISTER_NODE (ikev2_ip4_handoff) = {
+ .name = "ikev2-ip4-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ikev2_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(ikev2_handoff_error_strings),
+ .error_strings = ikev2_handoff_error_strings,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (ikev2_ip4_natt_handoff) = {
+ .name = "ikev2-ip4-natt-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ikev2_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(ikev2_handoff_error_strings),
+ .error_strings = ikev2_handoff_error_strings,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (ikev2_ip6_handoff) = {
+ .name = "ikev2-ip6-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ikev2_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(ikev2_handoff_error_strings),
+ .error_strings = ikev2_handoff_error_strings,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
diff --git a/src/plugins/ikev2/ikev2_priv.h b/src/plugins/ikev2/ikev2_priv.h
index 0639809e9b1..96313182552 100644
--- a/src/plugins/ikev2/ikev2_priv.h
+++ b/src/plugins/ikev2/ikev2_priv.h
@@ -571,6 +571,12 @@ typedef struct
/* punt handle for IPsec NATT IPSEC_PUNT_IP4_SPI_UDP_0 reason */
vlib_punt_hdl_t punt_hdl;
+ /** Worker handoff */
+ u32 handoff_thread;
+ u32 handoff_ip4_fq_index;
+ u32 handoff_ip4_natt_fq_index;
+ u32 handoff_ip6_fq_index;
+
} ikev2_main_t;
extern ikev2_main_t ikev2_main;
diff --git a/src/plugins/ikev2/ikev2_test.c b/src/plugins/ikev2/ikev2_test.c
index 5682d7058f6..93683a5b5dc 100644
--- a/src/plugins/ikev2/ikev2_test.c
+++ b/src/plugins/ikev2/ikev2_test.c
@@ -391,7 +391,7 @@ vl_api_ikev2_sa_details_t_handler (vl_api_ikev2_sa_details_t * mp)
ip_address_t iaddr;
ip_address_t raddr;
vl_api_ikev2_keys_t *k = &sa->keys;
- vl_api_ikev2_sa_t_endian (sa);
+ vl_api_ikev2_sa_t_endian (sa, 0 /* from network */);
ip_address_decode2 (&sa->iaddr, &iaddr);
ip_address_decode2 (&sa->raddr, &raddr);
@@ -461,7 +461,7 @@ vl_api_ikev2_sa_v2_details_t_handler (vl_api_ikev2_sa_v2_details_t *mp)
ip_address_t iaddr;
ip_address_t raddr;
vl_api_ikev2_keys_t *k = &sa->keys;
- vl_api_ikev2_sa_v2_t_endian (sa);
+ vl_api_ikev2_sa_v2_t_endian (sa, 0 /* from network */);
ip_address_decode2 (&sa->iaddr, &iaddr);
ip_address_decode2 (&sa->raddr, &raddr);
@@ -533,7 +533,7 @@ vl_api_ikev2_sa_v3_details_t_handler (vl_api_ikev2_sa_v3_details_t *mp)
ip_address_t iaddr;
ip_address_t raddr;
vl_api_ikev2_keys_t *k = &sa->keys;
- vl_api_ikev2_sa_v3_t_endian (sa);
+ vl_api_ikev2_sa_v3_t_endian (sa, 0 /* from network */);
ip_address_decode2 (&sa->iaddr, &iaddr);
ip_address_decode2 (&sa->raddr, &raddr);
@@ -619,7 +619,7 @@ vl_api_ikev2_child_sa_details_t_handler (vl_api_ikev2_child_sa_details_t * mp)
vat_main_t *vam = ikev2_test_main.vat_main;
vl_api_ikev2_child_sa_t *child_sa = &mp->child_sa;
vl_api_ikev2_keys_t *k = &child_sa->keys;
- vl_api_ikev2_child_sa_t_endian (child_sa);
+ vl_api_ikev2_child_sa_t_endian (child_sa, 0 /* from network */);
fformat (vam->ofp, " child sa %u:\n", child_sa->child_sa_index);
@@ -696,7 +696,7 @@ vl_api_ikev2_child_sa_v2_details_t_handler (
vat_main_t *vam = ikev2_test_main.vat_main;
vl_api_ikev2_child_sa_t *child_sa = &mp->child_sa;
vl_api_ikev2_keys_t *k = &child_sa->keys;
- vl_api_ikev2_child_sa_t_endian (child_sa);
+ vl_api_ikev2_child_sa_t_endian (child_sa, 0 /* from network */);
fformat (vam->ofp, " child sa %u:\n", child_sa->child_sa_index);
@@ -784,7 +784,7 @@ static void
vat_main_t *vam = ikev2_test_main.vat_main;
vl_api_ikev2_ts_t *ts = &mp->ts;
ip_address_t start_addr, end_addr;
- vl_api_ikev2_ts_t_endian (ts);
+ vl_api_ikev2_ts_t_endian (ts, 0 /* from network */);
ip_address_decode2 (&ts->start_addr, &start_addr);
ip_address_decode2 (&ts->end_addr, &end_addr);
diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c
index e1f4a6a1d69..61665ad4146 100644
--- a/src/plugins/linux-cp/lcp_interface.c
+++ b/src/plugins/linux-cp/lcp_interface.c
@@ -258,7 +258,11 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
vec_validate_init_empty (lip_db_by_host, host_sw_if_index, INDEX_INVALID);
lip_db_by_phy[phy_sw_if_index] = lipi;
lip_db_by_host[host_sw_if_index] = lipi;
- hash_set (lip_db_by_vif, host_index, lipi);
+
+ if (clib_strcmp ((char *) ns, (char *) lcp_get_default_ns ()) == 0)
+ {
+ hash_set (lip_db_by_vif, host_index, lipi);
+ }
lip->lip_host_sw_if_index = host_sw_if_index;
lip->lip_phy_sw_if_index = phy_sw_if_index;
@@ -997,7 +1001,8 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
clib_max (1, lcp_get_default_num_queues (0 /* is_tx */)),
.num_tx_queues =
clib_max (1, lcp_get_default_num_queues (1 /* is_tx */)),
- .id = hw->hw_if_index,
+ .id = ~0,
+ .auto_id_offset = 4096,
.sw_if_index = ~0,
.rx_ring_sz = 256,
.tx_ring_sz = 256,
@@ -1094,7 +1099,7 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
* This controls whether the host can RX/TX.
*/
sw = vnet_get_sw_interface (vnm, phy_sw_if_index);
- lip = lcp_itf_pair_get (lcp_itf_pair_find_by_vif (vif_index));
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index));
LCP_ITF_PAIR_INFO ("pair create: %U sw-flags %u hw-flags %u",
format_lcp_itf_pair, lip, sw->flags, hw->flags);
vnet_sw_interface_admin_up (vnm, host_sw_if_index);
diff --git a/src/plugins/mactime/builtins.c b/src/plugins/mactime/builtins.c
index c487d0375bf..f726d3c03ed 100644
--- a/src/plugins/mactime/builtins.c
+++ b/src/plugins/mactime/builtins.c
@@ -147,6 +147,7 @@ handle_get_mactime (hss_url_handler_args_t *args)
args->data = s;
args->data_len = vec_len (s);
+ args->ct = HTTP_CONTENT_APP_JSON;
args->free_vec_data = 1;
return HSS_URL_HANDLER_OK;
}
diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c
index 51853d619e6..f8d894a013a 100644
--- a/src/plugins/map/ip6_map_t.c
+++ b/src/plugins/map/ip6_map_t.c
@@ -151,9 +151,8 @@ ip6_map_t_icmp (vlib_main_t * vm,
vnet_buffer (p0)->map_t.map_domain_index);
ctx0.d = d0;
ctx0.sender_port = 0;
- if (!ip6_get_port
- (vm, p0, ip60, p0->current_length, NULL, &ctx0.sender_port,
- NULL, NULL, NULL, NULL))
+ if (!ip6_get_port (vm, p0, ip60, p0->current_length, NULL,
+ &ctx0.sender_port, NULL, NULL, NULL, NULL, NULL))
{
// In case of 1:1 mapping, we don't care about the port
if (!(d0->ea_bits_len == 0 && d0->rules))
diff --git a/src/plugins/marvell/CMakeLists.txt b/src/plugins/marvell/CMakeLists.txt
deleted file mode 100644
index b48ac72aa08..00000000000
--- a/src/plugins/marvell/CMakeLists.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) 2018 Cisco and/or its affiliates.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
- return()
-endif()
-
-find_path(MUSDK_INCLUDE_DIR NAMES mv_std.h)
-find_library(MUSDK_LIB NAMES libmusdk.a)
-
-if(MUSDK_INCLUDE_DIR AND MUSDK_LIB)
- get_filename_component(MUSDK_LIB_DIR ${MUSDK_LIB} DIRECTORY)
- set(MUSDK_LINK_FLAGS "-Wl,--whole-archive,${MUSDK_LIB_DIR}/libmusdk.a,--no-whole-archive")
- add_vpp_plugin(marvell
- SOURCES
- plugin.c
- pp2/cli.c
- pp2/format.c
- pp2/input.c
- pp2/output.c
- pp2/pp2.c
- pp2/pp2_api.c
-
- API_FILES
- pp2/pp2.api
-
- API_TEST_SOURCES
- pp2/pp2_test.c
-
- LINK_FLAGS
- ${MUSDK_LINK_FLAGS}
- )
- include_directories(${MUSDK_INCLUDE_DIR})
- message(STATUS "Found Marvell MUSDK in ${MUSDK_INCLUDE_DIR}")
-else()
- message(WARNING "Marvell MUSDK not found - marvell_plugin disabled")
-endif()
diff --git a/src/plugins/marvell/README.rst b/src/plugins/marvell/README.rst
deleted file mode 100644
index 19cf1c49d0e..00000000000
--- a/src/plugins/marvell/README.rst
+++ /dev/null
@@ -1,85 +0,0 @@
-Marvell device plugin
-=====================
-
-Overview
---------
-
-This plugins provides native device support for Marvell PP2 network
-device, by use of Marvell Usermode SDK
-(`MUSDK <https://github.com/MarvellEmbeddedProcessors/musdk-marvell>`__).
-Code is developed and tested on
-`MACCHIATObin <http://macchiatobin.net>`__ board.
-
-Prerequisites
--------------
-
-Plugins depends on installed MUSDK and Marvell provided linux
-`kernel <https://github.com/MarvellEmbeddedProcessors/linux-marvell>`__
-with MUSDK provided kernel patches (see ``patches/linux`` in musdk repo
-and relevant documentation. Kernel version used: **4.14.22
-armada-18.09.3** MUSDK version used: **armada-18.09.3** Following kernel
-modules from MUSDK must be loaded for plugin to work: \*
-``musdk_cma.ko`` \* ``mv_pp_uio.ko``
-
-Musdk 18.09.3 compilation steps
--------------------------------
-
-::
-
- ./bootstrap
- ./configure --prefix=/opt/vpp/external/aarch64/ CFLAGS="-Wno-error=unused-result -g -fPIC" --enable-shared=no
- sed -i -e 's/marvell,mv-pp-uio/generic-uio/' modules/pp2/mv_pp_uio.c
- sed -i -e 's/O_CREAT/O_CREAT, S_IRUSR | S_IWUSR/' src/lib/file_utils.c
- make
- sudo make install
-
-Usage
------
-
-Interface Creation
-~~~~~~~~~~~~~~~~~~
-
-Interfaces are dynamically created with following CLI:
-
-::
-
- create interface marvell pp2 name eth0
- set interface state mv-ppio-0/0 up
-
-Where ``eth0`` is linux interface name and ``mv-ppio-X/Y`` is VPP
-interface name where X is PP2 device ID and Y is PPIO ID Interface needs
-to be assigned to MUSDK in FDT configuration and linux interface state
-must be up.
-
-Interface Deletion
-~~~~~~~~~~~~~~~~~~
-
-Interface can be deleted with following CLI:
-
-::
-
- delete interface marvell pp2 <interface name>
-
-Interface Statistics
-~~~~~~~~~~~~~~~~~~~~
-
-Interface statistics can be displayed with
-``sh hardware-interface mv-ppio0/0`` command.
-
-Interaction with DPDK plugin
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This plugin doesn’t have any dependency on DPDK or DPDK plugin but it
-can work with DPDK plugin enabled or disabled. It is observed that
-performance is better around 30% when DPDK plugin is disabled, as DPDK
-plugin registers own buffer manager, which needs to deal with additional
-metadata in each packet.
-
-DPKD plugin can be disabled by adding following config to the
-startup.conf.
-
-::
-
- plugins {
- dpdk_plugin.so { disable }
- }
diff --git a/src/plugins/marvell/pp2/cli.c b/src/plugins/marvell/pp2/cli.c
deleted file mode 100644
index f4ecb1873c9..00000000000
--- a/src/plugins/marvell/pp2/cli.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-#include <stdint.h>
-#include <net/if.h>
-#include <sys/ioctl.h>
-#include <inttypes.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ethernet/ethernet.h>
-
-#include <marvell/pp2/pp2.h>
-
-static clib_error_t *
-mrvl_pp2_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- mrvl_pp2_create_if_args_t args = { 0 };
- uint val;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "name %s", &args.name))
- ;
- else if (unformat (line_input, "rx-queue-size %u", &val))
- args.rx_q_sz = val;
- else if (unformat (line_input, "tx-queue-size %u", &val))
- args.tx_q_sz = val;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
- unformat_free (line_input);
-
-
- mrvl_pp2_create_if (&args);
-
- vec_free (args.name);
-
- return args.error;
-}
-
-VLIB_CLI_COMMAND (mrvl_pp2_create_command, static) = {
- .path = "create interface marvell pp2",
- .short_help = "create interface marvell pp2 [name <ifname>] [rx-queue-size slots] [tx-queue-size slots]",
- .function = mrvl_pp2_create_command_fn,
-};
-
-static clib_error_t *
-mrvl_pp2_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index = ~0;
- vnet_hw_interface_t *hw;
- mrvl_pp2_main_t *mm = &mrvl_pp2_main;
- mrvl_pp2_if_t *dif;
- vnet_main_t *vnm = vnet_get_main ();
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "sw_if_index %d", &sw_if_index))
- ;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
- vnm, &sw_if_index))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
- unformat_free (line_input);
-
- if (sw_if_index == ~0)
- return clib_error_return (0,
- "please specify interface name or sw_if_index");
-
- hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
- if (hw == NULL || mrvl_pp2_device_class.index != hw->dev_class_index)
- return clib_error_return (0, "not a Marvell PP2 interface");
-
- dif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
-
- mrvl_pp2_delete_if (dif);
-
- return 0;
-}
-
-VLIB_CLI_COMMAND (mrvl_pp2_delete_command, static) = {
- .path = "delete interface marvell pp2",
- .short_help = "delete interface marvell pp2 "
- "{<interface> | sw_if_index <sw_idx>}",
- .function = mrvl_pp2_delete_command_fn,
-};
-
-clib_error_t *
-mrvl_pp2_cli_init (vlib_main_t * vm)
-{
- /* initialize binary API */
- mrvl_pp2_plugin_api_hookup (vm);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (mrvl_pp2_cli_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/marvell/pp2/format.c b/src/plugins/marvell/pp2/format.c
deleted file mode 100644
index 877010ea561..00000000000
--- a/src/plugins/marvell/pp2/format.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/plugin/plugin.h>
-#include <marvell/pp2/pp2.h>
-
-static inline u32
-mrvl_get_u32_bits (void *start, int offset, int first, int last)
-{
- u32 value = *(u32 *) (((u8 *) start) + offset);
- if ((last == 0) && (first == 31))
- return value;
- value >>= last;
- value &= (1 << (first - last + 1)) - 1;
- return value;
-}
-
-u8 *
-format_mrvl_pp2_interface_name (u8 * s, va_list * args)
-{
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- u32 dev_instance = va_arg (*args, u32);
- mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, dev_instance);
- return format (s, "mv-ppio-%d/%d", ppif->ppio->pp2_id, ppif->ppio->port_id);
-}
-
-#define foreach_ppio_statistics_entry \
- _(rx_packets) \
- _(rx_fullq_dropped) \
- _(rx_bm_dropped) \
- _(rx_early_dropped) \
- _(rx_fifo_dropped) \
- _(rx_cls_dropped) \
- _(tx_packets)
-
-#define foreach_ppio_inq_statistics_entry \
- _(enq_desc) \
- _(drop_early) \
- _(drop_fullq) \
- _(drop_bm)
-
-#define foreach_ppio_outq_statistics_entry \
- _(enq_desc) \
- _(enq_dec_to_ddr) \
- _(enq_buf_to_ddr) \
- _(deq_desc)
-
-u8 *
-format_mrvl_pp2_interface (u8 * s, va_list * args)
-{
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- u32 dev_instance = va_arg (*args, u32);
- u32 indent = format_get_indent (s);
- mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, dev_instance);
- struct pp2_ppio_statistics stat;
- int i;
- u8 *s2 = 0;
-
- pp2_ppio_get_statistics (ppif->ppio, &stat, 0);
-
-#define _(c) if (stat.c) \
- s2 = format (s2, "\n%U%-25U%16Ld", \
- format_white_space, indent + 2, \
- format_c_identifier, #c, stat.c);
- foreach_ppio_statistics_entry;
-
- if (vec_len (s2))
- s = format (s, "Interface statistics:%v", s2);
- vec_reset_length (s2);
-
- vec_foreach_index (i, ppif->inqs)
- {
- struct pp2_ppio_inq_statistics stat = { 0 };
- pp2_ppio_inq_get_statistics (ppif->ppio, 0, i, &stat, 0);
-
- foreach_ppio_inq_statistics_entry;
-
- if (vec_len (s2))
- s = format (s, "\n%UInput queue %u statistics:%v",
- format_white_space, indent, i, s2);
- vec_reset_length (s2);
- }
- vec_foreach_index (i, ppif->outqs)
- {
- struct pp2_ppio_outq_statistics stat = { 0 };
-
- pp2_ppio_outq_get_statistics (ppif->ppio, i, &stat, 0);
-
- foreach_ppio_outq_statistics_entry;
-
- if (vec_len (s2))
- s = format (s, "\n%UOutput queue %u statistics:%v",
- format_white_space, indent, i, s2);
- vec_reset_length (s2);
- }
-#undef _
- vec_free (s2);
- return s;
-}
-
-#define foreach_pp2_rx_desc_field \
- _(0x00, 6, 0, l3_offset) \
- _(0x00, 12, 8, ip_hdlen) \
- _(0x00, 14, 13, ec) \
- _(0x00, 15, 15, es) \
- _(0x00, 19, 16, pool_id) \
- _(0x00, 21, 21, hwf_sync) \
- _(0x00, 22, 22, l4_chk_ok) \
- _(0x00, 23, 23, ip_frg) \
- _(0x00, 24, 24, ipv4_hdr_err) \
- _(0x00, 27, 25, l4_info) \
- _(0x00, 30, 28, l3_info) \
- _(0x00, 31, 31, buf_header) \
- _(0x04, 5, 0, lookup_id) \
- _(0x04, 8, 6, cpu_code) \
- _(0x04, 9, 9, pppoe) \
- _(0x04, 11, 10, l3_cast_info) \
- _(0x04, 13, 12, l2_cast_info) \
- _(0x04, 15, 14, vlan_info) \
- _(0x04, 31, 16, byte_count) \
- _(0x08, 11, 0, gem_port_id) \
- _(0x08, 13, 12, color) \
- _(0x08, 14, 14, gop_sop_u) \
- _(0x08, 15, 15, key_hash_enable) \
- _(0x08, 31, 16, l4chk) \
- _(0x0c, 31, 0, timestamp) \
- _(0x10, 31, 0, buf_phys_ptr_lo) \
- _(0x14, 7, 0, buf_phys_ptr_hi) \
- _(0x14, 31, 8, key_hash) \
- _(0x18, 31, 0, buf_virt_ptr_lo) \
- _(0x1c, 7, 0, buf_virt_ptr_hi) \
- _(0x1c, 14, 8, buf_qset_no) \
- _(0x1c, 15, 15, buf_type) \
- _(0x1c, 21, 16, mod_dscp) \
- _(0x1c, 24, 22, mod_pri) \
- _(0x1c, 25, 25, mdscp) \
- _(0x1c, 26, 26, mpri) \
- _(0x1c, 27, 27, mgpid) \
- _(0x1c, 31, 29, port_num)
-
-u8 *
-format_mrvl_pp2_input_trace (u8 * s, va_list * args)
-{
- vlib_main_t *vm = va_arg (*args, vlib_main_t *);
- vlib_node_t *node = va_arg (*args, vlib_node_t *);
- mrvl_pp2_input_trace_t *t = va_arg (*args, mrvl_pp2_input_trace_t *);
- vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
- u32 indent = format_get_indent (s);
- struct pp2_ppio_desc *d = &t->desc;
- u32 r32;
-
- s = format (s, "pp2: %v (%d) next-node %U",
- hi->name, t->hw_if_index, format_vlib_next_node_name, vm,
- node->index, t->next_index);
- s = format (s, "\n%U", format_white_space, indent + 2);
-
-#define _(a, b, c, n) \
- r32 = mrvl_get_u32_bits (d, a, b, c); \
- if (r32 > 9) \
- s = format (s, "%s %u (0x%x)", #n, r32, r32); \
- else \
- s = format (s, "%s %u", #n,r32); \
- if (format_get_indent (s) > 72) \
- s = format (s, "\n%U", format_white_space, indent + 2); \
- else s = format (s, " ");
-
- foreach_pp2_rx_desc_field;
-#undef _
- return s;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/marvell/pp2/input.c b/src/plugins/marvell/pp2/input.c
deleted file mode 100644
index 2545f91becb..00000000000
--- a/src/plugins/marvell/pp2/input.c
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#define _GNU_SOURCE
-#include <stdint.h>
-#include <net/if.h>
-#include <sys/ioctl.h>
-#include <sys/uio.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/devices.h>
-#include <vnet/interface/rx_queue_funcs.h>
-
-#include <marvell/pp2/pp2.h>
-
-#define foreach_mrvl_pp2_input_error \
- _(PPIO_RECV, "pp2_ppio_recv error") \
- _(BPOOL_GET_NUM_BUFFS, "pp2_bpool_get_num_buffs error") \
- _(BPOOL_PUT_BUFFS, "pp2_bpool_put_buffs error") \
- _(BUFFER_ALLOC, "buffer alloc error") \
- _(MAC_CE, "MAC error (CRC error)") \
- _(MAC_OR, "overrun error") \
- _(MAC_RSVD, "unknown MAC error") \
- _(MAC_RE, "resource error") \
- _(IP_HDR, "ip4 header error")
-
-typedef enum
-{
-#define _(f,s) MRVL_PP2_INPUT_ERROR_##f,
- foreach_mrvl_pp2_input_error
-#undef _
- MRVL_PP2_INPUT_N_ERROR,
-} mrvl_pp2_input_error_t;
-
-static __clib_unused char *mrvl_pp2_input_error_strings[] = {
-#define _(n,s) s,
- foreach_mrvl_pp2_input_error
-#undef _
-};
-
-static_always_inline void
-mrvl_pp2_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node, u32 next0,
- vlib_buffer_t * b0, uword * n_trace,
- mrvl_pp2_if_t * ppif, struct pp2_ppio_desc *d)
-{
- if (PREDICT_TRUE (
- vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0)))
- {
- mrvl_pp2_input_trace_t *tr;
- vlib_set_trace_count (vm, node, --(*n_trace));
- tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->next_index = next0;
- tr->hw_if_index = ppif->hw_if_index;
- clib_memcpy_fast (&tr->desc, d, sizeof (struct pp2_ppio_desc));
- }
-}
-
-static_always_inline u16
-mrvl_pp2_set_buf_data_len_flags (vlib_buffer_t * b, struct pp2_ppio_desc *d,
- u32 add_flags)
-{
- u16 len;
- len = pp2_ppio_inq_desc_get_pkt_len (d);
- b->total_length_not_including_first_buffer = 0;
- b->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | add_flags;
-
- if (add_flags & VNET_BUFFER_F_L2_HDR_OFFSET_VALID)
- vnet_buffer (b)->l2_hdr_offset = 2;
-
- if (add_flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID)
- {
- u16 offset = DM_RXD_GET_L3_OFF (d);
- vnet_buffer (b)->l3_hdr_offset = offset;
- b->current_data = offset;
- b->current_length = len - offset + 2;
- }
- else
- {
- b->current_data = 2;
- b->current_length = len;
- }
-
- if (add_flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID)
- vnet_buffer (b)->l4_hdr_offset = vnet_buffer (b)->l3_hdr_offset +
- DM_RXD_GET_IPHDR_LEN (d) * 4;
-
- return len;
-}
-
-static_always_inline u16
-mrvl_pp2_next_from_desc (vlib_node_runtime_t * node, struct pp2_ppio_desc * d,
- vlib_buffer_t * b, u32 * next)
-{
- u8 l3_info;
- /* ES bit set means MAC error - drop and count */
- if (PREDICT_FALSE (DM_RXD_GET_ES (d)))
- {
- *next = VNET_DEVICE_INPUT_NEXT_DROP;
- u8 ec = DM_RXD_GET_EC (d);
- if (ec == 0)
- b->error = node->errors[MRVL_PP2_INPUT_ERROR_MAC_CE];
- else if (ec == 1)
- b->error = node->errors[MRVL_PP2_INPUT_ERROR_MAC_OR];
- else if (ec == 2)
- b->error = node->errors[MRVL_PP2_INPUT_ERROR_MAC_RSVD];
- else if (ec == 3)
- b->error = node->errors[MRVL_PP2_INPUT_ERROR_MAC_RE];
- return mrvl_pp2_set_buf_data_len_flags (b, d, 0);
- }
- l3_info = DM_RXD_GET_L3_PRS_INFO (d);
-
- /* ipv4 packet can be value 1, 2 or 3 */
- if (PREDICT_TRUE ((l3_info - 1) < 3))
- {
- if (PREDICT_FALSE (DM_RXD_GET_L3_IP4_HDR_ERR (d) != 0))
- {
- *next = VNET_DEVICE_INPUT_NEXT_DROP;
- b->error = node->errors[MRVL_PP2_INPUT_ERROR_IP_HDR];
- return mrvl_pp2_set_buf_data_len_flags (b, d, 0);
- }
- *next = VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
- return mrvl_pp2_set_buf_data_len_flags
- (b, d,
- VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID | VNET_BUFFER_F_IS_IP4);
- }
-
- /* ipv4 packet can be value 4 or 5 */
- if (PREDICT_TRUE ((l3_info - 4) < 2))
- {
- *next = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
- return mrvl_pp2_set_buf_data_len_flags
- (b, d,
- VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID | VNET_BUFFER_F_IS_IP6);
- }
-
- *next = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- return mrvl_pp2_set_buf_data_len_flags (b, d,
- VNET_BUFFER_F_L2_HDR_OFFSET_VALID);
-}
-
-static_always_inline uword
-mrvl_pp2_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, mrvl_pp2_if_t * ppif,
- u16 qid)
-{
- vnet_main_t *vnm = vnet_get_main ();
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- u32 thread_index = vm->thread_index;
- mrvl_pp2_inq_t *inq = vec_elt_at_index (ppif->inqs, qid);
- uword n_trace = vlib_get_trace_count (vm, node);
- mrvl_pp2_per_thread_data_t *ptd =
- vec_elt_at_index (ppm->per_thread_data, thread_index);
- u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- u32 sw_if_index[VLIB_N_RX_TX];
- u32 n_rx_packets = 0;
- u32 n_rx_bytes = 0;
- u32 *to_next = 0;
- struct pp2_ppio_desc *d;
- u16 n_desc = VLIB_FRAME_SIZE;
- u32 n_bufs;
- u32 *buffers;
- int i;
-
- vec_validate_aligned (ptd->descs, n_desc, CLIB_CACHE_LINE_BYTES);
- if (PREDICT_FALSE (pp2_ppio_recv (ppif->ppio, 0, qid, ptd->descs, &n_desc)))
- {
- vlib_error_count (vm, node->node_index, MRVL_PP2_INPUT_ERROR_PPIO_RECV,
- 1);
- n_desc = 0;
- }
- n_rx_packets = n_desc;
-
- for (i = 0; i < n_desc; i++)
- ptd->buffers[i] = pp2_ppio_inq_desc_get_cookie (&ptd->descs[i]);
-
- d = ptd->descs;
- buffers = ptd->buffers;
- sw_if_index[VLIB_RX] = ppif->sw_if_index;
- sw_if_index[VLIB_TX] = (u32) ~ 0;
- while (n_desc)
- {
- u32 n_left_to_next;
- vlib_buffer_t *b0, *b1;
- u32 bi0, bi1;
- u32 next0, next1;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- while (n_desc >= 4 && n_left_to_next >= 2)
- {
- /* prefetch */
- bi0 = buffers[0];
- bi1 = buffers[1];
- to_next[0] = bi0;
- to_next[1] = bi1;
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- if (PREDICT_TRUE (ppif->per_interface_next_index == ~0))
- {
- n_rx_bytes += mrvl_pp2_next_from_desc (node, d, b0, &next0);
- n_rx_bytes += mrvl_pp2_next_from_desc (node, d + 1, b1, &next1);
- vnet_feature_start_device_input (ppif->sw_if_index, &next0, b0);
- vnet_feature_start_device_input (ppif->sw_if_index, &next1, b1);
- }
- else
- {
- n_rx_bytes += mrvl_pp2_set_buf_data_len_flags (b0, d, 0);
- n_rx_bytes += mrvl_pp2_set_buf_data_len_flags (b1, d + 1, 0);
- next0 = next1 = ppif->per_interface_next_index;
- }
-
- clib_memcpy_fast (vnet_buffer (b0)->sw_if_index, sw_if_index,
- sizeof (sw_if_index));
- clib_memcpy_fast (vnet_buffer (b1)->sw_if_index, sw_if_index,
- sizeof (sw_if_index));
-
- if (PREDICT_FALSE (n_trace > 0))
- {
- mrvl_pp2_input_trace (vm, node, next0, b0, &n_trace, ppif, d);
- if (n_trace > 0)
- mrvl_pp2_input_trace (vm, node, next1, b1, &n_trace, ppif,
- d + 1);
- }
-
- to_next += 2;
- n_left_to_next -= 2;
- d += 2;
- buffers += 2;
- n_desc -= 2;
-
- /* enqueue */
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
- n_left_to_next, bi0, bi1, next0,
- next1);
-
- }
- while (n_desc && n_left_to_next)
- {
- u32 bi0 = buffers[0];
- to_next[0] = bi0;
- b0 = vlib_get_buffer (vm, bi0);
-
- if (PREDICT_TRUE (ppif->per_interface_next_index == ~0))
- {
- n_rx_bytes += mrvl_pp2_next_from_desc (node, d, b0, &next0);
- vnet_feature_start_device_input (ppif->sw_if_index, &next0, b0);
- }
- else
- {
- n_rx_bytes += mrvl_pp2_set_buf_data_len_flags (b0, d, 0);
- next0 = ppif->per_interface_next_index;
- }
-
- clib_memcpy_fast (vnet_buffer (b0)->sw_if_index, sw_if_index,
- sizeof (sw_if_index));
-
- if (PREDICT_FALSE (n_trace > 0))
- mrvl_pp2_input_trace (vm, node, next0, b0, &n_trace, ppif, d);
-
- to_next += 1;
- n_left_to_next--;
- d++;
- buffers++;
- n_desc--;
-
- /* enqueue */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
- vlib_increment_combined_counter (vnm->
- interface_main.combined_sw_if_counters +
- VNET_INTERFACE_COUNTER_RX, thread_index,
- ppif->hw_if_index, n_rx_packets,
- n_rx_bytes);
-
- if (PREDICT_FALSE (pp2_bpool_get_num_buffs (inq->bpool, &n_bufs)))
- {
- vlib_error_count (vm, node->node_index,
- MRVL_PP2_INPUT_ERROR_BPOOL_GET_NUM_BUFFS, 1);
- goto done;
- }
-
- n_bufs = inq->size - n_bufs;
- while (n_bufs >= MRVL_PP2_BUFF_BATCH_SZ)
- {
- u16 n_alloc, i;
- struct buff_release_entry *e = ptd->bre;
- u32 *buffers = ptd->buffers;
-
- n_alloc = vlib_buffer_alloc (vm, ptd->buffers, MRVL_PP2_BUFF_BATCH_SZ);
- i = n_alloc;
-
- if (PREDICT_FALSE (n_alloc == 0))
- {
- vlib_error_count (vm, node->node_index,
- MRVL_PP2_INPUT_ERROR_BUFFER_ALLOC, 1);
- goto done;
- }
-
- while (i--)
- {
- u32 bi = buffers[0];
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- e->buff.addr = vlib_buffer_get_pa (vm, b) - 64;
- e->buff.cookie = bi;
- e->bpool = inq->bpool;
- e++;
- buffers++;
- }
-
- i = n_alloc;
- if (PREDICT_FALSE (pp2_bpool_put_buffs (ptd->hif, ptd->bre, &i)))
- {
- vlib_error_count (vm, node->node_index,
- MRVL_PP2_INPUT_ERROR_BPOOL_PUT_BUFFS, 1);
- vlib_buffer_free (vm, ptd->buffers, n_alloc);
- goto done;
- }
-
- if (PREDICT_FALSE (i != n_alloc))
- vlib_buffer_free (vm, ptd->buffers + i, n_alloc - i);
-
- n_bufs -= i;
- }
-
-done:
- return n_rx_packets;
-}
-
-uword
-mrvl_pp2_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- u32 n_rx = 0;
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- vnet_hw_if_rxq_poll_vector_t *pv;
-
- pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
-
- for (int i = 0; i < vec_len (pv); i++)
- {
- mrvl_pp2_if_t *ppif;
- ppif = vec_elt_at_index (ppm->interfaces, pv[i].dev_instance);
- if (ppif->flags & MRVL_PP2_IF_F_ADMIN_UP)
- n_rx +=
- mrvl_pp2_device_input_inline (vm, node, frame, ppif, pv[i].queue_id);
- }
- return n_rx;
-}
-
-VLIB_REGISTER_NODE (mrvl_pp2_input_node) = {
- .function = mrvl_pp2_input_fn,
- .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
- .name = "mrvl-pp2-input",
- .sibling_of = "device-input",
- .format_trace = format_mrvl_pp2_input_trace,
- .type = VLIB_NODE_TYPE_INPUT,
- .state = VLIB_NODE_STATE_POLLING,
- .n_errors = MRVL_PP2_INPUT_N_ERROR,
- .error_strings = mrvl_pp2_input_error_strings,
-};
-
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/marvell/pp2/output.c b/src/plugins/marvell/pp2/output.c
deleted file mode 100644
index 911b2f55a17..00000000000
--- a/src/plugins/marvell/pp2/output.c
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <stdint.h>
-#include <net/if.h>
-#include <sys/ioctl.h>
-#include <sys/uio.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/devices.h>
-
-#include <marvell/pp2/pp2.h>
-
-uword
-mrvl_pp2_interface_tx (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
- mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, rd->dev_instance);
- u32 thread_index = vm->thread_index;
- mrvl_pp2_per_thread_data_t *ptd =
- vec_elt_at_index (ppm->per_thread_data, thread_index);
- u8 qid = thread_index;
- mrvl_pp2_outq_t *outq = vec_elt_at_index (ppif->outqs, qid);
- u32 *buffers = vlib_frame_vector_args (frame);
- u16 n_desc = frame->n_vectors, n_left = n_desc, n_sent = n_desc, n_done;
- struct pp2_ppio_desc *d;
- u16 mask = outq->size - 1;
-
- if (PREDICT_FALSE (pp2_ppio_get_num_outq_done (ppif->ppio, ptd->hif, qid,
- &n_done)))
- {
- n_done = 0;
- vlib_error_count (vm, node->node_index,
- MRVL_PP2_TX_ERROR_PPIO_GET_NUM_OUTQ_DONE, 1);
- }
-
- if (n_done)
- {
- u16 n_free = clib_min (n_done, outq->size - (outq->tail & mask));
- vlib_buffer_free (vm, outq->buffers + (outq->tail & mask), n_free);
- if (PREDICT_FALSE (n_free < n_done))
- vlib_buffer_free (vm, outq->buffers, n_done - n_free);
- outq->tail += n_done;
- }
-
- vec_validate_aligned (ptd->descs, n_left, CLIB_CACHE_LINE_BYTES);
- d = ptd->descs;
- while (n_left)
- {
- u32 bi0 = buffers[0];
- vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
- u64 paddr = vlib_buffer_get_pa (vm, b0);
-
- pp2_ppio_outq_desc_reset (d);
- pp2_ppio_outq_desc_set_phys_addr (d, paddr + b0->current_data);
- pp2_ppio_outq_desc_set_pkt_offset (d, 0);
- pp2_ppio_outq_desc_set_pkt_len (d, b0->current_length);
- d++;
- buffers++;
- n_left--;
- }
-
- if (pp2_ppio_send (ppif->ppio, ptd->hif, qid, ptd->descs, &n_sent))
- {
- n_sent = 0;
- vlib_error_count (vm, node->node_index, MRVL_PP2_TX_ERROR_PPIO_SEND, 1);
- }
-
- /* free unsent buffers */
- if (PREDICT_FALSE (n_sent != n_desc))
- {
- vlib_buffer_free (vm, vlib_frame_vector_args (frame) + n_sent,
- frame->n_vectors - n_sent);
- vlib_error_count (vm, node->node_index, MRVL_PP2_TX_ERROR_NO_FREE_SLOTS,
- frame->n_vectors - n_sent);
- }
-
- /* store buffer index for each enqueued packet into the ring
- so we can know what to free after packet is sent */
- if (n_sent)
- {
- u16 slot = outq->head & mask;
- buffers = vlib_frame_vector_args (frame);
- u16 n_copy = clib_min (outq->size - slot, n_sent);
-
- vlib_buffer_copy_indices (outq->buffers + slot, buffers, n_copy);
- if (PREDICT_FALSE (n_copy < n_sent))
- clib_memcpy_fast (outq->buffers, buffers + n_copy,
- (n_sent - n_copy) * sizeof (u32));
-
- outq->head += n_sent;
- }
-
- return n_sent;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/marvell/pp2/pp2.c b/src/plugins/marvell/pp2/pp2.c
deleted file mode 100644
index 030ab9b4496..00000000000
--- a/src/plugins/marvell/pp2/pp2.c
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/plugin/plugin.h>
-#include <marvell/pp2/pp2.h>
-#include <vnet/interface/rx_queue_funcs.h>
-
-/* size of DMA memory used by musdk (not used for buffers) */
-#define MV_SYS_DMA_MEM_SZ (2 << 20)
-/* number of HIFs reserved (first X) */
-#define NUM_HIFS_RSVD 4
-/* number of buffer pools reserved (first X) */
-#define NUM_BPOOLS_RSVD 7
-
-mrvl_pp2_main_t mrvl_pp2_main;
-extern vnet_device_class_t ppa2_device_class;
-
-static void
-mrvl_pp2_main_deinit ()
-{
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- int i;
- vec_foreach_index (i, ppm->per_thread_data)
- {
- mrvl_pp2_per_thread_data_t *ptd = vec_elt_at_index (ppm->per_thread_data,
- i);
- if (ptd->hif)
- pp2_hif_deinit (ptd->hif);
- vec_free (ptd->descs);
- }
- vec_free (ppm->per_thread_data);
- pp2_deinit ();
- mv_sys_dma_mem_destroy ();
-}
-
-static clib_error_t *
-mrvl_pp2_main_init ()
-{
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- clib_error_t *err = 0;
- struct pp2_init_params init_params = { 0 };
- int i, rv;
- u8 *s = 0;
-
- rv = mv_sys_dma_mem_init (MV_SYS_DMA_MEM_SZ);
- if (rv)
- return clib_error_return (0, "mv_sys_dma_mem_init failed, rv = %u", rv);
-
- init_params.hif_reserved_map = ((1 << NUM_HIFS_RSVD) - 1);
- init_params.bm_pool_reserved_map = ((1 << NUM_BPOOLS_RSVD) - 1);
- rv = pp2_init (&init_params);
- if (rv)
- {
- err = clib_error_return (0, "mrvl_pp2_init failed, rv = %u", rv);
- goto done;
- }
-
- vec_validate_aligned (ppm->per_thread_data, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
-
- vec_foreach_index (i, ppm->per_thread_data)
- {
- mrvl_pp2_per_thread_data_t *ptd = vec_elt_at_index (ppm->per_thread_data,
- i);
- struct pp2_hif_params hif_params = { 0 };
- vec_reset_length (s);
- s = format (s, "hif-%d%c", NUM_HIFS_RSVD + i, 0);
- hif_params.match = (char *) s;
- hif_params.out_size = 2048; /* FIXME */
- if (pp2_hif_init (&hif_params, &ptd->hif))
- {
- err = clib_error_return (0, "hif '%s' init failed", s);
- goto done;
- }
- }
-
-done:
- if (err)
- mrvl_pp2_main_deinit ();
- vec_free (s);
- return err;
-}
-
-static u32
-mrvl_pp2_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi,
- u32 flags)
-{
- /* nothing for now */
- return 0;
-}
-
-void
-mrvl_pp2_delete_if (mrvl_pp2_if_t * ppif)
-{
- vlib_main_t *vm = vlib_get_main ();
- vnet_main_t *vnm = vnet_get_main ();
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- mrvl_pp2_outq_t *outq;
- mrvl_pp2_inq_t *inq;
-
- if (ppif->hw_if_index != ~0)
- ethernet_delete_interface (vnm, ppif->hw_if_index);
-
- if (ppif->ppio)
- {
- pp2_ppio_disable (ppif->ppio);
- pp2_ppio_deinit (ppif->ppio);
- }
-
- /* free buffers hanging in the tx ring */
- vec_foreach (outq, ppif->outqs)
- {
- while (outq->tail < outq->head)
- {
- u16 slot = outq->tail & (outq->size - 1);
- vlib_buffer_free (vm, outq->buffers + slot, 1);
- outq->tail++;
- }
- vec_free (outq->buffers);
- }
- vec_free (ppif->outqs);
-
- /* free buffers hangin in the rx buffer pool */
- vec_foreach (inq, ppif->inqs)
- if (inq->bpool)
- {
- u32 n_bufs = 0;
- pp2_bpool_get_num_buffs (inq->bpool, &n_bufs);
- while (n_bufs--)
- {
- struct pp2_buff_inf binf;
- if (pp2_bpool_get_buff (ppm->per_thread_data[0].hif, inq->bpool,
- &binf) == 0)
- {
- u32 bi = binf.cookie;
- vlib_buffer_free (vm, &bi, 1);
- }
- }
- pp2_bpool_deinit (inq->bpool);
- }
- vec_free (ppif->inqs);
-
-
- pool_put (ppm->interfaces, ppif);
-
- if (pool_elts (ppm->interfaces) == 0)
- mrvl_pp2_main_deinit ();
-}
-
-void
-mrvl_pp2_create_if (mrvl_pp2_create_if_args_t * args)
-{
- vlib_main_t *vm = vlib_get_main ();
- vnet_main_t *vnm = vnet_get_main ();
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- vnet_eth_interface_registration_t eir = {};
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- struct pp2_bpool_params bpool_params = { 0 };
- struct pp2_ppio_params ppio_params = { 0 };
- struct pp2_ppio_inq_params inq_params = { 0 };
- vnet_sw_interface_t *sw;
- mrvl_pp2_if_t *ppif = 0;
- u8 pp2_id, port_id, *s = 0;
- eth_addr_t mac_addr;
- u8 n_outqs, n_inqs = 1;
- int i;
-
- if (tm->n_vlib_mains > PP2_PPIO_MAX_NUM_OUTQS)
- {
- args->rv = VNET_API_ERROR_INIT_FAILED;
- args->error = clib_error_return (0, "number of threads (main + workers)"
- " is bigger than number of output "
- "queues (%u)", PP2_PPIO_MAX_NUM_OUTQS);
- return;
- }
- n_outqs = tm->n_vlib_mains;
-
- /* defaults */
- args->tx_q_sz = args->tx_q_sz ? args->tx_q_sz : 2 * VLIB_FRAME_SIZE;
- args->rx_q_sz = args->rx_q_sz ? args->rx_q_sz : 2 * VLIB_FRAME_SIZE;
-
- if (vec_len (ppm->per_thread_data) == 0)
- {
- if ((args->error = mrvl_pp2_main_init ()) != 0)
- {
- args->rv = VNET_API_ERROR_INIT_FAILED;
- return;
- }
- }
-
- pool_get_zero (ppm->interfaces, ppif);
- ppif->dev_instance = ppif - ppm->interfaces;
- ppif->hw_if_index = ~0;
- vec_validate_aligned (ppif->inqs, n_inqs - 1, CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (ppif->outqs, n_outqs - 1, CLIB_CACHE_LINE_BYTES);
-
- for (i = 0; i < n_inqs; i++)
- {
- mrvl_pp2_inq_t *inq = vec_elt_at_index (ppif->inqs, i);
- inq->size = args->rx_q_sz;
- }
- for (i = 0; i < n_outqs; i++)
- {
- mrvl_pp2_outq_t *outq = vec_elt_at_index (ppif->outqs, i);
- outq->size = args->tx_q_sz;
- vec_validate_aligned (outq->buffers, outq->size, CLIB_CACHE_LINE_BYTES);
- }
-
- if (pp2_netdev_get_ppio_info ((char *) args->name, &pp2_id, &port_id))
- {
- args->rv = VNET_API_ERROR_INVALID_INTERFACE;
- args->error = clib_error_return (0, "Invalid interface '%s'",
- args->name);
- goto error;
- }
-
- /* FIXME bpool bit select per pp */
- s = format (s, "pool-%d:%d%c", pp2_id, pp2_id + 8, 0);
- bpool_params.match = (char *) s;
- bpool_params.buff_len = vlib_buffer_get_default_data_size (vm);
- /* FIXME +64 ? */
- if (pp2_bpool_init (&bpool_params, &ppif->inqs[0].bpool))
- {
- args->rv = VNET_API_ERROR_INIT_FAILED;
- args->error = clib_error_return (0, "bpool '%s' init failed", s);
- goto error;
- }
- vec_reset_length (s);
-
- s = format (s, "ppio-%d:%d%c", pp2_id, port_id, 0);
- ppio_params.match = (char *) s;
- ppio_params.type = PP2_PPIO_T_NIC;
- inq_params.size = args->rx_q_sz;
- ppio_params.inqs_params.num_tcs = 1;
- ppio_params.inqs_params.tcs_params[0].pkt_offset = 0;
- ppio_params.inqs_params.tcs_params[0].num_in_qs = n_inqs;
- ppio_params.inqs_params.tcs_params[0].inqs_params = &inq_params;
- ppio_params.inqs_params.tcs_params[0].pools[0][0] = ppif->inqs[0].bpool;
- ppio_params.outqs_params.num_outqs = n_outqs;
- for (i = 0; i < n_outqs; i++)
- {
- ppio_params.outqs_params.outqs_params[i].weight = 1;
- ppio_params.outqs_params.outqs_params[i].size = args->tx_q_sz;
- }
- if (pp2_ppio_init (&ppio_params, &ppif->ppio))
- {
- args->rv = VNET_API_ERROR_INIT_FAILED;
- args->error = clib_error_return (0, "ppio '%s' init failed", s);
- goto error;
- }
- vec_reset_length (s);
-
- if (pp2_ppio_get_mac_addr (ppif->ppio, mac_addr))
- {
- args->rv = VNET_API_ERROR_INIT_FAILED;
- args->error =
- clib_error_return (0, "%s: pp2_ppio_get_mac_addr failed", s);
- goto error;
- }
-
- eir.dev_class_index = mrvl_pp2_device_class.index;
- eir.dev_instance = ppif->dev_instance;
- eir.address = mac_addr;
- eir.cb.flag_change = mrvl_pp2_eth_flag_change;
- ppif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
-
- sw = vnet_get_hw_sw_interface (vnm, ppif->hw_if_index);
- ppif->sw_if_index = sw->sw_if_index;
- ppif->per_interface_next_index = ~0;
- args->sw_if_index = sw->sw_if_index;
- vnet_hw_if_set_input_node (vnm, ppif->hw_if_index,
- mrvl_pp2_input_node.index);
- /* FIXME: only one RX queue ? */
- ppif->inqs[0].queue_index = vnet_hw_if_register_rx_queue (
- vnm, ppif->hw_if_index, 0, VNET_HW_IF_RXQ_THREAD_ANY);
-
- vnet_hw_if_set_rx_queue_mode (vnm, ppif->inqs[0].queue_index,
- VNET_HW_IF_RX_MODE_POLLING);
- vnet_hw_if_update_runtime_data (vnm, ppif->hw_if_index);
- vnet_hw_interface_set_flags (vnm, ppif->hw_if_index,
- VNET_HW_INTERFACE_FLAG_LINK_UP);
- goto done;
-
-error:
- mrvl_pp2_delete_if (ppif);
-done:
- vec_free (s);
-}
-
-static clib_error_t *
-mrvl_pp2_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
- u32 flags)
-{
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, hw->dev_instance);
- static clib_error_t *error = 0;
- int is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
- int rv;
-
- if (is_up)
- rv = pp2_ppio_enable (ppif->ppio);
- else
- rv = pp2_ppio_disable (ppif->ppio);
-
- if (rv)
- return clib_error_return (0, "failed to %s interface",
- is_up ? "enable" : "disable");
-
- if (is_up)
- ppif->flags |= MRVL_PP2_IF_F_ADMIN_UP;
- else
- ppif->flags &= ~MRVL_PP2_IF_F_ADMIN_UP;
-
- return error;
-}
-
-static void
-mrvl_pp2_clear_interface_counters (u32 instance)
-{
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, instance);
- struct pp2_ppio_statistics stats;
-
- pp2_ppio_get_statistics (ppif->ppio, &stats, 1);
-}
-
-static void
-mrvl_pp2_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
- u32 node_index)
-{
- mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- mrvl_pp2_if_t *ppif = pool_elt_at_index (ppm->interfaces, hw->dev_instance);
-
- /* Shut off redirection */
- if (node_index == ~0)
- {
- ppif->per_interface_next_index = node_index;
- return;
- }
-
- ppif->per_interface_next_index =
- vlib_node_add_next (vlib_get_main (), mrvl_pp2_input_node.index,
- node_index);
-}
-
-static char *mrvl_pp2_tx_func_error_strings[] = {
-#define _(n,s) s,
- foreach_mrvl_pp2_tx_func_error
-#undef _
-};
-
-VNET_DEVICE_CLASS (mrvl_pp2_device_class,) =
-{
- .name = "Marvell PPv2 interface",
- .format_device_name = format_mrvl_pp2_interface_name,
- .format_device = format_mrvl_pp2_interface,
- .tx_function = mrvl_pp2_interface_tx,
- .tx_function_n_errors = MRVL_PP2_TX_N_ERROR,
- .tx_function_error_strings = mrvl_pp2_tx_func_error_strings,
- .admin_up_down_function = mrvl_pp2_interface_admin_up_down,
- .clear_counters = mrvl_pp2_clear_interface_counters,
- .rx_redirect_to_node = mrvl_pp2_set_interface_next_node,
-};
-
-static clib_error_t *
-mrvl_pp2_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (mrvl_pp2_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/marvell/pp2/pp2.h b/src/plugins/marvell/pp2/pp2.h
deleted file mode 100644
index abb8e573a37..00000000000
--- a/src/plugins/marvell/pp2/pp2.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#define MVCONF_DBG_LEVEL 0
-#define MVCONF_PP2_BPOOL_COOKIE_SIZE 32
-#define MVCONF_PP2_BPOOL_DMA_ADDR_SIZE 64
-#define MVCONF_DMA_PHYS_ADDR_T_SIZE 64
-#define MVCONF_SYS_DMA_UIO
-#define MVCONF_TYPES_PUBLIC
-#define MVCONF_DMA_PHYS_ADDR_T_PUBLIC
-
-#include <vlib/vlib.h>
-
-#include "mv_std.h"
-#include "env/mv_sys_dma.h"
-#include "drivers/mv_pp2.h"
-#include <drivers/mv_pp2_bpool.h>
-#include <drivers/mv_pp2_ppio.h>
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u16 size;
- u32 queue_index;
- struct pp2_bpool *bpool;
-} mrvl_pp2_inq_t;
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u16 size;
- u32 *buffers;
- u16 head;
- u16 tail;
-} mrvl_pp2_outq_t;
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u32 flags;
-#define MRVL_PP2_IF_F_ADMIN_UP (1 << 0)
- struct pp2_ppio *ppio;
- u32 per_interface_next_index;
-
- mrvl_pp2_inq_t *inqs;
- mrvl_pp2_outq_t *outqs;
-
- u32 dev_instance;
- u32 sw_if_index;
- u32 hw_if_index;
-} mrvl_pp2_if_t;
-
-#define MRVL_PP2_BUFF_BATCH_SZ VLIB_FRAME_SIZE
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- struct pp2_hif *hif;
- struct pp2_ppio_desc *descs;
- struct buff_release_entry bre[MRVL_PP2_BUFF_BATCH_SZ];
- u32 buffers[VLIB_FRAME_SIZE];
-} mrvl_pp2_per_thread_data_t;
-
-typedef struct
-{
- mrvl_pp2_if_t *interfaces;
- mrvl_pp2_per_thread_data_t *per_thread_data;
-
- /* API message ID base */
- u16 msg_id_base;
-} mrvl_pp2_main_t;
-
-extern vnet_device_class_t mrvl_pp2_device_class;
-extern mrvl_pp2_main_t mrvl_pp2_main;
-
-typedef struct
-{
- u8 *name;
- u16 rx_q_sz;
- u16 tx_q_sz;
-
- /* return */
- i32 rv;
- u32 sw_if_index;
- clib_error_t *error;
-} mrvl_pp2_create_if_args_t;
-
-void mrvl_pp2_create_if (mrvl_pp2_create_if_args_t * args);
-void mrvl_pp2_delete_if (mrvl_pp2_if_t * dfif);
-clib_error_t *mrvl_pp2_plugin_api_hookup (vlib_main_t * vm);
-
-/* output.c */
-
-#define foreach_mrvl_pp2_tx_func_error \
- _(NO_FREE_SLOTS, "no free tx slots") \
- _(PPIO_SEND, "pp2_ppio_send errors") \
- _(PPIO_GET_NUM_OUTQ_DONE, "pp2_ppio_get_num_outq_done errors")
-
-typedef enum
-{
-#define _(f,s) MRVL_PP2_TX_ERROR_##f,
- foreach_mrvl_pp2_tx_func_error
-#undef _
- MRVL_PP2_TX_N_ERROR,
-} mrvl_pp2_tx_func_error_t;
-
-uword mrvl_pp2_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame);
-
-/* input.c */
-
-typedef struct
-{
- u32 next_index;
- u32 hw_if_index;
- struct pp2_ppio_desc desc;
-} mrvl_pp2_input_trace_t;
-
-extern vlib_node_registration_t mrvl_pp2_input_node;
-
-/* format.c */
-format_function_t format_mrvl_pp2_input_trace;
-format_function_t format_mrvl_pp2_interface;
-format_function_t format_mrvl_pp2_interface_name;
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/marvell/pp2/pp2_api.c b/src/plugins/marvell/pp2/pp2_api.c
deleted file mode 100644
index c1f3a9e1d1d..00000000000
--- a/src/plugins/marvell/pp2/pp2_api.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2019 Arm Limited.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ethernet/ethernet.h>
-
-#include <marvell/pp2/pp2.h>
-
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-
-/* define message IDs */
-#include <marvell/pp2/pp2.api_enum.h>
-#include <marvell/pp2/pp2.api_types.h>
-
-#define REPLY_MSG_ID_BASE (pp2->msg_id_base)
-#include <vlibapi/api_helper_macros.h>
-
-static void
-vl_api_mrvl_pp2_create_t_handler (vl_api_mrvl_pp2_create_t * mp)
-{
- mrvl_pp2_main_t *pp2 = &mrvl_pp2_main;
- mrvl_pp2_create_if_args_t args = { 0 };
- vl_api_mrvl_pp2_create_reply_t *rmp;
- int rv;
-
- args.name = format (0, "%s", mp->if_name);
- args.rx_q_sz = ntohs (mp->rx_q_sz);
- args.tx_q_sz = ntohs (mp->tx_q_sz);
- mrvl_pp2_create_if (&args);
- rv = args.rv;
- vec_free (args.name);
- if (args.error)
- {
- clib_error_free (args.error);
- }
- REPLY_MACRO2 (VL_API_MRVL_PP2_CREATE_REPLY,
- ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
-}
-
-static void
-vl_api_mrvl_pp2_delete_t_handler (vl_api_mrvl_pp2_delete_t * mp)
-{
- vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hw;
- mrvl_pp2_main_t *pp2 = &mrvl_pp2_main;
- vl_api_mrvl_pp2_delete_reply_t *rmp;
- mrvl_pp2_if_t *dif;
- int rv = 0;
- mp->sw_if_index = ntohl (mp->sw_if_index);
- hw = vnet_get_sup_hw_interface (vnm, mp->sw_if_index);
- if (hw == NULL || mrvl_pp2_device_class.index != hw->dev_class_index)
- {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto reply;
- }
-
- dif = pool_elt_at_index (pp2->interfaces, hw->dev_instance);
-
- mrvl_pp2_delete_if (dif);
-
-reply:
- REPLY_MACRO (VL_API_MRVL_PP2_DELETE_REPLY);
-}
-
-#include <marvell/pp2/pp2.api.c>
-/* set up the API message handling tables */
-clib_error_t *
-mrvl_pp2_plugin_api_hookup (vlib_main_t * vm)
-{
- mrvl_pp2_main_t *pp2 = &mrvl_pp2_main;
-
- /* ask for a correctly-sized block of API message decode slots */
- pp2->msg_id_base = setup_message_id_table ();
-
- return 0;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/marvell/pp2/pp2_test.c b/src/plugins/marvell/pp2/pp2_test.c
deleted file mode 100644
index 26a9e9a6e34..00000000000
--- a/src/plugins/marvell/pp2/pp2_test.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2019 Arm Limited.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ethernet/ethernet.h>
-
-#include <vat/vat.h>
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-
-#include <vppinfra/error.h>
-#include <marvell/pp2/pp2.h>
-
-#define __plugin_msg_base pp2_test_main.msg_id_base
-#include <vlibapi/vat_helper_macros.h>
-
-/* declare message IDs */
-#include <marvell/pp2/pp2.api_enum.h>
-#include <marvell/pp2/pp2.api_types.h>
-
-typedef struct
-{
- /* API message ID base */
- u16 msg_id_base;
- vat_main_t *vat_main;
-} pp2_test_main_t;
-
-pp2_test_main_t pp2_test_main;
-
-/* mrvl_pp2 create API */
-static int
-api_mrvl_pp2_create (vat_main_t * vam)
-{
- unformat_input_t *i = vam->input;
- vl_api_mrvl_pp2_create_t *mp;
- mrvl_pp2_create_if_args_t args;
- int ret;
- u16 size;
-
- clib_memset (&args, 0, sizeof (mrvl_pp2_create_if_args_t));
- while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (i, "name %s", &args.name))
- ;
- else if (unformat (i, "rx-queue-size %u", &size))
- args.rx_q_sz = size;
- else if (unformat (i, "tx-queue-size %u", &size))
- args.tx_q_sz = size;
- else
- {
- clib_warning ("unknown input '%U'", format_unformat_error, i);
- return -99;
- }
- }
-
- M (MRVL_PP2_CREATE, mp);
-
- strncpy_s ((char *) mp->if_name, ARRAY_LEN (mp->if_name),
- (char *) (args.name), strlen ((char *) args.name));
- mp->rx_q_sz = clib_host_to_net_u16 (args.rx_q_sz);
- mp->tx_q_sz = clib_host_to_net_u16 (args.tx_q_sz);
-
- S (mp);
- W (ret);
-
- vec_free (args.name);
-
- return ret;
-}
-
-/* mrvl_pp2 create reply handler */
-static void
-vl_api_mrvl_pp2_create_reply_t_handler (vl_api_mrvl_pp2_create_reply_t * mp)
-{
- vat_main_t *vam = pp2_test_main.vat_main;
- i32 retval = ntohl (mp->retval);
-
- if (retval == 0)
- {
- fformat (vam->ofp, "created mrvl_pp2 with sw_if_index %d\n",
- ntohl (mp->sw_if_index));
- }
-
- vam->retval = retval;
- vam->result_ready = 1;
- vam->regenerate_interface_table = 1;
-}
-
-
-/* mrvl_pp2 delete API */
-static int
-api_mrvl_pp2_delete (vat_main_t * vam)
-{
- unformat_input_t *i = vam->input;
- //vnet_main_t *vnm = vnet_get_main ();
- vl_api_mrvl_pp2_delete_t *mp;
- u32 sw_if_index = 0;
- int ret;
-
- while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (i, "sw_if_index %d", &sw_if_index))
- ;
- else
- {
- clib_warning ("unknown input '%U'", format_unformat_error, i);
- return -99;
- }
- }
-
- M (MRVL_PP2_DELETE, mp);
-
- mp->sw_if_index = clib_host_to_net_u32 (sw_if_index);
-
- S (mp);
- W (ret);
-
- return ret;
-}
-
-#include <marvell/pp2/pp2.api_test.c>
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_api.c b/src/plugins/nat/nat44-ed/nat44_ed_api.c
index 1f01410afce..b6c9d51d777 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_api.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_api.c
@@ -442,7 +442,8 @@ send_nat44_ed_output_interface_details (u32 index, vl_api_registration_t *rp,
/* Endian hack until apigen registers _details
* endian functions */
- vl_api_nat44_ed_output_interface_details_t_endian (rmp);
+ vl_api_nat44_ed_output_interface_details_t_endian (rmp,
+ 1 /* to network */);
rmp->_vl_msg_id = htons (rmp->_vl_msg_id);
rmp->context = htonl (rmp->context);
}));
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_api.c b/src/plugins/nat/nat44-ei/nat44_ei_api.c
index 8671a556929..454a5032c6a 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_api.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_api.c
@@ -751,7 +751,8 @@ send_nat44_ei_output_interface_details (u32 index, vl_api_registration_t *rp,
/* Endian hack until apigen registers _details
* endian functions */
- vl_api_nat44_ei_output_interface_details_t_endian (rmp);
+ vl_api_nat44_ei_output_interface_details_t_endian (rmp,
+ 1 /* to network */);
rmp->_vl_msg_id = htons (rmp->_vl_msg_id);
rmp->context = htonl (rmp->context);
}));
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
index 01b333a5234..3b981d69986 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
@@ -859,7 +859,7 @@ nat44_ei_icmp_in2out (vlib_buffer_t *b0, ip4_header_t *ip0,
nat44_ei_main_t *nm = &nat44_ei_main;
vlib_main_t *vm = vlib_get_main ();
ip4_address_t addr;
- u16 port;
+ u16 port = 0;
u32 fib_index;
nat_protocol_t proto;
icmp_echo_header_t *echo0, *inner_echo0 = 0;
diff --git a/src/plugins/nat/pnat/pnat_api.c b/src/plugins/nat/pnat/pnat_api.c
index 02e61219d1e..a4e7ff192bf 100644
--- a/src/plugins/nat/pnat/pnat_api.c
+++ b/src/plugins/nat/pnat/pnat_api.c
@@ -116,7 +116,8 @@ static void send_bindings_details(u32 index, vl_api_registration_t *rp,
/* Endian hack until apigen registers _details
* endian functions */
- vl_api_pnat_bindings_details_t_endian(rmp);
+ vl_api_pnat_bindings_details_t_endian(
+ rmp, 1 /* to network */);
rmp->_vl_msg_id = htons(rmp->_vl_msg_id);
rmp->context = htonl(rmp->context);
}));
@@ -158,7 +159,7 @@ static void send_interfaces_details(u32 index, vl_api_registration_t *rp,
/* Endian hack until apigen registers _details
* endian functions */
- vl_api_pnat_interfaces_details_t_endian(rmp);
+ vl_api_pnat_interfaces_details_t_endian(rmp, 1 /* to network */);
rmp->_vl_msg_id = htons(rmp->_vl_msg_id);
rmp->context = htonl(rmp->context);
}));
diff --git a/src/plugins/netmap/CMakeLists.txt b/src/plugins/netmap/CMakeLists.txt
new file mode 100644
index 00000000000..d53a9e0911a
--- /dev/null
+++ b/src/plugins/netmap/CMakeLists.txt
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2024 Tom Jones <thj@freebsd.org>
+#
+# This software was developed by Tom Jones <thj@freebsd.org> under sponsorship
+# from the FreeBSD Foundation.
+#
+
+if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
+ message(WARNING "Netmap is only currently support on FreeBSD - netmap plugin disabled")
+ return()
+endif()
+
+add_vpp_plugin(netmap
+ SOURCES
+ plugin.c
+ netmap.c
+ node.c
+ device.c
+ cli.c
+ netmap_api.c
+
+ MULTIARCH_SOURCES
+ node.c
+ device.c
+
+ INSTALL_HEADERS
+ netmap.h
+ net_netmap.h
+
+ API_FILES
+ netmap.api
+)
diff --git a/src/plugins/netmap/FEATURE.yaml b/src/plugins/netmap/FEATURE.yaml
new file mode 100644
index 00000000000..a9dfb2163e4
--- /dev/null
+++ b/src/plugins/netmap/FEATURE.yaml
@@ -0,0 +1,12 @@
+---
+name: Netmap Device
+maintainer: Tom Jones <thj@freebsd.org>
+features:
+ - L4 checksum offload
+description: "Create a netmap interface, which is a high speed user-space
+ interface that allows VPP to patch to a physical or virtual NIC
+ without the use of DPDK"
+missing:
+ - API dump
+state: production
+properties: [API, CLI, STATS, MULTITHREAD]
diff --git a/src/plugins/netmap/cli.c b/src/plugins/netmap/cli.c
new file mode 100644
index 00000000000..b54d397ecbe
--- /dev/null
+++ b/src/plugins/netmap/cli.c
@@ -0,0 +1,236 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <netmap/net_netmap.h>
+#include <netmap/netmap.h>
+
+static clib_error_t *
+netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *host_if_name = NULL;
+ u8 hwaddr[6];
+ u8 *hw_addr_ptr = 0;
+ int r;
+ u8 is_pipe = 0;
+ u8 is_master = 0;
+ u32 sw_if_index = ~0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &host_if_name))
+ ;
+ else
+ if (unformat
+ (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr))
+ hw_addr_ptr = hwaddr;
+ else if (unformat (line_input, "pipe"))
+ is_pipe = 1;
+ else if (unformat (line_input, "master"))
+ is_master = 1;
+ else if (unformat (line_input, "slave"))
+ is_master = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (host_if_name == NULL)
+ {
+ error = clib_error_return (0, "missing host interface name");
+ goto done;
+ }
+
+ r =
+ netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master,
+ &sw_if_index);
+
+ if (r == VNET_API_ERROR_SYSCALL_ERROR_1)
+ {
+ error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno);
+ goto done;
+ }
+
+ if (r == VNET_API_ERROR_INVALID_INTERFACE)
+ {
+ error = clib_error_return (0, "Invalid interface name");
+ goto done;
+ }
+
+ if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS)
+ {
+ error = clib_error_return (0, "Interface already exists");
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * '<em>netmap</em>' is a framework for very fast packet I/O from userspace.
+ * '<em>VALE</em>' is an equally fast in-kernel software switch using the
+ * netmap API. '<em>netmap</em>' includes '<em>netmap pipes</em>', a shared
+ * memory packet transport channel. Together, they provide a high speed
+ * user-space interface that allows VPP to patch into a linux namespace, a
+ * linux container, or a physical NIC without the use of DPDK. Netmap/VALE
+ * generates the '<em>netmap.ko</em>' kernel module that needs to be loaded
+ * before netmap interfaces can be created.
+ * - https://github.com/luigirizzo/netmap - Netmap/VALE repo.
+ * - https://github.com/vpp-dev/netmap - VPP development package for Netmap/VALE,
+ * which is a snapshot of the Netmap/VALE repo with minor changes to work
+ * with containers and modified kernel drivers to work with NICs.
+ *
+ * Create a netmap interface that will attach to a linux interface.
+ * The interface must already exist. Once created, a new netmap interface
+ * will exist in VPP with the name '<em>netmap-<ifname></em>', where
+ * '<em><ifname></em>' takes one of two forms:
+ * - <b>ifname</b> - Linux interface to bind too.
+ * - <b>valeXXX:YYY</b> -
+ * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE
+ * interface that must start with '<em>vale</em>' and is less
+ * than 16 characters.
+ * - Where '<em>YYY</em>' is an existing linux namespace.
+ *
+ * This command has the following optional parameters:
+ *
+ * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either
+ * X:X:X:X:X:X unix or X.X.X cisco format.
+ *
+ * - <b>pipe</b> - Optional flag to indicate that a '<em>netmap pipe</em>'
+ * instance should be created.
+ *
+ * - <b>master | slave</b> - Optional flag to indicate whether VPP should
+ * be the master or slave of the '<em>netmap pipe</em>'. Only considered
+ * if '<em>pipe</em>' is entered. Defaults to '<em>slave</em>' if not entered.
+ *
+ * @cliexpar
+ * Example of how to create a netmap interface tied to the linux
+ * namespace '<em>vpp1</em>':
+ * @cliexstart{create netmap name vale00:vpp1 hw-addr 02:FE:3F:34:15:9B pipe master}
+ * netmap-vale00:vpp1
+ * @cliexend
+ * Once the netmap interface is created, enable the interface using:
+ * @cliexcmd{set interface state netmap-vale00:vpp1 up}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (netmap_create_command, static) = {
+ .path = "create netmap",
+ .short_help = "create netmap name <ifname>|valeXXX:YYY "
+ "[hw-addr <mac-addr>] [pipe] [master|slave]",
+ .function = netmap_create_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *host_if_name = NULL;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &host_if_name))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (host_if_name == NULL)
+ {
+ error = clib_error_return (0, "missing host interface name");
+ goto done;
+ }
+
+ netmap_delete_if (vm, host_if_name);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Delete a netmap interface. Use the '<em><ifname></em>' to identify
+ * the netmap interface to be deleted. In VPP, netmap interfaces are
+ * named as '<em>netmap-<ifname></em>', where '<em><ifname></em>'
+ * takes one of two forms:
+ * - <b>ifname</b> - Linux interface to bind too.
+ * - <b>valeXXX:YYY</b> -
+ * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE
+ * interface that must start with '<em>vale</em>' and is less
+ * than 16 characters.
+ * - Where '<em>YYY</em>' is an existing linux namespace.
+ *
+ * @cliexpar
+ * Example of how to delete a netmap interface named '<em>netmap-vale00:vpp1</em>':
+ * @cliexcmd{delete netmap name vale00:vpp1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (netmap_delete_command, static) = {
+ .path = "delete netmap",
+ .short_help = "delete netmap name <ifname>|valeXXX:YYY",
+ .function = netmap_delete_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+netmap_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (netmap_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/device.c b/src/plugins/netmap/device.c
new file mode 100644
index 00000000000..505deb988c4
--- /dev/null
+++ b/src/plugins/netmap/device.c
@@ -0,0 +1,252 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <netmap/net_netmap.h>
+#include <netmap/netmap.h>
+
+#define foreach_netmap_tx_func_error \
+_(NO_FREE_SLOTS, "no free tx slots") \
+_(PENDING_MSGS, "pending msgs in tx ring")
+
+typedef enum
+{
+#define _(f,s) NETMAP_TX_ERROR_##f,
+ foreach_netmap_tx_func_error
+#undef _
+ NETMAP_TX_N_ERROR,
+} netmap_tx_func_error_t;
+
+static char *netmap_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_netmap_tx_func_error
+#undef _
+};
+
+
+static u8 *
+format_netmap_device_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ netmap_main_t *apm = &netmap_main;
+ netmap_if_t *nif = pool_elt_at_index (apm->interfaces, i);
+
+ s = format (s, "netmap-%s", nif->host_if_name);
+ return s;
+}
+
+static u8 *
+format_netmap_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ int verbose = va_arg (*args, int);
+ netmap_main_t *nm = &netmap_main;
+ netmap_if_t *nif = vec_elt_at_index (nm->interfaces, dev_instance);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "NETMAP interface");
+ if (verbose)
+ {
+ s = format (s, "\n%U version %d flags 0x%x"
+ "\n%U region %u memsize 0x%x offset 0x%x"
+ "\n%U tx_slots %u rx_slots %u tx_rings %u rx_rings %u",
+ format_white_space, indent + 2,
+ nif->req->nr_version,
+ nif->req->nr_flags,
+ format_white_space, indent + 2,
+ nif->mem_region,
+ nif->req->nr_memsize,
+ nif->req->nr_offset,
+ format_white_space, indent + 2,
+ nif->req->nr_tx_slots,
+ nif->req->nr_rx_slots,
+ nif->req->nr_tx_rings, nif->req->nr_rx_rings);
+ }
+ return s;
+}
+
+static u8 *
+format_netmap_tx_trace (u8 * s, va_list * args)
+{
+ s = format (s, "Unimplemented...");
+ return s;
+}
+
+VNET_DEVICE_CLASS_TX_FN (netmap_device_class) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ netmap_main_t *nm = &netmap_main;
+ u32 *buffers = vlib_frame_vector_args (frame);
+ u32 n_left = frame->n_vectors;
+ f64 const time_constant = 1e3;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance);
+ int cur_ring;
+
+ clib_spinlock_lock_if_init (&nif->lockp);
+
+ cur_ring = nif->first_tx_ring;
+
+ while (n_left && cur_ring <= nif->last_tx_ring)
+ {
+ struct netmap_ring *ring = NETMAP_TXRING (nif->nifp, cur_ring);
+ int n_free_slots = nm_ring_space (ring);
+ uint cur = ring->cur;
+
+ if (nm_tx_pending (ring))
+ {
+ if (ioctl (nif->fd, NIOCTXSYNC, NULL) < 0)
+ clib_unix_warning ("NIOCTXSYNC");
+ clib_cpu_time_wait (time_constant);
+
+ if (nm_tx_pending (ring) && !n_free_slots)
+ {
+ cur_ring++;
+ continue;
+ }
+ }
+
+ while (n_left && n_free_slots)
+ {
+ vlib_buffer_t *b0 = 0;
+ u32 bi = buffers[0];
+ u32 len;
+ u32 offset = 0;
+ buffers++;
+
+ struct netmap_slot *slot = &ring->slot[cur];
+
+ do
+ {
+ b0 = vlib_get_buffer (vm, bi);
+ len = b0->current_length;
+ /* memcpy */
+ clib_memcpy_fast ((u8 *) NETMAP_BUF (ring, slot->buf_idx) +
+ offset, vlib_buffer_get_current (b0), len);
+ offset += len;
+ }
+ while ((bi = b0->next_buffer));
+
+ slot->len = offset;
+ cur = (cur + 1) % ring->num_slots;
+ n_free_slots--;
+ n_left--;
+ }
+ CLIB_MEMORY_BARRIER ();
+ ring->head = ring->cur = cur;
+ }
+
+ if (n_left < frame->n_vectors)
+ ioctl (nif->fd, NIOCTXSYNC, NULL);
+
+ clib_spinlock_unlock_if_init (&nif->lockp);
+
+ if (n_left)
+ vlib_error_count (vm, node->node_index,
+ (n_left ==
+ frame->n_vectors ? NETMAP_TX_ERROR_PENDING_MSGS :
+ NETMAP_TX_ERROR_NO_FREE_SLOTS), n_left);
+
+ vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
+ return frame->n_vectors;
+}
+
+static void
+netmap_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ netmap_main_t *apm = &netmap_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance);
+
+ /* Shut off redirection */
+ if (node_index == ~0)
+ {
+ nif->per_interface_next_index = node_index;
+ return;
+ }
+
+ nif->per_interface_next_index =
+ vlib_node_add_next (vlib_get_main (), netmap_input_node.index,
+ node_index);
+}
+
+static void
+netmap_clear_hw_interface_counters (u32 instance)
+{
+ /* Nothing for now */
+}
+
+static clib_error_t *
+netmap_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ netmap_main_t *apm = &netmap_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance);
+ u32 hw_flags;
+
+ nif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ if (nif->is_admin_up)
+ hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
+ else
+ hw_flags = 0;
+
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return 0;
+}
+
+static clib_error_t *
+netmap_subif_add_del_function (vnet_main_t * vnm,
+ u32 hw_if_index,
+ struct vnet_sw_interface_t *st, int is_add)
+{
+ /* Nothing for now */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (netmap_device_class) = {
+ .name = "netmap",
+ .format_device_name = format_netmap_device_name,
+ .format_device = format_netmap_device,
+ .format_tx_trace = format_netmap_tx_trace,
+ .tx_function_n_errors = NETMAP_TX_N_ERROR,
+ .tx_function_error_strings = netmap_tx_func_error_strings,
+ .rx_redirect_to_node = netmap_set_interface_next_node,
+ .clear_counters = netmap_clear_hw_interface_counters,
+ .admin_up_down_function = netmap_interface_admin_up_down,
+ .subif_add_del_function = netmap_subif_add_del_function,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/net_netmap.h b/src/plugins/netmap/net_netmap.h
new file mode 100644
index 00000000000..ecccedd4484
--- /dev/null
+++ b/src/plugins/netmap/net_netmap.h
@@ -0,0 +1,650 @@
+/*
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $
+ *
+ * Definitions of constants and the structures used by the netmap
+ * framework, for the part visible to both kernel and userspace.
+ * Detailed info on netmap is available with "man netmap" or at
+ *
+ * http://info.iet.unipi.it/~luigi/netmap/
+ *
+ * This API is also used to communicate with the VALE software switch
+ */
+
+#ifndef _NET_NETMAP_H_
+#define _NET_NETMAP_H_
+
+#define NETMAP_API 14 /* current API version */
+
+#define NETMAP_MIN_API 14 /* min and max versions accepted */
+#define NETMAP_MAX_API 15
+/*
+ * Some fields should be cache-aligned to reduce contention.
+ * The alignment is architecture and OS dependent, but rather than
+ * digging into OS headers to find the exact value we use an estimate
+ * that should cover most architectures.
+ */
+#define NM_CACHE_ALIGN 128
+
+/*
+ * --- Netmap data structures ---
+ *
+ * The userspace data structures used by netmap are shown below.
+ * They are allocated by the kernel and mmap()ed by userspace threads.
+ * Pointers are implemented as memory offsets or indexes,
+ * so that they can be easily dereferenced in kernel and userspace.
+
+ KERNEL (opaque, obviously)
+
+ ====================================================================
+ |
+ USERSPACE | struct netmap_ring
+ +---->+---------------+
+ / | head,cur,tail |
+ struct netmap_if (nifp, 1 per fd) / | buf_ofs |
+ +---------------+ / | other fields |
+ | ni_tx_rings | / +===============+
+ | ni_rx_rings | / | buf_idx, len | slot[0]
+ | | / | flags, ptr |
+ | | / +---------------+
+ +===============+ / | buf_idx, len | slot[1]
+ | txring_ofs[0] | (rel.to nifp)--' | flags, ptr |
+ | txring_ofs[1] | +---------------+
+ (tx+1 entries) (num_slots entries)
+ | txring_ofs[t] | | buf_idx, len | slot[n-1]
+ +---------------+ | flags, ptr |
+ | rxring_ofs[0] | +---------------+
+ | rxring_ofs[1] |
+ (rx+1 entries)
+ | rxring_ofs[r] |
+ +---------------+
+
+ * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to
+ * a file descriptor, the mmap()ed region contains a (logically readonly)
+ * struct netmap_if pointing to struct netmap_ring's.
+ *
+ * There is one netmap_ring per physical NIC ring, plus one tx/rx ring
+ * pair attached to the host stack (this pair is unused for non-NIC ports).
+ *
+ * All physical/host stack ports share the same memory region,
+ * so that zero-copy can be implemented between them.
+ * VALE switch ports instead have separate memory regions.
+ *
+ * The netmap_ring is the userspace-visible replica of the NIC ring.
+ * Each slot has the index of a buffer (MTU-sized and residing in the
+ * mmapped region), its length and some flags. An extra 64-bit pointer
+ * is provided for user-supplied buffers in the tx path.
+ *
+ * In user space, the buffer address is computed as
+ * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE
+ *
+ * Added in NETMAP_API 11:
+ *
+ * + NIOCREGIF can request the allocation of extra spare buffers from
+ * the same memory pool. The desired number of buffers must be in
+ * nr_arg3. The ioctl may return fewer buffers, depending on memory
+ * availability. nr_arg3 will return the actual value, and, once
+ * mapped, nifp->ni_bufs_head will be the index of the first buffer.
+ *
+ * The buffers are linked to each other using the first uint32_t
+ * as the index. On close, ni_bufs_head must point to the list of
+ * buffers to be released.
+ *
+ * + NIOCREGIF can request space for extra rings (and buffers)
+ * allocated in the same memory space. The number of extra rings
+ * is in nr_arg1, and is advisory. This is a no-op on NICs where
+ * the size of the memory space is fixed.
+ *
+ * + NIOCREGIF can attach to PIPE rings sharing the same memory
+ * space with a parent device. The ifname indicates the parent device,
+ * which must already exist. Flags in nr_flags indicate if we want to
+ * bind the master or slave side, the index (from nr_ringid)
+ * is just a cookie and does not need to be sequential.
+ *
+ * + NIOCREGIF can also attach to 'monitor' rings that replicate
+ * the content of specific rings, also from the same memory space.
+ *
+ * Extra flags in nr_flags support the above functions.
+ * Application libraries may use the following naming scheme:
+ * netmap:foo all NIC ring pairs
+ * netmap:foo^ only host ring pair
+ * netmap:foo+ all NIC ring + host ring pairs
+ * netmap:foo-k the k-th NIC ring pair
+ * netmap:foo{k PIPE ring pair k, master side
+ * netmap:foo}k PIPE ring pair k, slave side
+ */
+
+/*
+ * struct netmap_slot is a buffer descriptor
+ */
+/* One buffer descriptor, shared (via mmap) between kernel and userspace. */
+struct netmap_slot {
+	uint32_t buf_idx;	/* buffer index */
+	uint16_t len;		/* length for this slot */
+	uint16_t flags;		/* buf changed, etc. */
+	uint64_t ptr;		/* pointer for indirect buffers (see NS_INDIRECT) */
+};
+
+/*
+ * The following flags control how the slot is used
+ */
+
+#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */
+ /*
+ * must be set whenever buf_idx is changed (as it might be
+ * necessary to recompute the physical address and mapping)
+ *
+ * It is also set by the kernel whenever the buf_idx is
+ * changed internally (e.g., by pipes). Applications may
+ * use this information to know when they can reuse the
+ * contents of previously prepared buffers.
+ */
+
+#define NS_REPORT 0x0002 /* ask the hardware to report results */
+ /*
+ * Request notification when slot is used by the hardware.
+ * Normally transmit completions are handled lazily and
+ * may be unreported. This flag lets us know when a slot
+ * has been sent (e.g. to terminate the sender).
+ */
+
+#define NS_FORWARD 0x0004 /* pass packet 'forward' */
+ /*
+ * (Only for physical ports, rx rings with NR_FORWARD set).
+ * Slot released to the kernel (i.e. before ring->head) with
+ * this flag set are passed to the peer ring (host/NIC),
+ * thus restoring the host-NIC connection for these slots.
+ * This supports efficient traffic monitoring or firewalling.
+ */
+
+#define NS_NO_LEARN 0x0008 /* disable bridge learning */
+ /*
+ * On a VALE switch, do not 'learn' the source port for
+ * this buffer.
+ */
+
+#define NS_INDIRECT 0x0010 /* userspace buffer */
+ /*
+ * (VALE tx rings only) data is in a userspace buffer,
+ * whose address is in the 'ptr' field in the slot.
+ */
+
+#define NS_MOREFRAG 0x0020 /* packet has more fragments */
+ /*
+ * (VALE ports only)
+ * Set on all but the last slot of a multi-segment packet.
+ * The 'len' field refers to the individual fragment.
+ */
+
+#define NS_PORT_SHIFT 8
+#define NS_PORT_MASK (0xff << NS_PORT_SHIFT)
+ /*
+ * The high 8 bits of the flag, if not zero, indicate the
+ * destination port for the VALE switch, overriding
+ * the lookup table.
+ */
+
+#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff)
+ /*
+ * (VALE rx rings only) the high 8 bits
+ * are the number of fragments.
+ */
+
+
+/*
+ * struct netmap_ring
+ *
+ * Netmap representation of a TX or RX ring (also known as "queue").
+ * This is a queue implemented as a fixed-size circular array.
+ * At the software level the important fields are: head, cur, tail.
+ *
+ * In TX rings:
+ *
+ * head first slot available for transmission.
+ * cur wakeup point. select() and poll() will unblock
+ * when 'tail' moves past 'cur'
+ * tail (readonly) first slot reserved to the kernel
+ *
+ * [head .. tail-1] can be used for new packets to send;
+ * 'head' and 'cur' must be incremented as slots are filled
+ * with new packets to be sent;
+ * 'cur' can be moved further ahead if we need more space
+ * for new transmissions. XXX todo (2014-03-12)
+ *
+ * In RX rings:
+ *
+ * head first valid received packet
+ * cur wakeup point. select() and poll() will unblock
+ * when 'tail' moves past 'cur'
+ * tail (readonly) first slot reserved to the kernel
+ *
+ * [head .. tail-1] contain received packets;
+ * 'head' and 'cur' must be incremented as slots are consumed
+ * and can be returned to the kernel;
+ * 'cur' can be moved further ahead if we want to wait for
+ * new packets without returning the previous ones.
+ *
+ * DATA OWNERSHIP/LOCKING:
+ * The netmap_ring, and all slots and buffers in the range
+ * [head .. tail-1] are owned by the user program;
+ * the kernel only accesses them during a netmap system call
+ * and in the user thread context.
+ *
+ * Other slots and buffers are reserved for use by the kernel
+ */
+struct netmap_ring {
+	/*
+	 * buf_ofs is meant to be used through macros.
+	 * It contains the offset of the buffer region from this
+	 * descriptor.
+	 */
+	const int64_t	buf_ofs;
+	const uint32_t	num_slots;	/* number of slots in the ring. */
+	const uint32_t	nr_buf_size;	/* size of each buffer */
+	const uint16_t	ringid;		/* ring number */
+	const uint16_t	dir;		/* 0: tx, 1: rx */
+
+	/* (u) = written by userspace, (k) = written by the kernel */
+	uint32_t        head;		/* (u) first user slot */
+	uint32_t        cur;		/* (u) wakeup point */
+	uint32_t        tail;		/* (k) first kernel slot */
+
+	uint32_t        flags;		/* NR_* ring flags below */
+
+	struct timeval	ts;		/* (k) time of last *sync() */
+
+	/* opaque room for a mutex or similar object */
+#if !defined(_WIN32) || defined(__CYGWIN__)
+	uint8_t	__attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128];
+#else
+	uint8_t	__declspec(align(NM_CACHE_ALIGN)) sem[128];
+#endif
+
+	/* the slots follow. This struct has variable size */
+	struct netmap_slot slot[0];	/* array of slots (num_slots entries). */
+};
+
+
+/*
+ * RING FLAGS
+ */
+#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */
+ /*
+ * updates the 'ts' field on each netmap syscall. This saves
+ * saves a separate gettimeofday(), and is not much worse than
+ * software timestamps generated in the interrupt handler.
+ */
+
+#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */
+ /*
+ * Enables the NS_FORWARD slot flag for the ring.
+ */
+
+
+/*
+ * Netmap representation of an interface and its queue(s).
+ * This is initialized by the kernel when binding a file
+ * descriptor to a port, and should be considered as readonly
+ * by user programs. The kernel never uses it.
+ *
+ * There is one netmap_if for each file descriptor on which we want
+ * to select/poll.
+ * select/poll operates on one or all pairs depending on the value of
+ * nmr_queueid passed on the ioctl.
+ */
+struct netmap_if {
+	char		ni_name[IFNAMSIZ]; /* name of the interface. */
+	const uint32_t	ni_version;	/* API version, currently unused */
+	const uint32_t	ni_flags;	/* properties */
+#define	NI_PRIV_MEM	0x1		/* private memory region */
+
+	/*
+	 * The number of packet rings available in netmap mode.
+	 * Physical NICs can have different numbers of tx and rx rings.
+	 * Physical NICs also have a 'host' ring pair.
+	 * Additionally, clients can request additional ring pairs to
+	 * be used for internal communication.
+	 */
+	const uint32_t	ni_tx_rings;	/* number of HW tx rings */
+	const uint32_t	ni_rx_rings;	/* number of HW rx rings */
+
+	uint32_t	ni_bufs_head;	/* head index for extra bufs */
+	uint32_t	ni_spare1[5];	/* reserved, unused */
+	/*
+	 * The following array contains the offset of each netmap ring
+	 * from this structure, in the following order:
+	 * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings;
+	 * NIC rx rings (ni_rx_rings); host rx ring (1); extra rx rings.
+	 *
+	 * The area is filled up by the kernel on NIOCREGIF,
+	 * and then only read by userspace code.
+	 */
+	const ssize_t	ring_ofs[0];	/* variable-length trailing array */
+};
+
+
+#ifndef NIOCREGIF
+/*
+ * ioctl names and related fields
+ *
+ * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
+ * whose identity is set in NIOCREGIF through nr_ringid.
+ * These are non blocking and take no argument.
+ *
+ * NIOCGINFO takes a struct ifreq, the interface name is the input,
+ * the outputs are number of queues and number of descriptor
+ * for each queue (useful to set number of threads etc.).
+ * The info returned is only advisory and may change before
+ * the interface is bound to a file descriptor.
+ *
+ * NIOCREGIF takes an interface name within a struct nmre,
+ * and activates netmap mode on the interface (if possible).
+ *
+ * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we
+ * can pass it down to other NIC-related ioctls.
+ *
+ * The actual argument (struct nmreq) has a number of options to request
+ * different functions.
+ * The following are used in NIOCREGIF when nr_cmd == 0:
+ *
+ * nr_name (in)
+ * The name of the port (em0, valeXXX:YYY, etc.)
+ * limited to IFNAMSIZ for backward compatibility.
+ *
+ * nr_version (in/out)
+ * Must match NETMAP_API as used in the kernel, error otherwise.
+ * Always returns the desired value on output.
+ *
+ * nr_tx_slots, nr_rx_slots, nr_tx_rings, nr_rx_rings (in/out)
+ * On input, non-zero values may be used to reconfigure the port
+ * according to the requested values, but this is not guaranteed.
+ * On output the actual values in use are reported.
+ *
+ * nr_ringid (in)
+ * Indicates how rings should be bound to the file descriptors.
+ * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK)
+ * are used to indicate the ring number, and nr_flags specifies
+ * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected.
+ *
+ * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED:
+ * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control
+ * the binding as follows:
+ * 0 (default) binds all physical rings
+ * NETMAP_HW_RING | ring number binds a single ring pair
+ * NETMAP_SW_RING binds only the host tx/rx rings
+ *
+ * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push
+ * packets on tx rings only if POLLOUT is set.
+ * The default is to push any pending packet.
+ *
+ * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release
+ * packets on rx rings also when POLLIN is NOT set.
+ * The default is to touch the rx ring only with POLLIN.
+ * Note that this is the opposite of TX because it
+ * reflects the common usage.
+ *
+ * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead.
+ * NETMAP_PRIV_MEM is set on return for ports that do not use
+ * the global memory allocator.
+ * This information is not significant and applications
+ * should look at the region id in nr_arg2
+ *
+ * nr_flags is the recommended mode to indicate which rings should
+ * be bound to a file descriptor. Values are NR_REG_*
+ *
+ * nr_arg1 (in) The number of extra rings to be reserved.
+ * Especially when allocating a VALE port the system only
+ * allocates the amount of memory needed for the port.
+ * If more shared memory rings are desired (e.g. for pipes),
+ * the first invocation for the same basename/allocator
+ * should specify a suitable number. Memory cannot be
+ * extended after the first allocation without closing
+ * all ports on the same region.
+ *
+ * nr_arg2 (in/out) The identity of the memory region used.
+ * On input, 0 means the system decides autonomously,
+ * other values may try to select a specific region.
+ * On return the actual value is reported.
+ * Region '1' is the global allocator, normally shared
+ * by all interfaces. Other values are private regions.
+ * If two ports use the same region, zero-copy is possible.
+ *
+ * nr_arg3 (in/out) number of extra buffers to be allocated.
+ *
+ *
+ *
+ * nr_cmd (in) if non-zero indicates a special command:
+ * NETMAP_BDG_ATTACH and nr_name = vale*:ifname
+ * attaches the NIC to the switch; nr_ringid specifies
+ * which rings to use. Used by vale-ctl -a ...
+ * nr_arg1 = NETMAP_BDG_HOST also attaches the host port
+ * as in vale-ctl -h ...
+ *
+ * NETMAP_BDG_DETACH and nr_name = vale*:ifname
+ * disconnects a previously attached NIC.
+ * Used by vale-ctl -d ...
+ *
+ * NETMAP_BDG_LIST
+ * list the configuration of VALE switches.
+ *
+ * NETMAP_BDG_VNET_HDR
+ * Set the virtio-net header length used by the client
+ * of a VALE switch port.
+ *
+ * NETMAP_BDG_NEWIF
+ * create a persistent VALE port with name nr_name.
+ * Used by vale-ctl -n ...
+ *
+ * NETMAP_BDG_DELIF
+ * delete a persistent VALE port. Used by vale-ctl -d ...
+ *
+ * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific
+ *
+ *
+ *
+ */
+
+
+/*
+ * struct nmreq overlays a struct ifreq (just the name)
+ */
+/* Argument for NIOCGINFO/NIOCREGIF; the leading name field overlays a
+   struct ifreq so the request can be handed down to NIC ioctls. */
+struct nmreq {
+	char		nr_name[IFNAMSIZ];
+	uint32_t	nr_version;	/* API version */
+	uint32_t	nr_offset;	/* nifp offset in the shared region */
+	uint32_t	nr_memsize;	/* size of the shared region */
+	uint32_t	nr_tx_slots;	/* slots in tx rings */
+	uint32_t	nr_rx_slots;	/* slots in rx rings */
+	uint16_t	nr_tx_rings;	/* number of tx rings */
+	uint16_t	nr_rx_rings;	/* number of rx rings */
+
+	uint16_t	nr_ringid;	/* ring(s) we care about */
+#define NETMAP_HW_RING		0x4000	/* single NIC ring pair */
+#define NETMAP_SW_RING		0x2000	/* only host ring pair */
+
+#define NETMAP_RING_MASK	0x0fff	/* the ring number */
+
+#define NETMAP_NO_TX_POLL	0x1000	/* no automatic txsync on poll */
+
+#define NETMAP_DO_RX_POLL	0x8000	/* DO automatic rxsync on poll */
+
+	uint16_t	nr_cmd;
+#define NETMAP_BDG_ATTACH	1	/* attach the NIC */
+#define NETMAP_BDG_DETACH	2	/* detach the NIC */
+#define NETMAP_BDG_REGOPS	3	/* register bridge callbacks */
+#define NETMAP_BDG_LIST		4	/* get bridge's info */
+#define NETMAP_BDG_VNET_HDR     5       /* set the port virtio-net-hdr length */
+#define NETMAP_BDG_OFFSET	NETMAP_BDG_VNET_HDR	/* deprecated alias */
+#define NETMAP_BDG_NEWIF	6	/* create a virtual port */
+#define NETMAP_BDG_DELIF	7	/* destroy a virtual port */
+#define NETMAP_PT_HOST_CREATE	8	/* create ptnetmap kthreads */
+#define NETMAP_PT_HOST_DELETE	9	/* delete ptnetmap kthreads */
+#define NETMAP_BDG_POLLING_ON	10	/* start polling kthread */
+#define NETMAP_BDG_POLLING_OFF	11	/* delete polling kthread */
+#define NETMAP_VNET_HDR_GET	12      /* get the port virtio-net-hdr length */
+	uint16_t	nr_arg1;	/* reserve extra rings in NIOCREGIF */
+#define NETMAP_BDG_HOST		1	/* attach the host stack on ATTACH */
+
+	uint16_t	nr_arg2;	/* id of the memory region in use */
+	uint32_t	nr_arg3;	/* req. extra buffers in NIOCREGIF */
+	uint32_t	nr_flags;
+	/* various modes, extends nr_ringid */
+	uint32_t	spare2[1];
+};
+
+#define NR_REG_MASK 0xf /* values for nr_flags */
+enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
+ NR_REG_ALL_NIC = 1,
+ NR_REG_SW = 2,
+ NR_REG_NIC_SW = 3,
+ NR_REG_ONE_NIC = 4,
+ NR_REG_PIPE_MASTER = 5,
+ NR_REG_PIPE_SLAVE = 6,
+};
+/* monitor uses the NR_REG to select the rings to monitor */
+#define NR_MONITOR_TX 0x100
+#define NR_MONITOR_RX 0x200
+#define NR_ZCOPY_MON 0x400
+/* request exclusive access to the selected rings */
+#define NR_EXCLUSIVE 0x800
+/* request ptnetmap host support */
+#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */
+#define NR_PTNETMAP_HOST 0x1000
+#define NR_RX_RINGS_ONLY 0x2000
+#define NR_TX_RINGS_ONLY 0x4000
+/* Applications set this flag if they are able to deal with virtio-net headers,
+ * that is send/receive frames that start with a virtio-net header.
+ * If not set, NIOCREGIF will fail with netmap ports that require applications
+ * to use those headers. If the flag is set, the application can use the
+ * NETMAP_VNET_HDR_GET command to figure out the header length. */
+#define NR_ACCEPT_VNET_HDR 0x8000
+
+
+/*
+ * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined
+ * in ws2def.h but not sure if they are in the form we need.
+ * XXX so we redefine them
+ * in a convenient way to use for DeviceIoControl signatures
+ */
+#ifdef _WIN32
+#undef _IO // ws2def.h
+#define _WIN_NM_IOCTL_TYPE 40000
+#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \
+ METHOD_BUFFERED, FILE_ANY_ACCESS )
+#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \
+ METHOD_OUT_DIRECT, FILE_ANY_ACCESS )
+
+#define _IOWR(_c, _n, _s) _IO(_c, _n)
+
+/* We have some internal sysctl in addition to the externally visible ones */
+#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT
+#define NETMAP_POLL _IO('i', 162)
+
+/* and also two setsockopt for sysctl emulation */
+#define NETMAP_SETSOCKOPT _IO('i', 140)
+#define NETMAP_GETSOCKOPT _IO('i', 141)
+
+
+//These linknames are for the Netmap Core Driver
+#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP"
+#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap"
+
+//Definition of a structure used to pass a virtual address within an IOCTL
+typedef struct _MEMORY_ENTRY {
+ PVOID pUsermodeVirtualAddress;
+} MEMORY_ENTRY, *PMEMORY_ENTRY;
+
+typedef struct _POLL_REQUEST_DATA {
+ int events;
+ int timeout;
+ int revents;
+} POLL_REQUEST_DATA;
+
+#endif /* _WIN32 */
+
+/*
+ * FreeBSD uses the size value embedded in the _IOWR to determine
+ * how much to copy in/out. So we need it to match the actual
+ * data structure we pass. We put some spares in the structure
+ * to ease compatibility with other versions
+ */
+#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */
+#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */
+#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */
+#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */
+#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext. modules */
+#endif /* !NIOCREGIF */
+
+
+/*
+ * Helper functions for kernel and userspace
+ */
+
+/*
+ * Return non-zero when the ring holds nothing for the user: the user
+ * cursor has caught up with the kernel-owned tail slot.
+ */
+static inline int
+nm_ring_empty(struct netmap_ring *ring)
+{
+	return ring->cur == ring->tail;
+}
+
+/*
+ * Opaque structure that is passed to an external kernel
+ * module via ioctl(fd, NIOCCONFIG, req) for a user-owned
+ * bridge port (at this point ephemeral VALE interface).
+ */
+#define NM_IFRDATA_LEN 256
+struct nm_ifreq {
+ char nifr_name[IFNAMSIZ];
+ char data[NM_IFRDATA_LEN];
+};
+
+/*
+ * netmap kernel thread configuration
+ */
+/* bhyve/vmm.ko MSIX parameters for IOCTL */
+struct ptn_vmm_ioctl_msix {
+ uint64_t msg;
+ uint64_t addr;
+};
+
+/* IOCTL parameters */
+struct nm_kth_ioctl {
+ u_long com;
+ /* TODO: use union */
+ union {
+ struct ptn_vmm_ioctl_msix msix;
+ } data;
+};
+
+/* Configuration of a ptnetmap ring */
+struct ptnet_ring_cfg {
+ uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */
+ uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */
+ struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */
+};
+#endif /* _NET_NETMAP_H_ */
diff --git a/src/plugins/netmap/netmap.api b/src/plugins/netmap/netmap.api
new file mode 100644
index 00000000000..a14753cad9c
--- /dev/null
+++ b/src/plugins/netmap/netmap.api
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "1.0.0";
+
+/** \brief Create netmap
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param netmap_if_name - interface name
+    @param hw_addr - interface MAC
+    @param use_random_hw_addr - use random generated MAC
+    @param is_pipe - is pipe
+    @param is_master - 0=slave, 1=master
+*/
+autoreply define netmap_create
+{
+  u32 client_index;
+  u32 context;
+
+  u8 netmap_if_name[64];	/* fixed-size, NUL-terminated name */
+  u8 hw_addr[6];		/* ignored when use_random_hw_addr is set */
+  u8 use_random_hw_addr;
+  u8 is_pipe;
+  u8 is_master;			/* only meaningful when is_pipe is set */
+};
+
+/** \brief Delete netmap
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param netmap_if_name - interface name
+*/
+autoreply define netmap_delete
+{
+  u32 client_index;
+  u32 context;
+
+  u8 netmap_if_name[64];	/* fixed-size, NUL-terminated name */
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/netmap.c b/src/plugins/netmap/netmap.c
new file mode 100644
index 00000000000..ebef215eb3b
--- /dev/null
+++ b/src/plugins/netmap/netmap.c
@@ -0,0 +1,334 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <netmap/net_netmap.h>
+#include <netmap/netmap.h>
+#include <netmap/netmap.api_enum.h>
+#include <netmap/netmap.api_types.h>
+
+netmap_main_t netmap_main;
+
+/*
+ * clib_file "read ready" callback for a netmap file descriptor.
+ * private_data carries the netmap_if pool index; record it in the
+ * pending-input bitmap (presumably consumed by the input node -- the
+ * node body is not in this file) and request an interrupt dispatch.
+ */
+static clib_error_t *
+netmap_fd_read_ready (clib_file_t * uf)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  netmap_main_t *nm = &netmap_main;
+  u32 idx = uf->private_data;
+
+  nm->pending_input_bitmap =
+    clib_bitmap_set (nm->pending_input_bitmap, idx, 1);
+
+  /* Schedule the rx node */
+  vlib_node_set_interrupt_pending (vm, netmap_input_node.index);
+
+  return 0;
+}
+
+/*
+ * Release everything held by one netmap interface: unregister its clib
+ * file (or close the raw fd), drop the shared memory-region refcount
+ * (unmapping on last use), remove the name-lookup entry, free owned
+ * vectors and recycle the pool element.
+ */
+static void
+close_netmap_if (netmap_main_t * nm, netmap_if_t * nif)
+{
+  if (nif->clib_file_index != ~0)
+    {
+      /* NOTE(review): assumes clib_file_del also closes nif->fd -- confirm */
+      clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index);
+      nif->clib_file_index = ~0;
+    }
+  else if (nif->fd > -1)
+    close (nif->fd);
+
+  /* NOTE(review): region index 0 is never released here; this looks like
+     it relies on kernel region ids (nr_arg2) starting at 1 -- confirm */
+  if (nif->mem_region)
+    {
+      netmap_mem_region_t *reg = &nm->mem_regions[nif->mem_region];
+      if (--reg->refcnt == 0)
+	{
+	  munmap (reg->mem, reg->region_size);
+	  reg->region_size = 0;
+	}
+    }
+
+
+  mhash_unset (&nm->if_index_by_host_if_name, nif->host_if_name,
+	       &nif->if_index);
+  vec_free (nif->host_if_name);
+  vec_free (nif->req);
+
+  clib_memset (nif, 0, sizeof (*nif));
+  pool_put (nm->interfaces, nif);
+}
+
+/* Switch the netmap input node to polling mode on every vlib main.
+   Called when the first interface is created and workers exist.
+   Always returns 0. */
+int
+netmap_worker_thread_enable ()
+{
+  /* if worker threads are enabled, switch to polling mode */
+  foreach_vlib_main ()
+    {
+      vlib_node_set_state (this_vlib_main, netmap_input_node.index,
+			   VLIB_NODE_STATE_POLLING);
+    }
+
+  return 0;
+}
+
+/* Return the netmap input node to interrupt mode on every vlib main.
+   Called when the last interface is deleted.  Always returns 0. */
+int
+netmap_worker_thread_disable ()
+{
+  foreach_vlib_main ()
+    {
+      vlib_node_set_state (this_vlib_main, netmap_input_node.index,
+			   VLIB_NODE_STATE_INTERRUPT);
+    }
+
+  return 0;
+}
+
+/*
+ * Create a netmap interface and register it as an ethernet interface
+ * with vnet.
+ *
+ * if_name     - netmap port name (vector; ownership passes to the
+ *               interface and it is freed on deletion)
+ * hw_addr_set - optional MAC; when NULL a random locally-administered
+ *               address (02:fe:...) is generated
+ * is_pipe     - register a netmap pipe instead of NIC rings
+ * is_master   - pipe side selector (only meaningful with is_pipe)
+ * sw_if_index - optional out: sw_if_index of the new interface
+ *
+ * Returns 0 on success or a VNET_API_ERROR_* code.
+ */
+int
+netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set,
+		  u8 is_pipe, u8 is_master, u32 * sw_if_index)
+{
+  netmap_main_t *nm = &netmap_main;
+  int ret = 0;
+  uint32_t nr_reg;
+  netmap_if_t *nif = 0;
+  u8 hw_addr[6];
+  vnet_sw_interface_t *sw;
+  vnet_main_t *vnm = vnet_get_main ();
+  uword *p;
+  struct nmreq *req = 0;
+  netmap_mem_region_t *reg;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  int fd;
+
+  p = mhash_get (&nm->if_index_by_host_if_name, if_name);
+  if (p)
+    return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+
+  fd = open ("/dev/netmap", O_RDWR);
+  if (fd < 0)
+    /* fixed: used to return VNET_API_ERROR_SUBIF_ALREADY_EXISTS
+       (copy/paste error); an open() failure is a syscall error */
+    return VNET_API_ERROR_SYSCALL_ERROR_1;
+
+  pool_get (nm->interfaces, nif);
+  nif->if_index = nif - nm->interfaces;
+  nif->fd = fd;
+  nif->clib_file_index = ~0;
+
+  vec_validate (req, 0);
+  nif->req = req;
+  req->nr_version = NETMAP_API;
+
+  /* register either one side of a netmap pipe or all NIC rings */
+  if (is_pipe)
+    req->nr_flags = is_master ? NR_REG_PIPE_MASTER : NR_REG_PIPE_SLAVE;
+  else
+    req->nr_flags = NR_REG_ALL_NIC;
+
+  req->nr_flags |= NR_ACCEPT_VNET_HDR;
+  /* NOTE(review): if_name is a u8 vector; assumes it is NUL-terminated --
+     confirm callers always pass a C-string-style name */
+  snprintf (req->nr_name, IFNAMSIZ, "%s", if_name);
+  req->nr_name[IFNAMSIZ - 1] = 0;
+
+  if (ioctl (nif->fd, NIOCREGIF, req))
+    {
+      ret = VNET_API_ERROR_NOT_CONNECTED;
+      goto error;
+    }
+
+  /* map (or reuse) the shared memory region reported by the kernel */
+  nif->mem_region = req->nr_arg2;
+  vec_validate (nm->mem_regions, nif->mem_region);
+  reg = &nm->mem_regions[nif->mem_region];
+  if (reg->region_size == 0)
+    {
+      reg->mem = mmap (NULL, req->nr_memsize, PROT_READ | PROT_WRITE,
+		       MAP_SHARED, fd, 0);
+      if (reg->mem == MAP_FAILED)
+	{
+	  /* nothing was mapped and no reference was taken; clear
+	     mem_region so close_netmap_if does not decrement a refcount
+	     we never incremented */
+	  nif->mem_region = 0;
+	  ret = VNET_API_ERROR_NOT_CONNECTED;
+	  goto error;
+	}
+      reg->region_size = req->nr_memsize;
+    }
+  reg->refcnt++;
+
+  nif->nifp = NETMAP_IF (reg->mem, req->nr_offset);
+  nr_reg = nif->req->nr_flags & NR_REG_MASK;
+
+  /* derive the [first, last] tx/rx ring ranges from the registration
+     mode; host-stack rings live one past the HW rings */
+  if (nr_reg == NR_REG_SW)
+    {				/* host stack */
+      nif->first_tx_ring = nif->last_tx_ring = nif->req->nr_tx_rings;
+      nif->first_rx_ring = nif->last_rx_ring = nif->req->nr_rx_rings;
+    }
+  else if (nr_reg == NR_REG_ALL_NIC)
+    {				/* only nic */
+      nif->first_tx_ring = 0;
+      nif->first_rx_ring = 0;
+      nif->last_tx_ring = nif->req->nr_tx_rings - 1;
+      nif->last_rx_ring = nif->req->nr_rx_rings - 1;
+    }
+  else if (nr_reg == NR_REG_NIC_SW)
+    {
+      nif->first_tx_ring = 0;
+      nif->first_rx_ring = 0;
+      nif->last_tx_ring = nif->req->nr_tx_rings;
+      nif->last_rx_ring = nif->req->nr_rx_rings;
+    }
+  else if (nr_reg == NR_REG_ONE_NIC)
+    {
+      /* XXX check validity */
+      nif->first_tx_ring = nif->last_tx_ring = nif->first_rx_ring =
+	nif->last_rx_ring = nif->req->nr_ringid & NETMAP_RING_MASK;
+    }
+  else
+    {				/* pipes */
+      nif->first_tx_ring = nif->last_tx_ring = 0;
+      nif->first_rx_ring = nif->last_rx_ring = 0;
+    }
+
+  /* take ownership of the name vector; freed in close_netmap_if */
+  nif->host_if_name = if_name;
+  nif->per_interface_next_index = ~0;
+
+  if (tm->n_vlib_mains > 1)
+    clib_spinlock_init (&nif->lockp);
+
+  /* watch the fd so read-readiness schedules the input node */
+  {
+    clib_file_t template = { 0 };
+    template.read_function = netmap_fd_read_ready;
+    template.file_descriptor = nif->fd;
+    template.private_data = nif->if_index;
+    template.description = format (0, "netmap socket");
+    nif->clib_file_index = clib_file_add (&file_main, &template);
+  }
+
+  /*use configured or generate random MAC address */
+  if (hw_addr_set)
+    memcpy (hw_addr, hw_addr_set, 6);
+  else
+    {
+      f64 now = vlib_time_now (vm);
+      u32 rnd;
+      rnd = (u32) (now * 1e6);
+      rnd = random_u32 (&rnd);
+
+      memcpy (hw_addr + 2, &rnd, sizeof (rnd));
+      hw_addr[0] = 2;		/* locally administered */
+      hw_addr[1] = 0xfe;
+    }
+
+  vnet_eth_interface_registration_t eir = {};
+
+  eir.dev_class_index = netmap_device_class.index;
+  eir.dev_instance = nif->if_index;
+  eir.address = hw_addr;
+  eir.cb.set_max_frame_size = NULL;
+
+  nif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+
+  sw = vnet_get_hw_sw_interface (vnm, nif->hw_if_index);
+  nif->sw_if_index = sw->sw_if_index;
+
+  mhash_set_mem (&nm->if_index_by_host_if_name, if_name, &nif->if_index, 0);
+
+  if (sw_if_index)
+    *sw_if_index = nif->sw_if_index;
+
+  /* first interface with workers present: switch input node to polling */
+  if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 1)
+    netmap_worker_thread_enable ();
+
+  return 0;
+
+error:
+  close_netmap_if (nm, nif);
+  return ret;
+}
+
+/*
+ * Delete the netmap interface named host_if_name: bring it down,
+ * unregister it from vnet and release its resources.  Returns 0 or a
+ * VNET_API_ERROR_* code when the name is unknown.
+ */
+int
+netmap_delete_if (vlib_main_t * vm, u8 * host_if_name)
+{
+  netmap_main_t *nm = &netmap_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  netmap_if_t *nif;
+  uword *idx;
+
+  idx = mhash_get (&nm->if_index_by_host_if_name, host_if_name);
+  if (!idx)
+    {
+      clib_warning ("Host interface %s does not exist", host_if_name);
+      return VNET_API_ERROR_SYSCALL_ERROR_1;
+    }
+
+  nif = pool_elt_at_index (nm->interfaces, idx[0]);
+
+  /* bring down the interface, then remove it from vnet and free it */
+  vnet_hw_interface_set_flags (vnm, nif->hw_if_index, 0);
+  ethernet_delete_interface (vnm, nif->hw_if_index);
+  close_netmap_if (nm, nif);
+
+  /* last interface gone: drop the input node back to interrupt mode */
+  if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 0)
+    netmap_worker_thread_disable ();
+
+  return 0;
+}
+
+/*
+ * Plugin init: zero the main struct, work out which CPUs will run input
+ * processing, and set up the name-lookup hash and per-thread rx buffer
+ * vectors.
+ */
+static clib_error_t *
+netmap_init (vlib_main_t * vm)
+{
+  netmap_main_t *nm = &netmap_main;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_thread_registration_t *workers;
+  uword *res;
+
+  clib_memset (nm, 0, sizeof (netmap_main_t));
+
+  /* default: input runs on the main thread only */
+  nm->input_cpu_first_index = 0;
+  nm->input_cpu_count = 1;
+
+  /* if a "workers" thread group exists, spread input over its threads */
+  res = hash_get_mem (tm->thread_registrations_by_name, "workers");
+  workers = res ? (vlib_thread_registration_t *) res[0] : 0;
+  if (workers && workers->count > 0)
+    {
+      nm->input_cpu_first_index = workers->first_index;
+      nm->input_cpu_count = workers->count;
+    }
+
+  mhash_init_vec_string (&nm->if_index_by_host_if_name, sizeof (uword));
+
+  vec_validate_aligned (nm->rx_buffers, tm->n_vlib_mains - 1,
+			CLIB_CACHE_LINE_BYTES);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (netmap_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/netmap.h b/src/plugins/netmap/netmap.h
new file mode 100644
index 00000000000..29f855fda8e
--- /dev/null
+++ b/src/plugins/netmap/netmap.h
@@ -0,0 +1,166 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+/*
+ * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <vppinfra/lock.h>
+
+/* Per-interface state for one netmap-attached host interface. */
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ /* per-interface lock; presumably serializes tx ring access — confirm */
+ clib_spinlock_t lockp;
+ /* host interface name (vector string) */
+ u8 *host_if_name;
+ /* index of this element in netmap_main.interfaces pool */
+ uword if_index;
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u32 clib_file_index;
+
+ /* overrides the default next node for packets from this interface;
+ ~0 means "use the node default" */
+ u32 per_interface_next_index;
+ u8 is_admin_up;
+
+ /* netmap */
+ struct nmreq *req;
+ u16 mem_region;
+ int fd;
+ struct netmap_if *nifp;
+ u16 first_tx_ring;
+ u16 last_tx_ring;
+ u16 first_rx_ring;
+ u16 last_rx_ring;
+
+} netmap_if_t;
+
+/* One mmap'ed netmap memory region, shared by refcnt interfaces. */
+typedef struct
+{
+ char *mem;
+ u32 region_size;
+ int refcnt;
+} netmap_mem_region_t;
+
+/* Global plugin state. */
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ netmap_if_t *interfaces;
+
+ /* bitmap of pending rx interfaces */
+ uword *pending_input_bitmap;
+
+ /* rx buffer cache; one vector of free buffer indices per thread */
+ u32 **rx_buffers;
+
+ /* hash of host interface names */
+ mhash_t if_index_by_host_if_name;
+
+ /* vector of memory regions */
+ netmap_mem_region_t *mem_regions;
+
+ /* first cpu index */
+ u32 input_cpu_first_index;
+
+ /* total cpu count */
+ u32 input_cpu_count;
+} netmap_main_t;
+
+extern netmap_main_t netmap_main;
+extern vnet_device_class_t netmap_device_class;
+extern vlib_node_registration_t netmap_input_node;
+
+int netmap_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
+ u8 is_pipe, u8 is_master, u32 * sw_if_index);
+int netmap_delete_if (vlib_main_t * vm, u8 * host_if_name);
+
+
+/* Macros and helper functions from sys/net/netmap_user.h */
+
+#ifdef _NET_NETMAP_H_
+
+/* Compute a typed pointer at byte offset 'offset' from 'ptr'. */
+#define _NETMAP_OFFSET(type, ptr, offset) \
+ ((type)(void *)((char *)(ptr) + (offset)))
+
+/* struct netmap_if located 'ofs' bytes into the mmap'ed region */
+#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs)
+
+/* tx ring 'index' of interface 'nifp' */
+#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
+ nifp, (nifp)->ring_ofs[index] )
+
+/* rx ring 'index'; rx ring offsets follow the ni_tx_rings+1 tx entries */
+#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
+ nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )
+
+/* start of the packet buffer for slot buf_idx 'index' of 'ring' */
+#define NETMAP_BUF(ring, index) \
+ ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))
+
+/* inverse of NETMAP_BUF: buffer index of packet buffer 'buf' */
+#define NETMAP_BUF_IDX(ring, buf) \
+ ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
+ (ring)->nr_buf_size )
+
+/* Next slot index after i, wrapping at the end of the ring. */
+static inline uint32_t
+nm_ring_next (struct netmap_ring *ring, uint32_t i)
+{
+ return (PREDICT_FALSE (i + 1 == ring->num_slots) ? 0 : i + 1);
+}
+
+
+/*
+ * Return 1 if we have pending transmissions in the tx ring.
+ * When everything is complete ring->head = ring->tail + 1 (modulo ring size)
+ */
+static inline int
+nm_tx_pending (struct netmap_ring *ring)
+{
+ return nm_ring_next (ring, ring->tail) != ring->head;
+}
+
+/* Number of slots available to the application: distance from cur to tail
+ * (modulo ring size). */
+static inline uint32_t
+nm_ring_space (struct netmap_ring *ring)
+{
+ int ret = ring->tail - ring->cur;
+ if (ret < 0)
+ ret += ring->num_slots;
+ return ret;
+}
+#endif
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/netmap_api.c b/src/plugins/netmap/netmap_api.c
new file mode 100644
index 00000000000..51f572a23e6
--- /dev/null
+++ b/src/plugins/netmap/netmap_api.c
@@ -0,0 +1,95 @@
+/*
+ *------------------------------------------------------------------
+ * netmap_api.c - netmap api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <netmap/netmap.h>
+
+#include <vnet/format_fns.h>
+#include <netmap/netmap.api_enum.h>
+#include <netmap/netmap.api_types.h>
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(NETMAP_CREATE, netmap_create) \
+_(NETMAP_DELETE, netmap_delete) \
+
+/* API handler for netmap_create: NUL-terminate the wire-format name and
+ * create the interface. Only the return code is sent back.
+ * NOTE(review): the sw_if_index out-parameter is passed as 0 (NULL), so
+ * the new interface's sw_if_index is not reported to the API client —
+ * confirm the .api reply has no such field. */
+static void
+vl_api_netmap_create_t_handler (vl_api_netmap_create_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_netmap_create_reply_t *rmp;
+ int rv = 0;
+ u8 *if_name = NULL;
+
+ /* copy the (fixed-size) message field into a NUL-terminated vector */
+ if_name = format (0, "%s", mp->netmap_if_name);
+ vec_add1 (if_name, 0);
+
+ rv =
+ netmap_create_if (vm, if_name, mp->use_random_hw_addr ? 0 : mp->hw_addr,
+ mp->is_pipe, mp->is_master, 0);
+
+ vec_free (if_name);
+
+ REPLY_MACRO (VL_API_NETMAP_CREATE_REPLY);
+}
+
+/* API handler for netmap_delete: delete the interface by host name. */
+static void
+vl_api_netmap_delete_t_handler (vl_api_netmap_delete_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_netmap_delete_reply_t *rmp;
+ int rv = 0;
+ u8 *if_name = NULL;
+
+ if_name = format (0, "%s", mp->netmap_if_name);
+ vec_add1 (if_name, 0);
+
+ rv = netmap_delete_if (vm, if_name);
+
+ vec_free (if_name);
+
+ REPLY_MACRO (VL_API_NETMAP_DELETE_REPLY);
+}
+
+#include <netmap/netmap.api.c>
+/* Register the plugin's API messages with the shared memory API. */
+static clib_error_t *
+netmap_api_hookup (vlib_main_t * vm)
+{
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (netmap_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/node.c b/src/plugins/netmap/node.c
new file mode 100644
index 00000000000..6169847fa79
--- /dev/null
+++ b/src/plugins/netmap/node.c
@@ -0,0 +1,295 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+
+#include <netmap/net_netmap.h>
+#include <netmap/netmap.h>
+
+/* No input errors are currently defined; the list is kept so counters can
+ * be added later without restructuring. NETMAP_INPUT_N_ERROR is 0. */
+#define foreach_netmap_input_error
+
+typedef enum
+{
+#define _(f,s) NETMAP_INPUT_ERROR_##f,
+ foreach_netmap_input_error
+#undef _
+ NETMAP_INPUT_N_ERROR,
+} netmap_input_error_t;
+
+static char *netmap_input_error_strings[] = {
+#define _(n,s) s,
+ foreach_netmap_input_error
+#undef _
+};
+
+/* Per-packet trace record: chosen next node, source interface and a copy
+ * of the netmap slot descriptor. */
+typedef struct
+{
+ u32 next_index;
+ u32 hw_if_index;
+ struct netmap_slot slot;
+} netmap_input_trace_t;
+
+/* Format one netmap_input_trace_t for 'show trace'. */
+static u8 *
+format_netmap_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ netmap_input_trace_t *t = va_arg (*args, netmap_input_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "netmap: hw_if_index %d next-index %d",
+ t->hw_if_index, t->next_index);
+ s = format (s, "\n%Uslot: flags 0x%x len %u buf_idx %u",
+ format_white_space, indent + 2,
+ t->slot.flags, t->slot.len, t->slot.buf_idx);
+ return s;
+}
+
+/* Append buffer 'bi' to the chain headed by 'first_bi', linking it after
+ * 'prev_bi' and accounting its length on the head buffer. */
+always_inline void
+buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi);
+ vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi);
+
+ /* update first buffer */
+ first_b->total_length_not_including_first_buffer += b->current_length;
+
+ /* update previous buffer */
+ prev_b->next_buffer = bi;
+ prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ /* update current buffer: clear any stale next pointer */
+ b->next_buffer = 0;
+}
+
+/* Poll the rx rings of one netmap interface: copy each netmap slot into
+ * (a chain of) vlib buffers, enqueue them to the next node, then advance
+ * the ring and kick the kernel with NIOCRXSYNC. Returns the number of
+ * packets received. Each slot is treated as one complete packet; a slot
+ * larger than a vlib buffer is spread over a buffer chain. */
+always_inline uword
+netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, netmap_if_t * nif)
+{
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ uword n_trace = vlib_get_trace_count (vm, node);
+ netmap_main_t *nm = &netmap_main;
+ u32 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u32 *to_next = 0;
+ u32 n_free_bufs;
+ struct netmap_ring *ring;
+ int cur_ring;
+ u32 thread_index = vm->thread_index;
+ u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
+
+ /* honor a per-interface next-node override (e.g. set via API/CLI) */
+ if (nif->per_interface_next_index != ~0)
+ next_index = nif->per_interface_next_index;
+
+ /* top up this thread's cache of preallocated rx buffers */
+ n_free_bufs = vec_len (nm->rx_buffers[thread_index]);
+ if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
+ {
+ vec_validate (nm->rx_buffers[thread_index],
+ VLIB_FRAME_SIZE + n_free_bufs - 1);
+ n_free_bufs +=
+ vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs],
+ VLIB_FRAME_SIZE);
+ vec_set_len (nm->rx_buffers[thread_index], n_free_bufs);
+ }
+
+ /* walk every rx ring owned by this interface */
+ cur_ring = nif->first_rx_ring;
+ while (cur_ring <= nif->last_rx_ring && n_free_bufs)
+ {
+ int r = 0;
+ u32 cur_slot_index;
+ ring = NETMAP_RXRING (nif->nifp, cur_ring);
+ r = nm_ring_space (ring);
+
+ if (!r)
+ {
+ cur_ring++;
+ continue;
+ }
+
+ /* never consume more slots than we have free buffers for */
+ if (r > n_free_bufs)
+ r = n_free_bufs;
+
+ cur_slot_index = ring->cur;
+ while (r)
+ {
+ u32 n_left_to_next;
+ u32 next0 = next_index;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (r && n_left_to_next)
+ {
+ vlib_buffer_t *first_b0 = 0;
+ u32 offset = 0;
+ u32 bi0 = 0, first_bi0 = 0, prev_bi0;
+ u32 next_slot_index = (cur_slot_index + 1) % ring->num_slots;
+ u32 next2_slot_index = (cur_slot_index + 2) % ring->num_slots;
+ struct netmap_slot *slot = &ring->slot[cur_slot_index];
+ u32 data_len = slot->len;
+
+ /* prefetch 2 slots in advance */
+ CLIB_PREFETCH (&ring->slot[next2_slot_index],
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ /* prefetch start of next packet */
+ CLIB_PREFETCH (NETMAP_BUF
+ (ring, ring->slot[next_slot_index].buf_idx),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+
+ /* copy the slot into one or more chained vlib buffers */
+ while (data_len && n_free_bufs)
+ {
+ vlib_buffer_t *b0;
+ /* grab free buffer from the tail of the cache */
+ u32 last_empty_buffer =
+ vec_len (nm->rx_buffers[thread_index]) - 1;
+ prev_bi0 = bi0;
+ bi0 = nm->rx_buffers[thread_index][last_empty_buffer];
+ b0 = vlib_get_buffer (vm, bi0);
+ vec_set_len (nm->rx_buffers[thread_index],
+ last_empty_buffer);
+ n_free_bufs--;
+
+ /* copy data */
+ u32 bytes_to_copy =
+ data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
+ b0->current_data = 0;
+ clib_memcpy_fast (vlib_buffer_get_current (b0),
+ (u8 *) NETMAP_BUF (ring, slot->buf_idx) +
+ offset, bytes_to_copy);
+
+ /* fill buffer header */
+ b0->current_length = bytes_to_copy;
+
+ if (offset == 0)
+ {
+ /* head buffer: initialize packet metadata */
+ b0->total_length_not_including_first_buffer = 0;
+ b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ nif->sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ first_bi0 = bi0;
+ first_b0 = vlib_get_buffer (vm, first_bi0);
+ }
+ else
+ buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0);
+
+ offset += bytes_to_copy;
+ data_len -= bytes_to_copy;
+ }
+
+ /* trace */
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ if (PREDICT_TRUE (first_b0 != 0) &&
+ vlib_trace_buffer (vm, node, next0, first_b0,
+ /* follow_chain */ 0))
+ {
+ netmap_input_trace_t *tr;
+
+ vlib_set_trace_count (vm, node, --n_trace);
+ tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->hw_if_index = nif->hw_if_index;
+ memcpy (&tr->slot, slot, sizeof (struct netmap_slot));
+ }
+ }
+
+ /* enque and take next packet */
+ /* NOTE(review): the buffer index is stored into to_next below,
+ AFTER this validate call; next0 always equals next_index here
+ so the call is a no-op, but the conventional order is store
+ first, then validate — confirm if next0 can ever differ. */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, first_bi0,
+ next0);
+
+ /* next packet */
+ n_rx_packets++;
+ n_rx_bytes += slot->len;
+ to_next[0] = first_bi0;
+ to_next += 1;
+ n_left_to_next--;
+ cur_slot_index = next_slot_index;
+
+ r--;
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ /* return consumed slots to the kernel */
+ ring->head = ring->cur = cur_slot_index;
+ cur_ring++;
+ }
+
+ /* tell the kernel we consumed the slots */
+ if (n_rx_packets)
+ ioctl (nif->fd, NIOCRXSYNC, NULL);
+
+ vlib_increment_combined_counter
+ (vnet_get_main ()->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
+
+ return n_rx_packets;
+}
+
+/* Input node dispatch: poll every admin-up netmap interface that is
+ * statically assigned to this thread (round-robin by pool index over the
+ * input cpus). Returns total packets received this invocation. */
+VLIB_NODE_FN (netmap_input_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ int i;
+ u32 n_rx_packets = 0;
+ u32 thread_index = vm->thread_index;
+ netmap_main_t *nm = &netmap_main;
+ netmap_if_t *nmi;
+
+ for (i = 0; i < vec_len (nm->interfaces); i++)
+ {
+ nmi = vec_elt_at_index (nm->interfaces, i);
+ /* interface i is handled by thread (i mod input_cpu_count) */
+ if (nmi->is_admin_up &&
+ (i % nm->input_cpu_count) ==
+ (thread_index - nm->input_cpu_first_index))
+ n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi);
+ }
+
+ return n_rx_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (netmap_input_node) = {
+ .name = "netmap-input",
+ .sibling_of = "device-input",
+ .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
+ .format_trace = format_netmap_input_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ /* default state is INTERRUPT mode, switch to POLLING if worker threads are enabled */
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .n_errors = NETMAP_INPUT_N_ERROR,
+ .error_strings = netmap_input_error_strings,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/plugin.c b/src/plugins/netmap/plugin.c
new file mode 100644
index 00000000000..1673225b683
--- /dev/null
+++ b/src/plugins/netmap/plugin.c
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2024 Tom Jones <thj@freebsd.org>
+ *
+ * This software was developed by Tom Jones <thj@freebsd.org> under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "netmap",
+};
diff --git a/src/plugins/npt66/npt66.api b/src/plugins/npt66/npt66.api
index 63640ac2097..dab09cda31f 100644
--- a/src/plugins/npt66/npt66.api
+++ b/src/plugins/npt66/npt66.api
@@ -36,5 +36,16 @@ counters npt66 {
units "packets";
description "packet translation failed";
};
-
+ icmp6_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ICMP6 checksum validation failed";
+ };
+ icmp6_truncated {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ICMP6 packet truncated";
+ };
}; \ No newline at end of file
diff --git a/src/plugins/npt66/npt66_node.c b/src/plugins/npt66/npt66_node.c
index f74f9143998..0d0c475f2c3 100644
--- a/src/plugins/npt66/npt66_node.c
+++ b/src/plugins/npt66/npt66_node.c
@@ -127,10 +127,7 @@ npt66_translate (ip6_header_t *ip, npt66_binding_t *binding, int dir)
if (!ip6_prefix_cmp (ip->src_address, binding->internal,
binding->internal_plen))
{
- clib_warning (
- "npt66_translate: src address is not internal (%U -> %U)",
- format_ip6_address, &ip->src_address, format_ip6_address,
- &ip->dst_address);
+ /* Packet is not for us */
goto done;
}
ip->src_address = ip6_prefix_copy (ip->src_address, binding->external,
@@ -144,10 +141,7 @@ npt66_translate (ip6_header_t *ip, npt66_binding_t *binding, int dir)
if (!ip6_prefix_cmp (ip->dst_address, binding->external,
binding->external_plen))
{
- clib_warning (
- "npt66_translate: dst address is not external (%U -> %U)",
- format_ip6_address, &ip->src_address, format_ip6_address,
- &ip->dst_address);
+ /* Packet is not for us */
goto done;
}
ip->dst_address = ip6_prefix_copy (ip->dst_address, binding->internal,
@@ -162,7 +156,7 @@ done:
static int
npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip,
icmp46_header_t *icmp, npt66_binding_t *binding,
- int dir)
+ int dir, u32 *error)
{
ip6_header_t *ip = (ip6_header_t *) (icmp + 2);
int rv = 0;
@@ -171,7 +165,7 @@ npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip,
if (clib_net_to_host_u16 (outer_ip->payload_length) <
sizeof (icmp46_header_t) + 4 + sizeof (ip6_header_t))
{
- clib_warning ("ICMP6 payload too short");
+ *error = NPT66_ERROR_ICMP6_TRUNCATED;
return -1;
}
@@ -181,7 +175,7 @@ npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip,
sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, b, outer_ip, &bogus_length);
if (sum16 != 0 && sum16 != 0xffff)
{
- clib_warning ("ICMP6 checksum failed");
+ *error = NPT66_ERROR_ICMP6_CHECKSUM;
return -1;
}
if (dir == VLIB_RX)
@@ -189,10 +183,7 @@ npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip,
if (!ip6_prefix_cmp (ip->src_address, binding->external,
binding->external_plen))
{
- clib_warning (
- "npt66_icmp6_translate: src address is not internal (%U -> %U)",
- format_ip6_address, &ip->src_address, format_ip6_address,
- &ip->dst_address);
+ /* Not for us */
goto done;
}
ip->src_address = ip6_prefix_copy (ip->src_address, binding->internal,
@@ -206,10 +197,7 @@ npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip,
if (!ip6_prefix_cmp (ip->dst_address, binding->external,
binding->external_plen))
{
- clib_warning (
- "npt66_icmp6_translate: dst address is not external (%U -> %U)",
- format_ip6_address, &ip->src_address, format_ip6_address,
- &ip->dst_address);
+ /* Not for us */
goto done;
}
ip->dst_address = ip6_prefix_copy (ip->dst_address, binding->internal,
@@ -217,8 +205,8 @@ npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip,
rv = npt66_adjust_checksum (binding->internal_plen, false,
binding->delta, &ip->dst_address);
}
-done:
+done:
return rv;
}
@@ -243,10 +231,12 @@ npt66_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
n_left_from = frame->n_vectors;
vlib_get_buffers (vm, from, b, n_left_from);
npt66_binding_t *binding;
+ u32 translated = 0;
/* Stage 1: build vector of flow hash (based on lookup mask) */
while (n_left_from > 0)
{
+ u32 error = NPT66_ERROR_TRANSLATION;
u32 sw_if_index = vnet_buffer (b[0])->sw_if_index[dir];
u32 iph_offset =
dir == VLIB_TX ? vnet_buffer (b[0])->ip.save_rewrite_length : 0;
@@ -261,28 +251,26 @@ npt66_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
icmp46_header_t *icmp = (icmp46_header_t *) (ip + 1);
if (ip->protocol == IP_PROTOCOL_ICMP6 && icmp->type < 128)
{
- rv = npt66_icmp6_translate (b[0], ip, icmp, binding, dir);
+ rv = npt66_icmp6_translate (b[0], ip, icmp, binding, dir, &error);
if (rv < 0)
{
- clib_warning ("ICMP6 npt66_translate failed");
*next = NPT66_NEXT_DROP;
+ b[0]->error = node->errors[error];
goto next;
}
}
- rv = npt66_translate (ip, binding, dir);
+ rv = npt66_translate (ip, binding, dir);
if (rv < 0)
{
- vlib_node_increment_counter (vm, node->node_index,
- NPT66_ERROR_TRANSLATION, 1);
+ b[0]->error = node->errors[error];
*next = NPT66_NEXT_DROP;
goto next;
}
- else if (dir == VLIB_TX)
- vlib_node_increment_counter (vm, node->node_index, NPT66_ERROR_TX, 1);
else
- vlib_node_increment_counter (vm, node->node_index, NPT66_ERROR_RX, 1);
-
+ {
+ translated++;
+ }
next:
next += 1;
n_left_from -= 1;
@@ -321,6 +309,9 @@ npt66_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
break;
}
}
+ vlib_node_increment_counter (
+ vm, node->node_index, dir == VLIB_TX ? NPT66_ERROR_TX : NPT66_ERROR_RX,
+ translated);
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
return frame->n_vectors;
@@ -338,17 +329,17 @@ VLIB_NODE_FN (npt66_output_node)
}
VLIB_REGISTER_NODE(npt66_input_node) = {
- .name = "npt66-input",
- .vector_size = sizeof(u32),
- .format_trace = format_npt66_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = NPT66_N_ERROR,
- .error_counters = npt66_error_counters,
- .n_next_nodes = NPT66_N_NEXT,
- .next_nodes =
- {
- [NPT66_NEXT_DROP] = "error-drop",
- },
+ .name = "npt66-input",
+ .vector_size = sizeof(u32),
+ .format_trace = format_npt66_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = NPT66_N_ERROR,
+ .error_counters = npt66_error_counters,
+ .n_next_nodes = NPT66_N_NEXT,
+ .next_nodes =
+ {
+ [NPT66_NEXT_DROP] = "error-drop",
+ },
};
VLIB_REGISTER_NODE (npt66_output_node) = {
diff --git a/src/plugins/builtinurl/CMakeLists.txt b/src/plugins/osi/CMakeLists.txt
index ddbca5e50f1..8ab014770ea 100644
--- a/src/plugins/builtinurl/CMakeLists.txt
+++ b/src/plugins/osi/CMakeLists.txt
@@ -1,5 +1,4 @@
-
-# Copyright (c) <current-year> <your-organization>
+# Copyright (c) 2023 Cisco and/or its affiliates
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -12,15 +11,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-add_vpp_plugin(builtinurl
- SOURCES
- builtins.c
- builtinurl.c
- builtinurl.h
+add_vpp_plugin(osi
- API_FILES
- builtinurl.api
+ SOURCES
+ osi.c
+ node.c
+ pg.c
+ plugin.c
- API_TEST_SOURCES
- builtinurl_test.c
+ INSTALL_HEADERS
+ osi.h
)
diff --git a/src/plugins/osi/FEATURE.yaml b/src/plugins/osi/FEATURE.yaml
new file mode 100644
index 00000000000..337be1c7146
--- /dev/null
+++ b/src/plugins/osi/FEATURE.yaml
@@ -0,0 +1,11 @@
+---
+name: OSI plugin
+maintainer:
+ - community <vpp-dev@lists.fd.io>
+features:
+ - Adds support for OSI protocols (SAP types)
+ - Registered as input protocol for PPP, HDLC, and LLC
+missing:
+ - No tests for this feature currently exist
+description: ""
+state: experimental
diff --git a/src/plugins/osi/node.c b/src/plugins/osi/node.c
new file mode 100644
index 00000000000..a36b1525e0e
--- /dev/null
+++ b/src/plugins/osi/node.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * osi_node.c: osi packet processing
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <osi/osi.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+#include <vnet/llc/llc.h>
+
+/* Static next nodes of osi-input; protocol-specific nexts are added at
+ * runtime by osi_register_input_protocol(). */
+#define foreach_osi_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop")
+
+typedef enum
+{
+#define _(s,n) OSI_INPUT_NEXT_##s,
+ foreach_osi_input_next
+#undef _
+ OSI_INPUT_N_NEXT,
+} osi_input_next_t;
+
+/* Trace record: first 32 bytes of packet data, formatted as an OSI
+ * header by format_osi_input_trace(). */
+typedef struct
+{
+ u8 packet_data[32];
+} osi_input_trace_t;
+
+/* Format one osi_input_trace_t for 'show trace'. */
+static u8 *
+format_osi_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ osi_input_trace_t *t = va_arg (*va, osi_input_trace_t *);
+
+ s = format (s, "%U", format_osi_header, t->packet_data);
+
+ return s;
+}
+
+/* osi-input node function: dispatch each packet to the next node chosen
+ * by its OSI protocol byte (input_next_by_protocol), flagging unknown
+ * protocols with OSI_ERROR_UNKNOWN_PROTOCOL. Dual-loop structure: the
+ * fast path assumes both packets go to the cached next_index and patches
+ * up the frame when they do not. */
+static uword
+osi_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ osi_main_t *lm = &osi_main;
+ u32 n_left_from, next_index, *from, *to_next;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node,
+ from,
+ n_left_from,
+ sizeof (from[0]),
+ sizeof (osi_input_trace_t));
+
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* dual loop: two packets per iteration, prefetching two ahead */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ osi_header_t *h0, *h1;
+ u8 next0, next1, enqueue_code;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *b2, *b3;
+
+ b2 = vlib_get_buffer (vm, from[2]);
+ b3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (b2, LOAD);
+ vlib_prefetch_buffer_header (b3, LOAD);
+
+ CLIB_PREFETCH (b2->data, sizeof (h0[0]), LOAD);
+ CLIB_PREFETCH (b3->data, sizeof (h1[0]), LOAD);
+ }
+
+ /* speculatively enqueue both to the cached next */
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ h0 = vlib_buffer_get_current (b0);
+ h1 = vlib_buffer_get_current (b1);
+
+ /* protocol byte indexes the per-protocol next-node table */
+ next0 = lm->input_next_by_protocol[h0->protocol];
+ next1 = lm->input_next_by_protocol[h1->protocol];
+
+ b0->error =
+ node->errors[next0 ==
+ OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL :
+ OSI_ERROR_NONE];
+ b1->error =
+ node->errors[next1 ==
+ OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL :
+ OSI_ERROR_NONE];
+
+ /* 2-bit code: which of the two speculations were wrong */
+ enqueue_code = (next0 != next_index) + 2 * (next1 != next_index);
+
+ if (PREDICT_FALSE (enqueue_code != 0))
+ {
+ switch (enqueue_code)
+ {
+ case 1:
+ /* A B A */
+ to_next[-2] = bi1;
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+ break;
+
+ case 2:
+ /* A A B */
+ to_next -= 1;
+ n_left_to_next += 1;
+ vlib_set_next_frame_buffer (vm, node, next1, bi1);
+ break;
+
+ case 3:
+ /* A B B or A B C */
+ to_next -= 2;
+ n_left_to_next += 2;
+ vlib_set_next_frame_buffer (vm, node, next0, bi0);
+ vlib_set_next_frame_buffer (vm, node, next1, bi1);
+ if (next0 == next1)
+ {
+ /* both go to the same new next: switch frames */
+ vlib_put_next_frame (vm, node, next_index,
+ n_left_to_next);
+ next_index = next1;
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ }
+ }
+ }
+
+ /* single loop for the remainder */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ osi_header_t *h0;
+ u8 next0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ h0 = vlib_buffer_get_current (b0);
+
+ next0 = lm->input_next_by_protocol[h0->protocol];
+
+ b0->error =
+ node->errors[next0 ==
+ OSI_INPUT_NEXT_DROP ? OSI_ERROR_UNKNOWN_PROTOCOL :
+ OSI_ERROR_NONE];
+
+ /* Sent packet to wrong next? */
+ if (PREDICT_FALSE (next0 != next_index))
+ {
+ /* Return old frame; remove incorrectly enqueued packet. */
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);
+
+ /* Send to correct next. */
+ next_index = next0;
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+/* Error strings generated from foreach_osi_error (declared in osi.h). */
+static char *osi_error_strings[] = {
+#define _(f,s) s,
+ foreach_osi_error
+#undef _
+};
+
+VLIB_REGISTER_NODE (osi_input_node) = {
+ .function = osi_input,
+ .name = "osi-input",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = OSI_N_ERROR,
+ .error_strings = osi_error_strings,
+
+ .n_next_nodes = OSI_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s,n) [OSI_INPUT_NEXT_##s] = n,
+ foreach_osi_input_next
+#undef _
+ },
+
+ .format_buffer = format_osi_header_with_length,
+ .format_trace = format_osi_input_trace,
+ .unformat_buffer = unformat_osi_header,
+};
+
+/* Attach OSI header formatting/unformatting to a node so packet
+ * generator and tracing can parse/build OSI headers for it. */
+static void
+osi_setup_node (vlib_main_t *vm, u32 node_index)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ pg_node_t *pn = pg_get_node (node_index);
+
+ n->format_buffer = format_osi_header_with_length;
+ n->unformat_buffer = unformat_osi_header;
+ pn->unformat_edit = unformat_pg_osi_header;
+}
+
+/* Node init: default every protocol to DROP, then register osi-input as
+ * the handler for the OSI protocol with PPP, HDLC and the LLC OSI SAPs. */
+static clib_error_t *
+osi_input_init (vlib_main_t * vm)
+{
+ clib_error_t *error = 0;
+ osi_main_t *lm = &osi_main;
+
+ /* osi_main must be set up before we touch its tables */
+ if ((error = vlib_call_init_function (vm, osi_init)))
+ return error;
+
+ osi_setup_node (vm, osi_input_node.index);
+
+ {
+ int i;
+ for (i = 0; i < ARRAY_LEN (lm->input_next_by_protocol); i++)
+ lm->input_next_by_protocol[i] = OSI_INPUT_NEXT_DROP;
+ }
+
+ ppp_register_input_protocol (vm, PPP_PROTOCOL_osi, osi_input_node.index);
+ hdlc_register_input_protocol (vm, HDLC_PROTOCOL_osi, osi_input_node.index);
+ llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer1,
+ osi_input_node.index);
+ llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer2,
+ osi_input_node.index);
+ llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer3,
+ osi_input_node.index);
+ llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer4,
+ osi_input_node.index);
+ llc_register_input_protocol (vm, LLC_PROTOCOL_osi_layer5,
+ osi_input_node.index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (osi_input_init);
+
+/* Register 'node_index' as the next node for packets carrying OSI
+ * protocol 'protocol'; updates the protocol-info table and the
+ * input_next_by_protocol dispatch table used by osi_input(). */
+void
+osi_register_input_protocol (osi_protocol_t protocol, u32 node_index)
+{
+ osi_main_t *lm = &osi_main;
+ vlib_main_t *vm = lm->vlib_main;
+ osi_protocol_info_t *pi;
+
+ {
+ /* make sure osi-input exists before adding a next arc to it */
+ clib_error_t *error = vlib_call_init_function (vm, osi_input_init);
+ if (error)
+ clib_error_report (error);
+ }
+
+ pi = osi_get_protocol_info (lm, protocol);
+ pi->node_index = node_index;
+ pi->next_index = vlib_node_add_next (vm, osi_input_node.index, node_index);
+
+ lm->input_next_by_protocol[protocol] = pi->next_index;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/osi/osi.c b/src/plugins/osi/osi.c
new file mode 100644
index 00000000000..67c7053f388
--- /dev/null
+++ b/src/plugins/osi/osi.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * osi.c: osi support
+ *
+ * Copyright (c) 2010 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <osi/osi.h>
+
+/* Global main structure. */
+osi_main_t osi_main;
+
+/* Format an OSI NLPID: registered protocols print by name, anything
+   else as a two-digit hex value. */
+u8 *
+format_osi_protocol (u8 * s, va_list * args)
+{
+  osi_protocol_t proto = va_arg (*args, u32);
+  osi_protocol_info_t *info = osi_get_protocol_info (&osi_main, proto);
+
+  return info ? format (s, "%s", info->name) : format (s, "0x%02x", proto);
+}
+
+/* Format an OSI header, optionally followed by its payload when the
+   caller supplied more than sizeof(osi_header_t) bytes.
+   max_header_bytes == 0 means "unbounded". */
+u8 *
+format_osi_header_with_length (u8 * s, va_list * args)
+{
+  osi_main_t *pm = &osi_main;
+  osi_header_t *h = va_arg (*args, osi_header_t *);
+  u32 max_header_bytes = va_arg (*args, u32);
+  osi_protocol_t p = h->protocol;
+  u32 indent, header_bytes;
+
+  header_bytes = sizeof (h[0]);
+  if (max_header_bytes != 0 && header_bytes > max_header_bytes)
+    return format (s, "osi header truncated");
+
+  indent = format_get_indent (s);
+
+  s = format (s, "OSI %U", format_osi_protocol, p);
+
+  /* Fixed: was '>' which duplicated the truncation check above and made
+     this branch dead code; payload formatting needs spare bytes BEYOND
+     the one-byte OSI header. */
+  if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+    {
+      osi_protocol_info_t *pi = osi_get_protocol_info (pm, p);
+
+      /* Fixed: guard against unknown protocols (pi == 0) and protocols
+	 with no registered handler node (node_index == ~0). */
+      if (pi && pi->node_index != ~0)
+	{
+	  vlib_node_t *node = vlib_get_node (pm->vlib_main, pi->node_index);
+	  if (node->format_buffer)
+	    s = format (s, "\n%U%U",
+			format_white_space, indent,
+			node->format_buffer, (void *) (h + 1),
+			max_header_bytes - header_bytes);
+	}
+    }
+
+  return s;
+}
+
+/* Format an OSI header with no payload-length bound: delegate to the
+   length-aware formatter with max_header_bytes == 0. */
+u8 *
+format_osi_header (u8 * s, va_list * args)
+{
+  osi_header_t *hdr = va_arg (*args, osi_header_t *);
+
+  s = format (s, "%U", format_osi_header_with_length, hdr, 0);
+  return s;
+}
+
+/* Returns osi protocol as an int in host byte order. */
+uword
+unformat_osi_protocol (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ osi_main_t *pm = &osi_main;
+ int p, i;
+
+ /* Numeric type. */
+ if (unformat (input, "0x%x", &p) || unformat (input, "%d", &p))
+ {
+ if (p >= (1 << 8))
+ return 0;
+ *result = p;
+ return 1;
+ }
+
+ /* Named type. */
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ pm->protocol_info_by_name, &i))
+ {
+ osi_protocol_info_t *pi = vec_elt_at_index (pm->protocol_infos, i);
+ *result = pi->protocol;
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Parse an OSI header from INPUT and append its wire encoding to the
+   caller's byte vector (*result).  Returns 1 on success, 0 on parse
+   failure (vector untouched). */
+uword
+unformat_osi_header (unformat_input_t * input, va_list * args)
+{
+  u8 **result = va_arg (*args, u8 **);
+  osi_header_t _h, *h = &_h;
+  u8 p;
+
+  if (!unformat (input, "%U", unformat_osi_protocol, &p))
+    return 0;
+
+  h->protocol = p;
+
+  /* Add header to result. */
+  {
+    /* Fixed: renamed from 'p', which shadowed the u8 protocol value
+       above (-Wshadow hazard). */
+    void *dst;
+    u32 n_bytes = sizeof (h[0]);
+
+    vec_add2 (*result, dst, n_bytes);
+    clib_memcpy (dst, h, n_bytes);
+  }
+
+  return 1;
+}
+
+/* Append a protocol descriptor to pm->protocol_infos and index it in
+   both lookup hashes (by NLPID value and by name).  protocol_name must
+   outlive pm (callers pass string literals). */
+static void
+add_protocol (osi_main_t * pm, osi_protocol_t protocol, char *protocol_name)
+{
+  osi_protocol_info_t *pi;
+  u32 i;
+
+  vec_add2 (pm->protocol_infos, pi, 1);
+  i = pi - pm->protocol_infos;
+
+  pi->name = protocol_name;
+  pi->protocol = protocol;
+  /* No handler node registered yet; see osi_register_input_protocol. */
+  pi->next_index = pi->node_index = ~0;
+
+  hash_set (pm->protocol_info_by_protocol, protocol, i);
+  hash_set_mem (pm->protocol_info_by_name, pi->name, i);
+}
+
+/* Plugin init: zero the global osi_main, build the name/value lookup
+   hashes, and populate them with every protocol in foreach_osi_protocol.
+   Chains into osi_input_init so node setup always follows table setup. */
+static clib_error_t *
+osi_init (vlib_main_t * vm)
+{
+  osi_main_t *pm = &osi_main;
+
+  clib_memset (pm, 0, sizeof (pm[0]));
+  pm->vlib_main = vm;
+
+  pm->protocol_info_by_name = hash_create_string (0, sizeof (uword));
+  pm->protocol_info_by_protocol = hash_create (0, sizeof (uword));
+
+#define _(f,n) add_protocol (pm, OSI_PROTOCOL_##f, #f);
+  foreach_osi_protocol;
+#undef _
+
+  return vlib_call_init_function (vm, osi_input_init);
+}
+
+/* init order dependency: llc_init -> osi_init -> snap_init*/
+/* Otherwise, osi_input_init will wipe out e.g. the snap init */
+VLIB_INIT_FUNCTION (osi_init) = {
+  .init_order = VLIB_INITS ("llc_init", "osi_init", "snap_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/osi/osi.h b/src/plugins/osi/osi.h
new file mode 100644
index 00000000000..fb248ed9cc5
--- /dev/null
+++ b/src/plugins/osi/osi.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * osi.h: OSI definitions
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_osi_h
+#define included_osi_h
+
+#include <vnet/vnet.h>
+
+/* OSI network-layer protocol identifiers (NLPIDs), per ISO/TR 9577.
+   NOTE(review): q_931 and q_933 share value 0x08, so the value->info
+   hash keeps only the later registration -- confirm this is intended. */
+#define foreach_osi_protocol \
+  _ (null, 0x0) \
+  _ (x_29, 0x01) \
+  _ (x_633, 0x03) \
+  _ (q_931, 0x08) \
+  _ (q_933, 0x08) \
+  _ (q_2931, 0x09) \
+  _ (q_2119, 0x0c) \
+  _ (snap, 0x80) \
+  _ (clnp, 0x81) \
+  _ (esis, 0x82) \
+  _ (isis, 0x83) \
+  _ (idrp, 0x85) \
+  _ (x25_esis, 0x8a) \
+  _ (iso10030, 0x8c) \
+  _ (iso11577, 0x8d) \
+  _ (ip6, 0x8e) \
+  _ (compressed, 0xb0) \
+  _ (sndcf, 0xc1) \
+  _ (ip4, 0xcc) \
+  _ (ppp, 0xcf)
+
+typedef enum
+{
+#define _(f,n) OSI_PROTOCOL_##f = n,
+  foreach_osi_protocol
+#undef _
+} osi_protocol_t;
+
+/* Wire format: a single NLPID octet followed by the payload. */
+typedef struct
+{
+  u8 protocol;
+
+  u8 payload[0];
+} osi_header_t;
+
+typedef struct
+{
+  /* Name (a c string). */
+  char *name;
+
+  /* OSI protocol (SAP type). */
+  osi_protocol_t protocol;
+
+  /* Node which handles this type. */
+  u32 node_index;
+
+  /* Next index for this type. */
+  u32 next_index;
+} osi_protocol_info_t;
+
+#define foreach_osi_error \
+  _ (NONE, "no error") \
+  _ (UNKNOWN_PROTOCOL, "unknown osi protocol")
+
+typedef enum
+{
+#define _(f,s) OSI_ERROR_##f,
+  foreach_osi_error
+#undef _
+  OSI_N_ERROR,
+} osi_error_t;
+
+typedef struct
+{
+  vlib_main_t *vlib_main;
+
+  osi_protocol_info_t *protocol_infos;
+
+  /* Hash tables mapping name/protocol to protocol info index. */
+  uword *protocol_info_by_name, *protocol_info_by_protocol;
+
+  /* osi-input next index indexed by protocol. */
+  u8 input_next_by_protocol[256];
+} osi_main_t;
+
+/* Look up protocol info by NLPID; returns 0 when unregistered. */
+always_inline osi_protocol_info_t *
+osi_get_protocol_info (osi_main_t * m, osi_protocol_t protocol)
+{
+  uword *p = hash_get (m->protocol_info_by_protocol, protocol);
+  return p ? vec_elt_at_index (m->protocol_infos, p[0]) : 0;
+}
+
+extern osi_main_t osi_main;
+
+/* Register given node index to take input for given osi type. */
+void osi_register_input_protocol (osi_protocol_t protocol, u32 node_index);
+
+format_function_t format_osi_protocol;
+format_function_t format_osi_header;
+format_function_t format_osi_header_with_length;
+
+/* Parse osi protocol as 0xXXXX or protocol name. */
+unformat_function_t unformat_osi_protocol;
+
+/* Parse osi header. */
+unformat_function_t unformat_osi_header;
+unformat_function_t unformat_pg_osi_header;
+
+/* Fixed: removed duplicate declarations of osi_register_input_protocol
+   and format_osi_header that appeared a second time here. */
+
+#endif /* included_osi_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/osi/pg.c b/src/plugins/osi/pg.c
new file mode 100644
index 00000000000..3bac693c127
--- /dev/null
+++ b/src/plugins/osi/pg.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * osi_pg.c: packet generator osi interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <osi/osi.h>
+
+/* Packet-generator edit group covering the one-byte OSI header. */
+typedef struct
+{
+  pg_edit_t protocol;
+} pg_osi_header_t;
+
+/* Bind the single edit to the osi_header_t protocol field. */
+static inline void
+pg_osi_header_init (pg_osi_header_t * e)
+{
+  pg_edit_init (&e->protocol, osi_header_t, protocol);
+}
+
+/* Parse "osi <protocol> ..." for a packet-generator stream: create an
+   edit group for the OSI header, then let the registered handler node
+   parse the inner header if the protocol is fixed and known, otherwise
+   fall back to a generic payload.  Frees the edit group on failure. */
+uword
+unformat_pg_osi_header (unformat_input_t * input, va_list * args)
+{
+  pg_stream_t *s = va_arg (*args, pg_stream_t *);
+  pg_osi_header_t *h;
+  u32 group_index, error;
+
+  h = pg_create_edit_group (s, sizeof (h[0]), sizeof (osi_header_t),
+			    &group_index);
+  pg_osi_header_init (h);
+
+  /* Assume failure; cleared only after the full parse succeeds. */
+  error = 1;
+  if (!unformat (input, "%U",
+		 unformat_pg_edit, unformat_osi_protocol, &h->protocol))
+    goto done;
+
+  {
+    osi_main_t *pm = &osi_main;
+    osi_protocol_info_t *pi = 0;
+    pg_node_t *pg_node = 0;
+
+    /* Only a fixed (non-ranged) protocol value selects an inner parser. */
+    if (h->protocol.type == PG_EDIT_FIXED)
+      {
+	u8 t = *h->protocol.values[PG_EDIT_LO];
+	pi = osi_get_protocol_info (pm, t);
+	if (pi && pi->node_index != ~0)
+	  pg_node = pg_get_node (pi->node_index);
+      }
+
+    /* Inner-header parse is optional; plain payload is required
+       otherwise. */
+    if (pg_node && pg_node->unformat_edit
+	&& unformat_user (input, pg_node->unformat_edit, s))
+      ;
+
+    else if (!unformat_user (input, unformat_pg_payload, s))
+      goto done;
+  }
+
+  error = 0;
+done:
+  if (error)
+    pg_free_edit_group (s);
+  return error == 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/osi/plugin.c b/src/plugins/osi/plugin.c
new file mode 100644
index 00000000000..5fc412e093e
--- /dev/null
+++ b/src/plugins/osi/plugin.c
@@ -0,0 +1,23 @@
+/*
+ * plugin.c: osi
+ *
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+/* Register this shared object with the VPP plugin framework. */
+VLIB_PLUGIN_REGISTER () = {
+  .version = VPP_BUILD_VER,
+  .description = "OSI plugin",
+}; \ No newline at end of file
diff --git a/src/plugins/prom/prom.c b/src/plugins/prom/prom.c
index 934e8480d3c..475e98b1038 100644
--- a/src/plugins/prom/prom.c
+++ b/src/plugins/prom/prom.c
@@ -191,6 +191,7 @@ send_data_to_hss (hss_session_handle_t sh)
args.sh = sh;
args.data = vec_dup (pm->stats);
args.data_len = vec_len (pm->stats);
+ args.ct = HTTP_CONTENT_TEXT_PLAIN;
args.sc = HTTP_STATUS_OK;
args.free_vec_data = 1;
@@ -207,7 +208,7 @@ static uword
prom_scraper_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
vlib_frame_t *f)
{
- uword *event_data = 0, event_type;
+ uword *event_data = 0, event_type, *sh_as_uword;
prom_main_t *pm = &prom_main;
hss_session_handle_t sh;
f64 timeout = 10000.0;
@@ -222,12 +223,15 @@ prom_scraper_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
/* timeout, do nothing */
break;
case PROM_SCRAPER_EVT_RUN:
- sh.as_u64 = event_data[0];
vec_reset_length (pm->stats);
pm->stats = scrape_stats_segment (pm->stats, pm->stats_patterns,
pm->used_only);
- session_send_rpc_evt_to_thread_force (sh.thread_index,
- send_data_to_hss_rpc, &sh);
+ vec_foreach (sh_as_uword, event_data)
+ {
+ sh.as_u64 = (u64) *sh_as_uword;
+ session_send_rpc_evt_to_thread_force (
+ sh.thread_index, send_data_to_hss_rpc, sh_as_uword);
+ }
pm->last_scrape = vlib_time_now (vm);
break;
default:
diff --git a/src/plugins/pvti/CMakeLists.txt b/src/plugins/pvti/CMakeLists.txt
new file mode 100644
index 00000000000..900b662d54a
--- /dev/null
+++ b/src/plugins/pvti/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Copyright (c) 2024 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Build the pvti plugin: tunnel interface management, the three
+# data-plane nodes (with per-CPU-arch variants), and the binary API.
+add_vpp_plugin(pvti
+  SOURCES
+  pvti_if.c
+  pvti.c
+  input.h
+  input.c
+  input-main.c
+  output.h
+  output.c
+  output-main.c
+  bypass.h
+  bypass.c
+  bypass-main.c
+  api.c
+  pvti.h
+
+  # Data-plane nodes compiled once per supported instruction set.
+  MULTIARCH_SOURCES
+  input.c
+  output.c
+  bypass.c
+
+  API_FILES
+  pvti.api
+
+  # API_TEST_SOURCES
+  # pvti_test.c
+)
diff --git a/src/plugins/pvti/FEATURE.yaml b/src/plugins/pvti/FEATURE.yaml
new file mode 100644
index 00000000000..52dbe5b7c1b
--- /dev/null
+++ b/src/plugins/pvti/FEATURE.yaml
@@ -0,0 +1,8 @@
+---
+name: Packet Vector Tunnel
+maintainer: Andrew Yourtchenko <ayourtch@gmail.com>
+features:
+ - support inner MTU up to ~8K over standard 1280..1500 MTU substrate
+description: "Large MTU Tunnels"
+state: development
+properties: [API, CLI]
diff --git a/src/plugins/pvti/api.c b/src/plugins/pvti/api.c
new file mode 100644
index 00000000000..cda39ad44e8
--- /dev/null
+++ b/src/plugins/pvti/api.c
@@ -0,0 +1,137 @@
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/format_fns.h>
+#include <vnet/ip/ip_types_api.h>
+#include <vlibapi/api.h>
+
+#include <pvti/pvti.api_enum.h>
+#include <pvti/pvti.api_types.h>
+
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+#define REPLY_MSG_ID_BASE pvm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/* Context threaded through pvti_if_walk() when dumping interfaces:
+   the requesting client registration and its request context. */
+typedef struct
+{
+  vl_api_registration_t *reg;
+  u32 context;
+} pvti_if_details_ctx_t;
+
+/* NOTE(review): empty struct is a GNU extension (no size in ISO C) and
+   this type is unused in this file -- consider removing. */
+typedef struct
+{
+
+} pvti_interface_dump_ctx_t;
+
+/* Walk callback: encode one PVTI interface as an interface_details
+   message and send it to the client captured in DATA (a
+   pvti_if_details_ctx_t).  Always continues the walk. */
+static walk_rc_t
+pvti_if_send_details (index_t pvtii, void *data)
+{
+  vl_api_pvti_interface_details_t *rmp;
+  pvti_if_details_ctx_t *ctx = data;
+  const pvti_if_t *pvi;
+
+  pvi = pvti_if_get (pvtii);
+
+  rmp = vl_msg_api_alloc_zero (sizeof (*rmp));
+  rmp->_vl_msg_id =
+    htons (VL_API_PVTI_INTERFACE_DETAILS + pvti_main.msg_id_base);
+
+  /* Scalar fields go out in network byte order. */
+  rmp->interface.sw_if_index = htonl (pvi->sw_if_index);
+  rmp->interface.local_port = htons (pvi->local_port);
+  rmp->interface.remote_port = htons (pvi->remote_port);
+  rmp->interface.underlay_mtu = htons (pvi->underlay_mtu);
+
+  ip_address_encode2 (&pvi->local_ip, &rmp->interface.local_ip);
+  ip_address_encode2 (&pvi->remote_ip, &rmp->interface.remote_ip);
+
+  rmp->context = ctx->context;
+
+  vl_api_send_msg (ctx->reg, (u8 *) rmp);
+
+  return (WALK_CONTINUE);
+}
+
+/* Handle interface_dump: sw_if_index == ~0 dumps every PVTI interface,
+   otherwise only the one bound to that sw_if_index (if any). */
+static void
+vl_api_pvti_interface_dump_t_handler (vl_api_pvti_interface_dump_t *mp)
+{
+  vl_api_registration_t *reg;
+  // pvti_main_t *pvm = &pvti_main;
+
+  /* Client may have disconnected; nothing to reply to. */
+  reg = vl_api_client_index_to_registration (mp->client_index);
+  if (reg == 0)
+    return;
+
+  pvti_if_details_ctx_t ctx = {
+    .reg = reg,
+    .context = mp->context,
+  };
+
+  u32 sw_if_index = ntohl (mp->sw_if_index);
+  if (sw_if_index == ~0)
+    pvti_if_walk (pvti_if_send_details, &ctx);
+  else
+    {
+      index_t pvtii = pvti_if_find_by_sw_if_index (sw_if_index);
+      if (pvtii != INDEX_INVALID)
+	pvti_if_send_details (pvtii, &ctx);
+    }
+}
+
+/* Handle interface_create: decode the wire-order request fields,
+   default the underlay MTU to 1500 when unset, create the tunnel
+   interface and reply with its sw_if_index (or the error code). */
+static void
+vl_api_pvti_interface_create_t_handler (vl_api_pvti_interface_create_t *mp)
+{
+  vl_api_pvti_interface_create_reply_t *rmp;
+  pvti_main_t *pvm = &pvti_main;
+  int rv = ~0;
+  u32 sw_if_index = ~0;
+  ip_address_t local_ip;
+  ip_address_t remote_ip;
+
+  ip_address_decode2 (&mp->interface.local_ip, &local_ip);
+  ip_address_decode2 (&mp->interface.remote_ip, &remote_ip);
+  /* Fixed: wire fields arrive in network order, so decode with the
+     net->host macros.  The previous host->net spelling performed the
+     identical byte swap but documented the direction backwards. */
+  u16 lport = clib_net_to_host_u16 (mp->interface.local_port);
+  u16 rport = clib_net_to_host_u16 (mp->interface.remote_port);
+  u16 underlay_mtu = clib_net_to_host_u16 (mp->interface.underlay_mtu);
+  u32 underlay_fib_index =
+    clib_net_to_host_u32 (mp->interface.underlay_fib_index);
+  pvti_peer_address_method_t peer_address_method =
+    mp->interface.peer_address_from_payload ? PVTI_PEER_ADDRESS_FROM_PAYLOAD :
+					      PVTI_PEER_ADDRESS_FIXED;
+
+  /* Zero means "unspecified": fall back to the Ethernet default. */
+  if (underlay_mtu == 0)
+    {
+      underlay_mtu = 1500;
+    }
+
+  rv =
+    pvti_if_create (&local_ip, lport, &remote_ip, rport, peer_address_method,
+		    underlay_mtu, underlay_fib_index, &sw_if_index);
+
+  REPLY_MACRO2 (VL_API_PVTI_INTERFACE_CREATE_REPLY,
+		{ rmp->sw_if_index = htonl (sw_if_index); });
+}
+
+/* Handle interface_delete: tear down the tunnel bound to the given
+   sw_if_index and reply with pvti_if_delete's return code. */
+static void
+vl_api_pvti_interface_delete_t_handler (vl_api_pvti_interface_delete_t *mp)
+{
+  vl_api_pvti_interface_delete_reply_t *rmp;
+  pvti_main_t *pvm = &pvti_main;
+  int rv = 0;
+
+  rv = pvti_if_delete (ntohl (mp->sw_if_index));
+  REPLY_MACRO (VL_API_PVTI_INTERFACE_DELETE_REPLY);
+}
+
+/* API definitions */
+#include <pvti/pvti.api.c>
+
+/* Register the plugin's API messages and record the assigned base id.
+   Fixed: '(void)' prototype -- an empty '()' parameter list declares an
+   unprototyped function in pre-C23 C. */
+void
+pvti_api_init (void)
+{
+  pvti_main_t *pvm = &pvti_main;
+  /* Add our API messages to the global name_crc hash table */
+  pvm->msg_id_base = setup_message_id_table ();
+}
diff --git a/src/plugins/pvti/bypass-main.c b/src/plugins/pvti/bypass-main.c
new file mode 100644
index 00000000000..db79ccd2113
--- /dev/null
+++ b/src/plugins/pvti/bypass-main.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <pvti/bypass.h>
+
+/* packet trace format function */
+static u8 *
+format_pvti_bypass_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pvti_bypass_trace_t *t = va_arg (*args, pvti_bypass_trace_t *);
+
+ s = format (s, "PVTI-BYPASS: sw_if_index %d, next index %d\n",
+ t->sw_if_index, t->next_index);
+ s = format (s, " src %U sport %d dport %d\n", format_ip_address,
+ &t->remote_ip, t->remote_port, t->local_port);
+ s = format (s, " seq: %d", t->seq);
+ return s;
+}
+
+vlib_node_registration_t pvti4_bypass_node;
+vlib_node_registration_t pvti6_bypass_node;
+
+/* One string per foreach_pvti_bypass_error entry, same order. */
+static char *pvti_bypass_error_strings[] = {
+#define _(sym, string) string,
+  foreach_pvti_bypass_error
+#undef _
+};
+
+/* ip4 flavor: bypass feature node feeding pvti4-input. */
+VLIB_REGISTER_NODE (pvti4_bypass_node) =
+{
+  .name = "ip4-pvti-bypass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_pvti_bypass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(pvti_bypass_error_strings),
+  .error_strings = pvti_bypass_error_strings,
+
+  .n_next_nodes = PVTI_BYPASS_N_NEXT,
+
+  .next_nodes = {
+    [PVTI_BYPASS_NEXT_DROP] = "error-drop",
+    [PVTI_BYPASS_NEXT_PVTI_INPUT] = "pvti4-input",
+  },
+
+};
+
+/* ip6 flavor: identical layout, feeding pvti6-input. */
+VLIB_REGISTER_NODE (pvti6_bypass_node) =
+{
+  .name = "ip6-pvti-bypass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_pvti_bypass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN(pvti_bypass_error_strings),
+  .error_strings = pvti_bypass_error_strings,
+
+  .n_next_nodes = PVTI_BYPASS_N_NEXT,
+
+  .next_nodes = {
+    [PVTI_BYPASS_NEXT_DROP] = "error-drop",
+    [PVTI_BYPASS_NEXT_PVTI_INPUT] = "pvti6-input",
+  },
+
+};
diff --git a/src/plugins/pvti/bypass.c b/src/plugins/pvti/bypass.c
new file mode 100644
index 00000000000..14c976439eb
--- /dev/null
+++ b/src/plugins/pvti/bypass.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+#include <pvti/bypass.h>
+
+/* Shared v4/v6 worker for the PVTI bypass feature nodes.  For each
+   packet: if it is UDP and its source ip:port matches a configured
+   PVTI peer, validate UDP length/checksum, advance past the IP+UDP
+   headers and hand it straight to pvtiX-input; otherwise the packet
+   continues along the feature arc untouched. */
+always_inline uword
+pvti_bypass_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
+			 vlib_frame_t *frame, bool is_ip6)
+{
+  u32 n_left_from, *from, *to_next;
+  pvti_bypass_next_t next_index;
+  /* Fixed: use the address-family specific input node for error
+     accounting; the ip4 node's error strings are wrong for ip6. */
+  vlib_node_runtime_t *error_node = vlib_node_get_runtime (
+    vm, is_ip6 ? ip6_input_node.index : ip4_input_node.index);
+
+  u32 pkts_processed = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  vlib_buffer_t *b0;
+	  u32 sw_if_index0;
+	  ip4_header_t *ip40 = 0;
+	  ip6_header_t *ip60 = 0;
+	  udp_header_t *udp0;
+	  u32 bi0, ip_len0, udp_len0, flags0, next0;
+	  u8 error0, good_udp0, proto0;
+	  i32 len_diff0;
+
+	  bi0 = to_next[0] = from[0];
+	  from += 1;
+	  n_left_from -= 1;
+	  to_next += 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  /* Fixed: record the rx interface for the trace (was always 0). */
+	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+	  /* Default next: continue along the feature arc (no bypass). */
+	  vnet_feature_next (&next0, b0);
+
+	  if (is_ip6)
+	    {
+	      ip60 = vlib_buffer_get_current (b0);
+	      proto0 = ip60->protocol;
+	    }
+	  else
+	    {
+	      ip40 = vlib_buffer_get_current (b0);
+	      /* Treat IP frag packets as "experimental" protocol for now */
+	      proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
+	    }
+
+	  /* Process packet 0 */
+	  if (proto0 != IP_PROTOCOL_UDP)
+	    goto exit; /* not UDP packet */
+
+	  if (is_ip6)
+	    udp0 = ip6_next_header (ip60);
+	  else
+	    udp0 = ip4_next_header (ip40);
+
+	  /* Only packets from a configured peer take the bypass. */
+	  u32 pvti_index0 = INDEX_INVALID;
+	  if (is_ip6)
+	    {
+	      pvti_index0 = pvti_if_find_by_remote_ip6_and_port (
+		&ip60->src_address, clib_net_to_host_u16 (udp0->src_port));
+	    }
+	  else
+	    {
+	      pvti_index0 = pvti_if_find_by_remote_ip4_and_port (
+		&ip40->src_address, clib_net_to_host_u16 (udp0->src_port));
+	    }
+	  if (pvti_index0 == INDEX_INVALID)
+	    goto exit;
+
+	  flags0 = b0->flags;
+	  good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+	  /* Don't verify UDP checksum for packets with explicit zero checksum.
+	   */
+	  good_udp0 |= udp0->checksum == 0;
+
+	  /* Verify UDP length */
+	  if (is_ip6)
+	    ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
+	  else
+	    ip_len0 = clib_net_to_host_u16 (ip40->length);
+	  udp_len0 = clib_net_to_host_u16 (udp0->length);
+	  len_diff0 = ip_len0 - udp_len0;
+
+	  /* Verify UDP checksum in software when offload didn't mark it. */
+	  if (PREDICT_FALSE (!good_udp0))
+	    {
+	      if (is_ip6)
+		flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
+	      else
+		flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
+	      good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+	    }
+
+	  if (is_ip6)
+	    {
+	      error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
+	      error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
+	    }
+	  else
+	    {
+	      error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
+	      error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
+	    }
+
+	  next0 = error0 ? PVTI_BYPASS_NEXT_DROP : PVTI_BYPASS_NEXT_PVTI_INPUT;
+	  b0->error = error0 ? error_node->errors[error0] : 0;
+
+	  /* pvtiX-input node expect current at PVTI header */
+	  if (is_ip6)
+	    vlib_buffer_advance (b0, sizeof (ip6_header_t) +
+				       sizeof (udp_header_t));
+	  else
+	    vlib_buffer_advance (b0, sizeof (ip4_header_t) +
+				       sizeof (udp_header_t));
+	exit:
+
+	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+			     (b0->flags & VLIB_BUFFER_IS_TRACED)))
+	    {
+	      pvti_bypass_trace_t *t =
+		vlib_add_trace (vm, node, b0, sizeof (*t));
+	      t->sw_if_index = sw_if_index0;
+	      t->next_index = next0;
+	      t->seq = 0; /* sequence number is not parsed at bypass stage */
+	      /* Fixed: fill the v6 remote address too (branch was empty). */
+	      if (is_ip6)
+		{
+		  t->remote_ip.ip.ip6 = ip60->src_address;
+		  t->remote_ip.version = AF_IP6;
+		}
+	      else
+		{
+		  t->remote_ip.ip.ip4 = ip40->src_address;
+		  t->remote_ip.version = AF_IP4;
+		}
+	      // t->local_port = h0->udp.dst_port;
+	      // t->remote_port = h0->udp.src_port;
+	    }
+
+	  pkts_processed += 1;
+
+	  /* verify speculative enqueue, maybe switch current next frame */
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+					   n_left_to_next, bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, node->node_index,
+			       PVTI_BYPASS_ERROR_PROCESSED, pkts_processed);
+  return frame->n_vectors;
+}
+
+VLIB_NODE_FN (pvti4_bypass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  /* IPv4 flavor of the shared bypass implementation. */
+  return pvti_bypass_node_common (vm, node, frame, /* is_ip6 */ false);
+}
+
+VLIB_NODE_FN (pvti6_bypass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  /* IPv6 flavor of the shared bypass implementation. */
+  return pvti_bypass_node_common (vm, node, frame, /* is_ip6 */ true);
+}
diff --git a/src/plugins/pvti/bypass.h b/src/plugins/pvti/bypass.h
new file mode 100644
index 00000000000..611d5770ad3
--- /dev/null
+++ b/src/plugins/pvti/bypass.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_bypass_h__
+#define __included_pvti_bypass_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+/* Per-packet trace record for the bypass nodes. */
+typedef struct
+{
+  u32 next_index;
+  u32 sw_if_index;
+  ip_address_t remote_ip;
+  u16 remote_port;
+  u16 local_port;
+  u32 seq;
+} pvti_bypass_trace_t;
+
+#define foreach_pvti_bypass_error                                             \
+  _ (PROCESSED, "PVTI bypass tunnel packets processed")
+
+typedef enum
+{
+#define _(sym, str) PVTI_BYPASS_ERROR_##sym,
+  foreach_pvti_bypass_error
+#undef _
+    PVTI_BYPASS_N_ERROR,
+} pvti_bypass_error_t;
+
+/* Next-node indices shared by the v4 and v6 bypass nodes. */
+typedef enum
+{
+  PVTI_BYPASS_NEXT_DROP,
+  PVTI_BYPASS_NEXT_PVTI_INPUT,
+  PVTI_BYPASS_N_NEXT,
+} pvti_bypass_next_t;
+
+#endif // __included_pvti_bypass_h__
diff --git a/src/plugins/pvti/input-main.c b/src/plugins/pvti/input-main.c
new file mode 100644
index 00000000000..8ab8b18dd7c
--- /dev/null
+++ b/src/plugins/pvti/input-main.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <pvti/input.h>
+
+/* Human-readable strings for the foreach_pvti_input_error counters. */
+static char *pvti_input_error_strings[] = {
+#define _(sym, string) string,
+ foreach_pvti_input_error
+#undef _
+};
+
+/* Printable names for the trace record types (drop/decap/free/enqueue). */
+#define _(f, s) s,
+static char *pvti_input_trace_type_names[] = { foreach_pvti_input_trace_type };
+#undef _
+
+/* Map a trace record type to its printable name; anything outside the
+ * known range formats as "unknown". */
+static char *
+get_pvti_trace_type_name (u8 ptype)
+{
+  return (ptype < PVTI_INPUT_TRACE_N_TYPES) ?
+	   pvti_input_trace_type_names[ptype] :
+	   "unknown";
+}
+
+/* packet trace format function */
+static u8 *
+format_pvti_input_trace (u8 *s, va_list *args)
+{
+ int i;
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pvti_input_trace_t *t = va_arg (*args, pvti_input_trace_t *);
+
+ u32 indent = format_get_indent (s);
+
+ s = format (s,
+ "PVTI-IN: sw_if_index %d, next index %d, trace_type: %s(%d), "
+ "chunkcnt: %d\n",
+ t->sw_if_index, t->next_index,
+ get_pvti_trace_type_name (t->trace_type), t->trace_type,
+ t->chunk_count);
+ s = format (s, " src %U sport %d dport %d\n", format_ip_address,
+ &t->remote_ip, t->remote_port, t->local_port);
+ s = format (s, " seq: %d, chunk_count: %d\n", t->seq, t->chunk_count);
+ u16 max = t->chunk_count > MAX_CHUNKS ? MAX_CHUNKS : t->chunk_count;
+ for (i = 0; i < max; i++)
+ {
+ s = format (s, " %02d: sz %d\n", i, t->chunks[i].total_chunk_length);
+ }
+ s = format (s, "\n%U%U", format_white_space, indent,
+ format_ip_adjacency_packet_data, t->packet_data,
+ sizeof (t->packet_data));
+
+ return s;
+}
+
+vlib_node_registration_t pvti4_input_node;
+vlib_node_registration_t pvti6_input_node;
+
+/* IPv4-underlay decap node; shares trace formatter and error strings
+ * with the IPv6 flavor below. */
+VLIB_REGISTER_NODE (pvti4_input_node) =
+{
+ .name = "pvti4-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(pvti_input_error_strings),
+ .error_strings = pvti_input_error_strings,
+
+ .n_next_nodes = PVTI_INPUT_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_INPUT_NEXT_DROP] = "error-drop",
+ [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input",
+ [PVTI_INPUT_NEXT_PUNT] = "error-punt",
+ },
+
+};
+/* IPv6-underlay decap node. */
+VLIB_REGISTER_NODE (pvti6_input_node) =
+{
+ .name = "pvti6-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = ARRAY_LEN(pvti_input_error_strings),
+ .error_strings = pvti_input_error_strings,
+
+ .n_next_nodes = PVTI_INPUT_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_INPUT_NEXT_DROP] = "error-drop",
+ [PVTI_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [PVTI_INPUT_NEXT_IP6_INPUT] = "ip6-input",
+ [PVTI_INPUT_NEXT_PUNT] = "error-punt",
+ },
+
+};
diff --git a/src/plugins/pvti/input.c b/src/plugins/pvti/input.c
new file mode 100644
index 00000000000..6a8806e2795
--- /dev/null
+++ b/src/plugins/pvti/input.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+#include <pvti/input.h>
+
+/* Queue buffer bi0 onto the per-thread pending-rx vectors for a batched
+ * enqueue to next0, adding a decap trace record when tracing applies.
+ * The actual enqueue happens at the end of the node dispatch. */
+always_inline void
+pvti_enqueue_rx_bi_to_next_and_trace (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ pvti_per_thread_data_t *ptd, u32 bi0,
+ u16 next0)
+{
+ vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next0, b0,
+ /* follow_chain */ 0)))
+ {
+ pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->trace_type = PVTI_INPUT_TRACE_decap;
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ vec_add1 (ptd->pending_rx_buffers, bi0);
+ vec_add1 (ptd->pending_rx_nexts, next0);
+}
+
+/* Find the per-thread RX peer matching the encap source address/port of
+ * b0, lazily reaping peers marked deleted; if none exists but a PVTI
+ * interface matches, create a fresh peer. Returns NULL when no
+ * interface matches. Note: b0's current data points just past the
+ * encap header, hence the "- 1" back-steps below. */
+always_inline pvti_rx_peer_t *
+pvti_try_find_or_create_rx_peer (pvti_per_thread_data_t *ptd,
+ vlib_buffer_t *b0, bool is_ip6)
+{
+ pvti_rx_peer_t *peer;
+
+ ip_address_t remote_ip = { 0 };
+ u16 remote_port;
+ if (is_ip6)
+ {
+ pvti_ip6_encap_header_t *h0 =
+ ((pvti_ip6_encap_header_t *) vlib_buffer_get_current (b0)) - 1;
+ ip_address_set (&remote_ip, &h0->ip6.src_address, AF_IP6);
+ remote_port = clib_net_to_host_u16 (h0->udp.src_port);
+ }
+ else
+ {
+ pvti_ip4_encap_header_t *h0 =
+ ((pvti_ip4_encap_header_t *) vlib_buffer_get_current (b0)) - 1;
+ ip_address_set (&remote_ip, &h0->ip4.src_address, AF_IP4);
+ remote_port = clib_net_to_host_u16 (h0->udp.src_port);
+ }
+
+ pool_foreach (peer, ptd->rx_peers)
+ {
+ if (peer->remote_port == remote_port &&
+ 0 == ip_address_cmp (&remote_ip, &peer->remote_ip))
+ {
+ if (peer->deleted)
+ {
+ // The peer has been marked as deleted - wipe it.
+ clib_memset (peer, 0xca, sizeof (*peer));
+ pool_put (ptd->rx_peers, peer);
+ continue;
+ }
+ return peer;
+ }
+ }
+
+ index_t pvti_if_index0 =
+ pvti_if_find_by_remote_ip_and_port (&remote_ip, remote_port);
+ if (INDEX_INVALID == pvti_if_index0)
+ {
+ // no suitable interface found, bail
+ return 0;
+ }
+ pvti_if_t *pvti_if0 = pvti_if_get (pvti_if_index0);
+
+ pvti_rx_peer_t new_peer = {
+ .local_ip = pvti_if0->local_ip,
+ .local_port = pvti_if0->local_port,
+ .remote_ip = remote_ip,
+ .remote_port = remote_port,
+ .pvti_if_index = pvti_if_index0,
+ .rx_streams = { { 0 } },
+ };
+ pvti_rx_peer_t *rx_new_peer;
+ pool_get (ptd->rx_peers, rx_new_peer);
+ *rx_new_peer = new_peer;
+
+ /* no reassembly in progress on any stream of a fresh peer */
+ int i;
+ for (i = 0; i < MAX_RX_STREAMS; i++)
+ {
+ rx_new_peer->rx_streams[i].rx_bi0 = INDEX_INVALID;
+ rx_new_peer->rx_streams[i].rx_bi0_first = INDEX_INVALID;
+ rx_new_peer->rx_streams[i].rx_next0 = 0;
+ }
+
+ return rx_new_peer;
+}
+
+/* PVTI decapsulation worker shared by pvti4-input and pvti6-input.
+ * For each tunnel packet: locate/create the RX peer, strip the PVTI
+ * packet header, then walk the chunks. Leading "reassembly" chunks are
+ * appended to the per-stream in-progress inner packet (when the
+ * sequence number continues the stream); remaining chunks each start a
+ * new inner packet which is either emitted immediately or stashed as
+ * the beginning of a multi-packet reassembly. Decapsulated inner
+ * packets are batched via the per-thread pending-rx vectors. */
+always_inline u16
+pvti_input_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_ip6)
+{
+ u32 n_left_from, *from;
+ pvti_chunk_header_t *chunks[MAX_CHUNKS];
+ u32 pkts_processed = 0;
+ u32 pkts_decapsulated = 0;
+ u32 decap_failed_no_buffers = 0;
+
+ pvti_main_t *pvm = &pvti_main;
+
+ u32 thread_index = vlib_get_thread_index ();
+ pvti_per_thread_data_t *ptd =
+ vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = PVTI_INPUT_NEXT_DROP;
+ u32 sw_if_index0;
+ u8 true_chunk_count = 0;
+ u8 max_chunk_count;
+
+ bi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ /* NOTE(review): h0 is only consumed by the drop trace below, which
+ * reads it as an IPv4 encap header regardless of is_ip6 - for IPv6
+ * underlay packets the traced address/ports are bogus; confirm. */
+ pvti_ip4_encap_header_t *h0 =
+ ((pvti_ip4_encap_header_t *) vlib_buffer_get_current (b0)) - 1;
+ pvti_rx_peer_t *pvti_rx_peer0 =
+ pvti_try_find_or_create_rx_peer (ptd, b0, is_ip6);
+ if (!pvti_rx_peer0)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_PEER];
+ goto drop_and_maybe_trace;
+ }
+
+ b0 = vlib_get_buffer (vm, bi0);
+ pvti_packet_header_t *pvti0 = vlib_buffer_get_current (b0);
+ u8 stream_index = pvti0->stream_index;
+ /* clamp the advertised chunk count to what we can record */
+ max_chunk_count =
+ pvti0->chunk_count < MAX_CHUNKS ? pvti0->chunk_count : MAX_CHUNKS;
+ u16 pvti_packet_header_sz0 =
+ pvti0->pad_bytes + offsetof (pvti_packet_header_t, pad);
+ if (b0->current_length < pvti_packet_header_sz0)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_PACKET_TOO_SHORT];
+ goto drop_and_maybe_trace;
+ }
+ vlib_buffer_advance (b0, pvti_packet_header_sz0);
+
+ if (max_chunk_count == 0)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_NOCHUNKS];
+ goto drop_and_maybe_trace;
+ }
+ if (pvti0->reass_chunk_count > max_chunk_count)
+ {
+ b0->error = node->errors[PVTI_INPUT_ERROR_TOOMANYREASS];
+ goto drop_and_maybe_trace;
+ }
+ pvti_per_rx_stream_data_t *rx_stream0 =
+ &pvti_rx_peer0->rx_streams[stream_index];
+
+ u32 new_seq0 = clib_net_to_host_u32 (pvti0->seq);
+ if (new_seq0 == rx_stream0->last_rx_seq + 1)
+ {
+ /* Sequence# matches, we can attempt adding the leading chunks to
+ * reassembly */
+ rx_stream0->last_rx_seq = new_seq0;
+
+ while ((b0->current_length > 0) &&
+ true_chunk_count < pvti0->reass_chunk_count)
+ {
+ /* attempt to either incorporate the first chunk into
+ * reassembly or skip it. */
+ pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0);
+ const u16 chunk_payload_length =
+ clib_net_to_host_u16 (pvc0->total_chunk_length) -
+ sizeof (*pvc0);
+ vlib_buffer_advance (b0, sizeof (*pvc0));
+
+ if (rx_stream0->rx_bi0 == INDEX_INVALID)
+ {
+ clib_warning (
+ "RX internal error: not-first chunk but no wip block");
+ }
+ else
+ {
+
+ vlib_buffer_t *rb0 =
+ vlib_get_buffer (vm, rx_stream0->rx_bi0);
+ u16 allowed_length =
+ PVTI_RX_MAX_LENGTH - rb0->current_length;
+ if (allowed_length > chunk_payload_length)
+ {
+ // simple case - there is space in the buffer to fit
+ // the whole chunk
+ void *tail =
+ vlib_buffer_put_uninit (rb0, chunk_payload_length);
+ clib_memcpy (tail, vlib_buffer_get_current (b0),
+ chunk_payload_length);
+ }
+ else
+ {
+ // The current chunk can not fit - need to make two
+ // copies, one into the current buffer, and one into
+ // a newly allocated chained buffer.
+ void *tail =
+ vlib_buffer_put_uninit (rb0, allowed_length);
+ clib_memcpy (tail, vlib_buffer_get_current (b0),
+ allowed_length);
+ u16 remaining_payload_length =
+ chunk_payload_length - allowed_length;
+ u32 nrbi0 = pvti_get_new_buffer (vm);
+ if (INDEX_INVALID == nrbi0)
+ {
+ ASSERT (0); // FIXME what the recovery is
+ // supposed to look like ?
+ }
+ else
+ {
+ // link up the new buffer and copy the remainder
+ // there
+ vlib_buffer_t *nrb0 = vlib_get_buffer (vm, nrbi0);
+ rb0->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ rb0->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ rb0->next_buffer = nrbi0;
+ rx_stream0->rx_bi0 = nrbi0;
+ void *tail = vlib_buffer_put_uninit (
+ nrb0, remaining_payload_length);
+ clib_memcpy (tail,
+ vlib_buffer_get_current (b0) +
+ allowed_length,
+ remaining_payload_length);
+ }
+ }
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_received_inner_length += chunk_payload_length;
+ /* inner packet complete -> emit it and reset state */
+ if (pvti_rx_peer0->rx_streams[stream_index]
+ .rx_received_inner_length ==
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_expected_inner_length)
+ {
+ next0 = rx_stream0->rx_next0;
+ pvti_enqueue_rx_bi_to_next_and_trace (
+ vm, node, ptd, rx_stream0->rx_bi0_first, next0);
+ pkts_decapsulated += 1;
+
+ // clean out the current reassemly state
+ rx_stream0->rx_bi0 = INDEX_INVALID;
+ rx_stream0->rx_bi0_first = INDEX_INVALID;
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_received_inner_length = 0;
+ pvti_rx_peer0->rx_streams[stream_index]
+ .rx_expected_inner_length = 0;
+ rx_stream0->rx_next0 = 0;
+ }
+ }
+ chunks[true_chunk_count] = pvc0;
+ true_chunk_count += 1;
+ vlib_buffer_advance (b0, chunk_payload_length);
+ }
+ }
+ else
+ {
+ /* Sequence does not match, skip the reassembly chunks and reset
+ * the reassembly state */
+
+ while ((b0->current_length > 0) &&
+ true_chunk_count < pvti0->reass_chunk_count)
+ {
+ /* skip the reassembly chunks */
+ pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0);
+ chunks[true_chunk_count] = pvc0;
+ true_chunk_count += 1;
+ vlib_buffer_advance (
+ b0, clib_net_to_host_u16 (pvc0->total_chunk_length));
+ }
+ // FIXME: discard the current reassembly state, reset the seq#
+ if (rx_stream0->rx_bi0_first != INDEX_INVALID)
+ {
+ clib_warning ("RX PVTI: discard chunk being reassembled");
+ vlib_buffer_free_one (vm, rx_stream0->rx_bi0_first);
+ rx_stream0->rx_bi0 = INDEX_INVALID;
+ rx_stream0->rx_bi0_first = INDEX_INVALID;
+ rx_stream0->rx_received_inner_length = 0;
+ rx_stream0->rx_expected_inner_length = 0;
+ rx_stream0->rx_next0 = 0;
+ }
+ }
+
+ /* walk the remaining (non-reassembly) chunks; each starts a fresh
+ * inner packet */
+ while ((b0->current_length > 0) && true_chunk_count < max_chunk_count)
+ {
+ if (b0->current_length < sizeof (pvti_chunk_header_t))
+ {
+ clib_warning ("RX ERR: length too short for a chunk");
+ break;
+ }
+ pvti_chunk_header_t *pvc0 = vlib_buffer_get_current (b0);
+ chunks[true_chunk_count] = pvc0;
+ true_chunk_count += 1;
+ u16 total_chunk_length =
+ clib_net_to_host_u16 (pvc0->total_chunk_length);
+ if (b0->current_length < total_chunk_length)
+ {
+ clib_warning ("RX ERR: length 0x%x too big for a chunk",
+ true_chunk_count);
+ break;
+ }
+ u8 *pkt = (u8 *) (pvc0 + 1);
+ u16 inner_length;
+ /* a non-reassembly chunk terminates any reassembly in progress */
+ if (rx_stream0->rx_bi0_first != INDEX_INVALID)
+ {
+ vlib_buffer_free_one (vm, rx_stream0->rx_bi0_first);
+ rx_stream0->rx_bi0 = INDEX_INVALID;
+ rx_stream0->rx_bi0_first = INDEX_INVALID;
+ rx_stream0->rx_received_inner_length = 0;
+ rx_stream0->rx_expected_inner_length = 0;
+ rx_stream0->rx_next0 = 0;
+ }
+
+ /* classify inner packet by IP version nibble; inner length comes
+ * from the IPv4 total-length / IPv6 payload-length field */
+ switch (*pkt & 0xf0)
+ {
+ case 0x40:
+ next0 = PVTI_INPUT_NEXT_IP4_INPUT;
+ inner_length = clib_net_to_host_u16 (*((u16 *) (pkt + 2)));
+ break;
+ case 0x60:
+ next0 = PVTI_INPUT_NEXT_IP6_INPUT;
+ inner_length = clib_net_to_host_u16 (*((u16 *) (pkt + 4))) +
+ sizeof (ip6_header_t);
+ break;
+ default:
+ next0 = PVTI_INPUT_NEXT_DROP;
+ vlib_buffer_advance (b0, total_chunk_length);
+ continue;
+ }
+ vlib_buffer_advance (b0, sizeof (pvti_chunk_header_t));
+
+ if (inner_length + sizeof (pvti_chunk_header_t) > total_chunk_length)
+ {
+ /* FIXME: the packet size is larger than the chunk -> it's a
+ * first fragment */
+ // enqueue the chunk and finish packet processing.
+ // There must be no active reassembly.
+ ASSERT (rx_stream0->rx_bi0_first == INDEX_INVALID);
+ rx_stream0->rx_next0 = next0;
+ rx_stream0->rx_bi0 = bi0;
+ rx_stream0->rx_bi0_first = bi0;
+ rx_stream0->rx_expected_inner_length = inner_length;
+ rx_stream0->rx_received_inner_length =
+ total_chunk_length - sizeof (pvti_chunk_header_t);
+ rx_stream0->last_rx_seq = new_seq0;
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ pvti_input_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = ~0;
+ t->trace_type = PVTI_INPUT_TRACE_enqueue;
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ goto continue_outer;
+ }
+
+ u32 nbi0 = pvti_get_new_buffer (vm);
+ if (INDEX_INVALID == nbi0)
+ {
+ /* NOTE(review): b0 was already advanced past the chunk
+ * header but not past the payload - the next iteration will
+ * misparse payload bytes as a chunk header; confirm whether
+ * the remainder of this tunnel packet should be skipped. */
+ decap_failed_no_buffers += 1;
+ continue;
+ };
+ vlib_buffer_t *nb0 = vlib_get_buffer (vm, nbi0);
+ pvti_if_t *pvti_if0 = pvti_if_get (pvti_rx_peer0->pvti_if_index);
+ vnet_buffer (nb0)->sw_if_index[VLIB_RX] = pvti_if0->sw_if_index;
+ void *new_packet = vlib_buffer_put_uninit (nb0, inner_length);
+ clib_memcpy (new_packet, pvc0 + 1, inner_length);
+ vlib_buffer_advance (b0, inner_length);
+
+ pvti_enqueue_rx_bi_to_next_and_trace (vm, node, ptd, nbi0, next0);
+ pkts_decapsulated += 1;
+ }
+ /* we have processed all the chunks from the buffer, but the buffer
+ * remains. Free it. */
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = ~0;
+ t->trace_type = PVTI_INPUT_TRACE_free;
+ t->seq = clib_net_to_host_u32 (pvti0->seq);
+ t->chunk_count = pvti0->chunk_count;
+ u8 chunk_count =
+ pvti0->chunk_count < MAX_CHUNKS ? pvti0->chunk_count : MAX_CHUNKS;
+ for (int i = 0; i < chunk_count; i++)
+ {
+ t->chunks[i].total_chunk_length =
+ clib_net_to_host_u16 (chunks[i]->total_chunk_length);
+ }
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ vlib_buffer_free_one (vm, bi0);
+
+ continue_outer:
+ pkts_processed += 1;
+ continue;
+
+ drop_and_maybe_trace:
+ next0 = PVTI_INPUT_NEXT_DROP;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ int i;
+ pvti_input_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = sw_if_index0;
+ t->trace_type = PVTI_INPUT_TRACE_drop;
+ t->next_index = next0;
+ t->remote_ip.ip.ip4 = h0->ip4.src_address;
+ t->remote_ip.version = AF_IP4;
+ /* NOTE(review): ports are stored in network byte order here but
+ * the trace formatter prints them with %d - confirm whether a
+ * clib_net_to_host_u16() is intended, as done elsewhere. */
+ t->local_port = h0->udp.dst_port;
+ t->remote_port = h0->udp.src_port;
+ if (!pvti_rx_peer0)
+ {
+ t->seq = 0xdeaddead;
+ }
+ else
+ {
+ t->seq = clib_net_to_host_u32 (pvti0->seq);
+ t->chunk_count = pvti0->chunk_count;
+ u8 chunk_count = pvti0->chunk_count < MAX_CHUNKS ?
+ pvti0->chunk_count :
+ MAX_CHUNKS;
+ for (i = 0; i < chunk_count; i++)
+ {
+ t->chunks[i].total_chunk_length =
+ clib_net_to_host_u16 (chunks[i]->total_chunk_length);
+ }
+ }
+ }
+
+ pkts_processed += 1;
+ vec_add1 (ptd->pending_rx_buffers, bi0);
+ vec_add1 (ptd->pending_rx_nexts, next0);
+ }
+
+ /* flush the batched buffers to their next nodes */
+ vlib_buffer_enqueue_to_next_vec (vm, node, &ptd->pending_rx_buffers,
+ &ptd->pending_rx_nexts,
+ vec_len (ptd->pending_rx_nexts));
+ vec_reset_length (ptd->pending_rx_buffers);
+ vec_reset_length (ptd->pending_rx_nexts);
+
+ vlib_node_increment_counter (vm, node->node_index,
+ PVTI_INPUT_ERROR_PROCESSED, pkts_processed);
+ vlib_node_increment_counter (
+ vm, node->node_index, PVTI_INPUT_ERROR_DECAPSULATED, pkts_decapsulated);
+ vlib_node_increment_counter (vm, node->node_index,
+ PVTI_INPUT_ERROR_NO_BUFFERS,
+ decap_failed_no_buffers);
+ return frame->n_vectors;
+}
+
+/* IPv4-underlay decap entry point. */
+VLIB_NODE_FN (pvti4_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_input_node_common (vm, node, frame, 0);
+}
+
+/* IPv6-underlay decap entry point. */
+VLIB_NODE_FN (pvti6_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_input_node_common (vm, node, frame, 1);
+}
diff --git a/src/plugins/pvti/input.h b/src/plugins/pvti/input.h
new file mode 100644
index 00000000000..02a186cde05
--- /dev/null
+++ b/src/plugins/pvti/input.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_input_h__
+#define __included_pvti_input_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+/* Per-chunk record captured in the packet trace. */
+typedef struct
+{
+ u16 total_chunk_length;
+} pvti_input_chunk_t;
+
+/* Maximum number of chunks recorded per traced tunnel packet. */
+#define MAX_CHUNKS 32
+/* Upper bound on the reassembled inner packet portion per buffer. */
+#define PVTI_RX_MAX_LENGTH 2048
+
+/* Per-packet trace record for the pvti input nodes. */
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ ip_address_t remote_ip;
+ u16 remote_port;
+ u16 local_port;
+ u32 seq;
+ pvti_input_chunk_t chunks[MAX_CHUNKS];
+ u8 chunk_count;
+ u8 trace_type; /* one of pvti_input_trace_type_t */
+ u8 packet_data[64];
+} pvti_input_trace_t;
+
+/* Trace record flavors emitted by the input nodes. */
+#define foreach_pvti_input_trace_type \
+ _ (drop, "drop") \
+ _ (decap, "decapsulate") \
+ _ (free, "free") \
+ _ (enqueue, "enqueue")
+
+typedef enum
+{
+#define _(f, s) PVTI_INPUT_TRACE_##f,
+ foreach_pvti_input_trace_type
+#undef _
+ PVTI_INPUT_TRACE_N_TYPES,
+} pvti_input_trace_type_t;
+
+/* Error counters for the input nodes. */
+#define foreach_pvti_input_error \
+ _ (PROCESSED, "PVTI tunneled packets processed") \
+ _ (DECAPSULATED, "PVTI inner packets decapsulated") \
+ _ (PEER, "Could not find a peer") \
+ _ (NOCHUNKS, "Packet has no chunks") \
+ _ (NO_BUFFERS, "No buffers available to decapsulate") \
+ _ (TOOMANYREASS, "Packet has more reassembly chunks than total") \
+ _ (PACKET_TOO_SHORT, "Packet too short")
+
+typedef enum
+{
+#define _(sym, str) PVTI_INPUT_ERROR_##sym,
+ foreach_pvti_input_error
+#undef _
+ PVTI_INPUT_N_ERROR,
+} pvti_input_error_t;
+
+/* Next-node indices for the input nodes. */
+typedef enum
+{
+ PVTI_INPUT_NEXT_DROP,
+ PVTI_INPUT_NEXT_IP4_INPUT,
+ PVTI_INPUT_NEXT_IP6_INPUT,
+ PVTI_INPUT_NEXT_PUNT,
+ PVTI_INPUT_N_NEXT,
+} pvti_input_next_t;
+
+#endif // __included_pvti_input_h__
diff --git a/src/plugins/pvti/output-main.c b/src/plugins/pvti/output-main.c
new file mode 100644
index 00000000000..ae4ae5f8e98
--- /dev/null
+++ b/src/plugins/pvti/output-main.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <pvti/output.h>
+
+/* packet trace format function */
+static u8 *
+format_pvti_output_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pvti_output_trace_t *t = va_arg (*args, pvti_output_trace_t *);
+
+ u32 indent = format_get_indent (s);
+ s =
+ format (s, "PVTI-OUT(%d): sw_if_index %d, next index %d, underlay_mtu %d,",
+ t->trace_type, t->sw_if_index, t->next_index, t->underlay_mtu);
+ s = format (s, "\n%U stream_index %d, bi0_max_current_length %d, tx_seq %d",
+ format_white_space, indent, t->stream_index,
+ t->bi0_max_current_length, t->tx_seq);
+ s = format (s, "\n%U%U", format_white_space, indent,
+ format_ip_adjacency_packet_data, t->packet_data,
+ sizeof (t->packet_data));
+
+ return s;
+}
+
+/* NOTE(review): this handle is declared but no node named "pvti-output"
+ * is registered below (only pvti4/pvti6 flavors) - looks like a
+ * leftover; confirm before removing. */
+vlib_node_registration_t pvti_output_node;
+
+/* Human-readable strings for the foreach_pvti_output_error counters. */
+static char *pvti_output_error_strings[] = {
+#define _(sym, string) string,
+ foreach_pvti_output_error
+#undef _
+};
+
+/* IPv4-payload encap node; shares trace formatter and error strings
+ * with the IPv6 flavor below. */
+VLIB_REGISTER_NODE (pvti4_output_node) =
+{
+ .name = "pvti4-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_output_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(pvti_output_error_strings),
+ .error_strings = pvti_output_error_strings,
+
+ .n_next_nodes = PVTI_OUTPUT_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_OUTPUT_NEXT_DROP] = "error-drop",
+ [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
+ [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ },
+
+};
+/* IPv6-payload encap node. */
+VLIB_REGISTER_NODE (pvti6_output_node) =
+{
+ .name = "pvti6-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pvti_output_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(pvti_output_error_strings),
+ .error_strings = pvti_output_error_strings,
+
+ .n_next_nodes = PVTI_OUTPUT_N_NEXT,
+
+ .next_nodes = {
+ [PVTI_OUTPUT_NEXT_DROP] = "error-drop",
+ [PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
+ [PVTI_OUTPUT_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [PVTI_OUTPUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ },
+
+};
diff --git a/src/plugins/pvti/output.c b/src/plugins/pvti/output.c
new file mode 100644
index 00000000000..1939c6f585a
--- /dev/null
+++ b/src/plugins/pvti/output.c
@@ -0,0 +1,543 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+#include <pvti/output.h>
+
+/* Build the IPv6 version/traffic-class/flow-label word: version 6,
+ * traffic class 0, flow label = stream index. Returned in network
+ * byte order, ready to store in the header. */
+static_always_inline u32
+ip6_vtcfl (u8 stream_index)
+{
+  const u32 host_order_vtcfl = (0x6 << 28) | stream_index;
+
+  return clib_host_to_net_u32 (host_order_vtcfl);
+}
+
+/* Allocate a single fresh TX buffer and reset its data pointers.
+ * Returns NULL when the buffer pool is exhausted. */
+always_inline vlib_buffer_t *
+pvti_alloc_new_tx_buffer (vlib_main_t *vm)
+{
+  u32 bi = INDEX_INVALID;
+  vlib_buffer_t *b = 0;
+
+  if (1 == vlib_buffer_alloc (vm, &bi, 1))
+    {
+      b = vlib_get_buffer (vm, bi);
+      b->current_data = 0;
+      b->current_length = 0;
+    }
+  return b;
+}
+
+/* Find the per-thread TX peer matching (remote_ip, remote_port),
+ * lazily reaping peers marked deleted; if none matches, create a new
+ * one primed with a fresh TX buffer. On success stores the peer's pool
+ * index into *out_index and returns 1; returns 0 when a new peer was
+ * needed but no buffer could be allocated. */
+always_inline bool
+pvti_find_or_try_create_tx_peer (vlib_main_t *vm, pvti_per_thread_data_t *ptd,
+				 pvti_if_t *pvti_if0, ip_address_t *remote_ip,
+				 u16 remote_port, u32 *out_index)
+{
+
+  pvti_tx_peer_t *peer;
+  pool_foreach (peer, ptd->tx_peers)
+    {
+      if (peer->remote_port == remote_port &&
+	  0 == ip_address_cmp (remote_ip, &peer->remote_ip))
+	{
+	  if (peer->deleted)
+	    {
+	      /* Bad luck, the peer has been deleted - reap it.
+	       * Fix: only call vlib_get_buffer_index() after the NULL
+	       * check; the previous code dereferenced a possibly-NULL
+	       * bo0 before checking it. */
+	      if (peer->bo0)
+		{
+		  u32 boi0 = vlib_get_buffer_index (vm, peer->bo0);
+		  vlib_buffer_free (vm, &boi0, 1);
+		}
+	      clib_memset (peer, 0xca, sizeof (*peer));
+	      pool_put (ptd->tx_peers, peer);
+	      continue;
+	    }
+	  *out_index = peer - ptd->tx_peers;
+	  return 1;
+	}
+    }
+
+  ip_address_family_t dst_ver = ip_addr_version (&pvti_if0->remote_ip);
+
+  /* encap overhead depends on the underlay address family */
+  u16 pvti_encap_overhead = (dst_ver == AF_IP6) ?
+			      sizeof (pvti_ip6_encap_header_t) :
+			      sizeof (pvti_ip4_encap_header_t);
+
+  u16 pvti_packet_overhead =
+    pvti_encap_overhead + sizeof (pvti_packet_header_t) + PVTI_ALIGN_BYTES;
+
+  ASSERT (pvti_if0->underlay_mtu > pvti_packet_overhead);
+
+  u32 bo0_max_current_length = pvti_if0->underlay_mtu - pvti_packet_overhead;
+
+  vlib_buffer_t *bo0 = pvti_alloc_new_tx_buffer (vm);
+
+  if (!bo0)
+    {
+      return 0;
+    }
+
+  pvti_tx_peer_t new_peer = {
+    .local_ip = pvti_if0->local_ip,
+    .remote_ip = *remote_ip,
+    .local_port = pvti_if0->local_port,
+    .remote_port = remote_port,
+    .underlay_mtu = pvti_if0->underlay_mtu,
+    .underlay_fib_index = pvti_if0->underlay_fib_index,
+    .bo0_max_current_length = bo0_max_current_length,
+    .pvti_if_index = pvti_if_get_index (pvti_if0),
+    .deleted = 0,
+    .bo0 = bo0,
+    .chunk_count = 0,
+    .reass_chunk_count = 0,
+    .current_tx_seq = 42,
+  };
+
+  pvti_tx_peer_t *tx_new_peer;
+  pool_get (ptd->tx_peers, tx_new_peer);
+
+  *tx_new_peer = new_peer;
+  *out_index = tx_new_peer - ptd->tx_peers;
+  return 1;
+}
+
+/* Resolve the TX peer index for buffer b0 on interface pvti_if0: the
+ * peer address/port come either from the interface configuration or,
+ * when peer_address_from_payload is set, from the inner packet's
+ * destination address. Returns 1 and fills *out_index on success. */
+always_inline bool
+pvti_try_get_tx_peer_index (vlib_main_t *vm, pvti_per_thread_data_t *ptd,
+			    pvti_if_t *pvti_if0, vlib_buffer_t *b0,
+			    bool is_ip6, u32 *out_index)
+{
+  ip_address_t payload_dst = { 0 };
+  ip_address_t *remote = &pvti_if0->remote_ip;
+
+  if (pvti_if0->peer_address_from_payload)
+    {
+      /* take the peer address from the inner packet's destination */
+      if (is_ip6)
+	{
+	  ip6_header_t *ip6 = vlib_buffer_get_current (b0);
+	  ip_address_set (&payload_dst, &ip6->dst_address, AF_IP6);
+	}
+      else
+	{
+	  ip4_header_t *ip4 = vlib_buffer_get_current (b0);
+	  ip_address_set (&payload_dst, &ip4->dst_address, AF_IP4);
+	}
+      remote = &payload_dst;
+    }
+
+  return pvti_find_or_try_create_tx_peer (vm, ptd, pvti_if0, remote,
+					  pvti_if0->remote_port, out_index);
+}
+
+/* Close out a chunk being built at chunk_header, whose payload ends at
+ * tail: write the total length (network order) and bump the peer's
+ * chunk counters. */
+always_inline void
+pvti_finalize_chunk (pvti_tx_peer_t *tx_peer,
+		     pvti_chunk_header_t *chunk_header, u8 *tail,
+		     bool is_reassembly_chunk)
+{
+  const u16 chunk_length = tail - (u8 *) chunk_header;
+
+  /* poison the header first, then fill in the real fields */
+  clib_memset (chunk_header, 0xab, sizeof (pvti_chunk_header_t));
+  chunk_header->total_chunk_length = clib_host_to_net_u16 (chunk_length);
+
+  tx_peer->chunk_count += 1;
+  if (is_reassembly_chunk)
+    {
+      tx_peer->reass_chunk_count += 1;
+    }
+}
+
+/* Prepend the PVTI packet header and the IPv4 or IPv6+UDP encap header
+ * to the peer's pending TX buffer, and return the lookup next-node.
+ * The encap family is derived from the peer's local/remote addresses.
+ * NOTE(review): the is_ip6 argument is unused here (is_ip6_encap is
+ * computed from the address family instead) - confirm it is intended
+ * only to keep the call signature uniform. */
+always_inline pvti_output_next_t
+encap_pvti_buffer_ip46 (vlib_main_t *vm, vlib_node_runtime_t *node,
+ pvti_tx_peer_t *tx_peer, int is_ip6)
+{
+ ip_address_family_t src_ver = ip_addr_version (&tx_peer->local_ip);
+ ip_address_family_t dst_ver = ip_addr_version (&tx_peer->remote_ip);
+ u8 stream_index = 0;
+
+ ASSERT (src_ver == dst_ver);
+ bool is_ip6_encap = (AF_IP6 == src_ver);
+
+ vlib_buffer_t *b0 = tx_peer->bo0;
+ vlib_buffer_advance (b0,
+ -(sizeof (pvti_packet_header_t) + PVTI_ALIGN_BYTES));
+
+ /* fill the PVTI header (poisoned first, incl. the alignment pad) */
+ pvti_packet_header_t *pvti0 = vlib_buffer_get_current (b0);
+ clib_memset (pvti0, 0xca, sizeof (*pvti0) + PVTI_ALIGN_BYTES);
+ pvti0->pad_bytes = PVTI_ALIGN_BYTES;
+
+ pvti0->seq = clib_host_to_net_u32 (tx_peer->current_tx_seq);
+ pvti0->stream_index = stream_index;
+ pvti0->reass_chunk_count = tx_peer->reass_chunk_count;
+ pvti0->chunk_count = tx_peer->chunk_count;
+ pvti0->mandatory_flags_mask = 0;
+ pvti0->flags_value = 0;
+
+ if (is_ip6_encap)
+ {
+ vlib_buffer_advance (b0, -(sizeof (pvti_ip6_encap_header_t)));
+ if (b0->current_data < -VLIB_BUFFER_PRE_DATA_SIZE)
+ {
+ // undo the change
+ vlib_buffer_advance (b0, (sizeof (pvti_ip6_encap_header_t)));
+ b0->error = node->errors[PVTI_OUTPUT_ERROR_NO_PRE_SPACE];
+ return PVTI_OUTPUT_NEXT_DROP;
+ }
+ pvti_ip6_encap_header_t *ve = vlib_buffer_get_current (b0);
+
+ ve->udp.src_port = clib_host_to_net_u16 (tx_peer->local_port);
+ ve->udp.dst_port = clib_host_to_net_u16 (tx_peer->remote_port);
+ ve->udp.length = clib_host_to_net_u16 (
+ b0->current_length - offsetof (pvti_ip6_encap_header_t, udp));
+ ve->udp.checksum = 0;
+
+ ve->ip6.ip_version_traffic_class_and_flow_label =
+ ip6_vtcfl (stream_index);
+ ve->ip6.payload_length = ve->udp.length;
+ ve->ip6.protocol = 17;
+ ve->ip6.hop_limit = 128;
+ ip_address_copy_addr (&ve->ip6.src_address, &tx_peer->local_ip);
+ ip_address_copy_addr (&ve->ip6.dst_address, &tx_peer->remote_ip);
+ }
+ else
+ {
+ vlib_buffer_advance (b0, -(sizeof (pvti_ip4_encap_header_t)));
+ if (b0->current_data < -VLIB_BUFFER_PRE_DATA_SIZE)
+ {
+ // undo the change
+ vlib_buffer_advance (b0, (sizeof (pvti_ip4_encap_header_t)));
+ b0->error = node->errors[PVTI_OUTPUT_ERROR_NO_PRE_SPACE];
+ return PVTI_OUTPUT_NEXT_DROP;
+ }
+ pvti_ip4_encap_header_t *ve = vlib_buffer_get_current (b0);
+
+ ve->udp.src_port = clib_host_to_net_u16 (tx_peer->local_port);
+ ve->udp.dst_port = clib_host_to_net_u16 (tx_peer->remote_port);
+ ve->udp.length = clib_host_to_net_u16 (
+ b0->current_length - offsetof (pvti_ip4_encap_header_t, udp));
+ ve->udp.checksum = 0;
+
+ ve->ip4.ip_version_and_header_length = 0x45;
+ ve->ip4.tos = 0;
+ ve->ip4.length = clib_host_to_net_u16 (b0->current_length);
+ /* low 16 bits of the tx sequence double as the IP fragment id */
+ ve->ip4.fragment_id =
+ clib_host_to_net_u16 (tx_peer->current_tx_seq & 0xffff);
+ ve->ip4.flags_and_fragment_offset = 0;
+ ve->ip4.ttl = 128;
+ ve->ip4.protocol = 17;
+
+ ve->ip4.dst_address.as_u32 = ip_addr_v4 (&tx_peer->remote_ip).data_u32;
+ ve->ip4.src_address.as_u32 = ip_addr_v4 (&tx_peer->local_ip).data_u32;
+ ve->ip4.checksum = ip4_header_checksum (&ve->ip4);
+ }
+
+ // This is important, if not reset, causes a crash
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = tx_peer->underlay_fib_index;
+
+ // vnet_buffer (b0)->oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
+ return is_ip6_encap ? PVTI_OUTPUT_NEXT_IP6_LOOKUP :
+ PVTI_OUTPUT_NEXT_IP4_LOOKUP;
+}
+
+/* Queue buffer b0 onto the per-thread pending-tx vectors for a batched
+ * enqueue to next0; add a trace record when the node is tracing and
+ * the peer's pending buffer was marked traced. */
+always_inline void
+pvti_enqueue_tx_and_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ pvti_per_thread_data_t *ptd, vlib_buffer_t *b0,
+ u16 next0, u8 stream_index, pvti_tx_peer_t *tx_peer)
+{
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ tx_peer->is_bo0_traced))
+ {
+ if (PREDICT_TRUE (
+ vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0)))
+ {
+
+ pvti_output_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ t->underlay_mtu = tx_peer->underlay_mtu;
+ t->stream_index = stream_index;
+ t->trace_type = 1;
+ t->bi0_max_current_length = tx_peer->bo0_max_current_length;
+ t->tx_seq = tx_peer->current_tx_seq;
+ clib_memcpy (t->packet_data, vlib_buffer_get_current (b0),
+ sizeof (t->packet_data));
+ }
+ }
+ u32 bi0 = vlib_get_buffer_index (vm, b0);
+ vec_add1 (ptd->pending_tx_buffers, bi0);
+ vec_add1 (ptd->pending_tx_nexts, next0);
+}
+
+/* Queue buffer b0 towards the drop next index, recording a drop-type
+ * (trace_type == 0) trace entry when both node and buffer are traced. */
+always_inline void
+pvti_enqueue_tx_drop_and_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+				pvti_per_thread_data_t *ptd, vlib_buffer_t *b0,
+				u8 stream_index)
+{
+  bool node_is_tracing = (node->flags & VLIB_NODE_FLAG_TRACE) != 0;
+  bool buffer_is_traced = (b0->flags & VLIB_BUFFER_IS_TRACED) != 0;
+  if (PREDICT_FALSE (node_is_tracing && buffer_is_traced))
+    {
+      pvti_output_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+      tr->next_index = PVTI_OUTPUT_NEXT_DROP;
+      tr->stream_index = stream_index;
+      tr->trace_type = 0;
+      clib_memcpy (tr->packet_data, vlib_buffer_get_current (b0),
+		   sizeof (tr->packet_data));
+    }
+  /* Buffers/nexts are batched and flushed at the end of the frame. */
+  vec_add1 (ptd->pending_tx_buffers, vlib_get_buffer_index (vm, b0));
+  vec_add1 (ptd->pending_tx_nexts, PVTI_OUTPUT_NEXT_DROP);
+}
+
+/* Finish the carrier buffer accumulated for the given tx peer:
+ * encapsulate it (ip4 or ip6), queue it for transmission (with optional
+ * trace), then recharge the peer with a fresh buffer and bump the
+ * sequence number for subsequent chunks.
+ * NOTE(review): this function always returns 1, yet its callers treat a
+ * 0 return as a failure, and the result of pvti_alloc_new_tx_buffer ()
+ * is not checked here - confirm whether an allocation failure should
+ * make this return 0. */
+always_inline bool
+pvti_flush_peer_and_recharge (vlib_main_t *vm, vlib_node_runtime_t *node,
+			      pvti_per_thread_data_t *ptd, u32 tx_peer_index,
+			      u8 stream_index, const bool is_ip6)
+{
+  pvti_tx_peer_t *tx_peer = pool_elt_at_index (ptd->tx_peers, tx_peer_index);
+  /* Write the ip4/ip6 + udp encap header and pick the lookup next. */
+  u16 next0 = encap_pvti_buffer_ip46 (vm, node, tx_peer, is_ip6);
+
+  pvti_enqueue_tx_and_trace (vm, node, ptd, tx_peer->bo0, next0, stream_index,
+			     tx_peer);
+
+  /* Start over with an empty carrier buffer and a new tx sequence. */
+  tx_peer->bo0 = pvti_alloc_new_tx_buffer (vm);
+  tx_peer->reass_chunk_count = 0;
+  tx_peer->chunk_count = 0;
+  tx_peer->current_tx_seq++;
+
+  return 1;
+}
+
+/*
+ * Common output path for the ip4/ip6 PVTI output nodes: coalesce each
+ * incoming packet as one or more chunks into the per-peer carrier
+ * buffer, flushing (encapsulating + queueing) the carrier whenever it
+ * fills up, and finally flushing all peers with pending chunks.
+ *
+ * BUGFIX vs. the original: in the chunk-splitting path, a failed
+ * recharge used "continue", which restarted the *inner* copy loop with
+ * the peer in a failed state instead of moving to the next packet; it
+ * is now a goto past the buffer free (the buffer is already queued for
+ * drop, so it must not also be freed).
+ */
+always_inline u16
+pvti_output_node_common (vlib_main_t *vm, vlib_node_runtime_t *node,
+			 vlib_frame_t *frame, const bool is_ip6)
+{
+  pvti_main_t *pvm = &pvti_main;
+
+  u32 n_left_from, *from;
+  u32 pkts_encapsulated = 0;
+  u32 pkts_processed = 0;
+  /* NOTE(review): the three counters below are never incremented in
+   * this function - confirm whether they are still needed. */
+  u32 pkts_chopped = 0;
+  u32 pkts_overflow = 0;
+  u32 pkts_overflow_cantfit = 0;
+
+  bool is_node_traced = (node->flags & VLIB_NODE_FLAG_TRACE) ? 1 : 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+
+  u8 stream_index = pvti_get_stream_index (is_ip6);
+
+  u32 thread_index = vlib_get_thread_index ();
+  pvti_per_thread_data_t *ptd =
+    vec_elt_at_index (pvm->per_thread_data[is_ip6], thread_index);
+
+  vlib_buffer_t *ibufs[VLIB_FRAME_SIZE], **ib = ibufs;
+
+  vlib_get_buffers (vm, from, ibufs, n_left_from);
+
+  n_left_from = frame->n_vectors;
+  while (n_left_from > 0)
+    {
+      n_left_from -= 1;
+      vlib_buffer_t *b0 = ib[0];
+      ib++;
+      u32 bi0 = vlib_get_buffer_index (vm, b0);
+      bool is_b0_traced =
+	is_node_traced && ((b0->flags & VLIB_BUFFER_IS_TRACED) ? 1 : 0);
+      pkts_processed += 1;
+
+      /* Map the TX interface to its PVTI instance. */
+      u32 sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+      u32 pvti_index0 = pvti_if_find_by_sw_if_index (sw_if_index0);
+      if (pvti_index0 == INDEX_INVALID)
+	{
+	  b0->error = node->errors[PVTI_OUTPUT_ERROR_PEER];
+	  pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, stream_index);
+	  continue;
+	}
+      pvti_if_t *pvti_if0 = pvti_if_get (pvti_index0);
+      u32 tx_peer_index;
+      if (!pvti_try_get_tx_peer_index (vm, ptd, pvti_if0, b0, is_ip6,
+				       &tx_peer_index))
+	{
+	  b0->error = node->errors[PVTI_OUTPUT_ERROR_MAKE_PEER];
+	  pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0, stream_index);
+	  continue;
+	}
+      pvti_tx_peer_t *tx_peer = &ptd->tx_peers[tx_peer_index];
+
+      u32 b0_len = vlib_buffer_length_in_chain (vm, b0);
+      u32 total_chunk_len = sizeof (pvti_chunk_header_t) + b0_len;
+
+      if (tx_peer->bo0_max_current_length >=
+	  tx_peer->bo0->current_length + total_chunk_len)
+	{
+	  /* Happy case, we can fit the entire new chunk */
+	  pvti_chunk_header_t *chunk_header = vlib_buffer_put_uninit (
+	    tx_peer->bo0, sizeof (pvti_chunk_header_t));
+	  u8 *tail = vlib_buffer_put_uninit (tx_peer->bo0, b0_len);
+	  vlib_buffer_t *b0_curr;
+	  b0_curr = b0;
+	  /* Copy the whole (possibly chained) packet into the carrier. */
+	  while (b0_len > 0)
+	    {
+	      clib_memcpy (tail, vlib_buffer_get_current (b0_curr),
+			   b0_curr->current_length);
+	      tail += b0_curr->current_length;
+	      b0_len -= b0_curr->current_length;
+	      ASSERT ((b0_len == 0) ||
+		      (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT));
+	      if (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT)
+		{
+		  b0_curr = vlib_get_buffer (vm, b0_curr->next_buffer);
+		}
+	    }
+	  tx_peer->is_bo0_traced |= is_b0_traced;
+	  pvti_finalize_chunk (tx_peer, chunk_header, tail, false);
+	}
+      else
+	{
+	  bool is_reassembly = false;
+	  /* FIXME: here, flush a packet if we want to avoid fragmenting it */
+#define PVTI_TINY_PACKET_SZ 20
+	  int threshold_len =
+	    sizeof (pvti_chunk_header_t) + PVTI_TINY_PACKET_SZ;
+
+	  /* Can we fit anything meaningful into bo0 ? if not - flush */
+	  if (tx_peer->bo0_max_current_length <=
+	      tx_peer->bo0->current_length + threshold_len)
+	    {
+	      if (!pvti_flush_peer_and_recharge (vm, node, ptd, tx_peer_index,
+						 stream_index, is_ip6))
+		{
+		  b0->error = node->errors[PVTI_OUTPUT_ERROR_RECHARGE0];
+		  pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0,
+						  stream_index);
+		  continue;
+		}
+	      pkts_encapsulated += 1;
+	    }
+
+	  pvti_chunk_header_t *chunk_header = vlib_buffer_put_uninit (
+	    tx_peer->bo0, sizeof (pvti_chunk_header_t));
+
+	  u8 *tail;
+	  vlib_buffer_t *b0_curr;
+	  /* append the chained buffers and flush as necessary */
+	  b0_curr = b0;
+
+	  int curr_b0_start_offset = 0;
+
+	  while (b0_len > 0)
+	    {
+	      ASSERT (tx_peer->bo0_max_current_length >
+		      tx_peer->bo0->current_length);
+	      /* Copy as much of the current source buffer as fits. */
+	      int copy_len =
+		clib_min (b0_curr->current_length - curr_b0_start_offset,
+			  tx_peer->bo0_max_current_length -
+			    tx_peer->bo0->current_length);
+	      tail = vlib_buffer_put_uninit (tx_peer->bo0, copy_len);
+	      clib_memcpy (tail,
+			   (u8 *) vlib_buffer_get_current (b0_curr) +
+			     curr_b0_start_offset,
+			   copy_len);
+	      tail += copy_len;
+	      b0_len -= copy_len;
+	      // Advance the start offset or reset it if we copied the entire
+	      // block
+	      curr_b0_start_offset =
+		curr_b0_start_offset + copy_len == b0_curr->current_length ?
+		  0 :
+		  curr_b0_start_offset + copy_len;
+	      ASSERT ((b0_len == 0) || (curr_b0_start_offset > 0) ||
+		      (b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT));
+	      if (curr_b0_start_offset > 0)
+		{
+		  /* Carrier is full mid-source-buffer: close the chunk,
+		   * flush, and continue into a reassembly chunk. */
+		  pvti_finalize_chunk (tx_peer, chunk_header, tail,
+				       is_reassembly);
+		  tx_peer->is_bo0_traced |= is_b0_traced;
+		  if (!pvti_flush_peer_and_recharge (
+			vm, node, ptd, tx_peer_index, stream_index, is_ip6))
+		    {
+		      b0->error = node->errors[PVTI_OUTPUT_ERROR_RECHARGE1];
+		      pvti_enqueue_tx_drop_and_trace (vm, node, ptd, b0,
+						      stream_index);
+		      /* BUGFIX: a "continue" here would restart this
+		       * inner copy loop; move on to the next packet,
+		       * skipping the free since b0 is queued for drop. */
+		      goto next_packet;
+		    }
+		  pkts_encapsulated += 1;
+		  /* next chunk(s) will be reassembly until the next block */
+		  is_reassembly = true;
+		  chunk_header = vlib_buffer_put_uninit (
+		    tx_peer->bo0, sizeof (pvti_chunk_header_t));
+		}
+	      else
+		{
+		  if ((b0_curr->flags & VLIB_BUFFER_NEXT_PRESENT))
+		    {
+		      b0_curr = vlib_get_buffer (vm, b0_curr->next_buffer);
+		    }
+		  else
+		    {
+		      /* Source packet fully consumed: close the chunk. */
+		      pvti_finalize_chunk (tx_peer, chunk_header, tail,
+					   is_reassembly);
+		      tx_peer->is_bo0_traced |= is_b0_traced;
+		    }
+		}
+	    }
+	}
+      /* The packet's payload now lives in the carrier buffer. */
+      vlib_buffer_free_one (vm, bi0);
+    next_packet:;
+    }
+
+  /* Flush all peers that still have pending chunks. */
+  int i;
+  for (i = 0; i < vec_len (ptd->tx_peers); i++)
+    {
+      if (ptd->tx_peers[i].chunk_count)
+	{
+	  pvti_flush_peer_and_recharge (vm, node, ptd, i, stream_index,
+					is_ip6);
+	  pkts_encapsulated += 1;
+	}
+    }
+
+  vlib_buffer_enqueue_to_next_vec (vm, node, &ptd->pending_tx_buffers,
+				   &ptd->pending_tx_nexts,
+				   vec_len (ptd->pending_tx_nexts));
+  vec_reset_length (ptd->pending_tx_buffers);
+  vec_reset_length (ptd->pending_tx_nexts);
+
+  vlib_node_increment_counter (
+    vm, node->node_index, PVTI_OUTPUT_ERROR_ENCAPSULATED, pkts_encapsulated);
+  vlib_node_increment_counter (vm, node->node_index,
+			       PVTI_OUTPUT_ERROR_PROCESSED, pkts_processed);
+  vlib_node_increment_counter (vm, node->node_index, PVTI_OUTPUT_ERROR_CHOPPED,
+			       pkts_chopped);
+  vlib_node_increment_counter (vm, node->node_index,
+			       PVTI_OUTPUT_ERROR_OVERFLOW, pkts_overflow);
+  vlib_node_increment_counter (vm, node->node_index,
+			       PVTI_OUTPUT_ERROR_OVERFLOW_CANTFIT,
+			       pkts_overflow_cantfit);
+  return frame->n_vectors;
+}
+
+VLIB_NODE_FN (pvti4_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_output_node_common (vm, node, frame, 0);
+}
+
+VLIB_NODE_FN (pvti6_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return pvti_output_node_common (vm, node, frame, 1);
+}
diff --git a/src/plugins/pvti/output.h b/src/plugins/pvti/output.h
new file mode 100644
index 00000000000..95e78ba9720
--- /dev/null
+++ b/src/plugins/pvti/output.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_output_h__
+#define __included_pvti_output_h__
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+/* Per-packet trace record for the pvti output nodes. */
+typedef struct
+{
+  u32 next_index; // next node index chosen for the buffer
+  u32 sw_if_index;
+  u32 tx_seq; // peer's current_tx_seq at trace time
+  u16 underlay_mtu;
+  u16 bi0_max_current_length;
+  u8 stream_index;
+  u8 trace_type;      // 1 = encapsulated carrier buffer, 0 = dropped buffer
+  u8 packet_data[96]; // leading bytes of the traced buffer
+} pvti_output_trace_t;
+
+#define foreach_pvti_output_error \
+ _ (NONE, "No error") \
+ _ (PROCESSED, "Packets processed") \
+ _ (ENCAPSULATED, "Packets encapsulated") \
+ _ (PEER, "No peer found") \
+ _ (MAKE_PEER, "Could not make peer") \
+ _ (RECHARGE0, "Could not recharge 0") \
+ _ (RECHARGE1, "Could not recharge 1") \
+ _ (NO_PRE_SPACE, "Not enought pre-data space") \
+ _ (CHOPPED, "Packets chopped") \
+ _ (OVERFLOW, "Packets overflowed") \
+ _ (OVERFLOW_CANTFIT, "Packets overflowed and cant fit excess")
+
+/* Error indices generated from foreach_pvti_output_error. */
+typedef enum
+{
+#define _(sym, str) PVTI_OUTPUT_ERROR_##sym,
+  foreach_pvti_output_error
+#undef _
+    PVTI_OUTPUT_N_ERROR,
+} pvti_output_error_t;
+
+/* Chunk kinds: independent (self-contained packet) vs. reassembly
+ * (continuation of a packet split across carrier buffers). */
+typedef enum
+{
+  PVTI_INDEPENDENT_CHUNK = 0,
+  PVTI_REASS_CHUNK,
+} pvti_chunk_type_t;
+
+#define MAX_CURR_LEN_UNKNOWN 0xffff
+
+/* Next-node indices for the output nodes. */
+typedef enum
+{
+  PVTI_OUTPUT_NEXT_DROP,
+  PVTI_OUTPUT_NEXT_INTERFACE_OUTPUT,
+  PVTI_OUTPUT_NEXT_IP4_LOOKUP,
+  PVTI_OUTPUT_NEXT_IP6_LOOKUP,
+  PVTI_OUTPUT_N_NEXT,
+} pvti_output_next_t;
+
+#endif // pvti_output_h
diff --git a/src/plugins/pvti/pvti.api b/src/plugins/pvti/pvti.api
new file mode 100644
index 00000000000..859ed1ab6b0
--- /dev/null
+++ b/src/plugins/pvti/pvti.api
@@ -0,0 +1,111 @@
+/*
+ * pvti.api - binary API skeleton
+ *
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file pvti.api
+ * @brief VPP control-plane API messages.
+ *
+ * This file defines VPP control-plane binary API messages which are generally
+ * called through a shared memory interface.
+ */
+
+/* Version and type recitations */
+
+option version = "0.0.1";
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
+
+/** \brief A composite type uniquely defining a PVTI tunnel.
+    @param sw_if_index - ignored on create/delete, present in details.
+    @param local_ip - Local (source) IP address
+    @param local_port - Local (source) UDP port
+    @param remote_ip - Remote (destination) IP address
+    @param peer_address_from_payload - learn the peer address/port from payload
+    @param remote_port - Remote (destination) UDP port
+    @param underlay_mtu - Underlay MTU for packet splitting/coalescing
+    @param underlay_fib_index - Underlay FIB index to be used after encap
+*/
+typedef pvti_tunnel
+{
+ vl_api_interface_index_t sw_if_index;
+ vl_api_address_t local_ip;
+ u16 local_port;
+ vl_api_address_t remote_ip;
+ bool peer_address_from_payload;
+ u16 remote_port;
+ u16 underlay_mtu;
+ u32 underlay_fib_index;
+};
+
+
+/** @brief API to create a PVTI interface
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param interface - the PVTI tunnel definition (see pvti_tunnel)
+*/
+
+define pvti_interface_create
+{
+ option status="in_progress";
+
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+ vl_api_pvti_tunnel_t interface;
+};
+
+define pvti_interface_create_reply
+{
+ option status="in_progress";
+ u32 context;
+ i32 retval;
+
+ /* Index for the newly created interface */
+ vl_api_interface_index_t sw_if_index;
+};
+
+autoreply define pvti_interface_delete {
+ option status="in_progress";
+
+ /* Client identifier, set from api_main.my_client_index */
+ u32 client_index;
+
+ /* Arbitrary context, so client can match reply to request */
+ u32 context;
+
+ vl_api_interface_index_t sw_if_index;
+};
+
+
+define pvti_interface_dump
+{
+ option status="in_progress";
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+
+define pvti_interface_details
+{
+ option status="in_progress";
+ u32 context;
+ vl_api_pvti_tunnel_t interface;
+};
+
+
diff --git a/src/plugins/pvti/pvti.c b/src/plugins/pvti/pvti.c
new file mode 100644
index 00000000000..646276dec09
--- /dev/null
+++ b/src/plugins/pvti/pvti.c
@@ -0,0 +1,479 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/fib/fib_table.h>
+#include <pvti/pvti.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <stdbool.h>
+
+#include <pvti/pvti.api_enum.h>
+#include <pvti/pvti.api_types.h>
+
+#include <pvti/pvti_if.h>
+
+#define REPLY_MSG_ID_BASE pmp->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+#include <vnet/ip/ip_format_fns.h>
+
+pvti_main_t pvti_main;
+
+/* Format a one-line summary of a TX-side peer's state. */
+u8 *
+format_pvti_tx_peer_ptr (u8 *s, va_list *args)
+{
+  pvti_tx_peer_t *peer = va_arg (*args, pvti_tx_peer_t *);
+
+  /* Endpoints first, then the carrier/sequence bookkeeping. */
+  s = format (s, "[%p]%s local:%U:%d remote:%U:%d", peer,
+	      peer->deleted ? " DELETED" : "", format_ip46_address,
+	      &peer->local_ip, IP46_TYPE_ANY, peer->local_port,
+	      format_ip46_address, &peer->remote_ip, IP46_TYPE_ANY,
+	      peer->remote_port);
+  s = format (s,
+	      " underlay_mtu:%d underlay_fib_idx:%d "
+	      "pvti_idx:%d b0_max_clen:%d cseq:%d chunk_count:%d "
+	      "reass_chunk_count:%d",
+	      peer->underlay_mtu, peer->underlay_fib_index,
+	      peer->pvti_if_index, peer->bo0_max_current_length,
+	      peer->current_tx_seq, peer->chunk_count,
+	      peer->reass_chunk_count);
+
+  return (s);
+}
+
+/* Format a one-line summary of an RX-side peer's state. */
+u8 *
+format_pvti_rx_peer_ptr (u8 *s, va_list *args)
+{
+  pvti_rx_peer_t *rx_peer = va_arg (*args, pvti_rx_peer_t *);
+  char *deleted_tag = rx_peer->deleted ? " DELETED" : "";
+
+  s = format (s, "[%p]%s local:%U:%d remote:%U:%d pvti_idx:%d", rx_peer,
+	      deleted_tag, format_ip46_address, &rx_peer->local_ip,
+	      IP46_TYPE_ANY, rx_peer->local_port, format_ip46_address,
+	      &rx_peer->remote_ip, IP46_TYPE_ANY, rx_peer->remote_port,
+	      rx_peer->pvti_if_index);
+
+  return (s);
+}
+
+/* Lazily size the per-thread data vectors ([0] = ip4, [1] = ip6);
+ * a no-op after the first call. */
+void
+pvti_verify_initialized (pvti_main_t *pvm)
+{
+  if (pvm->is_initialized)
+    return;
+
+  const int n_threads = vlib_get_n_threads ();
+  vec_validate (pvm->per_thread_data[0], n_threads - 1);
+  vec_validate (pvm->per_thread_data[1], n_threads - 1);
+  pvm->is_initialized = 1;
+}
+
+/* Enable or disable the ip4/ip6 pvti-bypass feature on an interface,
+ * tracking the current state in a per-AF bitmap so the feature is only
+ * toggled when the requested state actually differs. */
+void
+vnet_int_pvti_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
+{
+  pvti_main_t *pvm = &pvti_main;
+
+  /* Ignore requests for interfaces that do not (or no longer) exist. */
+  if (pool_is_free_index (pvm->vnet_main->interface_main.sw_interfaces,
+			  sw_if_index))
+    return;
+
+  pvti_verify_initialized (pvm);
+
+  is_enable = !!is_enable;
+
+  /* Pick the per-AF state bitmap and feature arc/node names. */
+  uword **bypass_bitmap = is_ip6 ? &pvm->bm_ip6_bypass_enabled_by_sw_if :
+				   &pvm->bm_ip4_bypass_enabled_by_sw_if;
+  const char *arc_name = is_ip6 ? "ip6-unicast" : "ip4-unicast";
+  const char *node_name = is_ip6 ? "ip6-pvti-bypass" : "ip4-pvti-bypass";
+
+  if (clib_bitmap_get (*bypass_bitmap, sw_if_index) != is_enable)
+    {
+      vnet_feature_enable_disable (arc_name, node_name, sw_if_index,
+				   is_enable, 0, 0);
+      *bypass_bitmap =
+	clib_bitmap_set (*bypass_bitmap, sw_if_index, is_enable);
+    }
+}
+
+/* Shared CLI handler for "set interface ip[6] pvti-bypass": parse an
+ * interface name and an optional "del", then toggle the bypass feature
+ * for the given address family. */
+static clib_error_t *
+set_ip_pvti_bypass (u32 is_ip6, unformat_input_t *input,
+		    vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error = 0;
+  u32 sw_if_index, is_enable;
+
+  sw_if_index = ~0;
+  is_enable = 1;
+
+  /* Nothing to do when the command line is empty. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat_user (line_input, unformat_vnet_sw_interface, vnm,
+			 &sw_if_index))
+	;
+      else if (unformat (line_input, "del"))
+	is_enable = 0;
+      else
+	{
+	  error = unformat_parse_error (line_input);
+	  goto done;
+	}
+    }
+
+  if (~0 == sw_if_index)
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+				 format_unformat_error, line_input);
+      goto done;
+    }
+
+  vnet_int_pvti_bypass_mode (sw_if_index, is_ip6, is_enable);
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+static clib_error_t *
+set_ip4_pvti_bypass (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ return set_ip_pvti_bypass (0, input, cmd);
+}
+
+VLIB_CLI_COMMAND (set_interface_ip_pvti_bypass_command, static) = {
+ .path = "set interface ip pvti-bypass",
+ .function = set_ip4_pvti_bypass,
+ .short_help = "set interface ip pvti-bypass <interface> [del]",
+};
+
+static clib_error_t *
+set_ip6_pvti_bypass (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ return set_ip_pvti_bypass (1, input, cmd);
+}
+
+VLIB_CLI_COMMAND (set_interface_ip6_pvti_bypass_command, static) = {
+ .path = "set interface ip6 pvti-bypass",
+ .function = set_ip6_pvti_bypass,
+ .short_help = "set interface ip6 pvti-bypass <interface> [del]",
+};
+
+/* CLI handler for "pvti interface create": parse peer/local endpoints
+ * and underlay options, then create the PVTI interface. */
+static clib_error_t *
+pvti_interface_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+				  vlib_cli_command_t *cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = 0;
+
+  // pvti_main_t * pmp = &pvti_main;
+  u32 sw_if_index = ~0;
+  int rv = 0;
+  ip_address_t peer_ip = { 0 };
+  ip_address_t local_ip = { 0 };
+  u32 peer_port = 0;
+  u32 local_port = 12345; // default local port when not specified
+  u32 underlay_mtu = 1500;
+  u32 underlay_fib_index = ~0;
+  u32 underlay_table_id = ~0;
+  pvti_peer_address_method_t peer_address_method = PVTI_PEER_ADDRESS_FIXED;
+  bool peer_set = 0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "peer %U %d %d", unformat_ip_address, &peer_ip,
+		    &peer_port, &local_port))
+	{
+	  peer_set = 1;
+	}
+      else if (unformat (line_input, "underlay-mtu %d", &underlay_mtu))
+	{
+	  // MTU set
+	}
+      else if (unformat (line_input, "local-ip %U", unformat_ip_address,
+			 &local_ip))
+	{
+	  // local IP set
+	}
+      else if (unformat (line_input, "underlay-fib %d", &underlay_fib_index))
+	{
+	  // underlay fib set
+	}
+      else if (unformat (line_input, "peer-address-from-payload"))
+	{
+	  peer_address_method = PVTI_PEER_ADDRESS_FROM_PAYLOAD;
+	}
+      else if (unformat (line_input, "underlay-table %d", &underlay_table_id))
+	{
+	  /* Resolve the table id against the peer's address family.
+	   * NOTE(review): this relies on "peer" having been parsed
+	   * before "underlay-table" on the command line - confirm. */
+	  fib_protocol_t fib_proto = FIB_PROTOCOL_IP4;
+	  if (peer_ip.version == AF_IP6)
+	    {
+	      fib_proto = FIB_PROTOCOL_IP6;
+	    }
+	  u32 fib_index = fib_table_find (fib_proto, underlay_table_id);
+
+	  if (~0 == fib_index)
+	    {
+	      error = clib_error_return (0, "Nonexistent table id %d",
+					 underlay_table_id);
+	      goto done;
+	    }
+	  underlay_fib_index = fib_index;
+	}
+      else
+	break;
+    }
+  if (!peer_set)
+    {
+      error = clib_error_return (0, "Please specify a peer...");
+      goto done;
+    }
+
+  rv = pvti_if_create (&local_ip, local_port, &peer_ip, peer_port,
+		       peer_address_method, underlay_mtu, underlay_fib_index,
+		       &sw_if_index);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+      error = clib_error_return (0, "Invalid interface");
+      break;
+
+    default:
+      error = clib_error_return (0, "pvti_if_create returned %d", rv);
+    }
+done:
+  unformat_free (line_input);
+  return error;
+}
+
+/* CLI handler for "pvti interface delete if-index <sw-ifindex>":
+ * parse the sw_if_index and delete the matching PVTI interface.
+ * (removed an unreachable "break" after a return, and dead
+ * commented-out code) */
+static clib_error_t *
+pvti_interface_delete_command_fn (vlib_main_t *vm, unformat_input_t *input,
+				  vlib_cli_command_t *cmd)
+{
+  u32 sw_if_index = ~0;
+  int rv = 0;
+  bool if_index_set = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "if-index %d", &sw_if_index))
+	{
+	  if_index_set = 1;
+	}
+      else
+	break;
+    }
+  if (!if_index_set)
+    return clib_error_return (0, "Please specify a sw_if_index...");
+
+  rv = pvti_if_delete (sw_if_index);
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_INVALID_SW_IF_INDEX:
+      return clib_error_return (0, "Invalid interface");
+
+    default:
+      return clib_error_return (0, "pvti_if_delete returned %d", rv);
+    }
+  return 0;
+}
+
+/* (fixed typo in help text: "inderlay-fib" -> "underlay-fib") */
+VLIB_CLI_COMMAND (pvti_interface_create_command, static) = {
+  .path = "pvti interface create",
+  .short_help =
+    "pvti interface create peer <remote-ip> <remote-port> <local-port> [ "
+    "local-ip <ip-addr> ][ underlay-mtu <MTU>][underlay-table "
+    "<table-index>][underlay-fib <fib-index>]",
+  .function = pvti_interface_create_command_fn,
+};
+
+VLIB_CLI_COMMAND (pvti_interface_delete_command, static) = {
+  .path = "pvti interface delete",
+  .short_help = "pvti interface delete if-index <sw-ifindex>",
+  .function = pvti_interface_delete_command_fn,
+};
+
+/* CLI handler for "show pvti interface": print every PVTI interface.
+ * if_pool is a pool (see pvti_main_t), so iterate with pool_foreach -
+ * vec_foreach would also visit freed (deleted) slots. */
+static clib_error_t *
+pvti_show_interface_command_fn (vlib_main_t *vm, unformat_input_t *input,
+				vlib_cli_command_t *cmd)
+{
+  pvti_if_t *pvti_if;
+  pool_foreach (pvti_if, pvti_main.if_pool)
+    {
+      int index = pvti_if - pvti_main.if_pool;
+      vlib_cli_output (vm, "%U", format_pvti_if, index);
+    }
+  return 0;
+}
+
+/* CLI handler for "show pvti tx peers": dump per-thread TX peer state
+ * for both address families. tx_peers is a pool (indexed with
+ * pool_elt_at_index elsewhere), so iterate with pool_foreach to skip
+ * freed slots; the per-thread vector itself is a plain vector. */
+static clib_error_t *
+pvti_show_tx_peers_command_fn (vlib_main_t *vm, unformat_input_t *input,
+			       vlib_cli_command_t *cmd)
+{
+  pvti_per_thread_data_t *ptd;
+  int is_ip6;
+  for (is_ip6 = 0; is_ip6 <= 1; is_ip6++)
+    {
+      vec_foreach (ptd, pvti_main.per_thread_data[is_ip6])
+	{
+	  vlib_cli_output (vm, "thread %d (%s)",
+			   ptd - pvti_main.per_thread_data[is_ip6],
+			   is_ip6 ? "IPv6" : "IPv4");
+	  pvti_tx_peer_t *peer;
+	  pool_foreach (peer, ptd->tx_peers)
+	    {
+	      vlib_cli_output (vm, "  %U", format_pvti_tx_peer_ptr, peer);
+	    }
+	}
+    }
+  return 0;
+}
+
+/* CLI handler for "show pvti rx peers": dump per-thread RX peer state
+ * for both address families. rx_peers is a pool (see the
+ * pvti_per_thread_data_t comment), so iterate with pool_foreach to
+ * skip freed slots; the per-thread vector itself is a plain vector. */
+static clib_error_t *
+pvti_show_rx_peers_command_fn (vlib_main_t *vm, unformat_input_t *input,
+			       vlib_cli_command_t *cmd)
+{
+  pvti_per_thread_data_t *ptd;
+  int is_ip6;
+  for (is_ip6 = 0; is_ip6 <= 1; is_ip6++)
+    {
+      vec_foreach (ptd, pvti_main.per_thread_data[is_ip6])
+	{
+	  vlib_cli_output (vm, "thread %d (%s)",
+			   ptd - pvti_main.per_thread_data[is_ip6],
+			   is_ip6 ? "IPv6" : "IPv4");
+	  pvti_rx_peer_t *peer;
+	  pool_foreach (peer, ptd->rx_peers)
+	    {
+	      vlib_cli_output (vm, "  %U", format_pvti_rx_peer_ptr, peer);
+	    }
+	}
+    }
+  return 0;
+}
+
+VLIB_CLI_COMMAND (pvti_show_interface_command, static) = {
+ .path = "show pvti interface",
+ .short_help = "show pvti interface",
+ .function = pvti_show_interface_command_fn,
+};
+
+VLIB_CLI_COMMAND (pvti_show_tx_peers_command, static) = {
+ .path = "show pvti tx peers",
+ .short_help = "show pvti tx peers",
+ .function = pvti_show_tx_peers_command_fn,
+};
+
+VLIB_CLI_COMMAND (pvti_show_rx_peers_command, static) = {
+ .path = "show pvti rx peers",
+ .short_help = "show pvti rx peers",
+ .function = pvti_show_rx_peers_command_fn,
+};
+
+void pvti_api_init ();
+
+VNET_FEATURE_INIT (pvti4_bypass, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-pvti-bypass",
+ .runs_before = 0,
+};
+
+VNET_FEATURE_INIT (pvti6_bypass, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-pvti-bypass",
+ .runs_before = 0,
+};
+
+static clib_error_t *
+pvti_early_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ u8 *runs_before = 0;
+ int rbi = 0;
+ if (vec_len (vnet_feat_pvti4_bypass.runs_before) == 0)
+ {
+ rbi = 0;
+ }
+ else
+ {
+ rbi = vec_len (vnet_feat_pvti4_bypass.runs_before) - 1;
+ }
+ vec_validate (vnet_feat_pvti4_bypass.runs_before, rbi);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "runs-before %v", &runs_before))
+ {
+ vec_add1 (runs_before, 0);
+ vnet_feat_pvti4_bypass.runs_before[rbi] = (char *) runs_before;
+ vec_add1 (vnet_feat_pvti4_bypass.runs_before, 0);
+ }
+ else
+ return clib_error_return (0, "unknown input");
+ }
+
+ return NULL;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (pvti_early_config, "pvti");
+
+static clib_error_t *
+pvti_init (vlib_main_t *vm)
+{
+ pvti_main_t *pmp = &pvti_main;
+ clib_error_t *error = 0;
+
+ pmp->vlib_main = vm;
+ pmp->vnet_main = vnet_get_main ();
+ pmp->is_initialized = 0;
+
+ pvti_api_init ();
+ return error;
+}
+
+VLIB_INIT_FUNCTION (pvti_init);
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Packet Vector Tunnel Interface plugin",
+};
diff --git a/src/plugins/pvti/pvti.h b/src/plugins/pvti/pvti.h
new file mode 100644
index 00000000000..ac097c5ecca
--- /dev/null
+++ b/src/plugins/pvti/pvti.h
@@ -0,0 +1,257 @@
+/*
+ * pvti.h - skeleton vpp engine plug-in header file
+ *
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_pvti_h__
+#define __included_pvti_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+
+#define VPP_MAX_THREADS (1 << 8)
+
+#define MAX_RX_STREAMS 256
+
+#define PVTI_ALIGN_BYTES 9
+
+typedef CLIB_PACKED (struct {
+ u32 seq;
+ u8 stream_index; // set to the cpu# on the sending side
+ u8 chunk_count;
+ u8 reass_chunk_count; // number of chunks in the front that are related to
+ // previously started buffer
+ // mandatory_flags_mask highlights which of the flags cause packet drop if
+ // not understood, and which of them can be just ignored.
+ u8 mandatory_flags_mask;
+ u8 flags_value;
+ u8 pad_bytes;
+ u8 pad[0];
+}) pvti_packet_header_t;
+
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4;
+ udp_header_t udp;
+ // not part of encap header pvti_packet_header_t pv;
+}) pvti_ip4_encap_header_t;
+
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip6;
+ udp_header_t udp;
+ // not part of encap header pvti_packet_header_t pv;
+}) pvti_ip6_encap_header_t;
+
+typedef CLIB_PACKED (struct {
+ u16 total_chunk_length;
+ // More fragments: this chunk is not the last block fragment
+#define CHUNK_FLAGS_MF (1 << 0)
+ // More blocks: this block has chained blocks that follow
+#define CHUNK_FLAGS_MB (1 << 1)
+ u16 _pad0;
+ u32 _pad1;
+ u8 chunk_data[0];
+}) pvti_chunk_header_t;
+
+typedef struct
+{
+ // a buffer being built from the smaller packets
+ u32 bi0;
+
+ // how big can this buffer grow
+ u32 bi0_max_current_length;
+
+ // how many chunks are already in the buffer
+ u8 chunk_count;
+ // leading reassembly chunk count
+ u8 reass_chunk_count;
+
+ u32 current_tx_seq;
+} pvti_per_tx_stream_data_t;
+
+/* Per-(remote sending stream) reassembly state on the receive side. */
+typedef struct
+{
+  /* The seq# that we last processed */
+  u32 last_rx_seq;
+
+  // a current buffer that is being reassembled
+  u32 rx_bi0;
+  // The root buffer, most of the times == rx_bi0 except in the case of chained
+  // buffers.
+  u32 rx_bi0_first;
+
+  // Next index for dispatch when the reassembly is done
+  u16 rx_next0;
+  // expected total inner length for the packet
+  u16 rx_expected_inner_length;
+  u16 rx_received_inner_length;
+
+} pvti_per_rx_stream_data_t;
+
+typedef struct
+{
+ ip_address_t local_ip;
+ ip_address_t remote_ip;
+ u16 remote_port;
+ u16 local_port;
+ u16 underlay_mtu;
+ u32 underlay_fib_index;
+
+ u32 pvti_if_index;
+ bool deleted;
+ bool is_bo0_traced;
+
+ u32 bo0_max_current_length;
+
+ u8 chunk_count;
+ u8 reass_chunk_count;
+ u32 current_tx_seq;
+ vlib_buffer_t *bo0;
+
+} pvti_tx_peer_t;
+
+typedef struct
+{
+ ip_address_t local_ip;
+ ip_address_t remote_ip;
+ u16 remote_port;
+ u16 local_port;
+
+ pvti_per_rx_stream_data_t rx_streams[MAX_RX_STREAMS];
+
+ u32 pvti_if_index;
+ bool deleted;
+} pvti_rx_peer_t;
+
+typedef struct
+{
+ /* pool of destination-based structures which are used to build the packets
+ */
+ pvti_tx_peer_t *tx_peers;
+
+ /* vector of buffers to send */
+ u32 *pending_tx_buffers;
+ u16 *pending_tx_nexts;
+ /* pool of source-based structures for the remote peers' data tracking
+ */
+ pvti_rx_peer_t *rx_peers;
+
+ /* vector of buffers being decapsulated */
+ u32 *pending_rx_buffers;
+ u16 *pending_rx_nexts;
+
+} pvti_per_thread_data_t;
+
+typedef struct
+{
+ ip_address_t local_ip;
+ ip_address_t remote_ip;
+ u16 remote_port;
+ u16 local_port;
+ u16 underlay_mtu;
+ u32 underlay_fib_index;
+ bool peer_address_from_payload;
+ u64 created_at;
+
+ u32 sw_if_index;
+ u32 hw_if_index;
+
+ // per-stream data for TX
+ pvti_per_tx_stream_data_t tx_streams[256];
+ pvti_per_rx_stream_data_t rx_streams[256];
+
+} pvti_if_t;
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+
+ /* have we initialized the data structures ? */
+ bool is_initialized;
+
+ /* interface pool */
+ pvti_if_t *if_pool;
+
+ /* if_index in the pool above by sw_if_index */
+ index_t *if_index_by_sw_if_index;
+
+ /* indices by port */
+ index_t **if_indices_by_port;
+
+ /* per-thread data, ip4[0] and ip6[1] */
+ pvti_per_thread_data_t *per_thread_data[2];
+
+ /* on/off switch for the periodic function */
+ u8 periodic_timer_enabled;
+ /* Node index, non-zero if the periodic process has been created */
+ u32 periodic_node_index;
+
+ /* graph node state */
+ uword *bm_ip4_bypass_enabled_by_sw_if;
+ uword *bm_ip6_bypass_enabled_by_sw_if;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ ethernet_main_t *ethernet_main;
+} pvti_main_t;
+
+extern pvti_main_t pvti_main;
+
+extern vlib_node_registration_t pvti_node;
+extern vlib_node_registration_t pvti4_input_node;
+extern vlib_node_registration_t pvti4_output_node;
+extern vlib_node_registration_t pvti6_input_node;
+extern vlib_node_registration_t pvti6_output_node;
+extern vlib_node_registration_t pvti_periodic_node;
+
+/* Build the stream index for the calling thread: low 7 bits are the
+ * thread index, top bit set when the stream is IPv6. */
+always_inline u8
+pvti_get_stream_index (int is_ip6)
+{
+  const u32 thread_index = vlib_get_thread_index ();
+
+  /* Only up to 128 threads fit into the 7-bit thread field. */
+  ASSERT ((thread_index & 0xffffff80) == 0);
+
+  u8 af_bit = is_ip6 ? 0x80 : 0;
+  return (u8) ((thread_index & 0x7f) | af_bit);
+}
+
+/* Attempt to allocate a single fresh buffer with an empty data window;
+ * returns INDEX_INVALID when no buffer is available. */
+always_inline u32
+pvti_get_new_buffer (vlib_main_t *vm)
+{
+  u32 bi0;
+  if (1 != vlib_buffer_alloc (vm, &bi0, 1))
+    return INDEX_INVALID;
+
+  vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+  b0->current_data = 0;
+  b0->current_length = 0;
+  return bi0;
+}
+
+/* Periodic function events */
+#define PVTI_EVENT1 1
+#define PVTI_EVENT2 2
+#define PVTI_EVENT_PERIODIC_ENABLE_DISABLE 3
+
+void pvti_create_periodic_process (pvti_main_t *);
+void pvti_verify_initialized (pvti_main_t *pvm);
+
+#endif /* __included_pvti_h__ */
diff --git a/src/plugins/pvti/pvti_if.c b/src/plugins/pvti/pvti_if.c
new file mode 100644
index 00000000000..4f83994a1a4
--- /dev/null
+++ b/src/plugins/pvti/pvti_if.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Copyright (c) 2020 Doc.ai and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/adj/adj_midchain.h>
+#include <vnet/udp/udp.h>
+
+#include <pvti/pvti.h>
+#include <pvti/pvti_if.h>
+
+/* vlib format function: interface name is "pvti<dev_instance>". */
+static u8 *
+format_pvti_if_name (u8 *s, va_list *args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ return format (s, "pvti%d", dev_instance);
+}
+
+/* vlib format function: one-line human-readable dump of a pvti interface,
+ * taking the pool index (index_t) of the interface as its argument. */
+u8 *
+format_pvti_if (u8 *s, va_list *args)
+{
+ index_t pvtii = va_arg (*args, u32);
+ pvti_if_t *pvti_if = pvti_if_get (pvtii);
+
+ s = format (
+ s, "[%d] %U local:%U:%d remote:%U:%d underlay_mtu:%d underlay_fib_idx:%d",
+ pvtii, format_vnet_sw_if_index_name, vnet_get_main (),
+ pvti_if->sw_if_index, format_ip46_address, &pvti_if->local_ip,
+ IP46_TYPE_ANY, pvti_if->local_port, format_ip46_address,
+ &pvti_if->remote_ip, IP46_TYPE_ANY, pvti_if->remote_port,
+ pvti_if->underlay_mtu, pvti_if->underlay_fib_index);
+
+ return (s);
+}
+
+/* Map a sw_if_index to the pvti interface pool index, or INDEX_INVALID
+ * if the sw_if_index is out of range or not a pvti interface. */
+index_t
+pvti_if_find_by_sw_if_index (u32 sw_if_index)
+{
+ if (vec_len (pvti_main.if_index_by_sw_if_index) <= sw_if_index)
+ return INDEX_INVALID;
+ u32 ti = pvti_main.if_index_by_sw_if_index[sw_if_index];
+ if (ti == ~0)
+ return INDEX_INVALID;
+
+ return (ti);
+}
+
+/* Linear scan for a tunnel matching (remote IPv4, remote port).
+ * A tunnel with peer_address_from_payload set matches any remote address,
+ * so only the port is compared for such tunnels. Returns the pool index
+ * of the first match, or INDEX_INVALID. */
+index_t
+pvti_if_find_by_remote_ip4_and_port (ip4_address_t *remote_ip4,
+ u16 remote_port)
+{
+ pvti_if_t *ifc;
+ pool_foreach (ifc, pvti_main.if_pool)
+ {
+ if ((ifc->remote_port == remote_port) &&
+ (ifc->remote_ip.version == AF_IP4) &&
+ ((ifc->remote_ip.ip.ip4.as_u32 == remote_ip4->as_u32) ||
+ ifc->peer_address_from_payload))
+ {
+ return (ifc - pvti_main.if_pool);
+ }
+ }
+ return INDEX_INVALID;
+}
+
+/* IPv6 counterpart of pvti_if_find_by_remote_ip4_and_port: linear scan
+ * for a tunnel matching (remote IPv6, remote port); a tunnel with
+ * peer_address_from_payload set matches any remote address. */
+index_t
+pvti_if_find_by_remote_ip6_and_port (ip6_address_t *remote_ip6,
+ u16 remote_port)
+{
+ pvti_if_t *ifc;
+ pool_foreach (ifc, pvti_main.if_pool)
+ {
+ if ((ifc->remote_port == remote_port) &&
+ (ifc->remote_ip.version == AF_IP6) &&
+ ((0 == memcmp (&ifc->remote_ip.ip.ip6, remote_ip6,
+ sizeof (*remote_ip6))) ||
+ ifc->peer_address_from_payload))
+ {
+ return (ifc - pvti_main.if_pool);
+ }
+ }
+ return INDEX_INVALID;
+}
+
+/* Address-family-agnostic variant: linear scan for a tunnel matching
+ * (remote ip_address_t, remote port); peer_address_from_payload tunnels
+ * match on port alone. Returns pool index or INDEX_INVALID. */
+index_t
+pvti_if_find_by_remote_ip_and_port (ip_address_t *remote_ip, u16 remote_port)
+{
+ pvti_if_t *ifc;
+ pool_foreach (ifc, pvti_main.if_pool)
+ {
+ if ((ifc->remote_port == remote_port) &&
+ (ifc->peer_address_from_payload ||
+ (0 == ip_address_cmp (remote_ip, &ifc->remote_ip))))
+ {
+ return (ifc - pvti_main.if_pool);
+ }
+ }
+ return INDEX_INVALID;
+}
+
+/* Record that tunnel t_index uses local UDP port 'port'; used to decide
+ * when to (un)register the UDP dst-port handler. */
+static void
+pvti_add_tidx_by_port (index_t t_index, u16 port)
+{
+ pvti_main_t *pvm = &pvti_main;
+ vec_validate_init_empty (pvm->if_indices_by_port, port, NULL);
+ vec_add1 (pvm->if_indices_by_port[port], t_index);
+}
+
+/* Remove tunnel t_index from the per-port list populated by
+ * pvti_add_tidx_by_port. Unknown ports and empty lists are no-ops in
+ * release builds; an empty list is flagged as a caller bug in debug. */
+static void
+pvti_del_tidx_by_port (index_t t_index, u16 port)
+{
+ pvti_main_t *pvm = &pvti_main;
+ index_t *ii;
+ if (!pvm->if_indices_by_port)
+ {
+ return;
+ }
+ if (port >= vec_len (pvm->if_indices_by_port))
+ {
+ return;
+ }
+ if (vec_len (pvm->if_indices_by_port[port]) == 0)
+ {
+ /* deleting from an empty per-port list means add/del are unbalanced */
+ ASSERT (0);
+ /* not reached in debug builds */
+ return;
+ }
+
+ vec_foreach (ii, pvm->if_indices_by_port[port])
+ {
+ if (*ii == t_index)
+ {
+ /* vec_del1 takes the element index: (ii - base), not (base - ii) */
+ vec_del1 (pvm->if_indices_by_port[port],
+ ii - pvm->if_indices_by_port[port]);
+ break;
+ }
+ }
+}
+
+/* Number of tunnels currently using the given local UDP port. */
+static u32
+pvti_get_tunnel_count_by_port (u16 port)
+{
+ pvti_main_t *pvm = &pvti_main;
+ if (!pvm->if_indices_by_port)
+ {
+ return 0;
+ }
+ /* vec_elt on an index >= vec_len is out of bounds; a port that was never
+ * added simply has no tunnels */
+ if (port >= vec_len (pvm->if_indices_by_port))
+ {
+ return 0;
+ }
+ return vec_len (vec_elt (pvm->if_indices_by_port, port));
+}
+
+/* Admin up/down handler: mirror the sw admin flag onto the hw link state
+ * (a virtual tunnel has no physical carrier to track). */
+static clib_error_t *
+pvti_if_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+ u32 hw_flags;
+
+ hw_flags =
+ (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? VNET_HW_INTERFACE_FLAG_LINK_UP :
+ 0);
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return (NULL);
+}
+
+/* Adjacency update hook for the pvti hw interface class. */
+void
+pvti_if_update_adj (vnet_main_t *vnm, u32 sw_if_index, adj_index_t ai)
+{
+
+ /* Convert any neighbour adjacency that has a next-hop reachable through
+ * the pvti interface into a midchain. This avoids sending ARP/ND to
+ * resolve the next-hop address via the tunnel interface.
+ * (Pattern borrowed from the wireguard interface code.)
+ */
+ adj_nbr_midchain_update_rewrite (ai, NULL, NULL, ADJ_FLAG_NONE, NULL);
+
+ // wgii = wg_if_find_by_sw_if_index (sw_if_index);
+ // wg_if_peer_walk (wg_if_get (wgii), wg_peer_if_adj_change, &ai);
+}
+
+/* Device class: naming and admin up/down for pvti interfaces. */
+VNET_DEVICE_CLASS (pvti_if_device_class) = {
+ .name = "Packet Vectorizer Tunnel",
+ .format_device_name = format_pvti_if_name,
+ .admin_up_down_function = pvti_if_admin_up_down,
+};
+
+/* HW interface class: point-to-point, midchain adjacency rewrite. */
+VNET_HW_INTERFACE_CLASS (pvti_hw_interface_class) = {
+ .name = "PVTunnel",
+ .update_adjacency = pvti_if_update_adj,
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+ // NBMA mode was considered but not enabled:
+ // .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
+};
+
+/* Create a pvti tunnel interface.
+ *
+ * Allocates a pool entry, registers the vnet hw/sw interface, initializes
+ * the per-stream tx/rx state, points the interface L3 output at the
+ * pvti4-output node, and registers the UDP dst-port handlers when this is
+ * the first tunnel on local_port. On success *sw_if_indexp holds the new
+ * sw_if_index. Returns 0 (no failure paths are currently detected). */
+int
+pvti_if_create (ip_address_t *local_ip, u16 local_port,
+ ip_address_t *remote_ip, u16 remote_port,
+ pvti_peer_address_method_t peer_address_method,
+ u16 underlay_mtu, u32 underlay_fib_index, u32 *sw_if_indexp)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ pvti_main_t *pvm = &pvti_main;
+ u32 hw_if_index;
+ vnet_hw_interface_t *hi;
+ pvti_verify_initialized (pvm);
+
+ pvti_if_t *pvti_if;
+
+ ASSERT (sw_if_indexp);
+
+ *sw_if_indexp = (u32) ~0;
+
+ pool_get_zero (pvti_main.if_pool, pvti_if);
+ pvti_if->local_ip = *local_ip;
+ pvti_if->local_port = local_port;
+ pvti_if->remote_ip = *remote_ip;
+ if (peer_address_method == PVTI_PEER_ADDRESS_FROM_PAYLOAD)
+ {
+ pvti_if->peer_address_from_payload = 1;
+ }
+ pvti_if->remote_port = remote_port;
+ pvti_if->underlay_mtu = underlay_mtu;
+ pvti_if->underlay_fib_index = underlay_fib_index;
+ pvti_if->created_at = clib_cpu_time_now ();
+
+ /* tunnel index (or instance) */
+ u32 t_idx = pvti_if - pvti_main.if_pool;
+
+ hw_if_index =
+ vnet_register_interface (vnm, pvti_if_device_class.index, t_idx,
+ pvti_hw_interface_class.index, t_idx);
+
+ pvti_if->hw_if_index = hw_if_index;
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ pvti_if->sw_if_index = *sw_if_indexp = hi->sw_if_index;
+
+ vec_validate_init_empty (pvm->if_index_by_sw_if_index, hi->sw_if_index,
+ INDEX_INVALID);
+
+ vec_elt (pvm->if_index_by_sw_if_index, hi->sw_if_index) = t_idx;
+ /* NOTE(review): pvti_if0 re-derives the same element as pvti_if; no pool
+ * mutation happens in between, so the extra lookup looks redundant */
+ pvti_if_t *pvti_if0 = pool_elt_at_index (pvti_main.if_pool, t_idx);
+ int i;
+ for (i = 0; i < 256; i++)
+ {
+ pvti_if0->tx_streams[i].bi0 = INDEX_INVALID;
+ /* initial TX sequence number; 42 is an arbitrary non-zero start */
+ pvti_if0->tx_streams[i].current_tx_seq = 42;
+
+ pvti_if0->rx_streams[i].rx_bi0 = INDEX_INVALID;
+ pvti_if0->rx_streams[i].rx_bi0_first = INDEX_INVALID;
+ }
+
+ /*
+ int is_ip6 = 0;
+ u32 encap_index = !is_ip6 ?
+ pvti4_output_node.index : pvti6_output_node.index;
+ vnet_set_interface_output_node (vnm, pvti_if->hw_if_index, encap_index);
+ */
+ vnet_set_interface_l3_output_node (vnm->vlib_main, hi->sw_if_index,
+ (u8 *) "pvti4-output");
+
+ /* register UDP handlers only for the first tunnel on this local port */
+ pvti_add_tidx_by_port (t_idx, local_port);
+ if (1 == pvti_get_tunnel_count_by_port (local_port))
+ {
+ /* NOTE(review): clib_warning used for info-level logging; consider
+ * vlib_log_debug instead */
+ clib_warning ("Registering local port %d", local_port);
+ udp_register_dst_port (vlib_get_main (), local_port,
+ pvti4_input_node.index, UDP_IP4);
+ udp_register_dst_port (vlib_get_main (), local_port,
+ pvti6_input_node.index, UDP_IP6);
+ }
+ else
+ {
+ clib_warning ("Not registering the port");
+ }
+
+ vnet_hw_interface_set_flags (vnm, pvti_if->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ return 0;
+}
+
+/* Invoke fn for every pvti interface pool index; stop early if the
+ * callback returns WALK_STOP. */
+void
+pvti_if_walk (pvti_if_walk_cb_t fn, void *data)
+{
+ index_t pvtii;
+
+ pool_foreach_index (pvtii, pvti_main.if_pool)
+ {
+ if (WALK_STOP == fn (pvtii, data))
+ break;
+ }
+}
+
+/* Delete the pvti tunnel identified by sw_if_index.
+ *
+ * Unregisters the UDP handlers when this was the last tunnel on its local
+ * port, removes the vnet interface, frees the pool entry, and flags the
+ * tunnel's per-thread tx peers as deleted so the datapath drops them.
+ * Returns 0 or a VNET_API_ERROR_* code. */
+int
+pvti_if_delete (u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ pvti_main_t *pvm = &pvti_main;
+
+ if (pool_is_free_index (vnm->interface_main.sw_interfaces, sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ /* ensure this sw_if_index really belongs to a pvti interface */
+ vnet_hw_interface_t *hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ if (hw == 0 || hw->dev_class_index != pvti_if_device_class.index)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pvti_if_t *ifc;
+ bool found = 0;
+ pool_foreach (ifc, pvm->if_pool)
+ {
+ if (ifc->sw_if_index == sw_if_index)
+ {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ {
+ return VNET_API_ERROR_INVALID_VALUE_2;
+ }
+ index_t tidx = ifc - pvm->if_pool;
+
+ u16 local_port = ifc->local_port;
+ pvti_del_tidx_by_port (tidx, local_port);
+ /* NOTE(review): assumes if_index_by_sw_if_index was sized by create;
+ * no bounds check here — confirm sw_if_index is always covered */
+ pvm->if_index_by_sw_if_index[sw_if_index] = INDEX_INVALID;
+
+ if (0 == pvti_get_tunnel_count_by_port (local_port))
+ {
+ /* third argument is presumably is_ip4 (1 then 0) — matches the
+ * UDP_IP4/UDP_IP6 registrations in pvti_if_create */
+ udp_unregister_dst_port (vlib_get_main (), local_port, 1);
+ udp_unregister_dst_port (vlib_get_main (), local_port, 0);
+ }
+
+ vnet_reset_interface_l3_output_node (vnm->vlib_main, sw_if_index);
+ vnet_delete_hw_interface (vnm, hw->hw_if_index);
+ pool_put (pvti_main.if_pool, ifc);
+
+ /* mark per-thread peers as deleted (ip4 and ip6 datapaths) */
+ pvti_per_thread_data_t *ptd;
+
+ vec_foreach (ptd, pvm->per_thread_data[0])
+ {
+ pvti_tx_peer_t *peer;
+ vec_foreach (peer, ptd->tx_peers)
+ {
+ if (tidx == peer->pvti_if_index)
+ {
+ peer->deleted = 1;
+ }
+ }
+ }
+ vec_foreach (ptd, pvm->per_thread_data[1])
+ {
+ pvti_tx_peer_t *peer;
+ vec_foreach (peer, ptd->tx_peers)
+ {
+ if (tidx == peer->pvti_if_index)
+ {
+ peer->deleted = 1;
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/src/plugins/pvti/pvti_if.h b/src/plugins/pvti/pvti_if.h
new file mode 100644
index 00000000000..44bf22ce825
--- /dev/null
+++ b/src/plugins/pvti/pvti_if.h
@@ -0,0 +1,47 @@
+#ifndef PVTI_IF_H
+#define PVTI_IF_H
+
+#include <vnet/interface_funcs.h>
+
+/* How the tunnel learns its peer address. */
+typedef enum
+{
+  PVTI_PEER_ADDRESS_FIXED = 0,
+  PVTI_PEER_ADDRESS_FROM_PAYLOAD
+} pvti_peer_address_method_t;
+
+typedef walk_rc_t (*pvti_if_walk_cb_t) (index_t wgi, void *data);
+void pvti_if_walk (pvti_if_walk_cb_t fn, void *data);
+
+int pvti_if_create (ip_address_t *local_ip, u16 local_port,
+		    ip_address_t *remote_ip, u16 remote_port,
+		    pvti_peer_address_method_t peer_address_method,
+		    u16 underlay_mtu, u32 underlay_fib_index,
+		    u32 *sw_if_indexp);
+index_t pvti_if_find_by_sw_if_index (u32 sw_if_index);
+index_t pvti_if_find_by_remote_ip4_and_port (ip4_address_t *remote_ip4,
+					     u16 remote_port);
+/* parameter renamed from copy-pasted "remote_ip4" to match its type */
+index_t pvti_if_find_by_remote_ip6_and_port (ip6_address_t *remote_ip6,
+					     u16 remote_port);
+
+index_t pvti_if_find_by_remote_ip_and_port (ip_address_t *remote_ip,
+					    u16 remote_port);
+
+int pvti_if_delete (u32 sw_if_index);
+
+u8 *format_pvti_if (u8 *s, va_list *args);
+
+/* Pool-index -> interface; returns NULL for INDEX_INVALID. */
+static_always_inline pvti_if_t *
+pvti_if_get (index_t pvtii)
+{
+  if (INDEX_INVALID == pvtii)
+    return (NULL);
+  return (pool_elt_at_index (pvti_main.if_pool, pvtii));
+}
+
+/* Interface -> pool index. */
+static_always_inline index_t
+pvti_if_get_index (pvti_if_t *pvti_if)
+{
+  return pvti_if - pvti_main.if_pool;
+}
+
+#endif
diff --git a/src/plugins/quic/quic.c b/src/plugins/quic/quic.c
index 60d4ac21c19..3797cd2b4ea 100644
--- a/src/plugins/quic/quic.c
+++ b/src/plugins/quic/quic.c
@@ -1058,6 +1058,8 @@ quic_on_stream_open (quicly_stream_open_t * self, quicly_stream_t * stream)
svm_fifo_add_want_deq_ntf (stream_session->rx_fifo,
SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL |
SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY);
+ svm_fifo_init_ooo_lookup (stream_session->rx_fifo, 0 /* ooo enq */);
+ svm_fifo_init_ooo_lookup (stream_session->tx_fifo, 1 /* ooo deq */);
stream_session->session_state = SESSION_STATE_ACCEPTING;
if ((rv = app_worker_accept_notify (app_wrk, stream_session)))
@@ -1302,6 +1304,8 @@ quic_connect_stream (session_t * quic_session, session_endpoint_cfg_t * sep)
return app_worker_connect_notify (app_wrk, NULL, rv, sep->opaque);
}
+ svm_fifo_init_ooo_lookup (stream_session->rx_fifo, 0 /* ooo enq */);
+ svm_fifo_init_ooo_lookup (stream_session->tx_fifo, 1 /* ooo deq */);
svm_fifo_add_want_deq_ntf (stream_session->rx_fifo,
SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL |
SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY);
@@ -1328,14 +1332,16 @@ quic_connect_connection (session_endpoint_cfg_t * sep)
quic_ctx_t *ctx;
app_worker_t *app_wrk;
application_t *app;
+ transport_endpt_ext_cfg_t *ext_cfg;
int error;
- if (!sep->ext_cfg)
+ ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+ if (!ext_cfg)
return SESSION_E_NOEXTCFG;
/* Use pool on thread 1 if we have workers because of UDP */
thread_index = transport_cl_thread ();
- ccfg = &sep->ext_cfg->crypto;
+ ccfg = &ext_cfg->crypto;
clib_memset (cargs, 0, sizeof (*cargs));
ctx_index = quic_ctx_alloc (thread_index);
@@ -1471,13 +1477,15 @@ quic_start_listen (u32 quic_listen_session_index,
quic_ctx_t *lctx;
u32 lctx_index;
app_listener_t *app_listener;
+ transport_endpt_ext_cfg_t *ext_cfg;
int rv;
sep = (session_endpoint_cfg_t *) tep;
- if (!sep->ext_cfg)
+ ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+ if (!ext_cfg)
return SESSION_E_NOEXTCFG;
- ccfg = &sep->ext_cfg->crypto;
+ ccfg = &ext_cfg->crypto;
app_wrk = app_worker_get (sep->app_wrk_index);
app = application_get (app_wrk->app_index);
QUIC_DBG (2, "Called quic_start_listen for app %d", app_wrk->app_index);
@@ -1679,6 +1687,9 @@ quic_on_quic_session_connected (quic_ctx_t * ctx)
return;
}
+ svm_fifo_init_ooo_lookup (quic_session->rx_fifo, 0 /* ooo enq */);
+ svm_fifo_init_ooo_lookup (quic_session->tx_fifo, 1 /* ooo deq */);
+
quic_session->session_state = SESSION_STATE_CONNECTING;
if ((rv = app_worker_connect_notify (app_wrk, quic_session,
SESSION_E_NONE, ctx->client_opaque)))
@@ -2137,6 +2148,9 @@ quic_accept_connection (quic_rx_packet_ctx_t * pctx)
return;
}
+ svm_fifo_init_ooo_lookup (quic_session->rx_fifo, 0 /* ooo enq */);
+ svm_fifo_init_ooo_lookup (quic_session->tx_fifo, 1 /* ooo deq */);
+
app_wrk = app_worker_get (quic_session->app_wrk_index);
quic_session->session_state = SESSION_STATE_ACCEPTING;
if ((rv = app_worker_accept_notify (app_wrk, quic_session)))
diff --git a/src/plugins/snort/CMakeLists.txt b/src/plugins/snort/CMakeLists.txt
index bd9dcdc4fdd..3fc2bd625a4 100644
--- a/src/plugins/snort/CMakeLists.txt
+++ b/src/plugins/snort/CMakeLists.txt
@@ -7,6 +7,10 @@ add_vpp_plugin(snort
dequeue.c
main.c
cli.c
+ snort_api.c
+
+ API_FILES
+ snort.api
MULTIARCH_SOURCES
enqueue.c
diff --git a/src/plugins/snort/cli.c b/src/plugins/snort/cli.c
index 08740f41b37..4b6dbc742a7 100644
--- a/src/plugins/snort/cli.c
+++ b/src/plugins/snort/cli.c
@@ -25,6 +25,7 @@ snort_create_instance_command_fn (vlib_main_t *vm, unformat_input_t *input,
u8 *name = 0;
u32 queue_size = 1024;
u8 drop_on_diconnect = 1;
+ int rv = 0;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -60,8 +61,30 @@ snort_create_instance_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
}
- err = snort_instance_create (vm, (char *) name, min_log2 (queue_size),
- drop_on_diconnect);
+ rv = snort_instance_create (vm, (char *) name, min_log2 (queue_size),
+ drop_on_diconnect);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case VNET_API_ERROR_ENTRY_ALREADY_EXISTS:
+ err = clib_error_return (0, "instance '%s' already exists", name);
+ break;
+ case VNET_API_ERROR_SYSCALL_ERROR_1:
+ err = clib_error_return (0, "memory fd failure: %U", format_clib_error,
+ clib_mem_get_last_error ());
+ break;
+ case VNET_API_ERROR_SYSCALL_ERROR_2:
+ err = clib_error_return (0, "ftruncate failure");
+ break;
+ case VNET_API_ERROR_SYSCALL_ERROR_3:
+ err = clib_error_return (0, "mmap failure");
+ break;
+ default:
+ err = clib_error_return (0, "snort_instance_create returned %d", rv);
+ break;
+ }
done:
vec_free (name);
@@ -77,6 +100,118 @@ VLIB_CLI_COMMAND (snort_create_instance_command, static) = {
};
+/* CLI "snort disconnect instance <name>": force-disconnect the client
+ * attached to the named snort instance, mapping the VNET_API_ERROR_*
+ * result back to a human-readable CLI error. */
static clib_error_t *
+snort_disconnect_instance_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *err = 0;
+ u8 *name = 0;
+ snort_instance_t *si;
+ int rv = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, "please specify instance name");
+
+ if (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ unformat (line_input, "%s", &name);
+
+ if (!name)
+ {
+ err = clib_error_return (0, "please specify instance name");
+ goto done;
+ }
+
+ si = snort_get_instance_by_name ((char *) name);
+ if (!si)
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ else
+ rv = snort_instance_disconnect (vm, si->index);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ err = clib_error_return (0, "unknown instance '%s'", name);
+ break;
+ case VNET_API_ERROR_FEATURE_DISABLED:
+ err = clib_error_return (0, "instance '%s' is not connected", name);
+ break;
+ case VNET_API_ERROR_INVALID_VALUE:
+ err = clib_error_return (0, "failed to disconnect a broken client");
+ break;
+ default:
+ err = clib_error_return (0, "snort_instance_disconnect returned %d", rv);
+ break;
+ }
+
+done:
+ vec_free (name);
+ unformat_free (line_input);
+ return err;
+}
+
+VLIB_CLI_COMMAND (snort_disconnect_instance_command, static) = {
+ .path = "snort disconnect instance",
+ .short_help = "snort disconnect instance <name>",
+ .function = snort_disconnect_instance_command_fn,
+};
+
+/* CLI "snort delete instance <name>": delete the named snort instance,
+ * mapping the VNET_API_ERROR_* result back to a CLI error message. */
+static clib_error_t *
+snort_delete_instance_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *err = 0;
+ u8 *name = 0;
+ int rv = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, "please specify instance name");
+
+ if (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ unformat (line_input, "%s", &name);
+
+ if (!name)
+ {
+ err = clib_error_return (0, "please specify instance name");
+ goto done;
+ }
+
+ snort_instance_t *si = snort_get_instance_by_name ((char *) name);
+ if (!si)
+ err = clib_error_return (0, "unknown instance '%s' requested", name);
+ else
+ rv = snort_instance_delete (vm, si->index);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ err = clib_error_return (0, "instance '%s' deletion failure", name);
+ break;
+ case VNET_API_ERROR_INSTANCE_IN_USE:
+ err = clib_error_return (0, "instance '%s' has connected client", name);
+ break;
+ default:
+ err = clib_error_return (0, "snort_instance_delete returned %d", rv);
+ break;
+ }
+
+done:
+ vec_free (name);
+ unformat_free (line_input);
+ return err;
+}
+
+VLIB_CLI_COMMAND (snort_delete_instance_command, static) = {
+ .path = "snort delete instance",
+ .short_help = "snort delete instance <name>",
+ .function = snort_delete_instance_command_fn,
+};
+
+static clib_error_t *
snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
@@ -86,6 +221,7 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input,
u8 *name = 0;
u32 sw_if_index = ~0;
snort_attach_dir_t dir = SNORT_INOUT;
+ int rv = 0;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -124,8 +260,29 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
}
- err =
- snort_interface_enable_disable (vm, (char *) name, sw_if_index, 1, dir);
+ rv = snort_interface_enable_disable (vm, (char *) name, sw_if_index, 1, dir);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case VNET_API_ERROR_FEATURE_ALREADY_ENABLED:
+ /* already attached to same instance */
+ break;
+ case VNET_API_ERROR_INSTANCE_IN_USE:
+ err = clib_error_return (0,
+ "interface %U already assigned to "
+ "an instance",
+ format_vnet_sw_if_index_name, vnm, sw_if_index);
+ break;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ err = clib_error_return (0, "unknown instance '%s'", name);
+ break;
+ default:
+ err = clib_error_return (0, "snort_interface_enable_disable returned %d",
+ rv);
+ break;
+ }
done:
vec_free (name);
@@ -148,6 +305,7 @@ snort_detach_command_fn (vlib_main_t *vm, unformat_input_t *input,
vnet_main_t *vnm = vnet_get_main ();
clib_error_t *err = 0;
u32 sw_if_index = ~0;
+ int rv = 0;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -172,7 +330,23 @@ snort_detach_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
}
- err = snort_interface_enable_disable (vm, 0, sw_if_index, 0, SNORT_INOUT);
+ rv = snort_interface_enable_disable (vm, 0, sw_if_index, 0, SNORT_INOUT);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case VNET_API_ERROR_INVALID_INTERFACE:
+ err = clib_error_return (0,
+ "interface %U is not assigned to snort "
+ "instance!",
+ format_vnet_sw_if_index_name, vnm, sw_if_index);
+ break;
+ default:
+ err = clib_error_return (0, "snort_interface_enable_disable returned %d",
+ rv);
+ break;
+ }
done:
unformat_free (line_input);
@@ -213,7 +387,7 @@ snort_show_interfaces_command_fn (vlib_main_t *vm, unformat_input_t *input,
snort_instance_t *si;
u32 *index;
- vlib_cli_output (vm, "interface\tsnort instance");
+ vlib_cli_output (vm, "interface\t\tsnort instance");
vec_foreach (index, sm->instance_by_sw_if_index)
{
if (index[0] != ~0)
@@ -237,7 +411,18 @@ snort_show_clients_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
snort_main_t *sm = &snort_main;
- vlib_cli_output (vm, "number of clients: %d", pool_elts (sm->clients));
+ u32 n_clients = pool_elts (sm->clients);
+ snort_client_t *c;
+ snort_instance_t *si;
+
+ vlib_cli_output (vm, "number of clients: %d", n_clients);
+ if (n_clients)
+ vlib_cli_output (vm, "client snort instance");
+ pool_foreach (c, sm->clients)
+ {
+ si = vec_elt_at_index (sm->instances, c->instance_index);
+ vlib_cli_output (vm, "%6d %s", c - sm->clients, si->name);
+ }
return 0;
}
@@ -251,14 +436,16 @@ static clib_error_t *
snort_mode_polling_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
- return snort_set_node_mode (vm, VLIB_NODE_STATE_POLLING);
+ snort_set_node_mode (vm, VLIB_NODE_STATE_POLLING);
+ return 0;
}
static clib_error_t *
snort_mode_interrupt_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
- return snort_set_node_mode (vm, VLIB_NODE_STATE_INTERRUPT);
+ snort_set_node_mode (vm, VLIB_NODE_STATE_INTERRUPT);
+ return 0;
}
VLIB_CLI_COMMAND (snort_mode_polling_command, static) = {
diff --git a/src/plugins/snort/daq_vpp.c b/src/plugins/snort/daq_vpp.c
index 386092a0382..6fc0bf5506a 100644
--- a/src/plugins/snort/daq_vpp.c
+++ b/src/plugins/snort/daq_vpp.c
@@ -10,6 +10,7 @@
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/mman.h>
+#include <sys/time.h>
#include <errno.h>
#include <sys/epoll.h>
@@ -521,6 +522,7 @@ vpp_daq_msg_receive_one (VPP_Context_t *vc, VPPQueuePair *qp,
{
uint32_t n_recv, n_left;
uint32_t head, next, mask = qp->queue_size - 1;
+ struct timeval tv;
if (max_recv == 0)
return 0;
@@ -535,11 +537,14 @@ vpp_daq_msg_receive_one (VPP_Context_t *vc, VPPQueuePair *qp,
n_left = n_recv = max_recv;
}
+ gettimeofday (&tv, NULL);
while (n_left--)
{
uint32_t desc_index = qp->enq_ring[next & mask];
daq_vpp_desc_t *d = qp->descs + desc_index;
VPPDescData *dd = qp->desc_data + desc_index;
+ dd->pkthdr.ts.tv_sec = tv.tv_sec;
+ dd->pkthdr.ts.tv_usec = tv.tv_usec;
dd->pkthdr.pktlen = d->length;
dd->pkthdr.address_space_id = d->address_space_id;
dd->msg.data = vc->bpools[d->buffer_pool].base + d->offset;
diff --git a/src/plugins/snort/dequeue.c b/src/plugins/snort/dequeue.c
index 31745de404c..bc301f6888b 100644
--- a/src/plugins/snort/dequeue.c
+++ b/src/plugins/snort/dequeue.c
@@ -307,7 +307,7 @@ snort_deq_node_polling (vlib_main_t *vm, vlib_node_runtime_t *node,
snort_qpair_t *qp;
snort_instance_t *si;
- vec_foreach (si, sm->instances)
+ pool_foreach (si, sm->instances)
{
qp = vec_elt_at_index (si->qpairs, vm->thread_index);
u32 ready = __atomic_load_n (&qp->ready, __ATOMIC_ACQUIRE);
diff --git a/src/plugins/snort/enqueue.c b/src/plugins/snort/enqueue.c
index 409c0e49078..ce4f34491ec 100644
--- a/src/plugins/snort/enqueue.c
+++ b/src/plugins/snort/enqueue.c
@@ -133,7 +133,7 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
nexts, n_processed);
}
- vec_foreach (si, sm->instances)
+ pool_foreach (si, sm->instances)
{
u32 head, freelist_len, n_pending, n_enq, mask;
u64 ctr = 1;
diff --git a/src/plugins/snort/main.c b/src/plugins/snort/main.c
index 2430fcdc5c2..50bff027a13 100644
--- a/src/plugins/snort/main.c
+++ b/src/plugins/snort/main.c
@@ -3,10 +3,24 @@
*/
#include <vlib/vlib.h>
+#include <vlibapi/api_types.h>
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
#include <snort/snort.h>
+#include <snort/snort.api_enum.h>
+#include <snort/snort.api_types.h>
+
+#include <vnet/ip/ip_types_api.h>
+#include <vnet/format_fns.h>
+
+#include <vlibapi/api_helper_macros.h>
+
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
#include <sys/eventfd.h>
snort_main_t snort_main;
@@ -18,6 +32,12 @@ VLIB_REGISTER_LOG_CLASS (snort_log, static) = {
#define log_debug(fmt, ...) vlib_log_debug (snort_log.class, fmt, __VA_ARGS__)
#define log_err(fmt, ...) vlib_log_err (snort_log.class, fmt, __VA_ARGS__)
+/* Accessor for the singleton snort_main state (used by the API handlers).
+ * "(void)" makes this a proper prototype; an empty "()" parameter list is
+ * an obsolescent K&R-style declaration in C. */
+snort_main_t *
+snort_get_main (void)
+{
+ return &snort_main;
+}
+
static void
snort_client_disconnect (clib_file_t *uf)
{
@@ -45,7 +65,38 @@ snort_client_disconnect (clib_file_t *uf)
pool_put (sm->clients, c);
}
-static snort_instance_t *
+/* Disconnect the client attached to the given snort instance.
+ * Returns 0 on success, VNET_API_ERROR_NO_SUCH_ENTRY for an unknown
+ * instance, VNET_API_ERROR_FEATURE_DISABLED if no client is connected,
+ * and VNET_API_ERROR_INVALID_VALUE if the client's file descriptor
+ * cannot be located (broken client). */
+int
+snort_instance_disconnect (vlib_main_t *vm, u32 instance_index)
+{
+ snort_main_t *sm = &snort_main;
+ snort_instance_t *si;
+ snort_client_t *client;
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *uf = 0;
+ int rv = 0;
+
+ si = snort_get_instance_by_index (instance_index);
+ if (!si)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (si->client_index == ~0)
+ return VNET_API_ERROR_FEATURE_DISABLED;
+
+ client = pool_elt_at_index (sm->clients, si->client_index);
+ uf = clib_file_get (fm, client->file_index);
+ if (uf)
+ snort_client_disconnect (uf);
+ else
+ {
+ /* fixed missing space: adjacent literals used to produce
+ * "...frominstance..." */
+ log_err ("failed to disconnect a broken client from "
+ "instance '%s'",
+ si->name);
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ return rv;
+}
+
+snort_instance_t *
snort_get_instance_by_name (char *name)
{
snort_main_t *sm = &snort_main;
@@ -54,7 +105,16 @@ snort_get_instance_by_name (char *name)
return 0;
return vec_elt_at_index (sm->instances, p[0]);
- ;
+}
+
+/* Look up a snort instance by pool index; returns NULL if the index is
+ * free or out of range. */
+snort_instance_t *
+snort_get_instance_by_index (u32 instance_index)
+{
+ snort_main_t *sm = &snort_main;
+
+ if (pool_is_free_index (sm->instances, instance_index))
+ return 0;
+ return pool_elt_at_index (sm->instances, instance_index);
+}
static clib_error_t *
@@ -110,6 +170,8 @@ snort_conn_fd_read_ready (clib_file_t *uf)
snort_client_disconnect (uf);
return 0;
}
+ snort_freelist_init (qp->freelist);
+ *qp->enq_head = *qp->deq_head = qp->next_desc = 0;
}
base = (u8 *) si->shm_base;
@@ -281,14 +343,13 @@ snort_listener_init (vlib_main_t *vm)
return 0;
}
-clib_error_t *
+int
snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz,
u8 drop_on_disconnect)
{
vlib_thread_main_t *tm = vlib_get_thread_main ();
snort_main_t *sm = &snort_main;
snort_instance_t *si;
- clib_error_t *err = 0;
u32 index, i;
u8 *base = CLIB_MEM_VM_MAP_FAILED;
u32 size;
@@ -296,9 +357,10 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz,
u32 qpair_mem_sz = 0;
u32 qsz = 1 << log2_queue_sz;
u8 align = CLIB_CACHE_LINE_BYTES;
+ int rv = 0;
if (snort_get_instance_by_name (name))
- return clib_error_return (0, "instance already exists");
+ return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
/* descriptor table */
qpair_mem_sz += round_pow2 (qsz * sizeof (daq_vpp_desc_t), align);
@@ -316,14 +378,13 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz,
if (fd == -1)
{
- err = clib_error_return (0, "memory fd failure: %U", format_clib_error,
- clib_mem_get_last_error ());
+ rv = VNET_API_ERROR_SYSCALL_ERROR_1;
goto done;
}
if ((ftruncate (fd, size)) == -1)
{
- err = clib_error_return (0, "ftruncate failure");
+ rv = VNET_API_ERROR_SYSCALL_ERROR_2;
goto done;
}
@@ -331,7 +392,7 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz,
if (base == CLIB_MEM_VM_MAP_FAILED)
{
- err = clib_error_return (0, "mmap failure");
+ rv = VNET_API_ERROR_SYSCALL_ERROR_3;
goto done;
}
@@ -399,17 +460,17 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz,
sm->input_mode);
done:
- if (err)
+ if (rv)
{
if (base != CLIB_MEM_VM_MAP_FAILED)
clib_mem_vm_unmap (base);
if (fd != -1)
close (fd);
}
- return err;
+ return rv;
}
-clib_error_t *
+int
snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
u32 sw_if_index, int is_enable,
snort_attach_dir_t snort_dir)
@@ -417,16 +478,16 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
snort_main_t *sm = &snort_main;
vnet_main_t *vnm = vnet_get_main ();
snort_instance_t *si;
- clib_error_t *err = 0;
u64 fa_data;
u32 index;
+ int rv = 0;
if (is_enable)
{
if ((si = snort_get_instance_by_name (instance_name)) == 0)
{
- err = clib_error_return (0, "unknown instance '%s'", instance_name);
- goto done;
+ log_err ("unknown instance '%s'", instance_name);
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
}
vec_validate_init_empty (sm->instance_by_sw_if_index, sw_if_index, ~0);
@@ -434,12 +495,13 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
index = sm->instance_by_sw_if_index[sw_if_index];
if (index != ~0)
{
+ if (index == si->index)
+ rv = VNET_API_ERROR_FEATURE_ALREADY_ENABLED;
+ else
+ rv = VNET_API_ERROR_INSTANCE_IN_USE;
si = vec_elt_at_index (sm->instances, index);
- err = clib_error_return (0,
- "interface %U already assgined to "
- "instance '%s'",
- format_vnet_sw_if_index_name, vnm,
- sw_if_index, si->name);
+ log_err ("interface %U already assgined to instance '%s'",
+ format_vnet_sw_if_index_name, vnm, sw_if_index, si->name);
goto done;
}
@@ -462,11 +524,9 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
if (sw_if_index >= vec_len (sm->instance_by_sw_if_index) ||
sm->instance_by_sw_if_index[sw_if_index] == ~0)
{
- err =
- clib_error_return (0,
- "interface %U is not assigned to snort "
- "instance!",
- format_vnet_sw_if_index_name, vnm, sw_if_index);
+ rv = VNET_API_ERROR_INVALID_INTERFACE;
+ log_err ("interface %U is not assigned to snort instance!",
+ format_vnet_sw_if_index_name, vnm, sw_if_index);
goto done;
}
index = sm->instance_by_sw_if_index[sw_if_index];
@@ -488,12 +548,66 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
}
done:
- if (err)
- log_err ("%U", format_clib_error, err);
- return 0;
+ return rv;
}
-clib_error_t *
+/* Detach every interface currently assigned to the given instance;
+ * stops at the first failure and returns its error code (0 on success). */
+static int
+snort_strip_instance_interfaces (vlib_main_t *vm, u32 instance_index)
+{
+ snort_main_t *sm = &snort_main;
+ u32 *index;
+ int rv = 0;
+
+ vec_foreach (index, sm->instance_by_sw_if_index)
+ {
+ /* NOTE(review): last argument 0 is the snort_attach_dir_t; the CLI
+ * detach path passes SNORT_INOUT instead — confirm the disable path
+ * ignores the direction */
+ if (*index == instance_index)
+ rv = snort_interface_enable_disable (
+ vm, NULL, index - sm->instance_by_sw_if_index, 0, 0);
+ if (rv)
+ break;
+ }
+
+ return rv;
+}
+
+/* Delete a snort instance: refuse if a client is still connected, detach
+ * its interfaces, unmap its shared memory, remove its qpair deq files,
+ * and free the pool entry. Returns 0 or a VNET_API_ERROR_* code. */
+int
+snort_instance_delete (vlib_main_t *vm, u32 instance_index)
+{
+ snort_main_t *sm = &snort_main;
+ snort_instance_t *si;
+ snort_qpair_t *qp;
+ int rv = 0;
+
+ si = snort_get_instance_by_index (instance_index);
+ if (!si)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ /* a connected client must be disconnected first */
+ if (si->client_index != ~0)
+ return VNET_API_ERROR_INSTANCE_IN_USE;
+
+ if ((rv = snort_strip_instance_interfaces (vm, si->index)))
+ return rv;
+
+ /* unhash by name before the name vector is freed below */
+ hash_unset_mem (sm->instance_by_name, si->name);
+
+ clib_mem_vm_unmap (si->shm_base);
+ close (si->shm_fd);
+
+ vec_foreach (qp, si->qpairs)
+ {
+ clib_file_del_by_index (&file_main, qp->deq_fd_file_index);
+ }
+
+ log_debug ("deleting instance '%s'", si->name);
+
+ vec_free (si->qpairs);
+ vec_free (si->name);
+ pool_put (sm->instances, si);
+
+ return rv;
+}
+
+int
snort_set_node_mode (vlib_main_t *vm, u32 mode)
{
int i;
diff --git a/src/plugins/snort/snort.api b/src/plugins/snort/snort.api
new file mode 100644
index 00000000000..5c65f79e68a
--- /dev/null
+++ b/src/plugins/snort/snort.api
@@ -0,0 +1,226 @@
+option version = "1.0.0";
+
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
+
+define snort_instance_create {
+ u32 client_index;
+ u32 context;
+ u32 queue_size;
+ u8 drop_on_disconnect;
+ string name[];
+};
+
+define snort_instance_create_reply {
+ u32 context;
+ i32 retval;
+ u32 instance_index;
+};
+
+define snort_instance_delete {
+ u32 client_index;
+ u32 context;
+ u32 instance_index;
+};
+
+define snort_instance_delete_reply {
+ u32 context;
+ i32 retval;
+};
+
+define snort_client_disconnect {
+ u32 client_index;
+ u32 context;
+ u32 snort_client_index;
+};
+
+define snort_client_disconnect_reply {
+ u32 context;
+ i32 retval;
+};
+
+define snort_instance_disconnect {
+ u32 client_index;
+ u32 context;
+ u32 instance_index;
+};
+
+define snort_instance_disconnect_reply {
+ u32 context;
+ i32 retval;
+};
+
+define snort_interface_attach {
+ u32 client_index;
+ u32 context;
+ u32 instance_index;
+ u32 sw_if_index;
+ u8 snort_dir;
+};
+
+define snort_interface_attach_reply {
+ u32 context;
+ i32 retval;
+};
+
+define snort_interface_detach {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+define snort_interface_detach_reply {
+ u32 context;
+ i32 retval;
+};
+
+define snort_input_mode_get {
+ u32 client_index;
+ u32 context;
+};
+
+define snort_input_mode_get_reply {
+ u32 context;
+ i32 retval;
+ u32 snort_mode;
+};
+
+define snort_input_mode_set {
+ u32 client_index;
+ u32 context;
+ u8 input_mode;
+};
+
+define snort_input_mode_set_reply {
+ u32 context;
+ i32 retval;
+};
+
+service {
+ rpc snort_instance_get returns snort_instance_get_reply
+ stream snort_instance_details;
+};
+
+/** \brief Get snort instance(s).
+ @param client_index - opaque cookie to identify the sender.
+ @param context - sender context
+ @param cursor - current iterator value (all requested).
+ @param instance_index - instance index (~0 for all).
+*/
+define snort_instance_get
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+ u32 instance_index;
+};
+
+/** \brief Reply for snort instance(s).
+ @param context - sender context
+ @param retval - return code for the request.
+ @param cursor - iterator value to continue with (if there is more).
+*/
+define snort_instance_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+/** \brief Details of a snort instance.
+ @param context - sender context
+ @param instance - snort instance info.
+*/
+define snort_instance_details {
+ u32 context;
+ u32 instance_index;
+ u32 shm_size;
+ u32 shm_fd;
+ u8 drop_on_disconnect;
+ u32 snort_client_index;
+ string name[];
+};
+
+service {
+ rpc snort_interface_get returns snort_interface_get_reply
+ stream snort_interface_details;
+};
+
+/** \brief Get snort interface(s).
+ @param client_index - opaque cookie to identify the sender.
+ @param context - sender context
+ @param cursor - current iterator value (all requested).
+ @param sw_if_index - sw if index (~0 for all).
+*/
+define snort_interface_get
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+ u32 sw_if_index;
+};
+
+/** \brief Reply for snort interface(s).
+ @param context - sender context
+ @param retval - return code for the request.
+ @param cursor - iterator value to continue with (if there is more).
+*/
+define snort_interface_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+/** \brief Details of a snort interface.
+ @param context - sender context
+ @param sw_if_index - interface index
+ @param instance_index - snort instance the interface is attached to.
+*/
+define snort_interface_details {
+ u32 context;
+ u32 sw_if_index;
+ u32 instance_index;
+};
+
+service {
+ rpc snort_client_get returns snort_client_get_reply
+ stream snort_client_details;
+};
+
+/** \brief Get snort clients.
+ @param client_index - opaque cookie to identify the sender.
+ @param context - sender context
+ @param cursor - current iterator value (all requested).
+ @param snort_client_index - snort client index (~0 for all).
+*/
+define snort_client_get
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+ u32 snort_client_index;
+};
+
+/** \brief Reply for snort clients.
+ @param context - sender context
+ @param retval - return code for the request.
+ @param cursor - iterator value to continue with (if there is more).
+*/
+define snort_client_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+/** \brief Details of a snort client.
+ @param context - sender context
+ @param client_index - index of the snort client.
+ @param instance_index - snort instance of the client.
+*/
+define snort_client_details {
+ u32 context;
+ u32 client_index;
+ u32 instance_index;
+};
diff --git a/src/plugins/snort/snort.h b/src/plugins/snort/snort.h
index 79299aa6d91..c7e856c0127 100644
--- a/src/plugins/snort/snort.h
+++ b/src/plugins/snort/snort.h
@@ -7,6 +7,7 @@
#include <vppinfra/error.h>
#include <vppinfra/socket.h>
+#include <vppinfra/file.h>
#include <vlib/vlib.h>
#include <snort/daq_vpp.h>
@@ -78,8 +79,11 @@ typedef struct
snort_per_thread_data_t *per_thread_data;
u32 input_mode;
u8 *socket_name;
+ /* API message ID base */
+ u16 msg_id_base;
} snort_main_t;
+extern clib_file_main_t file_main;
extern snort_main_t snort_main;
extern vlib_node_registration_t snort_enq_node;
extern vlib_node_registration_t snort_deq_node;
@@ -103,13 +107,17 @@ typedef enum
}
/* functions */
-clib_error_t *snort_instance_create (vlib_main_t *vm, char *name,
- u8 log2_queue_sz, u8 drop_on_disconnect);
-clib_error_t *snort_interface_enable_disable (vlib_main_t *vm,
- char *instance_name,
- u32 sw_if_index, int is_enable,
- snort_attach_dir_t dir);
-clib_error_t *snort_set_node_mode (vlib_main_t *vm, u32 mode);
+snort_main_t *snort_get_main ();
+snort_instance_t *snort_get_instance_by_index (u32 instance_index);
+snort_instance_t *snort_get_instance_by_name (char *name);
+int snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz,
+ u8 drop_on_disconnect);
+int snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
+ u32 sw_if_index, int is_enable,
+ snort_attach_dir_t dir);
+int snort_set_node_mode (vlib_main_t *vm, u32 mode);
+int snort_instance_delete (vlib_main_t *vm, u32 instance_index);
+int snort_instance_disconnect (vlib_main_t *vm, u32 instance_index);
always_inline void
snort_freelist_init (u32 *fl)
diff --git a/src/plugins/snort/snort_api.c b/src/plugins/snort/snort_api.c
new file mode 100644
index 00000000000..adad0d8763f
--- /dev/null
+++ b/src/plugins/snort/snort_api.c
@@ -0,0 +1,405 @@
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <snort/snort.h>
+#include <vlibapi/api_types.h>
+
+#include <snort/snort.api_enum.h>
+#include <snort/snort.api_types.h>
+
+#include <vlibmemory/api.h>
+#include <vnet/ip/ip_types_api.h>
+#include <vnet/format_fns.h>
+#include <vnet/api_errno.h>
+
+/**
+ * Base message ID fot the plugin
+ */
+static u32 snort_base_msg_id;
+#define REPLY_MSG_ID_BASE snort_base_msg_id
+
+#include <vlibapi/api_helper_macros.h>
+
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <sys/eventfd.h>
+
+VLIB_REGISTER_LOG_CLASS (snort_log, static) = {
+ .class_name = "snort",
+};
+
+#define log_debug(fmt, ...) vlib_log_debug (snort_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...) vlib_log_err (snort_log.class, fmt, __VA_ARGS__)
+
+static void
+vl_api_snort_instance_create_t_handler (vl_api_snort_instance_create_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_snort_instance_create_reply_t *rmp;
+ char *name = vl_api_from_api_to_new_c_string (&mp->name);
+ u32 queue_sz = clib_net_to_host_u32 (mp->queue_size);
+ u8 drop_on_disconnect = mp->drop_on_disconnect;
+ int rv = 0;
+ u32 instance_index = ~0;
+ snort_instance_t *si;
+
+ rv =
+ snort_instance_create (vm, name, min_log2 (queue_sz), drop_on_disconnect);
+
+ if ((si = snort_get_instance_by_name (name)))
+ {
+ instance_index = si->index;
+ }
+
+ REPLY_MACRO2 (VL_API_SNORT_INSTANCE_CREATE_REPLY, ({
+ rmp->instance_index = clib_host_to_net_u32 (instance_index);
+ }));
+}
+
+static void
+vl_api_snort_instance_delete_t_handler (vl_api_snort_instance_delete_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_snort_instance_delete_reply_t *rmp;
+ u32 instance_index = clib_net_to_host_u32 (mp->instance_index);
+ int rv;
+
+ rv = snort_instance_delete (vm, instance_index);
+
+ REPLY_MACRO (VL_API_SNORT_INSTANCE_DELETE_REPLY);
+}
+
+static void
+vl_api_snort_interface_attach_t_handler (vl_api_snort_interface_attach_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_snort_interface_attach_reply_t *rmp;
+ u32 instance_index = clib_net_to_host_u32 (mp->instance_index);
+ snort_instance_t *instance = 0;
+ u32 sw_if_index = clib_net_to_host_u32 (mp->sw_if_index);
+ u8 snort_dir = mp->snort_dir;
+ int rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ if (sw_if_index == INDEX_INVALID)
+ rv = VNET_API_ERROR_NO_MATCHING_INTERFACE;
+ else
+ {
+ instance = snort_get_instance_by_index (instance_index);
+ if (instance)
+ rv = snort_interface_enable_disable (vm, (char *) instance->name,
+ sw_if_index, 1 /* is_enable */,
+ snort_dir);
+ }
+
+ REPLY_MACRO (VL_API_SNORT_INTERFACE_ATTACH_REPLY);
+}
+
+static void
+send_snort_instance_details (const snort_instance_t *instance,
+ vl_api_registration_t *rp, u32 context)
+{
+ vl_api_snort_instance_details_t *rmp;
+ u32 name_len = vec_len (instance->name);
+
+ REPLY_MACRO_DETAILS5 (
+ VL_API_SNORT_INSTANCE_DETAILS, name_len, rp, context, ({
+ rmp->instance_index = clib_host_to_net_u32 (instance->index);
+ vl_api_vec_to_api_string (instance->name, &rmp->name);
+ rmp->snort_client_index = clib_host_to_net_u32 (instance->client_index);
+ rmp->shm_size = clib_host_to_net_u32 (instance->shm_size);
+ rmp->shm_fd = clib_host_to_net_u32 (instance->shm_fd);
+ rmp->drop_on_disconnect = instance->drop_on_disconnect;
+ }));
+}
+
+static void
+vl_api_snort_instance_get_t_handler (vl_api_snort_instance_get_t *mp)
+{
+ snort_main_t *sm = snort_get_main ();
+ snort_instance_t *instance = 0;
+ vl_api_snort_instance_get_reply_t *rmp;
+ u32 instance_index;
+ int rv = 0;
+
+ instance_index = clib_net_to_host_u32 (mp->instance_index);
+
+ if (instance_index == INDEX_INVALID)
+ {
+ /* clang-format off */
+ REPLY_AND_DETAILS_MACRO (
+ VL_API_SNORT_INSTANCE_GET_REPLY, sm->instances, ({
+ instance = pool_elt_at_index (sm->instances, cursor);
+ send_snort_instance_details (instance, rp, mp->context);
+ }));
+ /* clang-format on */
+ }
+ else
+ {
+ instance = snort_get_instance_by_index (instance_index);
+
+ if (instance)
+ {
+ vl_api_registration_t *rp =
+ vl_api_client_index_to_registration (mp->client_index);
+
+ if (rp == NULL)
+ {
+ return;
+ }
+
+ send_snort_instance_details (instance, rp, mp->context);
+ }
+ else
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ /* clang-format off */
+ REPLY_MACRO2 (VL_API_SNORT_INSTANCE_GET_REPLY, ({
+ rmp->cursor = INDEX_INVALID;
+ }));
+ /* clang-format on */
+ }
+}
+
+static void
+send_snort_interface_details (u32 sw_if_index, u32 instance_index,
+ vl_api_registration_t *rp, u32 context)
+{
+ vl_api_snort_interface_details_t *rmp;
+
+ if (instance_index != ~0)
+ {
+ REPLY_MACRO_DETAILS4 (VL_API_SNORT_INTERFACE_DETAILS, rp, context, ({
+ rmp->instance_index =
+ clib_host_to_net_u32 (instance_index);
+ rmp->sw_if_index =
+ clib_host_to_net_u32 (sw_if_index);
+ }));
+ }
+}
+
+static void
+vl_api_snort_interface_get_t_handler (vl_api_snort_interface_get_t *mp)
+{
+ snort_main_t *sm = snort_get_main ();
+ vl_api_snort_interface_get_reply_t *rmp;
+ u32 sw_if_index;
+ u32 *index;
+ int rv = 0;
+
+ sw_if_index = clib_net_to_host_u32 (mp->sw_if_index);
+
+ if (sw_if_index == INDEX_INVALID)
+ {
+ /* clang-format off */
+ if (vec_len (sm->instance_by_sw_if_index) == 0)
+ {
+ REPLY_MACRO2 (VL_API_SNORT_INTERFACE_GET_REPLY, ({ rmp->cursor = ~0; }));
+ return;
+ }
+
+ REPLY_AND_DETAILS_VEC_MACRO(
+ VL_API_SNORT_INTERFACE_GET_REPLY,
+ sm->instance_by_sw_if_index,
+ mp, rmp, rv, ({
+ index = vec_elt_at_index (sm->instance_by_sw_if_index, cursor);
+ send_snort_interface_details (cursor, *index, rp, mp->context);
+ }))
+ /* clang-format on */
+ }
+ else
+ {
+ index = vec_elt_at_index (sm->instance_by_sw_if_index, sw_if_index);
+ if (snort_get_instance_by_index (index[0]))
+ {
+ vl_api_registration_t *rp =
+ vl_api_client_index_to_registration (mp->client_index);
+
+ if (rp == NULL)
+ {
+ return;
+ }
+
+ send_snort_interface_details (sw_if_index, *index, rp, mp->context);
+ }
+ else
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ /* clang-format off */
+ REPLY_MACRO2 (VL_API_SNORT_INTERFACE_GET_REPLY, ({
+ rmp->cursor = INDEX_INVALID;
+ }));
+ /* clang-format on */
+ }
+}
+
+static void
+send_snort_client_details (const snort_client_t *client,
+ vl_api_registration_t *rp, u32 context)
+{
+ snort_main_t *sm = snort_get_main ();
+ vl_api_snort_client_details_t *rmp;
+ snort_instance_t *instance;
+
+ if (client->instance_index == ~0)
+ {
+ return;
+ }
+
+ instance = pool_elt_at_index (sm->instances, client->instance_index);
+ if (instance)
+ {
+ REPLY_MACRO_DETAILS4 (VL_API_SNORT_CLIENT_DETAILS, rp, context, ({
+ rmp->instance_index =
+ clib_host_to_net_u32 (client->instance_index);
+ rmp->client_index =
+ clib_host_to_net_u32 (client - sm->clients);
+ }));
+ }
+}
+
+static void
+vl_api_snort_client_get_t_handler (vl_api_snort_client_get_t *mp)
+{
+ snort_main_t *sm = snort_get_main ();
+ snort_client_t *client;
+ vl_api_snort_client_get_reply_t *rmp;
+ u32 client_index;
+ int rv = 0;
+
+ client_index = clib_net_to_host_u32 (mp->snort_client_index);
+
+ if (client_index == INDEX_INVALID)
+ {
+ /* clang-format off */
+ REPLY_AND_DETAILS_MACRO (
+ VL_API_SNORT_CLIENT_GET_REPLY, sm->clients, ({
+ client = pool_elt_at_index (sm->clients, cursor);
+ send_snort_client_details (client, rp, mp->context);
+ }));
+ /* clang-format on */
+ }
+ else
+ {
+ client = pool_elt_at_index (sm->clients, client_index);
+
+ if (client)
+ {
+ vl_api_registration_t *rp =
+ vl_api_client_index_to_registration (mp->client_index);
+
+ if (rp == NULL)
+ {
+ return;
+ }
+
+ send_snort_client_details (client, rp, mp->context);
+ }
+ else
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ /* clang-format off */
+ REPLY_MACRO2 (VL_API_SNORT_CLIENT_GET_REPLY, ({
+ rmp->cursor = INDEX_INVALID;
+ }));
+ /* clang-format on */
+ }
+}
+
+static void
+vl_api_snort_client_disconnect_t_handler (vl_api_snort_client_disconnect_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ snort_main_t *sm = snort_get_main ();
+ snort_client_t *client;
+ vl_api_snort_client_disconnect_reply_t *rmp;
+ u32 client_index = clib_net_to_host_u32 (mp->snort_client_index);
+ int rv = 0;
+
+ if (pool_is_free_index (sm->clients, client_index))
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+ else
+ {
+ client = pool_elt_at_index (sm->clients, client_index);
+ rv = snort_instance_disconnect (vm, client->instance_index);
+ }
+
+ REPLY_MACRO (VL_API_SNORT_CLIENT_DISCONNECT_REPLY);
+}
+
+static void
+vl_api_snort_instance_disconnect_t_handler (
+ vl_api_snort_instance_disconnect_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_snort_instance_disconnect_reply_t *rmp;
+ u32 instance_index = clib_net_to_host_u32 (mp->instance_index);
+ int rv = snort_instance_disconnect (vm, instance_index);
+
+ REPLY_MACRO (VL_API_SNORT_INSTANCE_DISCONNECT_REPLY);
+}
+
+static void
+vl_api_snort_interface_detach_t_handler (vl_api_snort_interface_detach_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_snort_interface_detach_reply_t *rmp;
+ u32 sw_if_index = clib_net_to_host_u32 (mp->sw_if_index);
+ int rv = VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+ if (sw_if_index != INDEX_INVALID)
+ rv = snort_interface_enable_disable (vm, NULL, sw_if_index,
+ 0 /* is_enable */, SNORT_INOUT);
+
+ REPLY_MACRO (VL_API_SNORT_INTERFACE_DETACH_REPLY);
+}
+
+static void
+vl_api_snort_input_mode_get_t_handler (vl_api_snort_input_mode_get_t *mp)
+{
+ snort_main_t *sm = &snort_main;
+ vl_api_snort_input_mode_get_reply_t *rmp;
+ int rv = 0;
+
+ REPLY_MACRO2 (VL_API_SNORT_INPUT_MODE_GET_REPLY, ({
+ rmp->snort_mode = clib_host_to_net_u32 (sm->input_mode);
+ }));
+}
+
+static void
+vl_api_snort_input_mode_set_t_handler (vl_api_snort_input_mode_set_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_snort_input_mode_set_reply_t *rmp;
+ u8 mode = mp->input_mode;
+ int rv = 0;
+
+ if (mode != VLIB_NODE_STATE_INTERRUPT && mode != VLIB_NODE_STATE_POLLING)
+ {
+ clib_error_return (0, "invalid input mode %u", mode);
+ }
+ snort_set_node_mode (vm, mode);
+
+ REPLY_MACRO (VL_API_SNORT_INPUT_MODE_SET_REPLY);
+}
+
+/* API definitions */
+#include <snort/snort.api.c>
+
+clib_error_t *
+snort_init_api (vlib_main_t *vm)
+{
+ /* Add our API messages to the global name_crc hash table */
+ snort_base_msg_id = setup_message_id_table ();
+
+ return NULL;
+}
+
+VLIB_INIT_FUNCTION (snort_init_api);
diff --git a/src/plugins/srmpls/CMakeLists.txt b/src/plugins/srmpls/CMakeLists.txt
new file mode 100644
index 00000000000..25905d31e1b
--- /dev/null
+++ b/src/plugins/srmpls/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright (c) 2024 Cisco and/or its affiliates
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(srmpls
+ SOURCES
+ sr_mpls_policy.c
+ sr_mpls_steering.c
+ sr_mpls_api.c
+ plugin.c
+
+ INSTALL_HEADERS
+ sr_mpls.h
+
+ API_FILES
+ sr_mpls.api
+
+ # This might need to be VAT_AUTO_TEST? Not documented
+ API_TEST_SOURCES
+ sr_mpls_test.c
+)
diff --git a/src/plugins/srmpls/FEATURE.yaml b/src/plugins/srmpls/FEATURE.yaml
new file mode 100644
index 00000000000..c5b958224c7
--- /dev/null
+++ b/src/plugins/srmpls/FEATURE.yaml
@@ -0,0 +1,9 @@
+---
+name: Segment Routing for MPLS
+maintainer: Pablo Camarillo <pcamaril@cisco.com>
+features:
+ - SR Policy support
+ - Automated steering (SR steering based on NextHop/Color)
+description: "SR-MPLS"
+state: production
+properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/srmpls/dir.dox b/src/plugins/srmpls/dir.dox
new file mode 100644
index 00000000000..76ec1d6a41b
--- /dev/null
+++ b/src/plugins/srmpls/dir.dox
@@ -0,0 +1,22 @@
+/*
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief Segment Routing MPLS code
+
+ An implementation of Segment Routing for the MPLS dataplane.
+
+*/ \ No newline at end of file
diff --git a/src/plugins/marvell/plugin.c b/src/plugins/srmpls/plugin.c
index ed90776ba95..af87607764f 100644
--- a/src/plugins/marvell/plugin.c
+++ b/src/plugins/srmpls/plugin.c
@@ -1,6 +1,7 @@
/*
- *------------------------------------------------------------------
- * Copyright (c) 2018 Cisco and/or its affiliates.
+ * plugin.c: srmpls
+ *
+ * Copyright (c) 2024 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -12,22 +13,14 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *------------------------------------------------------------------
*/
#include <vlib/vlib.h>
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
+// register a plugin
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
- .description = "Marvell PP2 Device Driver",
+ .description = "Segment Routing for MPLS plugin",
};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/srmpls/sr_doc.rst b/src/plugins/srmpls/sr_doc.rst
new file mode 100644
index 00000000000..ed847fa0d42
--- /dev/null
+++ b/src/plugins/srmpls/sr_doc.rst
@@ -0,0 +1,215 @@
+.. _srmpls_doc:
+
+SR-MPLS: Segment Routing for MPLS
+=================================
+
+This is a memo intended to contain documentation of the VPP SR-MPLS
+implementation. Everything that is not directly obvious should come
+here. For any feedback on content that should be explained please
+mailto:pcamaril@cisco.com
+
+Segment Routing
+---------------
+
+Segment routing is a network technology focused on addressing the
+limitations of existing IP and Multiprotocol Label Switching (MPLS)
+networks in terms of simplicity, scale, and ease of operation. It is a
+foundation for application engineered routing as it prepares the
+networks for new business models where applications can control the
+network behavior.
+
+Segment routing seeks the right balance between distributed intelligence
+and centralized optimization and programming. It was built for the
+software-defined networking (SDN) era.
+
+Segment routing enhances packet forwarding behavior by enabling a
+network to transport unicast packets through a specific forwarding path,
+different from the normal path that a packet usually takes (IGP shortest
+path or BGP best path). This capability benefits many use cases, and one
+can build those specific paths based on application requirements.
+
+Segment routing uses the source routing paradigm. A node, usually a
+router but also a switch, a trusted server, or a virtual forwarder
+running on a hypervisor, steers a packet through an ordered list of
+instructions, called segments. A segment can represent any instruction,
+topological or service-based. A segment can have a local semantic to a
+segment-routing node or global within a segment-routing network. Segment
+routing allows an operator to enforce a flow through any topological
+path and service chain while maintaining per-flow state only at the
+ingress node to the segment-routing network. Segment routing also
+supports equal-cost multipath (ECMP) by design.
+
+Segment routing can operate with either an MPLS or an IPv6 data plane.
+All the currently available MPLS services, such as Layer 3 VPN (L3VPN),
+L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services
+[VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet
+VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
+
+**The implementation of Segment Routing in VPP covers both the IPv6 data
+plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page
+contains the SR-MPLS documentation.**
+
+Segment Routing terminology
+---------------------------
+
+- SegmentID (SID): is an MPLS label.
+- Segment List (SL) (SID List): is the sequence of SIDs that the packet
+ will traverse.
+- SR Policy: is a set of candidate paths (SID list+weight). An SR
+ policy is uniquely identified by its Binding SID and associated with
+ a weighted set of Segment Lists. In case several SID lists are
+ defined, traffic steered into the policy is unevenly load-balanced
+ among them according to their respective weights.
+- BindingSID: a BindingSID is a SID (only one) associated one-one with
+ an SR Policy. If a packet arrives with MPLS label corresponding to a
+ BindingSID, then the SR policy will be applied to such packet.
+ (BindingSID is popped first.)
+
+SR-MPLS features in VPP
+-----------------------
+
+The SR-MPLS implementation is focused on the SR policies, as well on its
+steering. Others SR-MPLS features, such as for example AdjSIDs, can be
+achieved using the regular VPP MPLS implementation.
+
+The Segment Routing Policy
+(*draft-filsfils-spring-segment-routing-policy*) defines SR Policies.
+
+Creating a SR Policy
+--------------------
+
+An SR Policy is defined by a Binding SID and a weighted set of Segment
+Lists.
+
+A new SR policy is created with a first SID list using:
+
+::
+
+ sr mpls policy add bsid 40001 next 16001 next 16002 next 16003 (weight 5)
+
+- The weight parameter is only used if more than one SID list is
+ associated with the policy.
+
+An SR policy is deleted with:
+
+::
+
+ sr mpls policy del bsid 40001
+
+The existing SR policies are listed with:
+
+::
+
+ show sr mpls policies
+
+Adding/Removing SID Lists from an SR policy
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+An additional SID list is associated with an existing SR policy with:
+
+::
+
+ sr mpls policy mod bsid 40001 add sl next 16001 next 16002 next 16003 (weight 3)
+
+Conversely, a SID list can be removed from an SR policy with:
+
+::
+
+ sr mpls policy mod bsid 4001 del sl index 1
+
+Note that this CLI cannot be used to remove the last SID list of a
+policy. Instead the SR policy delete CLI must be used.
+
+The weight of a SID list can also be modified with:
+
+::
+
+ sr mpls policy mod bsid 40001 mod sl index 1 weight 4
+
+SR Policies: Spray policies
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Spray policies are a specific type of SR policies where the packet is
+replicated on all the SID lists, rather than load-balanced among them.
+
+SID list weights are ignored with this type of policies.
+
+A Spray policy is instantiated by appending the keyword **spray** to a
+regular SR-MPLS policy command, as in:
+
+::
+
+ sr mpls policy add bsid 40002 next 16001 next 16002 next 16003 spray
+
+Spray policies are used for removing multicast state from a network core
+domain, and instead send a linear unicast copy to every access node. The
+last SID in each list accesses the multicast tree within the access
+node.
+
+Steering packets into a SR Policy
+---------------------------------
+
+Segment Routing supports three methods of steering traffic into an SR
+policy.
+
+Local steering
+~~~~~~~~~~~~~~
+
+In this variant incoming packets match a routing policy which directs
+them on a local SR policy.
+
+In order to achieve this behavior the user needs to create an ‘sr
+steering policy via sr policy bsid’.
+
+::
+
+ sr mpls steer l3 2001::/64 via sr policy bsid 40001
+ sr mpls steer l3 2001::/64 via sr policy bsid 40001 fib-table 3
+ sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001
+ sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001 vpn-label 500
+
+Remote steering
+~~~~~~~~~~~~~~~
+
+In this variant incoming packets have an active SID matching a local
+BSID at the head-end.
+
+In order to achieve this behavior the packets should simply arrive with
+an active SID equal to the Binding SID of a locally instantiated SR
+policy.
+
+Automated steering
+~~~~~~~~~~~~~~~~~~
+
+In this variant incoming packets match a BGP/Service route which
+recurses on the BSID of a local policy.
+
+In order to achieve this behavior the user first needs to color the SR
+policies. He can do so by using the CLI:
+
+::
+
+ sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234
+
+Notice that an SR policy can have a single endpoint and a single color.
+Notice that the *endpoint* value is an IP46 address and the color a u32.
+
+Then, for any BGP/Service route the user has to use the API to steer
+prefixes:
+
+::
+
+ sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2
+ sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500
+
+Notice that *co* refers to the CO-bits (values [0|1|2|3]).
+
+Notice also that a given prefix might be steered over several colors
+(same next-hop and same co-bit value). In order to add new colors just
+execute the API several times (or with the del parameter to delete the
+color).
+
+This variant is meant to be used in conjunction with a control plane
+agent that uses the underlying binary API bindings of
+*sr_mpls_steering_policy_add*/*sr_mpls_steering_policy_del* for any BGP
+service route received.
diff --git a/src/plugins/srmpls/sr_mpls.api b/src/plugins/srmpls/sr_mpls.api
new file mode 100644
index 00000000000..742f135d493
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls.api
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the
+ * License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+option version = "3.0.0";
+
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
+import "vnet/srv6/sr_types.api";
+
+/** \brief MPLS SR policy add
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid - is the bindingSID of the SR Policy. MPLS label (20bit)
+ @param weight - is the weight of the sid list. optional.
+ @param is_spray - is the type of the SR policy. (0.Default // 1.Spray)
+ @param segments - vector of labels (20bit) composing the segment list
+*/
+autoreply define sr_mpls_policy_add
+{
+ u32 client_index;
+ u32 context;
+ u32 bsid;
+ u32 weight;
+ bool is_spray;
+ u8 n_segments;
+ u32 segments[n_segments];
+};
+
+/** \brief MPLS SR policy modification
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy. MPLS label (20bit)
+ @param sr_policy_index is the index of the SR policy
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param operation is the operation to perform (among the top ones)
+ @param segments is a vector of MPLS labels composing the segment list
+ @param sl_index is the index of the Segment List to modify/delete
+ @param weight is the weight of the sid list. optional.
+ @param is_encap Mode. Encapsulation or SRH insertion.
+*/
+autoreply define sr_mpls_policy_mod
+{
+ u32 client_index;
+ u32 context;
+ u32 bsid;
+ vl_api_sr_policy_op_t operation;
+ u32 sl_index;
+ u32 weight;
+ u8 n_segments;
+ u32 segments[n_segments];
+};
+
+/** \brief MPLS SR policy deletion
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy. MPLS label (20bit)
+*/
+autoreply define sr_mpls_policy_del
+{
+ u32 client_index;
+ u32 context;
+ u32 bsid;
+};
+
+/** \brief MPLS SR steering add/del
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_del
+ @param bsid - is the bindingSID of the SR Policy (~0 is no bsid)
+ @param table_id - is the VRF where to install the FIB entry for the BSID
+ @param prefix - is the IPv4/v6 address for L3 traffic type.
+ @param mask_width - is the mask for L3 traffic type
+ @param next_hop - describes the next_hop (in case no BSID)
+ @param color - describes the color
+ @param co_bits - are the CO_bits of the steering policy
+ @param vpn_label - is an additional last VPN label. (~0 is no label)
+*/
+autoreply define sr_mpls_steering_add_del
+{
+ u32 client_index;
+ u32 context;
+ bool is_del[default = false];
+ u32 bsid;
+ u32 table_id;
+ vl_api_prefix_t prefix;
+ u32 mask_width;
+ vl_api_address_t next_hop;
+ u32 color;
+ u8 co_bits;
+ u32 vpn_label;
+};
+
+/** \brief MPLS SR steering add/del
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param endpoint is the endpoint of the SR policy
+ @param color is the color of the sr policy
+*/
+autoreply define sr_mpls_policy_assign_endpoint_color
+{
+ u32 client_index;
+ u32 context;
+ u32 bsid;
+ vl_api_address_t endpoint;
+ u32 color;
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON Local Variables: eval:
+ * (c-set-style "gnu") End:
+ */
diff --git a/src/plugins/srmpls/sr_mpls.h b/src/plugins/srmpls/sr_mpls.h
new file mode 100644
index 00000000000..a8f9494428f
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates. Licensed under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing MPLS data structures definitions
+ *
+ */
+
+#ifndef included_vnet_srmpls_h
+#define included_vnet_srmpls_h
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+/* SR policy types */
+#define SR_POLICY_TYPE_DEFAULT 0
+#define SR_POLICY_TYPE_SPRAY 1
+
+#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
+
+#define SR_STEER_IPV4 4
+#define SR_STEER_IPV6 6
+
+#define SR_TE_CO_BITS_00 0
+#define SR_TE_CO_BITS_01 1
+#define SR_TE_CO_BITS_10 2
+#define SR_TE_CO_BITS_11 3
+
+/**
+ * @brief SR Segment List (SID list)
+ */
+typedef struct
+{
+ /* SIDs (key) */
+ mpls_label_t *segments;
+
+ /* SID list weight (wECMP / UCMP) */
+ u32 weight;
+
+} mpls_sr_sl_t;
+
+typedef struct
+{
+ u32 *segments_lists; /**< Pool of SID lists indexes */
+
+ mpls_label_t bsid; /**< BindingSID (key) */
+
+ u8 type; /**< Type (default is 0) */
+ /* SR Policy specific DPO */
+ /* IF Type = DEFAULT Then Load-Balancer DPO among SID lists */
+ /* IF Type = SPRAY then Spray DPO with all SID lists */
+
+ ip46_address_t endpoint; /**< Optional NH for SR TE */
+ u8 endpoint_type;
+ u32 color; /**< Optional color for SR TE */
+} mpls_sr_policy_t;
+
+/**
+ * @brief Steering db key
+ *
+ * L3 is IPv4/IPv6 + mask
+ */
+typedef struct
+{
+ ip46_address_t prefix; /**< IP address of the prefix */
+ u32 mask_width; /**< Mask width of the prefix */
+ u32 fib_table; /**< VRF of the prefix */
+ u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */
+ u8 padding[3];
+} sr_mpls_steering_key_t;
+
+typedef struct
+{
+ sr_mpls_steering_key_t classify; /**< Traffic classification */
+ mpls_label_t bsid; /**< SR Policy index */
+ ip46_address_t next_hop; /**< SR TE NH */
+ char nh_type;
+ u32 *color; /**< Vector of SR TE colors */
+ char co_bits; /**< Color-Only bits */
+ mpls_label_t vpn_label;
+} mpls_sr_steering_policy_t;
+
+/**
+ * @brief Segment Routing main datastructure
+ */
+typedef struct
+{
+ /* SR SID lists */
+ mpls_sr_sl_t *sid_lists;
+
+ /* SR MPLS policies */
+ mpls_sr_policy_t *sr_policies;
+
+ /* Hash table mapping BindingSID to SR MPLS policy */
+ uword *sr_policies_index_hash;
+
+ /* Pool of SR steer policies instances */
+ mpls_sr_steering_policy_t *steer_policies;
+
+ /* MHash table mapping steering rules to SR steer instance */
+ mhash_t sr_steer_policies_hash;
+
+ /** SR TE **/
+ /* Hash table mapping (Color->Endpoint->BSID) for SR policies */
+ mhash_t sr_policies_c2e2eclabel_hash;
+ /* SR TE (internal) fib table (Endpoint, Color) */
+ u32 fib_table_EC;
+ /* Pool of (Endpoint, Color) hidden labels */
+ u32 *ec_labels;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} mpls_sr_main_t;
+
+extern mpls_sr_main_t sr_mpls_main;
+
+extern int
+sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments,
+ u8 behavior, u32 weight);
+
+extern int
+sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
+ mpls_label_t * segments, u32 sl_index, u32 weight);
+
+extern int sr_mpls_policy_del (mpls_label_t bsid);
+
+extern int
+sr_mpls_policy_assign_endpoint_color (mpls_label_t bsid,
+ ip46_address_t * endpoint,
+ u8 endpoint_type, u32 color);
+
+extern int
+sr_mpls_steering_policy_add (mpls_label_t bsid, u32 table_id,
+ ip46_address_t * prefix, u32 mask_width,
+ u8 traffic_type, ip46_address_t * next_hop,
+ u8 nh_type, u32 color, char co_bits,
+ mpls_label_t vpn_label);
+
+extern int
+sr_mpls_steering_policy_del (ip46_address_t * prefix,
+ u32 mask_width, u8 traffic_type, u32 table_id,
+ u32 color);
+
+extern u32 find_or_create_internal_label (ip46_address_t endpoint, u32 color);
+
+extern void internal_label_lock (ip46_address_t endpoint, u32 color);
+
+extern void internal_label_unlock (ip46_address_t endpoint, u32 color);
+
+#endif /* included_vnet_sr_mpls_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables: eval: (c-set-style "gnu") End:
+ */
diff --git a/src/plugins/srmpls/sr_mpls_api.c b/src/plugins/srmpls/sr_mpls_api.c
new file mode 100644
index 00000000000..3e89017dbc1
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls_api.c
@@ -0,0 +1,257 @@
+/*
+ * ------------------------------------------------------------------
+ * sr_mpls_api.c - SR-MPLS segment routing api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ * ------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include "sr_mpls.h"
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ip/ip_types_api.h>
+
+#include <vnet/format_fns.h>
+#include <plugins/srmpls/sr_mpls.api_enum.h>
+#include <plugins/srmpls/sr_mpls.api_types.h>
+
+#define vl_api_version(n, v) static u32 api_version = v;
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_api_version
+
+#define vl_endianfun
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_endianfun
+
+#define vl_calcsizefun
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_calcsizefun
+
+#define vl_printfun
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_printfun
+
+#define vl_msg_name_crc_list
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_msg_name_crc_list
+
+#define REPLY_MSG_ID_BASE msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(SR_MPLS_POLICY_DEL, sr_mpls_policy_del) \
+_(SR_MPLS_STEERING_ADD_DEL, sr_mpls_steering_add_del) \
+_(SR_MPLS_POLICY_ASSIGN_ENDPOINT_COLOR, sr_mpls_policy_assign_endpoint_color)
+
+static u16 msg_id_base;
+
+static void
+vl_api_sr_mpls_policy_add_t_handler (vl_api_sr_mpls_policy_add_t * mp)
+{
+ vl_api_sr_mpls_policy_add_reply_t *rmp;
+
+ mpls_label_t *segments = 0, *seg;
+ mpls_label_t this_address = 0;
+
+ int i;
+ for (i = 0; i < mp->n_segments; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ this_address = ntohl (mp->segments[i]);
+ clib_memcpy (seg, &this_address, sizeof (this_address));
+ }
+
+ int rv = 0;
+ rv = sr_mpls_policy_add (ntohl (mp->bsid),
+ segments, mp->is_spray, ntohl (mp->weight));
+ vec_free (segments);
+
+ REPLY_MACRO (VL_API_SR_MPLS_POLICY_ADD_REPLY);
+}
+
+static void
+vl_api_sr_mpls_policy_mod_t_handler (vl_api_sr_mpls_policy_mod_t * mp)
+{
+ vl_api_sr_mpls_policy_mod_reply_t *rmp;
+
+ mpls_label_t *segments = 0, *seg;
+ mpls_label_t this_address = 0;
+
+ int i;
+ for (i = 0; i < mp->n_segments; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ this_address = ntohl (mp->segments[i]);
+ clib_memcpy (seg, &this_address, sizeof (this_address));
+ }
+
+ int rv = 0;
+ rv = sr_mpls_policy_mod (ntohl (mp->bsid),
+ ntohl (mp->operation), segments,
+ ntohl (mp->sl_index), ntohl (mp->weight));
+ vec_free (segments);
+
+ REPLY_MACRO (VL_API_SR_MPLS_POLICY_MOD_REPLY);
+}
+
+static void
+vl_api_sr_mpls_policy_del_t_handler (vl_api_sr_mpls_policy_del_t * mp)
+{
+ vl_api_sr_mpls_policy_del_reply_t *rmp;
+ int rv = 0;
+ rv = sr_mpls_policy_del (ntohl (mp->bsid));
+
+ REPLY_MACRO (VL_API_SR_MPLS_POLICY_DEL_REPLY);
+}
+
+static void vl_api_sr_mpls_steering_add_del_t_handler
+ (vl_api_sr_mpls_steering_add_del_t * mp)
+{
+ vl_api_sr_mpls_steering_add_del_reply_t *rmp;
+ fib_prefix_t prefix;
+ ip46_address_t next_hop;
+  clib_memset (&prefix, 0, sizeof (prefix)); /* zero the whole fib_prefix_t, not just the embedded address */
+
+ ip_prefix_decode (&mp->prefix, &prefix);
+ ip_address_decode (&mp->next_hop, &next_hop);
+
+ int rv = 0;
+ if (mp->is_del)
+ rv = sr_mpls_steering_policy_del (&prefix.fp_addr,
+ prefix.fp_len,
+ ip46_address_is_ip4 (&prefix.fp_addr) ?
+ SR_STEER_IPV4 : SR_STEER_IPV6,
+ ntohl (mp->table_id),
+ ntohl (mp->color));
+ else
+ rv = sr_mpls_steering_policy_add (ntohl (mp->bsid),
+ ntohl (mp->table_id),
+ &prefix.fp_addr,
+ prefix.fp_len,
+ ip46_address_is_ip4 (&prefix.fp_addr) ?
+ SR_STEER_IPV4 : SR_STEER_IPV6,
+ &next_hop,
+ ip46_address_is_ip4 (&next_hop) ?
+ SR_STEER_IPV4 : SR_STEER_IPV6,
+ ntohl (mp->color), mp->co_bits,
+ ntohl (mp->vpn_label));
+
+ REPLY_MACRO (VL_API_SR_MPLS_STEERING_ADD_DEL_REPLY);
+}
+
+static void vl_api_sr_mpls_policy_assign_endpoint_color_t_handler
+ (vl_api_sr_mpls_policy_assign_endpoint_color_t * mp)
+{
+ vl_api_sr_mpls_policy_assign_endpoint_color_reply_t *rmp;
+ int rv = 0;
+
+ ip46_address_t endpoint;
+ clib_memset (&endpoint, 0, sizeof (ip46_address_t));
+ ip_address_decode (&mp->endpoint, &endpoint);
+
+ rv = sr_mpls_policy_assign_endpoint_color (ntohl (mp->bsid),
+ &endpoint,
+ ip46_address_is_ip4 (&endpoint) ?
+ SR_STEER_IPV4 : SR_STEER_IPV6,
+ ntohl (mp->color));
+
+ REPLY_MACRO (VL_API_SR_MPLS_POLICY_ASSIGN_ENDPOINT_COLOR_REPLY);
+}
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id, n, crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + REPLY_MSG_ID_BASE);
+ foreach_vl_msg_name_crc_sr_mpls;
+#undef _
+}
+
+static clib_error_t *
+sr_mpls_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = vlibapi_get_main ();
+
+ u8 *name = format (0, "sr_mpls_%08x%c", api_version, 0);
+ REPLY_MSG_ID_BASE =
+ vl_msg_api_get_msg_ids ((char *) name, VL_MSG_SR_MPLS_LAST);
+ vec_free (name);
+
+#define _(N, n) \
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = REPLY_MSG_ID_BASE + VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = 1, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ });
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Manually register the sr policy add msg, so we trace enough bytes
+ * to capture a typical segment list
+ */
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){
+ .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_ADD,
+ .name = "sr_mpls_policy_add",
+ .handler = vl_api_sr_mpls_policy_add_t_handler,
+ .endian = vl_api_sr_mpls_policy_add_t_endian,
+ .format_fn = vl_api_sr_mpls_policy_add_t_format,
+ .size = 256,
+ .traced = 1,
+ .tojson = vl_api_sr_mpls_policy_add_t_tojson,
+ .fromjson = vl_api_sr_mpls_policy_add_t_fromjson,
+ .calc_size = vl_api_sr_mpls_policy_add_t_calc_size,
+ });
+ /*
+ * Manually register the sr policy mod msg, so we trace enough bytes
+ * to capture a typical segment list
+ */
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){
+ .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_MOD,
+ .name = "sr_mpls_policy_mod",
+ .handler = vl_api_sr_mpls_policy_mod_t_handler,
+ .endian = vl_api_sr_mpls_policy_mod_t_endian,
+ .format_fn = vl_api_sr_mpls_policy_mod_t_format,
+ .size = 256,
+ .traced = 1,
+ .tojson = vl_api_sr_mpls_policy_mod_t_tojson,
+ .fromjson = vl_api_sr_mpls_policy_mod_t_fromjson,
+ .calc_size = vl_api_sr_mpls_policy_mod_t_calc_size,
+ });
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (sr_mpls_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables: eval: (c-set-style "gnu") End:
+ */
diff --git a/src/plugins/srmpls/sr_mpls_policy.c b/src/plugins/srmpls/sr_mpls_policy.c
new file mode 100644
index 00000000000..af24acd8cf6
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls_policy.c
@@ -0,0 +1,903 @@
+/*
+ * sr_mpls_policy.c: SR-MPLS policies
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * @file
+ * @brief SR MPLS policy creation and application
+ *
+ * Create an SR policy.
+ * An SR policy can be either of 'default' type or 'spray' type
+ * An SR policy has attached a list of SID lists.
+ * In case the SR policy is a default one it will load balance among them.
+ * An SR policy has associated a BindingSID.
+ * In case any packet arrives with MPLS_label == BindingSID then the SR policy
+ * associated to such bindingSID will be applied to such packet.
+ * Also, a BSID can be associated with a (Next-Hop, Color)
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include "sr_mpls.h"
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+mpls_sr_main_t sr_mpls_main;
+
+/*************************** SR LB helper functions **************************/
+/**
+ * @brief Creates a Segment List and adds it to an SR policy
+ *
+ * Creates a Segment List and adds it to the SR policy. Notice that the SL are
+ * not necessarily unique. Hence there might be two Segment List within the
+ * same SR Policy with exactly the same segments and same weight.
+ *
+ * @param sr_policy is the SR policy where the SL will be added
+ * @param sl is a vector of MPLS labels composing the Segment List
+ * @param weight is the weight of the SegmentList (for load-balancing purposes)
+ * (unlike SRv6, SR-MPLS has no encapsulation/insertion mode parameter)
+ *
+ * @return pointer to the just created segment list
+ */
+static inline mpls_sr_sl_t *
+create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_sl_t *segment_list;
+ u32 ii;
+
+ pool_get (sm->sid_lists, segment_list);
+ clib_memset (segment_list, 0, sizeof (*segment_list));
+
+ vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists);
+
+ /* Fill in segment list */
+ segment_list->weight =
+ (weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT);
+ segment_list->segments = vec_dup (sl);
+
+ mpls_eos_bit_t eos;
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = segment_list->weight,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_label_stack = NULL,
+ .frp_local_label = sl[0],
+ };
+
+ if (vec_len (sl) > 1)
+ {
+ vec_validate (path.frp_label_stack, vec_len (sl) - 2);
+ for (ii = 1; ii < vec_len (sl); ii++)
+ {
+ path.frp_label_stack[ii - 1].fml_value = sl[ii];
+ }
+ }
+ else
+ {
+ /*
+       * add an implicit NULL label to allow non-eos recursion
+ */
+ fib_mpls_label_t lbl = {
+ .fml_value = MPLS_IETF_IMPLICIT_NULL_LABEL,
+ };
+ vec_add1 (path.frp_label_stack, lbl);
+ }
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ fib_table_entry_path_add2 (0,
+ &pfx,
+ FIB_SOURCE_SR,
+ (sr_policy->type == SR_POLICY_TYPE_DEFAULT ?
+ FIB_ENTRY_FLAG_NONE :
+ FIB_ENTRY_FLAG_MULTICAST), paths);
+ vec_free (paths);
+ }
+
+ return segment_list;
+}
+
+/******************************* SR rewrite API *******************************/
+/*
+ * Three functions for handling sr policies: -> sr_mpls_policy_add ->
+ * sr_mpls_policy_del -> sr_mpls_policy_mod All of them are API. CLI function
+ * on sr_policy_command_fn
+ */
+
+/**
+ * @brief Create a new SR policy
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param segments is a vector of MPLS labels composing the segment list
+ * @param behavior is the behavior of the SR policy. (default//spray)
+ * @param fib_table is the VRF where to install the FIB entry for the BSID
+ * @param weight is the weight of this specific SID list
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments,
+ u8 behavior, u32 weight)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_policy_t *sr_policy = 0;
+ uword *p;
+
+ if (!sm->sr_policies_index_hash)
+ sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+
+ /* MPLS SR policies cannot be created unless the MPLS table is present */
+ if (~0 == fib_table_find (FIB_PROTOCOL_MPLS, MPLS_FIB_DEFAULT_TABLE_ID))
+ return (VNET_API_ERROR_NO_SUCH_TABLE);
+
+ /* Search for existing keys (BSID) */
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ {
+ /* Add SR policy that already exists; complain */
+ return -12;
+ }
+ /* Add an SR policy object */
+ pool_get (sm->sr_policies, sr_policy);
+ clib_memset (sr_policy, 0, sizeof (*sr_policy));
+
+ /* the first policy needs to lock the MPLS table so it doesn't
+ * disappear with policies in it */
+ if (1 == pool_elts (sm->sr_policies))
+ fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS,
+ MPLS_FIB_DEFAULT_TABLE_ID,
+ FIB_SOURCE_SR);
+ sr_policy->bsid = bsid;
+ sr_policy->type = behavior;
+ sr_policy->endpoint_type = 0;
+ ip6_address_set_zero (&sr_policy->endpoint.ip6);
+ sr_policy->color = (u32) ~ 0;
+
+ /* Copy the key */
+ hash_set (sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies);
+
+ /* Create a segment list and add the index to the SR policy */
+ create_sl (sr_policy, segments, weight);
+
+ return 0;
+}
+
+/**
+ * @brief Delete a SR policy
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param index is the index of the SR policy
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_policy_del (mpls_label_t bsid)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_policy_t *sr_policy = 0;
+ mpls_sr_sl_t *segment_list;
+ mpls_eos_bit_t eos;
+ u32 *sl_index;
+ uword *p;
+
+ if (!sm->sr_policies_index_hash)
+ sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -1;
+
+ /* Clean SID Lists */
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = segment_list->weight,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_local_label = segment_list->segments[0],
+ };
+    /* labels after the first form the path's out-label stack */
+    vec_add (path.frp_label_stack, segment_list->segments + 1,
+	     vec_len (segment_list->segments) - 1);
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ /* remove each of the MPLS routes */
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
+ }
+ vec_free (paths);
+ vec_free (segment_list->segments);
+ pool_put_index (sm->sid_lists, *sl_index);
+ }
+
+ /* If there is still traces of TE, make sure locks are released */
+ if (sr_policy->endpoint_type != 0 && sr_policy->color != (u32) ~ 0)
+ {
+ sr_mpls_policy_assign_endpoint_color (bsid, NULL, 0, (u32) ~ 0);
+ }
+
+ /* Remove SR policy entry */
+ hash_unset (sm->sr_policies_index_hash, sr_policy->bsid);
+ pool_put (sm->sr_policies, sr_policy);
+
+ if (0 == pool_elts (sm->sr_policies))
+ fib_table_unlock (MPLS_FIB_DEFAULT_TABLE_ID,
+ FIB_PROTOCOL_MPLS, FIB_SOURCE_SR);
+
+ return 0;
+}
+
+/**
+ * @brief Modify an existing SR policy
+ *
+ * The possible modifications are adding a new Segment List, modifying an
+ * existing Segment List (modify the weight only) and delete a given
+ * Segment List from the SR Policy.
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param fib_table is the VRF where to install the FIB entry for the BSID
+ * @param operation is the operation to perform (among the top ones)
+ * @param segments is a vector of MPLS labels composing the segment list
+ * @param sl_index is the index of the Segment List to modify/delete
+ * @param weight is the weight of the sid list. optional.
+ *
+ * @return 0 ok, >0 index of SL, <0 error
+ */
+int
+sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
+ mpls_label_t * segments, u32 sl_index, u32 weight)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_policy_t *sr_policy = 0;
+ mpls_sr_sl_t *segment_list;
+ u32 *sl_index_iterate;
+ uword *p;
+
+ if (!sm->sr_policies_index_hash)
+ sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -1;
+
+ if (operation == 1)
+ { /* Add SR List to an existing SR policy */
+ /* Create the new SL */
+ segment_list = create_sl (sr_policy, segments, weight);
+ return segment_list - sm->sid_lists;
+ }
+ else if (operation == 2)
+ { /* Delete SR List from an existing SR
+ * policy */
+ /* Check that currently there are more than one SID list */
+ if (vec_len (sr_policy->segments_lists) == 1)
+ return -21;
+
+ /*
+ * Check that the SR list does exist and is assigned to the
+ * sr policy
+ */
+ vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+ if (*sl_index_iterate == sl_index)
+ break;
+
+ if (*sl_index_iterate != sl_index)
+ return -22;
+
+ /* Remove the lucky SR list that is being kicked out */
+ segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+
+ mpls_eos_bit_t eos;
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = segment_list->weight,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_local_label = segment_list->segments[0],
+ };
+      /* labels after the first form the path's out-label stack */
+      vec_add (path.frp_label_stack, segment_list->segments + 1,
+	       vec_len (segment_list->segments) - 1);
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
+ }
+
+ vec_free (paths);
+ vec_free (segment_list->segments);
+ pool_put_index (sm->sid_lists, sl_index);
+ vec_del1 (sr_policy->segments_lists,
+ sl_index_iterate - sr_policy->segments_lists);
+ }
+ else if (operation == 3)
+ { /* Modify the weight of an existing
+ * SR List */
+ /* Find the corresponding SL */
+ vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+ if (*sl_index_iterate == sl_index)
+ break;
+
+ if (*sl_index_iterate != sl_index)
+ return -32;
+
+ /* Change the weight */
+ segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+
+ /* Update LB */
+ mpls_eos_bit_t eos;
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = segment_list->weight,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_local_label = segment_list->segments[0],
+ };
+      /* labels after the first form the path's out-label stack */
+      vec_add (path.frp_label_stack, segment_list->segments + 1,
+	       vec_len (segment_list->segments) - 1);
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
+ }
+
+ segment_list->weight = weight;
+
+ path.frp_weight = segment_list->weight;
+
+ vec_free (paths);
+ paths = NULL;
+ vec_add1 (paths, path);
+
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ fib_table_entry_path_add2 (0,
+ &pfx,
+ FIB_SOURCE_SR,
+ (sr_policy->type ==
+ SR_POLICY_TYPE_DEFAULT ?
+ FIB_ENTRY_FLAG_NONE :
+ FIB_ENTRY_FLAG_MULTICAST), paths);
+ }
+ }
+ return 0;
+}
+
+/**
+ * @brief CLI for 'sr mpls policies' command family
+ */
+static clib_error_t *
+sr_mpls_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int rv = -1;
+ char is_del = 0, is_add = 0, is_mod = 0;
+ char policy_set = 0;
+ mpls_label_t bsid, next_label;
+ u32 sl_index = (u32) ~ 0;
+ u32 weight = (u32) ~ 0;
+ mpls_label_t *segments = 0;
+ u8 operation = 0;
+ u8 is_spray = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!is_add && !is_mod && !is_del && unformat (input, "add"))
+ is_add = 1;
+ else if (!is_add && !is_mod && !is_del && unformat (input, "del"))
+ is_del = 1;
+ else if (!is_add && !is_mod && !is_del && unformat (input, "mod"))
+ is_mod = 1;
+ else if (!policy_set
+ && unformat (input, "bsid %U", unformat_mpls_unicast_label,
+ &bsid))
+ policy_set = 1;
+ else if (unformat (input, "weight %d", &weight));
+ else if (unformat
+ (input, "next %U", unformat_mpls_unicast_label, &next_label))
+ {
+ vec_add (segments, &next_label, 1);
+ }
+ else if (unformat (input, "add sl"))
+ operation = 1;
+ else if (unformat (input, "del sl index %d", &sl_index))
+ operation = 2;
+ else if (unformat (input, "mod sl index %d", &sl_index))
+ operation = 3;
+ else if (unformat (input, "spray"))
+ is_spray = 1;
+ else
+ break;
+ }
+
+ if (!is_add && !is_mod && !is_del)
+ return clib_error_return (0, "Incorrect CLI");
+
+ if (!policy_set)
+ return clib_error_return (0, "No SR policy BSID or index specified");
+
+ if (is_add)
+ {
+ if (vec_len (segments) == 0)
+ return clib_error_return (0, "No Segment List specified");
+
+ rv = sr_mpls_policy_add (bsid, segments,
+ (is_spray ? SR_POLICY_TYPE_SPRAY :
+ SR_POLICY_TYPE_DEFAULT), weight);
+ vec_free (segments);
+ }
+ else if (is_del)
+ rv = sr_mpls_policy_del (bsid);
+ else if (is_mod)
+ {
+ if (!operation)
+ return clib_error_return (0, "No SL modification specified");
+ if (operation != 1 && sl_index == (u32) ~ 0)
+ return clib_error_return (0, "No Segment List index specified");
+ if (operation == 1 && vec_len (segments) == 0)
+ return clib_error_return (0, "No Segment List specified");
+ if (operation == 3 && weight == (u32) ~ 0)
+ return clib_error_return (0, "No new weight for the SL specified");
+ rv = sr_mpls_policy_mod (bsid, operation, segments, sl_index, weight);
+ vec_free (segments);
+ }
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -12:
+ return clib_error_return (0,
+ "There is already a FIB entry for the BindingSID address.\n"
+ "The SR policy could not be created.");
+ case -21:
+ return clib_error_return (0,
+ "The selected SR policy only contains ONE segment list. "
+ "Please remove the SR policy instead");
+ case -22:
+ return clib_error_return (0,
+ "Could not delete the segment list. "
+ "It is not associated with that SR policy.");
+ case -23:
+ return clib_error_return (0,
+ "Could not delete the segment list. "
+ "It is not associated with that SR policy.");
+ case -32:
+ return clib_error_return (0,
+ "Could not modify the segment list. "
+ "The given SL is not associated with such SR policy.");
+ case VNET_API_ERROR_NO_SUCH_TABLE:
+ return clib_error_return (0, "the Default MPLS table is not present");
+ default:
+ return clib_error_return (0, "BUG: sr policy returns %d", rv);
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND(sr_mpls_policy_command, static)=
+{
+ .path = "sr mpls policy",
+ .short_help = "sr mpls policy [add||del||mod] bsid 2999 "
+ "next 10 next 20 next 30 (weight 1) (spray)",
+ .long_help = "TBD.\n",
+ .function = sr_mpls_policy_command_fn,
+};
+
+/**
+ * @brief CLI to display onscreen all the SR MPLS policies
+ */
+static clib_error_t *
+show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_sl_t *segment_list = 0;
+ mpls_sr_policy_t *sr_policy = 0;
+ mpls_sr_policy_t **vec_policies = 0;
+ mpls_label_t *label;
+ u32 *sl_index;
+ u8 *s;
+ int i = 0;
+
+ vlib_cli_output (vm, "SR MPLS policies:");
+
+ pool_foreach (sr_policy, sm->sr_policies) {
+ vec_add1(vec_policies, sr_policy);
+ }
+
+ vec_foreach_index (i, vec_policies)
+ {
+ sr_policy = vec_policies[i];
+ vlib_cli_output (vm, "[%u].-\tBSID: %U",
+ (u32) (sr_policy - sm->sr_policies),
+ format_mpls_unicast_label, sr_policy->bsid);
+ switch (sr_policy->endpoint_type)
+ {
+ case SR_STEER_IPV6:
+ vlib_cli_output (vm, "\tEndpoint: %U", format_ip6_address,
+ &sr_policy->endpoint.ip6);
+ vlib_cli_output (vm, "\tColor: %u", sr_policy->color);
+ break;
+ case SR_STEER_IPV4:
+ vlib_cli_output (vm, "\tEndpoint: %U", format_ip4_address,
+ &sr_policy->endpoint.ip4);
+ vlib_cli_output (vm, "\tColor: %u", sr_policy->color);
+ break;
+ default:
+ vlib_cli_output (vm, "\tTE disabled");
+ }
+ vlib_cli_output (vm, "\tType: %s",
+ (sr_policy->type ==
+ SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray"));
+ vlib_cli_output (vm, "\tSegment Lists:");
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ s = NULL;
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+ s = format (s, "\t[%u].- ", *sl_index);
+ s = format (s, "< ");
+ vec_foreach (label, segment_list->segments)
+ {
+ s = format (s, "%U, ", format_mpls_unicast_label, *label);
+ }
+ s = format (s, "\b\b > ");
+ vlib_cli_output (vm, " %s", s);
+ }
+ vlib_cli_output (vm, "-----------");
+ }
+ vec_free (vec_policies);
+ return 0;
+}
+
+VLIB_CLI_COMMAND(show_sr_mpls_policies_command, static)=
+{
+ .path = "show sr mpls policies",
+ .short_help = "show sr mpls policies",
+ .function = show_sr_mpls_policies_command_fn,
+};
+
+/**
+ * @brief Update the Endpoint,Color tuple of an SR policy
+ *
+ * Removes any previously installed (NH,C)/(ANY,C) entries from the hidden
+ * traffic-engineering FIB, then (unless clearing) installs fresh entries
+ * resolving through the policy's BSID, for both EOS and non-EOS.
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param endpoint represents the IP46 of the endpoint
+ * @param endpoint_type is the AF of the endpoint (0 clears the tuple)
+ * @param color represents the color (u32)
+ *
+ * To reset to NULL use ~0 as parameters.
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_policy_assign_endpoint_color (mpls_label_t bsid,
+                                      ip46_address_t * endpoint,
+                                      u8 endpoint_type, u32 color)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  mpls_sr_policy_t *sr_policy = 0;
+  uword *endpoint_table, *p, *old_value;
+
+  /* The ANY-endpoint sentinel is the all-ones address */
+  ip46_address_t any;
+  any.as_u64[0] = any.as_u64[1] = (u64) ~0;
+
+  if (!sm->sr_policies_index_hash)
+    sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+
+  /* Resolve the SR policy from its BSID */
+  p = hash_get (sm->sr_policies_index_hash, bsid);
+  if (p)
+    sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+  else
+    return -1;
+
+  /* If previous Endpoint, color existed, remove (NH,C) and (ANY,C) */
+  if (sr_policy->endpoint_type)
+    {
+      endpoint_table =
+        mhash_get (&sm->sr_policies_c2e2eclabel_hash, &sr_policy->color);
+      if (!endpoint_table)
+        return -2;
+      old_value =
+        mhash_get ((mhash_t *) endpoint_table, &sr_policy->endpoint);
+
+      /* CID 180995 This should never be NULL unless the two hash tables
+       * get out of sync */
+      ALWAYS_ASSERT (old_value != NULL);
+
+      fib_prefix_t pfx = { 0 };
+      pfx.fp_proto = FIB_PROTOCOL_MPLS;
+      pfx.fp_len = 21;
+      pfx.fp_label = (u32) * old_value;
+
+      /* Withdraw the (NH,C) label for both EOS bits */
+      mpls_eos_bit_t eos;
+      FOR_EACH_MPLS_EOS_BIT (eos)
+      {
+        pfx.fp_eos = eos;
+        fib_table_entry_path_remove (sm->fib_table_EC,
+                                     &pfx,
+                                     FIB_SOURCE_SR,
+                                     DPO_PROTO_MPLS,
+                                     NULL,
+                                     ~0, 0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+      }
+
+      /* Withdraw the (ANY,C) label likewise */
+      old_value = mhash_get ((mhash_t *) endpoint_table, &any);
+      pfx.fp_label = (u32) * old_value;
+
+      FOR_EACH_MPLS_EOS_BIT (eos)
+      {
+        pfx.fp_eos = eos;
+        fib_table_entry_path_remove (sm->fib_table_EC,
+                                     &pfx,
+                                     FIB_SOURCE_SR,
+                                     DPO_PROTO_MPLS,
+                                     NULL,
+                                     ~0, 0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+      }
+
+      /* Release the lock on (NH, Color) and (ANY, Color) */
+      internal_label_unlock (sr_policy->endpoint, sr_policy->color);
+      internal_label_unlock (any, sr_policy->color);
+
+      /* Reset the values on the SR policy */
+      sr_policy->endpoint_type = 0;
+      sr_policy->endpoint.as_u64[0] = sr_policy->endpoint.as_u64[1] =
+        (u64) ~0;
+      sr_policy->color = (u32) ~0;
+    }
+
+  if (endpoint_type)
+    {
+      sr_policy->endpoint_type = endpoint_type;
+      sr_policy->endpoint.as_u64[0] = endpoint->as_u64[0];
+      sr_policy->endpoint.as_u64[1] = endpoint->as_u64[1];
+      sr_policy->color = color;
+
+      u32 label = find_or_create_internal_label (*endpoint, color);
+      internal_label_lock (*endpoint, sr_policy->color);
+
+      /* If the hidden (NH,C) FIB doesn't exist yet, create it */
+      if (sm->fib_table_EC == (u32) ~0)
+        {
+          sm->fib_table_EC = fib_table_create_and_lock (FIB_PROTOCOL_MPLS,
+                                                        FIB_SOURCE_SR,
+                                                        "SR-MPLS Traffic Engineering (NextHop,Color)");
+
+          fib_table_flush (sm->fib_table_EC, FIB_PROTOCOL_MPLS,
+                           FIB_SOURCE_SPECIAL);
+        }
+
+      fib_prefix_t pfx = { 0 };
+      pfx.fp_proto = FIB_PROTOCOL_MPLS;
+      pfx.fp_len = 21;
+
+      fib_route_path_t path = {
+        .frp_proto = DPO_PROTO_MPLS,
+        .frp_sw_if_index = ~0,
+        .frp_fib_index = 0,
+        .frp_weight = 1,
+        .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+        .frp_label_stack = 0
+      };
+      path.frp_local_label = sr_policy->bsid;
+
+      //Add the entry to ANY,Color
+      u32 any_label = find_or_create_internal_label (any, color);
+      internal_label_lock (any, sr_policy->color);
+
+      pfx.fp_eos = MPLS_EOS;
+      path.frp_eos = MPLS_EOS;
+
+      fib_route_path_t *paths = NULL;
+      vec_add1 (paths, path);
+
+      pfx.fp_label = label;
+      fib_table_entry_update (sm->fib_table_EC,
+                              &pfx,
+                              FIB_SOURCE_SR,
+                              FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+      pfx.fp_label = any_label;
+      fib_table_entry_update (sm->fib_table_EC,
+                              &pfx,
+                              FIB_SOURCE_SR,
+                              FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+      /* BUGFIX: fib_table_entry_update copies the paths, so free the
+       * vector here; the original just dropped the pointer (leak). */
+      vec_free (paths);
+
+      fib_mpls_label_t fml = {
+        .fml_value = MPLS_IETF_IMPLICIT_NULL_LABEL,
+      };
+
+      vec_add1 (path.frp_label_stack, fml);
+      pfx.fp_eos = MPLS_NON_EOS;
+      path.frp_eos = MPLS_NON_EOS;
+
+      vec_add1 (paths, path);
+
+      pfx.fp_label = label;
+      fib_table_entry_update (sm->fib_table_EC,
+                              &pfx,
+                              FIB_SOURCE_SR,
+                              FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+      pfx.fp_label = any_label;
+      fib_table_entry_update (sm->fib_table_EC,
+                              &pfx,
+                              FIB_SOURCE_SR,
+                              FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+      /* BUGFIX: release the paths vector and our copy of the label
+       * stack (fib_path_create dups the stack internally — confirm) */
+      vec_free (paths);
+      vec_free (path.frp_label_stack);
+    }
+  return 0;
+}
+
+/**
+ * @brief CLI to modify the Endpoint,Color of an SR policy
+ *
+ * Parses "bsid X [endpoint A] [color C] [clear]" and delegates to
+ * sr_mpls_policy_assign_endpoint_color().
+ */
+static clib_error_t *
+cli_sr_mpls_policy_ec_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                                  vlib_cli_command_t * cmd)
+{
+  ip46_address_t endpoint;      /* parsed endpoint (v4 or v6) */
+  u32 color = (u32) ~0;
+  mpls_label_t bsid;
+  u8 endpoint_type = 0;         /* SR_STEER_IPV4/IPV6, 0 = not given */
+  char clear = 0, color_set = 0, bsid_set = 0;
+
+  clib_memset (&endpoint, 0, sizeof (ip46_address_t));
+
+  int rv;
+  /* Each option may appear at most once; stop at the first
+   * unrecognized token */
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!endpoint_type
+          && unformat (input, "endpoint %U", unformat_ip6_address,
+                       &endpoint.ip6))
+        endpoint_type = SR_STEER_IPV6;
+      else if (!endpoint_type
+               && unformat (input, "endpoint %U", unformat_ip4_address,
+                            &endpoint.ip4))
+        endpoint_type = SR_STEER_IPV4;
+      else if (!color_set && unformat (input, "color %u", &color))
+        color_set = 1;
+      else if (!bsid_set
+               && unformat (input, "bsid %U", unformat_mpls_unicast_label,
+                            &bsid))
+        bsid_set = 1;
+      else if (!clear && unformat (input, "clear"))
+        clear = 1;
+      else
+        break;
+    }
+
+  if (!bsid_set)
+    return clib_error_return (0, "No BSID specified");
+  if (!endpoint_type && !clear)
+    return clib_error_return (0, "No Endpoint specified");
+  if (!color_set && !clear)
+    return clib_error_return (0, "No Color set");
+
+  /* In case its a cleanup */
+  if (clear)
+    {
+      ip6_address_set_zero (&endpoint.ip6);
+      color = (u32) ~0;
+    }
+  rv =
+    sr_mpls_policy_assign_endpoint_color (bsid, &endpoint, endpoint_type,
+                                          color);
+
+  /* BUGFIX: the error object was built but never returned to the
+   * caller, so failures were silently swallowed (and the clib error
+   * leaked) */
+  if (rv)
+    return clib_error_return (0, "Error on Endpoint,Color");
+
+  return 0;
+}
+
+/* CLI registration: "sr mpls policy te" updates the (Endpoint, Color)
+ * tuple of an SR-MPLS policy via cli_sr_mpls_policy_ec_command_fn. */
+VLIB_CLI_COMMAND(cli_sr_mpls_policy_ec_command, static)=
+{
+ .path = "sr mpls policy te",
+ .short_help = "sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234",
+ .function = cli_sr_mpls_policy_ec_command_fn,
+};
+
+/********************* SR MPLS Policy initialization ***********************/
+/**
+ * @brief SR MPLS Policy initialization
+ *
+ * Resets the policy hash tables to empty; they are lazily created on
+ * first use by the add/assign paths.
+ *
+ * @param vm vlib main (unused)
+ * @return always 0 (no error)
+ */
+clib_error_t *
+sr_mpls_policy_rewrite_init (vlib_main_t * vm)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+
+ /* Init memory for sr policy keys (bsid <-> ip6_address_t) */
+ sm->sr_policies_index_hash = NULL;
+ sm->sr_policies_c2e2eclabel_hash.hash = NULL;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_mpls_policy_rewrite_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables: eval: (c-set-style "gnu") End:
+ */
diff --git a/src/plugins/srmpls/sr_mpls_steering.c b/src/plugins/srmpls/sr_mpls_steering.c
new file mode 100644
index 00000000000..24c8b0e2d9f
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls_steering.c
@@ -0,0 +1,897 @@
+/*
+ * sr_mpls_steering.c: MPLS segment routing steering into SR policy
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * @file
+ * @brief Packet steering into SR-MPLS Policies
+ *
+ * This file is in charge of handling the FIB appropriately to steer packets
+ * through SR Policies as defined in 'sr_mpls_policy.c'. Notice that here
+ * we are only doing steering. SR policy application is done in
+ * sr_policy_rewrite.c
+ *
+ * Supports:
+ * - Steering of IPv6 traffic Destination Address based through BSID
+ * - Steering of IPv4 traffic Destination Address based through BSID
+ * - Steering of IPv4 and IPv6 traffic through N,C (SR CP)
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include "sr_mpls.h"
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/mpls_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#define SRMPLS_TE_OFFSET 50
+
+/**
+ * @brief qsort-style comparator sorting colors in descending order
+ *
+ * BUGFIX: the original "return *y - *x;" subtracts two u32 values; the
+ * wrapped difference, converted to int, gives the wrong sign whenever
+ * the colors differ by more than INT_MAX. Use explicit comparisons.
+ *
+ * @return <0 if *x sorts before *y (i.e. *x > *y), >0 if after, 0 if equal
+ */
+int
+sort_color_descent (const u32 * x, u32 * y)
+{
+  if (*x > *y)
+    return -1;
+  if (*x < *y)
+    return 1;
+  return 0;
+}
+
+/********************* Internal (NH, C) labels *******************************/
+/**
+ * @brief find (or allocate) the internal label for (endpoint, color)
+ *
+ * NOTE(review): despite the original "and lock it" wording, this function
+ * does NOT take a lock; callers pair it with internal_label_lock().
+ * endpoint might be NULL or ANY
+ * NULL = 0, ANY=~0
+ *
+ * @return the internal MPLS label value, i.e. the ec_labels pool index
+ *         plus SRMPLS_TE_OFFSET
+ */
+u32
+find_or_create_internal_label (ip46_address_t endpoint, u32 color)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ uword *color_table, *result_label;
+
+ /* Outer hash maps color -> per-color mhash of (endpoint -> label) */
+ if (!sm->sr_policies_c2e2eclabel_hash.hash)
+ mhash_init (&sm->sr_policies_c2e2eclabel_hash, sizeof (mhash_t),
+ sizeof (u32));
+
+ color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
+ if (!color_table)
+ {
+ /* First label for this color: build the inner endpoint table.
+ * mhash_set_mem copies the mhash_t struct by value into the outer
+ * hash, then we re-fetch the stored copy. */
+ mhash_t color_t;
+ clib_memset (&color_t, 0, sizeof (mhash_t));
+ mhash_init (&color_t, sizeof (u32), sizeof (ip46_address_t));
+ mhash_set_mem (&sm->sr_policies_c2e2eclabel_hash, &color,
+ (uword *) & color_t, NULL);
+ color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
+ }
+
+ result_label = mhash_get ((mhash_t *) color_table, &endpoint);
+
+ if (result_label)
+ return (u32) * result_label;
+
+ /* Create and set a new internal label; pool slot holds the lock count,
+ * which starts at zero */
+ u32 *new_internal_label = 0;
+ pool_get (sm->ec_labels, new_internal_label);
+ *new_internal_label = 0;
+ mhash_set ((mhash_t *) color_table, &endpoint,
+ (new_internal_label - sm->ec_labels) + SRMPLS_TE_OFFSET, NULL);
+
+ return (new_internal_label - sm->ec_labels) + SRMPLS_TE_OFFSET;
+}
+
+/**
+ * @brief Take the label locks implied by a steering entry's CO bits
+ *
+ * Depending on the color-only bits a steering entry references up to
+ * three (endpoint, color) labels: the explicit endpoint, the NULL
+ * endpoint (all-zeros) and the ANY endpoint (all-ones). Unrecognized
+ * co_bits values lock nothing.
+ */
+always_inline void
+internal_label_lock_co (ip46_address_t endpoint, u32 color, char co_bits)
+{
+  ip46_address_t null_ep, any_ep;
+
+  ip46_address_reset (&null_ep);
+  any_ep.as_u64[0] = any_ep.as_u64[1] = (u64) ~0;
+
+  if (co_bits == SR_TE_CO_BITS_10)
+    {
+      internal_label_lock (endpoint, color);
+      internal_label_lock (null_ep, color);
+      internal_label_lock (any_ep, color);
+    }
+  else if (co_bits == SR_TE_CO_BITS_01)
+    {
+      internal_label_lock (endpoint, color);
+      internal_label_lock (null_ep, color);
+    }
+  else if (co_bits == SR_TE_CO_BITS_00 || co_bits == SR_TE_CO_BITS_11)
+    {
+      internal_label_lock (endpoint, color);
+    }
+}
+
+/**
+ * @brief lock the label for (NH, C)
+ * endpoint might be NULL or ANY
+ * NULL = 0, ANY=~0
+ *
+ * Silently returns if the hash tables or the (endpoint, color) entry do
+ * not exist; callers are expected to have created the label first via
+ * find_or_create_internal_label().
+ */
+void
+internal_label_lock (ip46_address_t endpoint, u32 color)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ uword *color_table, *result_label;
+
+ if (!sm->sr_policies_c2e2eclabel_hash.hash)
+ return;
+
+ color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
+ if (!color_table)
+ return;
+
+ result_label = mhash_get ((mhash_t *) color_table, &endpoint);
+
+ if (!result_label)
+ return;
+
+ /* Lock it: the pool element (indexed by label minus SRMPLS_TE_OFFSET)
+ * is the reference count itself */
+ u32 *label_lock =
+ pool_elt_at_index (sm->ec_labels, *result_label - SRMPLS_TE_OFFSET);
+ (*label_lock)++;
+}
+
+
+always_inline void
+internal_label_unlock_co (ip46_address_t endpoint, u32 color, char co_bits)
+{
+ ip46_address_t zero, any;
+ ip46_address_reset (&zero);
+ any.as_u64[0] = any.as_u64[1] = (u64) ~ 0;
+ switch (co_bits)
+ {
+ case SR_TE_CO_BITS_10:
+ internal_label_unlock (endpoint, color);
+ internal_label_unlock (zero, color);
+ internal_label_unlock (any, color);
+ break;
+ case SR_TE_CO_BITS_01:
+ internal_label_unlock (endpoint, color);
+ internal_label_unlock (zero, color);
+ break;
+ case SR_TE_CO_BITS_00:
+ case SR_TE_CO_BITS_11:
+ internal_label_unlock (endpoint, color);
+ break;
+ }
+}
+
+/**
+ * @brief Release lock on label for (endpoint, color)
+ * endpoint might be NULL or ANY
+ * NULL = 0, ANY=~0
+ *
+ * When the last lock on a label is dropped the label is freed; empty
+ * per-color tables, the outer color hash and finally the hidden TE FIB
+ * table are torn down in cascade. The teardown order matters: each
+ * container is freed only after its last element is removed.
+ */
+void
+internal_label_unlock (ip46_address_t endpoint, u32 color)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ uword *color_table, *result_label;
+
+ if (!sm->sr_policies_c2e2eclabel_hash.hash)
+ return;
+
+ color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
+ if (!color_table)
+ return;
+
+ result_label = mhash_get ((mhash_t *) color_table, &endpoint);
+
+ if (!result_label)
+ return;
+
+ /* Pool element is the reference count (see internal_label_lock) */
+ u32 *label_lock =
+ pool_elt_at_index (sm->ec_labels, *result_label - SRMPLS_TE_OFFSET);
+ (*label_lock)--;
+
+ if (*label_lock == 0)
+ {
+ /* Last user gone: free the label and its hash entry */
+ pool_put (sm->ec_labels, label_lock);
+ mhash_unset ((mhash_t *) color_table, &endpoint, NULL);
+ if (mhash_elts ((mhash_t *) color_table) == 0)
+ {
+ /* No endpoints left for this color: drop the inner table */
+ mhash_free ((mhash_t *) color_table);
+ mhash_unset (&sm->sr_policies_c2e2eclabel_hash, &color, NULL);
+ if (mhash_elts (&sm->sr_policies_c2e2eclabel_hash) == 0)
+ {
+ /* No colors left at all: drop the outer hash and release
+ * the hidden (NH,C) FIB table */
+ mhash_free (&sm->sr_policies_c2e2eclabel_hash);
+ sm->sr_policies_c2e2eclabel_hash.hash = NULL;
+ fib_table_unlock (sm->fib_table_EC, FIB_PROTOCOL_MPLS,
+ FIB_SOURCE_SR);
+ sm->fib_table_EC = (u32) ~ 0;
+ }
+ }
+ }
+}
+
+/********************* steering computation *********************************/
+/**
+ * @brief (Re)install the steered prefix in the IP FIB
+ *
+ * Builds one recursive path per applicable internal (NH,C) label —
+ * ordered by color preference according to the CO bits — plus a final
+ * IGP fallback path to the next-hop, and updates the IPv4/IPv6 FIB
+ * entry for the classified prefix.
+ */
+void
+compute_sr_te_automated_steering_fib_entry (mpls_sr_steering_policy_t *
+                                            steer_pl)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  fib_prefix_t pfx = { 0 };
+
+  u32 *internal_labels = 0;
+  ip46_address_t zero, any;
+  ip46_address_reset (&zero);
+  any.as_u64[0] = any.as_u64[1] = (u64) ~0;
+
+  /* Collect the internal labels this prefix resolves through; vector
+   * order defines path preference below */
+  u32 *color_i = NULL;
+  vec_foreach (color_i, steer_pl->color)
+  {
+    switch (steer_pl->co_bits)
+      {
+      case SR_TE_CO_BITS_10:
+        vec_add1 (internal_labels,
+                  find_or_create_internal_label (steer_pl->next_hop,
+                                                 *color_i));
+        vec_add1 (internal_labels,
+                  find_or_create_internal_label (zero, *color_i));
+        vec_add1 (internal_labels,
+                  find_or_create_internal_label (any, *color_i));
+        break;
+      case SR_TE_CO_BITS_01:
+        vec_add1 (internal_labels,
+                  find_or_create_internal_label (steer_pl->next_hop,
+                                                 *color_i));
+        vec_add1 (internal_labels,
+                  find_or_create_internal_label (zero, *color_i));
+        break;
+      case SR_TE_CO_BITS_00:
+      case SR_TE_CO_BITS_11:
+        vec_add1 (internal_labels,
+                  find_or_create_internal_label (steer_pl->next_hop,
+                                                 *color_i));
+        break;
+      }
+  }
+
+  /* Does hidden FIB already exist? */
+  if (sm->fib_table_EC == (u32) ~0)
+    {
+      sm->fib_table_EC = fib_table_create_and_lock (FIB_PROTOCOL_MPLS,
+                                                    FIB_SOURCE_SR,
+                                                    "SR-MPLS Traffic Engineering (NextHop,Color)");
+
+      fib_table_flush (sm->fib_table_EC, FIB_PROTOCOL_MPLS,
+                       FIB_SOURCE_SPECIAL);
+    }
+
+  /* Add the corresponding FIB entries */
+  fib_route_path_t path = {
+    .frp_proto = DPO_PROTO_MPLS,
+    .frp_eos = MPLS_EOS,
+    .frp_sw_if_index = ~0,
+    .frp_fib_index = sm->fib_table_EC,
+    .frp_weight = 1,
+    .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+    .frp_label_stack = 0
+  };
+  fib_route_path_t *paths = NULL;
+  fib_mpls_label_t *vpn_stack = NULL;   /* kept for cleanup below */
+
+  if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+    {
+      pfx.fp_proto = FIB_PROTOCOL_IP6;
+      pfx.fp_len = steer_pl->classify.mask_width;
+      pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+    }
+  else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+    {
+      pfx.fp_proto = FIB_PROTOCOL_IP4;
+      pfx.fp_len = steer_pl->classify.mask_width;
+      pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+    }
+
+  if (steer_pl->vpn_label != (u32) ~0)
+    {
+      fib_mpls_label_t fml = {
+        .fml_value = steer_pl->vpn_label,
+      };
+      vec_add1 (path.frp_label_stack, fml);
+      vpn_stack = path.frp_label_stack;
+      path.frp_eos = MPLS_NON_EOS;
+    }
+
+  /* One path per internal label; preference follows vector order */
+  u32 label_i;
+  vec_foreach_index (label_i, internal_labels)
+  {
+    path.frp_local_label = internal_labels[label_i];
+    path.frp_preference = label_i;
+    vec_add1 (paths, path);
+  }
+
+  /* Finally we must add to FIB IGP to N (least preferred fallback) */
+  clib_memcpy (&path.frp_addr, &steer_pl->next_hop,
+               sizeof (steer_pl->next_hop));
+  path.frp_preference = vec_len (internal_labels);
+  path.frp_label_stack = NULL;
+
+  if (steer_pl->nh_type == SR_STEER_IPV6)
+    {
+      path.frp_proto = DPO_PROTO_IP6;
+      path.frp_fib_index =
+        fib_table_find (FIB_PROTOCOL_IP6,
+                        (steer_pl->classify.fib_table !=
+                         (u32) ~0 ? steer_pl->classify.fib_table : 0));
+    }
+  else if (steer_pl->nh_type == SR_STEER_IPV4)
+    {
+      path.frp_proto = DPO_PROTO_IP4;
+      path.frp_fib_index =
+        fib_table_find (FIB_PROTOCOL_IP4,
+                        (steer_pl->classify.fib_table !=
+                         (u32) ~0 ? steer_pl->classify.fib_table : 0));
+    }
+
+  vec_add1 (paths, path);
+  if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+    fib_table_entry_update (fib_table_find
+                            (FIB_PROTOCOL_IP6,
+                             (steer_pl->classify.fib_table !=
+                              (u32) ~0 ? steer_pl->classify.fib_table : 0)),
+                            &pfx, FIB_SOURCE_SR,
+                            FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+  else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+    fib_table_entry_update (fib_table_find
+                            (FIB_PROTOCOL_IP4,
+                             (steer_pl->classify.fib_table !=
+                              (u32) ~0 ? steer_pl->classify.fib_table : 0)),
+                            &pfx, FIB_SOURCE_SR,
+                            FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+  /* BUGFIX: the original leaked internal_labels (never freed) and the
+   * vpn label-stack vector (pointer nulled above before freeing).
+   * fib_table_entry_update copies paths and label stacks. */
+  vec_free (paths);
+  vec_free (vpn_stack);
+  vec_free (internal_labels);
+}
+
+/**
+ * @brief Steer L3 traffic through a given SR-MPLS policy
+ *
+ * @param bsid is the bindingSID of the SR Policy; (u32)~0 selects
+ *        automated (next-hop, color) traffic-engineering steering
+ * @param table_id is the VRF where to install the FIB entry for the BSID
+ * @param prefix is the IPv4/v6 address for L3 traffic type
+ * @param mask_width is the mask for L3 traffic type
+ * @param traffic_type describes the type of traffic (SR_STEER_IPV4/IPV6)
+ * @param next_hop SR TE Next-Hop
+ * @param nh_type is the AF of Next-Hop
+ * @param color SR TE color
+ * @param co_bits SR TE color-only bits
+ * @param vpn_label label pushed below the policy labels ((u32)~0 = none)
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_steering_policy_add (mpls_label_t bsid, u32 table_id,
+                             ip46_address_t * prefix, u32 mask_width,
+                             u8 traffic_type, ip46_address_t * next_hop,
+                             u8 nh_type, u32 color, char co_bits,
+                             mpls_label_t vpn_label)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  sr_mpls_steering_key_t key;
+  mpls_sr_steering_policy_t *steer_pl;
+  fib_prefix_t pfx = { 0 };
+
+  mpls_sr_policy_t *sr_policy = 0;
+  uword *p = 0;
+
+  clib_memset (&key, 0, sizeof (sr_mpls_steering_key_t));
+
+  if (traffic_type != SR_STEER_IPV4 && traffic_type != SR_STEER_IPV6)
+    return -1;
+
+  /* Compute the steer policy key */
+  key.prefix.as_u64[0] = prefix->as_u64[0];
+  key.prefix.as_u64[1] = prefix->as_u64[1];
+  key.mask_width = mask_width;
+  key.fib_table = (table_id != (u32) ~0 ? table_id : 0);
+  key.traffic_type = traffic_type;
+
+  /*
+   * Search for steering policy. If already exists we are adding a new
+   * color.
+   */
+  if (!sm->sr_steer_policies_hash.hash)
+    mhash_init (&sm->sr_steer_policies_hash, sizeof (uword),
+                sizeof (sr_mpls_steering_key_t));
+
+  p = mhash_get (&sm->sr_steer_policies_hash, &key);
+  if (p)
+    {
+      steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
+      if (steer_pl->bsid != (u32) ~0)
+        return -1;              //Means we are rewritting the steering. Not allowed.
+
+      /* Means we are adding a color. Check that NH match. */
+      if (ip46_address_cmp (&steer_pl->next_hop, next_hop))
+        return -2;
+      if (vec_search (steer_pl->color, color) != ~0)
+        return -3;
+      if (steer_pl->co_bits != co_bits)
+        return -4;              /* CO colors should be the same */
+      if (steer_pl->vpn_label != vpn_label)
+        return -5;              /* VPN label should be the same */
+
+      /* Remove the steering and ReDo it */
+      vec_add1 (steer_pl->color, color);
+      vec_sort_with_function (steer_pl->color, sort_color_descent);
+      compute_sr_te_automated_steering_fib_entry (steer_pl);
+      internal_label_lock_co (steer_pl->next_hop, color, steer_pl->co_bits);
+      return 0;
+    }
+
+  /* Create a new steering policy */
+  pool_get (sm->steer_policies, steer_pl);
+  clib_memset (steer_pl, 0, sizeof (*steer_pl));
+  clib_memcpy (&steer_pl->classify.prefix, prefix, sizeof (ip46_address_t));
+  clib_memcpy (&steer_pl->next_hop, next_hop, sizeof (ip46_address_t));
+  steer_pl->nh_type = nh_type;
+  steer_pl->co_bits = co_bits;
+  steer_pl->classify.mask_width = mask_width;
+  steer_pl->classify.fib_table = (table_id != (u32) ~0 ? table_id : 0);
+  steer_pl->classify.traffic_type = traffic_type;
+  steer_pl->color = NULL;
+  steer_pl->vpn_label = vpn_label;
+
+  /* Create and store key */
+  mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies,
+             NULL);
+
+  /* Local steering */
+  if (bsid != (u32) ~0)
+    {
+      if (!sm->sr_policies_index_hash)
+        sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+      steer_pl->bsid = bsid;
+      p = hash_get (sm->sr_policies_index_hash, bsid);
+      if (!p)
+        {
+          /* BUGFIX: roll back the half-created steering policy instead
+           * of leaking a pool entry and a stale hash key */
+          mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+          pool_put (sm->steer_policies, steer_pl);
+          return -1;
+        }
+      sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+
+      fib_route_path_t path = {
+        .frp_proto = DPO_PROTO_MPLS,
+        .frp_local_label = sr_policy->bsid,
+        .frp_eos = MPLS_EOS,
+        .frp_sw_if_index = ~0,
+        .frp_fib_index = 0,
+        .frp_weight = 1,
+        .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+        .frp_label_stack = 0
+      };
+      fib_route_path_t *paths = NULL;
+
+      if (steer_pl->vpn_label != (u32) ~0)
+        {
+          fib_mpls_label_t fml = {
+            .fml_value = steer_pl->vpn_label,
+          };
+          vec_add1 (path.frp_label_stack, fml);
+        }
+
+      /* FIB API calls - Recursive route through the BindingSID */
+      if (traffic_type == SR_STEER_IPV6)
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP6;
+          pfx.fp_len = steer_pl->classify.mask_width;
+          pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+          path.frp_fib_index = 0;
+          path.frp_preference = 0;
+          vec_add1 (paths, path);
+          fib_table_entry_path_add2 (fib_table_find
+                                     (FIB_PROTOCOL_IP6,
+                                      (table_id != (u32) ~0 ? table_id : 0)),
+                                     &pfx, FIB_SOURCE_SR,
+                                     FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+          vec_free (paths);
+        }
+      else if (traffic_type == SR_STEER_IPV4)
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP4;
+          pfx.fp_len = steer_pl->classify.mask_width;
+          pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+          path.frp_fib_index = 0;
+          path.frp_preference = 0;
+          vec_add1 (paths, path);
+          fib_table_entry_path_add2 (fib_table_find
+                                     (FIB_PROTOCOL_IP4,
+                                      (table_id != (u32) ~0 ? table_id : 0)),
+                                     &pfx, FIB_SOURCE_SR,
+                                     FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+          vec_free (paths);
+        }
+
+      /* BUGFIX: release our copy of the vpn label stack; the FIB dups
+       * label stacks when creating paths (confirm fib_path_create) */
+      vec_free (path.frp_label_stack);
+    }
+  /* Automated steering */
+  else
+    {
+      steer_pl->bsid = (u32) ~0;
+      vec_add1 (steer_pl->color, color);
+      compute_sr_te_automated_steering_fib_entry (steer_pl);
+      internal_label_lock_co (steer_pl->next_hop, color, steer_pl->co_bits);
+    }
+  return 0;
+}
+
+/**
+ * @brief Delete steering rule for an SR-MPLS policy
+ *
+ * @param prefix is the IPv4/v6 address for L3 traffic type
+ * @param mask_width is the mask for L3 traffic type
+ * @param traffic_type describes the type of traffic (SR_STEER_IPV4/IPV6)
+ * @param table_id is the VRF where the FIB entry was installed
+ * @param color SR TE color (relevant for automated steering only)
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_steering_policy_del (ip46_address_t * prefix, u32 mask_width,
+                             u8 traffic_type, u32 table_id, u32 color)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  sr_mpls_steering_key_t key;
+  mpls_sr_steering_policy_t *steer_pl;
+  fib_prefix_t pfx = { 0 };
+  uword *p = 0;
+
+  clib_memset (&key, 0, sizeof (sr_mpls_steering_key_t));
+
+  /* Compute the steer policy key */
+  if (traffic_type != SR_STEER_IPV4 && traffic_type != SR_STEER_IPV6)
+    return -1;
+
+  key.prefix.as_u64[0] = prefix->as_u64[0];
+  key.prefix.as_u64[1] = prefix->as_u64[1];
+  key.mask_width = mask_width;
+  key.fib_table = (table_id != (u32) ~0 ? table_id : 0);
+  key.traffic_type = traffic_type;
+
+  if (!sm->sr_steer_policies_hash.hash)
+    mhash_init (&sm->sr_steer_policies_hash, sizeof (uword),
+                sizeof (sr_mpls_steering_key_t));
+
+  /* Search for the item */
+  p = mhash_get (&sm->sr_steer_policies_hash, &key);
+
+  if (!p)
+    return -1;
+
+  /* Retrieve Steer Policy function */
+  steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
+
+  if (steer_pl->bsid == (u32) ~0)
+    {
+      /* Automated steering: remove the color from the color vector.
+       * BUGFIX: guard against an unknown color — vec_search returns ~0
+       * when not found, and the original passed that straight to
+       * vec_del1 (out-of-bounds delete). */
+      u32 color_index = vec_search (steer_pl->color, color);
+      if (color_index == (u32) ~0)
+        return -1;
+      vec_del1 (steer_pl->color, color_index);
+
+      if (vec_len (steer_pl->color))
+        {
+          /* Reorder Colors */
+          vec_sort_with_function (steer_pl->color, sort_color_descent);
+          compute_sr_te_automated_steering_fib_entry (steer_pl);
+          /* Remove all the locks for this ones... */
+          internal_label_unlock_co (steer_pl->next_hop, color,
+                                    steer_pl->co_bits);
+          return 0;
+        }
+      else
+        {
+          vec_free (steer_pl->color);
+          /* Remove FIB entry */
+          if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+            {
+              pfx.fp_proto = FIB_PROTOCOL_IP6;
+              pfx.fp_len = steer_pl->classify.mask_width;
+              pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+              fib_table_entry_delete (fib_table_find
+                                      (FIB_PROTOCOL_IP6,
+                                       steer_pl->classify.fib_table), &pfx,
+                                      FIB_SOURCE_SR);
+            }
+          else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+            {
+              pfx.fp_proto = FIB_PROTOCOL_IP4;
+              pfx.fp_len = steer_pl->classify.mask_width;
+              pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+              fib_table_entry_delete (fib_table_find
+                                      (FIB_PROTOCOL_IP4,
+                                       steer_pl->classify.fib_table), &pfx,
+                                      FIB_SOURCE_SR);
+            }
+          /* Remove all the locks for this ones... */
+          internal_label_unlock_co (steer_pl->next_hop, color,
+                                    steer_pl->co_bits);
+        }
+    }
+  else                          //Remove by BSID
+    {
+      if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP6;
+          pfx.fp_len = steer_pl->classify.mask_width;
+          pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+          fib_table_entry_delete (fib_table_find
+                                  (FIB_PROTOCOL_IP6,
+                                   steer_pl->classify.fib_table), &pfx,
+                                  FIB_SOURCE_SR);
+        }
+      else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP4;
+          pfx.fp_len = steer_pl->classify.mask_width;
+          pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+          fib_table_entry_delete (fib_table_find
+                                  (FIB_PROTOCOL_IP4,
+                                   steer_pl->classify.fib_table), &pfx,
+                                  FIB_SOURCE_SR);
+        }
+    }
+  /* Delete SR steering policy entry */
+  pool_put (sm->steer_policies, steer_pl);
+  mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+  if (mhash_elts (&sm->sr_steer_policies_hash) == 0)
+    {
+      mhash_free (&sm->sr_steer_policies_hash);
+      sm->sr_steer_policies_hash.hash = NULL;
+    }
+  return 0;
+}
+
+/**
+ * @brief CLI to add/delete steering of L3 traffic through an SR-MPLS policy
+ *
+ * Two mutually exclusive forms: "via sr policy bsid X" (local steering)
+ * or "via next-hop N color C co B" (automated TE steering).
+ */
+static clib_error_t *
+sr_mpls_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_del = 0;
+
+ ip46_address_t prefix, nh;
+ u32 dst_mask_width = 0;
+ u8 traffic_type = 0;
+ u8 nh_type = 0;
+ u32 fib_table = (u32) ~ 0, color = (u32) ~ 0;
+ u32 co_bits = 0;
+
+ mpls_label_t bsid, vpn_label = (u32) ~ 0;
+
+ u8 sr_policy_set = 0;
+
+ clib_memset (&prefix, 0, sizeof (ip46_address_t));
+ clib_memset (&nh, 0, sizeof (ip46_address_t));
+
+ int rv;
+ /* Guards (!traffic_type, !sr_policy_set, ...) make each option
+ * one-shot; parsing stops at the first unrecognized token */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else if (!traffic_type
+ && unformat (input, "l3 %U/%d", unformat_ip6_address,
+ &prefix.ip6, &dst_mask_width))
+ traffic_type = SR_STEER_IPV6;
+ else if (!traffic_type
+ && unformat (input, "l3 %U/%d", unformat_ip4_address,
+ &prefix.ip4, &dst_mask_width))
+ traffic_type = SR_STEER_IPV4;
+ else if (!sr_policy_set
+ && unformat (input, "via sr policy bsid %U",
+ unformat_mpls_unicast_label, &bsid))
+ sr_policy_set = 1;
+ else if (!sr_policy_set
+ && unformat (input, "via next-hop %U color %d co %d",
+ unformat_ip4_address, &nh.ip4, &color, &co_bits))
+ {
+ sr_policy_set = 1;
+ nh_type = SR_STEER_IPV4;
+ }
+ else if (!sr_policy_set
+ && unformat (input, "via next-hop %U color %d co %d",
+ unformat_ip6_address, &nh.ip6, &color, &co_bits))
+ {
+ sr_policy_set = 1;
+ nh_type = SR_STEER_IPV6;
+ }
+ else if (fib_table == (u32) ~ 0
+ && unformat (input, "fib-table %d", &fib_table));
+ else if (unformat (input, "vpn-label %U",
+ unformat_mpls_unicast_label, &vpn_label));
+ else
+ break;
+ }
+
+ if (!traffic_type)
+ return clib_error_return (0, "No L3 traffic specified");
+ if (!sr_policy_set)
+ return clib_error_return (0, "No SR policy specified");
+
+ /* Make sure that the prefixes are clean (host bits zeroed) */
+ if (traffic_type == SR_STEER_IPV4)
+ {
+ u32 mask =
+ (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0);
+ prefix.ip4.as_u32 &= mask;
+ }
+ else if (traffic_type == SR_STEER_IPV6)
+ {
+ ip6_address_t mask;
+ ip6_address_mask_from_width (&mask, dst_mask_width);
+ ip6_address_mask (&prefix.ip6, &mask);
+ }
+
+ /* next-hop form implies automated steering: no BSID */
+ if (nh_type)
+ bsid = (u32) ~ 0;
+
+ if (is_del)
+ rv =
+ sr_mpls_steering_policy_del (&prefix, dst_mask_width,
+ traffic_type, fib_table, color);
+
+ else
+ rv =
+ sr_mpls_steering_policy_add (bsid, fib_table, &prefix, dst_mask_width,
+ traffic_type, &nh, nh_type, color, co_bits,
+ vpn_label);
+
+ /* Map the add/del return codes onto CLI errors */
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -1:
+ return clib_error_return (0, "Incorrect API usage.");
+ case -2:
+ return clib_error_return (0, "The Next-Hop does not match.");
+ case -3:
+ return clib_error_return (0, "The color already exists.");
+ case -4:
+ return clib_error_return (0, "The co-bits do not match.");
+ case -5:
+ return clib_error_return (0, "The VPN-labels do not match.");
+ default:
+ return clib_error_return (0, "BUG: sr steer policy returns %d", rv);
+ }
+ return 0;
+}
+
+/* CLI registration: "sr mpls steer".
+ * BUGFIX: the long_help examples showed "sr steer ... via sr_policy bsid",
+ * which does not match the registered path ("sr mpls steer") nor the
+ * parser's "via sr policy bsid" token sequence. */
+VLIB_CLI_COMMAND(sr_mpls_steer_policy_command, static)=
+{
+ .path = "sr mpls steer",
+ .short_help = "sr mpls steer (del) l3 <ip_addr/mask> "
+ "via [sr policy bsid <mpls_label> || next-hop <ip46_addr> color <u32> co <0|1|2|3> ](fib-table <fib_table_index>)(vpn-label 500)",
+ .long_help =
+ "\tSteer L3 traffic through an existing SR policy.\n"
+ "\tExamples:\n"
+ "\t\tsr mpls steer l3 2001::/64 via sr policy bsid 29999\n"
+ "\t\tsr mpls steer del l3 2001::/64 via sr policy bsid 29999\n"
+ "\t\tsr mpls steer l3 2001::/64 via next-hop 1.1.1.1 color 1234 co 0\n"
+ "\t\tsr mpls steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500\n",
+ .function = sr_mpls_steer_policy_command_fn,
+};
+
+/**
+ * @brief CLI to display all SR-MPLS steering policies
+ */
+static clib_error_t *
+show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm,
+                                           unformat_input_t * input,
+                                           vlib_cli_command_t * cmd)
+{
+  mpls_sr_main_t *sm = &sr_mpls_main;
+  mpls_sr_steering_policy_t **steer_policies = 0;
+  mpls_sr_steering_policy_t *steer_pl;
+
+  int i;
+
+  vlib_cli_output (vm, "SR MPLS steering policies:");
+  /* Snapshot the pool so we can iterate by index */
+  pool_foreach (steer_pl, sm->steer_policies) {
+    vec_add1 (steer_policies, steer_pl);
+  }
+  for (i = 0; i < vec_len (steer_policies); i++)
+    {
+      vlib_cli_output (vm, "==========================");
+      steer_pl = steer_policies[i];
+      if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+        {
+          vlib_cli_output (vm, "Prefix: %U/%d via:",
+                           format_ip4_address,
+                           &steer_pl->classify.prefix.ip4,
+                           steer_pl->classify.mask_width);
+        }
+      else if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+        {
+          vlib_cli_output (vm, "Prefix: %U/%d via:",
+                           format_ip6_address,
+                           &steer_pl->classify.prefix.ip6,
+                           steer_pl->classify.mask_width);
+        }
+
+      if (steer_pl->bsid != (u32) ~0)
+        {
+          vlib_cli_output (vm, "· BSID %U",
+                           format_mpls_unicast_label, steer_pl->bsid);
+        }
+      else
+        {
+          if (steer_pl->nh_type == SR_STEER_IPV4)
+            {
+              vlib_cli_output (vm, "· Next-hop %U",
+                               format_ip4_address, &steer_pl->next_hop.ip4);
+            }
+          else if (steer_pl->nh_type == SR_STEER_IPV6)
+            {
+              vlib_cli_output (vm, "· Next-hop %U",
+                               format_ip6_address, &steer_pl->next_hop.ip6);
+            }
+
+          u32 *color_i = 0;
+          u8 *s = NULL;
+          s = format (s, "[ ");
+          vec_foreach (color_i, steer_pl->color)
+          {
+            s = format (s, "%d, ", *color_i);
+          }
+          s = format (s, "\b\b ]");
+          /* BUGFIX: use %v — the format buffer is a vec, not a
+           * NUL-terminated C string */
+          vlib_cli_output (vm, "· Color %v", s);
+          /* BUGFIX: the format buffer was leaked on every iteration */
+          vec_free (s);
+
+          switch (steer_pl->co_bits)
+            {
+            case SR_TE_CO_BITS_00:
+              vlib_cli_output (vm, "· CO-bits: 00");
+              break;
+            case SR_TE_CO_BITS_01:
+              vlib_cli_output (vm, "· CO-bits: 01");
+              break;
+            case SR_TE_CO_BITS_10:
+              vlib_cli_output (vm, "· CO-bits: 10");
+              break;
+            case SR_TE_CO_BITS_11:
+              vlib_cli_output (vm, "· CO-bits: 11");
+              break;
+            }
+        }
+    }
+  /* BUGFIX: free the temporary snapshot vector (cf. the policies show
+   * command, which does free its vector) */
+  vec_free (steer_policies);
+  return 0;
+}
+
+/* CLI registration: "show sr mpls steering policies" dumps every steering
+ * entry via show_sr_mpls_steering_policies_command_fn above. */
+VLIB_CLI_COMMAND(show_sr_mpls_steering_policies_command, static)=
+{
+ .path = "show sr mpls steering policies",
+ .short_help = "show sr mpls steering policies",
+ .function = show_sr_mpls_steering_policies_command_fn,
+};
+
+/**
+ * @brief SR-MPLS steering initialization
+ *
+ * Resets the steering hash, marks the hidden (NH,C) FIB as not yet
+ * created, and starts with an empty internal-label pool.
+ *
+ * @param vm vlib main (unused)
+ * @return always 0 (no error)
+ */
+clib_error_t *
+sr_mpls_steering_init (vlib_main_t * vm)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+
+ /* Init memory for function keys */
+ sm->sr_steer_policies_hash.hash = NULL;
+
+ sm->fib_table_EC = (u32) ~ 0;
+ sm->ec_labels = 0;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(sr_mpls_steering_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables: eval: (c-set-style "gnu") End:
+ */
diff --git a/src/plugins/srmpls/sr_mpls_test.c b/src/plugins/srmpls/sr_mpls_test.c
new file mode 100644
index 00000000000..7aff4c32b06
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls_test.c
@@ -0,0 +1,174 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vpp/api/types.h>
+
+#define __plugin_msg_base sr_mpls_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <vnet/format_fns.h>
+#include <plugins/srmpls/sr_mpls.api_enum.h>
+#include <plugins/srmpls/sr_mpls.api_types.h>
+
+#define vl_endianfun /* define message structures */
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_endianfun
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ u32 ping_id;
+ vat_main_t *vat_main;
+} sr_mpls_test_main_t;
+
+static sr_mpls_test_main_t sr_mpls_test_main;
+
+static int
+api_sr_mpls_policy_mod (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_mpls_steering_add_del (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_mpls_policy_assign_endpoint_color (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_mpls_policy_add (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sr_mpls_policy_add_t *mp;
+ u32 bsid = 0;
+ u32 weight = 1;
+ u8 type = 0;
+ u8 n_segments = 0;
+ u32 sid;
+ u32 *segments = NULL;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bsid %d", &bsid))
+ ;
+ else if (unformat (i, "weight %d", &weight))
+ ;
+ else if (unformat (i, "spray"))
+ type = 1;
+ else if (unformat (i, "next %d", &sid))
+ {
+ n_segments += 1;
+ vec_add1 (segments, htonl (sid));
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (bsid == 0)
+ {
+ errmsg ("bsid not set");
+ return -99;
+ }
+
+ if (n_segments == 0)
+ {
+ errmsg ("no sid in segment stack");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M2 (SR_MPLS_POLICY_ADD, mp, sizeof (u32) * n_segments);
+
+ mp->bsid = htonl (bsid);
+ mp->weight = htonl (weight);
+ mp->is_spray = type;
+ mp->n_segments = n_segments;
+ memcpy (mp->segments, segments, sizeof (u32) * n_segments);
+ vec_free (segments);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sr_mpls_policy_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sr_mpls_policy_del_t *mp;
+ u32 bsid = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bsid %d", &bsid))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (bsid == 0)
+ {
+ errmsg ("bsid not set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SR_MPLS_POLICY_DEL, mp);
+
+ mp->bsid = htonl (bsid);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#include <plugins/srmpls/sr_mpls.api_test.c>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/srtp/srtp.c b/src/plugins/srtp/srtp.c
index bb54e672918..6862301d2d2 100644
--- a/src/plugins/srtp/srtp.c
+++ b/src/plugins/srtp/srtp.c
@@ -641,10 +641,12 @@ srtp_connect (transport_endpoint_cfg_t *tep)
application_t *app;
srtp_tc_t *ctx;
u32 ctx_index;
+ transport_endpt_ext_cfg_t *ext_cfg;
int rv;
sep = (session_endpoint_cfg_t *) tep;
- if (!sep->ext_cfg)
+ ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_NONE);
+ if (!ext_cfg)
return SESSION_E_NOEXTCFG;
app_wrk = app_worker_get (sep->app_wrk_index);
@@ -658,7 +660,7 @@ srtp_connect (transport_endpoint_cfg_t *tep)
ctx->srtp_ctx_handle = ctx_index;
ctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
- srtp_init_policy (ctx, (transport_endpt_cfg_srtp_t *) sep->ext_cfg->data);
+ srtp_init_policy (ctx, (transport_endpt_cfg_srtp_t *) ext_cfg->data);
clib_memcpy_fast (&cargs->sep, sep, sizeof (session_endpoint_t));
cargs->sep.transport_proto = TRANSPORT_PROTO_UDP;
@@ -723,9 +725,11 @@ srtp_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
app_listener_t *al;
srtp_tc_t *lctx;
u32 lctx_index;
+ transport_endpt_ext_cfg_t *ext_cfg;
sep = (session_endpoint_cfg_t *) tep;
- if (!sep->ext_cfg)
+ ext_cfg = session_endpoint_get_ext_cfg (sep, TRANSPORT_ENDPT_EXT_CFG_NONE);
+ if (!ext_cfg)
return SESSION_E_NOEXTCFG;
app_wrk = app_worker_get (sep->app_wrk_index);
@@ -756,7 +760,7 @@ srtp_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
lctx->c_s_index = app_listener_index;
lctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
- srtp_init_policy (lctx, (transport_endpt_cfg_srtp_t *) sep->ext_cfg->data);
+ srtp_init_policy (lctx, (transport_endpt_cfg_srtp_t *) ext_cfg->data);
SRTP_DBG (1, "Started listening %d", lctx_index);
return lctx_index;
diff --git a/src/plugins/tlsmbedtls/tls_mbedtls.c b/src/plugins/tlsmbedtls/tls_mbedtls.c
index af04f1adeb0..2f4757e28a1 100644
--- a/src/plugins/tlsmbedtls/tls_mbedtls.c
+++ b/src/plugins/tlsmbedtls/tls_mbedtls.c
@@ -396,6 +396,8 @@ mbedtls_ctx_handshake_rx (tls_ctx_t * ctx)
if (mc->ssl.state != MBEDTLS_SSL_HANDSHAKE_OVER)
return 0;
+ ctx->flags |= TLS_CONN_F_HS_DONE;
+
/*
* Handshake complete
*/
@@ -532,17 +534,10 @@ mbedtls_ctx_read (tls_ctx_t * ctx, session_t * tls_session)
return enq;
}
-static u8
-mbedtls_handshake_is_over (tls_ctx_t * ctx)
-{
- mbedtls_ctx_t *mc = (mbedtls_ctx_t *) ctx;
- return (mc->ssl.state == MBEDTLS_SSL_HANDSHAKE_OVER);
-}
-
static int
mbedtls_transport_close (tls_ctx_t * ctx)
{
- if (!mbedtls_handshake_is_over (ctx))
+ if (!(ctx->flags & TLS_CONN_F_HS_DONE))
{
session_close (session_get_from_handle (ctx->tls_session_handle));
return 0;
@@ -554,7 +549,7 @@ mbedtls_transport_close (tls_ctx_t * ctx)
static int
mbedtls_transport_reset (tls_ctx_t *ctx)
{
- if (!mbedtls_handshake_is_over (ctx))
+ if (!(ctx->flags & TLS_CONN_F_HS_DONE))
{
session_close (session_get_from_handle (ctx->tls_session_handle));
return 0;
@@ -590,7 +585,6 @@ const static tls_engine_vft_t mbedtls_engine = {
.ctx_init_client = mbedtls_ctx_init_client,
.ctx_write = mbedtls_ctx_write,
.ctx_read = mbedtls_ctx_read,
- .ctx_handshake_is_over = mbedtls_handshake_is_over,
.ctx_start_listen = mbedtls_start_listen,
.ctx_stop_listen = mbedtls_stop_listen,
.ctx_transport_close = mbedtls_transport_close,
diff --git a/src/plugins/tlsopenssl/tls_openssl.c b/src/plugins/tlsopenssl/tls_openssl.c
index 5d172a0adcf..c8e685f20c5 100644
--- a/src/plugins/tlsopenssl/tls_openssl.c
+++ b/src/plugins/tlsopenssl/tls_openssl.c
@@ -1037,15 +1037,6 @@ openssl_ctx_init_server (tls_ctx_t * ctx)
return 0;
}
-static u8
-openssl_handshake_is_over (tls_ctx_t * ctx)
-{
- openssl_ctx_t *mc = (openssl_ctx_t *) ctx;
- if (!mc->ssl)
- return 0;
- return SSL_is_init_finished (mc->ssl);
-}
-
static int
openssl_transport_close (tls_ctx_t * ctx)
{
@@ -1054,7 +1045,7 @@ openssl_transport_close (tls_ctx_t * ctx)
return 0;
#endif
- if (!openssl_handshake_is_over (ctx))
+ if (!(ctx->flags & TLS_CONN_F_HS_DONE))
{
openssl_handle_handshake_failure (ctx);
return 0;
@@ -1066,7 +1057,7 @@ openssl_transport_close (tls_ctx_t * ctx)
static int
openssl_transport_reset (tls_ctx_t *ctx)
{
- if (!openssl_handshake_is_over (ctx))
+ if (!(ctx->flags & TLS_CONN_F_HS_DONE))
{
openssl_handle_handshake_failure (ctx);
return 0;
@@ -1166,7 +1157,6 @@ const static tls_engine_vft_t openssl_engine = {
.ctx_init_client = openssl_ctx_init_client,
.ctx_write = openssl_ctx_write,
.ctx_read = openssl_ctx_read,
- .ctx_handshake_is_over = openssl_handshake_is_over,
.ctx_start_listen = openssl_start_listen,
.ctx_stop_listen = openssl_stop_listen,
.ctx_transport_close = openssl_transport_close,
@@ -1286,7 +1276,10 @@ tls_openssl_set_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
else
{
- vnet_session_enable_disable (vm, 1);
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
+ vnet_session_enable_disable (vm, &args);
if (openssl_engine_register (engine_name, engine_alg, async) < 0)
{
return clib_error_return (0, "Failed to register %s polling",
diff --git a/src/plugins/tlspicotls/tls_picotls.c b/src/plugins/tlspicotls/tls_picotls.c
index 7375b928206..9459cb776b5 100644
--- a/src/plugins/tlspicotls/tls_picotls.c
+++ b/src/plugins/tlspicotls/tls_picotls.c
@@ -88,14 +88,6 @@ picotls_lctx_get (u32 lctx_index)
return pool_elt_at_index (picotls_main.lctx_pool, lctx_index);
}
-static u8
-picotls_handshake_is_over (tls_ctx_t * ctx)
-{
- picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
- assert (ptls_ctx->tls);
- return ptls_handshake_is_complete (ptls_ctx->tls);
-}
-
static int
picotls_try_handshake_write (picotls_ctx_t * ptls_ctx,
session_t * tls_session, ptls_buffer_t * buf)
@@ -194,7 +186,7 @@ picotls_confirm_app_close (tls_ctx_t * ctx)
static int
picotls_transport_close (tls_ctx_t * ctx)
{
- if (!picotls_handshake_is_over (ctx))
+ if (!(ctx->flags & TLS_CONN_F_HS_DONE))
{
picotls_handle_handshake_failure (ctx);
return 0;
@@ -206,7 +198,7 @@ picotls_transport_close (tls_ctx_t * ctx)
static int
picotls_transport_reset (tls_ctx_t *ctx)
{
- if (!picotls_handshake_is_over (ctx))
+ if (!(ctx->flags & TLS_CONN_F_HS_DONE))
{
picotls_handle_handshake_failure (ctx);
return 0;
@@ -435,7 +427,7 @@ picotls_ctx_read (tls_ctx_t *ctx, session_t *tcp_session)
if (PREDICT_FALSE (!ptls_handshake_is_complete (ptls_ctx->tls)))
{
picotls_do_handshake (ptls_ctx, tcp_session);
- if (picotls_handshake_is_over (ctx))
+ if (ctx->flags & TLS_CONN_F_HS_DONE)
{
if (ptls_is_server (ptls_ctx->tls))
{
@@ -750,7 +742,6 @@ const static tls_engine_vft_t picotls_engine = {
.ctx_free = picotls_ctx_free,
.ctx_get = picotls_ctx_get,
.ctx_get_w_thread = picotls_ctx_get_w_thread,
- .ctx_handshake_is_over = picotls_handshake_is_over,
.ctx_start_listen = picotls_start_listen,
.ctx_stop_listen = picotls_stop_listen,
.ctx_init_server = picotls_ctx_init_server,
diff --git a/src/plugins/unittest/fib_test.c b/src/plugins/unittest/fib_test.c
index fbac809d726..491d135322c 100644
--- a/src/plugins/unittest/fib_test.c
+++ b/src/plugins/unittest/fib_test.c
@@ -10264,7 +10264,57 @@ fib_test_inherit (void)
&l99_o_10_10_10_3),
"%U via interposer label",
format_fib_prefix,&pfx_10_10_10_21_s_32);
+ fib_table_entry_special_remove(0,
+ &pfx_10_10_10_0_s_24,
+ FIB_SOURCE_SPECIAL);
+
+ const ip46_address_t nh_0_0_0_0 = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x00000000),
+ };
+ const fib_prefix_t pfx_0_0_0_0_s_0 = {
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = nh_0_0_0_0,
+ };
+ /* we have prio(API) < prio(hi_src) < prio(SPECIAL) */
+ /* Add/remove an interposer source from the top of the subtrie. The
+ * interposer source is inherited.
+ */
+ fib_table_entry_special_dpo_add(0,
+ &pfx_0_0_0_0_s_0,
+ hi_src,
+ (FIB_ENTRY_FLAG_COVERED_INHERIT |
+ FIB_ENTRY_FLAG_INTERPOSE),
+ &interposer);
+ /*
+ * Add/remove an interposer source from the top of the subtrie. The
+ * interposer source is inherited, the previous inheritance is discarded.
+ */
+ fib_table_entry_special_dpo_add(0,
+ &pfx_10_10_10_0_s_24,
+ FIB_SOURCE_SPECIAL,
+ (FIB_ENTRY_FLAG_COVERED_INHERIT |
+ FIB_ENTRY_FLAG_INTERPOSE),
+ &interposer);
+ /* force a tree walk */
+ fib_table_entry_update_one_path(0,
+ &pfx_0_0_0_0_s_0,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ &nh_10_10_10_3,
+ tm->hw[0]->sw_if_index,
+ ~0,
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_special_remove(0,
+ &pfx_10_10_10_0_s_24,
+ FIB_SOURCE_SPECIAL);
+ fib_table_entry_special_remove(0,
+ &pfx_0_0_0_0_s_0,
+ hi_src);
/*
* cleanup
*/
@@ -10275,6 +10325,7 @@ fib_test_inherit (void)
fib_table_entry_delete(0, &pfx_10_10_10_0_s_24, FIB_SOURCE_API);
fib_table_entry_delete(0, &pfx_10_10_0_0_s_16, FIB_SOURCE_API);
fib_table_entry_delete(0, &pfx_10_10_10_0_s_24, FIB_SOURCE_SPECIAL);
+ fib_table_entry_delete(0, &pfx_0_0_0_0_s_0, FIB_SOURCE_API);
adj_unlock(ai_10_10_10_1);
adj_unlock(ai_10_10_10_2);
adj_unlock(ai_10_10_10_3);
diff --git a/src/plugins/unittest/policer_test.c b/src/plugins/unittest/policer_test.c
index 2b14bf687bf..41f769960a3 100644
--- a/src/plugins/unittest/policer_test.c
+++ b/src/plugins/unittest/policer_test.c
@@ -21,7 +21,7 @@ policer_test (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd_arg)
{
int policer_index, i;
- uint rate_kbps, burst, num_pkts;
+ unsigned int rate_kbps, burst, num_pkts;
double total_bytes, cpu_ticks_per_pkt, time = 0;
double cpu_speed, cpu_ticks_per_byte;
policer_result_e result, input_colour = POLICE_CONFORM;
diff --git a/src/plugins/unittest/segment_manager_test.c b/src/plugins/unittest/segment_manager_test.c
index a106470ee48..29da662e007 100644
--- a/src/plugins/unittest/segment_manager_test.c
+++ b/src/plugins/unittest/segment_manager_test.c
@@ -739,8 +739,11 @@ segment_manager_test (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd_arg)
{
int res = 0;
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
- vnet_session_enable_disable (vm, 1);
+ vnet_session_enable_disable (vm, &args);
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
diff --git a/src/plugins/unittest/session_test.c b/src/plugins/unittest/session_test.c
index b7627acc129..7702e817070 100644
--- a/src/plugins/unittest/session_test.c
+++ b/src/plugins/unittest/session_test.c
@@ -13,13 +13,11 @@
* limitations under the License.
*/
-#include <vnet/session/application_namespace.h>
-#include <vnet/session/application_interface.h>
+#include <arpa/inet.h>
#include <vnet/session/application.h>
#include <vnet/session/session.h>
-#include <vnet/session/session_rules_table.h>
-#include <vnet/tcp/tcp.h>
#include <sys/epoll.h>
+#include <vnet/session/session_rules_table.h>
#define SESSION_TEST_I(_cond, _comment, _args...) \
({ \
@@ -133,7 +131,8 @@ session_create_lookpback (u32 table_id, u32 * sw_if_index,
if (table_id != 0)
{
- ip_table_create (FIB_PROTOCOL_IP4, table_id, 0, 0);
+ ip_table_create (FIB_PROTOCOL_IP4, table_id, 0 /* is_api */,
+ 1 /* create_mfib */, 0);
ip_table_bind (FIB_PROTOCOL_IP4, *sw_if_index, table_id);
}
@@ -774,10 +773,37 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input)
return 0;
}
+static void
+session_test_disable_rt_backend_engine (vlib_main_t *vm)
+{
+ session_enable_disable_args_t args = { .is_en = 0,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_DISABLE };
+ vnet_session_enable_disable (vm, &args);
+}
+
+static void
+session_test_enable_rule_table_engine (vlib_main_t *vm)
+{
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
+ vnet_session_enable_disable (vm, &args);
+}
+
+static void
+session_test_enable_sdl_engine (vlib_main_t *vm)
+{
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_SDL };
+ vnet_session_enable_disable (vm, &args);
+}
+
static int
session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
{
- session_rules_table_t _srt, *srt = &_srt;
+ session_table_t *st = session_table_alloc ();
u16 lcl_port = 1234, rmt_port = 4321;
u32 action_index = 1, res;
ip4_address_t lcl_lkup, rmt_lkup;
@@ -795,8 +821,13 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
}
}
- clib_memset (srt, 0, sizeof (*srt));
- session_rules_table_init (srt);
+ session_test_disable_rt_backend_engine (vm);
+ session_test_enable_rule_table_engine (vm);
+
+ session_table_init (st, FIB_PROTOCOL_MAX);
+ vec_add1 (st->appns_index,
+ app_namespace_index (app_namespace_get_default ()));
+ session_rules_table_init (st, FIB_PROTOCOL_MAX);
ip4_address_t lcl_ip = {
.as_u32 = clib_host_to_net_u32 (0x01020304),
@@ -835,12 +866,13 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
.action_index = action_index++,
.is_add = 1,
};
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Add 1.2.3.4/16 1234 5.6.7.8/16 4321 action %d",
action_index - 1);
- res =
- session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port, rmt_port);
SESSION_TEST ((res == 1),
"Lookup 1.2.3.4 1234 5.6.7.8 4321, action should " "be 1: %d",
res);
@@ -851,13 +883,15 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
args.lcl.fp_addr.ip4 = lcl_ip;
args.lcl.fp_len = 24;
args.action_index = action_index++;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Add 1.2.3.4/24 1234 5.6.7.8/16 4321 action %d",
action_index - 1);
args.rmt.fp_addr.ip4 = rmt_ip;
args.rmt.fp_len = 24;
args.action_index = action_index++;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Add 1.2.3.4/24 1234 5.6.7.8/24 4321 action %d",
action_index - 1);
@@ -869,13 +903,15 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
args.rmt.fp_addr.ip4 = rmt_ip2;
args.rmt.fp_len = 16;
args.action_index = action_index++;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Add 2.2.2.2/24 1234 6.6.6.6/16 4321 action %d",
action_index - 1);
args.lcl.fp_addr.ip4 = lcl_ip3;
args.rmt.fp_addr.ip4 = rmt_ip3;
args.action_index = action_index++;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Add 3.3.3.3/24 1234 7.7.7.7/16 4321 action %d",
action_index - 1);
@@ -885,7 +921,8 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
args.lcl.fp_addr.ip4 = lcl_ip3;
args.rmt.fp_addr.ip4 = rmt_ip3;
args.action_index = action_index++;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "overwrite 3.3.3.3/24 1234 7.7.7.7/16 4321 "
"action %d", action_index - 1);
@@ -893,23 +930,22 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
* Lookup 1.2.3.4/32 1234 5.6.7.8/32 4321, 1.2.2.4/32 1234 5.6.7.9/32 4321
* and 3.3.3.3 1234 7.7.7.7 4321
*/
- res =
- session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port, rmt_port);
SESSION_TEST ((res == 3),
"Lookup 1.2.3.4 1234 5.6.7.8 4321 action " "should be 3: %d",
res);
lcl_lkup.as_u32 = clib_host_to_net_u32 (0x01020204);
rmt_lkup.as_u32 = clib_host_to_net_u32 (0x05060709);
- res =
- session_rules_table_lookup4 (srt, &lcl_lkup,
- &rmt_lkup, lcl_port, rmt_port);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_lkup, &rmt_lkup, lcl_port, rmt_port);
SESSION_TEST ((res == 1),
"Lookup 1.2.2.4 1234 5.6.7.9 4321, action " "should be 1: %d",
res);
- res =
- session_rules_table_lookup4 (srt, &lcl_ip3, &rmt_ip3, lcl_port, rmt_port);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip3, &rmt_ip3, lcl_port, rmt_port);
SESSION_TEST ((res == 6),
"Lookup 3.3.3.3 1234 7.7.7.7 4321, action "
"should be 6 (updated): %d", res);
@@ -925,17 +961,17 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
args.lcl_port = 0;
args.rmt_port = 0;
args.action_index = action_index++;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Add 1.2.3.4/24 * 5.6.7.8/24 * action %d",
action_index - 1);
- res =
- session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port, rmt_port);
SESSION_TEST ((res == 7),
"Lookup 1.2.3.4 1234 5.6.7.8 4321, action should"
" be 7 (lpm dst): %d", res);
- res =
- session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip,
- lcl_port + 1, rmt_port);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port + 1, rmt_port);
SESSION_TEST ((res == 7),
"Lookup 1.2.3.4 1235 5.6.7.8 4321, action should " "be 7: %d",
res);
@@ -947,7 +983,8 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
* 1.2.3.4 1235 5.6.7.8 4322
*/
args.is_add = 0;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Del 1.2.3.4/24 * 5.6.7.8/24 *");
args.lcl.fp_addr.ip4 = lcl_ip;
@@ -958,7 +995,8 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
args.rmt_port = 0;
args.action_index = action_index++;
args.is_add = 1;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Add 1.2.3.4/16 * 5.6.7.8/16 * action %d",
action_index - 1);
@@ -970,27 +1008,28 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
args.rmt_port = rmt_port;
args.action_index = action_index++;
args.is_add = 1;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Add 1.2.3.4/24 1235 5.6.7.8/24 4321 action %d",
action_index - 1);
if (verbose)
- session_rules_table_cli_dump (vm, srt, FIB_PROTOCOL_IP4);
+ session_rules_table_cli_dump (vm, st->srtg_handle, TRANSPORT_PROTO_TCP,
+ FIB_PROTOCOL_IP4);
- res =
- session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port, rmt_port);
SESSION_TEST ((res == 3),
"Lookup 1.2.3.4 1234 5.6.7.8 4321, action should " "be 3: %d",
res);
- res =
- session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip,
- lcl_port + 1, rmt_port);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port + 1, rmt_port);
SESSION_TEST ((res == 9),
"Lookup 1.2.3.4 1235 5.6.7.8 4321, action should " "be 9: %d",
res);
res =
- session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip,
- lcl_port + 1, rmt_port + 1);
+ session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP, &lcl_ip,
+ &rmt_ip, lcl_port + 1, rmt_port + 1);
SESSION_TEST ((res == 8),
"Lookup 1.2.3.4 1235 5.6.7.8 4322, action should " "be 8: %d",
res);
@@ -1004,10 +1043,11 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
args.lcl.fp_len = 16;
args.rmt.fp_len = 16;
args.is_add = 0;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Del 1.2.0.0/16 1234 5.6.0.0/16 4321");
- res =
- session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port, rmt_port);
SESSION_TEST ((res == 3),
"Lookup 1.2.3.4 1234 5.6.7.8 4321, action should " "be 3: %d",
res);
@@ -1015,10 +1055,11 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
args.lcl_port = 0;
args.rmt_port = 0;
args.is_add = 0;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Del 1.2.0.0/16 * 5.6.0.0/16 *");
- res =
- session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port, rmt_port);
SESSION_TEST ((res == 3),
"Lookup 1.2.3.4 1234 5.6.7.8 4321, action should " "be 3: %d",
res);
@@ -1033,12 +1074,15 @@ session_test_rule_table (vlib_main_t * vm, unformat_input_t * input)
args.lcl_port = 1234;
args.rmt_port = 4321;
args.is_add = 0;
- error = session_rules_table_add_del (srt, &args);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
SESSION_TEST ((error == 0), "Del 1.2.3.4/24 1234 5.6.7.5/24");
- res =
- session_rules_table_lookup4 (srt, &lcl_ip, &rmt_ip, lcl_port, rmt_port);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port, rmt_port);
SESSION_TEST ((res == 2), "Action should be 2: %d", res);
+ session_table_free (st, FIB_PROTOCOL_MAX);
+
return 0;
}
@@ -1074,6 +1118,9 @@ session_test_rules (vlib_main_t * vm, unformat_input_t * input)
}
}
+ session_test_disable_rt_backend_engine (vm);
+ session_test_enable_rule_table_engine (vm);
+
server_sep.is_ip4 = 1;
server_sep.port = placeholder_port;
clib_memset (options, 0, sizeof (options));
@@ -2073,13 +2120,330 @@ session_test_mq_basic (vlib_main_t * vm, unformat_input_t * input)
return 0;
}
+static f32
+session_get_memory_usage (void)
+{
+ clib_mem_heap_t *heap = clib_mem_get_per_cpu_heap ();
+ u8 *s = 0;
+ char *ss;
+ f32 used = 0.0;
+
+ s = format (s, "%U\n", format_clib_mem_heap, heap, 0);
+ ss = strstr ((char *) s, "used:");
+ if (ss)
+ sscanf (ss, "used: %f", &used);
+ else
+ clib_warning ("substring 'used:' not found from show memory");
+ vec_free (s);
+ return (used);
+}
+
+static int
+session_test_enable_disable (vlib_main_t *vm, unformat_input_t *input)
+{
+ u32 iteration = 100, i;
+ uword was_enabled;
+ f32 was_using, now_using;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "repeat %d", &iteration))
+ ;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'", format_unformat_error,
+ input);
+ return -1;
+ }
+ }
+
+ was_enabled = clib_mem_trace_enable_disable (0);
+ /* warm up */
+ for (i = 0; i < 10; i++)
+ {
+ session_test_disable_rt_backend_engine (vm);
+ session_test_enable_sdl_engine (vm);
+ session_test_disable_rt_backend_engine (vm);
+ session_test_enable_rule_table_engine (vm);
+ }
+ was_using = session_get_memory_usage ();
+
+ for (i = 0; i < iteration; i++)
+ {
+ session_test_disable_rt_backend_engine (vm);
+ session_test_enable_sdl_engine (vm);
+ session_test_disable_rt_backend_engine (vm);
+ session_test_enable_rule_table_engine (vm);
+ }
+ now_using = session_get_memory_usage ();
+
+ clib_mem_trace_enable_disable (was_enabled);
+ SESSION_TEST ((was_using == now_using), "was using %.2fM, now using %.2fM",
+ was_using, now_using);
+
+ return 0;
+}
+
+static int
+session_test_sdl (vlib_main_t *vm, unformat_input_t *input)
+{
+ session_table_t *st = session_table_alloc ();
+ u16 lcl_port = 0, rmt_port = 0;
+ u32 action_index = 1, res;
+ int verbose = 0, error;
+ ip4_address_t rmt_ip;
+ const char ip_str_1234[] = "1.2.3.4";
+ inet_pton (AF_INET, ip_str_1234, &rmt_ip);
+ ip4_address_t lcl_ip = {
+ .as_u32 = clib_host_to_net_u32 (0x0),
+ };
+ ip6_address_t lcl_ip6 = {
+ .as_u64 = { 0, 0 },
+ };
+ fib_prefix_t rmt_pref = {
+ .fp_addr.ip4.as_u32 = rmt_ip.as_u32,
+ .fp_len = 16,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ };
+ fib_prefix_t lcl_pref = {
+ .fp_addr.ip4.as_u32 = lcl_ip.as_u32,
+ .fp_len = 0,
+ .fp_proto = 0,
+ };
+ session_rule_table_add_del_args_t args = {
+ .lcl = lcl_pref,
+ .rmt = rmt_pref,
+ .lcl_port = lcl_port,
+ .rmt_port = rmt_port,
+ .action_index = action_index++,
+ .is_add = 1,
+ };
+ const char ip_str_1200[] = "1.2.0.0";
+ const char ip_str_1230[] = "1.2.3.0";
+ const char ip_str_1111[] = "1.1.1.1";
+ const char ip6_str[] = "2501:0db8:85a3:0000:0000:8a2e:0371:1";
+ const char ip6_str2[] = "2501:0db8:85a3:0000:0000:8a2e:0372:1";
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ {
+ vlib_cli_output (vm, "parse error: '%U'", format_unformat_error,
+ input);
+ return -1;
+ }
+ }
+
+ session_test_disable_rt_backend_engine (vm);
+ session_test_enable_sdl_engine (vm);
+
+ session_table_init (st, FIB_PROTOCOL_MAX);
+ vec_add1 (st->appns_index,
+ app_namespace_index (app_namespace_get_default ()));
+ session_rules_table_init (st, FIB_PROTOCOL_MAX);
+
+ /* Add 1.2.0.0/16 */
+ args.rmt.fp_len = 16;
+ inet_pton (AF_INET, ip_str_1200, &args.rmt.fp_addr.ip4.as_u32);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
+ SESSION_TEST ((error == 0), "Add %s/%d action %d", ip_str_1200,
+ args.rmt.fp_len, action_index - 1);
+
+ /* Lookup 1.2.3.4 */
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port, rmt_port);
+ SESSION_TEST ((res == action_index - 1),
+ "Lookup %s, action should "
+ "be 1: %d",
+ ip_str_1234, action_index - 1);
+
+ /*
+ * Add 1.2.3.0/24
+ */
+ args.rmt.fp_len = 24;
+ inet_pton (AF_INET, ip_str_1230, &args.rmt.fp_addr.ip4.as_u32);
+ args.action_index = action_index++;
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
+ SESSION_TEST ((error == 0), "Add %s/%d action %d", ip_str_1230,
+ args.rmt.fp_len, action_index - 1);
+
+ /* Lookup 1.2.3.4 */
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port, rmt_port);
+ SESSION_TEST ((res == action_index - 1),
+ "Lookup %s, action should "
+ "be 2: %d",
+ ip_str_1234, action_index - 1);
+
+ /* look up 1.1.1.1, should be -1 (invalid index) */
+ inet_pton (AF_INET, ip_str_1111, &rmt_ip);
+ res = session_rules_table_lookup4 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip, &rmt_ip, lcl_port, rmt_port);
+ SESSION_TEST ((res == SESSION_TABLE_INVALID_INDEX),
+ "Lookup %s, action should "
+ "be -1: %d",
+ ip_str_1111, res);
+
+ /* Add again 1.2.0.0/16, should be rejected */
+ args.rmt.fp_len = 16;
+ inet_pton (AF_INET, ip_str_1200, &args.rmt.fp_addr.ip4.as_u32);
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
+ SESSION_TEST ((error == SESSION_E_IPINUSE), "Add %s/%d action %d",
+ ip_str_1200, args.rmt.fp_len, error);
+ /*
+ * Add 0.0.0.0/0, should get an error
+ */
+ args.rmt.fp_len = 0;
+ args.rmt.fp_addr.ip4.as_u32 = 0;
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
+ SESSION_TEST ((error == SESSION_E_IPINUSE), "Add 0.0.0.0/%d action %d",
+ args.rmt.fp_len, error);
+
+ /* delete 0.0.0.0 should be rejected */
+ args.is_add = 0;
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
+ SESSION_TEST ((error == SESSION_E_NOROUTE), "Del 0.0.0.0/%d action %d",
+ args.rmt.fp_len, error);
+ if (verbose)
+ session_rules_table_cli_dump (vm, st->srtg_handle, TRANSPORT_PROTO_TCP,
+ FIB_PROTOCOL_IP4);
+
+ /*
+ * Clean up
+ * Delete 1.2.0.0/16
+ * Delete 1.2.3.0/24
+ */
+ inet_pton (AF_INET, ip_str_1200, &args.rmt.fp_addr.ip4.as_u32);
+ args.rmt.fp_len = 16;
+ args.is_add = 0;
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
+ SESSION_TEST ((error == 0), "Del %s/%d should 0: %d", ip_str_1200,
+ args.rmt.fp_len, error);
+
+ inet_pton (AF_INET, ip_str_1230, &args.rmt.fp_addr.ip4.as_u32);
+ args.rmt.fp_len = 24;
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
+ SESSION_TEST ((error == 0), "Del %s/%d, should be 0: %d", ip_str_1230,
+ args.rmt.fp_len, error);
+ if (verbose)
+ session_rules_table_cli_dump (vm, st->srtg_handle, TRANSPORT_PROTO_TCP,
+ FIB_PROTOCOL_IP4);
+
+ /* ip6 tests */
+
+ /*
+ * Add ip6 2001:0db8:85a3:0000:0000:8a2e:0371:1/124
+ */
+ ip6_address_t lcl_lkup;
+ inet_pton (AF_INET6, ip6_str, &args.rmt.fp_addr.ip6);
+ args.rmt.fp_len = 124;
+ args.rmt.fp_proto = FIB_PROTOCOL_IP6;
+ args.action_index = action_index++;
+ args.is_add = 1;
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
+ SESSION_TEST ((error == 0), "Add %s/%d action %d", ip6_str, args.rmt.fp_len,
+ action_index - 1);
+ if (verbose)
+ session_rules_table_cli_dump (vm, st->srtg_handle, TRANSPORT_PROTO_TCP,
+ FIB_PROTOCOL_IP6);
+
+ /* Lookup 2001:0db8:85a3:0000:0000:8a2e:0371:1 */
+ res = session_rules_table_lookup6 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip6, &args.rmt.fp_addr.ip6, lcl_port,
+ rmt_port);
+ SESSION_TEST ((res == action_index - 1),
+ "Lookup %s action should "
+ "be 3: %d",
+ ip6_str, action_index - 1);
+
+ /* Lookup 2001:0db8:85a3:0000:0000:8a2e:0372:1 */
+ inet_pton (AF_INET6, ip6_str2, &lcl_lkup);
+ res = session_rules_table_lookup6 (st->srtg_handle, TRANSPORT_PROTO_TCP,
+ &lcl_ip6, &lcl_lkup, lcl_port, rmt_port);
+ SESSION_TEST ((res == SESSION_TABLE_INVALID_INDEX),
+ "Lookup %s action should "
+ "be -1: %d",
+ ip6_str2, res);
+
+ /*
+ * del ip6 2001:0db8:85a3:0000:0000:8a2e:0371:1/124
+ */
+ args.is_add = 0;
+ args.rmt.fp_len = 124;
+ error =
+ session_rules_table_add_del (st->srtg_handle, TRANSPORT_PROTO_TCP, &args);
+ SESSION_TEST ((error == 0), "del %s/%d, should be 0: %d", ip6_str,
+ args.rmt.fp_len, error);
+ if (verbose)
+ session_rules_table_cli_dump (vm, st->srtg_handle, TRANSPORT_PROTO_TCP,
+ FIB_PROTOCOL_IP6);
+
+ session_table_free (st, FIB_PROTOCOL_MAX);
+
+ return 0;
+}
+
+static int
+session_test_ext_cfg (vlib_main_t *vm, unformat_input_t *input)
+{
+ session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
+ transport_endpt_ext_cfg_t *ext_cfg;
+
+ ext_cfg = session_endpoint_add_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_HTTP,
+ sizeof (ext_cfg->opaque));
+ ext_cfg->opaque = 60;
+
+ ext_cfg =
+ session_endpoint_add_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO,
+ sizeof (transport_endpt_crypto_cfg_t));
+ ext_cfg->crypto.ckpair_index = 1;
+
+ ext_cfg = session_endpoint_add_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_NONE,
+ sizeof (ext_cfg->opaque));
+ ext_cfg->opaque = 345;
+
+ ext_cfg = session_endpoint_get_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_HTTP);
+ SESSION_TEST ((ext_cfg != 0),
+ "TRANSPORT_ENDPT_EXT_CFG_HTTP should be present");
+ SESSION_TEST ((ext_cfg->opaque == 60),
+ "TRANSPORT_ENDPT_EXT_CFG_HTTP opaque value should be 60: %u",
+ ext_cfg->opaque);
+ ext_cfg =
+ session_endpoint_get_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+ SESSION_TEST ((ext_cfg != 0),
+ "TRANSPORT_ENDPT_EXT_CFG_CRYPTO should be present");
+ SESSION_TEST (
+ (ext_cfg->crypto.ckpair_index == 1),
+ "TRANSPORT_ENDPT_EXT_CFG_HTTP ckpair_index value should be 1: %u",
+ ext_cfg->crypto.ckpair_index);
+ ext_cfg = session_endpoint_get_ext_cfg (&sep, TRANSPORT_ENDPT_EXT_CFG_NONE);
+ SESSION_TEST ((ext_cfg != 0),
+ "TRANSPORT_ENDPT_EXT_CFG_NONE should be present");
+ SESSION_TEST ((ext_cfg->opaque == 345),
+ "TRANSPORT_ENDPT_EXT_CFG_HTTP opaque value should be 345: %u",
+ ext_cfg->opaque);
+ session_endpoint_free_ext_cfgs (&sep);
+
+ return 0;
+}
+
static clib_error_t *
session_test (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd_arg)
{
int res = 0;
- vnet_session_enable_disable (vm, 1);
+ session_test_enable_rule_table_engine (vm);
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -2099,6 +2463,12 @@ session_test (vlib_main_t * vm,
res = session_test_mq_speed (vm, input);
else if (unformat (input, "mq-basic"))
res = session_test_mq_basic (vm, input);
+ else if (unformat (input, "enable-disable"))
+ res = session_test_enable_disable (vm, input);
+ else if (unformat (input, "sdl"))
+ res = session_test_sdl (vm, input);
+ else if (unformat (input, "ext-cfg"))
+ res = session_test_ext_cfg (vm, input);
else if (unformat (input, "all"))
{
if ((res = session_test_basic (vm, input)))
@@ -2117,6 +2487,12 @@ session_test (vlib_main_t * vm,
goto done;
if ((res = session_test_mq_basic (vm, input)))
goto done;
+ if ((res = session_test_sdl (vm, input)))
+ goto done;
+ if ((res = session_test_ext_cfg (vm, input)))
+ goto done;
+ if ((res = session_test_enable_disable (vm, input)))
+ goto done;
}
else
break;
diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c
index 34033a0b622..bd39474ce93 100644
--- a/src/plugins/unittest/tcp_test.c
+++ b/src/plugins/unittest/tcp_test.c
@@ -1550,8 +1550,11 @@ tcp_test (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd_arg)
{
int res = 0;
+ session_enable_disable_args_t args = { .is_en = 1,
+ .rt_engine_type =
+ RT_BACKEND_ENGINE_RULE_TABLE };
- vnet_session_enable_disable (vm, 1);
+ vnet_session_enable_disable (vm, &args);
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
diff --git a/src/plugins/unittest/util_test.c b/src/plugins/unittest/util_test.c
index 53384e55494..5b7e30bc21f 100644
--- a/src/plugins/unittest/util_test.c
+++ b/src/plugins/unittest/util_test.c
@@ -101,6 +101,36 @@ VLIB_CLI_COMMAND (test_hash_command, static) =
.function = test_hash_command_fn,
};
+static void *
+leak_memory_fn (void *args)
+{
+ u8 *p = 0;
+ vec_validate (p, 100);
+ p = 0;
+ return 0;
+}
+
+static clib_error_t *
+test_mem_leak_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ /* do memory leak from thread, so no 'unix_cli' in traceback */
+ pthread_t thread;
+ int rv = pthread_create (&thread, NULL, leak_memory_fn, 0);
+ if (rv)
+ {
+ return clib_error_return (0, "pthread_create failed");
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (test_mem_leak_command, static) = {
+ .path = "test mem-leak",
+ .short_help = "leak some memory",
+ .function = test_mem_leak_command_fn,
+};
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/urpf/CMakeLists.txt b/src/plugins/urpf/CMakeLists.txt
index 2f44e3b2344..f665d30b0bb 100644
--- a/src/plugins/urpf/CMakeLists.txt
+++ b/src/plugins/urpf/CMakeLists.txt
@@ -22,6 +22,10 @@ add_vpp_plugin(urpf
ip4_urpf.c
ip6_urpf.c
+ INSTALL_HEADERS
+ urpf_dp.h
+ urpf.h
+
API_FILES
urpf.api
)
diff --git a/src/plugins/urpf/urpf.c b/src/plugins/urpf/urpf.c
index e5209caafb4..1e7d6c0fb91 100644
--- a/src/plugins/urpf/urpf.c
+++ b/src/plugins/urpf/urpf.c
@@ -60,7 +60,17 @@ static const char *urpf_feats[N_AF][VLIB_N_DIR][URPF_N_MODES] =
urpf_data_t *urpf_cfgs[N_AF][VLIB_N_DIR];
u8 *
-format_urpf_mode (u8 * s, va_list * a)
+format_urpf_trace (u8 *s, va_list *va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ urpf_trace_t *t = va_arg (*va, urpf_trace_t *);
+
+ return format (s, "uRPF:%d fib:%d", t->urpf, t->fib_index);
+}
+
+__clib_export u8 *
+format_urpf_mode (u8 *s, va_list *a)
{
urpf_mode_t mode = va_arg (*a, int);
@@ -76,8 +86,8 @@ format_urpf_mode (u8 * s, va_list * a)
return (format (s, "unknown"));
}
-static uword
-unformat_urpf_mode (unformat_input_t * input, va_list * args)
+__clib_export uword
+unformat_urpf_mode (unformat_input_t *input, va_list *args)
{
urpf_mode_t *mode = va_arg (*args, urpf_mode_t *);
@@ -94,7 +104,16 @@ unformat_urpf_mode (unformat_input_t * input, va_list * args)
return 0;
}
-int
+__clib_export int
+urpf_feature_enable_disable (ip_address_family_t af, vlib_dir_t dir,
+ urpf_mode_t mode, u32 sw_if_index, int enable)
+{
+ return vnet_feature_enable_disable (urpf_feat_arcs[af][dir],
+ urpf_feats[af][dir][mode], sw_if_index,
+ enable, 0, 0);
+}
+
+__clib_export int
urpf_update (urpf_mode_t mode, u32 sw_if_index, ip_address_family_t af,
vlib_dir_t dir, u32 table_id)
{
diff --git a/src/plugins/urpf/urpf.h b/src/plugins/urpf/urpf.h
index 6983a2b440c..a40a25df16b 100644
--- a/src/plugins/urpf/urpf.h
+++ b/src/plugins/urpf/urpf.h
@@ -32,7 +32,15 @@ typedef enum urpf_mode_t_
#define URPF_N_MODES (URPF_MODE_STRICT+1)
-extern u8 *format_urpf_mode (u8 * s, va_list * a);
+typedef struct
+{
+ index_t urpf;
+ u32 fib_index;
+} urpf_trace_t;
+
+u8 *format_urpf_trace (u8 *s, va_list *va);
+u8 *format_urpf_mode (u8 *s, va_list *a);
+uword unformat_urpf_mode (unformat_input_t *input, va_list *args);
typedef struct
{
@@ -43,8 +51,8 @@ typedef struct
extern urpf_data_t *urpf_cfgs[N_AF][VLIB_N_DIR];
-extern int urpf_update (urpf_mode_t mode, u32 sw_if_index,
- ip_address_family_t af, vlib_dir_t dir, u32 table_id);
+int urpf_update (urpf_mode_t mode, u32 sw_if_index, ip_address_family_t af,
+ vlib_dir_t dir, u32 table_id);
#endif
diff --git a/src/plugins/urpf/urpf_dp.h b/src/plugins/urpf/urpf_dp.h
index 816d8b70b90..b17fed7e04b 100644
--- a/src/plugins/urpf/urpf_dp.h
+++ b/src/plugins/urpf/urpf_dp.h
@@ -53,22 +53,6 @@
*
* This file contains the interface unicast source check.
*/
-typedef struct
-{
- index_t urpf;
-} urpf_trace_t;
-
-static u8 *
-format_urpf_trace (u8 * s, va_list * va)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
- urpf_trace_t *t = va_arg (*va, urpf_trace_t *);
-
- s = format (s, "uRPF:%d", t->urpf);
-
- return s;
-}
#define foreach_urpf_error \
_(DROP, "uRPF Drop") \
@@ -87,10 +71,157 @@ typedef enum
URPF_N_NEXT,
} urpf_next_t;
+static_always_inline u32
+urpf_get_fib_index (vlib_buffer_t *b, ip_address_family_t af, vlib_dir_t dir)
+{
+ u32 sw_if_index = vnet_buffer (b)->sw_if_index[dir];
+ return vec_elt (urpf_cfgs[af][dir], sw_if_index).fib_index;
+}
+
+static_always_inline void
+urpf_perform_check_x1 (ip_address_family_t af, vlib_dir_t dir,
+ urpf_mode_t mode, vlib_buffer_t *b, const u8 *h,
+ u32 fib_index, load_balance_t **lb, u32 *pass)
+{
+ load_balance_t *llb;
+ u32 lpass;
+ u32 lb_index;
+
+ ASSERT (fib_index != ~0);
+
+ if (AF_IP4 == af)
+ {
+ const ip4_header_t *ip;
+
+ ip = (ip4_header_t *) h;
+
+ lb_index = ip4_fib_forwarding_lookup (fib_index, &ip->src_address);
+
+ /* Pass multicast. */
+ lpass = (ip4_address_is_multicast (&ip->src_address) ||
+ ip4_address_is_global_broadcast (&ip->src_address));
+ }
+ else
+ {
+ const ip6_header_t *ip;
+
+ ip = (ip6_header_t *) h;
+
+ lb_index = ip6_fib_table_fwding_lookup (fib_index, &ip->src_address);
+ lpass = ip6_address_is_multicast (&ip->src_address);
+ }
+
+ llb = load_balance_get (lb_index);
+
+ if (URPF_MODE_STRICT == mode)
+ {
+ int res;
+
+ res = fib_urpf_check (llb->lb_urpf, vnet_buffer (b)->sw_if_index[dir]);
+ if (VLIB_RX == dir)
+ lpass |= res;
+ else
+ {
+ lpass |= !res && fib_urpf_check_size (llb->lb_urpf);
+ lpass |= b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+ }
+ else
+ lpass |= fib_urpf_check_size (llb->lb_urpf);
+
+ *lb = llb;
+ *pass = lpass;
+}
+
+static_always_inline void
+urpf_perform_check_x2 (ip_address_family_t af, vlib_dir_t dir,
+ urpf_mode_t mode, vlib_buffer_t *b0, vlib_buffer_t *b1,
+ const u8 *h0, const u8 *h1, u32 fib_index0,
+ u32 fib_index1, load_balance_t **lb0,
+ load_balance_t **lb1, u32 *pass0, u32 *pass1)
+{
+ load_balance_t *llb0, *llb1;
+ u32 lpass0, lpass1;
+ u32 lb_index0, lb_index1;
+
+ ASSERT (fib_index0 != ~0);
+ ASSERT (fib_index1 != ~0);
+
+ if (AF_IP4 == af)
+ {
+ const ip4_header_t *ip0, *ip1;
+
+ ip0 = (ip4_header_t *) h0;
+ ip1 = (ip4_header_t *) h1;
+
+ ip4_fib_forwarding_lookup_x2 (fib_index0, fib_index1, &ip0->src_address,
+ &ip1->src_address, &lb_index0, &lb_index1);
+ /* Pass multicast. */
+ lpass0 = (ip4_address_is_multicast (&ip0->src_address) ||
+ ip4_address_is_global_broadcast (&ip0->src_address));
+ lpass1 = (ip4_address_is_multicast (&ip1->src_address) ||
+ ip4_address_is_global_broadcast (&ip1->src_address));
+ }
+ else
+ {
+ const ip6_header_t *ip0, *ip1;
+
+ ip0 = (ip6_header_t *) h0;
+ ip1 = (ip6_header_t *) h1;
+
+ lb_index0 = ip6_fib_table_fwding_lookup (fib_index0, &ip0->src_address);
+ lb_index1 = ip6_fib_table_fwding_lookup (fib_index1, &ip1->src_address);
+ lpass0 = ip6_address_is_multicast (&ip0->src_address);
+ lpass1 = ip6_address_is_multicast (&ip1->src_address);
+ }
+
+ llb0 = load_balance_get (lb_index0);
+ llb1 = load_balance_get (lb_index1);
+
+ if (URPF_MODE_STRICT == mode)
+ {
+	      /* for RX the check is: would this source address be
+	       * forwarded out of the interface on which it was received,
+	       * if yes allow. For TX it's: would this source address be
+	       * forwarded out of the interface through which it is being
+	       * sent, if yes drop.
+	       */
+ int res0, res1;
+
+ res0 =
+ fib_urpf_check (llb0->lb_urpf, vnet_buffer (b0)->sw_if_index[dir]);
+ res1 =
+ fib_urpf_check (llb1->lb_urpf, vnet_buffer (b1)->sw_if_index[dir]);
+
+ if (VLIB_RX == dir)
+ {
+ lpass0 |= res0;
+ lpass1 |= res1;
+ }
+ else
+ {
+ lpass0 |= !res0 && fib_urpf_check_size (llb0->lb_urpf);
+ lpass1 |= !res1 && fib_urpf_check_size (llb1->lb_urpf);
+
+ /* allow locally generated */
+ lpass0 |= b0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ lpass1 |= b1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+ }
+ else
+ {
+ lpass0 |= fib_urpf_check_size (llb0->lb_urpf);
+ lpass1 |= fib_urpf_check_size (llb1->lb_urpf);
+ }
+
+ *lb0 = llb0;
+ *lb1 = llb1;
+ *pass0 = lpass0;
+ *pass1 = lpass1;
+}
+
static_always_inline uword
-urpf_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
+urpf_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
ip_address_family_t af, vlib_dir_t dir, urpf_mode_t mode)
{
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
@@ -106,8 +237,8 @@ urpf_inline (vlib_main_t * vm,
while (n_left >= 4)
{
- u32 pass0, lb_index0, pass1, lb_index1;
- const load_balance_t *lb0, *lb1;
+ u32 pass0, pass1;
+ load_balance_t *lb0 = 0, *lb1 = 0;
u32 fib_index0, fib_index1;
const u8 *h0, *h1;
@@ -121,87 +252,32 @@ urpf_inline (vlib_main_t * vm,
h0 = (u8 *) vlib_buffer_get_current (b[0]);
h1 = (u8 *) vlib_buffer_get_current (b[1]);
-
if (VLIB_TX == dir)
{
h0 += vnet_buffer (b[0])->ip.save_rewrite_length;
h1 += vnet_buffer (b[1])->ip.save_rewrite_length;
}
- fib_index0 =
- urpf_cfgs[af][dir][vnet_buffer (b[0])->sw_if_index[dir]].fib_index;
- fib_index1 =
- urpf_cfgs[af][dir][vnet_buffer (b[1])->sw_if_index[dir]].fib_index;
+ fib_index0 = urpf_get_fib_index (b[0], af, dir);
+ fib_index1 = urpf_get_fib_index (b[1], af, dir);
+ urpf_perform_check_x2 (af, dir, mode, b[0], b[1], h0, h1, fib_index0,
+ fib_index1, &lb0, &lb1, &pass0, &pass1);
- if (AF_IP4 == af)
- {
- const ip4_header_t *ip0, *ip1;
-
- ip0 = (ip4_header_t *) h0;
- ip1 = (ip4_header_t *) h1;
-
- ip4_fib_forwarding_lookup_x2 (fib_index0,
- fib_index1,
- &ip0->src_address,
- &ip1->src_address,
- &lb_index0, &lb_index1);
- /* Pass multicast. */
- pass0 = (ip4_address_is_multicast (&ip0->src_address) ||
- ip4_address_is_global_broadcast (&ip0->src_address));
- pass1 = (ip4_address_is_multicast (&ip1->src_address) ||
- ip4_address_is_global_broadcast (&ip1->src_address));
- }
- else
+ if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
{
- const ip6_header_t *ip0, *ip1;
-
- ip0 = (ip6_header_t *) h0;
- ip1 = (ip6_header_t *) h1;
-
- lb_index0 = ip6_fib_table_fwding_lookup (fib_index0,
- &ip0->src_address);
- lb_index1 = ip6_fib_table_fwding_lookup (fib_index1,
- &ip1->src_address);
- pass0 = ip6_address_is_multicast (&ip0->src_address);
- pass1 = ip6_address_is_multicast (&ip1->src_address);
- }
-
- lb0 = load_balance_get (lb_index0);
- lb1 = load_balance_get (lb_index1);
+ urpf_trace_t *t;
- if (URPF_MODE_STRICT == mode)
- {
- /* for RX the check is: would this source adddress be forwarded
- * out of the interface on which it was recieved, if yes allow.
- * For TX it's; would this source address be forwarded out of the
- * interface through which it is being sent, if yes drop.
- */
- int res0, res1;
-
- res0 = fib_urpf_check (lb0->lb_urpf,
- vnet_buffer (b[0])->sw_if_index[dir]);
- res1 = fib_urpf_check (lb1->lb_urpf,
- vnet_buffer (b[1])->sw_if_index[dir]);
-
- if (VLIB_RX == dir)
- {
- pass0 |= res0;
- pass1 |= res1;
- }
- else
- {
- pass0 |= !res0 && fib_urpf_check_size (lb0->lb_urpf);
- pass1 |= !res1 && fib_urpf_check_size (lb1->lb_urpf);
-
- /* allow locally generated */
- pass0 |= b[0]->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
- pass1 |= b[1]->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
- }
+ t = vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->urpf = lb0 ? lb0->lb_urpf : ~0;
+ t->fib_index = fib_index0;
}
- else
+ if (b[1]->flags & VLIB_BUFFER_IS_TRACED)
{
- pass0 |= fib_urpf_check_size (lb0->lb_urpf);
- pass1 |= fib_urpf_check_size (lb1->lb_urpf);
+ urpf_trace_t *t;
+
+ t = vlib_add_trace (vm, node, b[1], sizeof (*t));
+ t->urpf = lb1 ? lb1->lb_urpf : ~0;
+ t->fib_index = fib_index1;
}
if (PREDICT_TRUE (pass0))
@@ -218,22 +294,6 @@ urpf_inline (vlib_main_t * vm,
next[1] = URPF_NEXT_DROP;
b[1]->error = node->errors[URPF_ERROR_DROP];
}
-
- if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
- {
- urpf_trace_t *t;
-
- t = vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->urpf = lb0->lb_urpf;
- }
- if (b[1]->flags & VLIB_BUFFER_IS_TRACED)
- {
- urpf_trace_t *t;
-
- t = vlib_add_trace (vm, node, b[1], sizeof (*t));
- t->urpf = lb1->lb_urpf;
- }
-
b += 2;
next += 2;
n_left -= 2;
@@ -241,8 +301,8 @@ urpf_inline (vlib_main_t * vm,
while (n_left)
{
- u32 pass0, lb_index0, fib_index0;
- const load_balance_t *lb0;
+ u32 pass0, fib_index0;
+ load_balance_t *lb0 = 0;
const u8 *h0;
h0 = (u8 *) vlib_buffer_get_current (b[0]);
@@ -250,51 +310,18 @@ urpf_inline (vlib_main_t * vm,
if (VLIB_TX == dir)
h0 += vnet_buffer (b[0])->ip.save_rewrite_length;
- fib_index0 =
- urpf_cfgs[af][dir][vnet_buffer (b[0])->sw_if_index[dir]].fib_index;
-
- if (AF_IP4 == af)
- {
- const ip4_header_t *ip0;
-
- ip0 = (ip4_header_t *) h0;
-
- lb_index0 = ip4_fib_forwarding_lookup (fib_index0,
- &ip0->src_address);
+ fib_index0 = urpf_get_fib_index (b[0], af, dir);
+ urpf_perform_check_x1 (af, dir, mode, b[0], h0, fib_index0, &lb0,
+ &pass0);
- /* Pass multicast. */
- pass0 = (ip4_address_is_multicast (&ip0->src_address) ||
- ip4_address_is_global_broadcast (&ip0->src_address));
- }
- else
+ if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
{
- const ip6_header_t *ip0;
-
- ip0 = (ip6_header_t *) h0;
-
- lb_index0 = ip6_fib_table_fwding_lookup (fib_index0,
- &ip0->src_address);
- pass0 = ip6_address_is_multicast (&ip0->src_address);
- }
-
- lb0 = load_balance_get (lb_index0);
+ urpf_trace_t *t;
- if (URPF_MODE_STRICT == mode)
- {
- int res0;
-
- res0 = fib_urpf_check (lb0->lb_urpf,
- vnet_buffer (b[0])->sw_if_index[dir]);
- if (VLIB_RX == dir)
- pass0 |= res0;
- else
- {
- pass0 |= !res0 && fib_urpf_check_size (lb0->lb_urpf);
- pass0 |= b[0]->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED;
- }
+ t = vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->urpf = lb0 ? lb0->lb_urpf : ~0;
+ t->fib_index = fib_index0;
}
- else
- pass0 |= fib_urpf_check_size (lb0->lb_urpf);
if (PREDICT_TRUE (pass0))
vnet_feature_next_u16 (&next[0], b[0]);
@@ -303,14 +330,6 @@ urpf_inline (vlib_main_t * vm,
next[0] = URPF_NEXT_DROP;
b[0]->error = node->errors[URPF_ERROR_DROP];
}
-
- if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
- {
- urpf_trace_t *t;
-
- t = vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->urpf = lb0->lb_urpf;
- }
b++;
next++;
n_left--;
diff --git a/src/plugins/wireguard/wireguard_chachapoly.c b/src/plugins/wireguard/wireguard_chachapoly.c
index 0dd7908d2e2..ad644ff6cb8 100644
--- a/src/plugins/wireguard/wireguard_chachapoly.c
+++ b/src/plugins/wireguard/wireguard_chachapoly.c
@@ -72,11 +72,11 @@ wg_xchacha20poly1305_encrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
u64 h_nonce;
clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce));
- h_nonce = le64toh (h_nonce);
+ h_nonce = clib_little_to_host_u64 (h_nonce);
hchacha20 (derived_key, nonce, key);
for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++)
- (derived_key[i]) = htole32 ((derived_key[i]));
+ (derived_key[i]) = clib_host_to_little_u32 ((derived_key[i]));
uint32_t key_idx;
@@ -102,11 +102,11 @@ wg_xchacha20poly1305_decrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
u64 h_nonce;
clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce));
- h_nonce = le64toh (h_nonce);
+ h_nonce = clib_little_to_host_u64 (h_nonce);
hchacha20 (derived_key, nonce, key);
for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++)
- (derived_key[i]) = htole32 ((derived_key[i]));
+ (derived_key[i]) = clib_host_to_little_u32 ((derived_key[i]));
uint32_t key_idx;
diff --git a/src/plugins/wireguard/wireguard_noise.c b/src/plugins/wireguard/wireguard_noise.c
index 5fe2e44b03b..c3f28f442f5 100644
--- a/src/plugins/wireguard/wireguard_noise.c
+++ b/src/plugins/wireguard/wireguard_noise.c
@@ -751,8 +751,8 @@ noise_tai64n_now (uint8_t output[NOISE_TIMESTAMP_LEN])
unix_nanosec &= REJECT_INTERVAL_MASK;
/* https://cr.yp.to/libtai/tai64.html */
- sec = htobe64 (0x400000000000000aULL + unix_sec);
- nsec = htobe32 (unix_nanosec);
+ sec = clib_host_to_big_u64 (0x400000000000000aULL + unix_sec);
+ nsec = clib_host_to_big_u32 (unix_nanosec);
/* memcpy to output buffer, assuming output could be unaligned. */
clib_memcpy (output, &sec, sizeof (sec));