aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/acl/acl_test.c37
-rw-r--r--src/plugins/acl/sess_mgmt_node.c5
-rw-r--r--src/plugins/crypto_native/CMakeLists.txt11
-rw-r--r--src/plugins/crypto_native/aes_cbc.c106
-rw-r--r--src/plugins/crypto_native/aes_ctr.c64
-rw-r--r--src/plugins/crypto_native/aes_gcm.c71
-rw-r--r--src/plugins/crypto_native/crypto_native.h67
-rw-r--r--src/plugins/crypto_native/main.c117
-rw-r--r--src/plugins/crypto_native/sha2.c186
-rw-r--r--src/plugins/dev_iavf/port.c43
-rw-r--r--src/plugins/dev_octeon/init.c25
-rw-r--r--src/plugins/dev_octeon/octeon.h5
-rw-r--r--src/plugins/dev_octeon/port.c82
-rw-r--r--src/plugins/dev_octeon/tx_node.c88
-rw-r--r--src/plugins/dpdk/CMakeLists.txt6
-rw-r--r--src/plugins/dpdk/device/cli.c9
-rw-r--r--src/plugins/dpdk/device/common.c2
-rw-r--r--src/plugins/dpdk/device/dpdk.h4
-rw-r--r--src/plugins/dpdk/device/init.c37
-rw-r--r--src/plugins/dpdk/main.c7
-rw-r--r--src/plugins/fateshare/fateshare.c40
-rw-r--r--src/plugins/fateshare/vpp_fateshare_monitor.c28
-rw-r--r--src/plugins/hs_apps/CMakeLists.txt4
-rw-r--r--src/plugins/hs_apps/http_cli.c7
-rw-r--r--src/plugins/hs_apps/http_client_cli.c39
-rw-r--r--src/plugins/http/http.c206
-rw-r--r--src/plugins/http/http.h1
-rw-r--r--src/plugins/ikev2/ikev2.api57
-rw-r--r--src/plugins/ikev2/ikev2.c12
-rw-r--r--src/plugins/ikev2/ikev2_api.c183
-rw-r--r--src/plugins/ikev2/ikev2_cli.c8
-rw-r--r--src/plugins/ikev2/ikev2_priv.h4
-rw-r--r--src/plugins/ikev2/ikev2_test.c155
-rw-r--r--src/plugins/ikev2/ikev2_types.api39
-rw-r--r--src/plugins/lisp/lisp-cp/lisp_types.h3
-rw-r--r--src/plugins/marvell/pp2/cli.c2
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_api.c3
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_in2out.c2
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_api.c3
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_in2out.c2
-rw-r--r--src/plugins/nat/pnat/pnat_api.c5
-rw-r--r--src/plugins/netmap/CMakeLists.txt32
-rw-r--r--src/plugins/netmap/FEATURE.yaml12
-rw-r--r--src/plugins/netmap/cli.c236
-rw-r--r--src/plugins/netmap/device.c252
-rw-r--r--src/plugins/netmap/net_netmap.h650
-rw-r--r--src/plugins/netmap/netmap.api56
-rw-r--r--src/plugins/netmap/netmap.c334
-rw-r--r--src/plugins/netmap/netmap.h166
-rw-r--r--src/plugins/netmap/netmap_api.c95
-rw-r--r--src/plugins/netmap/node.c295
-rw-r--r--src/plugins/netmap/plugin.c16
-rw-r--r--src/plugins/srmpls/CMakeLists.txt30
-rw-r--r--src/plugins/srmpls/FEATURE.yaml9
-rw-r--r--src/plugins/srmpls/dir.dox22
-rw-r--r--src/plugins/srmpls/plugin.c26
-rw-r--r--src/plugins/srmpls/sr_doc.rst215
-rw-r--r--src/plugins/srmpls/sr_mpls.api124
-rw-r--r--src/plugins/srmpls/sr_mpls.h177
-rw-r--r--src/plugins/srmpls/sr_mpls_api.c257
-rw-r--r--src/plugins/srmpls/sr_mpls_policy.c903
-rw-r--r--src/plugins/srmpls/sr_mpls_steering.c897
-rw-r--r--src/plugins/srmpls/sr_mpls_test.c174
-rw-r--r--src/plugins/tlsopenssl/tls_openssl.c7
-rw-r--r--src/plugins/unittest/gso_test.c113
-rw-r--r--src/plugins/unittest/policer_test.c2
-rw-r--r--src/plugins/wireguard/wireguard_chachapoly.c8
-rw-r--r--src/plugins/wireguard/wireguard_noise.c4
68 files changed, 6431 insertions, 456 deletions
diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c
index fddb3d532ff..98803a916cb 100644
--- a/src/plugins/acl/acl_test.c
+++ b/src/plugins/acl/acl_test.c
@@ -18,6 +18,8 @@
*------------------------------------------------------------------
*/
+#include <byteswap.h>
+
#include <vat/vat.h>
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
@@ -112,7 +114,7 @@ static void vl_api_acl_interface_list_details_t_handler
int i;
vat_main_t * vam = acl_test_main.vat_main;
u8 *out = 0;
- vl_api_acl_interface_list_details_t_endian(mp);
+ vl_api_acl_interface_list_details_t_endian (mp, 0 /* from network */);
out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input);
out = format(out, " input ");
for(i=0; i<mp->count; i++) {
@@ -139,7 +141,8 @@ static void vl_api_acl_interface_etype_whitelist_details_t_handler
int i;
vat_main_t * vam = acl_test_main.vat_main;
u8 *out = 0;
- vl_api_acl_interface_etype_whitelist_details_t_endian(mp);
+ vl_api_acl_interface_etype_whitelist_details_t_endian (
+ mp, 0 /* from network */);
out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input);
out = format(out, " input ");
for(i=0; i<mp->count; i++) {
@@ -171,15 +174,15 @@ vl_api_acl_rule_t_pretty_format (u8 *out, vl_api_acl_rule_t * a)
inet_ntop(af, &a->src_prefix.address.un, (void *)src, sizeof(src));
inet_ntop(af, &a->dst_prefix.address.un, (void *)dst, sizeof(dst));
- out = format(out, "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport %d-%d tcpflags %d mask %d",
- a->src_prefix.address.af ? "ipv6" : "ipv4", a->is_permit,
- src, a->src_prefix.len,
- dst, a->dst_prefix.len,
- a->proto,
- a->srcport_or_icmptype_first, a->srcport_or_icmptype_last,
- a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last,
- a->tcp_flags_value, a->tcp_flags_mask);
- return(out);
+ out = format (out,
+ "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport "
+ "%d-%d tcpflags %d mask %d",
+ a->src_prefix.address.af ? "ipv6" : "ipv4", a->is_permit, src,
+ a->src_prefix.len, dst, a->dst_prefix.len, a->proto,
+ a->srcport_or_icmptype_first, a->srcport_or_icmptype_last,
+ a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last,
+ a->tcp_flags_value, a->tcp_flags_mask);
+ return (out);
}
@@ -189,9 +192,10 @@ static void vl_api_acl_details_t_handler
{
int i;
vat_main_t * vam = acl_test_main.vat_main;
- vl_api_acl_details_t_endian(mp);
- u8 *out = 0;
- out = format(0, "acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag);
+ vl_api_acl_details_t_endian (mp, 0 /* from network */);
+ u8 *out = 0;
+ out = format (0, "acl_index: %d, count: %d\n tag {%s}\n",
+ mp->acl_index, mp->count, mp->tag);
for(i=0; i<mp->count; i++) {
out = format(out, " ");
out = vl_api_acl_rule_t_pretty_format(out, &mp->r[i]);
@@ -223,8 +227,9 @@ static void vl_api_macip_acl_details_t_handler
{
int i;
vat_main_t * vam = acl_test_main.vat_main;
- vl_api_macip_acl_details_t_endian(mp);
- u8 *out = format(0,"MACIP acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag);
+ vl_api_macip_acl_details_t_endian (mp, 0 /* from network */);
+ u8 *out = format (0, "MACIP acl_index: %d, count: %d\n tag {%s}\n",
+ mp->acl_index, mp->count, mp->tag);
for(i=0; i<mp->count; i++) {
out = format(out, " ");
out = vl_api_macip_acl_rule_t_pretty_format(out, &mp->r[i]);
diff --git a/src/plugins/acl/sess_mgmt_node.c b/src/plugins/acl/sess_mgmt_node.c
index e049a3ffa85..418baef9b6b 100644
--- a/src/plugins/acl/sess_mgmt_node.c
+++ b/src/plugins/acl/sess_mgmt_node.c
@@ -371,8 +371,9 @@ send_one_worker_interrupt (vlib_main_t * vm, acl_main_t * am,
}
void
-aclp_post_session_change_request (acl_main_t * am, u32 target_thread,
- u32 target_session, u32 request_type)
+aclp_post_session_change_request (acl_main_t *am, u32 target_thread,
+ u32 target_session,
+ acl_fa_sess_req_t request_type)
{
acl_fa_per_worker_data_t *pw_me =
&am->per_worker_data[os_get_thread_index ()];
diff --git a/src/plugins/crypto_native/CMakeLists.txt b/src/plugins/crypto_native/CMakeLists.txt
index 9b6091610d9..5499ed4608a 100644
--- a/src/plugins/crypto_native/CMakeLists.txt
+++ b/src/plugins/crypto_native/CMakeLists.txt
@@ -12,8 +12,8 @@
# limitations under the License.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
- list(APPEND VARIANTS "slm\;-march=silvermont")
- list(APPEND VARIANTS "hsw\;-march=haswell")
+ list(APPEND VARIANTS "slm\;-march=silvermont -maes")
+ list(APPEND VARIANTS "hsw\;-march=haswell -maes")
if(compiler_flag_march_skylake_avx512 AND compiler_flag_mprefer_vector_width_256)
list(APPEND VARIANTS "skx\;-march=skylake-avx512 -mprefer-vector-width=256")
endif()
@@ -23,16 +23,15 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
if(compiler_flag_march_alderlake)
list(APPEND VARIANTS "adl\;-march=alderlake -mprefer-vector-width=256")
endif()
- set (COMPILE_FILES aes_cbc.c aes_gcm.c aes_ctr.c)
- set (COMPILE_OPTS -Wall -fno-common -maes)
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
list(APPEND VARIANTS "armv8\;-march=armv8.1-a+crc+crypto")
- set (COMPILE_FILES aes_cbc.c aes_gcm.c aes_ctr.c)
- set (COMPILE_OPTS -Wall -fno-common)
endif()
+set (COMPILE_FILES aes_cbc.c aes_gcm.c aes_ctr.c sha2.c)
+set (COMPILE_OPTS -Wall -fno-common)
+
if (NOT VARIANTS)
return()
endif()
diff --git a/src/plugins/crypto_native/aes_cbc.c b/src/plugins/crypto_native/aes_cbc.c
index c84390c3108..dd7ca3f1cf1 100644
--- a/src/plugins/crypto_native/aes_cbc.c
+++ b/src/plugins/crypto_native/aes_cbc.c
@@ -249,18 +249,30 @@ decrypt:
return n_ops;
}
-#define foreach_aes_cbc_handler_type _(128) _(192) _(256)
-
-#define _(x) \
-static u32 aes_ops_dec_aes_cbc_##x \
-(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aes_ops_dec_aes_cbc (vm, ops, n_ops, AES_KEY_##x); } \
-static u32 aes_ops_enc_aes_cbc_##x \
-(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aes_ops_enc_aes_cbc (vm, ops, n_ops, AES_KEY_##x); } \
-
-foreach_aes_cbc_handler_type;
-#undef _
+static int
+aes_cbc_cpu_probe ()
+{
+#if defined(__VAES__) && defined(__AVX512F__)
+ if (clib_cpu_supports_vaes () && clib_cpu_supports_avx512f ())
+ return 50;
+#elif defined(__VAES__)
+ if (clib_cpu_supports_vaes ())
+ return 40;
+#elif defined(__AVX512F__)
+ if (clib_cpu_supports_avx512f ())
+ return 30;
+#elif defined(__AVX2__)
+ if (clib_cpu_supports_avx2 ())
+ return 20;
+#elif __AES__
+ if (clib_cpu_supports_aes ())
+ return 10;
+#elif __aarch64__
+ if (clib_cpu_supports_aarch64_aes ())
+ return 10;
+#endif
+ return -1;
+}
static void *
aes_cbc_key_exp_128 (vnet_crypto_key_t *key)
@@ -289,43 +301,39 @@ aes_cbc_key_exp_256 (vnet_crypto_key_t *key)
return kd;
}
-#include <fcntl.h>
-
-clib_error_t *
-#if defined(__VAES__) && defined(__AVX512F__)
-crypto_native_aes_cbc_init_icl (vlib_main_t *vm)
-#elif defined(__VAES__)
-crypto_native_aes_cbc_init_adl (vlib_main_t *vm)
-#elif __AVX512F__
-crypto_native_aes_cbc_init_skx (vlib_main_t * vm)
-#elif __aarch64__
-crypto_native_aes_cbc_init_neon (vlib_main_t * vm)
-#elif __AVX2__
-crypto_native_aes_cbc_init_hsw (vlib_main_t * vm)
-#else
-crypto_native_aes_cbc_init_slm (vlib_main_t * vm)
-#endif
-{
- crypto_native_main_t *cm = &crypto_native_main;
+#define foreach_aes_cbc_handler_type _ (128) _ (192) _ (256)
+
+#define _(x) \
+ static u32 aes_ops_enc_aes_cbc_##x (vlib_main_t *vm, \
+ vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return aes_ops_enc_aes_cbc (vm, ops, n_ops, AES_KEY_##x); \
+ } \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##x##_cbc_enc) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##x##_CBC_ENC, \
+ .fn = aes_ops_enc_aes_cbc_##x, \
+ .probe = aes_cbc_cpu_probe, \
+ }; \
+ \
+ static u32 aes_ops_dec_aes_cbc_##x (vlib_main_t *vm, \
+ vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return aes_ops_dec_aes_cbc (vm, ops, n_ops, AES_KEY_##x); \
+ } \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##x##_cbc_dec) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##x##_CBC_DEC, \
+ .fn = aes_ops_dec_aes_cbc_##x, \
+ .probe = aes_cbc_cpu_probe, \
+ }; \
+ \
+ CRYPTO_NATIVE_KEY_HANDLER (aes_##x##_cbc) = { \
+ .alg_id = VNET_CRYPTO_ALG_AES_##x##_CBC, \
+ .key_fn = aes_cbc_key_exp_##x, \
+ .probe = aes_cbc_cpu_probe, \
+ };
-#define _(x) \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_CBC_ENC, \
- aes_ops_enc_aes_cbc_##x); \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_CBC_DEC, \
- aes_ops_dec_aes_cbc_##x); \
- cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_CBC] = aes_cbc_key_exp_##x;
- foreach_aes_cbc_handler_type;
+foreach_aes_cbc_handler_type;
#undef _
- return 0;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/crypto_native/aes_ctr.c b/src/plugins/crypto_native/aes_ctr.c
index 3a219510419..d02a7b69b9d 100644
--- a/src/plugins/crypto_native/aes_ctr.c
+++ b/src/plugins/crypto_native/aes_ctr.c
@@ -81,32 +81,50 @@ aes_ctr_key_exp (vnet_crypto_key_t *key, aes_key_size_t ks)
foreach_aes_ctr_handler_type;
#undef _
-clib_error_t *
+static int
+probe ()
+{
#if defined(__VAES__) && defined(__AVX512F__)
-crypto_native_aes_ctr_init_icl (vlib_main_t *vm)
+ if (clib_cpu_supports_vaes () && clib_cpu_supports_avx512f ())
+ return 50;
#elif defined(__VAES__)
-crypto_native_aes_ctr_init_adl (vlib_main_t *vm)
-#elif __AVX512F__
-crypto_native_aes_ctr_init_skx (vlib_main_t *vm)
-#elif __AVX2__
-crypto_native_aes_ctr_init_hsw (vlib_main_t *vm)
+ if (clib_cpu_supports_vaes ())
+ return 40;
+#elif defined(__AVX512F__)
+ if (clib_cpu_supports_avx512f ())
+ return 30;
+#elif defined(__AVX2__)
+ if (clib_cpu_supports_avx2 ())
+ return 20;
+#elif __AES__
+ if (clib_cpu_supports_aes ())
+ return 10;
#elif __aarch64__
-crypto_native_aes_ctr_init_neon (vlib_main_t *vm)
-#else
-crypto_native_aes_ctr_init_slm (vlib_main_t *vm)
+ if (clib_cpu_supports_aarch64_aes ())
+ return 10;
#endif
-{
- crypto_native_main_t *cm = &crypto_native_main;
+ return -1;
+}
-#define _(x) \
- vnet_crypto_register_ops_handlers ( \
- vm, cm->crypto_engine_index, VNET_CRYPTO_OP_AES_##x##_CTR_ENC, \
- aes_ops_aes_ctr_##x, aes_ops_aes_ctr_##x##_chained); \
- vnet_crypto_register_ops_handlers ( \
- vm, cm->crypto_engine_index, VNET_CRYPTO_OP_AES_##x##_CTR_DEC, \
- aes_ops_aes_ctr_##x, aes_ops_aes_ctr_##x##_chained); \
- cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_CTR] = aes_ctr_key_exp_##x;
- foreach_aes_ctr_handler_type;
+#define _(b) \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_ctr_enc) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_CTR_ENC, \
+ .fn = aes_ops_aes_ctr_##b, \
+ .cfn = aes_ops_aes_ctr_##b##_chained, \
+ .probe = probe, \
+ }; \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_ctr_dec) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_CTR_DEC, \
+ .fn = aes_ops_aes_ctr_##b, \
+ .cfn = aes_ops_aes_ctr_##b##_chained, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_KEY_HANDLER (aes_##b##_ctr) = { \
+ .alg_id = VNET_CRYPTO_ALG_AES_##b##_CTR, \
+ .key_fn = aes_ctr_key_exp_##b, \
+ .probe = probe, \
+ };
+
+_ (128) _ (192) _ (256)
#undef _
- return 0;
-}
diff --git a/src/plugins/crypto_native/aes_gcm.c b/src/plugins/crypto_native/aes_gcm.c
index 6589d411975..220788d4e97 100644
--- a/src/plugins/crypto_native/aes_gcm.c
+++ b/src/plugins/crypto_native/aes_gcm.c
@@ -118,40 +118,49 @@ aes_gcm_key_exp (vnet_crypto_key_t *key, aes_key_size_t ks)
foreach_aes_gcm_handler_type;
#undef _
-clib_error_t *
+static int
+probe ()
+{
#if defined(__VAES__) && defined(__AVX512F__)
-crypto_native_aes_gcm_init_icl (vlib_main_t *vm)
+ if (clib_cpu_supports_vpclmulqdq () && clib_cpu_supports_vaes () &&
+ clib_cpu_supports_avx512f ())
+ return 50;
#elif defined(__VAES__)
-crypto_native_aes_gcm_init_adl (vlib_main_t *vm)
-#elif __AVX512F__
-crypto_native_aes_gcm_init_skx (vlib_main_t *vm)
-#elif __AVX2__
-crypto_native_aes_gcm_init_hsw (vlib_main_t *vm)
+ if (clib_cpu_supports_vpclmulqdq () && clib_cpu_supports_vaes ())
+ return 40;
+#elif defined(__AVX512F__)
+ if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_avx512f ())
+ return 30;
+#elif defined(__AVX2__)
+ if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_avx2 ())
+ return 20;
+#elif __AES__
+ if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_aes ())
+ return 10;
#elif __aarch64__
-crypto_native_aes_gcm_init_neon (vlib_main_t *vm)
-#else
-crypto_native_aes_gcm_init_slm (vlib_main_t *vm)
+ if (clib_cpu_supports_aarch64_aes ())
+ return 10;
#endif
-{
- crypto_native_main_t *cm = &crypto_native_main;
-
-#define _(x) \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \
- aes_ops_enc_aes_gcm_##x); \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \
- aes_ops_dec_aes_gcm_##x); \
- cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x;
- foreach_aes_gcm_handler_type;
-#undef _
- return 0;
+ return -1;
}
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+#define _(b) \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_enc) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_GCM_ENC, \
+ .fn = aes_ops_enc_aes_gcm_##b, \
+ .probe = probe, \
+ }; \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_dec) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_GCM_DEC, \
+ .fn = aes_ops_dec_aes_gcm_##b, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_KEY_HANDLER (aes_##b##_gcm) = { \
+ .alg_id = VNET_CRYPTO_ALG_AES_##b##_GCM, \
+ .key_fn = aes_gcm_key_exp_##b, \
+ .probe = probe, \
+ };
+
+_ (128) _ (192) _ (256)
+#undef _
diff --git a/src/plugins/crypto_native/crypto_native.h b/src/plugins/crypto_native/crypto_native.h
index c15b8cbd1da..3d18e8cabd0 100644
--- a/src/plugins/crypto_native/crypto_native.h
+++ b/src/plugins/crypto_native/crypto_native.h
@@ -19,33 +19,66 @@
#define __crypto_native_h__
typedef void *(crypto_native_key_fn_t) (vnet_crypto_key_t * key);
+typedef int (crypto_native_variant_probe_t) ();
+
+typedef struct crypto_native_op_handler
+{
+ struct crypto_native_op_handler *next;
+ vnet_crypto_op_id_t op_id;
+ vnet_crypto_ops_handler_t *fn;
+ vnet_crypto_chained_ops_handler_t *cfn;
+ crypto_native_variant_probe_t *probe;
+ int priority;
+} crypto_native_op_handler_t;
+
+typedef struct crypto_native_key_handler
+{
+ struct crypto_native_key_handler *next;
+ vnet_crypto_alg_t alg_id;
+ crypto_native_key_fn_t *key_fn;
+ crypto_native_variant_probe_t *probe;
+ int priority;
+} crypto_native_key_handler_t;
typedef struct
{
u32 crypto_engine_index;
crypto_native_key_fn_t *key_fn[VNET_CRYPTO_N_ALGS];
void **key_data;
+ crypto_native_op_handler_t *op_handlers;
+ crypto_native_key_handler_t *key_handlers;
} crypto_native_main_t;
extern crypto_native_main_t crypto_native_main;
-#define foreach_crypto_native_march_variant \
- _ (slm) _ (hsw) _ (skx) _ (icl) _ (adl) _ (neon)
-
-#define _(v) \
- clib_error_t __clib_weak *crypto_native_aes_cbc_init_##v (vlib_main_t *vm); \
- clib_error_t __clib_weak *crypto_native_aes_ctr_init_##v (vlib_main_t *vm); \
- clib_error_t __clib_weak *crypto_native_aes_gcm_init_##v (vlib_main_t *vm);
-
-foreach_crypto_native_march_variant;
-#undef _
+#define CRYPTO_NATIVE_OP_HANDLER(x) \
+ static crypto_native_op_handler_t __crypto_native_op_handler_##x; \
+ static void __clib_constructor __crypto_native_op_handler_cb_##x (void) \
+ { \
+ crypto_native_main_t *cm = &crypto_native_main; \
+ int priority = __crypto_native_op_handler_##x.probe (); \
+ if (priority >= 0) \
+ { \
+ __crypto_native_op_handler_##x.priority = priority; \
+ __crypto_native_op_handler_##x.next = cm->op_handlers; \
+ cm->op_handlers = &__crypto_native_op_handler_##x; \
+ } \
+ } \
+ static crypto_native_op_handler_t __crypto_native_op_handler_##x
+#define CRYPTO_NATIVE_KEY_HANDLER(x) \
+ static crypto_native_key_handler_t __crypto_native_key_handler_##x; \
+ static void __clib_constructor __crypto_native_key_handler_cb_##x (void) \
+ { \
+ crypto_native_main_t *cm = &crypto_native_main; \
+ int priority = __crypto_native_key_handler_##x.probe (); \
+ if (priority >= 0) \
+ { \
+ __crypto_native_key_handler_##x.priority = priority; \
+ __crypto_native_key_handler_##x.next = cm->key_handlers; \
+ cm->key_handlers = &__crypto_native_key_handler_##x; \
+ } \
+ } \
+ static crypto_native_key_handler_t __crypto_native_key_handler_##x
#endif /* __crypto_native_h__ */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/crypto_native/main.c b/src/plugins/crypto_native/main.c
index 8a59be319b9..2bc0d98f196 100644
--- a/src/plugins/crypto_native/main.c
+++ b/src/plugins/crypto_native/main.c
@@ -63,95 +63,52 @@ clib_error_t *
crypto_native_init (vlib_main_t * vm)
{
crypto_native_main_t *cm = &crypto_native_main;
- clib_error_t *error = 0;
- if (clib_cpu_supports_x86_aes () == 0 &&
- clib_cpu_supports_aarch64_aes () == 0)
+ if (cm->op_handlers == 0)
return 0;
cm->crypto_engine_index =
vnet_crypto_register_engine (vm, "native", 100,
"Native ISA Optimized Crypto");
- if (0);
-#if __x86_64__
- else if (crypto_native_aes_cbc_init_icl && clib_cpu_supports_vaes () &&
- clib_cpu_supports_avx512f ())
- error = crypto_native_aes_cbc_init_icl (vm);
- else if (crypto_native_aes_cbc_init_adl && clib_cpu_supports_vaes ())
- error = crypto_native_aes_cbc_init_adl (vm);
- else if (crypto_native_aes_cbc_init_skx && clib_cpu_supports_avx512f ())
- error = crypto_native_aes_cbc_init_skx (vm);
- else if (crypto_native_aes_cbc_init_hsw && clib_cpu_supports_avx2 ())
- error = crypto_native_aes_cbc_init_hsw (vm);
- else if (crypto_native_aes_cbc_init_slm)
- error = crypto_native_aes_cbc_init_slm (vm);
-#endif
-#if __aarch64__
- else if (crypto_native_aes_cbc_init_neon)
- error = crypto_native_aes_cbc_init_neon (vm);
-#endif
- else
- error = clib_error_return (0, "No AES CBC implemenation available");
-
- if (error)
- return error;
-
- if (0)
- ;
-#if __x86_64__
- else if (crypto_native_aes_ctr_init_icl && clib_cpu_supports_vaes () &&
- clib_cpu_supports_avx512f ())
- error = crypto_native_aes_ctr_init_icl (vm);
- else if (crypto_native_aes_ctr_init_adl && clib_cpu_supports_vaes ())
- error = crypto_native_aes_ctr_init_adl (vm);
- else if (crypto_native_aes_ctr_init_skx && clib_cpu_supports_avx512f ())
- error = crypto_native_aes_ctr_init_skx (vm);
- else if (crypto_native_aes_ctr_init_hsw && clib_cpu_supports_avx2 ())
- error = crypto_native_aes_ctr_init_hsw (vm);
- else if (crypto_native_aes_ctr_init_slm)
- error = crypto_native_aes_ctr_init_slm (vm);
-#endif
-#if __aarch64__
- else if (crypto_native_aes_ctr_init_neon)
- error = crypto_native_aes_ctr_init_neon (vm);
-#endif
- else
- error = clib_error_return (0, "No AES CTR implemenation available");
-
- if (error)
- return error;
-
-#if __x86_64__
- if (clib_cpu_supports_pclmulqdq ())
+ crypto_native_op_handler_t *oh = cm->op_handlers;
+ crypto_native_key_handler_t *kh = cm->key_handlers;
+ crypto_native_op_handler_t **best_by_op_id = 0;
+ crypto_native_key_handler_t **best_by_alg_id = 0;
+
+ while (oh)
{
- if (crypto_native_aes_gcm_init_icl && clib_cpu_supports_vaes () &&
- clib_cpu_supports_avx512f ())
- error = crypto_native_aes_gcm_init_icl (vm);
- else if (crypto_native_aes_gcm_init_adl && clib_cpu_supports_vaes ())
- error = crypto_native_aes_gcm_init_adl (vm);
- else if (crypto_native_aes_gcm_init_skx && clib_cpu_supports_avx512f ())
- error = crypto_native_aes_gcm_init_skx (vm);
- else if (crypto_native_aes_gcm_init_hsw && clib_cpu_supports_avx2 ())
- error = crypto_native_aes_gcm_init_hsw (vm);
- else if (crypto_native_aes_gcm_init_slm)
- error = crypto_native_aes_gcm_init_slm (vm);
- else
- error = clib_error_return (0, "No AES GCM implemenation available");
-
- if (error)
- return error;
+ vec_validate (best_by_op_id, oh->op_id);
+
+ if (best_by_op_id[oh->op_id] == 0 ||
+ best_by_op_id[oh->op_id]->priority < oh->priority)
+ best_by_op_id[oh->op_id] = oh;
+
+ oh = oh->next;
}
-#endif
-#if __aarch64__
- if (crypto_native_aes_gcm_init_neon)
- error = crypto_native_aes_gcm_init_neon (vm);
- else
- error = clib_error_return (0, "No AES GCM implemenation available");
-
- if (error)
- return error;
-#endif
+
+ while (kh)
+ {
+ vec_validate (best_by_alg_id, kh->alg_id);
+
+ if (best_by_alg_id[kh->alg_id] == 0 ||
+ best_by_alg_id[kh->alg_id]->priority < kh->priority)
+ best_by_alg_id[kh->alg_id] = kh;
+
+ kh = kh->next;
+ }
+
+ vec_foreach_pointer (oh, best_by_op_id)
+ if (oh)
+ vnet_crypto_register_ops_handlers (vm, cm->crypto_engine_index,
+ oh->op_id, oh->fn, oh->cfn);
+
+ vec_foreach_pointer (kh, best_by_alg_id)
+ if (kh)
+ cm->key_fn[kh->alg_id] = kh->key_fn;
+
+ vec_free (best_by_op_id);
+ vec_free (best_by_alg_id);
vnet_crypto_register_key_handler (vm, cm->crypto_engine_index,
crypto_native_key_handler);
diff --git a/src/plugins/crypto_native/sha2.c b/src/plugins/crypto_native/sha2.c
new file mode 100644
index 00000000000..459ce6d8e79
--- /dev/null
+++ b/src/plugins/crypto_native/sha2.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/crypto/crypto.h>
+#include <crypto_native/crypto_native.h>
+#include <vppinfra/crypto/sha2.h>
+
+static_always_inline u32
+crypto_native_ops_hash_sha2 (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ u32 n_ops, vnet_crypto_op_chunk_t *chunks,
+ clib_sha2_type_t type, int maybe_chained)
+{
+ vnet_crypto_op_t *op = ops[0];
+ clib_sha2_ctx_t ctx;
+ u32 n_left = n_ops;
+
+next:
+ if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
+ {
+ vnet_crypto_op_chunk_t *chp = chunks + op->chunk_index;
+ clib_sha2_init (&ctx, type);
+ for (int j = 0; j < op->n_chunks; j++, chp++)
+ clib_sha2_update (&ctx, chp->src, chp->len);
+ clib_sha2_final (&ctx, op->digest);
+ }
+ else
+ clib_sha2 (type, op->src, op->len, op->digest);
+
+ op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
+
+ if (--n_left)
+ {
+ op += 1;
+ goto next;
+ }
+
+ return n_ops;
+}
+
+static_always_inline u32
+crypto_native_ops_hmac_sha2 (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ u32 n_ops, vnet_crypto_op_chunk_t *chunks,
+ clib_sha2_type_t type)
+{
+ crypto_native_main_t *cm = &crypto_native_main;
+ vnet_crypto_op_t *op = ops[0];
+ u32 n_left = n_ops;
+ clib_sha2_hmac_ctx_t ctx;
+ u8 buffer[64];
+ u32 sz, n_fail = 0;
+
+ for (; n_left; n_left--, op++)
+ {
+ clib_sha2_hmac_init (
+ &ctx, type, (clib_sha2_hmac_key_data_t *) cm->key_data[op->key_index]);
+ if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
+ {
+ vnet_crypto_op_chunk_t *chp = chunks + op->chunk_index;
+ for (int j = 0; j < op->n_chunks; j++, chp++)
+ clib_sha2_hmac_update (&ctx, chp->src, chp->len);
+ }
+ else
+ clib_sha2_hmac_update (&ctx, op->src, op->len);
+
+ clib_sha2_hmac_final (&ctx, buffer);
+
+ if (op->digest_len)
+ {
+ sz = op->digest_len;
+ if (op->flags & VNET_CRYPTO_OP_FLAG_HMAC_CHECK)
+ {
+ if ((memcmp (op->digest, buffer, sz)))
+ {
+ n_fail++;
+ op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
+ continue;
+ }
+ }
+ else
+ clib_memcpy_fast (op->digest, buffer, sz);
+ }
+ else
+ {
+ sz = clib_sha2_variants[type].digest_size;
+ if (op->flags & VNET_CRYPTO_OP_FLAG_HMAC_CHECK)
+ {
+ if ((memcmp (op->digest, buffer, sz)))
+ {
+ n_fail++;
+ op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
+ continue;
+ }
+ }
+ else
+ clib_memcpy_fast (op->digest, buffer, sz);
+ }
+
+ op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
+ }
+
+ return n_ops - n_fail;
+}
+
+static void *
+sha2_key_add (vnet_crypto_key_t *key, clib_sha2_type_t type)
+{
+ clib_sha2_hmac_key_data_t *kd;
+
+ kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
+ clib_sha2_hmac_key_data (type, key->data, vec_len (key->data), kd);
+
+ return kd;
+}
+
+static int
+probe ()
+{
+#if defined(__SHA__) && defined(__x86_64__)
+ if (clib_cpu_supports_sha ())
+ return 50;
+#elif defined(__ARM_FEATURE_SHA2)
+ if (clib_cpu_supports_sha2 ())
+ return 10;
+#endif
+ return -1;
+}
+
+#define _(b) \
+ static u32 crypto_native_ops_hash_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return crypto_native_ops_hash_sha2 (vm, ops, n_ops, 0, CLIB_SHA2_##b, 0); \
+ } \
+ \
+ static u32 crypto_native_ops_chained_hash_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ return crypto_native_ops_hash_sha2 (vm, ops, n_ops, chunks, \
+ CLIB_SHA2_##b, 1); \
+ } \
+ \
+ static u32 crypto_native_ops_hmac_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return crypto_native_ops_hmac_sha2 (vm, ops, n_ops, 0, CLIB_SHA2_##b); \
+ } \
+ \
+ static u32 crypto_native_ops_chained_hmac_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ return crypto_native_ops_hmac_sha2 (vm, ops, n_ops, chunks, \
+ CLIB_SHA2_##b); \
+ } \
+ \
+ static void *sha2_##b##_key_add (vnet_crypto_key_t *k) \
+ { \
+ return sha2_key_add (k, CLIB_SHA2_##b); \
+ } \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (crypto_native_hash_sha##b) = { \
+ .op_id = VNET_CRYPTO_OP_SHA##b##_HASH, \
+ .fn = crypto_native_ops_hash_sha##b, \
+ .cfn = crypto_native_ops_chained_hash_sha##b, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_OP_HANDLER (crypto_native_hmac_sha##b) = { \
+ .op_id = VNET_CRYPTO_OP_SHA##b##_HMAC, \
+ .fn = crypto_native_ops_hmac_sha##b, \
+ .cfn = crypto_native_ops_chained_hmac_sha##b, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_KEY_HANDLER (crypto_native_hmac_sha##b) = { \
+ .alg_id = VNET_CRYPTO_ALG_HMAC_SHA##b, \
+ .key_fn = sha2_##b##_key_add, \
+ .probe = probe, \
+ };
+
+_ (224)
+_ (256)
+
+#undef _
diff --git a/src/plugins/dev_iavf/port.c b/src/plugins/dev_iavf/port.c
index 982436d9b45..90e81e960c4 100644
--- a/src/plugins/dev_iavf/port.c
+++ b/src/plugins/dev_iavf/port.c
@@ -42,29 +42,35 @@ iavf_port_vlan_strip_disable (vlib_main_t *vm, vnet_dev_port_t *port)
vnet_dev_t *dev = port->dev;
iavf_port_t *ap = vnet_dev_get_port_data (port);
virtchnl_vlan_caps_t vc;
- vnet_dev_rv_t rv;
+ vnet_dev_rv_t rv = VNET_DEV_ERR_NOT_SUPPORTED;
u32 outer, inner;
const u32 mask = VIRTCHNL_VLAN_ETHERTYPE_8100;
- if ((ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) == 0)
- return iavf_vc_op_disable_vlan_stripping (vm, dev);
+ if (ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2)
+ {
+ if ((rv = iavf_vc_op_get_offload_vlan_v2_caps (vm, dev, &vc)))
+ return rv;
- if ((rv = iavf_vc_op_get_offload_vlan_v2_caps (vm, dev, &vc)))
- return rv;
+ outer = vc.offloads.stripping_support.outer;
+ inner = vc.offloads.stripping_support.inner;
- outer = vc.offloads.stripping_support.outer;
- inner = vc.offloads.stripping_support.inner;
+ outer = outer & VIRTCHNL_VLAN_TOGGLE ? outer & mask : 0;
+ inner = inner & VIRTCHNL_VLAN_TOGGLE ? inner & mask : 0;
- outer = outer & VIRTCHNL_VLAN_TOGGLE ? outer & mask : 0;
- inner = inner & VIRTCHNL_VLAN_TOGGLE ? inner & mask : 0;
+ virtchnl_vlan_setting_t vs = {
+ .vport_id = ap->vsi_id,
+ .outer_ethertype_setting = outer,
+ .inner_ethertype_setting = inner,
+ };
- virtchnl_vlan_setting_t vs = {
- .vport_id = ap->vsi_id,
- .outer_ethertype_setting = outer,
- .inner_ethertype_setting = inner,
- };
+ if ((rv = iavf_vc_op_disable_vlan_stripping_v2 (vm, dev, &vs)))
+ return rv;
+ }
- return iavf_vc_op_disable_vlan_stripping_v2 (vm, dev, &vs);
+ if (ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+ return iavf_vc_op_disable_vlan_stripping (vm, dev);
+
+ return rv;
}
vnet_dev_rv_t
@@ -275,7 +281,12 @@ iavf_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
u64_bit_set (&ap->intr_mode_per_rxq_bitmap, q->queue_id, 1);
if ((rv = iavf_port_vlan_strip_disable (vm, port)))
- return rv;
+ {
+ if (rv == VNET_DEV_ERR_NOT_SUPPORTED)
+ log_warn (port->dev, "device doesn't support vlan stripping");
+ else
+ return rv;
+ }
if ((rv = iavf_port_init_rss (vm, port)))
return rv;
diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c
index 47be8a8f9a4..97a11e0d0d7 100644
--- a/src/plugins/dev_octeon/init.c
+++ b/src/plugins/dev_octeon/init.c
@@ -51,7 +51,9 @@ static struct
}
_ (0xa063, RVU_PF, "Marvell Octeon Resource Virtualization Unit PF"),
- _ (0xa0f8, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"),
+ _ (0xa064, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"),
+ _ (0xa0f8, LBK_VF, "Marvell Octeon Loopback Unit VF"),
+ _ (0xa0f7, SDP_VF, "Marvell Octeon System DPI Packet Interface Unit VF"),
_ (0xa0f3, CPT_VF, "Marvell Octeon Cryptographic Accelerator Unit VF"),
#undef _
};
@@ -113,7 +115,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev)
if ((rrv = roc_nix_dev_init (cd->nix)))
return cnx_return_roc_err (dev, rrv, "roc_nix_dev_init");
- if (roc_nix_npc_mac_addr_get (cd->nix, mac_addr))
+ if ((rrv = roc_nix_npc_mac_addr_get (cd->nix, mac_addr)))
return cnx_return_roc_err (dev, rrv, "roc_nix_npc_mac_addr_get");
vnet_dev_port_add_args_t port_add_args = {
@@ -239,12 +241,19 @@ oct_init (vlib_main_t *vm, vnet_dev_t *dev)
strncpy ((char *) cd->plt_pci_dev.name, dev->device_id,
sizeof (cd->plt_pci_dev.name) - 1);
- if (cd->type == OCT_DEVICE_TYPE_RVU_PF || cd->type == OCT_DEVICE_TYPE_RVU_VF)
- return oct_init_nix (vm, dev);
- else if (cd->type == OCT_DEVICE_TYPE_CPT_VF)
- return oct_init_cpt (vm, dev);
- else
- return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+ switch (cd->type)
+ {
+ case OCT_DEVICE_TYPE_RVU_PF:
+ case OCT_DEVICE_TYPE_RVU_VF:
+ case OCT_DEVICE_TYPE_SDP_VF:
+ return oct_init_nix (vm, dev);
+
+ case OCT_DEVICE_TYPE_CPT_VF:
+ return oct_init_cpt (vm, dev);
+
+ default:
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+ }
return 0;
}
diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h
index 72d2d56a437..e43cde0a35f 100644
--- a/src/plugins/dev_octeon/octeon.h
+++ b/src/plugins/dev_octeon/octeon.h
@@ -22,6 +22,8 @@ typedef enum
OCT_DEVICE_TYPE_UNKNOWN = 0,
OCT_DEVICE_TYPE_RVU_PF,
OCT_DEVICE_TYPE_RVU_VF,
+ OCT_DEVICE_TYPE_LBK_VF,
+ OCT_DEVICE_TYPE_SDP_VF,
OCT_DEVICE_TYPE_CPT_VF,
} __clib_packed oct_device_type_t;
@@ -161,7 +163,8 @@ vnet_dev_rv_t oct_flow_query (vlib_main_t *, vnet_dev_port_t *, u32, uword,
_ (AURA_BATCH_ALLOC_ISSUE_FAIL, aura_batch_alloc_issue_fail, ERROR, \
"aura batch alloc issue failed") \
_ (AURA_BATCH_ALLOC_NOT_READY, aura_batch_alloc_not_ready, ERROR, \
- "aura batch alloc not ready")
+ "aura batch alloc not ready") \
+ _ (MTU_EXCEEDED, mtu_exceeded, ERROR, "mtu exceeded")
typedef enum
{
diff --git a/src/plugins/dev_octeon/port.c b/src/plugins/dev_octeon/port.c
index a82e48004b5..98a4c28b37d 100644
--- a/src/plugins/dev_octeon/port.c
+++ b/src/plugins/dev_octeon/port.c
@@ -379,6 +379,82 @@ oct_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
}
vnet_dev_rv_t
+oct_validate_config_promisc_mode (vnet_dev_port_t *port, int enable)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+
+ if (roc_nix_is_vf_or_sdp (nix))
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+oct_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enable)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ int rv;
+
+ rv = roc_nix_npc_promisc_ena_dis (nix, enable);
+ if (rv)
+ {
+ return oct_roc_err (dev, rv, "roc_nix_npc_promisc_ena_dis failed");
+ }
+
+ rv = roc_nix_mac_promisc_mode_enable (nix, enable);
+ if (rv)
+ {
+ return oct_roc_err (dev, rv,
+ "roc_nix_mac_promisc_mode_enable(%s) failed",
+ enable ? "true" : "false");
+ }
+
+ return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+oct_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_hw_addr_t *addr, int is_add,
+ int is_primary)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ i32 rrv;
+
+ if (is_primary)
+ {
+ if (is_add)
+ {
+ /* Update mac address at NPC */
+ rrv = roc_nix_npc_mac_addr_set (nix, (u8 *) addr);
+ if (rrv)
+ rv = oct_roc_err (dev, rrv, "roc_nix_npc_mac_addr_set() failed");
+
+ /* Update mac address at CGX for PFs only */
+ if (!roc_nix_is_vf_or_sdp (nix))
+ {
+ rrv = roc_nix_mac_addr_set (nix, (u8 *) addr);
+ if (rrv)
+ {
+ /* Rollback to previous mac address */
+ roc_nix_npc_mac_addr_set (nix,
+ (u8 *) &port->primary_hw_addr);
+ rv = oct_roc_err (dev, rrv, "roc_nix_mac_addr_set() failed");
+ }
+ }
+ }
+ }
+ return rv;
+}
+
+vnet_dev_rv_t
oct_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
vnet_dev_port_cfg_change_req_t *req)
{
@@ -392,6 +468,8 @@ oct_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
break;
case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ rv = oct_validate_config_promisc_mode (port, req->promisc);
+ break;
case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
@@ -421,9 +499,13 @@ oct_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
switch (req->type)
{
case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ rv = oct_op_config_promisc_mode (vm, port, req->promisc);
break;
case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ rv = oct_port_add_del_eth_addr (vm, port, &req->addr,
+ /* is_add */ 1,
+ /* is_primary */ 1);
break;
case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c
index 0dbf8759d35..a2e4b07de8a 100644
--- a/src/plugins/dev_octeon/tx_node.c
+++ b/src/plugins/dev_octeon/tx_node.c
@@ -22,8 +22,11 @@ typedef struct
u32 n_tx_bytes;
u32 n_drop;
vlib_buffer_t *drop[VLIB_FRAME_SIZE];
+ u32 n_exd_mtu;
+ vlib_buffer_t *exd_mtu[VLIB_FRAME_SIZE];
u32 batch_alloc_not_ready;
u32 batch_alloc_issue_fail;
+ int max_pkt_len;
u16 lmt_id;
u64 lmt_ioaddr;
lmt_line_t *lmt_lines;
@@ -133,7 +136,8 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
static_always_inline u8
oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
- lmt_line_t *line, u32 flags, int simple, int trace)
+ lmt_line_t *line, u32 flags, int simple, int trace, u32 *n,
+ u8 *dpl)
{
u8 n_dwords = 2;
u32 total_len = 0;
@@ -148,6 +152,12 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
},
};
+ if (PREDICT_FALSE (vlib_buffer_length_in_chain (vm, b) > ctx->max_pkt_len))
+ {
+ ctx->exd_mtu[ctx->n_exd_mtu++] = b;
+ return 0;
+ }
+
if (!simple && flags & VLIB_BUFFER_NEXT_PRESENT)
{
u8 n_tail_segs = 0;
@@ -159,7 +169,7 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
tail_segs[n_tail_segs++] = t;
if (n_tail_segs > 5)
{
- ctx->drop[ctx->n_drop++] = t;
+ ctx->drop[ctx->n_drop++] = b;
return 0;
}
}
@@ -231,6 +241,9 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
for (u32 i = 0; i < n_dwords; i++)
line->dwords[i] = d.as_u128[i];
+ *dpl = n_dwords;
+ *n = *n + 1;
+
return n_dwords;
}
@@ -240,7 +253,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
{
u8 dwords_per_line[16], *dpl = dwords_per_line;
u64 lmt_arg, ioaddr, n_lines;
- u32 n_left, or_flags_16 = 0;
+ u32 n_left, or_flags_16 = 0, n = 0;
const u32 not_simple_flags =
VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD;
lmt_line_t *l = ctx->lmt_lines;
@@ -248,7 +261,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
/* Data Store Memory Barrier - outer shareable domain */
asm volatile("dmb oshst" ::: "memory");
- for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8, l += 8)
+ for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8)
{
u32 f0, f1, f2, f3, f4, f5, f6, f7, or_f = 0;
vlib_prefetch_buffer_header (b[8], LOAD);
@@ -269,48 +282,54 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
if ((or_f & not_simple_flags) == 0)
{
int simple = 1;
- oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace);
- oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[13], LOAD);
- oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace);
- oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[14], LOAD);
- oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace);
- oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[15], LOAD);
- oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace);
- oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace);
- dpl[0] = dpl[1] = dpl[2] = dpl[3] = 2;
- dpl[4] = dpl[5] = dpl[6] = dpl[7] = 2;
+ oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]);
}
else
{
int simple = 0;
- dpl[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace);
- dpl[1] = oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[13], LOAD);
- dpl[2] = oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace);
- dpl[3] = oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[14], LOAD);
- dpl[4] = oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace);
- dpl[5] = oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]);
vlib_prefetch_buffer_header (b[15], LOAD);
- dpl[6] = oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace);
- dpl[7] = oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]);
+ oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]);
}
- dpl += 8;
+ dpl += n;
+ l += n;
+ n = 0;
}
- for (; n_left > 0; n_left -= 1, b += 1, l += 1)
+ for (; n_left > 0; n_left -= 1, b += 1)
{
u32 f0 = b[0]->flags;
- dpl++[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace);
+ oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace, &n, &dpl[n]);
or_flags_16 |= f0;
+ dpl += n;
+ l += n;
+ n = 0;
}
lmt_arg = ctx->lmt_id;
ioaddr = ctx->lmt_ioaddr;
- n_lines = n_pkts;
+ n_lines = dpl - dwords_per_line;
+
+ if (PREDICT_FALSE (!n_lines))
+ return n_pkts;
if (PREDICT_FALSE (or_flags_16 & VLIB_BUFFER_NEXT_PRESENT))
{
@@ -350,6 +369,8 @@ VNET_DEV_NODE_FN (oct_tx_node)
vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node);
vnet_dev_tx_queue_t *txq = rt->tx_queue;
oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_t *dev = txq->port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
u32 node_index = node->node_index;
u32 *from = vlib_frame_vector_args (frame);
u32 n, n_enq, n_left, n_pkts = frame->n_vectors;
@@ -363,6 +384,7 @@ VNET_DEV_NODE_FN (oct_tx_node)
.sq = ctq->sq.qid,
.sizem1 = 1,
},
+ .max_pkt_len = roc_nix_max_pkt_len (cd->nix),
.lmt_id = lmt_id,
.lmt_ioaddr = ctq->io_addr,
.lmt_lines = ctq->lmt_addr + (lmt_id << ROC_LMT_LINE_SIZE_LOG2),
@@ -396,7 +418,7 @@ VNET_DEV_NODE_FN (oct_tx_node)
n += oct_tx_enq16 (vm, &ctx, txq, b, n_left, /* trace */ 0);
}
- ctq->n_enq = n_enq + n;
+ ctq->n_enq = n_enq + n - ctx.n_drop - ctx.n_exd_mtu;
if (n < n_pkts)
{
@@ -411,6 +433,10 @@ VNET_DEV_NODE_FN (oct_tx_node)
vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_CHAIN_TOO_LONG,
ctx.n_drop);
+ if (PREDICT_FALSE (ctx.n_exd_mtu))
+ vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_MTU_EXCEEDED,
+ ctx.n_exd_mtu);
+
if (ctx.batch_alloc_not_ready)
vlib_error_count (vm, node_index,
OCT_TX_NODE_CTR_AURA_BATCH_ALLOC_NOT_READY,
@@ -431,5 +457,13 @@ VNET_DEV_NODE_FN (oct_tx_node)
n_pkts -= ctx.n_drop;
}
+ if (PREDICT_FALSE (ctx.n_exd_mtu))
+ {
+ u32 bi[VLIB_FRAME_SIZE];
+ vlib_get_buffer_indices (vm, ctx.exd_mtu, bi, ctx.n_exd_mtu);
+ vlib_buffer_free (vm, bi, ctx.n_exd_mtu);
+ n_pkts -= ctx.n_exd_mtu;
+ }
+
return n_pkts;
}
diff --git a/src/plugins/dpdk/CMakeLists.txt b/src/plugins/dpdk/CMakeLists.txt
index 48b1548f9c2..48c56f35282 100644
--- a/src/plugins/dpdk/CMakeLists.txt
+++ b/src/plugins/dpdk/CMakeLists.txt
@@ -90,8 +90,10 @@ else()
##############################################################################
# libnuma
##############################################################################
- vpp_plugin_find_library(dpdk NUMA_LIB "numa")
- list(APPEND DPDK_LINK_LIBRARIES ${NUMA_LIB})
+ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
+ vpp_plugin_find_library(dpdk NUMA_LIB "numa")
+ list(APPEND DPDK_LINK_LIBRARIES ${NUMA_LIB})
+ endif()
##############################################################################
# Mellanox libraries
diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c
index c838800deb4..77f9a27f97b 100644
--- a/src/plugins/dpdk/device/cli.c
+++ b/src/plugins/dpdk/device/cli.c
@@ -89,12 +89,18 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
clib_error_t *err = 0;
- u32 pipe_max_size;
int fds[2];
u8 *s = 0;
int n, n_try;
FILE *f;
+ /*
+ * XXX: Pipes on FreeBSD grow dynamically up to 64KB (FreeBSD 15), don't
+ * manually tweak this value on FreeBSD at the moment.
+ */
+#ifdef __linux__
+ u32 pipe_max_size;
+
err = clib_sysfs_read ("/proc/sys/fs/pipe-max-size", "%u", &pipe_max_size);
if (err)
@@ -112,6 +118,7 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input,
err = clib_error_return_unix (0, "fcntl(F_SETPIPE_SZ)");
goto error;
}
+#endif /* __linux__ */
if (fcntl (fds[0], F_SETFL, O_NONBLOCK) == -1)
{
diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c
index dc6b0c1c952..7a49c5aaef2 100644
--- a/src/plugins/dpdk/device/common.c
+++ b/src/plugins/dpdk/device/common.c
@@ -491,6 +491,7 @@ dpdk_get_pci_device (const struct rte_eth_dev_info *info)
return NULL;
}
+#ifdef __linux__
/* If this device is VMBUS return pointer to info, otherwise NULL */
struct rte_vmbus_device *
dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
@@ -507,6 +508,7 @@ dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
else
return NULL;
}
+#endif /* __linux__ */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index c22a67a07e7..88a4d9ff618 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -24,7 +24,9 @@
#include <rte_eal.h>
#include <rte_bus_pci.h>
+#ifdef __linux__
#include <rte_bus_vmbus.h>
+#endif /* __linux__ */
#include <rte_ethdev.h>
#include <rte_version.h>
#include <rte_net.h>
@@ -35,7 +37,9 @@
#include <bus_driver.h>
#include <bus_pci_driver.h>
+#ifdef __linux__
#include <bus_vmbus_driver.h>
+#endif /* __linux__ */
#endif
#include <vnet/devices/devices.h>
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index d15cfd7233a..e416efe2e4d 100644
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -187,9 +187,11 @@ dpdk_find_startup_config (struct rte_eth_dev_info *di)
{
dpdk_main_t *dm = &dpdk_main;
struct rte_pci_device *pci_dev;
- struct rte_vmbus_device *vmbus_dev;
vlib_pci_addr_t pci_addr;
+#ifdef __linux__
+ struct rte_vmbus_device *vmbus_dev;
vlib_vmbus_addr_t vmbus_addr;
+#endif /* __linux__ */
uword *p = 0;
if ((pci_dev = dpdk_get_pci_device (di)))
@@ -202,6 +204,7 @@ dpdk_find_startup_config (struct rte_eth_dev_info *di)
hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
}
+#ifdef __linux__
if ((vmbus_dev = dpdk_get_vmbus_device (di)))
{
unformat_input_t input_vmbus;
@@ -216,6 +219,7 @@ dpdk_find_startup_config (struct rte_eth_dev_info *di)
&vmbus_addr);
unformat_free (&input_vmbus);
}
+#endif /* __linux__ */
if (p)
return pool_elt_at_index (dm->conf->dev_confs, p[0]);
@@ -566,8 +570,18 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
continue;
}
+#ifdef __FreeBSD__
+ /*
+ * The defines for the PCI_CLASS_* types are platform specific and differ
+ * on FreeBSD.
+ */
+ if (d->device_class != PCI_CLASS_NETWORK &&
+ d->device_class != PCI_CLASS_PROCESSOR_CO)
+ continue;
+#else
if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO)
continue;
+#endif /* __FreeBSD__ */
if (num_whitelisted)
{
@@ -1031,12 +1045,14 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
dpdk_main_t *dm = &dpdk_main;
clib_error_t *error = 0;
dpdk_config_main_t *conf = &dpdk_config_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
dpdk_device_config_t *devconf;
vlib_pci_addr_t pci_addr = { 0 };
vlib_vmbus_addr_t vmbus_addr = { 0 };
unformat_input_t sub_input;
+#ifdef __linux
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
uword default_hugepage_sz, x;
+#endif /* __linux__ */
u8 *s, *tmp = 0;
int ret, i;
int num_whitelisted = 0;
@@ -1045,15 +1061,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
u8 no_vmbus = 0;
u8 file_prefix = 0;
u8 *socket_mem = 0;
- u8 *huge_dir_path = 0;
u32 vendor, device, domain, bus, func;
void *fmt_func;
void *fmt_addr;
f64 poll_interval;
- huge_dir_path =
- format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0);
-
conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
mhash_init (&conf->device_config_index_by_vmbus_addr, sizeof (uword),
sizeof (vlib_vmbus_addr_t));
@@ -1248,6 +1260,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
{
vec_add1 (conf->eal_init_args, (u8 *) "--in-memory");
+#ifdef __linux__
+ /*
+ * FreeBSD performs huge page prealloc through a dedicated kernel mode
+ * this process is only required on Linux.
+ */
default_hugepage_sz = clib_mem_get_default_hugepage_size ();
clib_bitmap_foreach (x, tm->cpu_socket_bitmap)
@@ -1262,6 +1279,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
if ((e = clib_sysfs_prealloc_hugepages(x, 0, n_pages)))
clib_error_report (e);
}
+#endif /* __linux__ */
}
/* on/off dpdk's telemetry thread */
@@ -1270,6 +1288,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
vec_add1 (conf->eal_init_args, (u8 *) "--no-telemetry");
}
+#ifdef __linux__
if (!file_prefix)
{
tmp = format (0, "--file-prefix%c", 0);
@@ -1277,6 +1296,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
tmp = format (0, "vpp%c", 0);
vec_add1 (conf->eal_init_args, tmp);
}
+#endif
if (no_pci == 0 && geteuid () == 0)
dpdk_bind_devices_to_uio (conf);
@@ -1396,11 +1416,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
RTE_VECT_SIMD_256 :
RTE_VECT_SIMD_512);
- /* lazy umount hugepages */
- umount2 ((char *) huge_dir_path, MNT_DETACH);
- rmdir ((char *) huge_dir_path);
- vec_free (huge_dir_path);
-
/* main thread 1st */
if ((error = dpdk_buffer_pools_create (vm)))
return error;
diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c
index 47007219482..9781d0ed7f0 100644
--- a/src/plugins/dpdk/main.c
+++ b/src/plugins/dpdk/main.c
@@ -13,13 +13,6 @@
* limitations under the License.
*/
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <linux/vfio.h>
-#include <sys/ioctl.h>
-
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
#include <dpdk/device/dpdk.h>
diff --git a/src/plugins/fateshare/fateshare.c b/src/plugins/fateshare/fateshare.c
index 33ee167bce3..971d32303db 100644
--- a/src/plugins/fateshare/fateshare.c
+++ b/src/plugins/fateshare/fateshare.c
@@ -17,6 +17,7 @@
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
+#include <vppinfra/unix.h>
#include <fateshare/fateshare.h>
#include <vlibapi/api.h>
@@ -26,7 +27,11 @@
#include <sys/types.h>
#include <sys/wait.h>
+#ifdef __linux__
#include <sys/prctl.h> // prctl(), PR_SET_PDEATHSIG
+#else
+#include <sys/procctl.h>
+#endif /* __linux__ */
#include <limits.h>
fateshare_main_t fateshare_main;
@@ -86,12 +91,23 @@ launch_monitor (fateshare_main_t *kmp)
{
dup2 (logfd, 1);
dup2 (logfd, 2);
+#ifdef __linux__
int r = prctl (PR_SET_PDEATHSIG, SIGTERM);
if (r == -1)
{
perror (0);
exit (1);
}
+#else
+ int r, s = SIGTERM;
+
+ r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s);
+ if (r == -1)
+ {
+ perror (0);
+ exit (1);
+ }
+#endif /* __linux__ */
pid_t current_ppid = getppid ();
if (current_ppid != ppid_before_fork)
{
@@ -197,24 +213,30 @@ fateshare_config (vlib_main_t *vm, unformat_input_t *input)
if (fmp->monitor_cmd == 0)
{
- char *p, path[PATH_MAX];
- int rv;
+ char *p;
+ u8 *path;
/* find executable path */
- if ((rv = readlink ("/proc/self/exe", path, PATH_MAX - 1)) == -1)
+ path = os_get_exec_path ();
+
+ if (path == 0)
return clib_error_return (
- 0, "could not stat /proc/self/exe - set monitor manually");
+ 0, "could not get exec path - set monitor manually");
- /* readlink doesn't provide null termination */
- path[rv] = 0;
+ /* add null termination */
+ vec_add1 (path, 0);
/* strip filename */
- if ((p = strrchr (path, '/')) == 0)
- return clib_error_return (
- 0, "could not determine vpp directory - set monitor manually");
+ if ((p = strrchr ((char *) path, '/')) == 0)
+ {
+ vec_free (path);
+ return clib_error_return (
+ 0, "could not determine vpp directory - set monitor manually");
+ }
*p = 0;
fmp->monitor_cmd = format (0, "%s/vpp_fateshare_monitor\0", path);
+ vec_free (path);
}
if (fmp->monitor_logfile == 0)
{
diff --git a/src/plugins/fateshare/vpp_fateshare_monitor.c b/src/plugins/fateshare/vpp_fateshare_monitor.c
index 7b203884c4e..7af451ccffe 100644
--- a/src/plugins/fateshare/vpp_fateshare_monitor.c
+++ b/src/plugins/fateshare/vpp_fateshare_monitor.c
@@ -4,7 +4,12 @@
#include <sys/types.h>
#include <sys/wait.h>
+#ifdef __linux__
#include <sys/prctl.h> // prctl(), PR_SET_PDEATHSIG
+#else
+#include <signal.h>
+#include <sys/procctl.h>
+#endif /* __linux__ */
#include <sys/stat.h>
#include <fcntl.h>
@@ -82,6 +87,7 @@ launch_command (char *scmd, char *logname_base)
}
/* child */
+#ifdef __linux__
int r = prctl (PR_SET_PDEATHSIG, SIGTERM);
if (r == -1)
{
@@ -89,6 +95,17 @@ launch_command (char *scmd, char *logname_base)
sleep (5);
exit (1);
}
+#else
+ int r, s = SIGTERM;
+
+ r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s);
+ if (r == -1)
+ {
+ perror ("procctl");
+ exit (1);
+ }
+#endif /* __linux__ */
+
if (getppid () != ppid_before_fork)
{
sleep (5);
@@ -180,12 +197,23 @@ main (int argc, char **argv)
exit (2);
}
+#ifdef __linux__
int r = prctl (PR_SET_PDEATHSIG, SIGTERM);
if (r == -1)
{
perror (0);
exit (1);
}
+#else
+ int r, s = SIGTERM;
+
+ r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s);
+ if (r == -1)
+ {
+ perror ("procctl");
+ exit (1);
+ }
+#endif /* __linux__ */
/* Establish handler. */
struct sigaction sa;
diff --git a/src/plugins/hs_apps/CMakeLists.txt b/src/plugins/hs_apps/CMakeLists.txt
index bd43eb34afa..179c9c7a4c4 100644
--- a/src/plugins/hs_apps/CMakeLists.txt
+++ b/src/plugins/hs_apps/CMakeLists.txt
@@ -55,7 +55,7 @@ if(VPP_BUILD_VCL_TESTS)
)
add_vpp_executable(${test}
SOURCES "vcl/${test}.c"
- LINK_LIBRARIES vppcom pthread
+ LINK_LIBRARIES vppcom pthread ${EPOLL_LIB}
NO_INSTALL
)
endforeach()
@@ -68,7 +68,7 @@ if(VPP_BUILD_VCL_TESTS)
SOURCES
"vcl/${test}.c"
vcl/vcl_test_protos.c
- LINK_LIBRARIES vppcom pthread
+ LINK_LIBRARIES vppcom pthread ${EPOLL_LIB}
NO_INSTALL
)
endforeach()
diff --git a/src/plugins/hs_apps/http_cli.c b/src/plugins/hs_apps/http_cli.c
index 5d4d49c0fba..f42f65342c3 100644
--- a/src/plugins/hs_apps/http_cli.c
+++ b/src/plugins/hs_apps/http_cli.c
@@ -323,6 +323,13 @@ hcs_ts_rx_callback (session_t *ts)
return 0;
}
+ if (msg.data.len == 0)
+ {
+ hs->tx_buf = 0;
+ start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
+ return 0;
+ }
+
/* send the command to a new/recycled vlib process */
vec_validate (args.buf, msg.data.len - 1);
rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, args.buf);
diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c
index f44d4e1bcd1..a99169bafea 100644
--- a/src/plugins/hs_apps/http_client_cli.c
+++ b/src/plugins/hs_apps/http_client_cli.c
@@ -13,11 +13,9 @@
* limitations under the License.
*/
-#include <vnet/session/application.h>
#include <vnet/session/application_interface.h>
#include <vnet/session/session.h>
#include <http/http.h>
-#include <hs_apps/http_cli.h>
#define HCC_DEBUG 0
@@ -68,6 +66,8 @@ typedef struct
typedef enum
{
HCC_REPLY_RECEIVED = 100,
+ HCC_TRANSPORT_CLOSED,
+ HCC_CONNECT_FAILED,
} hcc_cli_signal_t;
static hcc_main_t hcc_main;
@@ -136,6 +136,8 @@ hcc_ts_connected_callback (u32 app_index, u32 hc_index, session_t *as,
{
clib_warning ("connected error: hc_index(%d): %U", hc_index,
format_session_error, err);
+ vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index,
+ HCC_CONNECT_FAILED, 0);
return -1;
}
@@ -273,6 +275,17 @@ hcc_ts_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf)
hcc_session_free (s->thread_index, hs);
}
+static void
+hcc_ts_transport_closed (session_t *s)
+{
+ hcc_main_t *hcm = &hcc_main;
+
+ HCC_DBG ("transport closed");
+
+ vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index,
+ HCC_TRANSPORT_CLOSED, 0);
+}
+
static session_cb_vft_t hcc_session_cb_vft = {
.session_accept_callback = hcc_ts_accept_callback,
.session_disconnect_callback = hcc_ts_disconnect_callback,
@@ -281,6 +294,7 @@ static session_cb_vft_t hcc_session_cb_vft = {
.builtin_app_tx_callback = hcc_ts_tx_callback,
.session_reset_callback = hcc_ts_reset_callback,
.session_cleanup_callback = hcc_ts_cleanup_callback,
+ .session_transport_closed_callback = hcc_ts_transport_closed,
};
static clib_error_t *
@@ -370,7 +384,7 @@ hcc_connect ()
}
static clib_error_t *
-hcc_run (vlib_main_t *vm)
+hcc_run (vlib_main_t *vm, int print_output)
{
vlib_thread_main_t *vtm = vlib_get_thread_main ();
hcc_main_t *hcm = &hcc_main;
@@ -407,11 +421,18 @@ hcc_run (vlib_main_t *vm)
goto cleanup;
case HCC_REPLY_RECEIVED:
- vlib_cli_output (vm, "%v", hcm->http_response);
+ if (print_output)
+ vlib_cli_output (vm, "%v", hcm->http_response);
vec_free (hcm->http_response);
break;
+ case HCC_TRANSPORT_CLOSED:
+ err = clib_error_return (0, "error, transport closed");
+ break;
+ case HCC_CONNECT_FAILED:
+ err = clib_error_return (0, "failed to connect");
+ break;
default:
- clib_error_return (0, "unexpected event %d", event_type);
+ err = clib_error_return (0, "unexpected event %d", event_type);
break;
}
@@ -448,7 +469,7 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input,
u64 seg_size;
u8 *appns_id = 0;
clib_error_t *err = 0;
- int rv;
+ int rv, print_output = 1;
hcm->prealloc_fifos = 0;
hcm->private_segment_size = 0;
@@ -472,6 +493,8 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input,
hcm->fifo_size <<= 10;
else if (unformat (line_input, "uri %s", &hcm->uri))
;
+ else if (unformat (line_input, "no-output"))
+ print_output = 0;
else if (unformat (line_input, "appns %_%v%_", &appns_id))
;
else if (unformat (line_input, "secret %lu", &hcm->appns_secret))
@@ -506,7 +529,7 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input,
vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */);
vlib_worker_thread_barrier_release (vm);
- err = hcc_run (vm);
+ err = hcc_run (vm, print_output);
if (hcc_detach ())
{
@@ -526,7 +549,7 @@ done:
VLIB_CLI_COMMAND (hcc_command, static) = {
.path = "http cli client",
.short_help = "[appns <app-ns> secret <appns-secret>] uri http://<ip-addr> "
- "query <query-string>",
+ "query <query-string> [no-output]",
.function = hcc_command_fn,
.is_mp_safe = 1,
};
diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c
index 036e6929987..893dd877c29 100644
--- a/src/plugins/http/http.c
+++ b/src/plugins/http/http.c
@@ -74,14 +74,14 @@ format_http_state (u8 *s, va_list *va)
return format (s, "unknown");
}
-static inline void
-http_state_change (http_conn_t *hc, http_state_t state)
-{
- HTTP_DBG (1, "changing http state %U -> %U", format_http_state,
- hc->http_state, format_http_state, state);
- ASSERT (hc->http_state != state);
- hc->http_state = state;
-}
+#define http_state_change(_hc, _state) \
+ do \
+ { \
+ HTTP_DBG (1, "changing http state %U -> %U", format_http_state, \
+ (_hc)->http_state, format_http_state, _state); \
+ (_hc)->http_state = _state; \
+ } \
+ while (0)
static inline http_worker_t *
http_worker_get (u32 thread_index)
@@ -140,6 +140,7 @@ http_listener_free (http_conn_t *lhc)
{
http_main_t *hm = &http_main;
+ vec_free (lhc->app_name);
if (CLIB_DEBUG)
memset (lhc, 0xfc, sizeof (*lhc));
pool_put (hm->listener_pool, lhc);
@@ -266,17 +267,21 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts,
app_worker_t *app_wrk;
int rv;
+ ho_hc = http_conn_get_w_thread (ho_hc_index, 0);
+ ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING);
+
if (err)
{
- clib_warning ("ERROR: %d", err);
+ clib_warning ("half-open hc index %d, error: %U", ho_hc_index,
+ format_session_error, err);
+ app_wrk = app_worker_get_if_valid (ho_hc->h_pa_wrk_index);
+ if (app_wrk)
+ app_worker_connect_notify (app_wrk, 0, err, ho_hc->h_pa_app_api_ctx);
return 0;
}
new_hc_index = http_conn_alloc_w_thread (ts->thread_index);
hc = http_conn_get_w_thread (new_hc_index, ts->thread_index);
- ho_hc = http_conn_get_w_thread (ho_hc_index, 0);
-
- ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING);
clib_memcpy_fast (hc, ho_hc, sizeof (*hc));
@@ -372,12 +377,12 @@ static const char *http_redirect_template = "HTTP/1.1 %s\r\n";
static const char *http_response_template = "HTTP/1.1 %s\r\n"
"Date: %U GMT\r\n"
"Expires: %U GMT\r\n"
- "Server: VPP Static\r\n"
+ "Server: %s\r\n"
"Content-Type: %s\r\n"
"Content-Length: %lu\r\n\r\n";
static const char *http_request_template = "GET %s HTTP/1.1\r\n"
- "User-Agent: VPP HTTP client\r\n"
+ "User-Agent: %s\r\n"
"Accept: */*\r\n";
static u32
@@ -520,17 +525,19 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp)
http_msg_t msg = {};
app_worker_t *app_wrk;
session_t *as;
- http_status_code_t ec;
rv = http_read_message (hc);
/* Nothing yet, wait for data or timer expire */
if (rv)
- return HTTP_SM_STOP;
+ {
+ HTTP_DBG (1, "no data to deq");
+ return HTTP_SM_STOP;
+ }
if (vec_len (hc->rx_buf) < 8)
{
- ec = HTTP_STATUS_BAD_REQUEST;
+ clib_warning ("response buffer too short");
goto error;
}
@@ -546,9 +553,7 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp)
if (rv)
{
clib_warning ("failed to parse http reply");
- session_transport_closing_notify (&hc->connection);
- http_disconnect_transport (hc);
- return -1;
+ goto error;
}
msg.data.len = content_length;
u32 dlen = vec_len (hc->rx_buf) - hc->rx_buf_offset;
@@ -577,7 +582,7 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp)
{
hc->rx_buf_offset = 0;
vec_reset_length (hc->rx_buf);
- http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+ http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD);
}
else
{
@@ -585,23 +590,20 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp)
}
app_wrk = app_worker_get_if_valid (as->app_wrk_index);
- app_worker_rx_notify (app_wrk, as);
+ if (app_wrk)
+ app_worker_rx_notify (app_wrk, as);
return HTTP_SM_STOP;
}
else
{
- HTTP_DBG (0, "Unknown http method %v", hc->rx_buf);
- ec = HTTP_STATUS_METHOD_NOT_ALLOWED;
+ clib_warning ("Unknown http method %v", hc->rx_buf);
goto error;
}
- return HTTP_SM_STOP;
error:
-
- http_send_error (hc, ec);
session_transport_closing_notify (&hc->connection);
+ session_transport_closed_notify (&hc->connection);
http_disconnect_transport (hc);
-
return HTTP_SM_ERROR;
}
@@ -734,6 +736,7 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp)
* Add headers. For now:
* - current time
* - expiration time
+ * - server name
* - content type
* - data length
*/
@@ -741,6 +744,10 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp)
switch (msg.code)
{
+ case HTTP_STATUS_NOT_FOUND:
+ case HTTP_STATUS_METHOD_NOT_ALLOWED:
+ case HTTP_STATUS_BAD_REQUEST:
+ case HTTP_STATUS_INTERNAL_ERROR:
case HTTP_STATUS_OK:
header =
format (0, http_response_template, http_status_code_str[msg.code],
@@ -748,6 +755,8 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp)
format_clib_timebase_time, now,
/* Expires */
format_clib_timebase_time, now + 600.0,
+ /* Server */
+ hc->app_name,
/* Content type */
http_content_type_str[msg.content_type],
/* Length */
@@ -759,6 +768,7 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp)
/* Location: http(s)://new-place already queued up as data */
break;
default:
+ clib_warning ("unsupported status code: %d", msg.code);
return HTTP_SM_ERROR;
}
@@ -791,7 +801,6 @@ error:
static http_sm_result_t
http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp)
{
- http_status_code_t sc;
http_msg_t msg;
session_t *as;
u8 *buf = 0, *request;
@@ -806,29 +815,31 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp)
if (msg.data.type > HTTP_MSG_DATA_PTR)
{
clib_warning ("no data");
- sc = HTTP_STATUS_INTERNAL_ERROR;
goto error;
}
if (msg.type != HTTP_MSG_REQUEST)
{
clib_warning ("unexpected message type %d", msg.type);
- sc = HTTP_STATUS_INTERNAL_ERROR;
goto error;
}
- sc = msg.code;
+ /* currently we support only GET method */
+ if (msg.method_type != HTTP_REQ_GET)
+ {
+ clib_warning ("unsupported method %d", msg.method_type);
+ goto error;
+ }
vec_validate (buf, msg.data.len - 1);
rv = svm_fifo_dequeue (as->tx_fifo, msg.data.len, buf);
ASSERT (rv == msg.data.len);
- request = format (0, http_request_template, buf);
+ request = format (0, http_request_template, buf, hc->app_name);
offset = http_send_data (hc, request, vec_len (request), 0);
if (offset != vec_len (request))
{
clib_warning ("sending request failed!");
- sc = HTTP_STATUS_INTERNAL_ERROR;
goto error;
}
@@ -837,83 +848,85 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp)
vec_free (buf);
vec_free (request);
- return HTTP_SM_CONTINUE;
+ return HTTP_SM_STOP;
error:
- clib_warning ("unexpected msg type from app %u", msg.type);
- http_send_error (hc, sc);
+ svm_fifo_dequeue_drop_all (as->tx_fifo);
session_transport_closing_notify (&hc->connection);
+ session_transport_closed_notify (&hc->connection);
http_disconnect_transport (hc);
- return HTTP_SM_STOP;
-}
-
-static void
-http_app_enqueue (http_conn_t *hc, session_t *as)
-{
- app_worker_t *app_wrk;
- u32 dlen, max_enq, n_enq;
- int rv;
-
- dlen = vec_len (hc->rx_buf) - hc->rx_buf_offset;
- if (!dlen)
- return;
-
- max_enq = svm_fifo_max_enqueue (as->rx_fifo);
- n_enq = clib_min (max_enq, dlen);
- rv = svm_fifo_enqueue (as->rx_fifo, n_enq, &hc->rx_buf[hc->rx_buf_offset]);
- if (rv < 0)
- return;
-
- hc->rx_buf_offset += rv;
- if (hc->rx_buf_offset >= vec_len (hc->rx_buf))
- {
- vec_reset_length (hc->rx_buf);
- hc->rx_buf_offset = 0;
- }
-
- app_wrk = app_worker_get_if_valid (as->app_wrk_index);
- ASSERT (app_wrk);
- app_worker_rx_notify (app_wrk, as);
+ return HTTP_SM_ERROR;
}
static http_sm_result_t
http_state_client_io_more_data (http_conn_t *hc, transport_send_params_t *sp)
{
session_t *as, *ts;
- u32 max_deq;
- int n_read;
+ app_worker_t *app_wrk;
+ svm_fifo_seg_t _seg, *seg = &_seg;
+ u32 max_len, max_deq, max_enq, n_segs = 1;
+ int rv, len;
as = session_get_from_handle (hc->h_pa_session_handle);
ts = session_get_from_handle (hc->h_tc_session_handle);
- http_app_enqueue (hc, as);
+ max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
+ if (max_deq == 0)
+ {
+ HTTP_DBG (1, "no data to deq");
+ return HTTP_SM_STOP;
+ }
- if (hc->to_recv == 0)
+ max_enq = svm_fifo_max_enqueue (as->rx_fifo);
+ if (max_enq == 0)
{
- http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+ HTTP_DBG (1, "app's rx fifo full");
+ svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
return HTTP_SM_STOP;
}
- max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
- if (max_deq > 0)
+ max_len = clib_min (max_enq, max_deq);
+ len = svm_fifo_segments (ts->rx_fifo, 0, seg, &n_segs, max_len);
+ if (len < 0)
{
- vec_validate (hc->rx_buf, max_deq - 1);
- n_read = svm_fifo_dequeue (ts->rx_fifo, max_deq, hc->rx_buf);
- ASSERT (n_read == max_deq);
+ HTTP_DBG (1, "svm_fifo_segments() len %d", len);
+ return HTTP_SM_STOP;
+ }
- if (svm_fifo_is_empty (ts->rx_fifo))
- svm_fifo_unset_event (ts->rx_fifo);
+ rv = svm_fifo_enqueue_segments (as->rx_fifo, seg, 1, 0 /* allow partial */);
+ if (rv < 0)
+ {
+ clib_warning ("data enqueue failed, rv: %d", rv);
+ return HTTP_SM_ERROR;
+ }
- hc->to_recv -= n_read;
- vec_set_len (hc->rx_buf, n_read);
+ svm_fifo_dequeue_drop (ts->rx_fifo, rv);
+ if (rv > hc->to_recv)
+ {
+ clib_warning ("http protocol error: received more data than expected");
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+ http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD);
+ return HTTP_SM_ERROR;
}
+ hc->to_recv -= rv;
+ HTTP_DBG (1, "drained %d from ts; remains %d", rv, hc->to_recv);
- if (hc->rx_buf_offset < vec_len (hc->rx_buf) ||
- svm_fifo_max_dequeue_cons (ts->rx_fifo))
+ if (hc->to_recv == 0)
{
- session_enqueue_notify (ts);
+ hc->rx_buf_offset = 0;
+ vec_reset_length (hc->rx_buf);
+ http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD);
}
- return HTTP_SM_CONTINUE;
+
+ app_wrk = app_worker_get_if_valid (as->app_wrk_index);
+ if (app_wrk)
+ app_worker_rx_notify (app_wrk, as);
+
+ if (svm_fifo_max_dequeue_cons (ts->rx_fifo))
+ session_enqueue_notify (ts);
+
+ return HTTP_SM_STOP;
}
static http_sm_result_t
@@ -983,6 +996,7 @@ static void
http_req_run_state_machine (http_conn_t *hc, transport_send_params_t *sp)
{
http_sm_result_t res;
+
do
{
res = state_funcs[hc->http_state](hc, sp);
@@ -1010,6 +1024,12 @@ http_ts_rx_callback (session_t *ts)
return -1;
}
+ if (hc->state == HTTP_CONN_STATE_CLOSED)
+ {
+ svm_fifo_dequeue_drop_all (ts->tx_fifo);
+ return 0;
+ }
+
http_req_run_state_machine (hc, 0);
if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED)
@@ -1153,6 +1173,11 @@ http_transport_connect (transport_endpoint_cfg_t *tep)
hc->state = HTTP_CONN_STATE_CONNECTING;
cargs->api_context = hc_index;
+ if (vec_len (app->name))
+ hc->app_name = vec_dup (app->name);
+ else
+ hc->app_name = format (0, "VPP HTTP client");
+
HTTP_DBG (1, "hc ho_index %x", hc_index);
if ((error = vnet_connect (cargs)))
@@ -1205,6 +1230,11 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
lhc->c_s_index = app_listener_index;
lhc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
+ if (vec_len (app->name))
+ lhc->app_name = vec_dup (app->name);
+ else
+ lhc->app_name = format (0, "VPP server app");
+
return lhc_index;
}
@@ -1245,7 +1275,11 @@ http_transport_close (u32 hc_index, u32 thread_index)
http_disconnect_transport (hc);
return;
}
-
+ else if (hc->state == HTTP_CONN_STATE_CLOSED)
+ {
+ HTTP_DBG (1, "nothing to do, already closed");
+ return;
+ }
as = session_get_from_handle (hc->h_pa_session_handle);
/* Nothing more to send, confirm close */
diff --git a/src/plugins/http/http.h b/src/plugins/http/http.h
index dbae5ac4611..c9912dd6db8 100644
--- a/src/plugins/http/http.h
+++ b/src/plugins/http/http.h
@@ -227,6 +227,7 @@ typedef struct http_tc_
http_conn_state_t state;
u32 timer_handle;
+ u8 *app_name;
/*
* Current request
diff --git a/src/plugins/ikev2/ikev2.api b/src/plugins/ikev2/ikev2.api
index 58b7fc05d9e..de276e7f3ea 100644
--- a/src/plugins/ikev2/ikev2.api
+++ b/src/plugins/ikev2/ikev2.api
@@ -72,8 +72,6 @@ define ikev2_sa_dump
{
u32 client_index;
u32 context;
-
- option status = "in_progress";
};
/** \brief Dump all SAs
@@ -86,6 +84,17 @@ define ikev2_sa_v2_dump
u32 context;
};
+/** \brief Dump all SAs
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ikev2_sa_v3_dump
+{
+ u32 client_index;
+ u32 context;
+ option status = "in_progress";
+};
+
/** \brief Details about IKE SA
@param context - sender context, to match reply w/ request
@param retval - return code
@@ -97,7 +106,6 @@ define ikev2_sa_details
i32 retval;
vl_api_ikev2_sa_t sa;
- option status = "in_progress";
};
/** \brief Details about IKE SA
@@ -113,6 +121,20 @@ define ikev2_sa_v2_details
vl_api_ikev2_sa_v2_t sa;
};
+/** \brief Details about IKE SA
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param sa - SA data
+*/
+define ikev2_sa_v3_details
+{
+ u32 context;
+ i32 retval;
+
+ vl_api_ikev2_sa_v3_t sa;
+ option status = "in_progress";
+};
+
/** \brief Dump child SA of specific SA
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -125,7 +147,6 @@ define ikev2_child_sa_dump
u32 sa_index;
option vat_help = "sa_index <index>";
- option status = "in_progress";
};
/** \brief Child SA details
@@ -139,6 +160,34 @@ define ikev2_child_sa_details
i32 retval;
vl_api_ikev2_child_sa_t child_sa;
+};
+
+/** \brief Dump child SA of specific SA
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sa_index - index of specific sa
+*/
+define ikev2_child_sa_v2_dump
+{
+ u32 client_index;
+ u32 context;
+
+ u32 sa_index;
+ option vat_help = "sa_index <index>";
+ option status = "in_progress";
+};
+
+/** \brief Child SA details
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param child_sa - child SA data
+*/
+define ikev2_child_sa_v2_details
+{
+ u32 context;
+ i32 retval;
+
+ vl_api_ikev2_child_sa_v2_t child_sa;
option status = "in_progress";
};
diff --git a/src/plugins/ikev2/ikev2.c b/src/plugins/ikev2/ikev2.c
index d559565487e..9bea2c96d12 100644
--- a/src/plugins/ikev2/ikev2.c
+++ b/src/plugins/ikev2/ikev2.c
@@ -1888,7 +1888,7 @@ ikev2_sa_match_ts (ikev2_sa_t * sa)
}
static ikev2_profile_t *
-ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
+ikev2_select_profile (vlib_main_t *vm, ikev2_main_t *km, ikev2_sa_t *sa,
ikev2_sa_transform_t *tr_prf, u8 *key_pad)
{
ikev2_profile_t *ret = 0, *p;
@@ -1928,6 +1928,7 @@ ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
if (!clib_memcmp (auth, sa_auth->data, vec_len (sa_auth->data)))
{
ikev2_set_state (sa, IKEV2_STATE_AUTHENTICATED);
+ sa->auth_timestamp = vlib_time_now (vm);
vec_free (auth);
ret = p;
break;
@@ -1946,6 +1947,7 @@ ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
if (ikev2_verify_sign (p->auth.key, sa_auth->data, authmsg) == 1)
{
ikev2_set_state (sa, IKEV2_STATE_AUTHENTICATED);
+ sa->auth_timestamp = vlib_time_now (vm);
ret = p;
break;
}
@@ -1961,7 +1963,7 @@ ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
}
static void
-ikev2_sa_auth (ikev2_sa_t *sa)
+ikev2_sa_auth (ikev2_sa_t *sa, vlib_main_t *vm)
{
ikev2_main_t *km = &ikev2_main;
ikev2_profile_t *sel_p = 0;
@@ -1982,7 +1984,7 @@ ikev2_sa_auth (ikev2_sa_t *sa)
}
key_pad = format (0, "%s", IKEV2_KEY_PAD);
- sel_p = ikev2_select_profile (km, sa, tr_prf, key_pad);
+ sel_p = ikev2_select_profile (vm, km, sa, tr_prf, key_pad);
if (sel_p)
{
@@ -2230,6 +2232,8 @@ ikev2_create_tunnel_interface (vlib_main_t *vm, ikev2_sa_t *sa,
clib_memset (&a, 0, sizeof (a));
+ child->timestamp = vlib_time_now (vm);
+
if (!child->r_proposals)
{
ikev2_set_state (sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN);
@@ -3424,7 +3428,7 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
sa0->dst_port = clib_net_to_host_u16 (udp0->src_port);
res = ikev2_process_auth_req (vm, sa0, ike0, rlen);
if (res)
- ikev2_sa_auth (sa0);
+ ikev2_sa_auth (sa0, vm);
else
vlib_node_increment_counter (vm, node->node_index,
IKEV2_ERROR_MALFORMED_PACKET, 1);
diff --git a/src/plugins/ikev2/ikev2_api.c b/src/plugins/ikev2/ikev2_api.c
index bb44e9f364f..c9608aa660b 100644
--- a/src/plugins/ikev2/ikev2_api.c
+++ b/src/plugins/ikev2/ikev2_api.c
@@ -173,7 +173,7 @@ send_profile (ikev2_profile_t * profile, vl_api_registration_t * reg,
rmp->profile.lifetime_jitter = profile->lifetime_jitter;
rmp->profile.handover = profile->handover;
- vl_api_ikev2_profile_t_endian (&rmp->profile);
+ vl_api_ikev2_profile_t_endian (&rmp->profile, 1 /* to network */);
vl_api_send_msg (reg, (u8 *) rmp);
}
@@ -291,7 +291,7 @@ send_sa (ikev2_sa_t * sa, vl_api_ikev2_sa_dump_t * mp, u32 api_sa_index)
ikev2_copy_stats (&rsa->stats, &sa->stats);
- vl_api_ikev2_sa_t_endian(rsa);
+ vl_api_ikev2_sa_t_endian (rsa, 1 /* to network */);
});
}
@@ -382,7 +382,7 @@ send_sa_v2 (ikev2_sa_t *sa, vl_api_ikev2_sa_v2_dump_t *mp, u32 api_sa_index)
ikev2_copy_stats (&rsa->stats, &sa->stats);
- vl_api_ikev2_sa_v2_t_endian (rsa);
+ vl_api_ikev2_sa_v2_t_endian (rsa, 1 /* to network */);
});
}
@@ -405,6 +405,100 @@ vl_api_ikev2_sa_v2_dump_t_handler (vl_api_ikev2_sa_v2_dump_t *mp)
}
static void
+send_sa_v3 (ikev2_sa_t *sa, vl_api_ikev2_sa_v3_dump_t *mp, u32 api_sa_index)
+{
+ ikev2_main_t *km = &ikev2_main;
+ vl_api_ikev2_sa_v3_details_t *rmp = 0;
+ int rv = 0;
+ ikev2_sa_transform_t *tr;
+ ikev2_profile_t *p;
+ p = pool_elt_at_index (km->profiles, sa->profile_index);
+ vlib_main_t *vm = vlib_get_main ();
+
+ REPLY_MACRO2_ZERO (VL_API_IKEV2_SA_V3_DETAILS, {
+ vl_api_ikev2_sa_v3_t *rsa = &rmp->sa;
+ vl_api_ikev2_keys_t *k = &rsa->keys;
+
+ int size_data = sizeof (rsa->profile_name) - 1;
+ if (vec_len (p->name) < size_data)
+ size_data = vec_len (p->name);
+ clib_memcpy (rsa->profile_name, p->name, size_data);
+
+ rsa->state = ikev2_state_encode (sa->state);
+
+ rsa->uptime = vlib_time_now (vm) - sa->auth_timestamp;
+
+ rsa->sa_index = api_sa_index;
+ ip_address_encode2 (&sa->iaddr, &rsa->iaddr);
+ ip_address_encode2 (&sa->raddr, &rsa->raddr);
+ rsa->ispi = sa->ispi;
+ rsa->rspi = sa->rspi;
+ cp_id (&rsa->i_id, &sa->i_id);
+ cp_id (&rsa->r_id, &sa->r_id);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ if (tr)
+ cp_sa_transform (&rsa->encryption, tr);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+ if (tr)
+ cp_sa_transform (&rsa->prf, tr);
+
+ tr =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+ if (tr)
+ cp_sa_transform (&rsa->integrity, tr);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
+ if (tr)
+ cp_sa_transform (&rsa->dh, tr);
+
+ k->sk_d_len = vec_len (sa->sk_d);
+ clib_memcpy (&k->sk_d, sa->sk_d, k->sk_d_len);
+
+ k->sk_ai_len = vec_len (sa->sk_ai);
+ clib_memcpy (&k->sk_ai, sa->sk_ai, k->sk_ai_len);
+
+ k->sk_ar_len = vec_len (sa->sk_ar);
+ clib_memcpy (&k->sk_ar, sa->sk_ar, k->sk_ar_len);
+
+ k->sk_ei_len = vec_len (sa->sk_ei);
+ clib_memcpy (&k->sk_ei, sa->sk_ei, k->sk_ei_len);
+
+ k->sk_er_len = vec_len (sa->sk_er);
+ clib_memcpy (&k->sk_er, sa->sk_er, k->sk_er_len);
+
+ k->sk_pi_len = vec_len (sa->sk_pi);
+ clib_memcpy (&k->sk_pi, sa->sk_pi, k->sk_pi_len);
+
+ k->sk_pr_len = vec_len (sa->sk_pr);
+ clib_memcpy (&k->sk_pr, sa->sk_pr, k->sk_pr_len);
+
+ ikev2_copy_stats (&rsa->stats, &sa->stats);
+
+ vl_api_ikev2_sa_v3_t_endian (rsa, 1 /* to network */);
+ });
+}
+
+static void
+vl_api_ikev2_sa_v3_dump_t_handler (vl_api_ikev2_sa_v3_dump_t *mp)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *sa;
+
+ vec_foreach (tkm, km->per_thread_data)
+ {
+ pool_foreach (sa, tkm->sas)
+ {
+ u32 api_sa_index =
+ ikev2_encode_sa_index (sa - tkm->sas, tkm - km->per_thread_data);
+ send_sa_v3 (sa, mp, api_sa_index);
+ }
+ }
+}
+
+static void
send_child_sa (ikev2_child_sa_t * child,
vl_api_ikev2_child_sa_dump_t * mp, u32 child_sa_index,
u32 sa_index)
@@ -455,7 +549,7 @@ send_child_sa (ikev2_child_sa_t * child,
k->sk_ar_len);
}
- vl_api_ikev2_child_sa_t_endian (&rmp->child_sa);
+ vl_api_ikev2_child_sa_t_endian (&rmp->child_sa, 1 /* to network */);
});
}
@@ -488,6 +582,85 @@ vl_api_ikev2_child_sa_dump_t_handler (vl_api_ikev2_child_sa_dump_t * mp)
}
static void
+send_child_sa_v2 (ikev2_child_sa_t *child, vl_api_ikev2_child_sa_v2_dump_t *mp,
+ u32 child_sa_index, u32 sa_index)
+{
+ vl_api_ikev2_child_sa_v2_details_t *rmp = 0;
+ int rv = 0;
+ ikev2_sa_transform_t *tr;
+ vlib_main_t *vm = vlib_get_main ();
+
+ REPLY_MACRO2_ZERO (VL_API_IKEV2_CHILD_SA_V2_DETAILS, {
+ vl_api_ikev2_keys_t *k = &rmp->child_sa.keys;
+ rmp->child_sa.child_sa_index = child_sa_index;
+ rmp->child_sa.uptime = vlib_time_now (vm) - child->timestamp;
+ rmp->child_sa.sa_index = sa_index;
+ rmp->child_sa.i_spi = child->i_proposals ? child->i_proposals[0].spi : 0;
+ rmp->child_sa.r_spi = child->r_proposals ? child->r_proposals[0].spi : 0;
+
+ tr =
+ ikev2_sa_get_td_for_type (child->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ if (tr)
+ cp_sa_transform (&rmp->child_sa.encryption, tr);
+
+ tr = ikev2_sa_get_td_for_type (child->r_proposals,
+ IKEV2_TRANSFORM_TYPE_INTEG);
+ if (tr)
+ cp_sa_transform (&rmp->child_sa.integrity, tr);
+
+ tr =
+ ikev2_sa_get_td_for_type (child->r_proposals, IKEV2_TRANSFORM_TYPE_ESN);
+ if (tr)
+ cp_sa_transform (&rmp->child_sa.esn, tr);
+
+ k->sk_ei_len = vec_len (child->sk_ei);
+ clib_memcpy (&k->sk_ei, child->sk_ei, k->sk_ei_len);
+
+ k->sk_er_len = vec_len (child->sk_er);
+ clib_memcpy (&k->sk_er, child->sk_er, k->sk_er_len);
+
+ if (vec_len (child->sk_ai))
+ {
+ k->sk_ai_len = vec_len (child->sk_ai);
+ clib_memcpy (&k->sk_ai, child->sk_ai, k->sk_ai_len);
+
+ k->sk_ar_len = vec_len (child->sk_ar);
+ clib_memcpy (&k->sk_ar, child->sk_ar, k->sk_ar_len);
+ }
+
+ vl_api_ikev2_child_sa_v2_t_endian (&rmp->child_sa, 1 /* to network */);
+ });
+}
+
+static void
+vl_api_ikev2_child_sa_v2_dump_t_handler (vl_api_ikev2_child_sa_v2_dump_t *mp)
+{
+ ikev2_main_t *im = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *sa;
+ ikev2_child_sa_t *child;
+ u32 sai = ~0, ti = ~0;
+
+ ikev2_decode_sa_index (clib_net_to_host_u32 (mp->sa_index), &sai, &ti);
+
+ if (vec_len (im->per_thread_data) <= ti)
+ return;
+
+ tkm = vec_elt_at_index (im->per_thread_data, ti);
+
+ if (pool_len (tkm->sas) <= sai || pool_is_free_index (tkm->sas, sai))
+ return;
+
+ sa = pool_elt_at_index (tkm->sas, sai);
+
+ vec_foreach (child, sa->childs)
+ {
+ u32 child_sa_index = child - sa->childs;
+ send_child_sa_v2 (child, mp, child_sa_index, sai);
+ }
+}
+
+static void
vl_api_ikev2_traffic_selector_dump_t_handler
(vl_api_ikev2_traffic_selector_dump_t * mp)
{
@@ -527,7 +700,7 @@ static void
rmp->ts.sa_index = api_sa_index;
rmp->ts.child_sa_index = child_sa_index;
cp_ts (&rmp->ts, ts, mp->is_initiator);
- vl_api_ikev2_ts_t_endian (&rmp->ts);
+ vl_api_ikev2_ts_t_endian (&rmp->ts, 1 /* to network */);
});
}
}
diff --git a/src/plugins/ikev2/ikev2_cli.c b/src/plugins/ikev2/ikev2_cli.c
index 733ae478b83..975774c48d5 100644
--- a/src/plugins/ikev2/ikev2_cli.c
+++ b/src/plugins/ikev2/ikev2_cli.c
@@ -74,12 +74,16 @@ format_ikev2_child_sa (u8 * s, va_list * va)
ikev2_ts_t *ts;
ikev2_sa_transform_t *tr;
u8 *c = 0;
+ vlib_main_t *vm = vlib_get_main ();
u32 indent = format_get_indent (s);
indent += 1;
s = format (s, "child sa %u:", index);
+ s = format (s, "\n uptime: %f (s)\n ",
+ vlib_time_now (vm) - child->timestamp);
+
tr = ikev2_sa_get_td_for_type (child->r_proposals,
IKEV2_TRANSFORM_TYPE_ENCR);
c = format (c, "%U ", format_ikev2_sa_transform, tr);
@@ -135,6 +139,7 @@ format_ikev2_sa (u8 * s, va_list * va)
ikev2_sa_transform_t *tr;
ikev2_child_sa_t *child;
u32 indent = 1;
+ vlib_main_t *vm = vlib_get_main ();
ikev2_main_t *km = &ikev2_main;
ikev2_profile_t *p;
@@ -168,6 +173,9 @@ format_ikev2_sa (u8 * s, va_list * va)
s = format (s, "\n state: %s", stateNames[sa->state]);
}
+ s =
+ format (s, "\n uptime: %f (s)\n", vlib_time_now (vm) - sa->auth_timestamp);
+
s = format (s, "\n%U", format_white_space, indent);
s = format (s, "nonce i:%U\n%Ur:%U\n",
diff --git a/src/plugins/ikev2/ikev2_priv.h b/src/plugins/ikev2/ikev2_priv.h
index a11538f92c7..0639809e9b1 100644
--- a/src/plugins/ikev2/ikev2_priv.h
+++ b/src/plugins/ikev2/ikev2_priv.h
@@ -307,6 +307,8 @@ typedef struct
f64 time_to_expiration;
u8 is_expired;
i8 rekey_retries;
+
+ f64 timestamp;
} ikev2_child_sa_t;
typedef struct
@@ -488,6 +490,8 @@ typedef struct
u8 keys_generated;
ikev2_stats_t stats;
+
+ f64 auth_timestamp;
} ikev2_sa_t;
diff --git a/src/plugins/ikev2/ikev2_test.c b/src/plugins/ikev2/ikev2_test.c
index 18d01dc6ffb..93683a5b5dc 100644
--- a/src/plugins/ikev2/ikev2_test.c
+++ b/src/plugins/ikev2/ikev2_test.c
@@ -391,7 +391,7 @@ vl_api_ikev2_sa_details_t_handler (vl_api_ikev2_sa_details_t * mp)
ip_address_t iaddr;
ip_address_t raddr;
vl_api_ikev2_keys_t *k = &sa->keys;
- vl_api_ikev2_sa_t_endian (sa);
+ vl_api_ikev2_sa_t_endian (sa, 0 /* from network */);
ip_address_decode2 (&sa->iaddr, &iaddr);
ip_address_decode2 (&sa->raddr, &raddr);
@@ -461,7 +461,7 @@ vl_api_ikev2_sa_v2_details_t_handler (vl_api_ikev2_sa_v2_details_t *mp)
ip_address_t iaddr;
ip_address_t raddr;
vl_api_ikev2_keys_t *k = &sa->keys;
- vl_api_ikev2_sa_v2_t_endian (sa);
+ vl_api_ikev2_sa_v2_t_endian (sa, 0 /* from network */);
ip_address_decode2 (&sa->iaddr, &iaddr);
ip_address_decode2 (&sa->raddr, &raddr);
@@ -497,6 +497,76 @@ vl_api_ikev2_sa_v2_details_t_handler (vl_api_ikev2_sa_v2_details_t *mp)
}
static int
+api_ikev2_sa_v3_dump (vat_main_t *vam)
+{
+ ikev2_test_main_t *im = &ikev2_test_main;
+ vl_api_ikev2_sa_v3_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ /* Construct the API message */
+ M (IKEV2_SA_V3_DUMP, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ if (!im->ping_id)
+ im->ping_id = vl_msg_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC));
+ mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping));
+ mp_ping->_vl_msg_id = htons (im->ping_id);
+ mp_ping->client_index = vam->my_client_index;
+ vam->result_ready = 0;
+
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ikev2_sa_v3_details_t_handler (vl_api_ikev2_sa_v3_details_t *mp)
+{
+ vat_main_t *vam = ikev2_test_main.vat_main;
+ vl_api_ikev2_sa_v3_t *sa = &mp->sa;
+ ip_address_t iaddr;
+ ip_address_t raddr;
+ vl_api_ikev2_keys_t *k = &sa->keys;
+ vl_api_ikev2_sa_v3_t_endian (sa, 0 /* from network */);
+
+ ip_address_decode2 (&sa->iaddr, &iaddr);
+ ip_address_decode2 (&sa->raddr, &raddr);
+
+ fformat (vam->ofp, "profile name %s sa index: %d\n", mp->sa.profile_name,
+ mp->sa.sa_index);
+ fformat (vam->ofp, " iip %U ispi %lx rip %U rspi %lx\n", format_ip_address,
+ &iaddr, sa->ispi, format_ip_address, &raddr, sa->rspi);
+ fformat (vam->ofp, " %U ", format_ikev2_sa_transform, &sa->encryption);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &sa->prf);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &sa->integrity);
+ fformat (vam->ofp, "%U \n", format_ikev2_sa_transform, &sa->dh);
+
+ fformat (vam->ofp, " SK_d %U\n", format_hex_bytes, k->sk_d, k->sk_d_len);
+
+ fformat (vam->ofp, " SK_a i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ai, k->sk_ai_len, format_hex_bytes, k->sk_ar, k->sk_ar_len);
+
+ fformat (vam->ofp, " SK_e i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ei, k->sk_ei_len, format_hex_bytes, k->sk_er, k->sk_er_len);
+
+ fformat (vam->ofp, " SK_p i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_pi, k->sk_pi_len, format_hex_bytes, k->sk_pr, k->sk_pr_len);
+
+ fformat (vam->ofp, " identifier (i) %U\n", format_ikev2_id_type_and_data,
+ &sa->i_id);
+ fformat (vam->ofp, " identifier (r) %U\n", format_ikev2_id_type_and_data,
+ &sa->r_id);
+
+ vam->result_ready = 1;
+}
+
+static int
api_ikev2_child_sa_dump (vat_main_t * vam)
{
unformat_input_t *i = vam->input;
@@ -549,7 +619,84 @@ vl_api_ikev2_child_sa_details_t_handler (vl_api_ikev2_child_sa_details_t * mp)
vat_main_t *vam = ikev2_test_main.vat_main;
vl_api_ikev2_child_sa_t *child_sa = &mp->child_sa;
vl_api_ikev2_keys_t *k = &child_sa->keys;
- vl_api_ikev2_child_sa_t_endian (child_sa);
+ vl_api_ikev2_child_sa_t_endian (child_sa, 0 /* from network */);
+
+ fformat (vam->ofp, " child sa %u:\n", child_sa->child_sa_index);
+
+ fformat (vam->ofp, " %U ", format_ikev2_sa_transform,
+ &child_sa->encryption);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &child_sa->integrity);
+ fformat (vam->ofp, "%U \n", format_ikev2_sa_transform, &child_sa->esn);
+
+ fformat (vam->ofp, " spi(i) %lx spi(r) %lx\n", child_sa->i_spi,
+ child_sa->r_spi);
+
+ fformat (vam->ofp, " SK_e i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ei, k->sk_ei_len, format_hex_bytes, k->sk_er, k->sk_er_len);
+ if (k->sk_ai_len)
+ {
+ fformat (vam->ofp, " SK_a i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ai, k->sk_ai_len, format_hex_bytes, k->sk_ar,
+ k->sk_ar_len);
+ }
+ vam->result_ready = 1;
+}
+
+static int
+api_ikev2_child_sa_v2_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ ikev2_test_main_t *im = &ikev2_test_main;
+ vl_api_ikev2_child_sa_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+ u32 sa_index = ~0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sa_index %d", &sa_index))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sa_index == ~0)
+ return -99;
+
+ /* Construct the API message */
+ M (IKEV2_CHILD_SA_DUMP, mp);
+
+ mp->sa_index = clib_net_to_host_u32 (sa_index);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ if (!im->ping_id)
+ im->ping_id = vl_msg_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC));
+ mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping));
+ mp_ping->_vl_msg_id = htons (im->ping_id);
+ mp_ping->client_index = vam->my_client_index;
+ vam->result_ready = 0;
+
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ikev2_child_sa_v2_details_t_handler (
+ vl_api_ikev2_child_sa_details_t *mp)
+{
+ vat_main_t *vam = ikev2_test_main.vat_main;
+ vl_api_ikev2_child_sa_t *child_sa = &mp->child_sa;
+ vl_api_ikev2_keys_t *k = &child_sa->keys;
+ vl_api_ikev2_child_sa_t_endian (child_sa, 0 /* from network */);
fformat (vam->ofp, " child sa %u:\n", child_sa->child_sa_index);
@@ -637,7 +784,7 @@ static void
vat_main_t *vam = ikev2_test_main.vat_main;
vl_api_ikev2_ts_t *ts = &mp->ts;
ip_address_t start_addr, end_addr;
- vl_api_ikev2_ts_t_endian (ts);
+ vl_api_ikev2_ts_t_endian (ts, 0 /* from network */);
ip_address_decode2 (&ts->start_addr, &start_addr);
ip_address_decode2 (&ts->end_addr, &end_addr);
diff --git a/src/plugins/ikev2/ikev2_types.api b/src/plugins/ikev2/ikev2_types.api
index f0e50165501..2492611703d 100644
--- a/src/plugins/ikev2/ikev2_types.api
+++ b/src/plugins/ikev2/ikev2_types.api
@@ -128,6 +128,19 @@ typedef ikev2_child_sa
vl_api_ikev2_sa_transform_t esn;
};
+typedef ikev2_child_sa_v2
+{
+ u32 sa_index;
+ u32 child_sa_index;
+ u32 i_spi;
+ u32 r_spi;
+ vl_api_ikev2_keys_t keys;
+ vl_api_ikev2_sa_transform_t encryption;
+ vl_api_ikev2_sa_transform_t integrity;
+ vl_api_ikev2_sa_transform_t esn;
+ f64 uptime;
+};
+
typedef ikev2_sa_stats
{
u16 n_keepalives;
@@ -197,4 +210,30 @@ typedef ikev2_sa_v2
vl_api_ikev2_sa_transform_t dh;
vl_api_ikev2_sa_stats_t stats;
+};
+
+typedef ikev2_sa_v3
+{
+ u32 sa_index;
+ string profile_name[64];
+ vl_api_ikev2_state_t state;
+
+ u64 ispi;
+ u64 rspi;
+ vl_api_address_t iaddr;
+ vl_api_address_t raddr;
+
+ vl_api_ikev2_keys_t keys;
+
+ /* ID */
+ vl_api_ikev2_id_t i_id;
+ vl_api_ikev2_id_t r_id;
+
+ vl_api_ikev2_sa_transform_t encryption;
+ vl_api_ikev2_sa_transform_t integrity;
+ vl_api_ikev2_sa_transform_t prf;
+ vl_api_ikev2_sa_transform_t dh;
+
+ vl_api_ikev2_sa_stats_t stats;
+ f64 uptime;
}; \ No newline at end of file
diff --git a/src/plugins/lisp/lisp-cp/lisp_types.h b/src/plugins/lisp/lisp-cp/lisp_types.h
index 21bd72178d7..e92f8f80c70 100644
--- a/src/plugins/lisp/lisp-cp/lisp_types.h
+++ b/src/plugins/lisp/lisp-cp/lisp_types.h
@@ -198,7 +198,8 @@ u8 gid_address_len (gid_address_t * a);
void *gid_address_cast (gid_address_t * gid, gid_address_type_t type);
void gid_address_copy (gid_address_t * dst, gid_address_t * src);
u32 gid_address_parse (u8 * offset, gid_address_t * a);
-void gid_address_ip_set (gid_address_t * dst, void *src, u8 version);
+void gid_address_ip_set (gid_address_t *dst, void *src,
+ ip_address_family_t version);
#define gid_address_type(_a) (_a)->type
#define gid_address_ippref(_a) (_a)->ippref
diff --git a/src/plugins/marvell/pp2/cli.c b/src/plugins/marvell/pp2/cli.c
index f4ecb1873c9..5072a3c035b 100644
--- a/src/plugins/marvell/pp2/cli.c
+++ b/src/plugins/marvell/pp2/cli.c
@@ -31,7 +31,7 @@ mrvl_pp2_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
unformat_input_t _line_input, *line_input = &_line_input;
mrvl_pp2_create_if_args_t args = { 0 };
- uint val;
+ unsigned int val;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_api.c b/src/plugins/nat/nat44-ed/nat44_ed_api.c
index 1f01410afce..b6c9d51d777 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_api.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_api.c
@@ -442,7 +442,8 @@ send_nat44_ed_output_interface_details (u32 index, vl_api_registration_t *rp,
/* Endian hack until apigen registers _details
* endian functions */
- vl_api_nat44_ed_output_interface_details_t_endian (rmp);
+ vl_api_nat44_ed_output_interface_details_t_endian (rmp,
+ 1 /* to network */);
rmp->_vl_msg_id = htons (rmp->_vl_msg_id);
rmp->context = htonl (rmp->context);
}));
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
index 4ba51bcaea6..9b4dac3b356 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
@@ -523,6 +523,7 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
}
nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
+ nat_6t_flow_saddr_rewrite_set (&s->o2i, r_addr.as_u32);
if (nat_ed_alloc_addr_and_port (
sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
@@ -567,6 +568,7 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
}
nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
+ nat_6t_flow_saddr_rewrite_set (&s->o2i, r_addr.as_u32);
if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
{
nat_elog_notice (sm, "out2in key add failed");
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_api.c b/src/plugins/nat/nat44-ei/nat44_ei_api.c
index 8671a556929..454a5032c6a 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_api.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_api.c
@@ -751,7 +751,8 @@ send_nat44_ei_output_interface_details (u32 index, vl_api_registration_t *rp,
/* Endian hack until apigen registers _details
* endian functions */
- vl_api_nat44_ei_output_interface_details_t_endian (rmp);
+ vl_api_nat44_ei_output_interface_details_t_endian (rmp,
+ 1 /* to network */);
rmp->_vl_msg_id = htons (rmp->_vl_msg_id);
rmp->context = htonl (rmp->context);
}));
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
index 01b333a5234..3b981d69986 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
@@ -859,7 +859,7 @@ nat44_ei_icmp_in2out (vlib_buffer_t *b0, ip4_header_t *ip0,
nat44_ei_main_t *nm = &nat44_ei_main;
vlib_main_t *vm = vlib_get_main ();
ip4_address_t addr;
- u16 port;
+ u16 port = 0;
u32 fib_index;
nat_protocol_t proto;
icmp_echo_header_t *echo0, *inner_echo0 = 0;
diff --git a/src/plugins/nat/pnat/pnat_api.c b/src/plugins/nat/pnat/pnat_api.c
index 02e61219d1e..a4e7ff192bf 100644
--- a/src/plugins/nat/pnat/pnat_api.c
+++ b/src/plugins/nat/pnat/pnat_api.c
@@ -116,7 +116,8 @@ static void send_bindings_details(u32 index, vl_api_registration_t *rp,
/* Endian hack until apigen registers _details
* endian functions */
- vl_api_pnat_bindings_details_t_endian(rmp);
+ vl_api_pnat_bindings_details_t_endian(
+ rmp, 1 /* to network */);
rmp->_vl_msg_id = htons(rmp->_vl_msg_id);
rmp->context = htonl(rmp->context);
}));
@@ -158,7 +159,7 @@ static void send_interfaces_details(u32 index, vl_api_registration_t *rp,
/* Endian hack until apigen registers _details
* endian functions */
- vl_api_pnat_interfaces_details_t_endian(rmp);
+ vl_api_pnat_interfaces_details_t_endian(rmp, 1 /* to network */);
rmp->_vl_msg_id = htons(rmp->_vl_msg_id);
rmp->context = htonl(rmp->context);
}));
diff --git a/src/plugins/netmap/CMakeLists.txt b/src/plugins/netmap/CMakeLists.txt
new file mode 100644
index 00000000000..d53a9e0911a
--- /dev/null
+++ b/src/plugins/netmap/CMakeLists.txt
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2024 Tom Jones <thj@freebsd.org>
+#
+# This software was developed by Tom Jones <thj@freebsd.org> under sponsorship
+# from the FreeBSD Foundation.
+#
+
+if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
+ message(WARNING "Netmap is only currently support on FreeBSD - netmap plugin disabled")
+ return()
+endif()
+
+add_vpp_plugin(netmap
+ SOURCES
+ plugin.c
+ netmap.c
+ node.c
+ device.c
+ cli.c
+ netmap_api.c
+
+ MULTIARCH_SOURCES
+ node.c
+ device.c
+
+ INSTALL_HEADERS
+ netmap.h
+ net_netmap.h
+
+ API_FILES
+ netmap.api
+)
diff --git a/src/plugins/netmap/FEATURE.yaml b/src/plugins/netmap/FEATURE.yaml
new file mode 100644
index 00000000000..a9dfb2163e4
--- /dev/null
+++ b/src/plugins/netmap/FEATURE.yaml
@@ -0,0 +1,12 @@
+---
+name: Netmap Device
+maintainer: Tom Jones <thj@freebsd.org>
+features:
+ - L4 checksum offload
+description: "Create a netmap interface, which is a high speed user-space
+ interface that allows VPP to patch to a physical or virtual NIC
+ without the use of DPDK"
+missing:
+ - API dump
+state: production
+properties: [API, CLI, STATS, MULTITHREAD]
diff --git a/src/plugins/netmap/cli.c b/src/plugins/netmap/cli.c
new file mode 100644
index 00000000000..b54d397ecbe
--- /dev/null
+++ b/src/plugins/netmap/cli.c
@@ -0,0 +1,236 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <netmap/net_netmap.h>
+#include <netmap/netmap.h>
+
+static clib_error_t *
+netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *host_if_name = NULL;
+ u8 hwaddr[6];
+ u8 *hw_addr_ptr = 0;
+ int r;
+ u8 is_pipe = 0;
+ u8 is_master = 0;
+ u32 sw_if_index = ~0;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &host_if_name))
+ ;
+ else
+ if (unformat
+ (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr))
+ hw_addr_ptr = hwaddr;
+ else if (unformat (line_input, "pipe"))
+ is_pipe = 1;
+ else if (unformat (line_input, "master"))
+ is_master = 1;
+ else if (unformat (line_input, "slave"))
+ is_master = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (host_if_name == NULL)
+ {
+ error = clib_error_return (0, "missing host interface name");
+ goto done;
+ }
+
+ r =
+ netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master,
+ &sw_if_index);
+
+ if (r == VNET_API_ERROR_SYSCALL_ERROR_1)
+ {
+ error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno);
+ goto done;
+ }
+
+ if (r == VNET_API_ERROR_INVALID_INTERFACE)
+ {
+ error = clib_error_return (0, "Invalid interface name");
+ goto done;
+ }
+
+ if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS)
+ {
+ error = clib_error_return (0, "Interface already exists");
+ goto done;
+ }
+
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * '<em>netmap</em>' is a framework for very fast packet I/O from userspace.
+ * '<em>VALE</em>' is an equally fast in-kernel software switch using the
+ * netmap API. '<em>netmap</em>' includes '<em>netmap pipes</em>', a shared
+ * memory packet transport channel. Together, they provide a high speed
+ * user-space interface that allows VPP to patch into a linux namespace, a
+ * linux container, or a physical NIC without the use of DPDK. Netmap/VALE
+ * generates the '<em>netmap.ko</em>' kernel module that needs to be loaded
+ * before netmap interfaces can be created.
+ * - https://github.com/luigirizzo/netmap - Netmap/VALE repo.
+ * - https://github.com/vpp-dev/netmap - VPP development package for Netmap/VALE,
+ * which is a snapshot of the Netmap/VALE repo with minor changes to work
+ * with containers and modified kernel drivers to work with NICs.
+ *
+ * Create a netmap interface that will attach to a linux interface.
+ * The interface must already exist. Once created, a new netmap interface
+ * will exist in VPP with the name '<em>netmap-<ifname></em>', where
+ * '<em><ifname></em>' takes one of two forms:
+ * - <b>ifname</b> - Linux interface to bind too.
+ * - <b>valeXXX:YYY</b> -
+ * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE
+ * interface that must start with '<em>vale</em>' and is less
+ * than 16 characters.
+ * - Where '<em>YYY</em>' is an existing linux namespace.
+ *
+ * This command has the following optional parameters:
+ *
+ * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either
+ * X:X:X:X:X:X unix or X.X.X cisco format.
+ *
+ * - <b>pipe</b> - Optional flag to indicate that a '<em>netmap pipe</em>'
+ * instance should be created.
+ *
+ * - <b>master | slave</b> - Optional flag to indicate whether VPP should
+ * be the master or slave of the '<em>netmap pipe</em>'. Only considered
+ * if '<em>pipe</em>' is entered. Defaults to '<em>slave</em>' if not entered.
+ *
+ * @cliexpar
+ * Example of how to create a netmap interface tied to the linux
+ * namespace '<em>vpp1</em>':
+ * @cliexstart{create netmap name vale00:vpp1 hw-addr 02:FE:3F:34:15:9B pipe master}
+ * netmap-vale00:vpp1
+ * @cliexend
+ * Once the netmap interface is created, enable the interface using:
+ * @cliexcmd{set interface state netmap-vale00:vpp1 up}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (netmap_create_command, static) = {
+ .path = "create netmap",
+ .short_help = "create netmap name <ifname>|valeXXX:YYY "
+ "[hw-addr <mac-addr>] [pipe] [master|slave]",
+ .function = netmap_create_command_fn,
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *host_if_name = NULL;
+ clib_error_t *error = NULL;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &host_if_name))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (host_if_name == NULL)
+ {
+ error = clib_error_return (0, "missing host interface name");
+ goto done;
+ }
+
+ netmap_delete_if (vm, host_if_name);
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+/*?
+ * Delete a netmap interface. Use the '<em><ifname></em>' to identify
+ * the netmap interface to be deleted. In VPP, netmap interfaces are
+ * named as '<em>netmap-<ifname></em>', where '<em><ifname></em>'
+ * takes one of two forms:
+ * - <b>ifname</b> - Linux interface to bind too.
+ * - <b>valeXXX:YYY</b> -
+ * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE
+ * interface that must start with '<em>vale</em>' and is less
+ * than 16 characters.
+ * - Where '<em>YYY</em>' is an existing linux namespace.
+ *
+ * @cliexpar
+ * Example of how to delete a netmap interface named '<em>netmap-vale00:vpp1</em>':
+ * @cliexcmd{delete netmap name vale00:vpp1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (netmap_delete_command, static) = {
+ .path = "delete netmap",
+ .short_help = "delete netmap name <ifname>|valeXXX:YYY",
+ .function = netmap_delete_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+netmap_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (netmap_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/device.c b/src/plugins/netmap/device.c
new file mode 100644
index 00000000000..505deb988c4
--- /dev/null
+++ b/src/plugins/netmap/device.c
@@ -0,0 +1,252 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <netmap/net_netmap.h>
+#include <netmap/netmap.h>
+
+#define foreach_netmap_tx_func_error \
+_(NO_FREE_SLOTS, "no free tx slots") \
+_(PENDING_MSGS, "pending msgs in tx ring")
+
+typedef enum
+{
+#define _(f,s) NETMAP_TX_ERROR_##f,
+ foreach_netmap_tx_func_error
+#undef _
+ NETMAP_TX_N_ERROR,
+} netmap_tx_func_error_t;
+
+static char *netmap_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_netmap_tx_func_error
+#undef _
+};
+
+
+static u8 *
+format_netmap_device_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ netmap_main_t *apm = &netmap_main;
+ netmap_if_t *nif = pool_elt_at_index (apm->interfaces, i);
+
+ s = format (s, "netmap-%s", nif->host_if_name);
+ return s;
+}
+
+static u8 *
+format_netmap_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ int verbose = va_arg (*args, int);
+ netmap_main_t *nm = &netmap_main;
+ netmap_if_t *nif = vec_elt_at_index (nm->interfaces, dev_instance);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "NETMAP interface");
+ if (verbose)
+ {
+ s = format (s, "\n%U version %d flags 0x%x"
+ "\n%U region %u memsize 0x%x offset 0x%x"
+ "\n%U tx_slots %u rx_slots %u tx_rings %u rx_rings %u",
+ format_white_space, indent + 2,
+ nif->req->nr_version,
+ nif->req->nr_flags,
+ format_white_space, indent + 2,
+ nif->mem_region,
+ nif->req->nr_memsize,
+ nif->req->nr_offset,
+ format_white_space, indent + 2,
+ nif->req->nr_tx_slots,
+ nif->req->nr_rx_slots,
+ nif->req->nr_tx_rings, nif->req->nr_rx_rings);
+ }
+ return s;
+}
+
+static u8 *
+format_netmap_tx_trace (u8 * s, va_list * args)
+{
+ s = format (s, "Unimplemented...");
+ return s;
+}
+
+VNET_DEVICE_CLASS_TX_FN (netmap_device_class) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ netmap_main_t *nm = &netmap_main;
+ u32 *buffers = vlib_frame_vector_args (frame);
+ u32 n_left = frame->n_vectors;
+ f64 const time_constant = 1e3;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance);
+ int cur_ring;
+
+ clib_spinlock_lock_if_init (&nif->lockp);
+
+ cur_ring = nif->first_tx_ring;
+
+ while (n_left && cur_ring <= nif->last_tx_ring)
+ {
+ struct netmap_ring *ring = NETMAP_TXRING (nif->nifp, cur_ring);
+ int n_free_slots = nm_ring_space (ring);
+ uint cur = ring->cur;
+
+ if (nm_tx_pending (ring))
+ {
+ if (ioctl (nif->fd, NIOCTXSYNC, NULL) < 0)
+ clib_unix_warning ("NIOCTXSYNC");
+ clib_cpu_time_wait (time_constant);
+
+ if (nm_tx_pending (ring) && !n_free_slots)
+ {
+ cur_ring++;
+ continue;
+ }
+ }
+
+ while (n_left && n_free_slots)
+ {
+ vlib_buffer_t *b0 = 0;
+ u32 bi = buffers[0];
+ u32 len;
+ u32 offset = 0;
+ buffers++;
+
+ struct netmap_slot *slot = &ring->slot[cur];
+
+ do
+ {
+ b0 = vlib_get_buffer (vm, bi);
+ len = b0->current_length;
+ /* memcpy */
+ clib_memcpy_fast ((u8 *) NETMAP_BUF (ring, slot->buf_idx) +
+ offset, vlib_buffer_get_current (b0), len);
+ offset += len;
+ }
+ while ((bi = b0->next_buffer));
+
+ slot->len = offset;
+ cur = (cur + 1) % ring->num_slots;
+ n_free_slots--;
+ n_left--;
+ }
+ CLIB_MEMORY_BARRIER ();
+ ring->head = ring->cur = cur;
+ }
+
+ if (n_left < frame->n_vectors)
+ ioctl (nif->fd, NIOCTXSYNC, NULL);
+
+ clib_spinlock_unlock_if_init (&nif->lockp);
+
+ if (n_left)
+ vlib_error_count (vm, node->node_index,
+ (n_left ==
+ frame->n_vectors ? NETMAP_TX_ERROR_PENDING_MSGS :
+ NETMAP_TX_ERROR_NO_FREE_SLOTS), n_left);
+
+ vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
+ return frame->n_vectors;
+}
+
+static void
+netmap_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ netmap_main_t *apm = &netmap_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance);
+
+ /* Shut off redirection */
+ if (node_index == ~0)
+ {
+ nif->per_interface_next_index = node_index;
+ return;
+ }
+
+ nif->per_interface_next_index =
+ vlib_node_add_next (vlib_get_main (), netmap_input_node.index,
+ node_index);
+}
+
+static void
+netmap_clear_hw_interface_counters (u32 instance)
+{
+ /* Nothing for now */
+}
+
+static clib_error_t *
+netmap_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+{
+ netmap_main_t *apm = &netmap_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance);
+ u32 hw_flags;
+
+ nif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ if (nif->is_admin_up)
+ hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
+ else
+ hw_flags = 0;
+
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return 0;
+}
+
+static clib_error_t *
+netmap_subif_add_del_function (vnet_main_t * vnm,
+ u32 hw_if_index,
+ struct vnet_sw_interface_t *st, int is_add)
+{
+ /* Nothing for now */
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VNET_DEVICE_CLASS (netmap_device_class) = {
+ .name = "netmap",
+ .format_device_name = format_netmap_device_name,
+ .format_device = format_netmap_device,
+ .format_tx_trace = format_netmap_tx_trace,
+ .tx_function_n_errors = NETMAP_TX_N_ERROR,
+ .tx_function_error_strings = netmap_tx_func_error_strings,
+ .rx_redirect_to_node = netmap_set_interface_next_node,
+ .clear_counters = netmap_clear_hw_interface_counters,
+ .admin_up_down_function = netmap_interface_admin_up_down,
+ .subif_add_del_function = netmap_subif_add_del_function,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/net_netmap.h b/src/plugins/netmap/net_netmap.h
new file mode 100644
index 00000000000..ecccedd4484
--- /dev/null
+++ b/src/plugins/netmap/net_netmap.h
@@ -0,0 +1,650 @@
+/*
+ * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``S IS''AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $
+ *
+ * Definitions of constants and the structures used by the netmap
+ * framework, for the part visible to both kernel and userspace.
+ * Detailed info on netmap is available with "man netmap" or at
+ *
+ * http://info.iet.unipi.it/~luigi/netmap/
+ *
+ * This API is also used to communicate with the VALE software switch
+ */
+
+#ifndef _NET_NETMAP_H_
+#define _NET_NETMAP_H_
+
+#define NETMAP_API 14 /* current API version */
+
+#define NETMAP_MIN_API 14 /* min and max versions accepted */
+#define NETMAP_MAX_API 15
+/*
+ * Some fields should be cache-aligned to reduce contention.
+ * The alignment is architecture and OS dependent, but rather than
+ * digging into OS headers to find the exact value we use an estimate
+ * that should cover most architectures.
+ */
+#define NM_CACHE_ALIGN 128
+
+/*
+ * --- Netmap data structures ---
+ *
+ * The userspace data structures used by netmap are shown below.
+ * They are allocated by the kernel and mmap()ed by userspace threads.
+ * Pointers are implemented as memory offsets or indexes,
+ * so that they can be easily dereferenced in kernel and userspace.
+
+ KERNEL (opaque, obviously)
+
+ ====================================================================
+ |
+ USERSPACE | struct netmap_ring
+ +---->+---------------+
+ / | head,cur,tail |
+ struct netmap_if (nifp, 1 per fd) / | buf_ofs |
+ +---------------+ / | other fields |
+ | ni_tx_rings | / +===============+
+ | ni_rx_rings | / | buf_idx, len | slot[0]
+ | | / | flags, ptr |
+ | | / +---------------+
+ +===============+ / | buf_idx, len | slot[1]
+ | txring_ofs[0] | (rel.to nifp)--' | flags, ptr |
+ | txring_ofs[1] | +---------------+
+ (tx+1 entries) (num_slots entries)
+ | txring_ofs[t] | | buf_idx, len | slot[n-1]
+ +---------------+ | flags, ptr |
+ | rxring_ofs[0] | +---------------+
+ | rxring_ofs[1] |
+ (rx+1 entries)
+ | rxring_ofs[r] |
+ +---------------+
+
+ * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to
+ * a file descriptor, the mmap()ed region contains a (logically readonly)
+ * struct netmap_if pointing to struct netmap_ring's.
+ *
+ * There is one netmap_ring per physical NIC ring, plus one tx/rx ring
+ * pair attached to the host stack (this pair is unused for non-NIC ports).
+ *
+ * All physical/host stack ports share the same memory region,
+ * so that zero-copy can be implemented between them.
+ * VALE switch ports instead have separate memory regions.
+ *
+ * The netmap_ring is the userspace-visible replica of the NIC ring.
+ * Each slot has the index of a buffer (MTU-sized and residing in the
+ * mmapped region), its length and some flags. An extra 64-bit pointer
+ * is provided for user-supplied buffers in the tx path.
+ *
+ * In user space, the buffer address is computed as
+ * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE
+ *
+ * Added in NETMAP_API 11:
+ *
+ * + NIOCREGIF can request the allocation of extra spare buffers from
+ * the same memory pool. The desired number of buffers must be in
+ * nr_arg3. The ioctl may return fewer buffers, depending on memory
+ * availability. nr_arg3 will return the actual value, and, once
+ * mapped, nifp->ni_bufs_head will be the index of the first buffer.
+ *
+ * The buffers are linked to each other using the first uint32_t
+ * as the index. On close, ni_bufs_head must point to the list of
+ * buffers to be released.
+ *
+ * + NIOCREGIF can request space for extra rings (and buffers)
+ * allocated in the same memory space. The number of extra rings
+ * is in nr_arg1, and is advisory. This is a no-op on NICs where
+ * the size of the memory space is fixed.
+ *
+ * + NIOCREGIF can attach to PIPE rings sharing the same memory
+ * space with a parent device. The ifname indicates the parent device,
+ * which must already exist. Flags in nr_flags indicate if we want to
+ * bind the master or slave side, the index (from nr_ringid)
+ * is just a cookie and does not need to be sequential.
+ *
+ * + NIOCREGIF can also attach to 'monitor' rings that replicate
+ * the content of specific rings, also from the same memory space.
+ *
+ * Extra flags in nr_flags support the above functions.
+ * Application libraries may use the following naming scheme:
+ * netmap:foo all NIC ring pairs
+ * netmap:foo^ only host ring pair
+ * netmap:foo+ all NIC ring + host ring pairs
+ * netmap:foo-k the k-th NIC ring pair
+ * netmap:foo{k PIPE ring pair k, master side
+ * netmap:foo}k PIPE ring pair k, slave side
+ */
+
+/*
+ * struct netmap_slot is a buffer descriptor
+ */
+struct netmap_slot {
+ uint32_t buf_idx; /* buffer index */
+ uint16_t len; /* length for this slot */
+ uint16_t flags; /* buf changed, etc. */
+ uint64_t ptr; /* pointer for indirect buffers */
+};
+
+/*
+ * The following flags control how the slot is used
+ */
+
+#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */
+ /*
+ * must be set whenever buf_idx is changed (as it might be
+ * necessary to recompute the physical address and mapping)
+ *
+ * It is also set by the kernel whenever the buf_idx is
+ * changed internally (e.g., by pipes). Applications may
+ * use this information to know when they can reuse the
+ * contents of previously prepared buffers.
+ */
+
+#define NS_REPORT 0x0002 /* ask the hardware to report results */
+ /*
+ * Request notification when slot is used by the hardware.
+ * Normally transmit completions are handled lazily and
+ * may be unreported. This flag lets us know when a slot
+ * has been sent (e.g. to terminate the sender).
+ */
+
+#define NS_FORWARD 0x0004 /* pass packet 'forward' */
+ /*
+ * (Only for physical ports, rx rings with NR_FORWARD set).
+ * Slot released to the kernel (i.e. before ring->head) with
+ * this flag set are passed to the peer ring (host/NIC),
+ * thus restoring the host-NIC connection for these slots.
+ * This supports efficient traffic monitoring or firewalling.
+ */
+
+#define NS_NO_LEARN 0x0008 /* disable bridge learning */
+ /*
+ * On a VALE switch, do not 'learn' the source port for
+ * this buffer.
+ */
+
+#define NS_INDIRECT 0x0010 /* userspace buffer */
+ /*
+ * (VALE tx rings only) data is in a userspace buffer,
+ * whose address is in the 'ptr' field in the slot.
+ */
+
+#define NS_MOREFRAG 0x0020 /* packet has more fragments */
+ /*
+ * (VALE ports only)
+ * Set on all but the last slot of a multi-segment packet.
+ * The 'len' field refers to the individual fragment.
+ */
+
+#define NS_PORT_SHIFT 8
+#define NS_PORT_MASK (0xff << NS_PORT_SHIFT)
+ /*
+ * The high 8 bits of the flag, if not zero, indicate the
+ * destination port for the VALE switch, overriding
+ * the lookup table.
+ */
+
+#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff)
+ /*
+ * (VALE rx rings only) the high 8 bits
+ * are the number of fragments.
+ */
+
+
+/*
+ * struct netmap_ring
+ *
+ * Netmap representation of a TX or RX ring (also known as "queue").
+ * This is a queue implemented as a fixed-size circular array.
+ * At the software level the important fields are: head, cur, tail.
+ *
+ * In TX rings:
+ *
+ * head first slot available for transmission.
+ * cur wakeup point. select() and poll() will unblock
+ * when 'tail' moves past 'cur'
+ * tail (readonly) first slot reserved to the kernel
+ *
+ * [head .. tail-1] can be used for new packets to send;
+ * 'head' and 'cur' must be incremented as slots are filled
+ * with new packets to be sent;
+ * 'cur' can be moved further ahead if we need more space
+ * for new transmissions. XXX todo (2014-03-12)
+ *
+ * In RX rings:
+ *
+ * head first valid received packet
+ * cur wakeup point. select() and poll() will unblock
+ * when 'tail' moves past 'cur'
+ * tail (readonly) first slot reserved to the kernel
+ *
+ * [head .. tail-1] contain received packets;
+ * 'head' and 'cur' must be incremented as slots are consumed
+ * and can be returned to the kernel;
+ * 'cur' can be moved further ahead if we want to wait for
+ * new packets without returning the previous ones.
+ *
+ * DATA OWNERSHIP/LOCKING:
+ * The netmap_ring, and all slots and buffers in the range
+ * [head .. tail-1] are owned by the user program;
+ * the kernel only accesses them during a netmap system call
+ * and in the user thread context.
+ *
+ * Other slots and buffers are reserved for use by the kernel
+ */
+struct netmap_ring {
+ /*
+ * buf_ofs is meant to be used through macros.
+ * It contains the offset of the buffer region from this
+ * descriptor.
+ */
+ const int64_t buf_ofs;
+ const uint32_t num_slots; /* number of slots in the ring. */
+ const uint32_t nr_buf_size;
+ const uint16_t ringid;
+ const uint16_t dir; /* 0: tx, 1: rx */
+
+ uint32_t head; /* (u) first user slot */
+ uint32_t cur; /* (u) wakeup point */
+ uint32_t tail; /* (k) first kernel slot */
+
+ uint32_t flags;
+
+ struct timeval ts; /* (k) time of last *sync() */
+
+ /* opaque room for a mutex or similar object */
+#if !defined(_WIN32) || defined(__CYGWIN__)
+ uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128];
+#else
+ uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128];
+#endif
+
+ /* the slots follow. This struct has variable size */
+ struct netmap_slot slot[0]; /* array of slots. */
+};
+
+
+/*
+ * RING FLAGS
+ */
+#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */
+ /*
+ * updates the 'ts' field on each netmap syscall. This saves
+ * saves a separate gettimeofday(), and is not much worse than
+ * software timestamps generated in the interrupt handler.
+ */
+
+#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */
+ /*
+ * Enables the NS_FORWARD slot flag for the ring.
+ */
+
+
+/*
+ * Netmap representation of an interface and its queue(s).
+ * This is initialized by the kernel when binding a file
+ * descriptor to a port, and should be considered as readonly
+ * by user programs. The kernel never uses it.
+ *
+ * There is one netmap_if for each file descriptor on which we want
+ * to select/poll.
+ * select/poll operates on one or all pairs depending on the value of
+ * nmr_queueid passed on the ioctl.
+ */
+struct netmap_if {
+ char ni_name[IFNAMSIZ]; /* name of the interface. */
+ const uint32_t ni_version; /* API version, currently unused */
+ const uint32_t ni_flags; /* properties */
+#define NI_PRIV_MEM 0x1 /* private memory region */
+
+ /*
+ * The number of packet rings available in netmap mode.
+ * Physical NICs can have different numbers of tx and rx rings.
+ * Physical NICs also have a 'host' ring pair.
+ * Additionally, clients can request additional ring pairs to
+ * be used for internal communication.
+ */
+ const uint32_t ni_tx_rings; /* number of HW tx rings */
+ const uint32_t ni_rx_rings; /* number of HW rx rings */
+
+ uint32_t ni_bufs_head; /* head index for extra bufs */
+ uint32_t ni_spare1[5];
+ /*
+ * The following array contains the offset of each netmap ring
+ * from this structure, in the following order:
+ * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings;
+ * NIC rx rings (ni_rx_rings); host tx ring (1); extra rx rings.
+ *
+ * The area is filled up by the kernel on NIOCREGIF,
+ * and then only read by userspace code.
+ */
+ const ssize_t ring_ofs[0];
+};
+
+
+#ifndef NIOCREGIF
+/*
+ * ioctl names and related fields
+ *
+ * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
+ * whose identity is set in NIOCREGIF through nr_ringid.
+ * These are non blocking and take no argument.
+ *
+ * NIOCGINFO takes a struct ifreq, the interface name is the input,
+ * the outputs are number of queues and number of descriptor
+ * for each queue (useful to set number of threads etc.).
+ * The info returned is only advisory and may change before
+ * the interface is bound to a file descriptor.
+ *
+ * NIOCREGIF takes an interface name within a struct nmre,
+ * and activates netmap mode on the interface (if possible).
+ *
+ * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we
+ * can pass it down to other NIC-related ioctls.
+ *
+ * The actual argument (struct nmreq) has a number of options to request
+ * different functions.
+ * The following are used in NIOCREGIF when nr_cmd == 0:
+ *
+ * nr_name (in)
+ * The name of the port (em0, valeXXX:YYY, etc.)
+ * limited to IFNAMSIZ for backward compatibility.
+ *
+ * nr_version (in/out)
+ * Must match NETMAP_API as used in the kernel, error otherwise.
+ * Always returns the desired value on output.
+ *
+ * nr_tx_slots, nr_tx_slots, nr_tx_rings, nr_rx_rings (in/out)
+ * On input, non-zero values may be used to reconfigure the port
+ * according to the requested values, but this is not guaranteed.
+ * On output the actual values in use are reported.
+ *
+ * nr_ringid (in)
+ * Indicates how rings should be bound to the file descriptors.
+ * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK)
+ * are used to indicate the ring number, and nr_flags specifies
+ * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected.
+ *
+ * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED:
+ * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control
+ * the binding as follows:
+ * 0 (default) binds all physical rings
+ * NETMAP_HW_RING | ring number binds a single ring pair
+ * NETMAP_SW_RING binds only the host tx/rx rings
+ *
+ * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push
+ * packets on tx rings only if POLLOUT is set.
+ * The default is to push any pending packet.
+ *
+ * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release
+ * packets on rx rings also when POLLIN is NOT set.
+ * The default is to touch the rx ring only with POLLIN.
+ * Note that this is the opposite of TX because it
+ * reflects the common usage.
+ *
+ * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead.
+ * NETMAP_PRIV_MEM is set on return for ports that do not use
+ * the global memory allocator.
+ * This information is not significant and applications
+ * should look at the region id in nr_arg2
+ *
+ * nr_flags is the recommended mode to indicate which rings should
+ * be bound to a file descriptor. Values are NR_REG_*
+ *
+ * nr_arg1 (in) The number of extra rings to be reserved.
+ * Especially when allocating a VALE port the system only
+ * allocates the amount of memory needed for the port.
+ * If more shared memory rings are desired (e.g. for pipes),
+ * the first invocation for the same basename/allocator
+ * should specify a suitable number. Memory cannot be
+ * extended after the first allocation without closing
+ * all ports on the same region.
+ *
+ * nr_arg2 (in/out) The identity of the memory region used.
+ * On input, 0 means the system decides autonomously,
+ * other values may try to select a specific region.
+ * On return the actual value is reported.
+ * Region '1' is the global allocator, normally shared
+ * by all interfaces. Other values are private regions.
+ * If two ports the same region zero-copy is possible.
+ *
+ * nr_arg3 (in/out) number of extra buffers to be allocated.
+ *
+ *
+ *
+ * nr_cmd (in) if non-zero indicates a special command:
+ * NETMAP_BDG_ATTACH and nr_name = vale*:ifname
+ * attaches the NIC to the switch; nr_ringid specifies
+ * which rings to use. Used by vale-ctl -a ...
+ * nr_arg1 = NETMAP_BDG_HOST also attaches the host port
+ * as in vale-ctl -h ...
+ *
+ * NETMAP_BDG_DETACH and nr_name = vale*:ifname
+ * disconnects a previously attached NIC.
+ * Used by vale-ctl -d ...
+ *
+ * NETMAP_BDG_LIST
+ * list the configuration of VALE switches.
+ *
+ * NETMAP_BDG_VNET_HDR
+ * Set the virtio-net header length used by the client
+ * of a VALE switch port.
+ *
+ * NETMAP_BDG_NEWIF
+ * create a persistent VALE port with name nr_name.
+ * Used by vale-ctl -n ...
+ *
+ * NETMAP_BDG_DELIF
+ * delete a persistent VALE port. Used by vale-ctl -d ...
+ *
+ * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific
+ *
+ *
+ *
+ */
+
+
+/*
+ * struct nmreq overlays a struct ifreq (just the name)
+ */
+struct nmreq {
+ char nr_name[IFNAMSIZ];
+ uint32_t nr_version; /* API version */
+ uint32_t nr_offset; /* nifp offset in the shared region */
+ uint32_t nr_memsize; /* size of the shared region */
+ uint32_t nr_tx_slots; /* slots in tx rings */
+ uint32_t nr_rx_slots; /* slots in rx rings */
+ uint16_t nr_tx_rings; /* number of tx rings */
+ uint16_t nr_rx_rings; /* number of rx rings */
+
+ uint16_t nr_ringid; /* ring(s) we care about */
+#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */
+#define NETMAP_SW_RING 0x2000 /* only host ring pair */
+
+#define NETMAP_RING_MASK 0x0fff /* the ring number */
+
+#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */
+
+#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */
+
+ uint16_t nr_cmd;
+#define NETMAP_BDG_ATTACH 1 /* attach the NIC */
+#define NETMAP_BDG_DETACH 2 /* detach the NIC */
+#define NETMAP_BDG_REGOPS 3 /* register bridge callbacks */
+#define NETMAP_BDG_LIST 4 /* get bridge's info */
+#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */
+#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */
+#define NETMAP_BDG_NEWIF 6 /* create a virtual port */
+#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */
+#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */
+#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */
+#define NETMAP_BDG_POLLING_ON 10 /* delete polling kthread */
+#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */
+#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */
+ uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */
+#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */
+
+ uint16_t nr_arg2;
+ uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */
+ uint32_t nr_flags;
+ /* various modes, extends nr_ringid */
+ uint32_t spare2[1];
+};
+
+#define NR_REG_MASK 0xf /* values for nr_flags */
+enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
+ NR_REG_ALL_NIC = 1,
+ NR_REG_SW = 2,
+ NR_REG_NIC_SW = 3,
+ NR_REG_ONE_NIC = 4,
+ NR_REG_PIPE_MASTER = 5,
+ NR_REG_PIPE_SLAVE = 6,
+};
+/* monitor uses the NR_REG to select the rings to monitor */
+#define NR_MONITOR_TX 0x100
+#define NR_MONITOR_RX 0x200
+#define NR_ZCOPY_MON 0x400
+/* request exclusive access to the selected rings */
+#define NR_EXCLUSIVE 0x800
+/* request ptnetmap host support */
+#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */
+#define NR_PTNETMAP_HOST 0x1000
+#define NR_RX_RINGS_ONLY 0x2000
+#define NR_TX_RINGS_ONLY 0x4000
+/* Applications set this flag if they are able to deal with virtio-net headers,
+ * that is send/receive frames that start with a virtio-net header.
+ * If not set, NIOCREGIF will fail with netmap ports that require applications
+ * to use those headers. If the flag is set, the application can use the
+ * NETMAP_VNET_HDR_GET command to figure out the header length. */
+#define NR_ACCEPT_VNET_HDR 0x8000
+
+
+/*
+ * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined
+ * in ws2def.h but not sure if they are in the form we need.
+ * XXX so we redefine them
+ * in a convenient way to use for DeviceIoControl signatures
+ */
+#ifdef _WIN32
+#undef _IO // ws2def.h
+#define _WIN_NM_IOCTL_TYPE 40000
+#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \
+ METHOD_BUFFERED, FILE_ANY_ACCESS )
+#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \
+ METHOD_OUT_DIRECT, FILE_ANY_ACCESS )
+
+#define _IOWR(_c, _n, _s) _IO(_c, _n)
+
+/* We havesome internal sysctl in addition to the externally visible ones */
+#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT
+#define NETMAP_POLL _IO('i', 162)
+
+/* and also two setsockopt for sysctl emulation */
+#define NETMAP_SETSOCKOPT _IO('i', 140)
+#define NETMAP_GETSOCKOPT _IO('i', 141)
+
+
+//These linknames are for the Netmap Core Driver
+#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP"
+#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap"
+
+//Definition of a structure used to pass a virtual address within an IOCTL
+typedef struct _MEMORY_ENTRY {
+ PVOID pUsermodeVirtualAddress;
+} MEMORY_ENTRY, *PMEMORY_ENTRY;
+
+typedef struct _POLL_REQUEST_DATA {
+ int events;
+ int timeout;
+ int revents;
+} POLL_REQUEST_DATA;
+
+#endif /* _WIN32 */
+
+/*
+ * FreeBSD uses the size value embedded in the _IOWR to determine
+ * how much to copy in/out. So we need it to match the actual
+ * data structure we pass. We put some spares in the structure
+ * to ease compatibility with other versions
+ */
+#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */
+#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */
+#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */
+#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */
+#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext. modules */
+#endif /* !NIOCREGIF */
+
+
+/*
+ * Helper functions for kernel and userspace
+ */
+
+/*
+ * check if space is available in the ring.
+ */
+static inline int
+nm_ring_empty(struct netmap_ring *ring)
+{
+ return (ring->cur == ring->tail);
+}
+
+/*
+ * Opaque structure that is passed to an external kernel
+ * module via ioctl(fd, NIOCCONFIG, req) for a user-owned
+ * bridge port (at this point ephemeral VALE interface).
+ */
+#define NM_IFRDATA_LEN 256
+struct nm_ifreq {
+ char nifr_name[IFNAMSIZ];
+ char data[NM_IFRDATA_LEN];
+};
+
+/*
+ * netmap kernel thread configuration
+ */
+/* bhyve/vmm.ko MSIX parameters for IOCTL */
+struct ptn_vmm_ioctl_msix {
+ uint64_t msg;
+ uint64_t addr;
+};
+
+/* IOCTL parameters */
+struct nm_kth_ioctl {
+ u_long com;
+ /* TODO: use union */
+ union {
+ struct ptn_vmm_ioctl_msix msix;
+ } data;
+};
+
+/* Configuration of a ptnetmap ring */
+struct ptnet_ring_cfg {
+ uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */
+ uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */
+ struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */
+};
+#endif /* _NET_NETMAP_H_ */
diff --git a/src/plugins/netmap/netmap.api b/src/plugins/netmap/netmap.api
new file mode 100644
index 00000000000..a14753cad9c
--- /dev/null
+++ b/src/plugins/netmap/netmap.api
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "1.0.0";
+
+/** \brief Create netmap
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param netmap_if_name - interface name
+ @param hw_addr - interface MAC
+ @param use_random_hw_addr - use random generated MAC
+ @param is_pipe - is pipe
+ @param is_master - 0=slave, 1=master
+*/
+autoreply define netmap_create
+{
+ u32 client_index;
+ u32 context;
+
+ u8 netmap_if_name[64];
+ u8 hw_addr[6];
+ u8 use_random_hw_addr;
+ u8 is_pipe;
+ u8 is_master;
+};
+
+/** \brief Delete netmap
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param netmap_if_name - interface name
+*/
+autoreply define netmap_delete
+{
+ u32 client_index;
+ u32 context;
+
+ u8 netmap_if_name[64];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/netmap.c b/src/plugins/netmap/netmap.c
new file mode 100644
index 00000000000..ebef215eb3b
--- /dev/null
+++ b/src/plugins/netmap/netmap.c
@@ -0,0 +1,334 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <netmap/net_netmap.h>
+#include <netmap/netmap.h>
+#include <netmap/netmap.api_enum.h>
+#include <netmap/netmap.api_types.h>
+
+netmap_main_t netmap_main;
+
+static clib_error_t *
+netmap_fd_read_ready (clib_file_t * uf)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ netmap_main_t *nm = &netmap_main;
+ u32 idx = uf->private_data;
+
+ nm->pending_input_bitmap =
+ clib_bitmap_set (nm->pending_input_bitmap, idx, 1);
+
+ /* Schedule the rx node */
+ vlib_node_set_interrupt_pending (vm, netmap_input_node.index);
+
+ return 0;
+}
+
+static void
+close_netmap_if (netmap_main_t * nm, netmap_if_t * nif)
+{
+ if (nif->clib_file_index != ~0)
+ {
+ clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index);
+ nif->clib_file_index = ~0;
+ }
+ else if (nif->fd > -1)
+ close (nif->fd);
+
+ if (nif->mem_region)
+ {
+ netmap_mem_region_t *reg = &nm->mem_regions[nif->mem_region];
+ if (--reg->refcnt == 0)
+ {
+ munmap (reg->mem, reg->region_size);
+ reg->region_size = 0;
+ }
+ }
+
+
+ mhash_unset (&nm->if_index_by_host_if_name, nif->host_if_name,
+ &nif->if_index);
+ vec_free (nif->host_if_name);
+ vec_free (nif->req);
+
+ clib_memset (nif, 0, sizeof (*nif));
+ pool_put (nm->interfaces, nif);
+}
+
+int
+netmap_worker_thread_enable ()
+{
+ /* if worker threads are enabled, switch to polling mode */
+ foreach_vlib_main ()
+ {
+ vlib_node_set_state (this_vlib_main, netmap_input_node.index,
+ VLIB_NODE_STATE_POLLING);
+ }
+
+ return 0;
+}
+
+int
+netmap_worker_thread_disable ()
+{
+ foreach_vlib_main ()
+ {
+ vlib_node_set_state (this_vlib_main, netmap_input_node.index,
+ VLIB_NODE_STATE_INTERRUPT);
+ }
+
+ return 0;
+}
+
+int
+netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set,
+ u8 is_pipe, u8 is_master, u32 * sw_if_index)
+{
+ netmap_main_t *nm = &netmap_main;
+ int ret = 0;
+ uint32_t nr_reg;
+ netmap_if_t *nif = 0;
+ u8 hw_addr[6];
+ vnet_sw_interface_t *sw;
+ vnet_main_t *vnm = vnet_get_main ();
+ uword *p;
+ struct nmreq *req = 0;
+ netmap_mem_region_t *reg;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ int fd;
+
+ p = mhash_get (&nm->if_index_by_host_if_name, if_name);
+ if (p)
+ return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+
+ fd = open ("/dev/netmap", O_RDWR);
+ if (fd < 0)
+ return VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+
+ pool_get (nm->interfaces, nif);
+ nif->if_index = nif - nm->interfaces;
+ nif->fd = fd;
+ nif->clib_file_index = ~0;
+
+ vec_validate (req, 0);
+ nif->req = req;
+ req->nr_version = NETMAP_API;
+ req->nr_flags = NR_REG_ALL_NIC;
+
+ if (is_pipe)
+ req->nr_flags = is_master ? NR_REG_PIPE_MASTER : NR_REG_PIPE_SLAVE;
+ else
+ req->nr_flags = NR_REG_ALL_NIC;
+
+ req->nr_flags |= NR_ACCEPT_VNET_HDR;
+ snprintf (req->nr_name, IFNAMSIZ, "%s", if_name);
+ req->nr_name[IFNAMSIZ - 1] = 0;
+
+ if (ioctl (nif->fd, NIOCREGIF, req))
+ {
+ ret = VNET_API_ERROR_NOT_CONNECTED;
+ goto error;
+ }
+
+ nif->mem_region = req->nr_arg2;
+ vec_validate (nm->mem_regions, nif->mem_region);
+ reg = &nm->mem_regions[nif->mem_region];
+ if (reg->region_size == 0)
+ {
+ reg->mem = mmap (NULL, req->nr_memsize, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ clib_warning ("mem %p", reg->mem);
+ if (reg->mem == MAP_FAILED)
+ {
+ ret = VNET_API_ERROR_NOT_CONNECTED;
+ goto error;
+ }
+ reg->region_size = req->nr_memsize;
+ }
+ reg->refcnt++;
+
+ nif->nifp = NETMAP_IF (reg->mem, req->nr_offset);
+ nr_reg = nif->req->nr_flags & NR_REG_MASK;
+
+ if (nr_reg == NR_REG_SW)
+ { /* host stack */
+ nif->first_tx_ring = nif->last_tx_ring = nif->req->nr_tx_rings;
+ nif->first_rx_ring = nif->last_rx_ring = nif->req->nr_rx_rings;
+ }
+ else if (nr_reg == NR_REG_ALL_NIC)
+ { /* only nic */
+ nif->first_tx_ring = 0;
+ nif->first_rx_ring = 0;
+ nif->last_tx_ring = nif->req->nr_tx_rings - 1;
+ nif->last_rx_ring = nif->req->nr_rx_rings - 1;
+ }
+ else if (nr_reg == NR_REG_NIC_SW)
+ {
+ nif->first_tx_ring = 0;
+ nif->first_rx_ring = 0;
+ nif->last_tx_ring = nif->req->nr_tx_rings;
+ nif->last_rx_ring = nif->req->nr_rx_rings;
+ }
+ else if (nr_reg == NR_REG_ONE_NIC)
+ {
+ /* XXX check validity */
+ nif->first_tx_ring = nif->last_tx_ring = nif->first_rx_ring =
+ nif->last_rx_ring = nif->req->nr_ringid & NETMAP_RING_MASK;
+ }
+ else
+ { /* pipes */
+ nif->first_tx_ring = nif->last_tx_ring = 0;
+ nif->first_rx_ring = nif->last_rx_ring = 0;
+ }
+
+ nif->host_if_name = if_name;
+ nif->per_interface_next_index = ~0;
+
+ if (tm->n_vlib_mains > 1)
+ clib_spinlock_init (&nif->lockp);
+
+ {
+ clib_file_t template = { 0 };
+ template.read_function = netmap_fd_read_ready;
+ template.file_descriptor = nif->fd;
+ template.private_data = nif->if_index;
+ template.description = format (0, "netmap socket");
+ nif->clib_file_index = clib_file_add (&file_main, &template);
+ }
+
+ /*use configured or generate random MAC address */
+ if (hw_addr_set)
+ memcpy (hw_addr, hw_addr_set, 6);
+ else
+ {
+ f64 now = vlib_time_now (vm);
+ u32 rnd;
+ rnd = (u32) (now * 1e6);
+ rnd = random_u32 (&rnd);
+
+ memcpy (hw_addr + 2, &rnd, sizeof (rnd));
+ hw_addr[0] = 2;
+ hw_addr[1] = 0xfe;
+ }
+
+ vnet_eth_interface_registration_t eir = {};
+
+ eir.dev_class_index = netmap_device_class.index;
+ eir.dev_instance = nif->if_index;
+ eir.address = hw_addr;
+ eir.cb.set_max_frame_size = NULL;
+
+ nif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+
+ sw = vnet_get_hw_sw_interface (vnm, nif->hw_if_index);
+ nif->sw_if_index = sw->sw_if_index;
+
+ mhash_set_mem (&nm->if_index_by_host_if_name, if_name, &nif->if_index, 0);
+
+ if (sw_if_index)
+ *sw_if_index = nif->sw_if_index;
+
+ if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 1)
+ netmap_worker_thread_enable ();
+
+ return 0;
+
+error:
+ close_netmap_if (nm, nif);
+ return ret;
+}
+
+int
+netmap_delete_if (vlib_main_t * vm, u8 * host_if_name)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ netmap_main_t *nm = &netmap_main;
+ netmap_if_t *nif;
+ uword *p;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ p = mhash_get (&nm->if_index_by_host_if_name, host_if_name);
+ if (p == NULL)
+ {
+ clib_warning ("Host interface %s does not exist", host_if_name);
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ nif = pool_elt_at_index (nm->interfaces, p[0]);
+
+ /* bring down the interface */
+ vnet_hw_interface_set_flags (vnm, nif->hw_if_index, 0);
+
+ ethernet_delete_interface (vnm, nif->hw_if_index);
+
+ close_netmap_if (nm, nif);
+
+ if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 0)
+ netmap_worker_thread_disable ();
+
+ return 0;
+}
+
+static clib_error_t *
+netmap_init (vlib_main_t * vm)
+{
+ netmap_main_t *nm = &netmap_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_thread_registration_t *tr;
+ uword *p;
+
+ clib_memset (nm, 0, sizeof (netmap_main_t));
+
+ nm->input_cpu_first_index = 0;
+ nm->input_cpu_count = 1;
+
+ /* find out which cpus will be used for input */
+ p = hash_get_mem (tm->thread_registrations_by_name, "workers");
+ tr = p ? (vlib_thread_registration_t *) p[0] : 0;
+
+ if (tr && tr->count > 0)
+ {
+ nm->input_cpu_first_index = tr->first_index;
+ nm->input_cpu_count = tr->count;
+ }
+
+ mhash_init_vec_string (&nm->if_index_by_host_if_name, sizeof (uword));
+
+ vec_validate_aligned (nm->rx_buffers, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (netmap_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/netmap.h b/src/plugins/netmap/netmap.h
new file mode 100644
index 00000000000..29f855fda8e
--- /dev/null
+++ b/src/plugins/netmap/netmap.h
@@ -0,0 +1,166 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+/*
+ * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <vppinfra/lock.h>
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ clib_spinlock_t lockp;
+ u8 *host_if_name;
+ uword if_index;
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u32 clib_file_index;
+
+ u32 per_interface_next_index;
+ u8 is_admin_up;
+
+ /* netmap */
+ struct nmreq *req;
+ u16 mem_region;
+ int fd;
+ struct netmap_if *nifp;
+ u16 first_tx_ring;
+ u16 last_tx_ring;
+ u16 first_rx_ring;
+ u16 last_rx_ring;
+
+} netmap_if_t;
+
+typedef struct
+{
+ char *mem;
+ u32 region_size;
+ int refcnt;
+} netmap_mem_region_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ netmap_if_t *interfaces;
+
+ /* bitmap of pending rx interfaces */
+ uword *pending_input_bitmap;
+
+ /* rx buffer cache */
+ u32 **rx_buffers;
+
+ /* hash of host interface names */
+ mhash_t if_index_by_host_if_name;
+
+ /* vector of memory regions */
+ netmap_mem_region_t *mem_regions;
+
+ /* first cpu index */
+ u32 input_cpu_first_index;
+
+ /* total cpu count */
+ u32 input_cpu_count;
+} netmap_main_t;
+
+extern netmap_main_t netmap_main;
+extern vnet_device_class_t netmap_device_class;
+extern vlib_node_registration_t netmap_input_node;
+
+int netmap_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
+ u8 is_pipe, u8 is_master, u32 * sw_if_index);
+int netmap_delete_if (vlib_main_t * vm, u8 * host_if_name);
+
+
+/* Macros and helper functions from sys/net/netmap_user.h */
+
+#ifdef _NET_NETMAP_H_
+
+#define _NETMAP_OFFSET(type, ptr, offset) \
+ ((type)(void *)((char *)(ptr) + (offset)))
+
+#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs)
+
+#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
+ nifp, (nifp)->ring_ofs[index] )
+
+#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \
+ nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] )
+
+#define NETMAP_BUF(ring, index) \
+ ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))
+
+#define NETMAP_BUF_IDX(ring, buf) \
+ ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
+ (ring)->nr_buf_size )
+
+static inline uint32_t
+nm_ring_next (struct netmap_ring *ring, uint32_t i)
+{
+ return (PREDICT_FALSE (i + 1 == ring->num_slots) ? 0 : i + 1);
+}
+
+
+/*
+ * Return 1 if we have pending transmissions in the tx ring.
+ * When everything is complete ring->head = ring->tail + 1 (modulo ring size)
+ */
+static inline int
+nm_tx_pending (struct netmap_ring *ring)
+{
+ return nm_ring_next (ring, ring->tail) != ring->head;
+}
+
+static inline uint32_t
+nm_ring_space (struct netmap_ring *ring)
+{
+ int ret = ring->tail - ring->cur;
+ if (ret < 0)
+ ret += ring->num_slots;
+ return ret;
+}
+#endif
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/netmap_api.c b/src/plugins/netmap/netmap_api.c
new file mode 100644
index 00000000000..51f572a23e6
--- /dev/null
+++ b/src/plugins/netmap/netmap_api.c
@@ -0,0 +1,95 @@
+/*
+ *------------------------------------------------------------------
+ * netmap_api.c - netmap api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <netmap/netmap.h>
+
+#include <vnet/format_fns.h>
+#include <netmap/netmap.api_enum.h>
+#include <netmap/netmap.api_types.h>
+
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(NETMAP_CREATE, netmap_create) \
+_(NETMAP_DELETE, netmap_delete) \
+
+static void
+vl_api_netmap_create_t_handler (vl_api_netmap_create_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_netmap_create_reply_t *rmp;
+ int rv = 0;
+ u8 *if_name = NULL;
+
+ if_name = format (0, "%s", mp->netmap_if_name);
+ vec_add1 (if_name, 0);
+
+ rv =
+ netmap_create_if (vm, if_name, mp->use_random_hw_addr ? 0 : mp->hw_addr,
+ mp->is_pipe, mp->is_master, 0);
+
+ vec_free (if_name);
+
+ REPLY_MACRO (VL_API_NETMAP_CREATE_REPLY);
+}
+
+static void
+vl_api_netmap_delete_t_handler (vl_api_netmap_delete_t * mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_netmap_delete_reply_t *rmp;
+ int rv = 0;
+ u8 *if_name = NULL;
+
+ if_name = format (0, "%s", mp->netmap_if_name);
+ vec_add1 (if_name, 0);
+
+ rv = netmap_delete_if (vm, if_name);
+
+ vec_free (if_name);
+
+ REPLY_MACRO (VL_API_NETMAP_DELETE_REPLY);
+}
+
+#include <netmap/netmap.api.c>
+static clib_error_t *
+netmap_api_hookup (vlib_main_t * vm)
+{
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (netmap_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/node.c b/src/plugins/netmap/node.c
new file mode 100644
index 00000000000..6169847fa79
--- /dev/null
+++ b/src/plugins/netmap/node.c
@@ -0,0 +1,295 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include <vnet/feature/feature.h>
+
+#include <netmap/net_netmap.h>
+#include <netmap/netmap.h>
+
+#define foreach_netmap_input_error
+
+typedef enum
+{
+#define _(f,s) NETMAP_INPUT_ERROR_##f,
+ foreach_netmap_input_error
+#undef _
+ NETMAP_INPUT_N_ERROR,
+} netmap_input_error_t;
+
+static char *netmap_input_error_strings[] = {
+#define _(n,s) s,
+ foreach_netmap_input_error
+#undef _
+};
+
+typedef struct
+{
+ u32 next_index;
+ u32 hw_if_index;
+ struct netmap_slot slot;
+} netmap_input_trace_t;
+
+static u8 *
+format_netmap_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ netmap_input_trace_t *t = va_arg (*args, netmap_input_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "netmap: hw_if_index %d next-index %d",
+ t->hw_if_index, t->next_index);
+ s = format (s, "\n%Uslot: flags 0x%x len %u buf_idx %u",
+ format_white_space, indent + 2,
+ t->slot.flags, t->slot.len, t->slot.buf_idx);
+ return s;
+}
+
+always_inline void
+buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi);
+ vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi);
+
+ /* update first buffer */
+ first_b->total_length_not_including_first_buffer += b->current_length;
+
+ /* update previous buffer */
+ prev_b->next_buffer = bi;
+ prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ /* update current buffer */
+ b->next_buffer = 0;
+}
+
+always_inline uword
+netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, netmap_if_t * nif)
+{
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ uword n_trace = vlib_get_trace_count (vm, node);
+ netmap_main_t *nm = &netmap_main;
+ u32 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u32 *to_next = 0;
+ u32 n_free_bufs;
+ struct netmap_ring *ring;
+ int cur_ring;
+ u32 thread_index = vm->thread_index;
+ u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
+
+ if (nif->per_interface_next_index != ~0)
+ next_index = nif->per_interface_next_index;
+
+ n_free_bufs = vec_len (nm->rx_buffers[thread_index]);
+ if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
+ {
+ vec_validate (nm->rx_buffers[thread_index],
+ VLIB_FRAME_SIZE + n_free_bufs - 1);
+ n_free_bufs +=
+ vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs],
+ VLIB_FRAME_SIZE);
+ vec_set_len (nm->rx_buffers[thread_index], n_free_bufs);
+ }
+
+ cur_ring = nif->first_rx_ring;
+ while (cur_ring <= nif->last_rx_ring && n_free_bufs)
+ {
+ int r = 0;
+ u32 cur_slot_index;
+ ring = NETMAP_RXRING (nif->nifp, cur_ring);
+ r = nm_ring_space (ring);
+
+ if (!r)
+ {
+ cur_ring++;
+ continue;
+ }
+
+ if (r > n_free_bufs)
+ r = n_free_bufs;
+
+ cur_slot_index = ring->cur;
+ while (r)
+ {
+ u32 n_left_to_next;
+ u32 next0 = next_index;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (r && n_left_to_next)
+ {
+ vlib_buffer_t *first_b0 = 0;
+ u32 offset = 0;
+ u32 bi0 = 0, first_bi0 = 0, prev_bi0;
+ u32 next_slot_index = (cur_slot_index + 1) % ring->num_slots;
+ u32 next2_slot_index = (cur_slot_index + 2) % ring->num_slots;
+ struct netmap_slot *slot = &ring->slot[cur_slot_index];
+ u32 data_len = slot->len;
+
+ /* prefetch 2 slots in advance */
+ CLIB_PREFETCH (&ring->slot[next2_slot_index],
+ CLIB_CACHE_LINE_BYTES, LOAD);
+ /* prefetch start of next packet */
+ CLIB_PREFETCH (NETMAP_BUF
+ (ring, ring->slot[next_slot_index].buf_idx),
+ CLIB_CACHE_LINE_BYTES, LOAD);
+
+ while (data_len && n_free_bufs)
+ {
+ vlib_buffer_t *b0;
+ /* grab free buffer */
+ u32 last_empty_buffer =
+ vec_len (nm->rx_buffers[thread_index]) - 1;
+ prev_bi0 = bi0;
+ bi0 = nm->rx_buffers[thread_index][last_empty_buffer];
+ b0 = vlib_get_buffer (vm, bi0);
+ vec_set_len (nm->rx_buffers[thread_index],
+ last_empty_buffer);
+ n_free_bufs--;
+
+ /* copy data */
+ u32 bytes_to_copy =
+ data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
+ b0->current_data = 0;
+ clib_memcpy_fast (vlib_buffer_get_current (b0),
+ (u8 *) NETMAP_BUF (ring, slot->buf_idx) +
+ offset, bytes_to_copy);
+
+ /* fill buffer header */
+ b0->current_length = bytes_to_copy;
+
+ if (offset == 0)
+ {
+ b0->total_length_not_including_first_buffer = 0;
+ b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ nif->sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+ first_bi0 = bi0;
+ first_b0 = vlib_get_buffer (vm, first_bi0);
+ }
+ else
+ buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0);
+
+ offset += bytes_to_copy;
+ data_len -= bytes_to_copy;
+ }
+
+ /* trace */
+ if (PREDICT_FALSE (n_trace > 0))
+ {
+ if (PREDICT_TRUE (first_b0 != 0) &&
+ vlib_trace_buffer (vm, node, next0, first_b0,
+ /* follow_chain */ 0))
+ {
+ netmap_input_trace_t *tr;
+
+ vlib_set_trace_count (vm, node, --n_trace);
+ tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->hw_if_index = nif->hw_if_index;
+ memcpy (&tr->slot, slot, sizeof (struct netmap_slot));
+ }
+ }
+
+ /* enque and take next packet */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, first_bi0,
+ next0);
+
+ /* next packet */
+ n_rx_packets++;
+ n_rx_bytes += slot->len;
+ to_next[0] = first_bi0;
+ to_next += 1;
+ n_left_to_next--;
+ cur_slot_index = next_slot_index;
+
+ r--;
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ ring->head = ring->cur = cur_slot_index;
+ cur_ring++;
+ }
+
+ if (n_rx_packets)
+ ioctl (nif->fd, NIOCRXSYNC, NULL);
+
+ vlib_increment_combined_counter
+ (vnet_get_main ()->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
+
+ return n_rx_packets;
+}
+
+VLIB_NODE_FN (netmap_input_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ int i;
+ u32 n_rx_packets = 0;
+ u32 thread_index = vm->thread_index;
+ netmap_main_t *nm = &netmap_main;
+ netmap_if_t *nmi;
+
+ for (i = 0; i < vec_len (nm->interfaces); i++)
+ {
+ nmi = vec_elt_at_index (nm->interfaces, i);
+ if (nmi->is_admin_up &&
+ (i % nm->input_cpu_count) ==
+ (thread_index - nm->input_cpu_first_index))
+ n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi);
+ }
+
+ return n_rx_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (netmap_input_node) = {
+ .name = "netmap-input",
+ .sibling_of = "device-input",
+ .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
+ .format_trace = format_netmap_input_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ /* default state is INTERRUPT mode, switch to POLLING if worker threads are enabled */
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .n_errors = NETMAP_INPUT_N_ERROR,
+ .error_strings = netmap_input_error_strings,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/netmap/plugin.c b/src/plugins/netmap/plugin.c
new file mode 100644
index 00000000000..1673225b683
--- /dev/null
+++ b/src/plugins/netmap/plugin.c
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2024 Tom Jones <thj@freebsd.org>
+ *
+ * This software was developed by Tom Jones <thj@freebsd.org> under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "netmap",
+};
diff --git a/src/plugins/srmpls/CMakeLists.txt b/src/plugins/srmpls/CMakeLists.txt
new file mode 100644
index 00000000000..25905d31e1b
--- /dev/null
+++ b/src/plugins/srmpls/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Copyright (c) 2024 Cisco and/or its affiliates
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(srmpls
+ SOURCES
+ sr_mpls_policy.c
+ sr_mpls_steering.c
+ sr_mpls_api.c
+ plugin.c
+
+ INSTALL_HEADERS
+ sr_mpls.h
+
+ API_FILES
+ sr_mpls.api
+
+ # This might need to be VAT_AUTO_TEST? Not documented
+ API_TEST_SOURCES
+ sr_mpls_test.c
+)
diff --git a/src/plugins/srmpls/FEATURE.yaml b/src/plugins/srmpls/FEATURE.yaml
new file mode 100644
index 00000000000..c5b958224c7
--- /dev/null
+++ b/src/plugins/srmpls/FEATURE.yaml
@@ -0,0 +1,9 @@
+---
+name: Segment Routing for MPLS
+maintainer: Pablo Camarillo <pcamaril@cisco.com>
+features:
+ - SR Policy support
+ - Automated steering (SR steering based on NextHop/Color)
+description: "SR-MPLS"
+state: production
+properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/srmpls/dir.dox b/src/plugins/srmpls/dir.dox
new file mode 100644
index 00000000000..76ec1d6a41b
--- /dev/null
+++ b/src/plugins/srmpls/dir.dox
@@ -0,0 +1,22 @@
+/*
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ @dir
+ @brief Segment Routing MPLS code
+
+ An implementation of Segment Routing for the MPLS dataplane.
+
+*/ \ No newline at end of file
diff --git a/src/plugins/srmpls/plugin.c b/src/plugins/srmpls/plugin.c
new file mode 100644
index 00000000000..af87607764f
--- /dev/null
+++ b/src/plugins/srmpls/plugin.c
@@ -0,0 +1,26 @@
+/*
+ * plugin.c: srmpls
+ *
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+// register a plugin
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Segment Routing for MPLS plugin",
+};
diff --git a/src/plugins/srmpls/sr_doc.rst b/src/plugins/srmpls/sr_doc.rst
new file mode 100644
index 00000000000..ed847fa0d42
--- /dev/null
+++ b/src/plugins/srmpls/sr_doc.rst
@@ -0,0 +1,215 @@
+.. _srmpls_doc:
+
+SR-MPLS: Segment Routing for MPLS
+=================================
+
+This is a memo intended to contain documentation of the VPP SR-MPLS
+implementation. Everything that is not directly obvious should come
+here. For any feedback on content that should be explained please
+mailto:pcamaril@cisco.com
+
+Segment Routing
+---------------
+
+Segment routing is a network technology focused on addressing the
+limitations of existing IP and Multiprotocol Label Switching (MPLS)
+networks in terms of simplicity, scale, and ease of operation. It is a
+foundation for application engineered routing as it prepares the
+networks for new business models where applications can control the
+network behavior.
+
+Segment routing seeks the right balance between distributed intelligence
+and centralized optimization and programming. It was built for the
+software-defined networking (SDN) era.
+
+Segment routing enhances packet forwarding behavior by enabling a
+network to transport unicast packets through a specific forwarding path,
+different from the normal path that a packet usually takes (IGP shortest
+path or BGP best path). This capability benefits many use cases, and one
+can build those specific paths based on application requirements.
+
+Segment routing uses the source routing paradigm. A node, usually a
+router but also a switch, a trusted server, or a virtual forwarder
+running on a hypervisor, steers a packet through an ordered list of
+instructions, called segments. A segment can represent any instruction,
+topological or service-based. A segment can have a local semantic to a
+segment-routing node or global within a segment-routing network. Segment
+routing allows an operator to enforce a flow through any topological
+path and service chain while maintaining per-flow state only at the
+ingress node to the segment-routing network. Segment routing also
+supports equal-cost multipath (ECMP) by design.
+
+Segment routing can operate with either an MPLS or an IPv6 data plane.
+All the currently available MPLS services, such as Layer 3 VPN (L3VPN),
+L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services
+[VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet
+VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
+
+**The implementation of Segment Routing in VPP covers both the IPv6 data
+plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page
+contains the SR-MPLS documentation.**
+
+Segment Routing terminology
+---------------------------
+
+- SegmentID (SID): is an MPLS label.
+- Segment List (SL) (SID List): is the sequence of SIDs that the packet
+ will traverse.
+- SR Policy: is a set of candidate paths (SID list+weight). An SR
+ policy is uniquely identified by its Binding SID and associated with
+ a weighted set of Segment Lists. In case several SID lists are
+ defined, traffic steered into the policy is unevenly load-balanced
+ among them according to their respective weights.
+- BindingSID: a BindingSID is a SID (only one) associated one-one with
+ an SR Policy. If a packet arrives with MPLS label corresponding to a
+ BindingSID, then the SR policy will be applied to such packet.
+ (BindingSID is popped first.)
+
+SR-MPLS features in VPP
+-----------------------
+
+The SR-MPLS implementation is focused on the SR policies, as well on its
+steering. Others SR-MPLS features, such as for example AdjSIDs, can be
+achieved using the regular VPP MPLS implementation.
+
+The Segment Routing Policy
+(*draft-filsfils-spring-segment-routing-policy*) defines SR Policies.
+
+Creating a SR Policy
+--------------------
+
+An SR Policy is defined by a Binding SID and a weighted set of Segment
+Lists.
+
+A new SR policy is created with a first SID list using:
+
+::
+
+ sr mpls policy add bsid 40001 next 16001 next 16002 next 16003 (weight 5)
+
+- The weight parameter is only used if more than one SID list is
+ associated with the policy.
+
+An SR policy is deleted with:
+
+::
+
+ sr mpls policy del bsid 40001
+
+The existing SR policies are listed with:
+
+::
+
+ show sr mpls policies
+
+Adding/Removing SID Lists from an SR policy
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+An additional SID list is associated with an existing SR policy with:
+
+::
+
+ sr mpls policy mod bsid 40001 add sl next 16001 next 16002 next 16003 (weight 3)
+
+Conversely, a SID list can be removed from an SR policy with:
+
+::
+
+ sr mpls policy mod bsid 4001 del sl index 1
+
+Note that this CLI cannot be used to remove the last SID list of a
+policy. Instead the SR policy delete CLI must be used.
+
+The weight of a SID list can also be modified with:
+
+::
+
+ sr mpls policy mod bsid 40001 mod sl index 1 weight 4
+
+SR Policies: Spray policies
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Spray policies are a specific type of SR policies where the packet is
+replicated on all the SID lists, rather than load-balanced among them.
+
+SID list weights are ignored with this type of policies.
+
+A Spray policy is instantiated by appending the keyword **spray** to a
+regular SR-MPLS policy command, as in:
+
+::
+
+ sr mpls policy add bsid 40002 next 16001 next 16002 next 16003 spray
+
+Spray policies are used for removing multicast state from a network core
+domain, and instead send a linear unicast copy to every access node. The
+last SID in each list accesses the multicast tree within the access
+node.
+
+Steering packets into a SR Policy
+---------------------------------
+
+Segment Routing supports three methods of steering traffic into an SR
+policy.
+
+Local steering
+~~~~~~~~~~~~~~
+
+In this variant incoming packets match a routing policy which directs
+them on a local SR policy.
+
+In order to achieve this behavior the user needs to create an ‘sr
+steering policy via sr policy bsid’.
+
+::
+
+ sr mpls steer l3 2001::/64 via sr policy bsid 40001
+ sr mpls steer l3 2001::/64 via sr policy bsid 40001 fib-table 3
+ sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001
+ sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001 vpn-label 500
+
+Remote steering
+~~~~~~~~~~~~~~~
+
+In this variant incoming packets have an active SID matching a local
+BSID at the head-end.
+
+In order to achieve this behavior the packets should simply arrive with
+an active SID equal to the Binding SID of a locally instantiated SR
+policy.
+
+Automated steering
+~~~~~~~~~~~~~~~~~~
+
+In this variant incoming packets match a BGP/Service route which
+recurses on the BSID of a local policy.
+
+In order to achieve this behavior the user first needs to color the SR
+policies. He can do so by using the CLI:
+
+::
+
+ sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234
+
+Notice that an SR policy can have a single endpoint and a single color.
+Notice that the *endpoint* value is an IP46 address and the color a u32.
+
+Then, for any BGP/Service route the user has to use the API to steer
+prefixes:
+
+::
+
+ sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2
+ sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500
+
+Notice that *co* refers to the CO-bits (values [0|1|2|3]).
+
+Notice also that a given prefix might be steered over several colors
+(same next-hop and same co-bit value). In order to add new colors just
+execute the API several times (or with the del parameter to delete the
+color).
+
+This variant is meant to be used in conjunction with a control plane
+agent that uses the underlying binary API bindings of
+*sr_mpls_steering_policy_add*/*sr_mpls_steering_policy_del* for any BGP
+service route received.
diff --git a/src/plugins/srmpls/sr_mpls.api b/src/plugins/srmpls/sr_mpls.api
new file mode 100644
index 00000000000..742f135d493
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls.api
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates. Licensed under the
+ * Apache License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of the
+ * License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+option version = "3.0.0";
+
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
+import "vnet/srv6/sr_types.api";
+
+/** \brief MPLS SR policy add
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid - is the bindingSID of the SR Policy. MPLS label (20bit)
+ @param weight - is the weight of the sid list. optional.
+ @param is_spray - is the type of the SR policy. (0.Default // 1.Spray)
+ @param segments - vector of labels (20bit) composing the segment list
+*/
+autoreply define sr_mpls_policy_add
+{
+ u32 client_index;
+ u32 context;
+ u32 bsid;
+ u32 weight;
+ bool is_spray;
+ u8 n_segments;
+ u32 segments[n_segments];
+};
+
+/** \brief MPLS SR policy modification
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy. MPLS label (20bit)
+ @param sr_policy_index is the index of the SR policy
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param operation is the operation to perform (among the top ones)
+ @param segments is a vector of MPLS labels composing the segment list
+ @param sl_index is the index of the Segment List to modify/delete
+ @param weight is the weight of the sid list. optional.
+ @param is_encap Mode. Encapsulation or SRH insertion.
+*/
+autoreply define sr_mpls_policy_mod
+{
+ u32 client_index;
+ u32 context;
+ u32 bsid;
+ vl_api_sr_policy_op_t operation;
+ u32 sl_index;
+ u32 weight;
+ u8 n_segments;
+ u32 segments[n_segments];
+};
+
+/** \brief MPLS SR policy deletion
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy. MPLS label (20bit)
+*/
+autoreply define sr_mpls_policy_del
+{
+ u32 client_index;
+ u32 context;
+ u32 bsid;
+};
+
+/** \brief MPLS SR steering add/del
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_del
+ @param bsid - is the bindingSID of the SR Policy (~0 is no bsid)
+ @param table_id - is the VRF where to install the FIB entry for the BSID
+ @param prefix - is the IPv4/v6 address for L3 traffic type.
+ @param mask_width - is the mask for L3 traffic type
+ @param next_hop - describes the next_hop (in case no BSID)
+ @param color - describes the color
+ @param co_bits - are the CO_bits of the steering policy
+ @param vpn_label - is an additonal last VPN label. (~0 is no label)
+*/
+autoreply define sr_mpls_steering_add_del
+{
+ u32 client_index;
+ u32 context;
+ bool is_del[default = false];
+ u32 bsid;
+ u32 table_id;
+ vl_api_prefix_t prefix;
+ u32 mask_width;
+ vl_api_address_t next_hop;
+ u32 color;
+ u8 co_bits;
+ u32 vpn_label;
+};
+
+/** \brief MPLS SR steering add/del
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param endpoint is the endpoint of the SR policy
+ @param color is the color of the sr policy
+*/
+autoreply define sr_mpls_policy_assign_endpoint_color
+{
+ u32 client_index;
+ u32 context;
+ u32 bsid;
+ vl_api_address_t endpoint;
+ u32 color;
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON Local Variables: eval:
+ * (c-set-style "gnu") End:
+ */
diff --git a/src/plugins/srmpls/sr_mpls.h b/src/plugins/srmpls/sr_mpls.h
new file mode 100644
index 00000000000..a8f9494428f
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates. Licensed under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing MPLS data structures definitions
+ *
+ */
+
+#ifndef included_vnet_srmpls_h
+#define included_vnet_srmpls_h
+
+#include <vnet/vnet.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/dpo/replicate_dpo.h>
+
+#include <stdlib.h>
+#include <string.h>
+
+/* SR policy types */
+#define SR_POLICY_TYPE_DEFAULT 0
+#define SR_POLICY_TYPE_SPRAY 1
+
+#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
+
+#define SR_STEER_IPV4 4
+#define SR_STEER_IPV6 6
+
+#define SR_TE_CO_BITS_00 0
+#define SR_TE_CO_BITS_01 1
+#define SR_TE_CO_BITS_10 2
+#define SR_TE_CO_BITS_11 3
+
+/**
+ * @brief SR Segment List (SID list)
+ */
+typedef struct
+{
+ /* SIDs (key) */
+ mpls_label_t *segments;
+
+ /* SID list weight (wECMP / UCMP) */
+ u32 weight;
+
+} mpls_sr_sl_t;
+
+typedef struct
+{
+ u32 *segments_lists; /**< Pool of SID lists indexes */
+
+ mpls_label_t bsid; /**< BindingSID (key) */
+
+ u8 type; /**< Type (default is 0) */
+ /* SR Policy specific DPO */
+ /* IF Type = DEFAULT Then Load-Balancer DPO among SID lists */
+ /* IF Type = SPRAY then Spray DPO with all SID lists */
+
+ ip46_address_t endpoint; /**< Optional NH for SR TE */
+ u8 endpoint_type;
+ u32 color; /**< Optional color for SR TE */
+} mpls_sr_policy_t;
+
+/**
+ * @brief Steering db key
+ *
+ * L3 is IPv4/IPv6 + mask
+ */
+typedef struct
+{
+ ip46_address_t prefix; /**< IP address of the prefix */
+ u32 mask_width; /**< Mask width of the prefix */
+ u32 fib_table; /**< VRF of the prefix */
+ u8 traffic_type; /**< Traffic type (IPv4, IPv6, L2) */
+ u8 padding[3];
+} sr_mpls_steering_key_t;
+
+typedef struct
+{
+ sr_mpls_steering_key_t classify; /**< Traffic classification */
+ mpls_label_t bsid; /**< SR Policy index */
+ ip46_address_t next_hop; /**< SR TE NH */
+ char nh_type;
+ u32 *color; /**< Vector of SR TE colors */
+ char co_bits; /**< Color-Only bits */
+ mpls_label_t vpn_label;
+} mpls_sr_steering_policy_t;
+
+/**
+ * @brief Segment Routing main datastructure
+ */
+typedef struct
+{
+ /* SR SID lists */
+ mpls_sr_sl_t *sid_lists;
+
+ /* SR MPLS policies */
+ mpls_sr_policy_t *sr_policies;
+
+ /* Hash table mapping BindingSID to SR MPLS policy */
+ uword *sr_policies_index_hash;
+
+ /* Pool of SR steer policies instances */
+ mpls_sr_steering_policy_t *steer_policies;
+
+ /* MHash table mapping steering rules to SR steer instance */
+ mhash_t sr_steer_policies_hash;
+
+ /** SR TE **/
+ /* Hash table mapping (Color->Endpoint->BSID) for SR policies */
+ mhash_t sr_policies_c2e2eclabel_hash;
+ /* SR TE (internal) fib table (Endpoint, Color) */
+ u32 fib_table_EC;
+ /* Pool of (Endpoint, Color) hidden labels */
+ u32 *ec_labels;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} mpls_sr_main_t;
+
+extern mpls_sr_main_t sr_mpls_main;
+
+extern int
+sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments,
+ u8 behavior, u32 weight);
+
+extern int
+sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
+ mpls_label_t * segments, u32 sl_index, u32 weight);
+
+extern int sr_mpls_policy_del (mpls_label_t bsid);
+
+extern int
+sr_mpls_policy_assign_endpoint_color (mpls_label_t bsid,
+ ip46_address_t * endpoint,
+ u8 endpoint_type, u32 color);
+
+extern int
+sr_mpls_steering_policy_add (mpls_label_t bsid, u32 table_id,
+ ip46_address_t * prefix, u32 mask_width,
+ u8 traffic_type, ip46_address_t * next_hop,
+ u8 nh_type, u32 color, char co_bits,
+ mpls_label_t vpn_label);
+
+extern int
+sr_mpls_steering_policy_del (ip46_address_t * prefix,
+ u32 mask_width, u8 traffic_type, u32 table_id,
+ u32 color);
+
+extern u32 find_or_create_internal_label (ip46_address_t endpoint, u32 color);
+
+extern void internal_label_lock (ip46_address_t endpoint, u32 color);
+
+extern void internal_label_unlock (ip46_address_t endpoint, u32 color);
+
+#endif /* included_vnet_sr_mpls_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables: eval: (c-set-style "gnu") End:
+ */
diff --git a/src/plugins/srmpls/sr_mpls_api.c b/src/plugins/srmpls/sr_mpls_api.c
new file mode 100644
index 00000000000..3e89017dbc1
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls_api.c
@@ -0,0 +1,257 @@
+/*
+ * ------------------------------------------------------------------
+ * sr_api.c - ipv6 segment routing api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ * ------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include "sr_mpls.h"
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ip/ip_types_api.h>
+
+#include <vnet/format_fns.h>
+#include <plugins/srmpls/sr_mpls.api_enum.h>
+#include <plugins/srmpls/sr_mpls.api_types.h>
+
+#define vl_api_version(n, v) static u32 api_version = v;
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_api_version
+
+#define vl_endianfun
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_endianfun
+
+#define vl_calcsizefun
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_calcsizefun
+
+#define vl_printfun
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_printfun
+
+#define vl_msg_name_crc_list
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_msg_name_crc_list
+
+#define REPLY_MSG_ID_BASE msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+#define foreach_vpe_api_msg \
+_(SR_MPLS_POLICY_DEL, sr_mpls_policy_del) \
+_(SR_MPLS_STEERING_ADD_DEL, sr_mpls_steering_add_del) \
+_(SR_MPLS_POLICY_ASSIGN_ENDPOINT_COLOR, sr_mpls_policy_assign_endpoint_color)
+
+static u16 msg_id_base;
+
+static void
+vl_api_sr_mpls_policy_add_t_handler (vl_api_sr_mpls_policy_add_t * mp)
+{
+ vl_api_sr_mpls_policy_add_reply_t *rmp;
+
+ mpls_label_t *segments = 0, *seg;
+ mpls_label_t this_address = 0;
+
+ int i;
+ for (i = 0; i < mp->n_segments; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ this_address = ntohl (mp->segments[i]);
+ clib_memcpy (seg, &this_address, sizeof (this_address));
+ }
+
+ int rv = 0;
+ rv = sr_mpls_policy_add (ntohl (mp->bsid),
+ segments, mp->is_spray, ntohl (mp->weight));
+ vec_free (segments);
+
+ REPLY_MACRO (VL_API_SR_MPLS_POLICY_ADD_REPLY);
+}
+
+static void
+vl_api_sr_mpls_policy_mod_t_handler (vl_api_sr_mpls_policy_mod_t * mp)
+{
+ vl_api_sr_mpls_policy_mod_reply_t *rmp;
+
+ mpls_label_t *segments = 0, *seg;
+ mpls_label_t this_address = 0;
+
+ int i;
+ for (i = 0; i < mp->n_segments; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ this_address = ntohl (mp->segments[i]);
+ clib_memcpy (seg, &this_address, sizeof (this_address));
+ }
+
+ int rv = 0;
+ rv = sr_mpls_policy_mod (ntohl (mp->bsid),
+ ntohl (mp->operation), segments,
+ ntohl (mp->sl_index), ntohl (mp->weight));
+ vec_free (segments);
+
+ REPLY_MACRO (VL_API_SR_MPLS_POLICY_MOD_REPLY);
+}
+
+static void
+vl_api_sr_mpls_policy_del_t_handler (vl_api_sr_mpls_policy_del_t * mp)
+{
+ vl_api_sr_mpls_policy_del_reply_t *rmp;
+ int rv = 0;
+ rv = sr_mpls_policy_del (ntohl (mp->bsid));
+
+ REPLY_MACRO (VL_API_SR_MPLS_POLICY_DEL_REPLY);
+}
+
+static void vl_api_sr_mpls_steering_add_del_t_handler
+ (vl_api_sr_mpls_steering_add_del_t * mp)
+{
+ vl_api_sr_mpls_steering_add_del_reply_t *rmp;
+ fib_prefix_t prefix;
+ ip46_address_t next_hop;
+ clib_memset (&prefix, 0, sizeof (ip46_address_t));
+
+ ip_prefix_decode (&mp->prefix, &prefix);
+ ip_address_decode (&mp->next_hop, &next_hop);
+
+ int rv = 0;
+ if (mp->is_del)
+ rv = sr_mpls_steering_policy_del (&prefix.fp_addr,
+ prefix.fp_len,
+ ip46_address_is_ip4 (&prefix.fp_addr) ?
+ SR_STEER_IPV4 : SR_STEER_IPV6,
+ ntohl (mp->table_id),
+ ntohl (mp->color));
+ else
+ rv = sr_mpls_steering_policy_add (ntohl (mp->bsid),
+ ntohl (mp->table_id),
+ &prefix.fp_addr,
+ prefix.fp_len,
+ ip46_address_is_ip4 (&prefix.fp_addr) ?
+ SR_STEER_IPV4 : SR_STEER_IPV6,
+ &next_hop,
+ ip46_address_is_ip4 (&next_hop) ?
+ SR_STEER_IPV4 : SR_STEER_IPV6,
+ ntohl (mp->color), mp->co_bits,
+ ntohl (mp->vpn_label));
+
+ REPLY_MACRO (VL_API_SR_MPLS_STEERING_ADD_DEL_REPLY);
+}
+
+static void vl_api_sr_mpls_policy_assign_endpoint_color_t_handler
+ (vl_api_sr_mpls_policy_assign_endpoint_color_t * mp)
+{
+ vl_api_sr_mpls_policy_assign_endpoint_color_reply_t *rmp;
+ int rv = 0;
+
+ ip46_address_t endpoint;
+ clib_memset (&endpoint, 0, sizeof (ip46_address_t));
+ ip_address_decode (&mp->endpoint, &endpoint);
+
+ rv = sr_mpls_policy_assign_endpoint_color (ntohl (mp->bsid),
+ &endpoint,
+ ip46_address_is_ip4 (&endpoint) ?
+ SR_STEER_IPV4 : SR_STEER_IPV6,
+ ntohl (mp->color));
+
+ REPLY_MACRO (VL_API_SR_MPLS_POLICY_ASSIGN_ENDPOINT_COLOR_REPLY);
+}
+
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(id, n, crc) \
+ vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + REPLY_MSG_ID_BASE);
+ foreach_vl_msg_name_crc_sr_mpls;
+#undef _
+}
+
+static clib_error_t *
+sr_mpls_api_hookup (vlib_main_t * vm)
+{
+ api_main_t *am = vlibapi_get_main ();
+
+ u8 *name = format (0, "sr_mpls_%08x%c", api_version, 0);
+ REPLY_MSG_ID_BASE =
+ vl_msg_api_get_msg_ids ((char *) name, VL_MSG_SR_MPLS_LAST);
+ vec_free (name);
+
+#define _(N, n) \
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = REPLY_MSG_ID_BASE + VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = 1, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ });
+ foreach_vpe_api_msg;
+#undef _
+
+ /*
+ * Manually register the sr policy add msg, so we trace enough bytes
+ * to capture a typical segment list
+ */
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){
+ .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_ADD,
+ .name = "sr_mpls_policy_add",
+ .handler = vl_api_sr_mpls_policy_add_t_handler,
+ .endian = vl_api_sr_mpls_policy_add_t_endian,
+ .format_fn = vl_api_sr_mpls_policy_add_t_format,
+ .size = 256,
+ .traced = 1,
+ .tojson = vl_api_sr_mpls_policy_add_t_tojson,
+ .fromjson = vl_api_sr_mpls_policy_add_t_fromjson,
+ .calc_size = vl_api_sr_mpls_policy_add_t_calc_size,
+ });
+ /*
+ * Manually register the sr policy mod msg, so we trace enough bytes
+ * to capture a typical segment list
+ */
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){
+ .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_MOD,
+ .name = "sr_mpls_policy_mod",
+ .handler = vl_api_sr_mpls_policy_mod_t_handler,
+ .endian = vl_api_sr_mpls_policy_mod_t_endian,
+ .format_fn = vl_api_sr_mpls_policy_mod_t_format,
+ .size = 256,
+ .traced = 1,
+ .tojson = vl_api_sr_mpls_policy_mod_t_tojson,
+ .fromjson = vl_api_sr_mpls_policy_mod_t_fromjson,
+ .calc_size = vl_api_sr_mpls_policy_mod_t_calc_size,
+ });
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ setup_message_id_table (am);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (sr_mpls_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables: eval: (c-set-style "gnu") End:
+ */
diff --git a/src/plugins/srmpls/sr_mpls_policy.c b/src/plugins/srmpls/sr_mpls_policy.c
new file mode 100644
index 00000000000..af24acd8cf6
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls_policy.c
@@ -0,0 +1,903 @@
+/*
+ * sr_mpls_policy.c: SR-MPLS policies
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * @file
+ * @brief SR MPLS policy creation and application
+ *
+ * Create an SR policy.
+ * An SR policy can be either of 'default' type or 'spray' type
+ * An SR policy has attached a list of SID lists.
+ * In case the SR policy is a default one it will load balance among them.
+ * An SR policy has associated a BindingSID.
+ * In case any packet arrives with MPLS_label == BindingSID then the SR policy
+ * associated to such bindingSID will be applied to such packet.
+ * Also, a BSID can be associated with a (Next-Hop, Color)
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include "sr_mpls.h"
+#include <vnet/fib/mpls_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+mpls_sr_main_t sr_mpls_main;
+
+/*************************** SR LB helper functions **************************/
+/**
+ * @brief Creates a Segment List and adds it to an SR policy
+ *
+ * Creates a Segment List and adds it to the SR policy. Notice that the SL are
+ * not necessarily unique. Hence there might be two Segment List within the
+ * same SR Policy with exactly the same segments and same weight.
+ *
+ * @param sr_policy is the SR policy where the SL will be added
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ * @param weight is the weight of the SegmentList (for load-balancing purposes)
+ * @param is_encap represents the mode (SRH insertion vs Encapsulation)
+ *
+ * @return pointer to the just created segment list
+ */
+static inline mpls_sr_sl_t *
+create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_sl_t *segment_list;
+ u32 ii;
+
+ pool_get (sm->sid_lists, segment_list);
+ clib_memset (segment_list, 0, sizeof (*segment_list));
+
+ vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists);
+
+ /* Fill in segment list */
+ segment_list->weight =
+ (weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT);
+ segment_list->segments = vec_dup (sl);
+
+ mpls_eos_bit_t eos;
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = segment_list->weight,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_label_stack = NULL,
+ .frp_local_label = sl[0],
+ };
+
+ if (vec_len (sl) > 1)
+ {
+ vec_validate (path.frp_label_stack, vec_len (sl) - 2);
+ for (ii = 1; ii < vec_len (sl); ii++)
+ {
+ path.frp_label_stack[ii - 1].fml_value = sl[ii];
+ }
+ }
+ else
+ {
+ /*
+ * add an impliciet NULL label to allow non-eos recursion
+ */
+ fib_mpls_label_t lbl = {
+ .fml_value = MPLS_IETF_IMPLICIT_NULL_LABEL,
+ };
+ vec_add1 (path.frp_label_stack, lbl);
+ }
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ fib_table_entry_path_add2 (0,
+ &pfx,
+ FIB_SOURCE_SR,
+ (sr_policy->type == SR_POLICY_TYPE_DEFAULT ?
+ FIB_ENTRY_FLAG_NONE :
+ FIB_ENTRY_FLAG_MULTICAST), paths);
+ vec_free (paths);
+ }
+
+ return segment_list;
+}
+
+/******************************* SR rewrite API *******************************/
+/*
+ * Three functions for handling sr policies: -> sr_mpls_policy_add ->
+ * sr_mpls_policy_del -> sr_mpls_policy_mod All of them are API. CLI function
+ * on sr_policy_command_fn
+ */
+
+/**
+ * @brief Create a new SR policy
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param segments is a vector of MPLS labels composing the segment list
+ * @param behavior is the behavior of the SR policy. (default//spray)
+ * @param fib_table is the VRF where to install the FIB entry for the BSID
+ * @param weight is the weight of this specific SID list
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments,
+ u8 behavior, u32 weight)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_policy_t *sr_policy = 0;
+ uword *p;
+
+ if (!sm->sr_policies_index_hash)
+ sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+
+ /* MPLS SR policies cannot be created unless the MPLS table is present */
+ if (~0 == fib_table_find (FIB_PROTOCOL_MPLS, MPLS_FIB_DEFAULT_TABLE_ID))
+ return (VNET_API_ERROR_NO_SUCH_TABLE);
+
+ /* Search for existing keys (BSID) */
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ {
+ /* Add SR policy that already exists; complain */
+ return -12;
+ }
+ /* Add an SR policy object */
+ pool_get (sm->sr_policies, sr_policy);
+ clib_memset (sr_policy, 0, sizeof (*sr_policy));
+
+ /* the first policy needs to lock the MPLS table so it doesn't
+ * disappear with policies in it */
+ if (1 == pool_elts (sm->sr_policies))
+ fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS,
+ MPLS_FIB_DEFAULT_TABLE_ID,
+ FIB_SOURCE_SR);
+ sr_policy->bsid = bsid;
+ sr_policy->type = behavior;
+ sr_policy->endpoint_type = 0;
+ ip6_address_set_zero (&sr_policy->endpoint.ip6);
+ sr_policy->color = (u32) ~ 0;
+
+ /* Copy the key */
+ hash_set (sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies);
+
+ /* Create a segment list and add the index to the SR policy */
+ create_sl (sr_policy, segments, weight);
+
+ return 0;
+}
+
+/**
+ * @brief Delete a SR policy
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param index is the index of the SR policy
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_policy_del (mpls_label_t bsid)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_policy_t *sr_policy = 0;
+ mpls_sr_sl_t *segment_list;
+ mpls_eos_bit_t eos;
+ u32 *sl_index;
+ uword *p;
+
+ if (!sm->sr_policies_index_hash)
+ sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -1;
+
+ /* Clean SID Lists */
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = segment_list->weight,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_local_label = segment_list->segments[0],
+ };
+
+ vec_add (path.frp_label_stack, segment_list + 1,
+ vec_len (segment_list) - 1);
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ /* remove each of the MPLS routes */
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
+ }
+ vec_free (paths);
+ vec_free (segment_list->segments);
+ pool_put_index (sm->sid_lists, *sl_index);
+ }
+
+ /* If there is still traces of TE, make sure locks are released */
+ if (sr_policy->endpoint_type != 0 && sr_policy->color != (u32) ~ 0)
+ {
+ sr_mpls_policy_assign_endpoint_color (bsid, NULL, 0, (u32) ~ 0);
+ }
+
+ /* Remove SR policy entry */
+ hash_unset (sm->sr_policies_index_hash, sr_policy->bsid);
+ pool_put (sm->sr_policies, sr_policy);
+
+ if (0 == pool_elts (sm->sr_policies))
+ fib_table_unlock (MPLS_FIB_DEFAULT_TABLE_ID,
+ FIB_PROTOCOL_MPLS, FIB_SOURCE_SR);
+
+ return 0;
+}
+
+/**
+ * @brief Modify an existing SR policy
+ *
+ * The possible modifications are adding a new Segment List, modifying an
+ * existing Segment List (modify the weight only) and delete a given
+ * Segment List from the SR Policy.
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param fib_table is the VRF where to install the FIB entry for the BSID
+ * @param operation is the operation to perform (among the top ones)
+ * @param segments is a vector of IPv6 address composing the segment list
+ * @param sl_index is the index of the Segment List to modify/delete
+ * @param weight is the weight of the sid list. optional.
+ *
+ * @return 0 ok, >0 index of SL, <0 error
+ */
+int
+sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
+ mpls_label_t * segments, u32 sl_index, u32 weight)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_policy_t *sr_policy = 0;
+ mpls_sr_sl_t *segment_list;
+ u32 *sl_index_iterate;
+ uword *p;
+
+ if (!sm->sr_policies_index_hash)
+ sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -1;
+
+ if (operation == 1)
+ { /* Add SR List to an existing SR policy */
+ /* Create the new SL */
+ segment_list = create_sl (sr_policy, segments, weight);
+ return segment_list - sm->sid_lists;
+ }
+ else if (operation == 2)
+ { /* Delete SR List from an existing SR
+ * policy */
+ /* Check that currently there are more than one SID list */
+ if (vec_len (sr_policy->segments_lists) == 1)
+ return -21;
+
+ /*
+ * Check that the SR list does exist and is assigned to the
+ * sr policy
+ */
+ vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+ if (*sl_index_iterate == sl_index)
+ break;
+
+ if (*sl_index_iterate != sl_index)
+ return -22;
+
+ /* Remove the lucky SR list that is being kicked out */
+ segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+
+ mpls_eos_bit_t eos;
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = segment_list->weight,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_local_label = segment_list->segments[0],
+ };
+
+ vec_add (path.frp_label_stack, segment_list + 1,
+ vec_len (segment_list) - 1);
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
+ }
+
+ vec_free (paths);
+ vec_free (segment_list->segments);
+ pool_put_index (sm->sid_lists, sl_index);
+ vec_del1 (sr_policy->segments_lists,
+ sl_index_iterate - sr_policy->segments_lists);
+ }
+ else if (operation == 3)
+ { /* Modify the weight of an existing
+ * SR List */
+ /* Find the corresponding SL */
+ vec_foreach (sl_index_iterate, sr_policy->segments_lists)
+ if (*sl_index_iterate == sl_index)
+ break;
+
+ if (*sl_index_iterate != sl_index)
+ return -32;
+
+ /* Change the weight */
+ segment_list = pool_elt_at_index (sm->sid_lists, sl_index);
+
+ /* Update LB */
+ mpls_eos_bit_t eos;
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = segment_list->weight,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_local_label = segment_list->segments[0],
+ };
+
+ vec_add (path.frp_label_stack, segment_list + 1,
+ vec_len (segment_list) - 1);
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
+ }
+
+ segment_list->weight = weight;
+
+ path.frp_weight = segment_list->weight;
+
+ vec_free (paths);
+ paths = NULL;
+ vec_add1 (paths, path);
+
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ fib_prefix_t pfx = {
+ .fp_len = 21,
+ .fp_proto = FIB_PROTOCOL_MPLS,
+ .fp_label = sr_policy->bsid,
+ .fp_eos = eos,
+ .fp_payload_proto = DPO_PROTO_MPLS,
+ };
+
+ fib_table_entry_path_add2 (0,
+ &pfx,
+ FIB_SOURCE_SR,
+ (sr_policy->type ==
+ SR_POLICY_TYPE_DEFAULT ?
+ FIB_ENTRY_FLAG_NONE :
+ FIB_ENTRY_FLAG_MULTICAST), paths);
+ }
+ }
+ return 0;
+}
+
+/**
+ * @brief CLI for 'sr mpls policies' command family
+ */
+static clib_error_t *
+sr_mpls_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int rv = -1;
+ char is_del = 0, is_add = 0, is_mod = 0;
+ char policy_set = 0;
+ mpls_label_t bsid, next_label;
+ u32 sl_index = (u32) ~ 0;
+ u32 weight = (u32) ~ 0;
+ mpls_label_t *segments = 0;
+ u8 operation = 0;
+ u8 is_spray = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!is_add && !is_mod && !is_del && unformat (input, "add"))
+ is_add = 1;
+ else if (!is_add && !is_mod && !is_del && unformat (input, "del"))
+ is_del = 1;
+ else if (!is_add && !is_mod && !is_del && unformat (input, "mod"))
+ is_mod = 1;
+ else if (!policy_set
+ && unformat (input, "bsid %U", unformat_mpls_unicast_label,
+ &bsid))
+ policy_set = 1;
+ else if (unformat (input, "weight %d", &weight));
+ else if (unformat
+ (input, "next %U", unformat_mpls_unicast_label, &next_label))
+ {
+ vec_add (segments, &next_label, 1);
+ }
+ else if (unformat (input, "add sl"))
+ operation = 1;
+ else if (unformat (input, "del sl index %d", &sl_index))
+ operation = 2;
+ else if (unformat (input, "mod sl index %d", &sl_index))
+ operation = 3;
+ else if (unformat (input, "spray"))
+ is_spray = 1;
+ else
+ break;
+ }
+
+ if (!is_add && !is_mod && !is_del)
+ return clib_error_return (0, "Incorrect CLI");
+
+ if (!policy_set)
+ return clib_error_return (0, "No SR policy BSID or index specified");
+
+ if (is_add)
+ {
+ if (vec_len (segments) == 0)
+ return clib_error_return (0, "No Segment List specified");
+
+ rv = sr_mpls_policy_add (bsid, segments,
+ (is_spray ? SR_POLICY_TYPE_SPRAY :
+ SR_POLICY_TYPE_DEFAULT), weight);
+ vec_free (segments);
+ }
+ else if (is_del)
+ rv = sr_mpls_policy_del (bsid);
+ else if (is_mod)
+ {
+ if (!operation)
+ return clib_error_return (0, "No SL modification specified");
+ if (operation != 1 && sl_index == (u32) ~ 0)
+ return clib_error_return (0, "No Segment List index specified");
+ if (operation == 1 && vec_len (segments) == 0)
+ return clib_error_return (0, "No Segment List specified");
+ if (operation == 3 && weight == (u32) ~ 0)
+ return clib_error_return (0, "No new weight for the SL specified");
+ rv = sr_mpls_policy_mod (bsid, operation, segments, sl_index, weight);
+ vec_free (segments);
+ }
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -12:
+ return clib_error_return (0,
+ "There is already a FIB entry for the BindingSID address.\n"
+ "The SR policy could not be created.");
+ case -21:
+ return clib_error_return (0,
+ "The selected SR policy only contains ONE segment list. "
+ "Please remove the SR policy instead");
+ case -22:
+ return clib_error_return (0,
+ "Could not delete the segment list. "
+ "It is not associated with that SR policy.");
+ case -23:
+ return clib_error_return (0,
+ "Could not delete the segment list. "
+ "It is not associated with that SR policy.");
+ case -32:
+ return clib_error_return (0,
+ "Could not modify the segment list. "
+ "The given SL is not associated with such SR policy.");
+ case VNET_API_ERROR_NO_SUCH_TABLE:
+ return clib_error_return (0, "the Default MPLS table is not present");
+ default:
+ return clib_error_return (0, "BUG: sr policy returns %d", rv);
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND(sr_mpls_policy_command, static)=
+{
+ .path = "sr mpls policy",
+ .short_help = "sr mpls policy [add||del||mod] bsid 2999 "
+ "next 10 next 20 next 30 (weight 1) (spray)",
+ .long_help = "TBD.\n",
+ .function = sr_mpls_policy_command_fn,
+};
+
+/**
+ * @brief CLI to display onscreen all the SR MPLS policies
+ */
+static clib_error_t *
+show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_sl_t *segment_list = 0;
+ mpls_sr_policy_t *sr_policy = 0;
+ mpls_sr_policy_t **vec_policies = 0;
+ mpls_label_t *label;
+ u32 *sl_index;
+ u8 *s;
+ int i = 0;
+
+ vlib_cli_output (vm, "SR MPLS policies:");
+
+ pool_foreach (sr_policy, sm->sr_policies) {
+ vec_add1(vec_policies, sr_policy);
+ }
+
+ vec_foreach_index (i, vec_policies)
+ {
+ sr_policy = vec_policies[i];
+ vlib_cli_output (vm, "[%u].-\tBSID: %U",
+ (u32) (sr_policy - sm->sr_policies),
+ format_mpls_unicast_label, sr_policy->bsid);
+ switch (sr_policy->endpoint_type)
+ {
+ case SR_STEER_IPV6:
+ vlib_cli_output (vm, "\tEndpoint: %U", format_ip6_address,
+ &sr_policy->endpoint.ip6);
+ vlib_cli_output (vm, "\tColor: %u", sr_policy->color);
+ break;
+ case SR_STEER_IPV4:
+ vlib_cli_output (vm, "\tEndpoint: %U", format_ip4_address,
+ &sr_policy->endpoint.ip4);
+ vlib_cli_output (vm, "\tColor: %u", sr_policy->color);
+ break;
+ default:
+ vlib_cli_output (vm, "\tTE disabled");
+ }
+ vlib_cli_output (vm, "\tType: %s",
+ (sr_policy->type ==
+ SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray"));
+ vlib_cli_output (vm, "\tSegment Lists:");
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ s = NULL;
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+ s = format (s, "\t[%u].- ", *sl_index);
+ s = format (s, "< ");
+ vec_foreach (label, segment_list->segments)
+ {
+ s = format (s, "%U, ", format_mpls_unicast_label, *label);
+ }
+ s = format (s, "\b\b > ");
+ vlib_cli_output (vm, " %s", s);
+ }
+ vlib_cli_output (vm, "-----------");
+ }
+ vec_free (vec_policies);
+ return 0;
+}
+
+VLIB_CLI_COMMAND(show_sr_mpls_policies_command, static)=
+{
+ .path = "show sr mpls policies",
+ .short_help = "show sr mpls policies",
+ .function = show_sr_mpls_policies_command_fn,
+};
+
+/**
+ * @brief Update the Endpoint,Color tuple of an SR policy
+ *
+ * @param bsid is the bindingSID of the SR Policy
+ * @param endpoint represents the IP46 of the endpoint
+ * @param color represents the color (u32)
+ *
+ * To reset to NULL use ~0 as parameters.
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_policy_assign_endpoint_color (mpls_label_t bsid,
+ ip46_address_t * endpoint,
+ u8 endpoint_type, u32 color)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_policy_t *sr_policy = 0;
+ uword *endpoint_table, *p, *old_value;
+
+ ip46_address_t any;
+ any.as_u64[0] = any.as_u64[1] = (u64) ~ 0;
+
+ if (!sm->sr_policies_index_hash)
+ sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (p)
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+ else
+ return -1;
+
+ /* If previous Endpoint, color existed, remove (NH,C) and (ANY,C) */
+ if (sr_policy->endpoint_type)
+ {
+ endpoint_table =
+ mhash_get (&sm->sr_policies_c2e2eclabel_hash, &sr_policy->color);
+ if (!endpoint_table)
+ return -2;
+ old_value =
+ mhash_get ((mhash_t *) endpoint_table, &sr_policy->endpoint);
+
+ /* CID 180995 This should never be NULL unless the two hash tables
+ * get out of sync */
+ ALWAYS_ASSERT (old_value != NULL);
+
+ fib_prefix_t pfx = { 0 };
+ pfx.fp_proto = FIB_PROTOCOL_MPLS;
+ pfx.fp_len = 21;
+ pfx.fp_label = (u32) * old_value;
+
+ mpls_eos_bit_t eos;
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ pfx.fp_eos = eos;
+ fib_table_entry_path_remove (sm->fib_table_EC,
+ &pfx,
+ FIB_SOURCE_SR,
+ DPO_PROTO_MPLS,
+ NULL,
+ ~0, 0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+ }
+
+ old_value = mhash_get ((mhash_t *) endpoint_table, &any);
+ pfx.fp_label = (u32) * old_value;
+
+ FOR_EACH_MPLS_EOS_BIT (eos)
+ {
+ pfx.fp_eos = eos;
+ fib_table_entry_path_remove (sm->fib_table_EC,
+ &pfx,
+ FIB_SOURCE_SR,
+ DPO_PROTO_MPLS,
+ NULL,
+ ~0, 0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+ }
+
+ /* Release the lock on (NH, Color) and (ANY, Color) */
+ internal_label_unlock (sr_policy->endpoint, sr_policy->color);
+ internal_label_unlock (any, sr_policy->color);
+
+ /* Reset the values on the SR policy */
+ sr_policy->endpoint_type = 0;
+ sr_policy->endpoint.as_u64[0] = sr_policy->endpoint.as_u64[1] =
+ (u64) ~ 0;
+ sr_policy->color = (u32) ~ 0;
+ }
+
+ if (endpoint_type)
+ {
+ sr_policy->endpoint_type = endpoint_type;
+ sr_policy->endpoint.as_u64[0] = endpoint->as_u64[0];
+ sr_policy->endpoint.as_u64[1] = endpoint->as_u64[1];
+ sr_policy->color = color;
+
+ u32 label = find_or_create_internal_label (*endpoint, color);
+ internal_label_lock (*endpoint, sr_policy->color);
+
+ /* If FIB doesnt exist, create them */
+ if (sm->fib_table_EC == (u32) ~ 0)
+ {
+ sm->fib_table_EC = fib_table_create_and_lock (FIB_PROTOCOL_MPLS,
+ FIB_SOURCE_SR,
+ "SR-MPLS Traffic Engineering (NextHop,Color)");
+
+ fib_table_flush (sm->fib_table_EC, FIB_PROTOCOL_MPLS,
+ FIB_SOURCE_SPECIAL);
+ }
+
+ fib_prefix_t pfx = { 0 };
+ pfx.fp_proto = FIB_PROTOCOL_MPLS;
+ pfx.fp_len = 21;
+
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = 1,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_label_stack = 0
+ };
+ path.frp_local_label = sr_policy->bsid;
+
+ //Add the entry to ANY,Color
+ u32 any_label = find_or_create_internal_label (any, color);
+ internal_label_lock (any, sr_policy->color);
+
+ pfx.fp_eos = MPLS_EOS;
+ path.frp_eos = MPLS_EOS;
+
+ fib_route_path_t *paths = NULL;
+ vec_add1 (paths, path);
+
+ pfx.fp_label = label;
+ fib_table_entry_update (sm->fib_table_EC,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+ pfx.fp_label = any_label;
+ fib_table_entry_update (sm->fib_table_EC,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+ fib_mpls_label_t fml = {
+ .fml_value = MPLS_IETF_IMPLICIT_NULL_LABEL,
+ };
+
+ vec_add1 (path.frp_label_stack, fml);
+ pfx.fp_eos = MPLS_NON_EOS;
+ path.frp_eos = MPLS_NON_EOS;
+
+ paths = NULL;
+ vec_add1 (paths, path);
+
+ pfx.fp_label = label;
+ fib_table_entry_update (sm->fib_table_EC,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+ pfx.fp_label = any_label;
+ fib_table_entry_update (sm->fib_table_EC,
+ &pfx,
+ FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+ }
+ return 0;
+}
+
+/**
+ * @brief CLI to modify the Endpoint,Color of an SR policy
+ */
+static clib_error_t *
+cli_sr_mpls_policy_ec_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ ip46_address_t endpoint;
+ u32 color = (u32) ~ 0;
+ mpls_label_t bsid;
+ u8 endpoint_type = 0;
+ char clear = 0, color_set = 0, bsid_set = 0;
+
+ clib_memset (&endpoint, 0, sizeof (ip46_address_t));
+
+ int rv;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!endpoint_type
+ && unformat (input, "endpoint %U", unformat_ip6_address,
+ &endpoint.ip6))
+ endpoint_type = SR_STEER_IPV6;
+ else if (!endpoint_type
+ && unformat (input, "endpoint %U", unformat_ip4_address,
+ &endpoint.ip4))
+ endpoint_type = SR_STEER_IPV4;
+ else if (!color_set && unformat (input, "color %u", &color))
+ color_set = 1;
+ else if (!bsid_set
+ && unformat (input, "bsid %U", unformat_mpls_unicast_label,
+ &bsid))
+ bsid_set = 1;
+ else if (!clear && unformat (input, "clear"))
+ clear = 1;
+ else
+ break;
+ }
+
+ if (!bsid_set)
+ return clib_error_return (0, "No BSID specified");
+ if (!endpoint_type && !clear)
+ return clib_error_return (0, "No Endpoint specified");
+ if (!color_set && !clear)
+ return clib_error_return (0, "No Color set");
+
+ /* In case its a cleanup */
+ if (clear)
+ {
+ ip6_address_set_zero (&endpoint.ip6);
+ color = (u32) ~ 0;
+ }
+ rv =
+ sr_mpls_policy_assign_endpoint_color (bsid, &endpoint, endpoint_type,
+ color);
+
+ if (rv)
+ clib_error_return (0, "Error on Endpoint,Color");
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND(cli_sr_mpls_policy_ec_command, static)=
+{
+ .path = "sr mpls policy te",
+ .short_help = "sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234",
+ .function = cli_sr_mpls_policy_ec_command_fn,
+};
+
+/********************* SR MPLS Policy initialization ***********************/
+/**
+ * @brief SR MPLS Policy initialization
+ */
+clib_error_t *
+sr_mpls_policy_rewrite_init (vlib_main_t * vm)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+
+ /* Init memory for sr policy keys (bsid <-> ip6_address_t) */
+ sm->sr_policies_index_hash = NULL;
+ sm->sr_policies_c2e2eclabel_hash.hash = NULL;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_mpls_policy_rewrite_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables: eval: (c-set-style "gnu") End:
+ */
diff --git a/src/plugins/srmpls/sr_mpls_steering.c b/src/plugins/srmpls/sr_mpls_steering.c
new file mode 100644
index 00000000000..24c8b0e2d9f
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls_steering.c
@@ -0,0 +1,897 @@
+/*
+ * sr_steering.c: ipv6 segment routing steering into SR policy
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * @file
+ * @brief Packet steering into SR-MPLS Policies
+ *
+ * This file is in charge of handling the FIB appropiatly to steer packets
+ * through SR Policies as defined in 'sr_mpls_policy.c'. Notice that here
+ * we are only doing steering. SR policy application is done in
+ * sr_policy_rewrite.c
+ *
+ * Supports:
+ * - Steering of IPv6 traffic Destination Address based through BSID
+ * - Steering of IPv4 traffic Destination Address based through BSID
+ * - Steering of IPv4 and IPv6 traffic through N,C (SR CP)
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include "sr_mpls.h"
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/mpls_fib.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#define SRMPLS_TE_OFFSET 50
+
+/**
+ * @brief function to sort the colors in descending order
+ */
+int
+sort_color_descent (const u32 * x, u32 * y)
+{
+ return *y - *x;
+}
+
+/********************* Internal (NH, C) labels *******************************/
+/**
+ * @brief find the corresponding label for (endpoint, color) and lock it
+ * endpoint might be NULL or ANY
+ * NULL = 0, ANY=~0
+ */
+u32
+find_or_create_internal_label (ip46_address_t endpoint, u32 color)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ uword *color_table, *result_label;
+
+ if (!sm->sr_policies_c2e2eclabel_hash.hash)
+ mhash_init (&sm->sr_policies_c2e2eclabel_hash, sizeof (mhash_t),
+ sizeof (u32));
+
+ color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
+ if (!color_table)
+ {
+ mhash_t color_t;
+ clib_memset (&color_t, 0, sizeof (mhash_t));
+ mhash_init (&color_t, sizeof (u32), sizeof (ip46_address_t));
+ mhash_set_mem (&sm->sr_policies_c2e2eclabel_hash, &color,
+ (uword *) & color_t, NULL);
+ color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
+ }
+
+ result_label = mhash_get ((mhash_t *) color_table, &endpoint);
+
+ if (result_label)
+ return (u32) * result_label;
+
+ /* Create and set a new internal label */
+ u32 *new_internal_label = 0;
+ pool_get (sm->ec_labels, new_internal_label);
+ *new_internal_label = 0;
+ mhash_set ((mhash_t *) color_table, &endpoint,
+ (new_internal_label - sm->ec_labels) + SRMPLS_TE_OFFSET, NULL);
+
+ return (new_internal_label - sm->ec_labels) + SRMPLS_TE_OFFSET;
+}
+
+always_inline void
+internal_label_lock_co (ip46_address_t endpoint, u32 color, char co_bits)
+{
+ ip46_address_t zero, any;
+ ip46_address_reset (&zero);
+ any.as_u64[0] = any.as_u64[1] = (u64) ~ 0;
+ switch (co_bits)
+ {
+ case SR_TE_CO_BITS_10:
+ internal_label_lock (endpoint, color);
+ internal_label_lock (zero, color);
+ internal_label_lock (any, color);
+ break;
+ case SR_TE_CO_BITS_01:
+ internal_label_lock (endpoint, color);
+ internal_label_lock (zero, color);
+ break;
+ case SR_TE_CO_BITS_00:
+ case SR_TE_CO_BITS_11:
+ internal_label_lock (endpoint, color);
+ break;
+ }
+}
+
+/**
+ * @brief lock the label for (NH, C)
+ * endpoint might be NULL or ANY
+ * NULL = 0, ANY=~0
+ */
+void
+internal_label_lock (ip46_address_t endpoint, u32 color)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ uword *color_table, *result_label;
+
+ if (!sm->sr_policies_c2e2eclabel_hash.hash)
+ return;
+
+ color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
+ if (!color_table)
+ return;
+
+ result_label = mhash_get ((mhash_t *) color_table, &endpoint);
+
+ if (!result_label)
+ return;
+
+ /* Lock it */
+ u32 *label_lock =
+ pool_elt_at_index (sm->ec_labels, *result_label - SRMPLS_TE_OFFSET);
+ (*label_lock)++;
+}
+
+
+always_inline void
+internal_label_unlock_co (ip46_address_t endpoint, u32 color, char co_bits)
+{
+ ip46_address_t zero, any;
+ ip46_address_reset (&zero);
+ any.as_u64[0] = any.as_u64[1] = (u64) ~ 0;
+ switch (co_bits)
+ {
+ case SR_TE_CO_BITS_10:
+ internal_label_unlock (endpoint, color);
+ internal_label_unlock (zero, color);
+ internal_label_unlock (any, color);
+ break;
+ case SR_TE_CO_BITS_01:
+ internal_label_unlock (endpoint, color);
+ internal_label_unlock (zero, color);
+ break;
+ case SR_TE_CO_BITS_00:
+ case SR_TE_CO_BITS_11:
+ internal_label_unlock (endpoint, color);
+ break;
+ }
+}
+
+/**
+ * @brief Release lock on label for (endpoint, color)
+ * endpoint might be NULL or ANY
+ * NULL = 0, ANY=~0
+ */
+void
+internal_label_unlock (ip46_address_t endpoint, u32 color)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ uword *color_table, *result_label;
+
+ if (!sm->sr_policies_c2e2eclabel_hash.hash)
+ return;
+
+ color_table = mhash_get (&sm->sr_policies_c2e2eclabel_hash, &color);
+ if (!color_table)
+ return;
+
+ result_label = mhash_get ((mhash_t *) color_table, &endpoint);
+
+ if (!result_label)
+ return;
+
+ u32 *label_lock =
+ pool_elt_at_index (sm->ec_labels, *result_label - SRMPLS_TE_OFFSET);
+ (*label_lock)--;
+
+ if (*label_lock == 0)
+ {
+ pool_put (sm->ec_labels, label_lock);
+ mhash_unset ((mhash_t *) color_table, &endpoint, NULL);
+ if (mhash_elts ((mhash_t *) color_table) == 0)
+ {
+ mhash_free ((mhash_t *) color_table);
+ mhash_unset (&sm->sr_policies_c2e2eclabel_hash, &color, NULL);
+ if (mhash_elts (&sm->sr_policies_c2e2eclabel_hash) == 0)
+ {
+ mhash_free (&sm->sr_policies_c2e2eclabel_hash);
+ sm->sr_policies_c2e2eclabel_hash.hash = NULL;
+ fib_table_unlock (sm->fib_table_EC, FIB_PROTOCOL_MPLS,
+ FIB_SOURCE_SR);
+ sm->fib_table_EC = (u32) ~ 0;
+ }
+ }
+ }
+}
+
+/********************* steering computation *********************************/
+/**
+ * @brief function to update the FIB
+ */
+void
+compute_sr_te_automated_steering_fib_entry (mpls_sr_steering_policy_t *
+ steer_pl)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ fib_prefix_t pfx = { 0 };
+
+ u32 *internal_labels = 0;
+ ip46_address_t zero, any;
+ ip46_address_reset (&zero);
+ any.as_u64[0] = any.as_u64[1] = (u64) ~ 0;
+
+ u32 *color_i = NULL;
+ vec_foreach (color_i, steer_pl->color)
+ {
+ switch (steer_pl->co_bits)
+ {
+ case SR_TE_CO_BITS_10:
+ vec_add1 (internal_labels,
+ find_or_create_internal_label (steer_pl->next_hop,
+ *color_i));
+ vec_add1 (internal_labels,
+ find_or_create_internal_label (zero, *color_i));
+ vec_add1 (internal_labels,
+ find_or_create_internal_label (any, *color_i));
+ break;
+ case SR_TE_CO_BITS_01:
+ vec_add1 (internal_labels,
+ find_or_create_internal_label (steer_pl->next_hop,
+ *color_i));
+ vec_add1 (internal_labels,
+ find_or_create_internal_label (zero, *color_i));
+ break;
+ case SR_TE_CO_BITS_00:
+ case SR_TE_CO_BITS_11:
+ vec_add1 (internal_labels,
+ find_or_create_internal_label (steer_pl->next_hop,
+ *color_i));
+ break;
+ }
+ }
+
+ /* Does hidden FIB already exist? */
+ if (sm->fib_table_EC == (u32) ~ 0)
+ {
+ sm->fib_table_EC = fib_table_create_and_lock (FIB_PROTOCOL_MPLS,
+ FIB_SOURCE_SR,
+ "SR-MPLS Traffic Engineering (NextHop,Color)");
+
+ fib_table_flush (sm->fib_table_EC, FIB_PROTOCOL_MPLS,
+ FIB_SOURCE_SPECIAL);
+ }
+
+ /* Add the corresponding FIB entries */
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_eos = MPLS_EOS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = sm->fib_table_EC,
+ .frp_weight = 1,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_label_stack = 0
+ };
+ fib_route_path_t *paths = NULL;
+
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+ }
+
+ if (steer_pl->vpn_label != (u32) ~ 0)
+ {
+ fib_mpls_label_t fml = {
+ .fml_value = steer_pl->vpn_label,
+ };
+ vec_add1 (path.frp_label_stack, fml);
+ path.frp_eos = MPLS_NON_EOS;
+ }
+
+ u32 label_i;
+ vec_foreach_index (label_i, internal_labels)
+ {
+ path.frp_local_label = internal_labels[label_i];
+ path.frp_preference = label_i;
+ vec_add1 (paths, path);
+ }
+
+ /* Finally we must add to FIB IGP to N */
+ clib_memcpy (&path.frp_addr, &steer_pl->next_hop,
+ sizeof (steer_pl->next_hop));
+ path.frp_preference = vec_len (internal_labels);
+ path.frp_label_stack = NULL;
+
+ if (steer_pl->nh_type == SR_STEER_IPV6)
+ {
+ path.frp_proto = DPO_PROTO_IP6;
+ path.frp_fib_index =
+ fib_table_find (FIB_PROTOCOL_IP6,
+ (steer_pl->classify.fib_table !=
+ (u32) ~ 0 ? steer_pl->classify.fib_table : 0));
+ }
+ else if (steer_pl->nh_type == SR_STEER_IPV4)
+ {
+ path.frp_proto = DPO_PROTO_IP4;
+ path.frp_fib_index =
+ fib_table_find (FIB_PROTOCOL_IP4,
+ (steer_pl->classify.fib_table !=
+ (u32) ~ 0 ? steer_pl->classify.fib_table : 0));
+ }
+
+ vec_add1 (paths, path);
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ fib_table_entry_update (fib_table_find
+ (FIB_PROTOCOL_IP6,
+ (steer_pl->classify.fib_table !=
+ (u32) ~ 0 ? steer_pl->classify.fib_table : 0)),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ fib_table_entry_update (fib_table_find
+ (FIB_PROTOCOL_IP4,
+ (steer_pl->classify.fib_table !=
+ (u32) ~ 0 ? steer_pl->classify.fib_table : 0)),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+
+ vec_free (paths);
+ paths = NULL;
+}
+
+/**
+ * @brief Steer traffic L3 traffic through a given SR-MPLS policy
+ *
+ * @param is_del
+ * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index)
+ * @param sr_policy is the index of the SR Policy (alt to bsid)
+ * @param table_id is the VRF where to install the FIB entry for the BSID
+ * @param prefix is the IPv4/v6 address for L3 traffic type
+ * @param mask_width is the mask for L3 traffic type
+ * @param traffic_type describes the type of traffic
+ * @param next_hop SR TE Next-Hop
+ * @param nh_type is the AF of Next-Hop
+ * @param color SR TE color
+ * @param co_bits SR TE color-only bits
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_steering_policy_add (mpls_label_t bsid, u32 table_id,
+ ip46_address_t * prefix, u32 mask_width,
+ u8 traffic_type, ip46_address_t * next_hop,
+ u8 nh_type, u32 color, char co_bits,
+ mpls_label_t vpn_label)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ sr_mpls_steering_key_t key;
+ mpls_sr_steering_policy_t *steer_pl;
+ fib_prefix_t pfx = { 0 };
+
+ mpls_sr_policy_t *sr_policy = 0;
+ uword *p = 0;
+
+ clib_memset (&key, 0, sizeof (sr_mpls_steering_key_t));
+
+ if (traffic_type != SR_STEER_IPV4 && traffic_type != SR_STEER_IPV6)
+ return -1;
+
+ /* Compute the steer policy key */
+ key.prefix.as_u64[0] = prefix->as_u64[0];
+ key.prefix.as_u64[1] = prefix->as_u64[1];
+ key.mask_width = mask_width;
+ key.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
+ key.traffic_type = traffic_type;
+
+ /*
+ * Search for steering policy. If already exists we are adding a new
+ * color.
+ */
+ if (!sm->sr_steer_policies_hash.hash)
+ mhash_init (&sm->sr_steer_policies_hash, sizeof (uword),
+ sizeof (sr_mpls_steering_key_t));
+
+ p = mhash_get (&sm->sr_steer_policies_hash, &key);
+ if (p)
+ {
+ steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
+ if (steer_pl->bsid != (u32) ~ 0)
+ return -1; //Means we are rewritting the steering. Not allowed.
+
+ /* Means we are adding a color. Check that NH match. */
+ if (ip46_address_cmp (&steer_pl->next_hop, next_hop))
+ return -2;
+ if (vec_search (steer_pl->color, color) != ~0)
+ return -3;
+ if (steer_pl->co_bits != co_bits)
+ return -4; /* CO colors should be the same */
+ if (steer_pl->vpn_label != vpn_label)
+ return -5; /* VPN label should be the same */
+
+ /* Remove the steering and ReDo it */
+ vec_add1 (steer_pl->color, color);
+ vec_sort_with_function (steer_pl->color, sort_color_descent);
+ compute_sr_te_automated_steering_fib_entry (steer_pl);
+ internal_label_lock_co (steer_pl->next_hop, color, steer_pl->co_bits);
+ return 0;
+ }
+
+ /* Create a new steering policy */
+ pool_get (sm->steer_policies, steer_pl);
+ clib_memset (steer_pl, 0, sizeof (*steer_pl));
+ clib_memcpy (&steer_pl->classify.prefix, prefix, sizeof (ip46_address_t));
+ clib_memcpy (&steer_pl->next_hop, next_hop, sizeof (ip46_address_t));
+ steer_pl->nh_type = nh_type;
+ steer_pl->co_bits = co_bits;
+ steer_pl->classify.mask_width = mask_width;
+ steer_pl->classify.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
+ steer_pl->classify.traffic_type = traffic_type;
+ steer_pl->color = NULL;
+ steer_pl->vpn_label = vpn_label;
+
+ /* Create and store key */
+ mhash_set (&sm->sr_steer_policies_hash, &key, steer_pl - sm->steer_policies,
+ NULL);
+
+ /* Local steering */
+ if (bsid != (u32) ~ 0)
+ {
+ if (!sm->sr_policies_index_hash)
+ sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));
+ steer_pl->bsid = bsid;
+ p = hash_get (sm->sr_policies_index_hash, bsid);
+ if (!p)
+ return -1;
+ sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
+
+ fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_MPLS,
+ .frp_local_label = sr_policy->bsid,
+ .frp_eos = MPLS_EOS,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = 0,
+ .frp_weight = 1,
+ .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
+ .frp_label_stack = 0
+ };
+ fib_route_path_t *paths = NULL;
+
+ if (steer_pl->vpn_label != (u32) ~ 0)
+ {
+ fib_mpls_label_t fml = {
+ .fml_value = steer_pl->vpn_label,
+ };
+ vec_add1 (path.frp_label_stack, fml);
+ }
+
+ /* FIB API calls - Recursive route through the BindingSID */
+ if (traffic_type == SR_STEER_IPV6)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+ path.frp_fib_index = 0;
+ path.frp_preference = 0;
+ vec_add1 (paths, path);
+ fib_table_entry_path_add2 (fib_table_find
+ (FIB_PROTOCOL_IP6,
+ (table_id != (u32) ~ 0 ? table_id : 0)),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+ vec_free (paths);
+ }
+ else if (traffic_type == SR_STEER_IPV4)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+ path.frp_fib_index = 0;
+ path.frp_preference = 0;
+ vec_add1 (paths, path);
+ fib_table_entry_path_add2 (fib_table_find
+ (FIB_PROTOCOL_IP4,
+ (table_id != (u32) ~ 0 ? table_id : 0)),
+ &pfx, FIB_SOURCE_SR,
+ FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);
+ vec_free (paths);
+ }
+ }
+ /* Automated steering */
+ else
+ {
+ steer_pl->bsid = (u32) ~ 0;
+ vec_add1 (steer_pl->color, color);
+ compute_sr_te_automated_steering_fib_entry (steer_pl);
+ internal_label_lock_co (steer_pl->next_hop, color, steer_pl->co_bits);
+ }
+ return 0;
+}
+
+/**
+ * @brief Delete steering rule for an SR-MPLS policy
+ *
+ * @param is_del
+ * @param bsid is the bindingSID of the SR Policy (alt to sr_policy_index)
+ * @param sr_policy is the index of the SR Policy (alt to bsid)
+ * @param table_id is the VRF where to install the FIB entry for the BSID
+ * @param prefix is the IPv4/v6 address for L3 traffic type
+ * @param mask_width is the mask for L3 traffic type
+ * @param traffic_type describes the type of traffic
+ * @param next_hop SR TE Next-HOP
+ * @param nh_type is the AF of Next-Hop
+ * @param color SR TE color
+ *
+ * @return 0 if correct, else error
+ */
+int
+sr_mpls_steering_policy_del (ip46_address_t * prefix, u32 mask_width,
+ u8 traffic_type, u32 table_id, u32 color)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ sr_mpls_steering_key_t key;
+ mpls_sr_steering_policy_t *steer_pl;
+ fib_prefix_t pfx = { 0 };
+ uword *p = 0;
+
+ clib_memset (&key, 0, sizeof (sr_mpls_steering_key_t));
+
+ /* Compute the steer policy key */
+ if (traffic_type != SR_STEER_IPV4 && traffic_type != SR_STEER_IPV6)
+ return -1;
+
+ key.prefix.as_u64[0] = prefix->as_u64[0];
+ key.prefix.as_u64[1] = prefix->as_u64[1];
+ key.mask_width = mask_width;
+ key.fib_table = (table_id != (u32) ~ 0 ? table_id : 0);
+ key.traffic_type = traffic_type;
+
+ if (!sm->sr_steer_policies_hash.hash)
+ mhash_init (&sm->sr_steer_policies_hash, sizeof (uword),
+ sizeof (sr_mpls_steering_key_t));
+
+ /* Search for the item */
+ p = mhash_get (&sm->sr_steer_policies_hash, &key);
+
+ if (!p)
+ return -1;
+
+ /* Retrieve Steer Policy function */
+ steer_pl = pool_elt_at_index (sm->steer_policies, p[0]);
+
+ if (steer_pl->bsid == (u32) ~ 0)
+ {
+ /* Remove the color from the color vector */
+ vec_del1 (steer_pl->color, vec_search (steer_pl->color, color));
+
+ if (vec_len (steer_pl->color))
+ {
+ /* Reorder Colors */
+ vec_sort_with_function (steer_pl->color, sort_color_descent);
+ compute_sr_te_automated_steering_fib_entry (steer_pl);
+ /* Remove all the locks for this ones... */
+ internal_label_unlock_co (steer_pl->next_hop, color,
+ steer_pl->co_bits);
+ return 0;
+ }
+ else
+ {
+ vec_free (steer_pl->color);
+ /* Remove FIB entry */
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP6,
+ steer_pl->classify.fib_table), &pfx,
+ FIB_SOURCE_SR);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP4,
+ steer_pl->classify.fib_table), &pfx,
+ FIB_SOURCE_SR);
+ }
+ /* Remove all the locks for this ones... */
+ internal_label_unlock_co (steer_pl->next_hop, color,
+ steer_pl->co_bits);
+ }
+ }
+ else //Remove by BSID
+ {
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP6;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip6 = steer_pl->classify.prefix.ip6;
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP6,
+ steer_pl->classify.fib_table), &pfx,
+ FIB_SOURCE_SR);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ pfx.fp_proto = FIB_PROTOCOL_IP4;
+ pfx.fp_len = steer_pl->classify.mask_width;
+ pfx.fp_addr.ip4 = steer_pl->classify.prefix.ip4;
+ fib_table_entry_delete (fib_table_find
+ (FIB_PROTOCOL_IP4,
+ steer_pl->classify.fib_table), &pfx,
+ FIB_SOURCE_SR);
+ }
+ }
+ /* Delete SR steering policy entry */
+ pool_put (sm->steer_policies, steer_pl);
+ mhash_unset (&sm->sr_steer_policies_hash, &key, NULL);
+ if (mhash_elts (&sm->sr_steer_policies_hash) == 0)
+ {
+ mhash_free (&sm->sr_steer_policies_hash);
+ sm->sr_steer_policies_hash.hash = NULL;
+ }
+ return 0;
+}
+
+static clib_error_t *
+sr_mpls_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_del = 0;
+
+ ip46_address_t prefix, nh;
+ u32 dst_mask_width = 0;
+ u8 traffic_type = 0;
+ u8 nh_type = 0;
+ u32 fib_table = (u32) ~ 0, color = (u32) ~ 0;
+ u32 co_bits = 0;
+
+ mpls_label_t bsid, vpn_label = (u32) ~ 0;
+
+ u8 sr_policy_set = 0;
+
+ clib_memset (&prefix, 0, sizeof (ip46_address_t));
+ clib_memset (&nh, 0, sizeof (ip46_address_t));
+
+ int rv;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "del"))
+ is_del = 1;
+ else if (!traffic_type
+ && unformat (input, "l3 %U/%d", unformat_ip6_address,
+ &prefix.ip6, &dst_mask_width))
+ traffic_type = SR_STEER_IPV6;
+ else if (!traffic_type
+ && unformat (input, "l3 %U/%d", unformat_ip4_address,
+ &prefix.ip4, &dst_mask_width))
+ traffic_type = SR_STEER_IPV4;
+ else if (!sr_policy_set
+ && unformat (input, "via sr policy bsid %U",
+ unformat_mpls_unicast_label, &bsid))
+ sr_policy_set = 1;
+ else if (!sr_policy_set
+ && unformat (input, "via next-hop %U color %d co %d",
+ unformat_ip4_address, &nh.ip4, &color, &co_bits))
+ {
+ sr_policy_set = 1;
+ nh_type = SR_STEER_IPV4;
+ }
+ else if (!sr_policy_set
+ && unformat (input, "via next-hop %U color %d co %d",
+ unformat_ip6_address, &nh.ip6, &color, &co_bits))
+ {
+ sr_policy_set = 1;
+ nh_type = SR_STEER_IPV6;
+ }
+ else if (fib_table == (u32) ~ 0
+ && unformat (input, "fib-table %d", &fib_table));
+ else if (unformat (input, "vpn-label %U",
+ unformat_mpls_unicast_label, &vpn_label));
+ else
+ break;
+ }
+
+ if (!traffic_type)
+ return clib_error_return (0, "No L3 traffic specified");
+ if (!sr_policy_set)
+ return clib_error_return (0, "No SR policy specified");
+
+ /* Make sure that the prefixes are clean */
+ if (traffic_type == SR_STEER_IPV4)
+ {
+ u32 mask =
+ (dst_mask_width ? (0xFFFFFFFFu >> (32 - dst_mask_width)) : 0);
+ prefix.ip4.as_u32 &= mask;
+ }
+ else if (traffic_type == SR_STEER_IPV6)
+ {
+ ip6_address_t mask;
+ ip6_address_mask_from_width (&mask, dst_mask_width);
+ ip6_address_mask (&prefix.ip6, &mask);
+ }
+
+ if (nh_type)
+ bsid = (u32) ~ 0;
+
+ if (is_del)
+ rv =
+ sr_mpls_steering_policy_del (&prefix, dst_mask_width,
+ traffic_type, fib_table, color);
+
+ else
+ rv =
+ sr_mpls_steering_policy_add (bsid, fib_table, &prefix, dst_mask_width,
+ traffic_type, &nh, nh_type, color, co_bits,
+ vpn_label);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case 1:
+ return 0;
+ case -1:
+ return clib_error_return (0, "Incorrect API usage.");
+ case -2:
+ return clib_error_return (0, "The Next-Hop does not match.");
+ case -3:
+ return clib_error_return (0, "The color already exists.");
+ case -4:
+ return clib_error_return (0, "The co-bits do not match.");
+ case -5:
+ return clib_error_return (0, "The VPN-labels do not match.");
+ default:
+ return clib_error_return (0, "BUG: sr steer policy returns %d", rv);
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND(sr_mpls_steer_policy_command, static)=
+{
+ .path = "sr mpls steer",
+ .short_help = "sr mpls steer (del) l3 <ip_addr/mask> "
+ "via [sr policy bsid <mpls_label> || next-hop <ip46_addr> color <u32> co <0|1|2|3> ](fib-table <fib_table_index>)(vpn-label 500)",
+ .long_help =
+ "\tSteer L3 traffic through an existing SR policy.\n"
+ "\tExamples:\n"
+ "\t\tsr steer l3 2001::/64 via sr_policy bsid 29999\n"
+ "\t\tsr steer del l3 2001::/64 via sr_policy bsid 29999\n"
+ "\t\tsr steer l3 2001::/64 via next-hop 1.1.1.1 color 1234 co 0\n"
+ "\t\tsr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500\n",
+ .function = sr_mpls_steer_policy_command_fn,
+};
+
+static clib_error_t *
+show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+ mpls_sr_steering_policy_t **steer_policies = 0;
+ mpls_sr_steering_policy_t *steer_pl;
+
+ int i;
+
+ vlib_cli_output (vm, "SR MPLS steering policies:");
+ pool_foreach (steer_pl, sm->steer_policies) {
+ vec_add1(steer_policies, steer_pl);
+ }
+ for (i = 0; i < vec_len (steer_policies); i++)
+ {
+ vlib_cli_output (vm, "==========================");
+ steer_pl = steer_policies[i];
+ if (steer_pl->classify.traffic_type == SR_STEER_IPV4)
+ {
+ vlib_cli_output (vm, "Prefix: %U/%d via:",
+ format_ip4_address,
+ &steer_pl->classify.prefix.ip4,
+ steer_pl->classify.mask_width);
+ }
+ else if (steer_pl->classify.traffic_type == SR_STEER_IPV6)
+ {
+ vlib_cli_output (vm, "Prefix: %U/%d via:",
+ format_ip6_address,
+ &steer_pl->classify.prefix.ip6,
+ steer_pl->classify.mask_width);
+ }
+
+ if (steer_pl->bsid != (u32) ~ 0)
+ {
+ vlib_cli_output (vm, "· BSID %U",
+ format_mpls_unicast_label, steer_pl->bsid);
+ }
+ else
+ {
+ if (steer_pl->nh_type == SR_STEER_IPV4)
+ {
+ vlib_cli_output (vm, "· Next-hop %U",
+ format_ip4_address, &steer_pl->next_hop.ip4);
+ }
+ else if (steer_pl->nh_type == SR_STEER_IPV6)
+ {
+ vlib_cli_output (vm, "· Next-hop %U",
+ format_ip6_address, &steer_pl->next_hop.ip6);
+ }
+
+ u32 *color_i = 0;
+ u8 *s = NULL;
+ s = format (s, "[ ");
+ vec_foreach (color_i, steer_pl->color)
+ {
+ s = format (s, "%d, ", *color_i);
+ }
+ s = format (s, "\b\b ]");
+ vlib_cli_output (vm, "· Color %s", s);
+
+ switch (steer_pl->co_bits)
+ {
+ case SR_TE_CO_BITS_00:
+ vlib_cli_output (vm, "· CO-bits: 00");
+ break;
+ case SR_TE_CO_BITS_01:
+ vlib_cli_output (vm, "· CO-bits: 01");
+ break;
+ case SR_TE_CO_BITS_10:
+ vlib_cli_output (vm, "· CO-bits: 10");
+ break;
+ case SR_TE_CO_BITS_11:
+ vlib_cli_output (vm, "· CO-bits: 11");
+ break;
+ }
+ }
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND(show_sr_mpls_steering_policies_command, static)=
+{
+ .path = "show sr mpls steering policies",
+ .short_help = "show sr mpls steering policies",
+ .function = show_sr_mpls_steering_policies_command_fn,
+};
+
+clib_error_t *
+sr_mpls_steering_init (vlib_main_t * vm)
+{
+ mpls_sr_main_t *sm = &sr_mpls_main;
+
+ /* Init memory for function keys */
+ sm->sr_steer_policies_hash.hash = NULL;
+
+ sm->fib_table_EC = (u32) ~ 0;
+ sm->ec_labels = 0;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION(sr_mpls_steering_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables: eval: (c-set-style "gnu") End:
+ */
diff --git a/src/plugins/srmpls/sr_mpls_test.c b/src/plugins/srmpls/sr_mpls_test.c
new file mode 100644
index 00000000000..7aff4c32b06
--- /dev/null
+++ b/src/plugins/srmpls/sr_mpls_test.c
@@ -0,0 +1,174 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vpp/api/types.h>
+
+#define __plugin_msg_base sr_mpls_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <vnet/format_fns.h>
+#include <plugins/srmpls/sr_mpls.api_enum.h>
+#include <plugins/srmpls/sr_mpls.api_types.h>
+
+#define vl_endianfun /* define message structures */
+#include <plugins/srmpls/sr_mpls.api.h>
+#undef vl_endianfun
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ u32 ping_id;
+ vat_main_t *vat_main;
+} sr_mpls_test_main_t;
+
+static sr_mpls_test_main_t sr_mpls_test_main;
+
+static int
+api_sr_mpls_policy_mod (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_mpls_steering_add_del (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_mpls_policy_assign_endpoint_color (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_mpls_policy_add (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sr_mpls_policy_add_t *mp;
+ u32 bsid = 0;
+ u32 weight = 1;
+ u8 type = 0;
+ u8 n_segments = 0;
+ u32 sid;
+ u32 *segments = NULL;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bsid %d", &bsid))
+ ;
+ else if (unformat (i, "weight %d", &weight))
+ ;
+ else if (unformat (i, "spray"))
+ type = 1;
+ else if (unformat (i, "next %d", &sid))
+ {
+ n_segments += 1;
+ vec_add1 (segments, htonl (sid));
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (bsid == 0)
+ {
+ errmsg ("bsid not set");
+ return -99;
+ }
+
+ if (n_segments == 0)
+ {
+ errmsg ("no sid in segment stack");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M2 (SR_MPLS_POLICY_ADD, mp, sizeof (u32) * n_segments);
+
+ mp->bsid = htonl (bsid);
+ mp->weight = htonl (weight);
+ mp->is_spray = type;
+ mp->n_segments = n_segments;
+ memcpy (mp->segments, segments, sizeof (u32) * n_segments);
+ vec_free (segments);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sr_mpls_policy_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sr_mpls_policy_del_t *mp;
+ u32 bsid = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bsid %d", &bsid))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (bsid == 0)
+ {
+ errmsg ("bsid not set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SR_MPLS_POLICY_DEL, mp);
+
+ mp->bsid = htonl (bsid);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#include <plugins/srmpls/sr_mpls.api_test.c>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/tlsopenssl/tls_openssl.c b/src/plugins/tlsopenssl/tls_openssl.c
index 98982439b96..5d172a0adcf 100644
--- a/src/plugins/tlsopenssl/tls_openssl.c
+++ b/src/plugins/tlsopenssl/tls_openssl.c
@@ -1179,18 +1179,13 @@ int
tls_openssl_set_ciphers (char *ciphers)
{
openssl_main_t *om = &openssl_main;
- int i;
if (!ciphers)
{
return -1;
}
- vec_validate (om->ciphers, strlen (ciphers));
- for (i = 0; i < vec_len (om->ciphers) - 1; i++)
- {
- om->ciphers[i] = toupper (ciphers[i]);
- }
+ vec_validate_init_c_string (om->ciphers, ciphers, strlen (ciphers));
return 0;
diff --git a/src/plugins/unittest/gso_test.c b/src/plugins/unittest/gso_test.c
index 54eb7422c87..43c614341d2 100644
--- a/src/plugins/unittest/gso_test.c
+++ b/src/plugins/unittest/gso_test.c
@@ -96,12 +96,94 @@ GSO_TEST_REGISTER_DATA (gso_ipv6_tcp, static) = {
.is_ip6 = 1,
};
+/*
+ * this does not support tunnel packets
+ */
+static void
+set_hdr_offsets (vlib_buffer_t *b0, u8 is_l2)
+{
+ u16 ethertype = 0, l2hdr_sz = 0;
+ vnet_buffer_oflags_t oflags = 0;
+ u8 l4_proto = 0;
+
+ if (!is_l2)
+ {
+ switch (b0->data[0] & 0xf0)
+ {
+ case 0x40:
+ ethertype = ETHERNET_TYPE_IP4;
+ break;
+ case 0x60:
+ ethertype = ETHERNET_TYPE_IP6;
+ break;
+ }
+ }
+ else
+ {
+ ethernet_header_t *eh = (ethernet_header_t *) b0->data;
+ ethertype = clib_net_to_host_u16 (eh->type);
+ l2hdr_sz = sizeof (ethernet_header_t);
+
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eh + 1);
+
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ vlan++;
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ }
+ }
+ }
+
+ vnet_buffer (b0)->l2_hdr_offset = 0;
+ vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz;
+
+ if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4))
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l2hdr_sz);
+ vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
+ l4_proto = ip4->protocol;
+ oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
+ b0->flags |= (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+ }
+ else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l2hdr_sz);
+ vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + sizeof (ip6_header_t);
+ /* FIXME IPv6 EH traversal */
+ l4_proto = ip6->protocol;
+ b0->flags |= (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+ }
+ if (l4_proto == IP_PROTOCOL_TCP)
+ {
+ oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
+ }
+ else if (l4_proto == IP_PROTOCOL_UDP)
+ {
+ oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+ }
+ if (oflags)
+ vnet_buffer_offload_flags_set (b0, oflags);
+}
+
static u32
-fill_buffers (vlib_main_t *vm, u32 *buffer_indices, u8 *data, u32 data_size,
- u32 n_buffers, u32 buffer_size, u32 packet_size, u32 gso_size,
- u32 l4_hdr_len)
+fill_buffers (vlib_main_t *vm, u32 *buffer_indices,
+ gso_test_data_t *gso_test_data, u32 n_buffers, u32 buffer_size,
+ u32 packet_size, u32 gso_size)
{
u32 i;
+ u8 *data = gso_test_data->data;
+ u32 data_size = gso_test_data->data_size;
+ u32 l4_hdr_len = gso_test_data->l4_hdr_len;
+ u8 is_l2 = gso_test_data->is_l2;
for (i = 0; i < n_buffers; i++)
{
@@ -153,6 +235,8 @@ fill_buffers (vlib_main_t *vm, u32 *buffer_indices, u8 *data, u32 data_size,
len += fill_data_size;
}
while (k < n_bufs);
+
+ set_hdr_offsets (b, is_l2);
b->flags |= VNET_BUFFER_F_GSO;
vnet_buffer2 (b)->gso_size = gso_size;
vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_len;
@@ -165,17 +249,14 @@ fill_buffers (vlib_main_t *vm, u32 *buffer_indices, u8 *data, u32 data_size,
static_always_inline u32
gso_segment_buffer_test (vlib_main_t *vm, u32 bi,
- vnet_interface_per_thread_data_t *ptd, u8 is_l2,
- u8 is_ip6)
+ vnet_interface_per_thread_data_t *ptd, u8 is_l2)
{
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- generic_header_offset_t gho = { 0 };
u32 n_tx_bytes = 0;
if (PREDICT_TRUE (b->flags & VNET_BUFFER_F_GSO))
{
- vnet_generic_header_offset_parser (b, &gho, is_l2, !is_ip6, is_ip6);
- n_tx_bytes = gso_segment_buffer_inline (vm, ptd, b, &gho, is_l2, is_ip6);
+ n_tx_bytes = gso_segment_buffer_inline (vm, ptd, b, is_l2);
}
return n_tx_bytes;
@@ -237,19 +318,16 @@ test_gso_perf (vlib_main_t *vm, gso_test_main_t *gtm)
vlib_buffer_free (vm, buffer_indices, n_alloc);
goto done;
}
- n_filled =
- fill_buffers (vm, buffer_indices, gso_test_data->data,
- gso_test_data->data_size, n_buffers, buffer_size,
- packet_size, gso_size, gso_test_data->l4_hdr_len);
+ n_filled = fill_buffers (vm, buffer_indices, gso_test_data, n_buffers,
+ buffer_size, packet_size, gso_size);
u8 is_l2 = gso_test_data->is_l2;
- u8 is_ip6 = gso_test_data->is_ip6;
for (k = 0; k < warmup_rounds; k++)
{
for (j = 0; j < n_filled; j++)
- gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j], is_l2,
- is_ip6);
+ gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j], is_l2);
+
for (j = 0; j < n_filled; j++)
{
vlib_buffer_free (vm, ptd[j].split_buffers,
@@ -264,8 +342,9 @@ test_gso_perf (vlib_main_t *vm, gso_test_main_t *gtm)
{
t0 = clib_cpu_time_now ();
for (j = 0; j < n_filled; j++)
- gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j], is_l2,
- is_ip6);
+ gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j],
+ is_l2);
+
t1 = clib_cpu_time_now ();
t2[i] += (t1 - t0);
for (j = 0; j < n_filled; j++)
diff --git a/src/plugins/unittest/policer_test.c b/src/plugins/unittest/policer_test.c
index 2b14bf687bf..41f769960a3 100644
--- a/src/plugins/unittest/policer_test.c
+++ b/src/plugins/unittest/policer_test.c
@@ -21,7 +21,7 @@ policer_test (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd_arg)
{
int policer_index, i;
- uint rate_kbps, burst, num_pkts;
+ unsigned int rate_kbps, burst, num_pkts;
double total_bytes, cpu_ticks_per_pkt, time = 0;
double cpu_speed, cpu_ticks_per_byte;
policer_result_e result, input_colour = POLICE_CONFORM;
diff --git a/src/plugins/wireguard/wireguard_chachapoly.c b/src/plugins/wireguard/wireguard_chachapoly.c
index 0dd7908d2e2..ad644ff6cb8 100644
--- a/src/plugins/wireguard/wireguard_chachapoly.c
+++ b/src/plugins/wireguard/wireguard_chachapoly.c
@@ -72,11 +72,11 @@ wg_xchacha20poly1305_encrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
u64 h_nonce;
clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce));
- h_nonce = le64toh (h_nonce);
+ h_nonce = clib_little_to_host_u64 (h_nonce);
hchacha20 (derived_key, nonce, key);
for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++)
- (derived_key[i]) = htole32 ((derived_key[i]));
+ (derived_key[i]) = clib_host_to_little_u32 ((derived_key[i]));
uint32_t key_idx;
@@ -102,11 +102,11 @@ wg_xchacha20poly1305_decrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
u64 h_nonce;
clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce));
- h_nonce = le64toh (h_nonce);
+ h_nonce = clib_little_to_host_u64 (h_nonce);
hchacha20 (derived_key, nonce, key);
for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++)
- (derived_key[i]) = htole32 ((derived_key[i]));
+ (derived_key[i]) = clib_host_to_little_u32 ((derived_key[i]));
uint32_t key_idx;
diff --git a/src/plugins/wireguard/wireguard_noise.c b/src/plugins/wireguard/wireguard_noise.c
index 5fe2e44b03b..c3f28f442f5 100644
--- a/src/plugins/wireguard/wireguard_noise.c
+++ b/src/plugins/wireguard/wireguard_noise.c
@@ -751,8 +751,8 @@ noise_tai64n_now (uint8_t output[NOISE_TIMESTAMP_LEN])
unix_nanosec &= REJECT_INTERVAL_MASK;
/* https://cr.yp.to/libtai/tai64.html */
- sec = htobe64 (0x400000000000000aULL + unix_sec);
- nsec = htobe32 (unix_nanosec);
+ sec = clib_host_to_big_u64 (0x400000000000000aULL + unix_sec);
+ nsec = clib_host_to_big_u32 (unix_nanosec);
/* memcpy to output buffer, assuming output could be unaligned. */
clib_memcpy (output, &sec, sizeof (sec));