diff options
Diffstat (limited to 'src')
113 files changed, 4696 insertions, 944 deletions
diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c index fddb3d532ff..98803a916cb 100644 --- a/src/plugins/acl/acl_test.c +++ b/src/plugins/acl/acl_test.c @@ -18,6 +18,8 @@ *------------------------------------------------------------------ */ +#include <byteswap.h> + #include <vat/vat.h> #include <vlibapi/api.h> #include <vlibmemory/api.h> @@ -112,7 +114,7 @@ static void vl_api_acl_interface_list_details_t_handler int i; vat_main_t * vam = acl_test_main.vat_main; u8 *out = 0; - vl_api_acl_interface_list_details_t_endian(mp); + vl_api_acl_interface_list_details_t_endian (mp, 0 /* from network */); out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input); out = format(out, " input "); for(i=0; i<mp->count; i++) { @@ -139,7 +141,8 @@ static void vl_api_acl_interface_etype_whitelist_details_t_handler int i; vat_main_t * vam = acl_test_main.vat_main; u8 *out = 0; - vl_api_acl_interface_etype_whitelist_details_t_endian(mp); + vl_api_acl_interface_etype_whitelist_details_t_endian ( + mp, 0 /* from network */); out = format(out, "sw_if_index: %d, count: %d, n_input: %d\n", mp->sw_if_index, mp->count, mp->n_input); out = format(out, " input "); for(i=0; i<mp->count; i++) { @@ -171,15 +174,15 @@ vl_api_acl_rule_t_pretty_format (u8 *out, vl_api_acl_rule_t * a) inet_ntop(af, &a->src_prefix.address.un, (void *)src, sizeof(src)); inet_ntop(af, &a->dst_prefix.address.un, (void *)dst, sizeof(dst)); - out = format(out, "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport %d-%d tcpflags %d mask %d", - a->src_prefix.address.af ? "ipv6" : "ipv4", a->is_permit, - src, a->src_prefix.len, - dst, a->dst_prefix.len, - a->proto, - a->srcport_or_icmptype_first, a->srcport_or_icmptype_last, - a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last, - a->tcp_flags_value, a->tcp_flags_mask); - return(out); + out = format (out, + "%s action %d src %s/%d dst %s/%d proto %d sport %d-%d dport " + "%d-%d tcpflags %d mask %d", + a->src_prefix.address.af ? "ipv6" : "ipv4", a->is_permit, src, + a->src_prefix.len, dst, a->dst_prefix.len, a->proto, + a->srcport_or_icmptype_first, a->srcport_or_icmptype_last, + a->dstport_or_icmpcode_first, a->dstport_or_icmpcode_last, + a->tcp_flags_value, a->tcp_flags_mask); + return (out); } @@ -189,9 +192,10 @@ static void vl_api_acl_details_t_handler { int i; vat_main_t * vam = acl_test_main.vat_main; - vl_api_acl_details_t_endian(mp); - u8 *out = 0; - out = format(0, "acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag); + vl_api_acl_details_t_endian (mp, 0 /* from network */); + u8 *out = 0; + out = format (0, "acl_index: %d, count: %d\n tag {%s}\n", + mp->acl_index, mp->count, mp->tag); for(i=0; i<mp->count; i++) { out = format(out, " "); out = vl_api_acl_rule_t_pretty_format(out, &mp->r[i]); @@ -223,8 +227,9 @@ static void vl_api_macip_acl_details_t_handler { int i; vat_main_t * vam = acl_test_main.vat_main; - vl_api_macip_acl_details_t_endian(mp); - u8 *out = format(0,"MACIP acl_index: %d, count: %d\n tag {%s}\n", mp->acl_index, mp->count, mp->tag); + vl_api_macip_acl_details_t_endian (mp, 0 /* from network */); + u8 *out = format (0, "MACIP acl_index: %d, count: %d\n tag {%s}\n", + mp->acl_index, mp->count, mp->tag); for(i=0; i<mp->count; i++) { out = format(out, " "); out = vl_api_macip_acl_rule_t_pretty_format(out, &mp->r[i]); diff --git a/src/plugins/acl/sess_mgmt_node.c b/src/plugins/acl/sess_mgmt_node.c index e049a3ffa85..418baef9b6b 100644 --- a/src/plugins/acl/sess_mgmt_node.c +++ b/src/plugins/acl/sess_mgmt_node.c @@ -371,8 +371,9 @@ send_one_worker_interrupt (vlib_main_t * vm, acl_main_t * am, } void -aclp_post_session_change_request (acl_main_t * am, u32 target_thread, - u32 target_session, u32 request_type) +aclp_post_session_change_request (acl_main_t *am, u32 target_thread, + u32 target_session, + acl_fa_sess_req_t request_type) { acl_fa_per_worker_data_t *pw_me = &am->per_worker_data[os_get_thread_index ()]; diff --git a/src/plugins/builtinurl/builtinurl.api b/src/plugins/builtinurl/builtinurl.api index f292fd77a8e..80efa73c725 100644 --- a/src/plugins/builtinurl/builtinurl.api +++ b/src/plugins/builtinurl/builtinurl.api @@ -35,6 +35,7 @@ option version = "1.0.0"; */ autoreply define builtinurl_enable { + option deprecated="incorporated in http_static plugin"; /* Client identifier, set from api_main.my_client_index */ u32 client_index; diff --git a/src/plugins/crypto_native/CMakeLists.txt b/src/plugins/crypto_native/CMakeLists.txt index 9b6091610d9..5499ed4608a 100644 --- a/src/plugins/crypto_native/CMakeLists.txt +++ b/src/plugins/crypto_native/CMakeLists.txt @@ -12,8 +12,8 @@ # limitations under the License. if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") - list(APPEND VARIANTS "slm\;-march=silvermont") - list(APPEND VARIANTS "hsw\;-march=haswell") + list(APPEND VARIANTS "slm\;-march=silvermont -maes") + list(APPEND VARIANTS "hsw\;-march=haswell -maes") if(compiler_flag_march_skylake_avx512 AND compiler_flag_mprefer_vector_width_256) list(APPEND VARIANTS "skx\;-march=skylake-avx512 -mprefer-vector-width=256") endif() @@ -23,16 +23,15 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") if(compiler_flag_march_alderlake) list(APPEND VARIANTS "adl\;-march=alderlake -mprefer-vector-width=256") endif() - set (COMPILE_FILES aes_cbc.c aes_gcm.c aes_ctr.c) - set (COMPILE_OPTS -Wall -fno-common -maes) endif() if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") list(APPEND VARIANTS "armv8\;-march=armv8.1-a+crc+crypto") - set (COMPILE_FILES aes_cbc.c aes_gcm.c aes_ctr.c) - set (COMPILE_OPTS -Wall -fno-common) endif() +set (COMPILE_FILES aes_cbc.c aes_gcm.c aes_ctr.c sha2.c) +set (COMPILE_OPTS -Wall -fno-common) + if (NOT VARIANTS) return() endif() diff --git a/src/plugins/crypto_native/aes_cbc.c b/src/plugins/crypto_native/aes_cbc.c index c84390c3108..dd7ca3f1cf1 100644 --- a/src/plugins/crypto_native/aes_cbc.c +++ b/src/plugins/crypto_native/aes_cbc.c @@ -249,18 +249,30 @@ decrypt: return n_ops; } -#define foreach_aes_cbc_handler_type _(128) _(192) _(256) - -#define _(x) \ -static u32 aes_ops_dec_aes_cbc_##x \ -(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \ -{ return aes_ops_dec_aes_cbc (vm, ops, n_ops, AES_KEY_##x); } \ -static u32 aes_ops_enc_aes_cbc_##x \ -(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \ -{ return aes_ops_enc_aes_cbc (vm, ops, n_ops, AES_KEY_##x); } \ - -foreach_aes_cbc_handler_type; -#undef _ +static int +aes_cbc_cpu_probe () +{ +#if defined(__VAES__) && defined(__AVX512F__) + if (clib_cpu_supports_vaes () && clib_cpu_supports_avx512f ()) + return 50; +#elif defined(__VAES__) + if (clib_cpu_supports_vaes ()) + return 40; +#elif defined(__AVX512F__) + if (clib_cpu_supports_avx512f ()) + return 30; +#elif defined(__AVX2__) + if (clib_cpu_supports_avx2 ()) + return 20; +#elif __AES__ + if (clib_cpu_supports_aes ()) + return 10; +#elif __aarch64__ + if (clib_cpu_supports_aarch64_aes ()) + return 10; +#endif + return -1; +} static void * aes_cbc_key_exp_128 (vnet_crypto_key_t *key) @@ -289,43 +301,39 @@ aes_cbc_key_exp_256 (vnet_crypto_key_t *key) return kd; } -#include <fcntl.h> - -clib_error_t * -#if defined(__VAES__) && defined(__AVX512F__) -crypto_native_aes_cbc_init_icl (vlib_main_t *vm) -#elif defined(__VAES__) -crypto_native_aes_cbc_init_adl (vlib_main_t *vm) -#elif __AVX512F__ -crypto_native_aes_cbc_init_skx (vlib_main_t * vm) -#elif __aarch64__ -crypto_native_aes_cbc_init_neon (vlib_main_t * vm) -#elif __AVX2__ -crypto_native_aes_cbc_init_hsw (vlib_main_t * vm) -#else -crypto_native_aes_cbc_init_slm (vlib_main_t * vm) -#endif -{ - crypto_native_main_t *cm = &crypto_native_main; +#define foreach_aes_cbc_handler_type _ (128) _ (192) _ (256) + +#define _(x) \ + static u32 aes_ops_enc_aes_cbc_##x (vlib_main_t *vm, \ + vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return aes_ops_enc_aes_cbc (vm, ops, n_ops, AES_KEY_##x); \ + } \ + \ + CRYPTO_NATIVE_OP_HANDLER (aes_##x##_cbc_enc) = { \ + .op_id = VNET_CRYPTO_OP_AES_##x##_CBC_ENC, \ + .fn = aes_ops_enc_aes_cbc_##x, \ + .probe = aes_cbc_cpu_probe, \ + }; \ + \ + static u32 aes_ops_dec_aes_cbc_##x (vlib_main_t *vm, \ + vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return aes_ops_dec_aes_cbc (vm, ops, n_ops, AES_KEY_##x); \ + } \ + \ + CRYPTO_NATIVE_OP_HANDLER (aes_##x##_cbc_dec) = { \ + .op_id = VNET_CRYPTO_OP_AES_##x##_CBC_DEC, \ + .fn = aes_ops_dec_aes_cbc_##x, \ + .probe = aes_cbc_cpu_probe, \ + }; \ + \ + CRYPTO_NATIVE_KEY_HANDLER (aes_##x##_cbc) = { \ + .alg_id = VNET_CRYPTO_ALG_AES_##x##_CBC, \ + .key_fn = aes_cbc_key_exp_##x, \ + .probe = aes_cbc_cpu_probe, \ + }; -#define _(x) \ - vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \ - VNET_CRYPTO_OP_AES_##x##_CBC_ENC, \ - aes_ops_enc_aes_cbc_##x); \ - vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \ - VNET_CRYPTO_OP_AES_##x##_CBC_DEC, \ - aes_ops_dec_aes_cbc_##x); \ - cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_CBC] = aes_cbc_key_exp_##x; - foreach_aes_cbc_handler_type; +foreach_aes_cbc_handler_type; #undef _ - return 0; -} - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/crypto_native/aes_ctr.c b/src/plugins/crypto_native/aes_ctr.c index 3a219510419..d02a7b69b9d 100644 --- a/src/plugins/crypto_native/aes_ctr.c +++ b/src/plugins/crypto_native/aes_ctr.c @@ -81,32 +81,50 @@ aes_ctr_key_exp (vnet_crypto_key_t *key, aes_key_size_t ks) foreach_aes_ctr_handler_type; #undef _ -clib_error_t * +static int +probe () +{ #if defined(__VAES__) && defined(__AVX512F__) -crypto_native_aes_ctr_init_icl (vlib_main_t *vm) + if (clib_cpu_supports_vaes () && clib_cpu_supports_avx512f ()) + return 50; #elif defined(__VAES__) -crypto_native_aes_ctr_init_adl (vlib_main_t *vm) -#elif __AVX512F__ -crypto_native_aes_ctr_init_skx (vlib_main_t *vm) -#elif __AVX2__ -crypto_native_aes_ctr_init_hsw (vlib_main_t *vm) + if (clib_cpu_supports_vaes ()) + return 40; +#elif defined(__AVX512F__) + if (clib_cpu_supports_avx512f ()) + return 30; +#elif defined(__AVX2__) + if (clib_cpu_supports_avx2 ()) + return 20; +#elif __AES__ + if (clib_cpu_supports_aes ()) + return 10; #elif __aarch64__ -crypto_native_aes_ctr_init_neon (vlib_main_t *vm) -#else -crypto_native_aes_ctr_init_slm (vlib_main_t *vm) + if (clib_cpu_supports_aarch64_aes ()) + return 10; #endif -{ - crypto_native_main_t *cm = &crypto_native_main; + return -1; +} -#define _(x) \ - vnet_crypto_register_ops_handlers ( \ - vm, cm->crypto_engine_index, VNET_CRYPTO_OP_AES_##x##_CTR_ENC, \ - aes_ops_aes_ctr_##x, aes_ops_aes_ctr_##x##_chained); \ - vnet_crypto_register_ops_handlers ( \ - vm, cm->crypto_engine_index, VNET_CRYPTO_OP_AES_##x##_CTR_DEC, \ - aes_ops_aes_ctr_##x, aes_ops_aes_ctr_##x##_chained); \ - cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_CTR] = aes_ctr_key_exp_##x; - foreach_aes_ctr_handler_type; +#define _(b) \ + CRYPTO_NATIVE_OP_HANDLER (aes_##b##_ctr_enc) = { \ + .op_id = VNET_CRYPTO_OP_AES_##b##_CTR_ENC, \ + .fn = aes_ops_aes_ctr_##b, \ + .cfn = aes_ops_aes_ctr_##b##_chained, \ + .probe = probe, \ + }; \ + \ + CRYPTO_NATIVE_OP_HANDLER (aes_##b##_ctr_dec) = { \ + .op_id = VNET_CRYPTO_OP_AES_##b##_CTR_DEC, \ + .fn = aes_ops_aes_ctr_##b, \ + .cfn = aes_ops_aes_ctr_##b##_chained, \ + .probe = probe, \ + }; \ + CRYPTO_NATIVE_KEY_HANDLER (aes_##b##_ctr) = { \ + .alg_id = VNET_CRYPTO_ALG_AES_##b##_CTR, \ + .key_fn = aes_ctr_key_exp_##b, \ + .probe = probe, \ + }; + +_ (128) _ (192) _ (256) #undef _ - return 0; -} diff --git a/src/plugins/crypto_native/aes_gcm.c b/src/plugins/crypto_native/aes_gcm.c index 6589d411975..220788d4e97 100644 --- a/src/plugins/crypto_native/aes_gcm.c +++ b/src/plugins/crypto_native/aes_gcm.c @@ -118,40 +118,49 @@ aes_gcm_key_exp (vnet_crypto_key_t *key, aes_key_size_t ks) foreach_aes_gcm_handler_type; #undef _ -clib_error_t * +static int +probe () +{ #if defined(__VAES__) && defined(__AVX512F__) -crypto_native_aes_gcm_init_icl (vlib_main_t *vm) + if (clib_cpu_supports_vpclmulqdq () && clib_cpu_supports_vaes () && + clib_cpu_supports_avx512f ()) + return 50; #elif defined(__VAES__) -crypto_native_aes_gcm_init_adl (vlib_main_t *vm) -#elif __AVX512F__ -crypto_native_aes_gcm_init_skx (vlib_main_t *vm) -#elif __AVX2__ -crypto_native_aes_gcm_init_hsw (vlib_main_t *vm) + if (clib_cpu_supports_vpclmulqdq () && clib_cpu_supports_vaes ()) + return 40; +#elif defined(__AVX512F__) + if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_avx512f ()) + return 30; +#elif defined(__AVX2__) + if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_avx2 ()) + return 20; +#elif __AES__ + if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_aes ()) + return 10; #elif __aarch64__ -crypto_native_aes_gcm_init_neon (vlib_main_t *vm) -#else -crypto_native_aes_gcm_init_slm (vlib_main_t *vm) + if (clib_cpu_supports_aarch64_aes ()) + return 10; #endif -{ - crypto_native_main_t *cm = &crypto_native_main; - -#define _(x) \ - vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \ - VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \ - aes_ops_enc_aes_gcm_##x); \ - vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \ - VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \ - aes_ops_dec_aes_gcm_##x); \ - cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x; - foreach_aes_gcm_handler_type; -#undef _ - return 0; + return -1; } -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ +#define _(b) \ + CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_enc) = { \ + .op_id = VNET_CRYPTO_OP_AES_##b##_GCM_ENC, \ + .fn = aes_ops_enc_aes_gcm_##b, \ + .probe = probe, \ + }; \ + \ + CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_dec) = { \ + .op_id = VNET_CRYPTO_OP_AES_##b##_GCM_DEC, \ + .fn = aes_ops_dec_aes_gcm_##b, \ + .probe = probe, \ + }; \ + CRYPTO_NATIVE_KEY_HANDLER (aes_##b##_gcm) = { \ + .alg_id = VNET_CRYPTO_ALG_AES_##b##_GCM, \ + .key_fn = aes_gcm_key_exp_##b, \ + .probe = probe, \ + }; + +_ (128) _ (192) _ (256) +#undef _ diff --git a/src/plugins/crypto_native/crypto_native.h b/src/plugins/crypto_native/crypto_native.h index c15b8cbd1da..3d18e8cabd0 100644 --- a/src/plugins/crypto_native/crypto_native.h +++ b/src/plugins/crypto_native/crypto_native.h @@ -19,33 +19,66 @@ #define __crypto_native_h__ typedef void *(crypto_native_key_fn_t) (vnet_crypto_key_t * key); +typedef int (crypto_native_variant_probe_t) (); + +typedef struct crypto_native_op_handler +{ + struct crypto_native_op_handler *next; + vnet_crypto_op_id_t op_id; + vnet_crypto_ops_handler_t *fn; + vnet_crypto_chained_ops_handler_t *cfn; + crypto_native_variant_probe_t *probe; + int priority; +} crypto_native_op_handler_t; + +typedef struct crypto_native_key_handler +{ + struct crypto_native_key_handler *next; + vnet_crypto_alg_t alg_id; + crypto_native_key_fn_t *key_fn; + crypto_native_variant_probe_t *probe; + int priority; +} crypto_native_key_handler_t; typedef struct { u32 crypto_engine_index; crypto_native_key_fn_t *key_fn[VNET_CRYPTO_N_ALGS]; void **key_data; + crypto_native_op_handler_t *op_handlers; + crypto_native_key_handler_t *key_handlers; } crypto_native_main_t; extern crypto_native_main_t crypto_native_main; -#define foreach_crypto_native_march_variant \ - _ (slm) _ (hsw) _ (skx) _ (icl) _ (adl) _ (neon) - -#define _(v) \ - clib_error_t __clib_weak *crypto_native_aes_cbc_init_##v (vlib_main_t *vm); \ - clib_error_t __clib_weak *crypto_native_aes_ctr_init_##v (vlib_main_t *vm); \ - clib_error_t __clib_weak *crypto_native_aes_gcm_init_##v (vlib_main_t *vm); - -foreach_crypto_native_march_variant; -#undef _ +#define CRYPTO_NATIVE_OP_HANDLER(x) \ + static crypto_native_op_handler_t __crypto_native_op_handler_##x; \ + static void __clib_constructor __crypto_native_op_handler_cb_##x (void) \ + { \ + crypto_native_main_t *cm = &crypto_native_main; \ + int priority = __crypto_native_op_handler_##x.probe (); \ + if (priority >= 0) \ + { \ + __crypto_native_op_handler_##x.priority = priority; \ + __crypto_native_op_handler_##x.next = cm->op_handlers; \ + cm->op_handlers = &__crypto_native_op_handler_##x; \ + } \ + } \ + static crypto_native_op_handler_t __crypto_native_op_handler_##x +#define CRYPTO_NATIVE_KEY_HANDLER(x) \ + static crypto_native_key_handler_t __crypto_native_key_handler_##x; \ + static void __clib_constructor __crypto_native_key_handler_cb_##x (void) \ + { \ + crypto_native_main_t *cm = &crypto_native_main; \ + int priority = __crypto_native_key_handler_##x.probe (); \ + if (priority >= 0) \ + { \ + __crypto_native_key_handler_##x.priority = priority; \ + __crypto_native_key_handler_##x.next = cm->key_handlers; \ + cm->key_handlers = &__crypto_native_key_handler_##x; \ + } \ + } \ + static crypto_native_key_handler_t __crypto_native_key_handler_##x #endif /* __crypto_native_h__ */ -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/plugins/crypto_native/main.c b/src/plugins/crypto_native/main.c index 8a59be319b9..2bc0d98f196 100644 --- a/src/plugins/crypto_native/main.c +++ b/src/plugins/crypto_native/main.c @@ -63,95 +63,52 @@ clib_error_t * crypto_native_init (vlib_main_t * vm) { crypto_native_main_t *cm = &crypto_native_main; - clib_error_t *error = 0; - if (clib_cpu_supports_x86_aes () == 0 && - clib_cpu_supports_aarch64_aes () == 0) + if (cm->op_handlers == 0) return 0; cm->crypto_engine_index = vnet_crypto_register_engine (vm, "native", 100, "Native ISA Optimized Crypto"); - if (0); -#if __x86_64__ - else if (crypto_native_aes_cbc_init_icl && clib_cpu_supports_vaes () && - clib_cpu_supports_avx512f ()) - error = crypto_native_aes_cbc_init_icl (vm); - else if (crypto_native_aes_cbc_init_adl && clib_cpu_supports_vaes ()) - error = crypto_native_aes_cbc_init_adl (vm); - else if (crypto_native_aes_cbc_init_skx && clib_cpu_supports_avx512f ()) - error = crypto_native_aes_cbc_init_skx (vm); - else if (crypto_native_aes_cbc_init_hsw && clib_cpu_supports_avx2 ()) - error = crypto_native_aes_cbc_init_hsw (vm); - else if (crypto_native_aes_cbc_init_slm) - error = crypto_native_aes_cbc_init_slm (vm); -#endif -#if __aarch64__ - else if (crypto_native_aes_cbc_init_neon) - error = crypto_native_aes_cbc_init_neon (vm); -#endif - else - error = clib_error_return (0, "No AES CBC implemenation available"); - - if (error) - return error; - - if (0) - ; -#if __x86_64__ - else if (crypto_native_aes_ctr_init_icl && clib_cpu_supports_vaes () && - clib_cpu_supports_avx512f ()) - error = crypto_native_aes_ctr_init_icl (vm); - else if (crypto_native_aes_ctr_init_adl && clib_cpu_supports_vaes ()) - error = crypto_native_aes_ctr_init_adl (vm); - else if (crypto_native_aes_ctr_init_skx && clib_cpu_supports_avx512f ()) - error = crypto_native_aes_ctr_init_skx (vm); - else if (crypto_native_aes_ctr_init_hsw && clib_cpu_supports_avx2 ()) - error = crypto_native_aes_ctr_init_hsw (vm); - else if (crypto_native_aes_ctr_init_slm) - error = crypto_native_aes_ctr_init_slm (vm); -#endif -#if __aarch64__ - else if (crypto_native_aes_ctr_init_neon) - error = crypto_native_aes_ctr_init_neon (vm); -#endif - else - error = clib_error_return (0, "No AES CTR implemenation available"); - - if (error) - return error; - -#if __x86_64__ - if (clib_cpu_supports_pclmulqdq ()) + crypto_native_op_handler_t *oh = cm->op_handlers; + crypto_native_key_handler_t *kh = cm->key_handlers; + crypto_native_op_handler_t **best_by_op_id = 0; + crypto_native_key_handler_t **best_by_alg_id = 0; + + while (oh) { - if (crypto_native_aes_gcm_init_icl && clib_cpu_supports_vaes () && - clib_cpu_supports_avx512f ()) - error = crypto_native_aes_gcm_init_icl (vm); - else if (crypto_native_aes_gcm_init_adl && clib_cpu_supports_vaes ()) - error = crypto_native_aes_gcm_init_adl (vm); - else if (crypto_native_aes_gcm_init_skx && clib_cpu_supports_avx512f ()) - error = crypto_native_aes_gcm_init_skx (vm); - else if (crypto_native_aes_gcm_init_hsw && clib_cpu_supports_avx2 ()) - error = crypto_native_aes_gcm_init_hsw (vm); - else if (crypto_native_aes_gcm_init_slm) - error = crypto_native_aes_gcm_init_slm (vm); - else - error = clib_error_return (0, "No AES GCM implemenation available"); - - if (error) - return error; + vec_validate (best_by_op_id, oh->op_id); + + if (best_by_op_id[oh->op_id] == 0 || + best_by_op_id[oh->op_id]->priority < oh->priority) + best_by_op_id[oh->op_id] = oh; + + oh = oh->next; } -#endif -#if __aarch64__ - if (crypto_native_aes_gcm_init_neon) - error = crypto_native_aes_gcm_init_neon (vm); - else - error = clib_error_return (0, "No AES GCM implemenation available"); - - if (error) - return error; -#endif + + while (kh) + { + vec_validate (best_by_alg_id, kh->alg_id); + + if (best_by_alg_id[kh->alg_id] == 0 || + best_by_alg_id[kh->alg_id]->priority < kh->priority) + best_by_alg_id[kh->alg_id] = kh; + + kh = kh->next; + } + + vec_foreach_pointer (oh, best_by_op_id) + if (oh) + vnet_crypto_register_ops_handlers (vm, cm->crypto_engine_index, + oh->op_id, oh->fn, oh->cfn); + + vec_foreach_pointer (kh, best_by_alg_id) + if (kh) + cm->key_fn[kh->alg_id] = kh->key_fn; + + vec_free (best_by_op_id); + vec_free (best_by_alg_id); vnet_crypto_register_key_handler (vm, cm->crypto_engine_index, crypto_native_key_handler); diff --git a/src/plugins/crypto_native/sha2.c b/src/plugins/crypto_native/sha2.c new file mode 100644 index 00000000000..459ce6d8e79 --- /dev/null +++ b/src/plugins/crypto_native/sha2.c @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2024 Cisco Systems, Inc. + */ + +#include <vlib/vlib.h> +#include <vnet/plugin/plugin.h> +#include <vnet/crypto/crypto.h> +#include <crypto_native/crypto_native.h> +#include <vppinfra/crypto/sha2.h> + +static_always_inline u32 +crypto_native_ops_hash_sha2 (vlib_main_t *vm, vnet_crypto_op_t *ops[], + u32 n_ops, vnet_crypto_op_chunk_t *chunks, + clib_sha2_type_t type, int maybe_chained) +{ + vnet_crypto_op_t *op = ops[0]; + clib_sha2_ctx_t ctx; + u32 n_left = n_ops; + +next: + if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS) + { + vnet_crypto_op_chunk_t *chp = chunks + op->chunk_index; + clib_sha2_init (&ctx, type); + for (int j = 0; j < op->n_chunks; j++, chp++) + clib_sha2_update (&ctx, chp->src, chp->len); + clib_sha2_final (&ctx, op->digest); + } + else + clib_sha2 (type, op->src, op->len, op->digest); + + op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; + + if (--n_left) + { + op += 1; + goto next; + } + + return n_ops; +} + +static_always_inline u32 +crypto_native_ops_hmac_sha2 (vlib_main_t *vm, vnet_crypto_op_t *ops[], + u32 n_ops, vnet_crypto_op_chunk_t *chunks, + clib_sha2_type_t type) +{ + crypto_native_main_t *cm = &crypto_native_main; + vnet_crypto_op_t *op = ops[0]; + u32 n_left = n_ops; + clib_sha2_hmac_ctx_t ctx; + u8 buffer[64]; + u32 sz, n_fail = 0; + + for (; n_left; n_left--, op++) + { + clib_sha2_hmac_init ( + &ctx, type, (clib_sha2_hmac_key_data_t *) cm->key_data[op->key_index]); + if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS) + { + vnet_crypto_op_chunk_t *chp = chunks + op->chunk_index; + for (int j = 0; j < op->n_chunks; j++, chp++) + clib_sha2_hmac_update (&ctx, chp->src, chp->len); + } + else + clib_sha2_hmac_update (&ctx, op->src, op->len); + + clib_sha2_hmac_final (&ctx, buffer); + + if (op->digest_len) + { + sz = op->digest_len; + if (op->flags & VNET_CRYPTO_OP_FLAG_HMAC_CHECK) + { + if ((memcmp (op->digest, buffer, sz))) + { + n_fail++; + op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC; + continue; + } + } + else + clib_memcpy_fast (op->digest, buffer, sz); + } + else + { + sz = clib_sha2_variants[type].digest_size; + if (op->flags & VNET_CRYPTO_OP_FLAG_HMAC_CHECK) + { + if ((memcmp (op->digest, buffer, sz))) + { + n_fail++; + op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC; + continue; + } + } + else + clib_memcpy_fast (op->digest, buffer, sz); + } + + op->status = VNET_CRYPTO_OP_STATUS_COMPLETED; + } + + return n_ops - n_fail; +} + +static void * +sha2_key_add (vnet_crypto_key_t *key, clib_sha2_type_t type) +{ + clib_sha2_hmac_key_data_t *kd; + + kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES); + clib_sha2_hmac_key_data (type, key->data, vec_len (key->data), kd); + + return kd; +} + +static int +probe () +{ +#if defined(__SHA__) && defined(__x86_64__) + if (clib_cpu_supports_sha ()) + return 50; +#elif defined(__ARM_FEATURE_SHA2) + if (clib_cpu_supports_sha2 ()) + return 10; +#endif + return -1; +} + +#define _(b) \ + static u32 crypto_native_ops_hash_sha##b ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return crypto_native_ops_hash_sha2 (vm, ops, n_ops, 0, CLIB_SHA2_##b, 0); \ + } \ + \ + static u32 crypto_native_ops_chained_hash_sha##b ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \ + u32 n_ops) \ + { \ + return crypto_native_ops_hash_sha2 (vm, ops, n_ops, chunks, \ + CLIB_SHA2_##b, 1); \ + } \ + \ + static u32 crypto_native_ops_hmac_sha##b ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \ + { \ + return crypto_native_ops_hmac_sha2 (vm, ops, n_ops, 0, CLIB_SHA2_##b); \ + } \ + \ + static u32 crypto_native_ops_chained_hmac_sha##b ( \ + vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \ + u32 n_ops) \ + { \ + return crypto_native_ops_hmac_sha2 (vm, ops, n_ops, chunks, \ + CLIB_SHA2_##b); \ + } \ + \ + static void *sha2_##b##_key_add (vnet_crypto_key_t *k) \ + { \ + return sha2_key_add (k, CLIB_SHA2_##b); \ + } \ + \ + CRYPTO_NATIVE_OP_HANDLER (crypto_native_hash_sha##b) = { \ + .op_id = VNET_CRYPTO_OP_SHA##b##_HASH, \ + .fn = crypto_native_ops_hash_sha##b, \ + .cfn = crypto_native_ops_chained_hash_sha##b, \ + .probe = probe, \ + }; \ + CRYPTO_NATIVE_OP_HANDLER (crypto_native_hmac_sha##b) = { \ + .op_id = VNET_CRYPTO_OP_SHA##b##_HMAC, \ + .fn = crypto_native_ops_hmac_sha##b, \ + .cfn = crypto_native_ops_chained_hmac_sha##b, \ + .probe = probe, \ + }; \ + CRYPTO_NATIVE_KEY_HANDLER (crypto_native_hmac_sha##b) = { \ + .alg_id = VNET_CRYPTO_ALG_HMAC_SHA##b, \ + .key_fn = sha2_##b##_key_add, \ + .probe = probe, \ + }; + +_ (224) +_ (256) + +#undef _ diff --git a/src/plugins/dev_iavf/port.c b/src/plugins/dev_iavf/port.c index 982436d9b45..90e81e960c4 100644 --- a/src/plugins/dev_iavf/port.c +++ b/src/plugins/dev_iavf/port.c @@ -42,29 +42,35 @@ iavf_port_vlan_strip_disable (vlib_main_t *vm, vnet_dev_port_t *port) vnet_dev_t *dev = port->dev; iavf_port_t *ap = vnet_dev_get_port_data (port); virtchnl_vlan_caps_t vc; - vnet_dev_rv_t rv; + vnet_dev_rv_t rv = VNET_DEV_ERR_NOT_SUPPORTED; u32 outer, inner; const u32 mask = VIRTCHNL_VLAN_ETHERTYPE_8100; - if ((ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) == 0) - return iavf_vc_op_disable_vlan_stripping (vm, dev); + if (ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) + { + if ((rv = iavf_vc_op_get_offload_vlan_v2_caps (vm, dev, &vc))) + return rv; - if ((rv = iavf_vc_op_get_offload_vlan_v2_caps (vm, dev, &vc))) - return rv; + outer = vc.offloads.stripping_support.outer; + inner = vc.offloads.stripping_support.inner; - outer = vc.offloads.stripping_support.outer; - inner = vc.offloads.stripping_support.inner; + outer = outer & VIRTCHNL_VLAN_TOGGLE ? outer & mask : 0; + inner = inner & VIRTCHNL_VLAN_TOGGLE ? inner & mask : 0; - outer = outer & VIRTCHNL_VLAN_TOGGLE ? outer & mask : 0; - inner = inner & VIRTCHNL_VLAN_TOGGLE ? inner & mask : 0; + virtchnl_vlan_setting_t vs = { + .vport_id = ap->vsi_id, + .outer_ethertype_setting = outer, + .inner_ethertype_setting = inner, + }; - virtchnl_vlan_setting_t vs = { - .vport_id = ap->vsi_id, - .outer_ethertype_setting = outer, - .inner_ethertype_setting = inner, - }; + if ((rv = iavf_vc_op_disable_vlan_stripping_v2 (vm, dev, &vs))) + return rv; + } - return iavf_vc_op_disable_vlan_stripping_v2 (vm, dev, &vs); + if (ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN) + return iavf_vc_op_disable_vlan_stripping (vm, dev); + + return rv; } vnet_dev_rv_t @@ -275,7 +281,12 @@ iavf_port_init (vlib_main_t *vm, vnet_dev_port_t *port) u64_bit_set (&ap->intr_mode_per_rxq_bitmap, q->queue_id, 1); if ((rv = iavf_port_vlan_strip_disable (vm, port))) - return rv; + { + if (rv == VNET_DEV_ERR_NOT_SUPPORTED) + log_warn (port->dev, "device doesn't support vlan stripping"); + else + return rv; + } if ((rv = iavf_port_init_rss (vm, port))) return rv; diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c index 03a1d59f7cc..97a11e0d0d7 100644 --- a/src/plugins/dev_octeon/init.c +++ b/src/plugins/dev_octeon/init.c @@ -51,7 +51,8 @@ static struct } _ (0xa063, RVU_PF, "Marvell Octeon Resource Virtualization Unit PF"), - _ (0xa0f8, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"), + _ (0xa064, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"), + _ (0xa0f8, LBK_VF, "Marvell Octeon Loopback Unit VF"), _ (0xa0f7, SDP_VF, "Marvell Octeon System DPI Packet Interface Unit VF"), _ (0xa0f3, CPT_VF, "Marvell Octeon Cryptographic Accelerator Unit VF"), #undef _ @@ -114,7 +115,7 @@ oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev) if ((rrv = roc_nix_dev_init (cd->nix))) return cnx_return_roc_err (dev, rrv, "roc_nix_dev_init"); - if (roc_nix_npc_mac_addr_get (cd->nix, mac_addr)) + if ((rrv = roc_nix_npc_mac_addr_get (cd->nix, mac_addr))) return cnx_return_roc_err (dev, rrv, "roc_nix_npc_mac_addr_get"); vnet_dev_port_add_args_t port_add_args = { diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h index 92ec953ed23..e43cde0a35f 100644 --- a/src/plugins/dev_octeon/octeon.h +++ b/src/plugins/dev_octeon/octeon.h @@ -22,6 +22,7 @@ typedef enum OCT_DEVICE_TYPE_UNKNOWN = 0, OCT_DEVICE_TYPE_RVU_PF, OCT_DEVICE_TYPE_RVU_VF, + OCT_DEVICE_TYPE_LBK_VF, OCT_DEVICE_TYPE_SDP_VF, OCT_DEVICE_TYPE_CPT_VF, } __clib_packed oct_device_type_t; @@ -162,7 +163,8 @@ vnet_dev_rv_t oct_flow_query (vlib_main_t *, vnet_dev_port_t *, u32, uword, _ (AURA_BATCH_ALLOC_ISSUE_FAIL, aura_batch_alloc_issue_fail, ERROR, \ "aura batch alloc issue failed") \ _ (AURA_BATCH_ALLOC_NOT_READY, aura_batch_alloc_not_ready, ERROR, \ - "aura batch alloc not ready") + "aura batch alloc not ready") \ + _ (MTU_EXCEEDED, mtu_exceeded, ERROR, "mtu exceeded") typedef enum { diff --git a/src/plugins/dev_octeon/port.c b/src/plugins/dev_octeon/port.c index d5f78301adf..98a4c28b37d 100644 --- a/src/plugins/dev_octeon/port.c +++ b/src/plugins/dev_octeon/port.c @@ -416,6 +416,44 @@ oct_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enable) return VNET_DEV_OK; } +static vnet_dev_rv_t +oct_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port, + vnet_dev_hw_addr_t *addr, int is_add, + int is_primary) +{ + vnet_dev_t *dev = port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); + struct roc_nix *nix = cd->nix; + vnet_dev_rv_t rv = VNET_DEV_OK; + + i32 rrv; + + if (is_primary) + { + if (is_add) + { + /* Update mac address at NPC */ + rrv = roc_nix_npc_mac_addr_set (nix, (u8 *) addr); + if (rrv) + rv = oct_roc_err (dev, rrv, "roc_nix_npc_mac_addr_set() failed"); + + /* Update mac address at CGX for PFs only */ + if (!roc_nix_is_vf_or_sdp (nix)) + { + rrv = roc_nix_mac_addr_set (nix, (u8 *) addr); + if (rrv) + { + /* Rollback to previous mac address */ + roc_nix_npc_mac_addr_set (nix, + (u8 *) &port->primary_hw_addr); + rv = oct_roc_err (dev, rrv, "roc_nix_mac_addr_set() failed"); + } + } + } + } + return rv; +} + vnet_dev_rv_t oct_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_port_cfg_change_req_t *req) @@ -465,6 +503,9 @@ oct_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port, break; case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR: + rv = oct_port_add_del_eth_addr (vm, port, &req->addr, + /* is_add */ 1, + /* is_primary */ 1); break; case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR: diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c index 0dbf8759d35..a2e4b07de8a 100644 --- a/src/plugins/dev_octeon/tx_node.c +++ b/src/plugins/dev_octeon/tx_node.c @@ -22,8 +22,11 @@ typedef struct u32 n_tx_bytes; u32 n_drop; vlib_buffer_t *drop[VLIB_FRAME_SIZE]; + u32 n_exd_mtu; + vlib_buffer_t *exd_mtu[VLIB_FRAME_SIZE]; u32 batch_alloc_not_ready; u32 batch_alloc_issue_fail; + int max_pkt_len; u16 lmt_id; u64 lmt_ioaddr; lmt_line_t *lmt_lines; @@ -133,7 +136,8 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) static_always_inline u8 oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, - lmt_line_t *line, u32 flags, int simple, int trace) + lmt_line_t *line, u32 flags, int simple, int trace, u32 *n, + u8 *dpl) { u8 n_dwords = 2; u32 total_len = 0; @@ -148,6 +152,12 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, }, }; + if (PREDICT_FALSE (vlib_buffer_length_in_chain (vm, b) > ctx->max_pkt_len)) + { + ctx->exd_mtu[ctx->n_exd_mtu++] = b; + return 0; + } + if (!simple && flags & VLIB_BUFFER_NEXT_PRESENT) { u8 n_tail_segs = 0; @@ -159,7 +169,7 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, tail_segs[n_tail_segs++] = t; if (n_tail_segs > 5) { - ctx->drop[ctx->n_drop++] = t; + ctx->drop[ctx->n_drop++] = b; return 0; } } @@ -231,6 +241,9 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b, for (u32 i = 0; i < n_dwords; i++) line->dwords[i] = d.as_u128[i]; + *dpl = n_dwords; + *n = *n + 1; + return n_dwords; } @@ -240,7 +253,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, { u8 dwords_per_line[16], *dpl = dwords_per_line; u64 lmt_arg, ioaddr, n_lines; - u32 n_left, or_flags_16 = 0; + u32 n_left, or_flags_16 = 0, n = 0; const u32 not_simple_flags = VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD; lmt_line_t *l = ctx->lmt_lines; @@ -248,7 +261,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, /* Data Store Memory Barrier - outer shareable domain */ asm volatile("dmb oshst" ::: "memory"); - for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8, l += 8) + for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8) { u32 f0, f1, f2, f3, f4, f5, f6, f7, or_f = 0; vlib_prefetch_buffer_header (b[8], LOAD); @@ -269,48 +282,54 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq, if ((or_f & not_simple_flags) == 0) { int simple = 1; - oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace); - oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[13], LOAD); - oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace); - oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace); + oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[14], LOAD); - oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace); - oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace); + oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[15], LOAD); - oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace); - oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace); - dpl[0] = dpl[1] = dpl[2] = dpl[3] = 2; - dpl[4] = dpl[5] = dpl[6] = dpl[7] = 2; + oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]); } else { int simple = 0; - dpl[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace); - dpl[1] = oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[1], l + n, f1, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[13], LOAD); - dpl[2] = oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace); - dpl[3] = oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace); + oct_tx_enq1 (vm, ctx, b[2], l + n, f2, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[3], l + n, f3, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[14], LOAD); - dpl[4] = oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace); - dpl[5] = oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace); + oct_tx_enq1 (vm, ctx, b[4], l + n, f4, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[5], l + n, f5, simple, trace, &n, &dpl[n]); vlib_prefetch_buffer_header (b[15], LOAD); - dpl[6] = oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace); - dpl[7] = oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace); + oct_tx_enq1 (vm, ctx, b[6], l + n, f6, simple, trace, &n, &dpl[n]); + oct_tx_enq1 (vm, ctx, b[7], l + n, f7, simple, trace, &n, &dpl[n]); } - dpl += 8; + dpl += n; + l += n; + n = 0; } - for (; n_left > 0; n_left -= 1, b += 1, l += 1) + for (; n_left > 0; n_left -= 1, b += 1) { u32 f0 = b[0]->flags; - dpl++[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace); + oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace, &n, &dpl[n]); or_flags_16 |= f0; + dpl += n; + l += n; + n = 0; } lmt_arg = ctx->lmt_id; ioaddr = ctx->lmt_ioaddr; - n_lines = n_pkts; + n_lines = dpl - dwords_per_line; + + if (PREDICT_FALSE (!n_lines)) + return n_pkts; if (PREDICT_FALSE (or_flags_16 & VLIB_BUFFER_NEXT_PRESENT)) { @@ -350,6 +369,8 @@ VNET_DEV_NODE_FN (oct_tx_node) vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node); vnet_dev_tx_queue_t *txq = rt->tx_queue; oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); + vnet_dev_t *dev = txq->port->dev; + oct_device_t *cd = vnet_dev_get_data (dev); u32 node_index = node->node_index; u32 *from = vlib_frame_vector_args (frame); u32 n, n_enq, n_left, n_pkts = frame->n_vectors; @@ -363,6 +384,7 @@ VNET_DEV_NODE_FN (oct_tx_node) .sq = ctq->sq.qid, .sizem1 = 1, }, + .max_pkt_len = roc_nix_max_pkt_len (cd->nix), .lmt_id = lmt_id, .lmt_ioaddr = ctq->io_addr, .lmt_lines = ctq->lmt_addr + (lmt_id << ROC_LMT_LINE_SIZE_LOG2), @@ -396,7 +418,7 @@ VNET_DEV_NODE_FN (oct_tx_node) n += oct_tx_enq16 (vm, &ctx, txq, b, n_left, /* trace */ 0); } - ctq->n_enq = n_enq + n; + ctq->n_enq = n_enq + n - ctx.n_drop - ctx.n_exd_mtu; if (n < n_pkts) { @@ -411,6 +433,10 @@ VNET_DEV_NODE_FN (oct_tx_node) vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_CHAIN_TOO_LONG, ctx.n_drop); + if (PREDICT_FALSE (ctx.n_exd_mtu)) + vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_MTU_EXCEEDED, + ctx.n_exd_mtu); + if (ctx.batch_alloc_not_ready) vlib_error_count (vm, node_index, OCT_TX_NODE_CTR_AURA_BATCH_ALLOC_NOT_READY, @@ -431,5 +457,13 @@ VNET_DEV_NODE_FN (oct_tx_node) n_pkts -= ctx.n_drop; } + if (PREDICT_FALSE (ctx.n_exd_mtu)) + { + u32 bi[VLIB_FRAME_SIZE]; + vlib_get_buffer_indices (vm, ctx.exd_mtu, bi, ctx.n_exd_mtu); + vlib_buffer_free (vm, bi, ctx.n_exd_mtu); + n_pkts -= ctx.n_exd_mtu; + } + return n_pkts; } diff --git a/src/plugins/dpdk/CMakeLists.txt b/src/plugins/dpdk/CMakeLists.txt index 48b1548f9c2..48c56f35282 100644 --- a/src/plugins/dpdk/CMakeLists.txt +++ b/src/plugins/dpdk/CMakeLists.txt @@ -90,8 +90,10 @@ else() ############################################################################## # libnuma ############################################################################## - vpp_plugin_find_library(dpdk NUMA_LIB "numa") - list(APPEND DPDK_LINK_LIBRARIES ${NUMA_LIB}) + if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") + vpp_plugin_find_library(dpdk NUMA_LIB "numa") + list(APPEND DPDK_LINK_LIBRARIES ${NUMA_LIB}) + endif() ############################################################################## # Mellanox libraries diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c index c838800deb4..77f9a27f97b 100644 --- a/src/plugins/dpdk/device/cli.c +++ b/src/plugins/dpdk/device/cli.c @@ -89,12 +89,18 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { clib_error_t *err = 0; - u32 pipe_max_size; int fds[2]; u8 *s = 0; int n, n_try; FILE *f; + /* + * XXX: Pipes on FreeBSD grow dynamically up to 64KB (FreeBSD 15), don't + * manually tweak this value on FreeBSD at the moment. + */ +#ifdef __linux__ + u32 pipe_max_size; + err = clib_sysfs_read ("/proc/sys/fs/pipe-max-size", "%u", &pipe_max_size); if (err) @@ -112,6 +118,7 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input, err = clib_error_return_unix (0, "fcntl(F_SETPIPE_SZ)"); goto error; } +#endif /* __linux__ */ if (fcntl (fds[0], F_SETFL, O_NONBLOCK) == -1) { diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c index dc6b0c1c952..7a49c5aaef2 100644 --- a/src/plugins/dpdk/device/common.c +++ b/src/plugins/dpdk/device/common.c @@ -491,6 +491,7 @@ dpdk_get_pci_device (const struct rte_eth_dev_info *info) return NULL; } +#ifdef __linux__ /* If this device is VMBUS return pointer to info, otherwise NULL */ struct rte_vmbus_device * dpdk_get_vmbus_device (const struct rte_eth_dev_info *info) @@ -507,6 +508,7 @@ dpdk_get_vmbus_device (const struct rte_eth_dev_info *info) else return NULL; } +#endif /* __linux__ */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index c22a67a07e7..88a4d9ff618 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -24,7 +24,9 @@ #include <rte_eal.h> #include <rte_bus_pci.h> +#ifdef __linux__ #include <rte_bus_vmbus.h> +#endif /* __linux__ */ #include <rte_ethdev.h> #include <rte_version.h> #include <rte_net.h> @@ -35,7 +37,9 @@ #include <bus_driver.h> #include <bus_pci_driver.h> +#ifdef __linux__ #include <bus_vmbus_driver.h> +#endif /* __linux__ */ #endif #include <vnet/devices/devices.h> diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index d15cfd7233a..e416efe2e4d 100644 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -187,9 +187,11 @@ dpdk_find_startup_config (struct rte_eth_dev_info *di) { dpdk_main_t *dm = &dpdk_main; struct rte_pci_device *pci_dev; - struct rte_vmbus_device *vmbus_dev; vlib_pci_addr_t pci_addr; +#ifdef __linux__ + struct rte_vmbus_device *vmbus_dev; vlib_vmbus_addr_t vmbus_addr; +#endif /* __linux__ */ uword *p = 0; if ((pci_dev = dpdk_get_pci_device (di))) @@ -202,6 +204,7 @@ dpdk_find_startup_config (struct rte_eth_dev_info *di) hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32); } +#ifdef __linux__ if ((vmbus_dev = dpdk_get_vmbus_device (di))) { unformat_input_t input_vmbus; @@ -216,6 +219,7 @@ dpdk_find_startup_config (struct rte_eth_dev_info *di) &vmbus_addr); unformat_free (&input_vmbus); } +#endif /* __linux__ */ if (p) return pool_elt_at_index (dm->conf->dev_confs, p[0]); @@ -566,8 +570,18 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) continue; } +#ifdef __FreeBSD__ + /* + * The defines for the PCI_CLASS_* types are platform specific and differ + * on FreeBSD. + */ + if (d->device_class != PCI_CLASS_NETWORK && + d->device_class != PCI_CLASS_PROCESSOR_CO) + continue; +#else if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO) continue; +#endif /* __FreeBSD__ */ if (num_whitelisted) { @@ -1031,12 +1045,14 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) dpdk_main_t *dm = &dpdk_main; clib_error_t *error = 0; dpdk_config_main_t *conf = &dpdk_config_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); dpdk_device_config_t *devconf; vlib_pci_addr_t pci_addr = { 0 }; vlib_vmbus_addr_t vmbus_addr = { 0 }; unformat_input_t sub_input; +#ifdef __linux + vlib_thread_main_t *tm = vlib_get_thread_main (); uword default_hugepage_sz, x; +#endif /* __linux__ */ u8 *s, *tmp = 0; int ret, i; int num_whitelisted = 0; @@ -1045,15 +1061,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) u8 no_vmbus = 0; u8 file_prefix = 0; u8 *socket_mem = 0; - u8 *huge_dir_path = 0; u32 vendor, device, domain, bus, func; void *fmt_func; void *fmt_addr; f64 poll_interval; - huge_dir_path = - format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0); - conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); mhash_init (&conf->device_config_index_by_vmbus_addr, sizeof (uword), sizeof (vlib_vmbus_addr_t)); @@ -1248,6 +1260,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) { vec_add1 (conf->eal_init_args, (u8 *) "--in-memory"); +#ifdef __linux__ + /* + * FreeBSD performs huge page prealloc through a dedicated kernel mode + * this process is only required on Linux. + */ default_hugepage_sz = clib_mem_get_default_hugepage_size (); clib_bitmap_foreach (x, tm->cpu_socket_bitmap) @@ -1262,6 +1279,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) if ((e = clib_sysfs_prealloc_hugepages(x, 0, n_pages))) clib_error_report (e); } +#endif /* __linux__ */ } /* on/off dpdk's telemetry thread */ @@ -1270,6 +1288,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) vec_add1 (conf->eal_init_args, (u8 *) "--no-telemetry"); } +#ifdef __linux__ if (!file_prefix) { tmp = format (0, "--file-prefix%c", 0); @@ -1277,6 +1296,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) tmp = format (0, "vpp%c", 0); vec_add1 (conf->eal_init_args, tmp); } +#endif if (no_pci == 0 && geteuid () == 0) dpdk_bind_devices_to_uio (conf); @@ -1396,11 +1416,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input) RTE_VECT_SIMD_256 : RTE_VECT_SIMD_512); - /* lazy umount hugepages */ - umount2 ((char *) huge_dir_path, MNT_DETACH); - rmdir ((char *) huge_dir_path); - vec_free (huge_dir_path); - /* main thread 1st */ if ((error = dpdk_buffer_pools_create (vm))) return error; diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c index 47007219482..9781d0ed7f0 100644 --- a/src/plugins/dpdk/main.c +++ b/src/plugins/dpdk/main.c @@ -13,13 +13,6 @@ * limitations under the License. */ -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <linux/vfio.h> -#include <sys/ioctl.h> - #include <vnet/vnet.h> #include <vnet/plugin/plugin.h> #include <dpdk/device/dpdk.h> diff --git a/src/plugins/fateshare/fateshare.c b/src/plugins/fateshare/fateshare.c index 33ee167bce3..971d32303db 100644 --- a/src/plugins/fateshare/fateshare.c +++ b/src/plugins/fateshare/fateshare.c @@ -17,6 +17,7 @@ #include <vnet/vnet.h> #include <vnet/plugin/plugin.h> +#include <vppinfra/unix.h> #include <fateshare/fateshare.h> #include <vlibapi/api.h> @@ -26,7 +27,11 @@ #include <sys/types.h> #include <sys/wait.h> +#ifdef __linux__ #include <sys/prctl.h> // prctl(), PR_SET_PDEATHSIG +#else +#include <sys/procctl.h> +#endif /* __linux__ */ #include <limits.h> fateshare_main_t fateshare_main; @@ -86,12 +91,23 @@ launch_monitor (fateshare_main_t *kmp) { dup2 (logfd, 1); dup2 (logfd, 2); +#ifdef __linux__ int r = prctl (PR_SET_PDEATHSIG, SIGTERM); if (r == -1) { perror (0); exit (1); } +#else + int r, s = SIGTERM; + + r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s); + if (r == -1) + { + perror (0); + exit (1); + } +#endif /* __linux__ */ pid_t current_ppid = getppid (); if (current_ppid != ppid_before_fork) { @@ -197,24 +213,30 @@ fateshare_config (vlib_main_t *vm, unformat_input_t *input) if (fmp->monitor_cmd == 0) { - char *p, path[PATH_MAX]; - int rv; + char *p; + u8 *path; /* find executable path */ - if ((rv = readlink ("/proc/self/exe", path, PATH_MAX - 1)) == -1) + path = os_get_exec_path (); + + if (path == 0) return clib_error_return ( - 0, "could not stat /proc/self/exe - set monitor manually"); + 0, "could not get exec path - set monitor manually"); - /* readlink doesn't provide null termination */ - path[rv] = 0; + /* add null termination */ + vec_add1 (path, 0); /* strip filename */ - if ((p = strrchr (path, '/')) == 0) - return clib_error_return ( - 0, "could not determine vpp directory - set monitor manually"); + if ((p = strrchr ((char *) path, '/')) == 0) + { + vec_free (path); + return clib_error_return ( + 0, "could not determine vpp directory - set monitor manually"); + } *p = 0; fmp->monitor_cmd = format (0, "%s/vpp_fateshare_monitor\0", path); + vec_free (path); } if (fmp->monitor_logfile == 0) { diff --git a/src/plugins/fateshare/vpp_fateshare_monitor.c b/src/plugins/fateshare/vpp_fateshare_monitor.c index 7b203884c4e..7af451ccffe 100644 --- a/src/plugins/fateshare/vpp_fateshare_monitor.c +++ b/src/plugins/fateshare/vpp_fateshare_monitor.c @@ -4,7 +4,12 @@ #include <sys/types.h> #include <sys/wait.h> +#ifdef __linux__ #include <sys/prctl.h> // prctl(), PR_SET_PDEATHSIG +#else +#include <signal.h> +#include <sys/procctl.h> +#endif /* __linux__ */ #include <sys/stat.h> #include <fcntl.h> @@ -82,6 +87,7 @@ launch_command (char *scmd, char *logname_base) } /* child */ +#ifdef __linux__ int r = prctl (PR_SET_PDEATHSIG, SIGTERM); if (r == -1) { @@ -89,6 +95,17 @@ launch_command (char *scmd, char *logname_base) sleep (5); exit (1); } +#else + int r, s = SIGTERM; + + r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s); + if (r == -1) + { + perror ("procctl"); + exit (1); + } +#endif /* __linux__ */ + if (getppid () != ppid_before_fork) { sleep (5); @@ -180,12 +197,23 @@ main (int argc, char **argv) exit (2); } +#ifdef __linux__ int r = prctl (PR_SET_PDEATHSIG, SIGTERM); if (r == -1) { perror (0); exit (1); } +#else + int r, s = SIGTERM; + + r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s); + if (r == -1) + { + perror ("procctl"); + exit (1); + } +#endif /* __linux__ */ /* Establish handler. */ struct sigaction sa; diff --git a/src/plugins/hs_apps/CMakeLists.txt b/src/plugins/hs_apps/CMakeLists.txt index bd43eb34afa..179c9c7a4c4 100644 --- a/src/plugins/hs_apps/CMakeLists.txt +++ b/src/plugins/hs_apps/CMakeLists.txt @@ -55,7 +55,7 @@ if(VPP_BUILD_VCL_TESTS) ) add_vpp_executable(${test} SOURCES "vcl/${test}.c" - LINK_LIBRARIES vppcom pthread + LINK_LIBRARIES vppcom pthread ${EPOLL_LIB} NO_INSTALL ) endforeach() @@ -68,7 +68,7 @@ if(VPP_BUILD_VCL_TESTS) SOURCES "vcl/${test}.c" vcl/vcl_test_protos.c - LINK_LIBRARIES vppcom pthread + LINK_LIBRARIES vppcom pthread ${EPOLL_LIB} NO_INSTALL ) endforeach() diff --git a/src/plugins/hs_apps/http_cli.c b/src/plugins/hs_apps/http_cli.c index 5d4d49c0fba..f42f65342c3 100644 --- a/src/plugins/hs_apps/http_cli.c +++ b/src/plugins/hs_apps/http_cli.c @@ -323,6 +323,13 @@ hcs_ts_rx_callback (session_t *ts) return 0; } + if (msg.data.len == 0) + { + hs->tx_buf = 0; + start_send_data (hs, HTTP_STATUS_BAD_REQUEST); + return 0; + } + /* send the command to a new/recycled vlib process */ vec_validate (args.buf, msg.data.len - 1); rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, args.buf); diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c index f44d4e1bcd1..a99169bafea 100644 --- a/src/plugins/hs_apps/http_client_cli.c +++ b/src/plugins/hs_apps/http_client_cli.c @@ -13,11 +13,9 @@ * limitations under the License. */ -#include <vnet/session/application.h> #include <vnet/session/application_interface.h> #include <vnet/session/session.h> #include <http/http.h> -#include <hs_apps/http_cli.h> #define HCC_DEBUG 0 @@ -68,6 +66,8 @@ typedef struct typedef enum { HCC_REPLY_RECEIVED = 100, + HCC_TRANSPORT_CLOSED, + HCC_CONNECT_FAILED, } hcc_cli_signal_t; static hcc_main_t hcc_main; @@ -136,6 +136,8 @@ hcc_ts_connected_callback (u32 app_index, u32 hc_index, session_t *as, { clib_warning ("connected error: hc_index(%d): %U", hc_index, format_session_error, err); + vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index, + HCC_CONNECT_FAILED, 0); return -1; } @@ -273,6 +275,17 @@ hcc_ts_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf) hcc_session_free (s->thread_index, hs); } +static void +hcc_ts_transport_closed (session_t *s) +{ + hcc_main_t *hcm = &hcc_main; + + HCC_DBG ("transport closed"); + + vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index, + HCC_TRANSPORT_CLOSED, 0); +} + static session_cb_vft_t hcc_session_cb_vft = { .session_accept_callback = hcc_ts_accept_callback, .session_disconnect_callback = hcc_ts_disconnect_callback, @@ -281,6 +294,7 @@ static session_cb_vft_t hcc_session_cb_vft = { .builtin_app_tx_callback = hcc_ts_tx_callback, .session_reset_callback = hcc_ts_reset_callback, .session_cleanup_callback = hcc_ts_cleanup_callback, + .session_transport_closed_callback = hcc_ts_transport_closed, }; static clib_error_t * @@ -370,7 +384,7 @@ hcc_connect () } static clib_error_t * -hcc_run (vlib_main_t *vm) +hcc_run (vlib_main_t *vm, int print_output) { vlib_thread_main_t *vtm = vlib_get_thread_main (); hcc_main_t *hcm = &hcc_main; @@ -407,11 +421,18 @@ hcc_run (vlib_main_t *vm) goto cleanup; case HCC_REPLY_RECEIVED: - vlib_cli_output (vm, "%v", hcm->http_response); + if (print_output) + vlib_cli_output (vm, "%v", hcm->http_response); vec_free (hcm->http_response); break; + case HCC_TRANSPORT_CLOSED: + err = clib_error_return (0, "error, transport closed"); + break; + case HCC_CONNECT_FAILED: + err = clib_error_return (0, "failed to connect"); + break; default: - clib_error_return (0, "unexpected event %d", event_type); + err = clib_error_return (0, "unexpected event %d", event_type); break; } @@ -448,7 +469,7 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input, u64 seg_size; u8 *appns_id = 0; clib_error_t *err = 0; - int rv; + int rv, print_output = 1; hcm->prealloc_fifos = 0; hcm->private_segment_size = 0; @@ -472,6 +493,8 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input, hcm->fifo_size <<= 10; else if (unformat (line_input, "uri %s", &hcm->uri)) ; + else if (unformat (line_input, "no-output")) + print_output = 0; else if (unformat (line_input, "appns %_%v%_", &appns_id)) ; else if (unformat (line_input, "secret %lu", &hcm->appns_secret)) @@ -506,7 +529,7 @@ hcc_command_fn (vlib_main_t *vm, unformat_input_t *input, vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */); vlib_worker_thread_barrier_release (vm); - err = hcc_run (vm); + err = hcc_run (vm, print_output); if (hcc_detach ()) { @@ -526,7 +549,7 @@ done: VLIB_CLI_COMMAND (hcc_command, static) = { .path = "http cli client", .short_help = "[appns <app-ns> secret <appns-secret>] uri http://<ip-addr> " - "query <query-string>", + "query <query-string> [no-output]", .function = hcc_command_fn, .is_mp_safe = 1, }; diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c index 036e6929987..893dd877c29 100644 --- a/src/plugins/http/http.c +++ b/src/plugins/http/http.c @@ -74,14 +74,14 @@ format_http_state (u8 *s, va_list *va) return format (s, "unknown"); } -static inline void -http_state_change (http_conn_t *hc, http_state_t state) -{ - HTTP_DBG (1, "changing http state %U -> %U", format_http_state, - hc->http_state, format_http_state, state); - ASSERT (hc->http_state != state); - hc->http_state = state; -} +#define http_state_change(_hc, _state) \ + do \ + { \ + HTTP_DBG (1, "changing http state %U -> %U", format_http_state, \ + (_hc)->http_state, format_http_state, _state); \ + (_hc)->http_state = _state; \ + } \ + while (0) static inline http_worker_t * http_worker_get (u32 thread_index) @@ -140,6 +140,7 @@ http_listener_free (http_conn_t *lhc) { http_main_t *hm = &http_main; + vec_free (lhc->app_name); if (CLIB_DEBUG) memset (lhc, 0xfc, sizeof (*lhc)); pool_put (hm->listener_pool, lhc); @@ -266,17 +267,21 @@ http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts, app_worker_t *app_wrk; int rv; + ho_hc = http_conn_get_w_thread (ho_hc_index, 0); + ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING); + if (err) { - clib_warning ("ERROR: %d", err); + clib_warning ("half-open hc index %d, error: %U", ho_hc_index, + format_session_error, err); + app_wrk = app_worker_get_if_valid (ho_hc->h_pa_wrk_index); + if (app_wrk) + app_worker_connect_notify (app_wrk, 0, err, ho_hc->h_pa_app_api_ctx); return 0; } new_hc_index = http_conn_alloc_w_thread (ts->thread_index); hc = http_conn_get_w_thread (new_hc_index, ts->thread_index); - ho_hc = http_conn_get_w_thread (ho_hc_index, 0); - - ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING); clib_memcpy_fast (hc, ho_hc, sizeof (*hc)); @@ -372,12 +377,12 @@ static const char *http_redirect_template = "HTTP/1.1 %s\r\n"; static const char *http_response_template = "HTTP/1.1 %s\r\n" "Date: %U GMT\r\n" "Expires: %U GMT\r\n" - "Server: VPP Static\r\n" + "Server: %s\r\n" "Content-Type: %s\r\n" "Content-Length: %lu\r\n\r\n"; static const char *http_request_template = "GET %s HTTP/1.1\r\n" - "User-Agent: VPP HTTP client\r\n" + "User-Agent: %s\r\n" "Accept: */*\r\n"; static u32 @@ -520,17 +525,19 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp) http_msg_t msg = {}; app_worker_t *app_wrk; session_t *as; - http_status_code_t ec; rv = http_read_message (hc); /* Nothing yet, wait for data or timer expire */ if (rv) - return HTTP_SM_STOP; + { + HTTP_DBG (1, "no data to deq"); + return HTTP_SM_STOP; + } if (vec_len (hc->rx_buf) < 8) { - ec = HTTP_STATUS_BAD_REQUEST; + clib_warning ("response buffer too short"); goto error; } @@ -546,9 +553,7 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp) if (rv) { clib_warning ("failed to parse http reply"); - session_transport_closing_notify (&hc->connection); - http_disconnect_transport (hc); - return -1; + goto error; } msg.data.len = content_length; u32 dlen = vec_len (hc->rx_buf) - hc->rx_buf_offset; @@ -577,7 +582,7 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp) { hc->rx_buf_offset = 0; vec_reset_length (hc->rx_buf); - http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD); + http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD); } else { @@ -585,23 +590,20 @@ http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp) } app_wrk = app_worker_get_if_valid (as->app_wrk_index); - app_worker_rx_notify (app_wrk, as); + if (app_wrk) + app_worker_rx_notify (app_wrk, as); return HTTP_SM_STOP; } else { - HTTP_DBG (0, "Unknown http method %v", hc->rx_buf); - ec = HTTP_STATUS_METHOD_NOT_ALLOWED; + clib_warning ("Unknown http method %v", hc->rx_buf); goto error; } - return HTTP_SM_STOP; error: - - http_send_error (hc, ec); session_transport_closing_notify (&hc->connection); + session_transport_closed_notify (&hc->connection); http_disconnect_transport (hc); - return HTTP_SM_ERROR; } @@ -734,6 +736,7 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) * Add headers. For now: * - current time * - expiration time + * - server name * - content type * - data length */ @@ -741,6 +744,10 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) switch (msg.code) { + case HTTP_STATUS_NOT_FOUND: + case HTTP_STATUS_METHOD_NOT_ALLOWED: + case HTTP_STATUS_BAD_REQUEST: + case HTTP_STATUS_INTERNAL_ERROR: case HTTP_STATUS_OK: header = format (0, http_response_template, http_status_code_str[msg.code], @@ -748,6 +755,8 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) format_clib_timebase_time, now, /* Expires */ format_clib_timebase_time, now + 600.0, + /* Server */ + hc->app_name, /* Content type */ http_content_type_str[msg.content_type], /* Length */ @@ -759,6 +768,7 @@ http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp) /* Location: http(s)://new-place already queued up as data */ break; default: + clib_warning ("unsupported status code: %d", msg.code); return HTTP_SM_ERROR; } @@ -791,7 +801,6 @@ error: static http_sm_result_t http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) { - http_status_code_t sc; http_msg_t msg; session_t *as; u8 *buf = 0, *request; @@ -806,29 +815,31 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) if (msg.data.type > HTTP_MSG_DATA_PTR) { clib_warning ("no data"); - sc = HTTP_STATUS_INTERNAL_ERROR; goto error; } if (msg.type != HTTP_MSG_REQUEST) { clib_warning ("unexpected message type %d", msg.type); - sc = HTTP_STATUS_INTERNAL_ERROR; goto error; } - sc = msg.code; + /* currently we support only GET method */ + if (msg.method_type != HTTP_REQ_GET) + { + clib_warning ("unsupported method %d", msg.method_type); + goto error; + } vec_validate (buf, msg.data.len - 1); rv = svm_fifo_dequeue (as->tx_fifo, msg.data.len, buf); ASSERT (rv == msg.data.len); - request = format (0, http_request_template, buf); + request = format (0, http_request_template, buf, hc->app_name); offset = http_send_data (hc, request, vec_len (request), 0); if (offset != vec_len (request)) { clib_warning ("sending request failed!"); - sc = HTTP_STATUS_INTERNAL_ERROR; goto error; } @@ -837,83 +848,85 @@ http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp) vec_free (buf); vec_free (request); - return HTTP_SM_CONTINUE; + return HTTP_SM_STOP; error: - clib_warning ("unexpected msg type from app %u", msg.type); - http_send_error (hc, sc); + svm_fifo_dequeue_drop_all (as->tx_fifo); session_transport_closing_notify (&hc->connection); + session_transport_closed_notify (&hc->connection); http_disconnect_transport (hc); - return HTTP_SM_STOP; -} - -static void -http_app_enqueue (http_conn_t *hc, session_t *as) -{ - app_worker_t *app_wrk; - u32 dlen, max_enq, n_enq; - int rv; - - dlen = vec_len (hc->rx_buf) - hc->rx_buf_offset; - if (!dlen) - return; - - max_enq = svm_fifo_max_enqueue (as->rx_fifo); - n_enq = clib_min (max_enq, dlen); - rv = svm_fifo_enqueue (as->rx_fifo, n_enq, &hc->rx_buf[hc->rx_buf_offset]); - if (rv < 0) - return; - - hc->rx_buf_offset += rv; - if (hc->rx_buf_offset >= vec_len (hc->rx_buf)) - { - vec_reset_length (hc->rx_buf); - hc->rx_buf_offset = 0; - } - - app_wrk = app_worker_get_if_valid (as->app_wrk_index); - ASSERT (app_wrk); - app_worker_rx_notify (app_wrk, as); + return HTTP_SM_ERROR; } static http_sm_result_t http_state_client_io_more_data (http_conn_t *hc, transport_send_params_t *sp) { session_t *as, *ts; - u32 max_deq; - int n_read; + app_worker_t *app_wrk; + svm_fifo_seg_t _seg, *seg = &_seg; + u32 max_len, max_deq, max_enq, n_segs = 1; + int rv, len; as = session_get_from_handle (hc->h_pa_session_handle); ts = session_get_from_handle (hc->h_tc_session_handle); - http_app_enqueue (hc, as); + max_deq = svm_fifo_max_dequeue (ts->rx_fifo); + if (max_deq == 0) + { + HTTP_DBG (1, "no data to deq"); + return HTTP_SM_STOP; + } - if (hc->to_recv == 0) + max_enq = svm_fifo_max_enqueue (as->rx_fifo); + if (max_enq == 0) { - http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD); + HTTP_DBG (1, "app's rx fifo full"); + svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF); return HTTP_SM_STOP; } - max_deq = svm_fifo_max_dequeue (ts->rx_fifo); - if (max_deq > 0) + max_len = clib_min (max_enq, max_deq); + len = svm_fifo_segments (ts->rx_fifo, 0, seg, &n_segs, max_len); + if (len < 0) { - vec_validate (hc->rx_buf, max_deq - 1); - n_read = svm_fifo_dequeue (ts->rx_fifo, max_deq, hc->rx_buf); - ASSERT (n_read == max_deq); + HTTP_DBG (1, "svm_fifo_segments() len %d", len); + return HTTP_SM_STOP; + } - if (svm_fifo_is_empty (ts->rx_fifo)) - svm_fifo_unset_event (ts->rx_fifo); + rv = svm_fifo_enqueue_segments (as->rx_fifo, seg, 1, 0 /* allow partial */); + if (rv < 0) + { + clib_warning ("data enqueue failed, rv: %d", rv); + return HTTP_SM_ERROR; + } - hc->to_recv -= n_read; - vec_set_len (hc->rx_buf, n_read); + svm_fifo_dequeue_drop (ts->rx_fifo, rv); + if (rv > hc->to_recv) + { + clib_warning ("http protocol error: received more data than expected"); + session_transport_closing_notify (&hc->connection); + http_disconnect_transport (hc); + http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD); + return HTTP_SM_ERROR; } + hc->to_recv -= rv; + HTTP_DBG (1, "drained %d from ts; remains %d", rv, hc->to_recv); - if (hc->rx_buf_offset < vec_len (hc->rx_buf) || - svm_fifo_max_dequeue_cons (ts->rx_fifo)) + if (hc->to_recv == 0) { - session_enqueue_notify (ts); + hc->rx_buf_offset = 0; + vec_reset_length (hc->rx_buf); + http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD); } - return HTTP_SM_CONTINUE; + + app_wrk = app_worker_get_if_valid (as->app_wrk_index); + if (app_wrk) + app_worker_rx_notify (app_wrk, as); + + if (svm_fifo_max_dequeue_cons (ts->rx_fifo)) + session_enqueue_notify (ts); + + return HTTP_SM_STOP; } static http_sm_result_t @@ -983,6 +996,7 @@ static void http_req_run_state_machine (http_conn_t *hc, transport_send_params_t *sp) { http_sm_result_t res; + do { res = state_funcs[hc->http_state](hc, sp); @@ -1010,6 +1024,12 @@ http_ts_rx_callback (session_t *ts) return -1; } + if (hc->state == HTTP_CONN_STATE_CLOSED) + { + svm_fifo_dequeue_drop_all (ts->tx_fifo); + return 0; + } + http_req_run_state_machine (hc, 0); if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED) @@ -1153,6 +1173,11 @@ http_transport_connect (transport_endpoint_cfg_t *tep) hc->state = HTTP_CONN_STATE_CONNECTING; cargs->api_context = hc_index; + if (vec_len (app->name)) + hc->app_name = vec_dup (app->name); + else + hc->app_name = format (0, "VPP HTTP client"); + HTTP_DBG (1, "hc ho_index %x", hc_index); if ((error = vnet_connect (cargs))) @@ -1205,6 +1230,11 @@ http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep) lhc->c_s_index = app_listener_index; lhc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP; + if (vec_len (app->name)) + lhc->app_name = vec_dup (app->name); + else + lhc->app_name = format (0, "VPP server app"); + return lhc_index; } @@ -1245,7 +1275,11 @@ http_transport_close (u32 hc_index, u32 thread_index) http_disconnect_transport (hc); return; } - + else if (hc->state == HTTP_CONN_STATE_CLOSED) + { + HTTP_DBG (1, "nothing to do, already closed"); + return; + } as = session_get_from_handle (hc->h_pa_session_handle); /* Nothing more to send, confirm close */ diff --git a/src/plugins/http/http.h b/src/plugins/http/http.h index dbae5ac4611..c9912dd6db8 100644 --- a/src/plugins/http/http.h +++ b/src/plugins/http/http.h @@ -227,6 +227,7 @@ typedef struct http_tc_ http_conn_state_t state; u32 timer_handle; + u8 *app_name; /* * Current request diff --git a/src/plugins/ikev2/ikev2_api.c b/src/plugins/ikev2/ikev2_api.c index a3e71668126..c9608aa660b 100644 --- a/src/plugins/ikev2/ikev2_api.c +++ b/src/plugins/ikev2/ikev2_api.c @@ -173,7 +173,7 @@ send_profile (ikev2_profile_t * profile, vl_api_registration_t * reg, rmp->profile.lifetime_jitter = profile->lifetime_jitter; rmp->profile.handover = profile->handover; - vl_api_ikev2_profile_t_endian (&rmp->profile); + vl_api_ikev2_profile_t_endian (&rmp->profile, 1 /* to network */); vl_api_send_msg (reg, (u8 *) rmp); } @@ -291,7 +291,7 @@ send_sa (ikev2_sa_t * sa, vl_api_ikev2_sa_dump_t * mp, u32 api_sa_index) ikev2_copy_stats (&rsa->stats, &sa->stats); - vl_api_ikev2_sa_t_endian(rsa); + vl_api_ikev2_sa_t_endian (rsa, 1 /* to network */); }); } @@ -382,7 +382,7 @@ send_sa_v2 (ikev2_sa_t *sa, vl_api_ikev2_sa_v2_dump_t *mp, u32 api_sa_index) ikev2_copy_stats (&rsa->stats, &sa->stats); - vl_api_ikev2_sa_v2_t_endian (rsa); + vl_api_ikev2_sa_v2_t_endian (rsa, 1 /* to network */); }); } @@ -476,7 +476,7 @@ send_sa_v3 (ikev2_sa_t *sa, vl_api_ikev2_sa_v3_dump_t *mp, u32 api_sa_index) ikev2_copy_stats (&rsa->stats, &sa->stats); - vl_api_ikev2_sa_v3_t_endian (rsa); + vl_api_ikev2_sa_v3_t_endian (rsa, 1 /* to network */); }); } @@ -549,7 +549,7 @@ send_child_sa (ikev2_child_sa_t * child, k->sk_ar_len); } - vl_api_ikev2_child_sa_t_endian (&rmp->child_sa); + vl_api_ikev2_child_sa_t_endian (&rmp->child_sa, 1 /* to network */); }); } @@ -628,7 +628,7 @@ send_child_sa_v2 (ikev2_child_sa_t *child, vl_api_ikev2_child_sa_v2_dump_t *mp, clib_memcpy (&k->sk_ar, child->sk_ar, k->sk_ar_len); } - vl_api_ikev2_child_sa_v2_t_endian (&rmp->child_sa); + vl_api_ikev2_child_sa_v2_t_endian (&rmp->child_sa, 1 /* to network */); }); } @@ -700,7 +700,7 @@ static void rmp->ts.sa_index = api_sa_index; rmp->ts.child_sa_index = child_sa_index; cp_ts (&rmp->ts, ts, mp->is_initiator); - vl_api_ikev2_ts_t_endian (&rmp->ts); + vl_api_ikev2_ts_t_endian (&rmp->ts, 1 /* to network */); }); } } diff --git a/src/plugins/ikev2/ikev2_test.c b/src/plugins/ikev2/ikev2_test.c index 5682d7058f6..93683a5b5dc 100644 --- a/src/plugins/ikev2/ikev2_test.c +++ b/src/plugins/ikev2/ikev2_test.c @@ -391,7 +391,7 @@ vl_api_ikev2_sa_details_t_handler (vl_api_ikev2_sa_details_t * mp) ip_address_t iaddr; ip_address_t raddr; vl_api_ikev2_keys_t *k = &sa->keys; - vl_api_ikev2_sa_t_endian (sa); + vl_api_ikev2_sa_t_endian (sa, 0 /* from network */); ip_address_decode2 (&sa->iaddr, &iaddr); ip_address_decode2 (&sa->raddr, &raddr); @@ -461,7 +461,7 @@ vl_api_ikev2_sa_v2_details_t_handler (vl_api_ikev2_sa_v2_details_t *mp) ip_address_t iaddr; ip_address_t raddr; vl_api_ikev2_keys_t *k = &sa->keys; - vl_api_ikev2_sa_v2_t_endian (sa); + vl_api_ikev2_sa_v2_t_endian (sa, 0 /* from network */); ip_address_decode2 (&sa->iaddr, &iaddr); ip_address_decode2 (&sa->raddr, &raddr); @@ -533,7 +533,7 @@ vl_api_ikev2_sa_v3_details_t_handler (vl_api_ikev2_sa_v3_details_t *mp) ip_address_t iaddr; ip_address_t raddr; vl_api_ikev2_keys_t *k = &sa->keys; - vl_api_ikev2_sa_v3_t_endian (sa); + vl_api_ikev2_sa_v3_t_endian (sa, 0 /* from network */); ip_address_decode2 (&sa->iaddr, &iaddr); ip_address_decode2 (&sa->raddr, &raddr); @@ -619,7 +619,7 @@ vl_api_ikev2_child_sa_details_t_handler (vl_api_ikev2_child_sa_details_t * mp) vat_main_t *vam = ikev2_test_main.vat_main; vl_api_ikev2_child_sa_t *child_sa = &mp->child_sa; vl_api_ikev2_keys_t *k = &child_sa->keys; - vl_api_ikev2_child_sa_t_endian (child_sa); + vl_api_ikev2_child_sa_t_endian (child_sa, 0 /* from network */); fformat (vam->ofp, " child sa %u:\n", child_sa->child_sa_index); @@ -696,7 +696,7 @@ vl_api_ikev2_child_sa_v2_details_t_handler ( vat_main_t *vam = ikev2_test_main.vat_main; vl_api_ikev2_child_sa_t *child_sa = &mp->child_sa; vl_api_ikev2_keys_t *k = &child_sa->keys; - vl_api_ikev2_child_sa_t_endian (child_sa); + vl_api_ikev2_child_sa_t_endian (child_sa, 0 /* from network */); fformat (vam->ofp, " child sa %u:\n", child_sa->child_sa_index); @@ -784,7 +784,7 @@ static void vat_main_t *vam = ikev2_test_main.vat_main; vl_api_ikev2_ts_t *ts = &mp->ts; ip_address_t start_addr, end_addr; - vl_api_ikev2_ts_t_endian (ts); + vl_api_ikev2_ts_t_endian (ts, 0 /* from network */); ip_address_decode2 (&ts->start_addr, &start_addr); ip_address_decode2 (&ts->end_addr, &end_addr); diff --git a/src/plugins/lisp/lisp-cp/lisp_types.h b/src/plugins/lisp/lisp-cp/lisp_types.h index 21bd72178d7..e92f8f80c70 100644 --- a/src/plugins/lisp/lisp-cp/lisp_types.h +++ b/src/plugins/lisp/lisp-cp/lisp_types.h @@ -198,7 +198,8 @@ u8 gid_address_len (gid_address_t * a); void *gid_address_cast (gid_address_t * gid, gid_address_type_t type); void gid_address_copy (gid_address_t * dst, gid_address_t * src); u32 gid_address_parse (u8 * offset, gid_address_t * a); -void gid_address_ip_set (gid_address_t * dst, void *src, u8 version); +void gid_address_ip_set (gid_address_t *dst, void *src, + ip_address_family_t version); #define gid_address_type(_a) (_a)->type #define gid_address_ippref(_a) (_a)->ippref diff --git a/src/plugins/marvell/pp2/cli.c b/src/plugins/marvell/pp2/cli.c index f4ecb1873c9..5072a3c035b 100644 --- a/src/plugins/marvell/pp2/cli.c +++ b/src/plugins/marvell/pp2/cli.c @@ -31,7 +31,7 @@ mrvl_pp2_create_command_fn (vlib_main_t * vm, unformat_input_t * input, { unformat_input_t _line_input, *line_input = &_line_input; mrvl_pp2_create_if_args_t args = { 0 }; - uint val; + unsigned int val; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) diff --git a/src/plugins/nat/nat44-ed/nat44_ed_api.c b/src/plugins/nat/nat44-ed/nat44_ed_api.c index 1f01410afce..b6c9d51d777 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_api.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_api.c @@ -442,7 +442,8 @@ send_nat44_ed_output_interface_details (u32 index, vl_api_registration_t *rp, /* Endian hack until apigen registers _details * endian functions */ - vl_api_nat44_ed_output_interface_details_t_endian (rmp); + vl_api_nat44_ed_output_interface_details_t_endian (rmp, + 1 /* to network */); rmp->_vl_msg_id = htons (rmp->_vl_msg_id); rmp->context = htonl (rmp->context); })); diff --git a/src/plugins/nat/nat44-ei/nat44_ei_api.c b/src/plugins/nat/nat44-ei/nat44_ei_api.c index 8671a556929..454a5032c6a 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_api.c +++ b/src/plugins/nat/nat44-ei/nat44_ei_api.c @@ -751,7 +751,8 @@ send_nat44_ei_output_interface_details (u32 index, vl_api_registration_t *rp, /* Endian hack until apigen registers _details * endian functions */ - vl_api_nat44_ei_output_interface_details_t_endian (rmp); + vl_api_nat44_ei_output_interface_details_t_endian (rmp, + 1 /* to network */); rmp->_vl_msg_id = htons (rmp->_vl_msg_id); rmp->context = htonl (rmp->context); })); diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c index 01b333a5234..3b981d69986 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c +++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c @@ -859,7 +859,7 @@ nat44_ei_icmp_in2out (vlib_buffer_t *b0, ip4_header_t *ip0, nat44_ei_main_t *nm = &nat44_ei_main; vlib_main_t *vm = vlib_get_main (); ip4_address_t addr; - u16 port; + u16 port = 0; u32 fib_index; nat_protocol_t proto; icmp_echo_header_t *echo0, *inner_echo0 = 0; diff --git a/src/plugins/nat/pnat/pnat_api.c b/src/plugins/nat/pnat/pnat_api.c index 02e61219d1e..a4e7ff192bf 100644 --- a/src/plugins/nat/pnat/pnat_api.c +++ b/src/plugins/nat/pnat/pnat_api.c @@ -116,7 +116,8 @@ static void send_bindings_details(u32 index, vl_api_registration_t *rp, /* Endian hack until apigen registers _details * endian functions */ - vl_api_pnat_bindings_details_t_endian(rmp); + vl_api_pnat_bindings_details_t_endian( + rmp, 1 /* to network */); rmp->_vl_msg_id = htons(rmp->_vl_msg_id); rmp->context = htonl(rmp->context); })); @@ -158,7 +159,7 @@ static void send_interfaces_details(u32 index, vl_api_registration_t *rp, /* Endian hack until apigen registers _details * endian functions */ - vl_api_pnat_interfaces_details_t_endian(rmp); + vl_api_pnat_interfaces_details_t_endian(rmp, 1 /* to network */); rmp->_vl_msg_id = htons(rmp->_vl_msg_id); rmp->context = htonl(rmp->context); })); diff --git a/src/plugins/netmap/CMakeLists.txt b/src/plugins/netmap/CMakeLists.txt new file mode 100644 index 00000000000..d53a9e0911a --- /dev/null +++ b/src/plugins/netmap/CMakeLists.txt @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2024 Tom Jones <thj@freebsd.org> +# +# This software was developed by Tom Jones <thj@freebsd.org> under sponsorship +# from the FreeBSD Foundation. +# + +if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") + message(WARNING "Netmap is only currently support on FreeBSD - netmap plugin disabled") + return() +endif() + +add_vpp_plugin(netmap + SOURCES + plugin.c + netmap.c + node.c + device.c + cli.c + netmap_api.c + + MULTIARCH_SOURCES + node.c + device.c + + INSTALL_HEADERS + netmap.h + net_netmap.h + + API_FILES + netmap.api +) diff --git a/src/plugins/netmap/FEATURE.yaml b/src/plugins/netmap/FEATURE.yaml new file mode 100644 index 00000000000..a9dfb2163e4 --- /dev/null +++ b/src/plugins/netmap/FEATURE.yaml @@ -0,0 +1,12 @@ +--- +name: Netmap Device +maintainer: Tom Jones <thj@freebsd.org> +features: + - L4 checksum offload +description: "Create a netmap interface, which is a high speed user-space + interface that allows VPP to patch to a physical or virtual NIC + without the use of DPDK" +missing: + - API dump +state: production +properties: [API, CLI, STATS, MULTITHREAD] diff --git a/src/plugins/netmap/cli.c b/src/plugins/netmap/cli.c new file mode 100644 index 00000000000..b54d397ecbe --- /dev/null +++ b/src/plugins/netmap/cli.c @@ -0,0 +1,236 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> + +#include <netmap/net_netmap.h> +#include <netmap/netmap.h> + +static clib_error_t * +netmap_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 *host_if_name = NULL; + u8 hwaddr[6]; + u8 *hw_addr_ptr = 0; + int r; + u8 is_pipe = 0; + u8 is_master = 0; + u32 sw_if_index = ~0; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &host_if_name)) + ; + else + if (unformat + (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr)) + hw_addr_ptr = hwaddr; + else if (unformat (line_input, "pipe")) + is_pipe = 1; + else if (unformat (line_input, "master")) + is_master = 1; + else if (unformat (line_input, "slave")) + is_master = 0; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (host_if_name == NULL) + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } + + r = + netmap_create_if (vm, host_if_name, hw_addr_ptr, is_pipe, is_master, + &sw_if_index); + + if (r == VNET_API_ERROR_SYSCALL_ERROR_1) + { + error = clib_error_return (0, "%s (errno %d)", strerror (errno), errno); + goto done; + } + + if (r == VNET_API_ERROR_INVALID_INTERFACE) + { + error = clib_error_return (0, "Invalid interface name"); + goto done; + } + + if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS) + { + error = clib_error_return (0, "Interface already exists"); + goto done; + } + + vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (), + sw_if_index); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * '<em>netmap</em>' is a framework for very fast packet I/O from userspace. + * '<em>VALE</em>' is an equally fast in-kernel software switch using the + * netmap API. '<em>netmap</em>' includes '<em>netmap pipes</em>', a shared + * memory packet transport channel. Together, they provide a high speed + * user-space interface that allows VPP to patch into a linux namespace, a + * linux container, or a physical NIC without the use of DPDK. Netmap/VALE + * generates the '<em>netmap.ko</em>' kernel module that needs to be loaded + * before netmap interfaces can be created. + * - https://github.com/luigirizzo/netmap - Netmap/VALE repo. + * - https://github.com/vpp-dev/netmap - VPP development package for Netmap/VALE, + * which is a snapshot of the Netmap/VALE repo with minor changes to work + * with containers and modified kernel drivers to work with NICs. + * + * Create a netmap interface that will attach to a linux interface. + * The interface must already exist. Once created, a new netmap interface + * will exist in VPP with the name '<em>netmap-<ifname></em>', where + * '<em><ifname></em>' takes one of two forms: + * - <b>ifname</b> - Linux interface to bind too. + * - <b>valeXXX:YYY</b> - + * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE + * interface that must start with '<em>vale</em>' and is less + * than 16 characters. + * - Where '<em>YYY</em>' is an existing linux namespace. + * + * This command has the following optional parameters: + * + * - <b>hw-addr <mac-addr></b> - Optional ethernet address, can be in either + * X:X:X:X:X:X unix or X.X.X cisco format. + * + * - <b>pipe</b> - Optional flag to indicate that a '<em>netmap pipe</em>' + * instance should be created. + * + * - <b>master | slave</b> - Optional flag to indicate whether VPP should + * be the master or slave of the '<em>netmap pipe</em>'. Only considered + * if '<em>pipe</em>' is entered. Defaults to '<em>slave</em>' if not entered. + * + * @cliexpar + * Example of how to create a netmap interface tied to the linux + * namespace '<em>vpp1</em>': + * @cliexstart{create netmap name vale00:vpp1 hw-addr 02:FE:3F:34:15:9B pipe master} + * netmap-vale00:vpp1 + * @cliexend + * Once the netmap interface is created, enable the interface using: + * @cliexcmd{set interface state netmap-vale00:vpp1 up} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (netmap_create_command, static) = { + .path = "create netmap", + .short_help = "create netmap name <ifname>|valeXXX:YYY " + "[hw-addr <mac-addr>] [pipe] [master|slave]", + .function = netmap_create_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +netmap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + u8 *host_if_name = NULL; + clib_error_t *error = NULL; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "name %s", &host_if_name)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (host_if_name == NULL) + { + error = clib_error_return (0, "missing host interface name"); + goto done; + } + + netmap_delete_if (vm, host_if_name); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * Delete a netmap interface. Use the '<em><ifname></em>' to identify + * the netmap interface to be deleted. In VPP, netmap interfaces are + * named as '<em>netmap-<ifname></em>', where '<em><ifname></em>' + * takes one of two forms: + * - <b>ifname</b> - Linux interface to bind too. + * - <b>valeXXX:YYY</b> - + * - Where '<em>valeXXX</em>' is an arbitrary name for a VALE + * interface that must start with '<em>vale</em>' and is less + * than 16 characters. + * - Where '<em>YYY</em>' is an existing linux namespace. + * + * @cliexpar + * Example of how to delete a netmap interface named '<em>netmap-vale00:vpp1</em>': + * @cliexcmd{delete netmap name vale00:vpp1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (netmap_delete_command, static) = { + .path = "delete netmap", + .short_help = "delete netmap name <ifname>|valeXXX:YYY", + .function = netmap_delete_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +netmap_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (netmap_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/device.c b/src/plugins/netmap/device.c new file mode 100644 index 00000000000..505deb988c4 --- /dev/null +++ b/src/plugins/netmap/device.c @@ -0,0 +1,252 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> + +#include <netmap/net_netmap.h> +#include <netmap/netmap.h> + +#define foreach_netmap_tx_func_error \ +_(NO_FREE_SLOTS, "no free tx slots") \ +_(PENDING_MSGS, "pending msgs in tx ring") + +typedef enum +{ +#define _(f,s) NETMAP_TX_ERROR_##f, + foreach_netmap_tx_func_error +#undef _ + NETMAP_TX_N_ERROR, +} netmap_tx_func_error_t; + +static char *netmap_tx_func_error_strings[] = { +#define _(n,s) s, + foreach_netmap_tx_func_error +#undef _ +}; + + +static u8 * +format_netmap_device_name (u8 * s, va_list * args) +{ + u32 i = va_arg (*args, u32); + netmap_main_t *apm = &netmap_main; + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, i); + + s = format (s, "netmap-%s", nif->host_if_name); + return s; +} + +static u8 * +format_netmap_device (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + int verbose = va_arg (*args, int); + netmap_main_t *nm = &netmap_main; + netmap_if_t *nif = vec_elt_at_index (nm->interfaces, dev_instance); + u32 indent = format_get_indent (s); + + s = format (s, "NETMAP interface"); + if (verbose) + { + s = format (s, "\n%U version %d flags 0x%x" + "\n%U region %u memsize 0x%x offset 0x%x" + "\n%U tx_slots %u rx_slots %u tx_rings %u rx_rings %u", + format_white_space, indent + 2, + nif->req->nr_version, + nif->req->nr_flags, + format_white_space, indent + 2, + nif->mem_region, + nif->req->nr_memsize, + nif->req->nr_offset, + format_white_space, indent + 2, + nif->req->nr_tx_slots, + nif->req->nr_rx_slots, + nif->req->nr_tx_rings, nif->req->nr_rx_rings); + } + return s; +} + +static u8 * +format_netmap_tx_trace (u8 * s, va_list * args) +{ + s = format (s, "Unimplemented..."); + return s; +} + +VNET_DEVICE_CLASS_TX_FN (netmap_device_class) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + netmap_main_t *nm = &netmap_main; + u32 *buffers = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + f64 const time_constant = 1e3; + vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; + netmap_if_t *nif = pool_elt_at_index (nm->interfaces, rd->dev_instance); + int cur_ring; + + clib_spinlock_lock_if_init (&nif->lockp); + + cur_ring = nif->first_tx_ring; + + while (n_left && cur_ring <= nif->last_tx_ring) + { + struct netmap_ring *ring = NETMAP_TXRING (nif->nifp, cur_ring); + int n_free_slots = nm_ring_space (ring); + uint cur = ring->cur; + + if (nm_tx_pending (ring)) + { + if (ioctl (nif->fd, NIOCTXSYNC, NULL) < 0) + clib_unix_warning ("NIOCTXSYNC"); + clib_cpu_time_wait (time_constant); + + if (nm_tx_pending (ring) && !n_free_slots) + { + cur_ring++; + continue; + } + } + + while (n_left && n_free_slots) + { + vlib_buffer_t *b0 = 0; + u32 bi = buffers[0]; + u32 len; + u32 offset = 0; + buffers++; + + struct netmap_slot *slot = &ring->slot[cur]; + + do + { + b0 = vlib_get_buffer (vm, bi); + len = b0->current_length; + /* memcpy */ + clib_memcpy_fast ((u8 *) NETMAP_BUF (ring, slot->buf_idx) + + offset, vlib_buffer_get_current (b0), len); + offset += len; + } + while ((bi = b0->next_buffer)); + + slot->len = offset; + cur = (cur + 1) % ring->num_slots; + n_free_slots--; + n_left--; + } + CLIB_MEMORY_BARRIER (); + ring->head = ring->cur = cur; + } + + if (n_left < frame->n_vectors) + ioctl (nif->fd, NIOCTXSYNC, NULL); + + clib_spinlock_unlock_if_init (&nif->lockp); + + if (n_left) + vlib_error_count (vm, node->node_index, + (n_left == + frame->n_vectors ? NETMAP_TX_ERROR_PENDING_MSGS : + NETMAP_TX_ERROR_NO_FREE_SLOTS), n_left); + + vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors); + return frame->n_vectors; +} + +static void +netmap_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index, + u32 node_index) +{ + netmap_main_t *apm = &netmap_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + + /* Shut off redirection */ + if (node_index == ~0) + { + nif->per_interface_next_index = node_index; + return; + } + + nif->per_interface_next_index = + vlib_node_add_next (vlib_get_main (), netmap_input_node.index, + node_index); +} + +static void +netmap_clear_hw_interface_counters (u32 instance) +{ + /* Nothing for now */ +} + +static clib_error_t * +netmap_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) +{ + netmap_main_t *apm = &netmap_main; + vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index); + netmap_if_t *nif = pool_elt_at_index (apm->interfaces, hw->dev_instance); + u32 hw_flags; + + nif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + + if (nif->is_admin_up) + hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP; + else + hw_flags = 0; + + vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags); + + return 0; +} + +static clib_error_t * +netmap_subif_add_del_function (vnet_main_t * vnm, + u32 hw_if_index, + struct vnet_sw_interface_t *st, int is_add) +{ + /* Nothing for now */ + return 0; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (netmap_device_class) = { + .name = "netmap", + .format_device_name = format_netmap_device_name, + .format_device = format_netmap_device, + .format_tx_trace = format_netmap_tx_trace, + .tx_function_n_errors = NETMAP_TX_N_ERROR, + .tx_function_error_strings = netmap_tx_func_error_strings, + .rx_redirect_to_node = netmap_set_interface_next_node, + .clear_counters = netmap_clear_hw_interface_counters, + .admin_up_down_function = netmap_interface_admin_up_down, + .subif_add_del_function = netmap_subif_add_del_function, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/net_netmap.h b/src/plugins/netmap/net_netmap.h new file mode 100644 index 00000000000..ecccedd4484 --- /dev/null +++ b/src/plugins/netmap/net_netmap.h @@ -0,0 +1,650 @@ +/* + * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``S IS''AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $ + * + * Definitions of constants and the structures used by the netmap + * framework, for the part visible to both kernel and userspace. + * Detailed info on netmap is available with "man netmap" or at + * + * http://info.iet.unipi.it/~luigi/netmap/ + * + * This API is also used to communicate with the VALE software switch + */ + +#ifndef _NET_NETMAP_H_ +#define _NET_NETMAP_H_ + +#define NETMAP_API 14 /* current API version */ + +#define NETMAP_MIN_API 14 /* min and max versions accepted */ +#define NETMAP_MAX_API 15 +/* + * Some fields should be cache-aligned to reduce contention. + * The alignment is architecture and OS dependent, but rather than + * digging into OS headers to find the exact value we use an estimate + * that should cover most architectures. + */ +#define NM_CACHE_ALIGN 128 + +/* + * --- Netmap data structures --- + * + * The userspace data structures used by netmap are shown below. + * They are allocated by the kernel and mmap()ed by userspace threads. + * Pointers are implemented as memory offsets or indexes, + * so that they can be easily dereferenced in kernel and userspace. + + KERNEL (opaque, obviously) + + ==================================================================== + | + USERSPACE | struct netmap_ring + +---->+---------------+ + / | head,cur,tail | + struct netmap_if (nifp, 1 per fd) / | buf_ofs | + +---------------+ / | other fields | + | ni_tx_rings | / +===============+ + | ni_rx_rings | / | buf_idx, len | slot[0] + | | / | flags, ptr | + | | / +---------------+ + +===============+ / | buf_idx, len | slot[1] + | txring_ofs[0] | (rel.to nifp)--' | flags, ptr | + | txring_ofs[1] | +---------------+ + (tx+1 entries) (num_slots entries) + | txring_ofs[t] | | buf_idx, len | slot[n-1] + +---------------+ | flags, ptr | + | rxring_ofs[0] | +---------------+ + | rxring_ofs[1] | + (rx+1 entries) + | rxring_ofs[r] | + +---------------+ + + * For each "interface" (NIC, host stack, PIPE, VALE switch port) bound to + * a file descriptor, the mmap()ed region contains a (logically readonly) + * struct netmap_if pointing to struct netmap_ring's. + * + * There is one netmap_ring per physical NIC ring, plus one tx/rx ring + * pair attached to the host stack (this pair is unused for non-NIC ports). + * + * All physical/host stack ports share the same memory region, + * so that zero-copy can be implemented between them. + * VALE switch ports instead have separate memory regions. + * + * The netmap_ring is the userspace-visible replica of the NIC ring. + * Each slot has the index of a buffer (MTU-sized and residing in the + * mmapped region), its length and some flags. An extra 64-bit pointer + * is provided for user-supplied buffers in the tx path. + * + * In user space, the buffer address is computed as + * (char *)ring + buf_ofs + index * NETMAP_BUF_SIZE + * + * Added in NETMAP_API 11: + * + * + NIOCREGIF can request the allocation of extra spare buffers from + * the same memory pool. The desired number of buffers must be in + * nr_arg3. The ioctl may return fewer buffers, depending on memory + * availability. nr_arg3 will return the actual value, and, once + * mapped, nifp->ni_bufs_head will be the index of the first buffer. + * + * The buffers are linked to each other using the first uint32_t + * as the index. On close, ni_bufs_head must point to the list of + * buffers to be released. + * + * + NIOCREGIF can request space for extra rings (and buffers) + * allocated in the same memory space. The number of extra rings + * is in nr_arg1, and is advisory. This is a no-op on NICs where + * the size of the memory space is fixed. + * + * + NIOCREGIF can attach to PIPE rings sharing the same memory + * space with a parent device. The ifname indicates the parent device, + * which must already exist. Flags in nr_flags indicate if we want to + * bind the master or slave side, the index (from nr_ringid) + * is just a cookie and does not need to be sequential. + * + * + NIOCREGIF can also attach to 'monitor' rings that replicate + * the content of specific rings, also from the same memory space. + * + * Extra flags in nr_flags support the above functions. + * Application libraries may use the following naming scheme: + * netmap:foo all NIC ring pairs + * netmap:foo^ only host ring pair + * netmap:foo+ all NIC ring + host ring pairs + * netmap:foo-k the k-th NIC ring pair + * netmap:foo{k PIPE ring pair k, master side + * netmap:foo}k PIPE ring pair k, slave side + */ + +/* + * struct netmap_slot is a buffer descriptor + */ +struct netmap_slot { + uint32_t buf_idx; /* buffer index */ + uint16_t len; /* length for this slot */ + uint16_t flags; /* buf changed, etc. */ + uint64_t ptr; /* pointer for indirect buffers */ +}; + +/* + * The following flags control how the slot is used + */ + +#define NS_BUF_CHANGED 0x0001 /* buf_idx changed */ + /* + * must be set whenever buf_idx is changed (as it might be + * necessary to recompute the physical address and mapping) + * + * It is also set by the kernel whenever the buf_idx is + * changed internally (e.g., by pipes). Applications may + * use this information to know when they can reuse the + * contents of previously prepared buffers. + */ + +#define NS_REPORT 0x0002 /* ask the hardware to report results */ + /* + * Request notification when slot is used by the hardware. + * Normally transmit completions are handled lazily and + * may be unreported. This flag lets us know when a slot + * has been sent (e.g. to terminate the sender). + */ + +#define NS_FORWARD 0x0004 /* pass packet 'forward' */ + /* + * (Only for physical ports, rx rings with NR_FORWARD set). + * Slot released to the kernel (i.e. before ring->head) with + * this flag set are passed to the peer ring (host/NIC), + * thus restoring the host-NIC connection for these slots. + * This supports efficient traffic monitoring or firewalling. + */ + +#define NS_NO_LEARN 0x0008 /* disable bridge learning */ + /* + * On a VALE switch, do not 'learn' the source port for + * this buffer. + */ + +#define NS_INDIRECT 0x0010 /* userspace buffer */ + /* + * (VALE tx rings only) data is in a userspace buffer, + * whose address is in the 'ptr' field in the slot. + */ + +#define NS_MOREFRAG 0x0020 /* packet has more fragments */ + /* + * (VALE ports only) + * Set on all but the last slot of a multi-segment packet. + * The 'len' field refers to the individual fragment. + */ + +#define NS_PORT_SHIFT 8 +#define NS_PORT_MASK (0xff << NS_PORT_SHIFT) + /* + * The high 8 bits of the flag, if not zero, indicate the + * destination port for the VALE switch, overriding + * the lookup table. + */ + +#define NS_RFRAGS(_slot) ( ((_slot)->flags >> 8) & 0xff) + /* + * (VALE rx rings only) the high 8 bits + * are the number of fragments. + */ + + +/* + * struct netmap_ring + * + * Netmap representation of a TX or RX ring (also known as "queue"). + * This is a queue implemented as a fixed-size circular array. + * At the software level the important fields are: head, cur, tail. + * + * In TX rings: + * + * head first slot available for transmission. + * cur wakeup point. select() and poll() will unblock + * when 'tail' moves past 'cur' + * tail (readonly) first slot reserved to the kernel + * + * [head .. tail-1] can be used for new packets to send; + * 'head' and 'cur' must be incremented as slots are filled + * with new packets to be sent; + * 'cur' can be moved further ahead if we need more space + * for new transmissions. XXX todo (2014-03-12) + * + * In RX rings: + * + * head first valid received packet + * cur wakeup point. select() and poll() will unblock + * when 'tail' moves past 'cur' + * tail (readonly) first slot reserved to the kernel + * + * [head .. tail-1] contain received packets; + * 'head' and 'cur' must be incremented as slots are consumed + * and can be returned to the kernel; + * 'cur' can be moved further ahead if we want to wait for + * new packets without returning the previous ones. + * + * DATA OWNERSHIP/LOCKING: + * The netmap_ring, and all slots and buffers in the range + * [head .. tail-1] are owned by the user program; + * the kernel only accesses them during a netmap system call + * and in the user thread context. + * + * Other slots and buffers are reserved for use by the kernel + */ +struct netmap_ring { + /* + * buf_ofs is meant to be used through macros. + * It contains the offset of the buffer region from this + * descriptor. + */ + const int64_t buf_ofs; + const uint32_t num_slots; /* number of slots in the ring. */ + const uint32_t nr_buf_size; + const uint16_t ringid; + const uint16_t dir; /* 0: tx, 1: rx */ + + uint32_t head; /* (u) first user slot */ + uint32_t cur; /* (u) wakeup point */ + uint32_t tail; /* (k) first kernel slot */ + + uint32_t flags; + + struct timeval ts; /* (k) time of last *sync() */ + + /* opaque room for a mutex or similar object */ +#if !defined(_WIN32) || defined(__CYGWIN__) + uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128]; +#else + uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128]; +#endif + + /* the slots follow. This struct has variable size */ + struct netmap_slot slot[0]; /* array of slots. */ +}; + + +/* + * RING FLAGS + */ +#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */ + /* + * updates the 'ts' field on each netmap syscall. This saves + * saves a separate gettimeofday(), and is not much worse than + * software timestamps generated in the interrupt handler. + */ + +#define NR_FORWARD 0x0004 /* enable NS_FORWARD for ring */ + /* + * Enables the NS_FORWARD slot flag for the ring. + */ + + +/* + * Netmap representation of an interface and its queue(s). + * This is initialized by the kernel when binding a file + * descriptor to a port, and should be considered as readonly + * by user programs. The kernel never uses it. + * + * There is one netmap_if for each file descriptor on which we want + * to select/poll. + * select/poll operates on one or all pairs depending on the value of + * nmr_queueid passed on the ioctl. + */ +struct netmap_if { + char ni_name[IFNAMSIZ]; /* name of the interface. */ + const uint32_t ni_version; /* API version, currently unused */ + const uint32_t ni_flags; /* properties */ +#define NI_PRIV_MEM 0x1 /* private memory region */ + + /* + * The number of packet rings available in netmap mode. + * Physical NICs can have different numbers of tx and rx rings. + * Physical NICs also have a 'host' ring pair. + * Additionally, clients can request additional ring pairs to + * be used for internal communication. + */ + const uint32_t ni_tx_rings; /* number of HW tx rings */ + const uint32_t ni_rx_rings; /* number of HW rx rings */ + + uint32_t ni_bufs_head; /* head index for extra bufs */ + uint32_t ni_spare1[5]; + /* + * The following array contains the offset of each netmap ring + * from this structure, in the following order: + * NIC tx rings (ni_tx_rings); host tx ring (1); extra tx rings; + * NIC rx rings (ni_rx_rings); host tx ring (1); extra rx rings. + * + * The area is filled up by the kernel on NIOCREGIF, + * and then only read by userspace code. + */ + const ssize_t ring_ofs[0]; +}; + + +#ifndef NIOCREGIF +/* + * ioctl names and related fields + * + * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues, + * whose identity is set in NIOCREGIF through nr_ringid. + * These are non blocking and take no argument. + * + * NIOCGINFO takes a struct ifreq, the interface name is the input, + * the outputs are number of queues and number of descriptor + * for each queue (useful to set number of threads etc.). + * The info returned is only advisory and may change before + * the interface is bound to a file descriptor. + * + * NIOCREGIF takes an interface name within a struct nmre, + * and activates netmap mode on the interface (if possible). + * + * The argument to NIOCGINFO/NIOCREGIF overlays struct ifreq so we + * can pass it down to other NIC-related ioctls. + * + * The actual argument (struct nmreq) has a number of options to request + * different functions. + * The following are used in NIOCREGIF when nr_cmd == 0: + * + * nr_name (in) + * The name of the port (em0, valeXXX:YYY, etc.) + * limited to IFNAMSIZ for backward compatibility. + * + * nr_version (in/out) + * Must match NETMAP_API as used in the kernel, error otherwise. + * Always returns the desired value on output. + * + * nr_tx_slots, nr_tx_slots, nr_tx_rings, nr_rx_rings (in/out) + * On input, non-zero values may be used to reconfigure the port + * according to the requested values, but this is not guaranteed. + * On output the actual values in use are reported. + * + * nr_ringid (in) + * Indicates how rings should be bound to the file descriptors. + * If nr_flags != 0, then the low bits (in NETMAP_RING_MASK) + * are used to indicate the ring number, and nr_flags specifies + * the actual rings to bind. NETMAP_NO_TX_POLL is unaffected. + * + * NOTE: THE FOLLOWING (nr_flags == 0) IS DEPRECATED: + * If nr_flags == 0, NETMAP_HW_RING and NETMAP_SW_RING control + * the binding as follows: + * 0 (default) binds all physical rings + * NETMAP_HW_RING | ring number binds a single ring pair + * NETMAP_SW_RING binds only the host tx/rx rings + * + * NETMAP_NO_TX_POLL can be OR-ed to make select()/poll() push + * packets on tx rings only if POLLOUT is set. + * The default is to push any pending packet. + * + * NETMAP_DO_RX_POLL can be OR-ed to make select()/poll() release + * packets on rx rings also when POLLIN is NOT set. + * The default is to touch the rx ring only with POLLIN. + * Note that this is the opposite of TX because it + * reflects the common usage. + * + * NOTE: NETMAP_PRIV_MEM IS DEPRECATED, use nr_arg2 instead. + * NETMAP_PRIV_MEM is set on return for ports that do not use + * the global memory allocator. + * This information is not significant and applications + * should look at the region id in nr_arg2 + * + * nr_flags is the recommended mode to indicate which rings should + * be bound to a file descriptor. Values are NR_REG_* + * + * nr_arg1 (in) The number of extra rings to be reserved. + * Especially when allocating a VALE port the system only + * allocates the amount of memory needed for the port. + * If more shared memory rings are desired (e.g. for pipes), + * the first invocation for the same basename/allocator + * should specify a suitable number. Memory cannot be + * extended after the first allocation without closing + * all ports on the same region. + * + * nr_arg2 (in/out) The identity of the memory region used. + * On input, 0 means the system decides autonomously, + * other values may try to select a specific region. + * On return the actual value is reported. + * Region '1' is the global allocator, normally shared + * by all interfaces. Other values are private regions. + * If two ports the same region zero-copy is possible. + * + * nr_arg3 (in/out) number of extra buffers to be allocated. + * + * + * + * nr_cmd (in) if non-zero indicates a special command: + * NETMAP_BDG_ATTACH and nr_name = vale*:ifname + * attaches the NIC to the switch; nr_ringid specifies + * which rings to use. Used by vale-ctl -a ... + * nr_arg1 = NETMAP_BDG_HOST also attaches the host port + * as in vale-ctl -h ... + * + * NETMAP_BDG_DETACH and nr_name = vale*:ifname + * disconnects a previously attached NIC. + * Used by vale-ctl -d ... + * + * NETMAP_BDG_LIST + * list the configuration of VALE switches. + * + * NETMAP_BDG_VNET_HDR + * Set the virtio-net header length used by the client + * of a VALE switch port. + * + * NETMAP_BDG_NEWIF + * create a persistent VALE port with name nr_name. + * Used by vale-ctl -n ... + * + * NETMAP_BDG_DELIF + * delete a persistent VALE port. Used by vale-ctl -d ... + * + * nr_arg1, nr_arg2, nr_arg3 (in/out) command specific + * + * + * + */ + + +/* + * struct nmreq overlays a struct ifreq (just the name) + */ +struct nmreq { + char nr_name[IFNAMSIZ]; + uint32_t nr_version; /* API version */ + uint32_t nr_offset; /* nifp offset in the shared region */ + uint32_t nr_memsize; /* size of the shared region */ + uint32_t nr_tx_slots; /* slots in tx rings */ + uint32_t nr_rx_slots; /* slots in rx rings */ + uint16_t nr_tx_rings; /* number of tx rings */ + uint16_t nr_rx_rings; /* number of rx rings */ + + uint16_t nr_ringid; /* ring(s) we care about */ +#define NETMAP_HW_RING 0x4000 /* single NIC ring pair */ +#define NETMAP_SW_RING 0x2000 /* only host ring pair */ + +#define NETMAP_RING_MASK 0x0fff /* the ring number */ + +#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */ + +#define NETMAP_DO_RX_POLL 0x8000 /* DO automatic rxsync on poll */ + + uint16_t nr_cmd; +#define NETMAP_BDG_ATTACH 1 /* attach the NIC */ +#define NETMAP_BDG_DETACH 2 /* detach the NIC */ +#define NETMAP_BDG_REGOPS 3 /* register bridge callbacks */ +#define NETMAP_BDG_LIST 4 /* get bridge's info */ +#define NETMAP_BDG_VNET_HDR 5 /* set the port virtio-net-hdr length */ +#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */ +#define NETMAP_BDG_NEWIF 6 /* create a virtual port */ +#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */ +#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */ +#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */ +#define NETMAP_BDG_POLLING_ON 10 /* delete polling kthread */ +#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */ +#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */ + uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ +#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ + + uint16_t nr_arg2; + uint32_t nr_arg3; /* req. extra buffers in NIOCREGIF */ + uint32_t nr_flags; + /* various modes, extends nr_ringid */ + uint32_t spare2[1]; +}; + +#define NR_REG_MASK 0xf /* values for nr_flags */ +enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */ + NR_REG_ALL_NIC = 1, + NR_REG_SW = 2, + NR_REG_NIC_SW = 3, + NR_REG_ONE_NIC = 4, + NR_REG_PIPE_MASTER = 5, + NR_REG_PIPE_SLAVE = 6, +}; +/* monitor uses the NR_REG to select the rings to monitor */ +#define NR_MONITOR_TX 0x100 +#define NR_MONITOR_RX 0x200 +#define NR_ZCOPY_MON 0x400 +/* request exclusive access to the selected rings */ +#define NR_EXCLUSIVE 0x800 +/* request ptnetmap host support */ +#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */ +#define NR_PTNETMAP_HOST 0x1000 +#define NR_RX_RINGS_ONLY 0x2000 +#define NR_TX_RINGS_ONLY 0x4000 +/* Applications set this flag if they are able to deal with virtio-net headers, + * that is send/receive frames that start with a virtio-net header. + * If not set, NIOCREGIF will fail with netmap ports that require applications + * to use those headers. If the flag is set, the application can use the + * NETMAP_VNET_HDR_GET command to figure out the header length. */ +#define NR_ACCEPT_VNET_HDR 0x8000 + + +/* + * Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined + * in ws2def.h but not sure if they are in the form we need. + * XXX so we redefine them + * in a convenient way to use for DeviceIoControl signatures + */ +#ifdef _WIN32 +#undef _IO // ws2def.h +#define _WIN_NM_IOCTL_TYPE 40000 +#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_BUFFERED, FILE_ANY_ACCESS ) +#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \ + METHOD_OUT_DIRECT, FILE_ANY_ACCESS ) + +#define _IOWR(_c, _n, _s) _IO(_c, _n) + +/* We havesome internal sysctl in addition to the externally visible ones */ +#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT +#define NETMAP_POLL _IO('i', 162) + +/* and also two setsockopt for sysctl emulation */ +#define NETMAP_SETSOCKOPT _IO('i', 140) +#define NETMAP_GETSOCKOPT _IO('i', 141) + + +//These linknames are for the Netmap Core Driver +#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP" +#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap" + +//Definition of a structure used to pass a virtual address within an IOCTL +typedef struct _MEMORY_ENTRY { + PVOID pUsermodeVirtualAddress; +} MEMORY_ENTRY, *PMEMORY_ENTRY; + +typedef struct _POLL_REQUEST_DATA { + int events; + int timeout; + int revents; +} POLL_REQUEST_DATA; + +#endif /* _WIN32 */ + +/* + * FreeBSD uses the size value embedded in the _IOWR to determine + * how much to copy in/out. So we need it to match the actual + * data structure we pass. We put some spares in the structure + * to ease compatibility with other versions + */ +#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */ +#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */ +#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */ +#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */ +#define NIOCCONFIG _IOWR('i',150, struct nm_ifreq) /* for ext. modules */ +#endif /* !NIOCREGIF */ + + +/* + * Helper functions for kernel and userspace + */ + +/* + * check if space is available in the ring. + */ +static inline int +nm_ring_empty(struct netmap_ring *ring) +{ + return (ring->cur == ring->tail); +} + +/* + * Opaque structure that is passed to an external kernel + * module via ioctl(fd, NIOCCONFIG, req) for a user-owned + * bridge port (at this point ephemeral VALE interface). + */ +#define NM_IFRDATA_LEN 256 +struct nm_ifreq { + char nifr_name[IFNAMSIZ]; + char data[NM_IFRDATA_LEN]; +}; + +/* + * netmap kernel thread configuration + */ +/* bhyve/vmm.ko MSIX parameters for IOCTL */ +struct ptn_vmm_ioctl_msix { + uint64_t msg; + uint64_t addr; +}; + +/* IOCTL parameters */ +struct nm_kth_ioctl { + u_long com; + /* TODO: use union */ + union { + struct ptn_vmm_ioctl_msix msix; + } data; +}; + +/* Configuration of a ptnetmap ring */ +struct ptnet_ring_cfg { + uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */ + uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */ + struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */ +}; +#endif /* _NET_NETMAP_H_ */ diff --git a/src/plugins/netmap/netmap.api b/src/plugins/netmap/netmap.api new file mode 100644 index 00000000000..a14753cad9c --- /dev/null +++ b/src/plugins/netmap/netmap.api @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2015-2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +option version = "1.0.0"; + +/** \brief Create netmap + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param netmap_if_name - interface name + @param hw_addr - interface MAC + @param use_random_hw_addr - use random generated MAC + @param is_pipe - is pipe + @param is_master - 0=slave, 1=master +*/ +autoreply define netmap_create +{ + u32 client_index; + u32 context; + + u8 netmap_if_name[64]; + u8 hw_addr[6]; + u8 use_random_hw_addr; + u8 is_pipe; + u8 is_master; +}; + +/** \brief Delete netmap + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param netmap_if_name - interface name +*/ +autoreply define netmap_delete +{ + u32 client_index; + u32 context; + + u8 netmap_if_name[64]; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap.c b/src/plugins/netmap/netmap.c new file mode 100644 index 00000000000..ebef215eb3b --- /dev/null +++ b/src/plugins/netmap/netmap.c @@ -0,0 +1,334 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <fcntl.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> + +#include <netmap/net_netmap.h> +#include <netmap/netmap.h> +#include <netmap/netmap.api_enum.h> +#include <netmap/netmap.api_types.h> + +netmap_main_t netmap_main; + +static clib_error_t * +netmap_fd_read_ready (clib_file_t * uf) +{ + vlib_main_t *vm = vlib_get_main (); + netmap_main_t *nm = &netmap_main; + u32 idx = uf->private_data; + + nm->pending_input_bitmap = + clib_bitmap_set (nm->pending_input_bitmap, idx, 1); + + /* Schedule the rx node */ + vlib_node_set_interrupt_pending (vm, netmap_input_node.index); + + return 0; +} + +static void +close_netmap_if (netmap_main_t * nm, netmap_if_t * nif) +{ + if (nif->clib_file_index != ~0) + { + clib_file_del (&file_main, file_main.file_pool + nif->clib_file_index); + nif->clib_file_index = ~0; + } + else if (nif->fd > -1) + close (nif->fd); + + if (nif->mem_region) + { + netmap_mem_region_t *reg = &nm->mem_regions[nif->mem_region]; + if (--reg->refcnt == 0) + { + munmap (reg->mem, reg->region_size); + reg->region_size = 0; + } + } + + + mhash_unset (&nm->if_index_by_host_if_name, nif->host_if_name, + &nif->if_index); + vec_free (nif->host_if_name); + vec_free (nif->req); + + clib_memset (nif, 0, sizeof (*nif)); + pool_put (nm->interfaces, nif); +} + +int +netmap_worker_thread_enable () +{ + /* if worker threads are enabled, switch to polling mode */ + foreach_vlib_main () + { + vlib_node_set_state (this_vlib_main, netmap_input_node.index, + VLIB_NODE_STATE_POLLING); + } + + return 0; +} + +int +netmap_worker_thread_disable () +{ + foreach_vlib_main () + { + vlib_node_set_state (this_vlib_main, netmap_input_node.index, + VLIB_NODE_STATE_INTERRUPT); + } + + return 0; +} + +int +netmap_create_if (vlib_main_t * vm, u8 * if_name, u8 * hw_addr_set, + u8 is_pipe, u8 is_master, u32 * sw_if_index) +{ + netmap_main_t *nm = &netmap_main; + int ret = 0; + uint32_t nr_reg; + netmap_if_t *nif = 0; + u8 hw_addr[6]; + vnet_sw_interface_t *sw; + vnet_main_t *vnm = vnet_get_main (); + uword *p; + struct nmreq *req = 0; + netmap_mem_region_t *reg; + vlib_thread_main_t *tm = vlib_get_thread_main (); + int fd; + + p = mhash_get (&nm->if_index_by_host_if_name, if_name); + if (p) + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + + fd = open ("/dev/netmap", O_RDWR); + if (fd < 0) + return VNET_API_ERROR_SUBIF_ALREADY_EXISTS; + + pool_get (nm->interfaces, nif); + nif->if_index = nif - nm->interfaces; + nif->fd = fd; + nif->clib_file_index = ~0; + + vec_validate (req, 0); + nif->req = req; + req->nr_version = NETMAP_API; + req->nr_flags = NR_REG_ALL_NIC; + + if (is_pipe) + req->nr_flags = is_master ? NR_REG_PIPE_MASTER : NR_REG_PIPE_SLAVE; + else + req->nr_flags = NR_REG_ALL_NIC; + + req->nr_flags |= NR_ACCEPT_VNET_HDR; + snprintf (req->nr_name, IFNAMSIZ, "%s", if_name); + req->nr_name[IFNAMSIZ - 1] = 0; + + if (ioctl (nif->fd, NIOCREGIF, req)) + { + ret = VNET_API_ERROR_NOT_CONNECTED; + goto error; + } + + nif->mem_region = req->nr_arg2; + vec_validate (nm->mem_regions, nif->mem_region); + reg = &nm->mem_regions[nif->mem_region]; + if (reg->region_size == 0) + { + reg->mem = mmap (NULL, req->nr_memsize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + clib_warning ("mem %p", reg->mem); + if (reg->mem == MAP_FAILED) + { + ret = VNET_API_ERROR_NOT_CONNECTED; + goto error; + } + reg->region_size = req->nr_memsize; + } + reg->refcnt++; + + nif->nifp = NETMAP_IF (reg->mem, req->nr_offset); + nr_reg = nif->req->nr_flags & NR_REG_MASK; + + if (nr_reg == NR_REG_SW) + { /* host stack */ + nif->first_tx_ring = nif->last_tx_ring = nif->req->nr_tx_rings; + nif->first_rx_ring = nif->last_rx_ring = nif->req->nr_rx_rings; + } + else if (nr_reg == NR_REG_ALL_NIC) + { /* only nic */ + nif->first_tx_ring = 0; + nif->first_rx_ring = 0; + nif->last_tx_ring = nif->req->nr_tx_rings - 1; + nif->last_rx_ring = nif->req->nr_rx_rings - 1; + } + else if (nr_reg == NR_REG_NIC_SW) + { + nif->first_tx_ring = 0; + nif->first_rx_ring = 0; + nif->last_tx_ring = nif->req->nr_tx_rings; + nif->last_rx_ring = nif->req->nr_rx_rings; + } + else if (nr_reg == NR_REG_ONE_NIC) + { + /* XXX check validity */ + nif->first_tx_ring = nif->last_tx_ring = nif->first_rx_ring = + nif->last_rx_ring = nif->req->nr_ringid & NETMAP_RING_MASK; + } + else + { /* pipes */ + nif->first_tx_ring = nif->last_tx_ring = 0; + nif->first_rx_ring = nif->last_rx_ring = 0; + } + + nif->host_if_name = if_name; + nif->per_interface_next_index = ~0; + + if (tm->n_vlib_mains > 1) + clib_spinlock_init (&nif->lockp); + + { + clib_file_t template = { 0 }; + template.read_function = netmap_fd_read_ready; + template.file_descriptor = nif->fd; + template.private_data = nif->if_index; + template.description = format (0, "netmap socket"); + nif->clib_file_index = clib_file_add (&file_main, &template); + } + + /*use configured or generate random MAC address */ + if (hw_addr_set) + memcpy (hw_addr, hw_addr_set, 6); + else + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + + memcpy (hw_addr + 2, &rnd, sizeof (rnd)); + hw_addr[0] = 2; + hw_addr[1] = 0xfe; + } + + vnet_eth_interface_registration_t eir = {}; + + eir.dev_class_index = netmap_device_class.index; + eir.dev_instance = nif->if_index; + eir.address = hw_addr; + eir.cb.set_max_frame_size = NULL; + + nif->hw_if_index = vnet_eth_register_interface (vnm, &eir); + + sw = vnet_get_hw_sw_interface (vnm, nif->hw_if_index); + nif->sw_if_index = sw->sw_if_index; + + mhash_set_mem (&nm->if_index_by_host_if_name, if_name, &nif->if_index, 0); + + if (sw_if_index) + *sw_if_index = nif->sw_if_index; + + if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 1) + netmap_worker_thread_enable (); + + return 0; + +error: + close_netmap_if (nm, nif); + return ret; +} + +int +netmap_delete_if (vlib_main_t * vm, u8 * host_if_name) +{ + vnet_main_t *vnm = vnet_get_main (); + netmap_main_t *nm = &netmap_main; + netmap_if_t *nif; + uword *p; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + p = mhash_get (&nm->if_index_by_host_if_name, host_if_name); + if (p == NULL) + { + clib_warning ("Host interface %s does not exist", host_if_name); + return VNET_API_ERROR_SYSCALL_ERROR_1; + } + nif = pool_elt_at_index (nm->interfaces, p[0]); + + /* bring down the interface */ + vnet_hw_interface_set_flags (vnm, nif->hw_if_index, 0); + + ethernet_delete_interface (vnm, nif->hw_if_index); + + close_netmap_if (nm, nif); + + if (tm->n_vlib_mains > 1 && pool_elts (nm->interfaces) == 0) + netmap_worker_thread_disable (); + + return 0; +} + +static clib_error_t * +netmap_init (vlib_main_t * vm) +{ + netmap_main_t *nm = &netmap_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_thread_registration_t *tr; + uword *p; + + clib_memset (nm, 0, sizeof (netmap_main_t)); + + nm->input_cpu_first_index = 0; + nm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + nm->input_cpu_first_index = tr->first_index; + nm->input_cpu_count = tr->count; + } + + mhash_init_vec_string (&nm->if_index_by_host_if_name, sizeof (uword)); + + vec_validate_aligned (nm->rx_buffers, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + return 0; +} + +VLIB_INIT_FUNCTION (netmap_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap.h b/src/plugins/netmap/netmap.h new file mode 100644 index 00000000000..29f855fda8e --- /dev/null +++ b/src/plugins/netmap/netmap.h @@ -0,0 +1,166 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ +/* + * Copyright (C) 2011-2014 Universita` di Pisa. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <vppinfra/lock.h> + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + clib_spinlock_t lockp; + u8 *host_if_name; + uword if_index; + u32 hw_if_index; + u32 sw_if_index; + u32 clib_file_index; + + u32 per_interface_next_index; + u8 is_admin_up; + + /* netmap */ + struct nmreq *req; + u16 mem_region; + int fd; + struct netmap_if *nifp; + u16 first_tx_ring; + u16 last_tx_ring; + u16 first_rx_ring; + u16 last_rx_ring; + +} netmap_if_t; + +typedef struct +{ + char *mem; + u32 region_size; + int refcnt; +} netmap_mem_region_t; + +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + netmap_if_t *interfaces; + + /* bitmap of pending rx interfaces */ + uword *pending_input_bitmap; + + /* rx buffer cache */ + u32 **rx_buffers; + + /* hash of host interface names */ + mhash_t if_index_by_host_if_name; + + /* vector of memory regions */ + netmap_mem_region_t *mem_regions; + + /* first cpu index */ + u32 input_cpu_first_index; + + /* total cpu count */ + u32 input_cpu_count; +} netmap_main_t; + +extern netmap_main_t netmap_main; +extern vnet_device_class_t netmap_device_class; +extern vlib_node_registration_t netmap_input_node; + +int netmap_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set, + u8 is_pipe, u8 is_master, u32 * sw_if_index); +int netmap_delete_if (vlib_main_t * vm, u8 * host_if_name); + + +/* Macros and helper functions from sys/net/netmap_user.h */ + +#ifdef _NET_NETMAP_H_ + +#define _NETMAP_OFFSET(type, ptr, offset) \ + ((type)(void *)((char *)(ptr) + (offset))) + +#define NETMAP_IF(_base, _ofs) _NETMAP_OFFSET(struct netmap_if *, _base, _ofs) + +#define NETMAP_TXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ + nifp, (nifp)->ring_ofs[index] ) + +#define NETMAP_RXRING(nifp, index) _NETMAP_OFFSET(struct netmap_ring *, \ + nifp, (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) + +#define NETMAP_BUF(ring, index) \ + ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size)) + +#define NETMAP_BUF_IDX(ring, buf) \ + ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \ + (ring)->nr_buf_size ) + +static inline uint32_t +nm_ring_next (struct netmap_ring *ring, uint32_t i) +{ + return (PREDICT_FALSE (i + 1 == ring->num_slots) ? 0 : i + 1); +} + + +/* + * Return 1 if we have pending transmissions in the tx ring. + * When everything is complete ring->head = ring->tail + 1 (modulo ring size) + */ +static inline int +nm_tx_pending (struct netmap_ring *ring) +{ + return nm_ring_next (ring, ring->tail) != ring->head; +} + +static inline uint32_t +nm_ring_space (struct netmap_ring *ring) +{ + int ret = ring->tail - ring->cur; + if (ret < 0) + ret += ring->num_slots; + return ret; +} +#endif + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/netmap_api.c b/src/plugins/netmap/netmap_api.c new file mode 100644 index 00000000000..51f572a23e6 --- /dev/null +++ b/src/plugins/netmap/netmap_api.c @@ -0,0 +1,95 @@ +/* + *------------------------------------------------------------------ + * netmap_api.c - netmap api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <netmap/netmap.h> + +#include <vnet/format_fns.h> +#include <netmap/netmap.api_enum.h> +#include <netmap/netmap.api_types.h> + +#include <vlibapi/api_helper_macros.h> + +#define foreach_vpe_api_msg \ +_(NETMAP_CREATE, netmap_create) \ +_(NETMAP_DELETE, netmap_delete) \ + +static void +vl_api_netmap_create_t_handler (vl_api_netmap_create_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_netmap_create_reply_t *rmp; + int rv = 0; + u8 *if_name = NULL; + + if_name = format (0, "%s", mp->netmap_if_name); + vec_add1 (if_name, 0); + + rv = + netmap_create_if (vm, if_name, mp->use_random_hw_addr ? 0 : mp->hw_addr, + mp->is_pipe, mp->is_master, 0); + + vec_free (if_name); + + REPLY_MACRO (VL_API_NETMAP_CREATE_REPLY); +} + +static void +vl_api_netmap_delete_t_handler (vl_api_netmap_delete_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_netmap_delete_reply_t *rmp; + int rv = 0; + u8 *if_name = NULL; + + if_name = format (0, "%s", mp->netmap_if_name); + vec_add1 (if_name, 0); + + rv = netmap_delete_if (vm, if_name); + + vec_free (if_name); + + REPLY_MACRO (VL_API_NETMAP_DELETE_REPLY); +} + +#include <netmap/netmap.api.c> +static clib_error_t * +netmap_api_hookup (vlib_main_t * vm) +{ + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (); + + return 0; +} + +VLIB_API_INIT_FUNCTION (netmap_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/node.c b/src/plugins/netmap/node.c new file mode 100644 index 00000000000..6169847fa79 --- /dev/null +++ b/src/plugins/netmap/node.c @@ -0,0 +1,295 @@ +/* + *------------------------------------------------------------------ + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdint.h> +#include <net/if.h> +#include <sys/ioctl.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/devices/devices.h> +#include <vnet/feature/feature.h> + +#include <netmap/net_netmap.h> +#include <netmap/netmap.h> + +#define foreach_netmap_input_error + +typedef enum +{ +#define _(f,s) NETMAP_INPUT_ERROR_##f, + foreach_netmap_input_error +#undef _ + NETMAP_INPUT_N_ERROR, +} netmap_input_error_t; + +static char *netmap_input_error_strings[] = { +#define _(n,s) s, + foreach_netmap_input_error +#undef _ +}; + +typedef struct +{ + u32 next_index; + u32 hw_if_index; + struct netmap_slot slot; +} netmap_input_trace_t; + +static u8 * +format_netmap_input_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + netmap_input_trace_t *t = va_arg (*args, netmap_input_trace_t *); + u32 indent = format_get_indent (s); + + s = format (s, "netmap: hw_if_index %d next-index %d", + t->hw_if_index, t->next_index); + s = format (s, "\n%Uslot: flags 0x%x len %u buf_idx %u", + format_white_space, indent + 2, + t->slot.flags, t->slot.len, t->slot.buf_idx); + return s; +} + +always_inline void +buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi) +{ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi); + vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi); + + /* update first buffer */ + first_b->total_length_not_including_first_buffer += b->current_length; + + /* update previous buffer */ + prev_b->next_buffer = bi; + prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT; + + /* update current buffer */ + b->next_buffer = 0; +} + +always_inline uword +netmap_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, netmap_if_t * nif) +{ + u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + uword n_trace = vlib_get_trace_count (vm, node); + netmap_main_t *nm = &netmap_main; + u32 n_rx_packets = 0; + u32 n_rx_bytes = 0; + u32 *to_next = 0; + u32 n_free_bufs; + struct netmap_ring *ring; + int cur_ring; + u32 thread_index = vm->thread_index; + u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); + + if (nif->per_interface_next_index != ~0) + next_index = nif->per_interface_next_index; + + n_free_bufs = vec_len (nm->rx_buffers[thread_index]); + if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) + { + vec_validate (nm->rx_buffers[thread_index], + VLIB_FRAME_SIZE + n_free_bufs - 1); + n_free_bufs += + vlib_buffer_alloc (vm, &nm->rx_buffers[thread_index][n_free_bufs], + VLIB_FRAME_SIZE); + vec_set_len (nm->rx_buffers[thread_index], n_free_bufs); + } + + cur_ring = nif->first_rx_ring; + while (cur_ring <= nif->last_rx_ring && n_free_bufs) + { + int r = 0; + u32 cur_slot_index; + ring = NETMAP_RXRING (nif->nifp, cur_ring); + r = nm_ring_space (ring); + + if (!r) + { + cur_ring++; + continue; + } + + if (r > n_free_bufs) + r = n_free_bufs; + + cur_slot_index = ring->cur; + while (r) + { + u32 n_left_to_next; + u32 next0 = next_index; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (r && n_left_to_next) + { + vlib_buffer_t *first_b0 = 0; + u32 offset = 0; + u32 bi0 = 0, first_bi0 = 0, prev_bi0; + u32 next_slot_index = (cur_slot_index + 1) % ring->num_slots; + u32 next2_slot_index = (cur_slot_index + 2) % ring->num_slots; + struct netmap_slot *slot = &ring->slot[cur_slot_index]; + u32 data_len = slot->len; + + /* prefetch 2 slots in advance */ + CLIB_PREFETCH (&ring->slot[next2_slot_index], + CLIB_CACHE_LINE_BYTES, LOAD); + /* prefetch start of next packet */ + CLIB_PREFETCH (NETMAP_BUF + (ring, ring->slot[next_slot_index].buf_idx), + CLIB_CACHE_LINE_BYTES, LOAD); + + while (data_len && n_free_bufs) + { + vlib_buffer_t *b0; + /* grab free buffer */ + u32 last_empty_buffer = + vec_len (nm->rx_buffers[thread_index]) - 1; + prev_bi0 = bi0; + bi0 = nm->rx_buffers[thread_index][last_empty_buffer]; + b0 = vlib_get_buffer (vm, bi0); + vec_set_len (nm->rx_buffers[thread_index], + last_empty_buffer); + n_free_bufs--; + + /* copy data */ + u32 bytes_to_copy = + data_len > n_buffer_bytes ? n_buffer_bytes : data_len; + b0->current_data = 0; + clib_memcpy_fast (vlib_buffer_get_current (b0), + (u8 *) NETMAP_BUF (ring, slot->buf_idx) + + offset, bytes_to_copy); + + /* fill buffer header */ + b0->current_length = bytes_to_copy; + + if (offset == 0) + { + b0->total_length_not_including_first_buffer = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + nif->sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + first_bi0 = bi0; + first_b0 = vlib_get_buffer (vm, first_bi0); + } + else + buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0); + + offset += bytes_to_copy; + data_len -= bytes_to_copy; + } + + /* trace */ + if (PREDICT_FALSE (n_trace > 0)) + { + if (PREDICT_TRUE (first_b0 != 0) && + vlib_trace_buffer (vm, node, next0, first_b0, + /* follow_chain */ 0)) + { + netmap_input_trace_t *tr; + + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); + tr->next_index = next0; + tr->hw_if_index = nif->hw_if_index; + memcpy (&tr->slot, slot, sizeof (struct netmap_slot)); + } + } + + /* enque and take next packet */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, first_bi0, + next0); + + /* next packet */ + n_rx_packets++; + n_rx_bytes += slot->len; + to_next[0] = first_bi0; + to_next += 1; + n_left_to_next--; + cur_slot_index = next_slot_index; + + r--; + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + ring->head = ring->cur = cur_slot_index; + cur_ring++; + } + + if (n_rx_packets) + ioctl (nif->fd, NIOCRXSYNC, NULL); + + vlib_increment_combined_counter + (vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + vlib_get_thread_index (), nif->hw_if_index, n_rx_packets, n_rx_bytes); + + vnet_device_increment_rx_packets (thread_index, n_rx_packets); + + return n_rx_packets; +} + +VLIB_NODE_FN (netmap_input_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + int i; + u32 n_rx_packets = 0; + u32 thread_index = vm->thread_index; + netmap_main_t *nm = &netmap_main; + netmap_if_t *nmi; + + for (i = 0; i < vec_len (nm->interfaces); i++) + { + nmi = vec_elt_at_index (nm->interfaces, i); + if (nmi->is_admin_up && + (i % nm->input_cpu_count) == + (thread_index - nm->input_cpu_first_index)) + n_rx_packets += netmap_device_input_fn (vm, node, frame, nmi); + } + + return n_rx_packets; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (netmap_input_node) = { + .name = "netmap-input", + .sibling_of = "device-input", + .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, + .format_trace = format_netmap_input_trace, + .type = VLIB_NODE_TYPE_INPUT, + /* default state is INTERRUPT mode, switch to POLLING if worker threads are enabled */ + .state = VLIB_NODE_STATE_INTERRUPT, + .n_errors = NETMAP_INPUT_N_ERROR, + .error_strings = netmap_input_error_strings, +}; +/* *INDENT-ON* */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/netmap/plugin.c b/src/plugins/netmap/plugin.c new file mode 100644 index 00000000000..1673225b683 --- /dev/null +++ b/src/plugins/netmap/plugin.c @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Tom Jones <thj@freebsd.org> + * + * This software was developed by Tom Jones <thj@freebsd.org> under sponsorship + * from the FreeBSD Foundation. + * + */ + +#include <vlib/vlib.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> + +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "netmap", +}; diff --git a/src/plugins/srmpls/CMakeLists.txt b/src/plugins/srmpls/CMakeLists.txt new file mode 100644 index 00000000000..25905d31e1b --- /dev/null +++ b/src/plugins/srmpls/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright (c) 2024 Cisco and/or its affiliates +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_vpp_plugin(srmpls + SOURCES + sr_mpls_policy.c + sr_mpls_steering.c + sr_mpls_api.c + plugin.c + + INSTALL_HEADERS + sr_mpls.h + + API_FILES + sr_mpls.api + + # This might need to be VAT_AUTO_TEST? Not documented + API_TEST_SOURCES + sr_mpls_test.c +) diff --git a/src/vnet/srmpls/FEATURE.yaml b/src/plugins/srmpls/FEATURE.yaml index c5b958224c7..c5b958224c7 100644 --- a/src/vnet/srmpls/FEATURE.yaml +++ b/src/plugins/srmpls/FEATURE.yaml diff --git a/src/vnet/srmpls/dir.dox b/src/plugins/srmpls/dir.dox index 76ec1d6a41b..76ec1d6a41b 100644 --- a/src/vnet/srmpls/dir.dox +++ b/src/plugins/srmpls/dir.dox diff --git a/src/plugins/srmpls/plugin.c b/src/plugins/srmpls/plugin.c new file mode 100644 index 00000000000..af87607764f --- /dev/null +++ b/src/plugins/srmpls/plugin.c @@ -0,0 +1,26 @@ +/* + * plugin.c: srmpls + * + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> + +// register a plugin +VLIB_PLUGIN_REGISTER () = { + .version = VPP_BUILD_VER, + .description = "Segment Routing for MPLS plugin", +}; diff --git a/src/vnet/srmpls/sr_doc.rst b/src/plugins/srmpls/sr_doc.rst index ed847fa0d42..ed847fa0d42 100644 --- a/src/vnet/srmpls/sr_doc.rst +++ b/src/plugins/srmpls/sr_doc.rst diff --git a/src/vnet/srmpls/sr_mpls.api b/src/plugins/srmpls/sr_mpls.api index 742f135d493..742f135d493 100644 --- a/src/vnet/srmpls/sr_mpls.api +++ b/src/plugins/srmpls/sr_mpls.api diff --git a/src/vnet/srmpls/sr_mpls.h b/src/plugins/srmpls/sr_mpls.h index a8f9494428f..a8f9494428f 100644 --- a/src/vnet/srmpls/sr_mpls.h +++ b/src/plugins/srmpls/sr_mpls.h diff --git a/src/vnet/srmpls/sr_mpls_api.c b/src/plugins/srmpls/sr_mpls_api.c index 920856acff6..3e89017dbc1 100644 --- a/src/vnet/srmpls/sr_mpls_api.c +++ b/src/plugins/srmpls/sr_mpls_api.c @@ -17,7 +17,7 @@ */ #include <vnet/vnet.h> -#include <vnet/srmpls/sr_mpls.h> +#include "sr_mpls.h" #include <vlibmemory/api.h> #include <vnet/interface.h> @@ -26,28 +26,27 @@ #include <vnet/ip/ip_types_api.h> #include <vnet/format_fns.h> -#include <vnet/srmpls/sr_mpls.api_enum.h> -#include <vnet/srmpls/sr_mpls.api_types.h> - +#include <plugins/srmpls/sr_mpls.api_enum.h> +#include <plugins/srmpls/sr_mpls.api_types.h> #define vl_api_version(n, v) static u32 api_version = v; -#include <vnet/srmpls/sr_mpls.api.h> +#include <plugins/srmpls/sr_mpls.api.h> #undef vl_api_version #define vl_endianfun -#include <vnet/srmpls/sr_mpls.api.h> +#include <plugins/srmpls/sr_mpls.api.h> #undef vl_endianfun #define vl_calcsizefun -#include <vnet/srmpls/sr_mpls.api.h> +#include <plugins/srmpls/sr_mpls.api.h> #undef vl_calcsizefun #define vl_printfun -#include <vnet/srmpls/sr_mpls.api.h> +#include <plugins/srmpls/sr_mpls.api.h> #undef vl_printfun #define vl_msg_name_crc_list -#include <vnet/srmpls/sr_mpls.api.h> +#include <plugins/srmpls/sr_mpls.api.h> #undef vl_msg_name_crc_list #define REPLY_MSG_ID_BASE msg_id_base diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/plugins/srmpls/sr_mpls_policy.c index 41cb71601e9..af24acd8cf6 100644 --- a/src/vnet/srmpls/sr_mpls_policy.c +++ b/src/plugins/srmpls/sr_mpls_policy.c @@ -31,7 +31,7 @@ #include <vlib/vlib.h> #include <vnet/vnet.h> -#include <vnet/srmpls/sr_mpls.h> +#include "sr_mpls.h" #include <vnet/fib/mpls_fib.h> #include <vnet/dpo/dpo.h> #include <vnet/ip/ip.h> diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/plugins/srmpls/sr_mpls_steering.c index e8920df542b..24c8b0e2d9f 100644 --- a/src/vnet/srmpls/sr_mpls_steering.c +++ b/src/plugins/srmpls/sr_mpls_steering.c @@ -31,7 +31,7 @@ #include <vlib/vlib.h> #include <vnet/vnet.h> -#include <vnet/srmpls/sr_mpls.h> +#include "sr_mpls.h" #include <vnet/ip/ip4_packet.h> #include <vnet/ip/ip6_packet.h> #include <vnet/fib/mpls_fib.h> diff --git a/src/vnet/srmpls/sr_mpls_test.c b/src/plugins/srmpls/sr_mpls_test.c index e5d68462443..7aff4c32b06 100644 --- a/src/vnet/srmpls/sr_mpls_test.c +++ b/src/plugins/srmpls/sr_mpls_test.c @@ -25,11 +25,11 @@ /* Declare message IDs */ #include <vnet/format_fns.h> -#include <vnet/srmpls/sr_mpls.api_enum.h> -#include <vnet/srmpls/sr_mpls.api_types.h> +#include <plugins/srmpls/sr_mpls.api_enum.h> +#include <plugins/srmpls/sr_mpls.api_types.h> #define vl_endianfun /* define message structures */ -#include <vnet/srmpls/sr_mpls.api.h> +#include <plugins/srmpls/sr_mpls.api.h> #undef vl_endianfun typedef struct @@ -163,7 +163,7 @@ api_sr_mpls_policy_del (vat_main_t *vam) return ret; } -#include <vnet/srmpls/sr_mpls.api_test.c> +#include <plugins/srmpls/sr_mpls.api_test.c> /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/tlsopenssl/tls_openssl.c b/src/plugins/tlsopenssl/tls_openssl.c index 98982439b96..5d172a0adcf 100644 --- a/src/plugins/tlsopenssl/tls_openssl.c +++ b/src/plugins/tlsopenssl/tls_openssl.c @@ -1179,18 +1179,13 @@ int tls_openssl_set_ciphers (char *ciphers) { openssl_main_t *om = &openssl_main; - int i; if (!ciphers) { return -1; } - vec_validate (om->ciphers, strlen (ciphers)); - for (i = 0; i < vec_len (om->ciphers) - 1; i++) - { - om->ciphers[i] = toupper (ciphers[i]); - } + vec_validate_init_c_string (om->ciphers, ciphers, strlen (ciphers)); return 0; diff --git a/src/plugins/unittest/gso_test.c b/src/plugins/unittest/gso_test.c index 54eb7422c87..43c614341d2 100644 --- a/src/plugins/unittest/gso_test.c +++ b/src/plugins/unittest/gso_test.c @@ -96,12 +96,94 @@ GSO_TEST_REGISTER_DATA (gso_ipv6_tcp, static) = { .is_ip6 = 1, }; +/* + * this does not support tunnel packets + */ +static void +set_hdr_offsets (vlib_buffer_t *b0, u8 is_l2) +{ + u16 ethertype = 0, l2hdr_sz = 0; + vnet_buffer_oflags_t oflags = 0; + u8 l4_proto = 0; + + if (!is_l2) + { + switch (b0->data[0] & 0xf0) + { + case 0x40: + ethertype = ETHERNET_TYPE_IP4; + break; + case 0x60: + ethertype = ETHERNET_TYPE_IP6; + break; + } + } + else + { + ethernet_header_t *eh = (ethernet_header_t *) b0->data; + ethertype = clib_net_to_host_u16 (eh->type); + l2hdr_sz = sizeof (ethernet_header_t); + + if (ethernet_frame_is_tagged (ethertype)) + { + ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eh + 1); + + ethertype = clib_net_to_host_u16 (vlan->type); + l2hdr_sz += sizeof (*vlan); + if (ethertype == ETHERNET_TYPE_VLAN) + { + vlan++; + ethertype = clib_net_to_host_u16 (vlan->type); + l2hdr_sz += sizeof (*vlan); + } + } + } + + vnet_buffer (b0)->l2_hdr_offset = 0; + vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz; + + if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4)) + { + ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l2hdr_sz); + vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4); + l4_proto = ip4->protocol; + oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM; + b0->flags |= (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID | + VNET_BUFFER_F_L3_HDR_OFFSET_VALID | + VNET_BUFFER_F_L4_HDR_OFFSET_VALID); + } + else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6)) + { + ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l2hdr_sz); + vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + sizeof (ip6_header_t); + /* FIXME IPv6 EH traversal */ + l4_proto = ip6->protocol; + b0->flags |= (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID | + VNET_BUFFER_F_L3_HDR_OFFSET_VALID | + VNET_BUFFER_F_L4_HDR_OFFSET_VALID); + } + if (l4_proto == IP_PROTOCOL_TCP) + { + oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM; + } + else if (l4_proto == IP_PROTOCOL_UDP) + { + oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM; + } + if (oflags) + vnet_buffer_offload_flags_set (b0, oflags); +} + static u32 -fill_buffers (vlib_main_t *vm, u32 *buffer_indices, u8 *data, u32 data_size, - u32 n_buffers, u32 buffer_size, u32 packet_size, u32 gso_size, - u32 l4_hdr_len) +fill_buffers (vlib_main_t *vm, u32 *buffer_indices, + gso_test_data_t *gso_test_data, u32 n_buffers, u32 buffer_size, + u32 packet_size, u32 gso_size) { u32 i; + u8 *data = gso_test_data->data; + u32 data_size = gso_test_data->data_size; + u32 l4_hdr_len = gso_test_data->l4_hdr_len; + u8 is_l2 = gso_test_data->is_l2; for (i = 0; i < n_buffers; i++) { @@ -153,6 +235,8 @@ fill_buffers (vlib_main_t *vm, u32 *buffer_indices, u8 *data, u32 data_size, len += fill_data_size; } while (k < n_bufs); + + set_hdr_offsets (b, is_l2); b->flags |= VNET_BUFFER_F_GSO; vnet_buffer2 (b)->gso_size = gso_size; vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_len; @@ -165,17 +249,14 @@ fill_buffers (vlib_main_t *vm, u32 *buffer_indices, u8 *data, u32 data_size, static_always_inline u32 gso_segment_buffer_test (vlib_main_t *vm, u32 bi, - vnet_interface_per_thread_data_t *ptd, u8 is_l2, - u8 is_ip6) + vnet_interface_per_thread_data_t *ptd, u8 is_l2) { vlib_buffer_t *b = vlib_get_buffer (vm, bi); - generic_header_offset_t gho = { 0 }; u32 n_tx_bytes = 0; if (PREDICT_TRUE (b->flags & VNET_BUFFER_F_GSO)) { - vnet_generic_header_offset_parser (b, &gho, is_l2, !is_ip6, is_ip6); - n_tx_bytes = gso_segment_buffer_inline (vm, ptd, b, &gho, is_l2, is_ip6); + n_tx_bytes = gso_segment_buffer_inline (vm, ptd, b, is_l2); } return n_tx_bytes; @@ -237,19 +318,16 @@ test_gso_perf (vlib_main_t *vm, gso_test_main_t *gtm) vlib_buffer_free (vm, buffer_indices, n_alloc); goto done; } - n_filled = - fill_buffers (vm, buffer_indices, gso_test_data->data, - gso_test_data->data_size, n_buffers, buffer_size, - packet_size, gso_size, gso_test_data->l4_hdr_len); + n_filled = fill_buffers (vm, buffer_indices, gso_test_data, n_buffers, + buffer_size, packet_size, gso_size); u8 is_l2 = gso_test_data->is_l2; - u8 is_ip6 = gso_test_data->is_ip6; for (k = 0; k < warmup_rounds; k++) { for (j = 0; j < n_filled; j++) - gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j], is_l2, - is_ip6); + gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j], is_l2); + for (j = 0; j < n_filled; j++) { vlib_buffer_free (vm, ptd[j].split_buffers, @@ -264,8 +342,9 @@ test_gso_perf (vlib_main_t *vm, gso_test_main_t *gtm) { t0 = clib_cpu_time_now (); for (j = 0; j < n_filled; j++) - gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j], is_l2, - is_ip6); + gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j], + is_l2); + t1 = clib_cpu_time_now (); t2[i] += (t1 - t0); for (j = 0; j < n_filled; j++) diff --git a/src/plugins/unittest/policer_test.c b/src/plugins/unittest/policer_test.c index 2b14bf687bf..41f769960a3 100644 --- a/src/plugins/unittest/policer_test.c +++ b/src/plugins/unittest/policer_test.c @@ -21,7 +21,7 @@ policer_test (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd_arg) { int policer_index, i; - uint rate_kbps, burst, num_pkts; + unsigned int rate_kbps, burst, num_pkts; double total_bytes, cpu_ticks_per_pkt, time = 0; double cpu_speed, cpu_ticks_per_byte; policer_result_e result, input_colour = POLICE_CONFORM; diff --git a/src/plugins/wireguard/wireguard_chachapoly.c b/src/plugins/wireguard/wireguard_chachapoly.c index 0dd7908d2e2..ad644ff6cb8 100644 --- a/src/plugins/wireguard/wireguard_chachapoly.c +++ b/src/plugins/wireguard/wireguard_chachapoly.c @@ -72,11 +72,11 @@ wg_xchacha20poly1305_encrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst, u64 h_nonce; clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce)); - h_nonce = le64toh (h_nonce); + h_nonce = clib_little_to_host_u64 (h_nonce); hchacha20 (derived_key, nonce, key); for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++) - (derived_key[i]) = htole32 ((derived_key[i])); + (derived_key[i]) = clib_host_to_little_u32 ((derived_key[i])); uint32_t key_idx; @@ -102,11 +102,11 @@ wg_xchacha20poly1305_decrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst, u64 h_nonce; clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce)); - h_nonce = le64toh (h_nonce); + h_nonce = clib_little_to_host_u64 (h_nonce); hchacha20 (derived_key, nonce, key); for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++) - (derived_key[i]) = htole32 ((derived_key[i])); + (derived_key[i]) = clib_host_to_little_u32 ((derived_key[i])); uint32_t key_idx; diff --git a/src/plugins/wireguard/wireguard_noise.c b/src/plugins/wireguard/wireguard_noise.c index 5fe2e44b03b..c3f28f442f5 100644 --- a/src/plugins/wireguard/wireguard_noise.c +++ b/src/plugins/wireguard/wireguard_noise.c @@ -751,8 +751,8 @@ noise_tai64n_now (uint8_t output[NOISE_TIMESTAMP_LEN]) unix_nanosec &= REJECT_INTERVAL_MASK; /* https://cr.yp.to/libtai/tai64.html */ - sec = htobe64 (0x400000000000000aULL + unix_sec); - nsec = htobe32 (unix_nanosec); + sec = clib_host_to_big_u64 (0x400000000000000aULL + unix_sec); + nsec = clib_host_to_big_u32 (unix_nanosec); /* memcpy to output buffer, assuming output could be unaligned. */ clib_memcpy (output, &sec, sizeof (sec)); diff --git a/src/tools/vppapigen/vppapigen_c.py b/src/tools/vppapigen/vppapigen_c.py index fb7de0a023f..c2e1e7da7b7 100755 --- a/src/tools/vppapigen/vppapigen_c.py +++ b/src/tools/vppapigen/vppapigen_c.py @@ -365,7 +365,7 @@ class FromJSON: write(" char *p = cJSON_GetStringValue(item);\n") write(" size_t plen = strlen(p);\n") write( - " {msgvar} = cJSON_realloc({msgvar}, {msgsize} + plen, {msgsize});\n".format( + " {msgvar} = cJSON_realloc({msgvar}, {msgsize} + plen);\n".format( msgvar=msgvar, msgsize=msgsize ) ) @@ -434,7 +434,7 @@ class FromJSON: cJSON *array = cJSON_GetObjectItem(o, "{n}"); int size = cJSON_GetArraySize(array); {lfield} = size; - {realloc} = cJSON_realloc({realloc}, {msgsize} + sizeof({t}) * size, {msgsize}); + {realloc} = cJSON_realloc({realloc}, {msgsize} + sizeof({t}) * size); {t} *d = (void *){realloc} + {msgsize}; {msgsize} += sizeof({t}) * size; for (i = 0; i < size; i++) {{ @@ -461,12 +461,12 @@ class FromJSON: write( " {realloc} = cJSON_realloc({realloc}, {msgsize} + " - "vec_len(s), {msgsize});\n".format( + "vec_len(s));\n".format( msgvar=msgvar, msgsize=msgsize, realloc=realloc ) ) write( - " memcpy((void *){realloc} + {msgsize}, s, " + " clib_memcpy((void *){realloc} + {msgsize}, s, " "vec_len(s));\n".format(realloc=realloc, msgsize=msgsize) ) write(" {msgsize} += vec_len(s);\n".format(msgsize=msgsize)) @@ -1143,20 +1143,14 @@ ENDIAN_STRINGS = { } -def get_endian_string(o, type): +def get_endian_string(o, fieldtype): """Return proper endian string conversion function""" - try: - if o.to_network: - return ENDIAN_STRINGS[type].replace("net_to_host", "host_to_net") - except: - pass - return ENDIAN_STRINGS[type] + return ENDIAN_STRINGS[fieldtype] def endianfun_array(o): """Generate endian functions for arrays""" forloop = """\ - {comment} ASSERT((u32){length} <= (u32)VL_API_MAX_ARRAY_SIZE); for (i = 0; i < {length}; i++) {{ a->{name}[i] = {format}(a->{name}[i]); @@ -1165,31 +1159,26 @@ def endianfun_array(o): forloop_format = """\ for (i = 0; i < {length}; i++) {{ - {type}_endian(&a->{name}[i]); + {type}_endian(&a->{name}[i], to_net); }} """ - to_network_comment = "" - try: - if o.to_network: - to_network_comment = """/* - * Array fields processed first to handle variable length arrays and size - * field endian conversion in the proper order for to-network messages. - * Message fields have been sorted by type in the code generator, thus fields - * in this generated code may be converted in a different order than specified - * in the *.api file. - */""" - except: - pass - output = "" if o.fieldtype == "u8" or o.fieldtype == "string" or o.fieldtype == "bool": output += " /* a->{n} = a->{n} (no-op) */\n".format(n=o.fieldname) else: lfield = "a->" + o.lengthfield if o.lengthfield else o.length + if o.lengthfield: + output += ( + f" u32 count = to_net ? clib_host_to_net_u32(a->{o.lengthfield}) : " + f"a->{o.lengthfield};\n" + ) + lfield = "count" + else: + lfield = o.length + if o.fieldtype in ENDIAN_STRINGS: output += forloop.format( - comment=to_network_comment, length=lfield, format=get_endian_string(o, o.fieldtype), name=o.fieldname, @@ -1222,7 +1211,7 @@ def endianfun_obj(o): name=o.fieldname, format=get_endian_string(o, o.fieldtype) ) elif o.fieldtype.startswith("vl_api_"): - output += " {type}_endian(&a->{name});\n".format( + output += " {type}_endian(&a->{name}, to_net);\n".format( type=o.fieldtype, name=o.fieldname ) else: @@ -1254,19 +1243,12 @@ def endianfun(objs, modulename): output = output.format(module=modulename) signature = """\ -static inline void vl_api_{name}_t_endian (vl_api_{name}_t *a) +static inline void vl_api_{name}_t_endian (vl_api_{name}_t *a, bool to_net) {{ int i __attribute__((unused)); """ for t in objs: - # Outbound (to network) messages are identified by message nomenclature - # i.e. message names ending with these suffixes are 'to network' - if t.name.endswith("_reply") or t.name.endswith("_details"): - t.to_network = True - else: - t.to_network = False - if t.__class__.__name__ == "Enum" or t.__class__.__name__ == "EnumFlag": output += signature.format(name=t.name) if t.enumtype in ENDIAN_STRINGS: @@ -1300,15 +1282,7 @@ static inline void vl_api_{name}_t_endian (vl_api_{name}_t *a) output += signature.format(name=t.name) - # For outbound (to network) messages: - # some arrays have dynamic length -- iterate over - # them before changing endianness for the length field - # by making the Array types show up first - if t.to_network: - t.block.sort(key=lambda x: x.type) - for o in t.block: - o.to_network = t.to_network output += endianfun_obj(o) output += "}\n\n" @@ -1852,7 +1826,7 @@ api_{n} (cJSON *o) }} mp->_vl_msg_id = vac_get_msg_index(VL_API_{N}_CRC); - vl_api_{n}_t_endian(mp); + vl_api_{n}_t_endian(mp, 1); vac_write((char *)mp, len); cJSON_free(mp); @@ -1867,7 +1841,7 @@ api_{n} (cJSON *o) return 0; }} vl_api_{r}_t *rmp = (vl_api_{r}_t *)p; - vl_api_{r}_t_endian(rmp); + vl_api_{r}_t_endian(rmp, 0); return vl_api_{r}_t_tojson(rmp); }} @@ -1885,7 +1859,7 @@ api_{n} (cJSON *o) return 0; }} mp->_vl_msg_id = msg_id; - vl_api_{n}_t_endian(mp); + vl_api_{n}_t_endian(mp, 1); vac_write((char *)mp, len); cJSON_free(mp); @@ -1919,7 +1893,7 @@ api_{n} (cJSON *o) return 0; }} vl_api_{r}_t *rmp = (vl_api_{r}_t *)p; - vl_api_{r}_t_endian(rmp); + vl_api_{r}_t_endian(rmp, 0); cJSON_AddItemToArray(reply, vl_api_{r}_t_tojson(rmp)); }} }} @@ -1941,7 +1915,7 @@ api_{n} (cJSON *o) }} mp->_vl_msg_id = msg_id; - vl_api_{n}_t_endian(mp); + vl_api_{n}_t_endian(mp, 1); vac_write((char *)mp, len); cJSON_free(mp); @@ -1962,14 +1936,14 @@ api_{n} (cJSON *o) u16 msg_id = ntohs(*((u16 *)p)); if (msg_id == reply_msg_id) {{ vl_api_{r}_t *rmp = (vl_api_{r}_t *)p; - vl_api_{r}_t_endian(rmp); + vl_api_{r}_t_endian(rmp, 0); cJSON_AddItemToArray(reply, vl_api_{r}_t_tojson(rmp)); break; }} if (msg_id == details_msg_id) {{ vl_api_{d}_t *rmp = (vl_api_{d}_t *)p; - vl_api_{d}_t_endian(rmp); + vl_api_{d}_t_endian(rmp, 0); cJSON_AddItemToArray(reply, vl_api_{d}_t_tojson(rmp)); }} }} diff --git a/src/vat/ip_types.c b/src/vat/ip_types.c index abcd2eaf648..248205287a4 100644 --- a/src/vat/ip_types.c +++ b/src/vat/ip_types.c @@ -205,9 +205,9 @@ ip_address_family_to_link_type (ip_address_family_t af) return (VNET_LINK_IP4); } - void -ip_address_set (ip_address_t * dst, const void *src, u8 version) +ip_address_set (ip_address_t *dst, const void *src, + ip_address_family_t version) { ip_addr_version (dst) = version; diff --git a/src/vat2/vat2_helpers.h b/src/vat2/vat2_helpers.h index 7b197608a7b..d9ce2af6b35 100644 --- a/src/vat2/vat2_helpers.h +++ b/src/vat2/vat2_helpers.h @@ -29,7 +29,7 @@ vat2_control_ping (u32 context) vl_api_control_ping_t mp = {0}; mp._vl_msg_id = vac_get_msg_index(VL_API_CONTROL_PING_CRC); mp.context = context; - vl_api_control_ping_t_endian(&mp); + vl_api_control_ping_t_endian (&mp, 1 /* to network */); vac_write((char *)&mp, sizeof(mp)); } diff --git a/src/vcl/CMakeLists.txt b/src/vcl/CMakeLists.txt index 2f738f39d1a..c8835e771c1 100644 --- a/src/vcl/CMakeLists.txt +++ b/src/vcl/CMakeLists.txt @@ -11,11 +11,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") - message(WARNING "-- vppcom is currently only support on Linux - disabled") - return() -endif() - ############################################################################## # vppcom shared library ############################################################################## @@ -40,6 +35,9 @@ if (LDP_HAS_GNU_SOURCE) add_compile_definitions(HAVE_GNU_SOURCE) endif(LDP_HAS_GNU_SOURCE) +if("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") + message("WARNING: vcl_ldpreload isn't supported on FreeBSD - disabled") +else() add_vpp_library(vcl_ldpreload SOURCES ldp_socket_wrapper.c @@ -48,6 +46,7 @@ add_vpp_library(vcl_ldpreload LINK_LIBRARIES vppinfra svm vlibmemoryclient rt pthread vppcom dl ) +endif() add_vpp_headers(vcl ldp.h diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index 58f6ac0a134..a557093e897 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -2087,7 +2087,16 @@ read_again: ASSERT (rv >= 0); if (peek) - return rv; + { + /* Request new notifications if more data enqueued */ + if (rv < n || rv == svm_fifo_max_dequeue_cons (rx_fifo)) + { + if (is_ct) + svm_fifo_unset_event (s->rx_fifo); + svm_fifo_unset_event (rx_fifo); + } + return rv; + } n_read += rv; diff --git a/src/vlib/CMakeLists.txt b/src/vlib/CMakeLists.txt index 7ec9b2050e9..3c354b764dd 100644 --- a/src/vlib/CMakeLists.txt +++ b/src/vlib/CMakeLists.txt @@ -55,6 +55,7 @@ install( # vlib shared library ############################################################################## +if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") set(PLATFORM_SOURCES linux/pci.c linux/vfio.c @@ -64,6 +65,11 @@ set(PLATFORM_SOURCES set(PLATFORM_HEADERS linux/vfio.h ) +elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") +set(PLATFORM_SOURCES + freebsd/pci.c +) +endif() add_vpp_library(vlib SOURCES diff --git a/src/vlib/freebsd/pci.c b/src/vlib/freebsd/pci.c new file mode 100644 index 00000000000..a4e9eb2dda6 --- /dev/null +++ b/src/vlib/freebsd/pci.c @@ -0,0 +1,380 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Tom Jones <thj@freebsd.org> + * + * This software was developed by Tom Jones <thj@freebsd.org> under sponsorship + * from the FreeBSD Foundation. + * + */ + +#include <vlib/vlib.h> +#include <vlib/pci/pci.h> +#include <vlib/unix/unix.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/eventfd.h> + +#include <sys/pciio.h> + +#include <fcntl.h> +#include <dirent.h> +#include <net/if.h> + +extern vlib_pci_main_t freebsd_pci_main; + +uword +vlib_pci_get_private_data (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return 0; +} + +void +vlib_pci_set_private_data (vlib_main_t *vm, vlib_pci_dev_handle_t h, + uword private_data) +{ +} + +vlib_pci_addr_t * +vlib_pci_get_addr (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return NULL; +} + +u32 +vlib_pci_get_numa_node (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return 0; +} + +u32 +vlib_pci_get_num_msix_interrupts (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return 0; +} + +/* Call to allocate/initialize the pci subsystem. + This is not an init function so that users can explicitly enable + pci only when it's needed. */ +clib_error_t *pci_bus_init (vlib_main_t *vm); + +vlib_pci_device_info_t * +vlib_pci_get_device_info (vlib_main_t *vm, vlib_pci_addr_t *addr, + clib_error_t **error) +{ + /* Populate a vlib_pci_device_info_t from the given address */ + clib_error_t *err = NULL; + vlib_pci_device_info_t *di = NULL; + + int fd = -1; + struct pci_conf_io pci; + struct pci_conf match; + struct pci_match_conf pattern; + bzero (&match, sizeof (match)); + bzero (&pattern, sizeof (pattern)); + + pattern.pc_sel.pc_domain = addr->domain; + pattern.pc_sel.pc_bus = addr->bus; + pattern.pc_sel.pc_dev = addr->slot; + pattern.pc_sel.pc_func = addr->function; + pattern.flags = PCI_GETCONF_MATCH_DOMAIN | PCI_GETCONF_MATCH_BUS | + PCI_GETCONF_MATCH_DEV | PCI_GETCONF_MATCH_FUNC; + + pci.pat_buf_len = sizeof (pattern); + pci.num_patterns = 1; + pci.patterns = &pattern; + pci.match_buf_len = sizeof (match); + pci.num_matches = 1; + pci.matches = &match; + pci.offset = 0; + pci.generation = 0; + pci.status = 0; + + fd = open ("/dev/pci", 0); + if (fd == -1) + { + err = clib_error_return_unix (0, "open '/dev/pci'"); + goto error; + } + + if (ioctl (fd, PCIOCGETCONF, &pci) == -1) + { + err = clib_error_return_unix (0, "reading PCIOCGETCONF"); + goto error; + } + + di = clib_mem_alloc (sizeof (vlib_pci_device_info_t)); + clib_memset (di, 0, sizeof (vlib_pci_device_info_t)); + + di->addr.as_u32 = addr->as_u32; + di->numa_node = 0; /* TODO: Place holder until we have NUMA on FreeBSD */ + + di->device_class = match.pc_class; + di->vendor_id = match.pc_vendor; + di->device_id = match.pc_device; + di->revision = match.pc_revid; + + di->product_name = NULL; + di->vpd_r = 0; + di->vpd_w = 0; + di->driver_name = format (0, "%s", &match.pd_name); + di->iommu_group = -1; + + goto done; + +error: + vlib_pci_free_device_info (di); + di = NULL; +done: + if (error) + *error = err; + close (fd); + return di; +} + +clib_error_t *__attribute__ ((weak)) +vlib_pci_get_device_root_bus (vlib_pci_addr_t *addr, vlib_pci_addr_t *root_bus) +{ + return NULL; +} + +clib_error_t * +vlib_pci_bind_to_uio (vlib_main_t *vm, vlib_pci_addr_t *addr, + char *uio_drv_name, int force) +{ + clib_error_t *error = 0; + + if (error) + { + return error; + } + + if (strncmp ("auto", uio_drv_name, 5) == 0) + { + /* TODO: We should confirm that nic_uio is loaded and return an error. */ + uio_drv_name = "nic_uio"; + } + return error; +} + +clib_error_t * +vlib_pci_register_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, + pci_intx_handler_function_t *intx_handler) +{ + return NULL; +} + +clib_error_t * +vlib_pci_unregister_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return NULL; +} + +clib_error_t * +vlib_pci_register_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, + u32 start, u32 count, + pci_msix_handler_function_t *msix_handler) +{ + return NULL; +} + +clib_error_t * +vlib_pci_unregister_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, + u32 start, u32 count) +{ + return NULL; +} + +clib_error_t * +vlib_pci_enable_msix_irq (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 start, + u16 count) +{ + return NULL; +} + +uword +vlib_pci_get_msix_file_index (vlib_main_t *vm, vlib_pci_dev_handle_t h, + u16 index) +{ + return 0; +} + +clib_error_t * +vlib_pci_disable_msix_irq (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 start, + u16 count) +{ + return NULL; +} + +/* Configuration space read/write. */ +clib_error_t * +vlib_pci_read_write_config (vlib_main_t *vm, vlib_pci_dev_handle_t h, + vlib_read_or_write_t read_or_write, uword address, + void *data, u32 n_bytes) +{ + return NULL; +} + +clib_error_t * +vlib_pci_map_region (vlib_main_t *vm, vlib_pci_dev_handle_t h, u32 resource, + void **result) +{ + return NULL; +} + +clib_error_t * +vlib_pci_map_region_fixed (vlib_main_t *vm, vlib_pci_dev_handle_t h, + u32 resource, u8 *addr, void **result) +{ + return NULL; +} + +clib_error_t * +vlib_pci_io_region (vlib_main_t *vm, vlib_pci_dev_handle_t h, u32 resource) +{ + return NULL; +} + +clib_error_t * +vlib_pci_read_write_io (vlib_main_t *vm, vlib_pci_dev_handle_t h, + vlib_read_or_write_t read_or_write, uword offset, + void *data, u32 length) +{ + return NULL; +} + +clib_error_t * +vlib_pci_map_dma (vlib_main_t *vm, vlib_pci_dev_handle_t h, void *ptr) +{ + return NULL; +} + +int +vlib_pci_supports_virtual_addr_dma (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return 0; +} + +clib_error_t * +vlib_pci_device_open (vlib_main_t *vm, vlib_pci_addr_t *addr, + pci_device_id_t ids[], vlib_pci_dev_handle_t *handle) +{ + return NULL; +} + +void +vlib_pci_device_close (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ +} + +void +init_device_from_registered (vlib_main_t *vm, vlib_pci_device_info_t *di) +{ +} + +static int +pci_addr_cmp (void *v1, void *v2) +{ + vlib_pci_addr_t *a1 = v1; + vlib_pci_addr_t *a2 = v2; + + if (a1->domain > a2->domain) + return 1; + if (a1->domain < a2->domain) + return -1; + if (a1->bus > a2->bus) + return 1; + if (a1->bus < a2->bus) + return -1; + if (a1->slot > a2->slot) + return 1; + if (a1->slot < a2->slot) + return -1; + if (a1->function > a2->function) + return 1; + if (a1->function < a2->function) + return -1; + return 0; +} + +vlib_pci_addr_t * +vlib_pci_get_all_dev_addrs () +{ + vlib_pci_addr_t *addrs = 0; + + int fd = -1; + struct pci_conf_io pci; + struct pci_conf matches[32]; + bzero (matches, sizeof (matches)); + + pci.pat_buf_len = 0; + pci.num_patterns = 0; + pci.patterns = NULL; + pci.match_buf_len = sizeof (matches); + pci.num_matches = 32; + pci.matches = (struct pci_conf *) &matches; + pci.offset = 0; + pci.generation = 0; + pci.status = 0; + + fd = open ("/dev/pci", 0); + if (fd == -1) + { + clib_error_return_unix (0, "opening /dev/pci"); + return (NULL); + } + + if (ioctl (fd, PCIOCGETCONF, &pci) == -1) + { + clib_error_return_unix (0, "reading pci config"); + close (fd); + return (NULL); + } + + for (int i = 0; i < pci.num_matches; i++) + { + struct pci_conf *m = &pci.matches[i]; + vlib_pci_addr_t addr; + + addr.domain = m->pc_sel.pc_domain; + addr.bus = m->pc_sel.pc_bus; + addr.slot = m->pc_sel.pc_dev; + addr.function = m->pc_sel.pc_func; + + vec_add1 (addrs, addr); + } + + vec_sort_with_function (addrs, pci_addr_cmp); + close (fd); + + return addrs; +} + +clib_error_t * +freebsd_pci_init (vlib_main_t *vm) +{ + vlib_pci_main_t *pm = &pci_main; + vlib_pci_addr_t *addr = 0, *addrs; + + pm->vlib_main = vm; + + ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32)); + + addrs = vlib_pci_get_all_dev_addrs (); + vec_foreach (addr, addrs) + { + vlib_pci_device_info_t *d; + if ((d = vlib_pci_get_device_info (vm, addr, 0))) + { + init_device_from_registered (vm, d); + vlib_pci_free_device_info (d); + } + } + + return 0; +} + +VLIB_INIT_FUNCTION (freebsd_pci_init) = { + .runs_after = VLIB_INITS ("unix_input_init"), +}; diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c index 08923bebcdf..29ca3d97523 100644 --- a/src/vlib/linux/pci.c +++ b/src/vlib/linux/pci.c @@ -55,6 +55,7 @@ #include <linux/ethtool.h> #include <linux/sockios.h> #include <linux/vfio.h> +#include <limits.h> #include <sys/eventfd.h> #define SYSFS_DEVICES_PCI "/sys/devices/pci" @@ -1560,14 +1561,17 @@ linux_pci_init (vlib_main_t * vm) ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32)); - addrs = vlib_pci_get_all_dev_addrs (); - vec_foreach (addr, addrs) + if (pm->pci_device_registrations) { - vlib_pci_device_info_t *d; - if ((d = vlib_pci_get_device_info (vm, addr, 0))) + addrs = vlib_pci_get_all_dev_addrs (); + vec_foreach (addr, addrs) { - init_device_from_registered (vm, d); - vlib_pci_free_device_info (d); + vlib_pci_device_info_t *d; + if ((d = vlib_pci_get_device_info (vm, addr, 0))) + { + init_device_from_registered (vm, d); + vlib_pci_free_device_info (d); + } } } diff --git a/src/vlib/pci/pci.c b/src/vlib/pci/pci.c index 0bc90c5532d..7284e6675fd 100644 --- a/src/vlib/pci/pci.c +++ b/src/vlib/pci/pci.c @@ -119,7 +119,7 @@ vlib_pci_intr_enable (vlib_main_t *vm, vlib_pci_dev_handle_t h) { const vlib_pci_config_reg_command_t cmd = { .intx_disable = 1 }; clib_error_t *err; - int already_set; + int already_set = 0; err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 0, &already_set); log_debug (h, "interrupt%senabled", already_set ? " " : " already "); @@ -131,7 +131,7 @@ vlib_pci_intr_disable (vlib_main_t *vm, vlib_pci_dev_handle_t h) { const vlib_pci_config_reg_command_t cmd = { .intx_disable = 1 }; clib_error_t *err; - int already_set; + int already_set = 0; err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 1, &already_set); log_debug (h, "interrupt%sdisabled", already_set ? " " : " already "); @@ -143,7 +143,7 @@ vlib_pci_bus_master_enable (vlib_main_t *vm, vlib_pci_dev_handle_t h) { const vlib_pci_config_reg_command_t cmd = { .bus_master = 1 }; clib_error_t *err; - int already_set; + int already_set = 0; err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 1, &already_set); log_debug (h, "bus-master%senabled", already_set ? " " : " already "); @@ -155,7 +155,7 @@ vlib_pci_bus_master_disable (vlib_main_t *vm, vlib_pci_dev_handle_t h) { const vlib_pci_config_reg_command_t cmd = { .bus_master = 1 }; clib_error_t *err; - int already_set; + int already_set = 0; err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 0, &already_set); log_debug (h, "bus-master%sdisabled", already_set ? " " : " already "); diff --git a/src/vlib/threads.c b/src/vlib/threads.c index 3994afc2cea..ef2c5616f21 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -16,6 +16,9 @@ #include <signal.h> #include <math.h> +#ifdef __FreeBSD__ +#include <pthread_np.h> +#endif /* __FreeBSD__ */ #include <vppinfra/format.h> #include <vppinfra/time_range.h> #include <vppinfra/interrupt.h> @@ -179,9 +182,7 @@ vlib_thread_init (vlib_main_t * vm) u32 n_vlib_mains = 1; u32 first_index = 1; u32 i; - pid_t pid; - uword *avail_cpu, *affinity_cpu; - uword n_cpus; + uword *avail_cpu; u32 stats_num_worker_threads_dir_index; stats_num_worker_threads_dir_index = @@ -192,41 +193,22 @@ vlib_thread_init (vlib_main_t * vm) tm->cpu_core_bitmap = os_get_online_cpu_core_bitmap (); tm->cpu_socket_bitmap = os_get_online_cpu_node_bitmap (); - /* get bitmap of active cpu cores vpp has affinity to */ - pid = getpid (); - tm->cpu_affinity_bitmap = os_get_cpu_affinity_bitmap (pid); - - /* if fetching affinity fails, return online cpu core bmp */ - if (tm->cpu_affinity_bitmap == 0) - tm->cpu_affinity_bitmap = os_get_online_cpu_core_bitmap (); - avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap); - affinity_cpu = clib_bitmap_dup (tm->cpu_affinity_bitmap); /* skip cores */ - n_cpus = clib_bitmap_count_set_bits (avail_cpu); - if (tm->skip_cores >= n_cpus) - return clib_error_return (0, "skip-core greater than available cpus"); - n_cpus = clib_bitmap_count_set_bits (affinity_cpu); - if (tm->skip_cores >= n_cpus) - return clib_error_return (0, "skip-core greater than affinity cpus"); - for (i = 0; i < tm->skip_cores; i++) { - uword c; - c = clib_bitmap_first_set (avail_cpu); + uword c = clib_bitmap_first_set (avail_cpu); if (c == ~0) return clib_error_return (0, "no available cpus to skip"); avail_cpu = clib_bitmap_set (avail_cpu, c, 0); - - c = clib_bitmap_first_set (affinity_cpu); - if (c == ~0) - return clib_error_return (0, "no available env cpus to skip"); - - affinity_cpu = clib_bitmap_set (affinity_cpu, c, 0); } + /* if main thread affinity is unspecified, set to current running cpu */ + if (tm->main_lcore == ~0) + tm->main_lcore = sched_getcpu (); + /* grab cpu for main thread */ if (tm->main_lcore != ~0) { @@ -234,17 +216,6 @@ vlib_thread_init (vlib_main_t * vm) return clib_error_return (0, "cpu %u is not available to be used" " for the main thread", tm->main_lcore); avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0); - affinity_cpu = clib_bitmap_set (affinity_cpu, tm->main_lcore, 0); - } - /* if auto enabled, grab first cpu vpp has affinity to for main thread */ - else if (tm->use_main_core_auto) - { - uword c = clib_bitmap_first_set (affinity_cpu); - if (c != ~0) - tm->main_lcore = c; - - avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0); - affinity_cpu = clib_bitmap_set (affinity_cpu, tm->main_lcore, 0); } /* assume that there is socket 0 only if there is no data from sysfs */ @@ -272,7 +243,11 @@ vlib_thread_init (vlib_main_t * vm) w->thread_mheap = clib_mem_get_heap (); w->thread_stack = vlib_thread_stacks[0]; w->cpu_id = tm->main_lcore; +#ifdef __FreeBSD__ + w->lwp = pthread_getthreadid_np (); +#else w->lwp = syscall (SYS_gettid); +#endif /* __FreeBSD__ */ w->thread_id = pthread_self (); tm->n_vlib_mains = 1; @@ -325,23 +300,13 @@ vlib_thread_init (vlib_main_t * vm) } else { - /* for automatic pinning, use cpu affinity list */ - uword n_env_cpu = 0; - n_env_cpu = clib_bitmap_count_set_bits (affinity_cpu); - - if (n_env_cpu < tr->count) - return clib_error_return (0, - "no available cpus to be used for" - " the '%s' thread #%u", - tr->name, n_env_cpu); - for (j = 0; j < tr->count; j++) { /* Do not use CPU 0 by default - leave it to the host and IRQs */ - uword avail_c0 = clib_bitmap_get (affinity_cpu, 0); - affinity_cpu = clib_bitmap_set (affinity_cpu, 0, 0); + uword avail_c0 = clib_bitmap_get (avail_cpu, 0); + avail_cpu = clib_bitmap_set (avail_cpu, 0, 0); - uword c = clib_bitmap_first_set (affinity_cpu); + uword c = clib_bitmap_first_set (avail_cpu); /* Use CPU 0 as a last resort */ if (c == ~0 && avail_c0) { @@ -355,15 +320,14 @@ vlib_thread_init (vlib_main_t * vm) " the '%s' thread #%u", tr->name, tr->count); - affinity_cpu = clib_bitmap_set (affinity_cpu, 0, avail_c0); - affinity_cpu = clib_bitmap_set (affinity_cpu, c, 0); + avail_cpu = clib_bitmap_set (avail_cpu, 0, avail_c0); + avail_cpu = clib_bitmap_set (avail_cpu, c, 0); tr->coremask = clib_bitmap_set (tr->coremask, c, 1); } } } clib_bitmap_free (avail_cpu); - clib_bitmap_free (affinity_cpu); tm->n_vlib_mains = n_vlib_mains; vlib_stats_set_gauge (stats_num_worker_threads_dir_index, n_vlib_mains - 1); @@ -410,6 +374,8 @@ void vlib_worker_thread_init (vlib_worker_thread_t * w) { vlib_thread_main_t *tm = vlib_get_thread_main (); + sigset_t signals; + int rv; /* * Note: disabling signals in worker threads as follows @@ -419,7 +385,17 @@ vlib_worker_thread_init (vlib_worker_thread_t * w) * sigfillset (&s); * pthread_sigmask (SIG_SETMASK, &s, 0); * } + * We can still disable signals for SIGINT,SIGHUP and SIGTERM as they don't + * trigger post-dump handlers anyway. */ + sigemptyset (&signals); + sigaddset (&signals, SIGINT); + sigaddset (&signals, SIGHUP); + sigaddset (&signals, SIGTERM); + rv = pthread_sigmask (SIG_BLOCK, &signals, NULL); + + if (rv) + clib_warning ("Failed to set the worker signal mask"); clib_mem_set_heap (w->thread_mheap); @@ -448,7 +424,11 @@ vlib_worker_thread_bootstrap_fn (void *arg) { vlib_worker_thread_t *w = arg; +#ifdef __FreeBSD__ + w->lwp = pthread_getthreadid_np (); +#else w->lwp = syscall (SYS_gettid); +#endif /* __FreeBSD__ */ w->thread_id = pthread_self (); __os_thread_index = w - vlib_worker_threads; @@ -1158,6 +1138,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) u8 *name; uword *bitmap; u32 count; + int use_corelist = 0; tm->thread_registrations_by_name = hash_create_string (0, sizeof (uword)); @@ -1165,7 +1146,6 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) tm->sched_policy = ~0; tm->sched_priority = ~0; tm->main_lcore = ~0; - tm->use_main_core_auto = 0; tr = tm->next; @@ -1181,8 +1161,6 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) tm->use_pthreads = 1; else if (unformat (input, "thread-prefix %v", &tm->thread_prefix)) ; - else if (unformat (input, "main-core auto")) - tm->use_main_core_auto = 1; else if (unformat (input, "main-core %u", &tm->main_lcore)) ; else if (unformat (input, "skip-cores %u", &tm->skip_cores)) @@ -1212,6 +1190,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) tr->coremask = bitmap; tr->count = clib_bitmap_count_set_bits (tr->coremask); + use_corelist = 1; } else if (unformat @@ -1241,13 +1220,9 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) break; } - if (tm->main_lcore != ~0 && tm->use_main_core_auto) - { - return clib_error_return ( - 0, "cannot set both 'main-core %u' and 'main-core auto'", - tm->main_lcore); - } - + if (use_corelist && tm->main_lcore == ~0) + return clib_error_return (0, "main-core must be specified when using " + "corelist-* or coremask-* attribute"); if (tm->sched_priority != ~0) { if (tm->sched_policy == SCHED_FIFO || tm->sched_policy == SCHED_RR) diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 3072d0e67dd..ac0c1d5d266 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -255,8 +255,6 @@ typedef struct int use_pthreads; - int use_main_core_auto; - /* Number of vlib_main / vnet_main clones */ u32 n_vlib_mains; @@ -284,9 +282,6 @@ typedef struct /* Bitmap of available CPU sockets (NUMA nodes) */ uword *cpu_socket_bitmap; - /* Bitmap of CPU affinity for VPP process */ - uword *cpu_affinity_bitmap; - /* Worker handoff queues */ vlib_frame_queue_main_t *frame_queue_mains; diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index 38eef4125a9..ee28ca8f1aa 100644 --- a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -39,6 +39,7 @@ #include <vlib/vlib.h> #include <vlib/unix/unix.h> #include <vlib/unix/plugin.h> +#include <vppinfra/unix.h> #include <limits.h> #include <signal.h> @@ -612,22 +613,23 @@ vlib_unix_main (int argc, char *argv[]) vlib_main_t *vm = vlib_get_first_main (); /* one and only time for this! */ unformat_input_t input; clib_error_t *e; - char buffer[PATH_MAX]; int i; vec_validate_aligned (vgm->vlib_mains, 0, CLIB_CACHE_LINE_BYTES); - if ((i = readlink ("/proc/self/exe", buffer, sizeof (buffer) - 1)) > 0) + vgm->exec_path = (char *) os_get_exec_path (); + + if (vgm->exec_path) { - int j; - buffer[i] = 0; - vgm->exec_path = vec_new (char, i + 1); - clib_memcpy_fast (vgm->exec_path, buffer, i + 1); - for (j = i - 1; j > 0; j--) - if (buffer[j - 1] == '/') + for (i = vec_len (vgm->exec_path) - 1; i > 0; i--) + if (vgm->exec_path[i - 1] == '/') break; - vgm->name = vec_new (char, i - j + 1); - clib_memcpy_fast (vgm->name, buffer + j, i - j + 1); + + vgm->name = 0; + + vec_add (vgm->name, vgm->exec_path + i, vec_len (vgm->exec_path) - i); + vec_add1 (vgm->exec_path, 0); + vec_add1 (vgm->name, 0); } else vgm->exec_path = vgm->name = argv[0]; diff --git a/src/vlib/unix/plugin.c b/src/vlib/unix/plugin.c index fd3a050b944..5cac9abc8fe 100644 --- a/src/vlib/unix/plugin.c +++ b/src/vlib/unix/plugin.c @@ -306,8 +306,12 @@ process_reg: } vec_free (version_required); +#if defined(RTLD_DEEPBIND) handle = dlopen ((char *) pi->filename, RTLD_LAZY | (reg->deep_bind ? RTLD_DEEPBIND : 0)); +#else + handle = dlopen ((char *) pi->filename, RTLD_LAZY); +#endif if (handle == 0) { diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h index 62a8d4c62d8..c09334136c0 100644 --- a/src/vlibapi/api_common.h +++ b/src/vlibapi/api_common.h @@ -235,8 +235,8 @@ typedef struct /** Message convert function vector */ void *(*fromjson_handler) (cJSON *, int *); - /** Message endian handler vector */ - void (*endian_handler) (void *); + /** Message endian handler vector. */ + void (*endian_handler) (void *, bool to_net); /** Message calc size function vector */ uword (*calc_size_func) (void *); diff --git a/src/vlibapi/api_helper_macros.h b/src/vlibapi/api_helper_macros.h index 9c93d33934b..0380692b80e 100644 --- a/src/vlibapi/api_helper_macros.h +++ b/src/vlibapi/api_helper_macros.h @@ -29,9 +29,9 @@ #define _NATIVE_TO_NETWORK(t, rmp) \ api_main_t *am = vlibapi_get_main (); \ - void (*endian_fp) (void *); \ + void (*endian_fp) (void *, bool); \ endian_fp = am->msg_data[t + (REPLY_MSG_ID_BASE)].endian_handler; \ - (*endian_fp) (rmp); + (*endian_fp) (rmp, 1 /* to network */); #define REPLY_MACRO(msg) \ do \ diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c index 7de1906f17a..79064b292c9 100644 --- a/src/vlibapi/api_shared.c +++ b/src/vlibapi/api_shared.c @@ -230,7 +230,7 @@ vl_msg_api_trace_write_one (api_main_t *am, u8 *msg, FILE *fp) if (m && m->endian_handler) { - m->endian_handler (tmpmem); + m->endian_handler (tmpmem, 1); } if (m && m->tojson_handler) @@ -561,7 +561,7 @@ msg_handler_internal (api_main_t *am, void *the_msg, uword msg_len, } if (m->is_autoendian) - m->endian_handler (the_msg); + m->endian_handler (the_msg, 0); if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0)) clib_call_callbacks (am->perf_counter_cbs, am, id, diff --git a/src/vlibmemory/memclnt_api.c b/src/vlibmemory/memclnt_api.c index d4106b10559..b0b0c72eae0 100644 --- a/src/vlibmemory/memclnt_api.c +++ b/src/vlibmemory/memclnt_api.c @@ -197,6 +197,7 @@ vlib_api_init (void) cJSON_Hooks cjson_hooks = { .malloc_fn = clib_mem_alloc, .free_fn = clib_mem_free, + .realloc_fn = clib_mem_realloc, }; cJSON_InitHooks (&cjson_hooks); diff --git a/src/vlibmemory/memory_api.c b/src/vlibmemory/memory_api.c index 39c6b0fd15b..57373b91e31 100644 --- a/src/vlibmemory/memory_api.c +++ b/src/vlibmemory/memory_api.c @@ -823,9 +823,9 @@ vl_mem_api_handler_with_vm_node (api_main_t *am, svm_region_t *vlib_rp, if (m->is_autoendian) { - void (*endian_fp) (void *); + void (*endian_fp) (void *, bool); endian_fp = am->msg_data[id].endian_handler; - (*endian_fp) (the_msg); + (*endian_fp) (the_msg, 0); } if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0)) clib_call_callbacks (am->perf_counter_cbs, am, id, 0 /* before */); diff --git a/src/vlibmemory/socket_client.c b/src/vlibmemory/socket_client.c index 5fa19c6a9c0..ad28136dc07 100644 --- a/src/vlibmemory/socket_client.c +++ b/src/vlibmemory/socket_client.c @@ -22,6 +22,14 @@ #define _GNU_SOURCE #include <sys/socket.h> +#ifdef __FreeBSD__ +#define _WANT_UCRED +#include <sys/types.h> +#include <sys/param.h> +#include <sys/ucred.h> +#include <sys/un.h> +#endif /* __FreeBSD__ */ + #include <svm/ssvm.h> #include <vlibmemory/socket_client.h> #include <vlibmemory/memory_client.h> @@ -278,7 +286,11 @@ vl_sock_api_recv_fd_msg_internal (socket_client_main_t * scm, int fds[], struct msghdr mh = { 0 }; struct iovec iov[1]; ssize_t size = 0; +#ifdef __linux__ struct ucred *cr = 0; +#elif __FreeBSD__ + struct cmsgcred *cr = 0; +#endif /* __linux__ */ struct cmsghdr *cmsg; pid_t pid __attribute__ ((unused)); uid_t uid __attribute__ ((unused)); @@ -318,6 +330,7 @@ vl_sock_api_recv_fd_msg_internal (socket_client_main_t * scm, int fds[], { if (cmsg->cmsg_level == SOL_SOCKET) { +#ifdef __linux__ if (cmsg->cmsg_type == SCM_CREDENTIALS) { cr = (struct ucred *) CMSG_DATA (cmsg); @@ -325,6 +338,15 @@ vl_sock_api_recv_fd_msg_internal (socket_client_main_t * scm, int fds[], gid = cr->gid; pid = cr->pid; } +#elif __FreeBSD__ + if (cmsg->cmsg_type == SCM_CREDS) + { + cr = (struct cmsgcred *) CMSG_DATA (cmsg); + uid = cr->cmcred_uid; + gid = cr->cmcred_gid; + pid = cr->cmcred_pid; + } +#endif /* __linux__ */ else if (cmsg->cmsg_type == SCM_RIGHTS) { clib_memcpy_fast (fds, CMSG_DATA (cmsg), sizeof (int) * n_fds); diff --git a/src/vlibmemory/vlib_api_cli.c b/src/vlibmemory/vlib_api_cli.c index 4492f5af980..6ae81cd13df 100644 --- a/src/vlibmemory/vlib_api_cli.c +++ b/src/vlibmemory/vlib_api_cli.c @@ -554,7 +554,7 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename, } if (m) { - m->endian_handler (tmpbuf + sizeof (uword)); + m->endian_handler (tmpbuf + sizeof (uword), 1 /* to network */); } } @@ -674,7 +674,7 @@ vl_msg_print_trace (u8 *msg, void *ctx) clib_memcpy_fast (tmpbuf, msg, msg_length); msg = tmpbuf; - m->endian_handler (tmpbuf); + m->endian_handler (tmpbuf, 0 /* from network */); } vlib_cli_output (a->vm, "%U\n", @@ -824,7 +824,7 @@ vl_msg_exec_json_command (vlib_main_t *vm, cJSON *o) } if (clib_arch_is_little_endian) - m->endian_handler (msg); + m->endian_handler (msg, 1 /* to network */); if (!m->handler) { diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index fb8d294009d..ad5f44846cc 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -813,24 +813,6 @@ list(APPEND VNET_API_FILES ) ############################################################################## -# mpls segment routing -############################################################################## - -list(APPEND VNET_SOURCES - srmpls/sr_mpls_policy.c - srmpls/sr_mpls_steering.c - srmpls/sr_mpls_api.c -) - -list(APPEND VNET_HEADERS - srmpls/sr_mpls.h -) - -list(APPEND VNET_API_FILES - srmpls/sr_mpls.api -) - -############################################################################## # IPFIX / netflow v10 ############################################################################## list(APPEND VNET_SOURCES @@ -1460,7 +1442,6 @@ add_vat_test_library(vnet ip/ip_test.c arp/arp_test.c ip6-nd/ip6_nd_test.c - srmpls/sr_mpls_test.c session/session_test.c l2/l2_test.c ipsec/ipsec_test.c diff --git a/src/vnet/dev/format.c b/src/vnet/dev/format.c index ed83a0eba95..f599c0f8b85 100644 --- a/src/vnet/dev/format.c +++ b/src/vnet/dev/format.c @@ -101,7 +101,7 @@ format_vnet_dev_port_info (u8 *s, va_list *args) u32 indent = format_get_indent (s); s = format (s, "Hardware Address is %U", format_vnet_dev_hw_addr, - &port->attr.hw_addr); + &port->primary_hw_addr); s = format (s, ", %u RX queues (max %u), %u TX queues (max %u)", pool_elts (port->rx_queues), port->attr.max_rx_queues, pool_elts (port->tx_queues), port->attr.max_tx_queues); diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c index 8c837575cf8..027e1ed4e74 100644 --- a/src/vnet/devices/virtio/node.c +++ b/src/vnet/devices/virtio/node.c @@ -282,6 +282,16 @@ virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node, if (n_left == 0) return 0; + if (PREDICT_FALSE (n_left == vring->queue_size)) + { + /* + * Informational error logging when VPP is not pulling packets fast + * enough. + */ + vlib_error_count (vm, node->node_index, VIRTIO_INPUT_ERROR_FULL_RX_QUEUE, + 1); + } + if (type == VIRTIO_IF_TYPE_TUN) { next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT; diff --git a/src/vnet/devices/virtio/virtio_inline.h b/src/vnet/devices/virtio/virtio_inline.h index 179f319aa4c..41bba755934 100644 --- a/src/vnet/devices/virtio/virtio_inline.h +++ b/src/vnet/devices/virtio/virtio_inline.h @@ -17,6 +17,7 @@ #define foreach_virtio_input_error \ _ (BUFFER_ALLOC, "buffer alloc error") \ + _ (FULL_RX_QUEUE, "full rx queue (driver tx drop)") \ _ (UNKNOWN, "unknown") typedef enum diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c index e2558eeca41..03cbdde1c2b 100644 --- a/src/vnet/ethernet/node.c +++ b/src/vnet/ethernet/node.c @@ -982,8 +982,31 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node, else { for (int j = 0; j < 16; j++) - if (next[j] == 0) - slowpath_indices[n_slowpath++] = i + j; + { + if (next[j] == 0) + slowpath_indices[n_slowpath++] = i + j; + else if (dmac_check && main_is_l3 && dmacs_bad[i + j]) + { + next[j] = 0; + slowpath_indices[n_slowpath++] = i + j; + } + } + } + } + else + { + if (dmac_check && main_is_l3) + { + u8x16 dmac_bad = u8x16_load_unaligned (&dmacs_bad[i]); + if (!u8x16_is_all_zero (dmac_bad)) + { + for (int j = 0; j < 16; j++) + if (dmacs_bad[i + j]) + { + next[j] = 0; + slowpath_indices[n_slowpath++] = i + j; + } + } } } @@ -994,7 +1017,12 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node, continue; } #endif - if (main_is_l3 && etype[0] == et_ip4) + if (dmac_check && main_is_l3 && dmacs_bad[i]) + { + next[0] = 0; + slowpath_indices[n_slowpath++] = i; + } + else if (main_is_l3 && etype[0] == et_ip4) next[0] = next_ip4; else if (main_is_l3 && etype[0] == et_ip6) next[0] = next_ip6; @@ -1052,7 +1080,7 @@ eth_input_process_frame (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - /* untagged packet with not well known etyertype */ + /* untagged packet with not well known ethertype */ if (last_unknown_etype != etype) { last_unknown_etype = etype; diff --git a/src/vnet/gso/gso.h b/src/vnet/gso/gso.h index 883a4941ee2..dee5da5c70b 100644 --- a/src/vnet/gso/gso.h +++ b/src/vnet/gso/gso.h @@ -39,13 +39,13 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0, u32 flags, u16 n_bufs, u16 hdr_sz) { u32 i = n_bufs; - while (i >= 4) + while (i >= 6) { /* prefetches */ CLIB_PREFETCH (bufs[2], 2 * CLIB_CACHE_LINE_BYTES, LOAD); CLIB_PREFETCH (bufs[3], 2 * CLIB_CACHE_LINE_BYTES, LOAD); - vlib_prefetch_buffer_data (bufs[2], LOAD); - vlib_prefetch_buffer_data (bufs[3], LOAD); + vlib_prefetch_buffer_data (bufs[4], LOAD); + vlib_prefetch_buffer_data (bufs[5], LOAD); /* copying objects from cacheline 0 */ bufs[0]->current_data = 0; @@ -70,10 +70,26 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0, bufs[0]->total_length_not_including_first_buffer = 0; bufs[1]->total_length_not_including_first_buffer = 0; + clib_memcpy_fast (&bufs[0]->opaque2, &b0->opaque2, sizeof (b0->opaque2)); + clib_memcpy_fast (&bufs[1]->opaque2, &b0->opaque2, sizeof (b0->opaque2)); + /* copying data */ clib_memcpy_fast (bufs[0]->data, vlib_buffer_get_current (b0), hdr_sz); clib_memcpy_fast (bufs[1]->data, vlib_buffer_get_current (b0), hdr_sz); + /* header offset fixup */ + vnet_buffer (bufs[0])->l2_hdr_offset -= b0->current_data; + vnet_buffer (bufs[0])->l3_hdr_offset -= b0->current_data; + vnet_buffer (bufs[0])->l4_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[0])->outer_l3_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[0])->outer_l4_hdr_offset -= b0->current_data; + + vnet_buffer (bufs[1])->l2_hdr_offset -= b0->current_data; + vnet_buffer (bufs[1])->l3_hdr_offset -= b0->current_data; + vnet_buffer (bufs[1])->l4_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[1])->outer_l3_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[1])->outer_l4_hdr_offset -= b0->current_data; + bufs += 2; i -= 2; } @@ -92,10 +108,18 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0, /* copying objects from cacheline 1 */ bufs[0]->trace_handle = b0->trace_handle; bufs[0]->total_length_not_including_first_buffer = 0; + clib_memcpy_fast (&bufs[0]->opaque2, &b0->opaque2, sizeof (b0->opaque2)); /* copying data */ clib_memcpy_fast (bufs[0]->data, vlib_buffer_get_current (b0), hdr_sz); + /* header offset fixup */ + vnet_buffer (bufs[0])->l2_hdr_offset -= b0->current_data; + vnet_buffer (bufs[0])->l3_hdr_offset -= b0->current_data; + vnet_buffer (bufs[0])->l4_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[0])->outer_l3_hdr_offset -= b0->current_data; + vnet_buffer2 (bufs[0])->outer_l4_hdr_offset -= b0->current_data; + bufs++; i--; } @@ -103,28 +127,41 @@ gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0, static_always_inline void gso_fixup_segmented_buf (vlib_main_t *vm, vlib_buffer_t *b0, u32 next_tcp_seq, - int is_l2, int is_ip6, generic_header_offset_t *gho, - clib_ip_csum_t *c, u8 tcp_flags) + int is_l2, u8 oflags, u16 hdr_sz, u16 l4_hdr_sz, + clib_ip_csum_t *c, u8 tcp_flags, u8 is_prefetch, + vlib_buffer_t *b1) { - ip4_header_t *ip4 = - (ip4_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset + - gho->outer_hdr_sz); - ip6_header_t *ip6 = - (ip6_header_t *) (vlib_buffer_get_current (b0) + gho->l3_hdr_offset + - gho->outer_hdr_sz); - tcp_header_t *tcp = - (tcp_header_t *) (vlib_buffer_get_current (b0) + gho->l4_hdr_offset + - gho->outer_hdr_sz); + i16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset; + i16 l4_hdr_offset = vnet_buffer (b0)->l4_hdr_offset; + + ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l3_hdr_offset); + ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l3_hdr_offset); + tcp_header_t *tcp = (tcp_header_t *) (b0->data + l4_hdr_offset); tcp->flags = tcp_flags; tcp->seq_number = clib_host_to_net_u32 (next_tcp_seq); c->odd = 0; - if (is_ip6) + if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) + { + ip4->length = + clib_host_to_net_u16 (b0->current_length - hdr_sz + + (l4_hdr_offset - l3_hdr_offset) + l4_hdr_sz); + ip4->checksum = 0; + ip4->checksum = ip4_header_checksum (ip4); + vnet_buffer_offload_flags_clear (b0, (VNET_BUFFER_OFFLOAD_F_IP_CKSUM | + VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)); + c->sum += clib_mem_unaligned (&ip4->src_address, u32); + c->sum += clib_mem_unaligned (&ip4->dst_address, u32); + c->sum += clib_host_to_net_u32 ( + (clib_net_to_host_u16 (ip4->length) - ip4_header_bytes (ip4)) + + (ip4->protocol << 16)); + } + else { - ip6->payload_length = clib_host_to_net_u16 ( - b0->current_length - gho->l4_hdr_offset - gho->outer_hdr_sz); + ip6->payload_length = + clib_host_to_net_u16 (b0->current_length - hdr_sz + l4_hdr_sz); vnet_buffer_offload_flags_clear (b0, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM); ip6_psh_t psh = { 0 }; u32 *p = (u32 *) &psh; @@ -135,24 +172,15 @@ gso_fixup_segmented_buf (vlib_main_t *vm, vlib_buffer_t *b0, u32 next_tcp_seq, for (int i = 0; i < 10; i++) c->sum += p[i]; } - else - { - ip4->length = clib_host_to_net_u16 ( - b0->current_length - gho->l3_hdr_offset - gho->outer_hdr_sz); - if (gho->gho_flags & GHO_F_IP4) - ip4->checksum = ip4_header_checksum (ip4); - vnet_buffer_offload_flags_clear (b0, (VNET_BUFFER_OFFLOAD_F_IP_CKSUM | - VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)); - c->sum += clib_mem_unaligned (&ip4->src_address, u32); - c->sum += clib_mem_unaligned (&ip4->dst_address, u32); - c->sum += clib_host_to_net_u32 ( - (clib_net_to_host_u16 (ip4->length) - ip4_header_bytes (ip4)) + - (ip4->protocol << 16)); - } - clib_ip_csum_chunk (c, (u8 *) tcp, gho->l4_hdr_sz); + + if (is_prefetch) + CLIB_PREFETCH (vlib_buffer_get_current (b1) + hdr_sz, + CLIB_CACHE_LINE_BYTES, LOAD); + + clib_ip_csum_chunk (c, (u8 *) tcp, l4_hdr_sz); tcp->checksum = clib_ip_csum_fold (c); - if (!is_l2 && ((gho->gho_flags & GHO_F_TUNNEL) == 0)) + if (!is_l2 && ((oflags & VNET_BUFFER_OFFLOAD_F_TNL_MASK) == 0)) { u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; @@ -169,16 +197,20 @@ gso_fixup_segmented_buf (vlib_main_t *vm, vlib_buffer_t *b0, u32 next_tcp_seq, static_always_inline u32 gso_segment_buffer_inline (vlib_main_t *vm, vnet_interface_per_thread_data_t *ptd, - vlib_buffer_t *b, generic_header_offset_t *gho, - int is_l2, int is_ip6) + vlib_buffer_t *b, int is_l2) { vlib_buffer_t **bufs = 0; u32 n_tx_bytes = 0; + + u8 oflags = vnet_buffer (b)->oflags; + i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset; u16 gso_size = vnet_buffer2 (b)->gso_size; + u16 l4_hdr_sz = vnet_buffer2 (b)->gso_l4_hdr_sz; + u8 tcp_flags = 0, tcp_flags_no_fin_psh = 0; u32 default_bflags = b->flags & ~(VNET_BUFFER_F_GSO | VLIB_BUFFER_NEXT_PRESENT); - u16 hdr_sz = gho->hdr_sz + gho->outer_hdr_sz; + u16 hdr_sz = (l4_hdr_offset - b->current_data) + l4_hdr_sz; u32 next_tcp_seq = 0, tcp_seq = 0; u32 data_size = vlib_buffer_length_in_chain (vm, b) - hdr_sz; u16 size = @@ -200,9 +232,8 @@ gso_segment_buffer_inline (vlib_main_t *vm, vec_validate (bufs, n_bufs - 1); vlib_get_buffers (vm, ptd->split_buffers, bufs, n_bufs); - tcp_header_t *tcp = - (tcp_header_t *) (vlib_buffer_get_current (b) + gho->l4_hdr_offset + - gho->outer_hdr_sz); + tcp_header_t *tcp = (tcp_header_t *) (b->data + l4_hdr_offset); + tcp_seq = next_tcp_seq = clib_net_to_host_u32 (tcp->seq_number); /* store original flags for last packet and reset FIN and PSH */ tcp_flags = tcp->flags; @@ -247,11 +278,11 @@ gso_segment_buffer_inline (vlib_main_t *vm, if (0 == dst_left && data_size) { vlib_prefetch_buffer_header (bufs[i + 1], LOAD); - vlib_prefetch_buffer_data (bufs[i + 1], LOAD); n_tx_bytes += bufs[i]->current_length; - gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, is_ip6, gho, - &c, tcp_flags_no_fin_psh); + gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, oflags, hdr_sz, + l4_hdr_sz, &c, tcp_flags_no_fin_psh, 1, + bufs[i + 1]); i++; dst_left = size; dst_ptr = vlib_buffer_get_current (bufs[i]) + hdr_sz; @@ -264,8 +295,8 @@ gso_segment_buffer_inline (vlib_main_t *vm, ASSERT ((i + 1) == n_alloc); n_tx_bytes += bufs[i]->current_length; - gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, is_ip6, gho, &c, - tcp_flags); + gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, oflags, hdr_sz, + l4_hdr_sz, &c, tcp_flags, 0, NULL); vec_free (bufs); return n_tx_bytes; diff --git a/src/vnet/gso/node.c b/src/vnet/gso/node.c index 910f1585e7f..c1d4459476e 100644 --- a/src/vnet/gso/node.c +++ b/src/vnet/gso/node.c @@ -80,113 +80,108 @@ format_gso_trace (u8 * s, va_list * args) return s; } -static_always_inline u16 -tso_segment_ipip_tunnel_fixup (vlib_main_t * vm, - vnet_interface_per_thread_data_t * ptd, - vlib_buffer_t * sb0, - generic_header_offset_t * gho) +static_always_inline void +tso_segment_ipip_tunnel_fixup (vlib_main_t *vm, + vnet_interface_per_thread_data_t *ptd, + vlib_buffer_t *sb0) { u16 n_tx_bufs = vec_len (ptd->split_buffers); - u16 i = 0, n_tx_bytes = 0; + u16 i = 0; while (i < n_tx_bufs) { vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]); + i16 outer_l3_hdr_offset = vnet_buffer2 (b0)->outer_l3_hdr_offset; + i16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset; - ip4_header_t *ip4 = - (ip4_header_t *) (vlib_buffer_get_current (b0) + - gho->outer_l3_hdr_offset); - ip6_header_t *ip6 = - (ip6_header_t *) (vlib_buffer_get_current (b0) + - gho->outer_l3_hdr_offset); + ip4_header_t *ip4 = (ip4_header_t *) (b0->data + outer_l3_hdr_offset); + ip6_header_t *ip6 = (ip6_header_t *) (b0->data + outer_l3_hdr_offset); - if (gho->gho_flags & GHO_F_OUTER_IP4) + if (vnet_buffer (b0)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM) { - ip4->length = - clib_host_to_net_u16 (b0->current_length - - gho->outer_l3_hdr_offset); + ip4->length = clib_host_to_net_u16 ( + b0->current_length - (outer_l3_hdr_offset - b0->current_data)); ip4->checksum = ip4_header_checksum (ip4); + vnet_buffer_offload_flags_clear ( + b0, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM | + VNET_BUFFER_OFFLOAD_F_TNL_IPIP); } - else if (gho->gho_flags & GHO_F_OUTER_IP6) + else { - ip6->payload_length = - clib_host_to_net_u16 (b0->current_length - - gho->outer_l4_hdr_offset); + ip6->payload_length = clib_host_to_net_u16 ( + b0->current_length - (l3_hdr_offset - b0->current_data)); + vnet_buffer_offload_flags_clear (b0, VNET_BUFFER_OFFLOAD_F_TNL_IPIP); } - n_tx_bytes += gho->outer_hdr_sz; i++; } - return n_tx_bytes; } static_always_inline void -tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t * vm, vlib_buffer_t * b, - generic_header_offset_t * gho) +tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t *vm, vlib_buffer_t *b) { - u8 proto = 0; ip4_header_t *ip4 = 0; ip6_header_t *ip6 = 0; udp_header_t *udp = 0; + i16 outer_l3_hdr_offset = vnet_buffer2 (b)->outer_l3_hdr_offset; + i16 outer_l4_hdr_offset = vnet_buffer2 (b)->outer_l4_hdr_offset; - ip4 = - (ip4_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset); - ip6 = - (ip6_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset); - udp = - (udp_header_t *) (vlib_buffer_get_current (b) + gho->outer_l4_hdr_offset); + ip4 = (ip4_header_t *) (b->data + outer_l3_hdr_offset); + ip6 = (ip6_header_t *) (b->data + outer_l3_hdr_offset); + udp = (udp_header_t *) (b->data + outer_l4_hdr_offset); - if (gho->gho_flags & GHO_F_OUTER_IP4) + if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM) { - proto = ip4->protocol; - ip4->length = - clib_host_to_net_u16 (b->current_length - gho->outer_l3_hdr_offset); + ip4->length = clib_host_to_net_u16 ( + b->current_length - (outer_l3_hdr_offset - b->current_data)); ip4->checksum = ip4_header_checksum (ip4); + if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM) + { + udp->length = clib_host_to_net_u16 ( + b->current_length - (outer_l4_hdr_offset - b->current_data)); + // udp checksum is 0, in udp tunnel + udp->checksum = 0; + } + vnet_buffer_offload_flags_clear ( + b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM | + VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM | + VNET_BUFFER_OFFLOAD_F_TNL_VXLAN); } - else if (gho->gho_flags & GHO_F_OUTER_IP6) - { - proto = ip6->protocol; - ip6->payload_length = - clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset); - } - if (proto == IP_PROTOCOL_UDP) + else { - int bogus; - udp->length = - clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset); - udp->checksum = 0; - if (gho->gho_flags & GHO_F_OUTER_IP6) + ip6->payload_length = clib_host_to_net_u16 ( + b->current_length - (outer_l4_hdr_offset - b->current_data)); + + if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM) { + int bogus; + udp->length = ip6->payload_length; + // udp checksum is 0, in udp tunnel + udp->checksum = 0; udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus); + vnet_buffer_offload_flags_clear ( + b, VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM | + VNET_BUFFER_OFFLOAD_F_TNL_VXLAN); } - else if (gho->gho_flags & GHO_F_OUTER_IP4) - { - udp->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip4); - } - /* FIXME: it should be OUTER_UDP_CKSUM */ - vnet_buffer_offload_flags_clear (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM); } } -static_always_inline u16 -tso_segment_vxlan_tunnel_fixup (vlib_main_t * vm, - vnet_interface_per_thread_data_t * ptd, - vlib_buffer_t * sb0, - generic_header_offset_t * gho) +static_always_inline void +tso_segment_vxlan_tunnel_fixup (vlib_main_t *vm, + vnet_interface_per_thread_data_t *ptd, + vlib_buffer_t *sb0) { u16 n_tx_bufs = vec_len (ptd->split_buffers); - u16 i = 0, n_tx_bytes = 0; + u16 i = 0; while (i < n_tx_bufs) { vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]); - tso_segment_vxlan_tunnel_headers_fixup (vm, b0, gho); - n_tx_bytes += gho->outer_hdr_sz; + tso_segment_vxlan_tunnel_headers_fixup (vm, b0); i++; } - return n_tx_bytes; } static_always_inline u16 @@ -682,32 +677,10 @@ vnet_gso_node_inline (vlib_main_t * vm, to_next -= 1; n_left_to_next += 1; /* undo the counting. */ - generic_header_offset_t gho = { 0 }; u32 n_tx_bytes = 0; - u32 inner_is_ip6 = is_ip6; - - vnet_generic_header_offset_parser (b[0], &gho, is_l2, - is_ip4, is_ip6); - - if (PREDICT_FALSE (gho.gho_flags & GHO_F_TUNNEL)) - { - if (PREDICT_FALSE - (gho.gho_flags & (GHO_F_GRE_TUNNEL | - GHO_F_GENEVE_TUNNEL))) - { - /* not supported yet */ - drop_one_buffer_and_count (vm, vnm, node, from - 1, - hi->sw_if_index, - GSO_ERROR_UNHANDLED_TYPE); - b += 1; - continue; - } - inner_is_ip6 = (gho.gho_flags & GHO_F_IP6) != 0; - } - - n_tx_bytes = gso_segment_buffer_inline (vm, ptd, b[0], &gho, - is_l2, inner_is_ip6); + n_tx_bytes = + gso_segment_buffer_inline (vm, ptd, b[0], is_l2); if (PREDICT_FALSE (n_tx_bytes == 0)) { @@ -718,19 +691,15 @@ vnet_gso_node_inline (vlib_main_t * vm, continue; } - - if (PREDICT_FALSE (gho.gho_flags & GHO_F_VXLAN_TUNNEL)) + if (PREDICT_FALSE (vnet_buffer (b[0])->oflags & + VNET_BUFFER_OFFLOAD_F_TNL_VXLAN)) { - n_tx_bytes += - tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0], &gho); + tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0]); } - else - if (PREDICT_FALSE - (gho.gho_flags & (GHO_F_IPIP_TUNNEL | - GHO_F_IPIP6_TUNNEL))) + else if (PREDICT_FALSE (vnet_buffer (b[0])->oflags & + VNET_BUFFER_OFFLOAD_F_TNL_IPIP)) { - n_tx_bytes += - tso_segment_ipip_tunnel_fixup (vm, ptd, b[0], &gho); + tso_segment_ipip_tunnel_fixup (vm, ptd, b[0]); } u16 n_tx_bufs = vec_len (ptd->split_buffers); @@ -744,7 +713,6 @@ vnet_gso_node_inline (vlib_main_t * vm, { sbi0 = to_next[0] = from_seg[0]; sb0 = vlib_get_buffer (vm, sbi0); - vnet_buffer_offload_flags_clear (sb0, 0x7F); ASSERT (sb0->current_length > 0); to_next += 1; from_seg += 1; diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c index 2995836672d..c727e519138 100644 --- a/src/vnet/interface_api.c +++ b/src/vnet/interface_api.c @@ -1020,21 +1020,19 @@ vl_api_sw_interface_set_interface_name_t_handler ( { vl_api_sw_interface_set_interface_name_reply_t *rmp; vnet_main_t *vnm = vnet_get_main (); - u32 sw_if_index = ntohl (mp->sw_if_index); - vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); clib_error_t *error; int rv = 0; + VALIDATE_SW_IF_INDEX (mp); + + u32 sw_if_index = ntohl (mp->sw_if_index); + vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); + if (mp->name[0] == 0) { rv = VNET_API_ERROR_INVALID_VALUE; goto out; } - if (si == 0) - { - rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; - goto out; - } error = vnet_rename_interface (vnm, si->hw_if_index, (char *) mp->name); if (error) @@ -1044,6 +1042,7 @@ vl_api_sw_interface_set_interface_name_t_handler ( } out: + BAD_SW_IF_INDEX_LABEL; REPLY_MACRO (VL_API_SW_INTERFACE_SET_INTERFACE_NAME_REPLY); } diff --git a/src/vnet/ip-neighbor/ip_neighbor.c b/src/vnet/ip-neighbor/ip_neighbor.c index d340037a15d..614b78489cd 100644 --- a/src/vnet/ip-neighbor/ip_neighbor.c +++ b/src/vnet/ip-neighbor/ip_neighbor.c @@ -460,6 +460,7 @@ ip_neighbor_destroy (ip_neighbor_t * ipn) af = ip_neighbor_get_af (ipn); IP_NEIGHBOR_DBG ("free: %U", format_ip_neighbor, + vlib_time_now (vlib_get_main ()), ip_neighbor_get_index (ipn)); ip_neighbor_publish (ip_neighbor_get_index (ipn), @@ -944,20 +945,20 @@ ip_neighbor_show_sorted_i (vlib_main_t * vm, vlib_cli_command_t * cmd, ip_address_family_t af) { ip_neighbor_elt_t *elt, *head; + f64 now; head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]); + now = vlib_time_now (vm); - - vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP", - "Flags", "Ethernet", "Interface"); + vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Age", "IP", "Flags", + "Ethernet", "Interface"); /* the list is time sorted, newest first, so start from the back * and work forwards. Stop when we get to one that is alive */ - clib_llist_foreach_reverse(ip_neighbor_elt_pool, - ipne_anchor, head, elt, - ({ - vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index); - })); + clib_llist_foreach_reverse (ip_neighbor_elt_pool, ipne_anchor, head, elt, ({ + vlib_cli_output (vm, "%U", format_ip_neighbor, + now, elt->ipne_index); + })); return (NULL); } @@ -969,6 +970,7 @@ ip_neighbor_show_i (vlib_main_t * vm, { index_t *ipni, *ipnis = NULL; u32 sw_if_index; + f64 now; /* Filter entries by interface if given. */ sw_if_index = ~0; @@ -976,14 +978,15 @@ ip_neighbor_show_i (vlib_main_t * vm, &sw_if_index); ipnis = ip_neighbor_entries (sw_if_index, af); + now = vlib_time_now (vm); if (ipnis) - vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP", - "Flags", "Ethernet", "Interface"); + vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Age", "IP", "Flags", + "Ethernet", "Interface"); vec_foreach (ipni, ipnis) { - vlib_cli_output (vm, "%U", format_ip_neighbor, *ipni); + vlib_cli_output (vm, "%U", format_ip_neighbor, now, *ipni); } vec_free (ipnis); @@ -1573,13 +1576,12 @@ ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait) if (ttl > ipndb_age) { - IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d", - format_ip_neighbor, ipni, now, - ipn->ipn_time_last_updated, ipndb_age); + IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d", format_ip_neighbor, now, ipni, + now, ipn->ipn_time_last_updated, ipndb_age); if (ipn->ipn_n_probes > 2) { /* 3 strikes and yea-re out */ - IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, ipni); + IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, now, ipni); *wait = 1; return (IP_NEIGHBOR_AGE_DEAD); } diff --git a/src/vnet/ip-neighbor/ip_neighbor_types.c b/src/vnet/ip-neighbor/ip_neighbor_types.c index 39039a48249..a6f3c26d42f 100644 --- a/src/vnet/ip-neighbor/ip_neighbor_types.c +++ b/src/vnet/ip-neighbor/ip_neighbor_types.c @@ -68,19 +68,18 @@ format_ip_neighbor_watcher (u8 * s, va_list * va) u8 * format_ip_neighbor (u8 * s, va_list * va) { + f64 now = va_arg (*va, f64); index_t ipni = va_arg (*va, index_t); ip_neighbor_t *ipn; ipn = ip_neighbor_get (ipni); - return (format (s, "%=12U%=40U%=6U%=20U%U", - format_vlib_time, vlib_get_main (), - ipn->ipn_time_last_updated, - format_ip_address, &ipn->ipn_key->ipnk_ip, - format_ip_neighbor_flags, ipn->ipn_flags, - format_mac_address_t, &ipn->ipn_mac, - format_vnet_sw_if_index_name, vnet_get_main (), - ipn->ipn_key->ipnk_sw_if_index)); + return ( + format (s, "%=12U%=40U%=6U%=20U%U", format_vlib_time, vlib_get_main (), + now - ipn->ipn_time_last_updated, format_ip_address, + &ipn->ipn_key->ipnk_ip, format_ip_neighbor_flags, ipn->ipn_flags, + format_mac_address_t, &ipn->ipn_mac, format_vnet_sw_if_index_name, + vnet_get_main (), ipn->ipn_key->ipnk_sw_if_index)); } static void diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c index b095f679cc8..f93ebce4bf1 100644 --- a/src/vnet/ip/icmp6.c +++ b/src/vnet/ip/icmp6.c @@ -338,7 +338,7 @@ ip6_icmp_error (vlib_main_t * vm, if (throttle_check (&icmp_throttle, thread_index, r0, seed)) { - vlib_error_count (vm, node->node_index, ICMP4_ERROR_DROP, 1); + vlib_error_count (vm, node->node_index, ICMP6_ERROR_DROP, 1); from += 1; n_left_from -= 1; continue; diff --git a/src/vnet/ip/ip.c b/src/vnet/ip/ip.c index d045c2f37c1..586f7dfbc85 100644 --- a/src/vnet/ip/ip.c +++ b/src/vnet/ip/ip.c @@ -201,7 +201,8 @@ ip_feature_enable_disable (ip_address_family_t af, } int -ip_flow_hash_set (ip_address_family_t af, u32 table_id, u32 flow_hash_config) +ip_flow_hash_set (ip_address_family_t af, u32 table_id, + flow_hash_config_t flow_hash_config) { fib_protocol_t fproto; u32 fib_index; diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c index f81485de65f..321472c4d85 100644 --- a/src/vnet/pg/input.c +++ b/src/vnet/pg/input.c @@ -1578,7 +1578,7 @@ fill_buffer_offload_flags (vlib_main_t *vm, u32 *buffers, u32 n_buffers, (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID | VNET_BUFFER_F_L3_HDR_OFFSET_VALID | VNET_BUFFER_F_L4_HDR_OFFSET_VALID); - if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) + if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM || gso_enabled) oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM; } else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6)) @@ -1596,7 +1596,7 @@ fill_buffer_offload_flags (vlib_main_t *vm, u32 *buffers, u32 n_buffers, if (l4_proto == IP_PROTOCOL_TCP) { - if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM) + if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM || gso_enabled) oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM; /* only set GSO flag for chained buffers */ diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h index 5fea61bdab4..c68a911230f 100644 --- a/src/vnet/session/application.h +++ b/src/vnet/session/application.h @@ -342,7 +342,7 @@ session_error_t app_worker_start_listen (app_worker_t *app_wrk, int app_worker_stop_listen (app_worker_t * app_wrk, app_listener_t * al); int app_worker_init_accepted (session_t * s); int app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh, - u32 opaque, int err); + u32 opaque, session_error_t err); int app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh, u32 opaque, session_error_t err); int app_worker_accept_notify (app_worker_t * app_wrk, session_t * s); diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index 86f3dcdece6..a62f914d43a 100644 --- a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -106,8 +106,8 @@ parse_uri (char *uri, session_endpoint_cfg_t *sep) return 0; } -int -vnet_bind_uri (vnet_listen_args_t * a) +session_error_t +vnet_bind_uri (vnet_listen_args_t *a) { session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL; int rv; diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c index 9d028dbb28c..ff20bc2d835 100644 --- a/src/vnet/session/session_lookup.c +++ b/src/vnet/session/session_lookup.c @@ -1184,7 +1184,6 @@ session_lookup_connection_wt6 (u32 fib_index, ip6_address_t * lcl, rv = clib_bihash_search_inline_48_8 (&st->v6_session_hash, &kv6); if (rv == 0) { - ASSERT ((u32) (kv6.value >> 32) == thread_index); if (PREDICT_FALSE ((u32) (kv6.value >> 32) != thread_index)) { *result = SESSION_LOOKUP_RESULT_WRONG_THREAD; diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c index b3c02510232..9c1121f7cfb 100644 --- a/src/vnet/udp/udp.c +++ b/src/vnet/udp/udp.c @@ -232,18 +232,43 @@ udp_session_get_listener (u32 listener_index) return &us->connection; } +always_inline u16 +udp_compute_checksum (vlib_main_t *vm, vlib_buffer_t *b, u8 csum_offload, + u8 is_ip4) +{ + u16 csum = 0; + + if (csum_offload) + vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM); + else + { + if (is_ip4) + csum = + ip4_tcp_udp_compute_checksum (vm, b, vlib_buffer_get_current (b)); + else + { + int bogus = 0; + csum = ip6_tcp_udp_icmp_compute_checksum ( + vm, b, vlib_buffer_get_current (b), &bogus); + } + } + + return csum; +} + always_inline u32 udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b, u8 is_cless) { + udp_header_t *uh; + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; /* reuse tcp medatada for now */ vnet_buffer (b)->tcp.connection_index = uc->c_c_index; if (!is_cless) { - vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port, - udp_csum_offload (uc)); + uh = vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port); if (uc->c_is_ip4) vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4, @@ -263,8 +288,7 @@ udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b, hdr = *(session_dgram_hdr_t *) (data - sizeof (hdr)); /* Local port assumed to be bound, not overwriting it */ - vlib_buffer_push_udp (b, uc->c_lcl_port, hdr.rmt_port, - udp_csum_offload (uc)); + uh = vlib_buffer_push_udp (b, uc->c_lcl_port, hdr.rmt_port); if (uc->c_is_ip4) vlib_buffer_push_ip4_custom (vm, b, &hdr.lcl_ip.ip4, &hdr.rmt_ip.ip4, @@ -279,6 +303,9 @@ udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b, vnet_buffer (b)->tcp.flags |= UDP_CONN_F_LISTEN; } + uh->checksum = + udp_compute_checksum (vm, b, udp_csum_offload (uc), uc->c_is_ip4); + return 0; } diff --git a/src/vnet/udp/udp_inlines.h b/src/vnet/udp/udp_inlines.h index f0dd44f48b5..ceec0b191b1 100644 --- a/src/vnet/udp/udp_inlines.h +++ b/src/vnet/udp/udp_inlines.h @@ -26,7 +26,7 @@ #include <vnet/udp/udp_encap.h> always_inline void * -vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum) +vlib_buffer_push_udp (vlib_buffer_t *b, u16 sp, u16 dp) { udp_header_t *uh; u16 udp_len = sizeof (udp_header_t) + b->current_length; @@ -38,8 +38,6 @@ vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum) uh->dst_port = dp; uh->checksum = 0; uh->length = clib_host_to_net_u16 (udp_len); - if (offload_csum) - vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM); vnet_buffer (b)->l4_hdr_offset = (u8 *) uh - b->data; b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID; return uh; diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c index 71434a9c065..dd4f4cc3353 100644 --- a/src/vpp/vnet/main.c +++ b/src/vpp/vnet/main.c @@ -22,6 +22,8 @@ #include <vppinfra/clib.h> #include <vppinfra/cpu.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/unix.h> #include <vlib/vlib.h> #include <vlib/unix/unix.h> #include <vlib/threads.h> @@ -43,25 +45,26 @@ static void vpp_find_plugin_path () { extern char *vat_plugin_path; - char *p, path[PATH_MAX]; - int rv; - u8 *s; + char *p; + u8 *s, *path; /* find executable path */ - if ((rv = readlink ("/proc/self/exe", path, PATH_MAX - 1)) == -1) + path = os_get_exec_path (); + + if (!path) return; - /* readlink doesn't provide null termination */ - path[rv] = 0; + /* add null termination */ + vec_add1 (path, 0); /* strip filename */ - if ((p = strrchr (path, '/')) == 0) - return; + if ((p = strrchr ((char *) path, '/')) == 0) + goto done; *p = 0; /* strip bin/ */ - if ((p = strrchr (path, '/')) == 0) - return; + if ((p = strrchr ((char *) path, '/')) == 0) + goto done; *p = 0; s = format (0, "%s/" CLIB_LIB_DIR "/vpp_plugins", path, path); @@ -71,6 +74,9 @@ vpp_find_plugin_path () s = format (0, "%s/" CLIB_LIB_DIR "/vpp_api_test_plugins", path, path); vec_add1 (s, 0); vat_plugin_path = (char *) s; + +done: + vec_free (path); } static void @@ -323,6 +329,10 @@ defaulted: unformat_free (&input); + /* if main thread affinity is unspecified, set to current running cpu */ + if (main_core == ~0) + main_core = sched_getcpu (); + /* set process affinity for main thread */ if (main_core != ~0) { diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt index f34ceed9d15..a8c64a36121 100644 --- a/src/vppinfra/CMakeLists.txt +++ b/src/vppinfra/CMakeLists.txt @@ -229,9 +229,18 @@ elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") ) endif() -option(VPP_USE_EXTERNAL_LIBEXECINFO "Use external libexecinfo (useful for non-glibc targets)." OFF) +if("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") + option(VPP_USE_EXTERNAL_LIBEXECINFO "Use external libexecinfo (useful for non-glibc targets)." ON) +else() + option(VPP_USE_EXTERNAL_LIBEXECINFO "Use external libexecinfo (useful for non-glibc targets)." OFF) +endif() +option(VPP_USE_LIBUNWIND "Use libunwind for backtrace." OFF) + if(VPP_USE_EXTERNAL_LIBEXECINFO) set(EXECINFO_LIB execinfo) +elseif(VPP_USE_LIBUNWIND) + set(EXECINFO_LIB unwind) + add_compile_definitions(USE_LIBUNWIND) endif() add_vpp_library(vppinfra SOURCES ${VPPINFRA_SRCS} @@ -260,6 +269,7 @@ if(VPP_BUILD_VPPINFRA_TESTS) longjmp macros maplog + mhash pmalloc pool_alloc pool_iterate diff --git a/src/vppinfra/backtrace.c b/src/vppinfra/backtrace.c index e713bae6876..bae563d04b0 100644 --- a/src/vppinfra/backtrace.c +++ b/src/vppinfra/backtrace.c @@ -219,8 +219,17 @@ backtrace_done: #ifndef clib_backtrace_defined #define clib_backtrace_defined +#ifndef USE_LIBUNWIND /* use glibc backtrace for stack trace */ #include <execinfo.h> +#else +#include <libunwind.h> +static int +backtrace (void **buffer, int size) +{ + return unw_backtrace (buffer, size); +} +#endif __clib_export uword clib_backtrace (uword * callers, uword max_callers, uword n_frames_to_skip) diff --git a/src/vppinfra/cJSON.c b/src/vppinfra/cJSON.c index 448435de4dc..24e0110ed08 100644 --- a/src/vppinfra/cJSON.c +++ b/src/vppinfra/cJSON.c @@ -20,6 +20,7 @@ THE SOFTWARE. */ /* clang-format off */ + /* cJSON */ /* JSON parser in C. */ @@ -96,9 +97,9 @@ CJSON_PUBLIC(const char *) cJSON_GetErrorPtr(void) return (const char*) (global_error.json + global_error.position); } -CJSON_PUBLIC(char *) cJSON_GetStringValue(const cJSON * const item) +CJSON_PUBLIC (char *) cJSON_GetStringValue (const cJSON *const item) { - if (!cJSON_IsString(item)) + if (!cJSON_IsString (item)) { return NULL; } @@ -106,9 +107,9 @@ CJSON_PUBLIC(char *) cJSON_GetStringValue(const cJSON * const item) return item->valuestring; } -CJSON_PUBLIC(double) cJSON_GetNumberValue(const cJSON * const item) +CJSON_PUBLIC (double) cJSON_GetNumberValue (const cJSON *const item) { - if (!cJSON_IsNumber(item)) + if (!cJSON_IsNumber (item)) { return (double) NAN; } @@ -117,8 +118,9 @@ CJSON_PUBLIC(double) cJSON_GetNumberValue(const cJSON * const item) } /* This is a safeguard to prevent copy-pasters from using incompatible C and header files */ -#if (CJSON_VERSION_MAJOR != 1) || (CJSON_VERSION_MINOR != 7) || (CJSON_VERSION_PATCH != 14) - #error cJSON.h and cJSON.c have different versions. Make sure that both have the same. +#if (CJSON_VERSION_MAJOR != 1) || (CJSON_VERSION_MINOR != 7) || \ + (CJSON_VERSION_PATCH != 17) +#error cJSON.h and cJSON.c have different versions. Make sure that both have the same. #endif CJSON_PUBLIC(const char*) cJSON_Version(void) @@ -157,7 +159,7 @@ typedef struct internal_hooks { void *(CJSON_CDECL *allocate)(size_t size); void (CJSON_CDECL *deallocate)(void *pointer); - void *(CJSON_CDECL *reallocate)(void *pointer, size_t new_size, size_t old_size); + void *(CJSON_CDECL *reallocate) (void *pointer, size_t size); } internal_hooks; #if defined(_MSC_VER) @@ -170,20 +172,17 @@ static void CJSON_CDECL internal_free(void *pointer) { free(pointer); } +static void *CJSON_CDECL +internal_realloc (void *pointer, size_t size) +{ + return realloc (pointer, size); +} #else #define internal_malloc malloc #define internal_free free +#define internal_realloc realloc #endif -static void * CJSON_CDECL internal_realloc(void *pointer, size_t new_size, - size_t old_size) -{ - return realloc(pointer, new_size); -} - -static void * -cjson_realloc_internal (void *ptr, size_t new_size, size_t old_size); - /* strlen of character literals resolved at compile time */ #define static_strlen(string_literal) (sizeof(string_literal) - sizeof("")) @@ -217,8 +216,8 @@ CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks) /* Reset hooks */ global_hooks.allocate = malloc; global_hooks.deallocate = free; - global_hooks.reallocate = internal_realloc; - return; + global_hooks.reallocate = realloc; + return; } global_hooks.allocate = malloc; @@ -233,16 +232,11 @@ CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks) global_hooks.deallocate = hooks->free_fn; } - /* use realloc only if both free and malloc are used */ - global_hooks.reallocate = NULL; - if ((global_hooks.allocate == malloc) && (global_hooks.deallocate == free)) - { - global_hooks.reallocate = internal_realloc; - } - else - { - global_hooks.reallocate = cjson_realloc_internal; - } + global_hooks.reallocate = realloc; + if (hooks->realloc_fn != NULL) + { + global_hooks.reallocate = hooks->realloc_fn; + } } /* Internal constructor. */ @@ -405,14 +399,22 @@ CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number) return object->valuedouble = number; } +/* Note: when passing a NULL valuestring, cJSON_SetValuestring treats this as + * an error and return NULL */ CJSON_PUBLIC(char*) cJSON_SetValuestring(cJSON *object, const char *valuestring) { char *copy = NULL; /* if object's type is not cJSON_String or is cJSON_IsReference, it should not set valuestring */ - if (!(object->type & cJSON_String) || (object->type & cJSON_IsReference)) - { - return NULL; - } + if ((object == NULL) || !(object->type & cJSON_String) || + (object->type & cJSON_IsReference)) + { + return NULL; + } + /* return NULL if the object is corrupted or valuestring is NULL */ + if (object->valuestring == NULL || valuestring == NULL) + { + return NULL; + } if (strlen(valuestring) <= strlen(object->valuestring)) { strcpy(object->valuestring, valuestring); @@ -443,27 +445,6 @@ typedef struct internal_hooks hooks; } printbuffer; -static void * -cjson_realloc_internal (void *ptr, size_t new_size, size_t old_size) -{ - size_t copy_size; - if (old_size < new_size) - copy_size = old_size; - else - copy_size = new_size; - - unsigned char *newbuffer = global_hooks.allocate(new_size); - if (!newbuffer) - { - global_hooks.deallocate(ptr); - return NULL; - } - - memcpy (newbuffer, ptr, copy_size); - global_hooks.deallocate (ptr); - return newbuffer; -} - /* realloc printbuffer if necessary to have at least "needed" bytes more */ static unsigned char* ensure(printbuffer * const p, size_t needed) { @@ -515,14 +496,35 @@ static unsigned char* ensure(printbuffer * const p, size_t needed) newsize = needed * 2; } - newbuffer = p->hooks.reallocate (p->buffer, newsize, p->length); - if (newbuffer == NULL) - { - p->hooks.deallocate(p->buffer); - p->length = 0; - p->buffer = NULL; - return NULL; - } + if (p->hooks.reallocate != NULL) + { + /* reallocate with realloc if available */ + newbuffer = (unsigned char *) p->hooks.reallocate (p->buffer, newsize); + if (newbuffer == NULL) + { + p->hooks.deallocate (p->buffer); + p->length = 0; + p->buffer = NULL; + + return NULL; + } + } + else + { + /* otherwise reallocate manually */ + newbuffer = (unsigned char *) p->hooks.allocate (newsize); + if (!newbuffer) + { + p->hooks.deallocate (p->buffer); + p->length = 0; + p->buffer = NULL; + + return NULL; + } + + memcpy (newbuffer, p->buffer, p->offset + 1); + p->hooks.deallocate (p->buffer); + } p->length = newsize; p->buffer = newbuffer; @@ -570,6 +572,10 @@ static cJSON_bool print_number(const cJSON * const item, printbuffer * const out { length = sprintf((char*)number_buffer, "null"); } + else if (d == (double) item->valueint) + { + length = sprintf ((char *) number_buffer, "%d", item->valueint); + } else { /* Try 15 decimal places of precision to avoid nonsignificant nonzero digits */ @@ -1111,7 +1117,7 @@ CJSON_PUBLIC(cJSON *) cJSON_ParseWithLengthOpts(const char *value, size_t buffer } buffer.content = (const unsigned char*)value; - buffer.length = buffer_length; + buffer.length = buffer_length; buffer.offset = 0; buffer.hooks = global_hooks; @@ -1216,11 +1222,13 @@ static unsigned char *print(const cJSON * const item, cJSON_bool format, const i /* check if reallocate is available */ if (hooks->reallocate != NULL) { - printed = (unsigned char*) hooks->reallocate(buffer->buffer, buffer->offset + 1, default_buffer_size); - if (printed == NULL) { - goto fail; - } - buffer->buffer = NULL; + printed = (unsigned char *) hooks->reallocate (buffer->buffer, + buffer->offset + 1); + if (printed == NULL) + { + goto fail; + } + buffer->buffer = NULL; } else /* otherwise copy the JSON over to a new buffer */ { @@ -1658,8 +1666,13 @@ static cJSON_bool parse_object(cJSON * const item, parse_buffer * const input_bu current_item = new_item; } - /* parse the name of the child */ - input_buffer->offset++; + if (cannot_access_at_index (input_buffer, 1)) + { + goto fail; /* nothing comes after the comma */ + } + + /* parse the name of the child */ + input_buffer->offset++; buffer_skip_whitespace(input_buffer); if (!parse_string(current_item, input_buffer)) { @@ -2268,10 +2281,10 @@ CJSON_PUBLIC(cJSON_bool) cJSON_InsertItemInArray(cJSON *array, int which, cJSON { cJSON *after_inserted = NULL; - if (which < 0) - { - return false; - } + if (which < 0 || newitem == NULL) + { + return false; + } after_inserted = get_array_item(array, (size_t)which); if (after_inserted == NULL) @@ -2279,6 +2292,12 @@ CJSON_PUBLIC(cJSON_bool) cJSON_InsertItemInArray(cJSON *array, int which, cJSON return add_item_to_array(array, newitem); } + if (after_inserted != array->child && after_inserted->prev == NULL) + { + /* return false if after_inserted is a corrupted array item */ + return false; + } + newitem->next = after_inserted; newitem->prev = after_inserted->prev; after_inserted->prev = newitem; @@ -2295,7 +2314,8 @@ CJSON_PUBLIC(cJSON_bool) cJSON_InsertItemInArray(cJSON *array, int which, cJSON CJSON_PUBLIC(cJSON_bool) cJSON_ReplaceItemViaPointer(cJSON * const parent, cJSON * const item, cJSON * replacement) { - if ((parent == NULL) || (replacement == NULL) || (item == NULL)) + if ((parent == NULL) || (parent->child == NULL) || (replacement == NULL) || + (item == NULL)) { return false; } @@ -2365,6 +2385,11 @@ static cJSON_bool replace_item_in_object(cJSON *object, const char *string, cJSO cJSON_free(replacement->string); } replacement->string = (char*)cJSON_strdup((const unsigned char*)string, &global_hooks); + if (replacement->string == NULL) + { + return false; + } + replacement->type &= ~cJSON_StringIsConst; return cJSON_ReplaceItemViaPointer(object, get_object_item(object, string, case_sensitive), replacement); @@ -2639,9 +2664,9 @@ CJSON_PUBLIC(cJSON *) cJSON_CreateDoubleArray(const double *numbers, int count) for (i = 0; a && (i < (size_t) count); i++) { - n = cJSON_CreateNumber(numbers[i]); - if(!n) - { + n = cJSON_CreateNumber (numbers[i]); + if (!n) + { cJSON_Delete(a); return NULL; } @@ -2988,7 +3013,7 @@ CJSON_PUBLIC(cJSON_bool) cJSON_IsRaw(const cJSON * const item) CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive) { - if ((a == NULL) || (b == NULL) || ((a->type & 0xFF) != (b->type & 0xFF)) || cJSON_IsInvalid(a)) + if ((a == NULL) || (b == NULL) || ((a->type & 0xFF) != (b->type & 0xFF))) { return false; } @@ -3121,7 +3146,7 @@ CJSON_PUBLIC(void) cJSON_free(void *object) global_hooks.deallocate(object); } -CJSON_PUBLIC(void *) cJSON_realloc(void *object, size_t new_size, size_t old_size) +CJSON_PUBLIC (void *) cJSON_realloc (void *object, size_t size) { - return global_hooks.reallocate(object, new_size, old_size); + return global_hooks.reallocate (object, size); } diff --git a/src/vppinfra/cJSON.h b/src/vppinfra/cJSON.h index 1474c4e5c49..1c98dfac70e 100644 --- a/src/vppinfra/cJSON.h +++ b/src/vppinfra/cJSON.h @@ -81,7 +81,7 @@ then using the CJSON_API_VISIBILITY flag to "export" the same symbols the way CJ /* project version */ #define CJSON_VERSION_MAJOR 1 #define CJSON_VERSION_MINOR 7 -#define CJSON_VERSION_PATCH 14 +#define CJSON_VERSION_PATCH 17 #include <stddef.h> @@ -127,8 +127,7 @@ typedef struct cJSON_Hooks /* malloc/free are CDECL on Windows regardless of the default calling convention of the compiler, so ensure the hooks allow passing those functions directly. */ void *(CJSON_CDECL *malloc_fn)(size_t sz); void (CJSON_CDECL *free_fn)(void *ptr); - void *(CJSON_CDECL *realloc_fn) (void *ptr, size_t new_size, - size_t old_size); + void *(CJSON_CDECL *realloc_fn) (void *ptr, size_t sz); } cJSON_Hooks; typedef int cJSON_bool; @@ -256,9 +255,10 @@ CJSON_PUBLIC(cJSON *) cJSON_Duplicate(const cJSON *item, cJSON_bool recurse); * case_sensitive determines if object keys are treated case sensitive (1) or case insensitive (0) */ CJSON_PUBLIC(cJSON_bool) cJSON_Compare(const cJSON * const a, const cJSON * const b, const cJSON_bool case_sensitive); -/* Minify a strings, remove blank characters(such as ' ', '\t', '\r', '\n') from strings. - * The input pointer json cannot point to a read-only address area, such as a string constant, - * but should point to a readable and writable adress area. */ +/* Minify a strings, remove blank characters(such as ' ', '\t', '\r', '\n') + * from strings. The input pointer json cannot point to a read-only address + * area, such as a string constant, + * but should point to a readable and writable address area. */ CJSON_PUBLIC(void) cJSON_Minify(char *json); /* Helper functions for creating and adding items to an object at the same time. @@ -281,14 +281,21 @@ CJSON_PUBLIC(double) cJSON_SetNumberHelper(cJSON *object, double number); /* Change the valuestring of a cJSON_String object, only takes effect when type of object is cJSON_String */ CJSON_PUBLIC(char*) cJSON_SetValuestring(cJSON *object, const char *valuestring); +/* If the object is not a boolean type this does nothing and returns + * cJSON_Invalid else it returns the new type*/ +#define cJSON_SetBoolValue(object, boolValue) \ + ((object != NULL && ((object)->type & (cJSON_False | cJSON_True))) ? \ + (object)->type = ((object)->type & (~(cJSON_False | cJSON_True))) | \ + ((boolValue) ? cJSON_True : cJSON_False) : \ + cJSON_Invalid) + /* Macro for iterating over an array or object */ #define cJSON_ArrayForEach(element, array) for(element = (array != NULL) ? (array)->child : NULL; element != NULL; element = element->next) /* malloc/free objects using the malloc/free functions that have been set with cJSON_InitHooks */ CJSON_PUBLIC(void *) cJSON_malloc(size_t size); CJSON_PUBLIC(void) cJSON_free(void *object); -CJSON_PUBLIC (void *) -cJSON_realloc (void *object, size_t new_size, size_t old_size); +CJSON_PUBLIC (void *) cJSON_realloc (void *object, size_t size); #ifdef __cplusplus } diff --git a/src/vppinfra/heap.c b/src/vppinfra/heap.c index 7db814200f8..9920528732d 100644 --- a/src/vppinfra/heap.c +++ b/src/vppinfra/heap.c @@ -680,6 +680,7 @@ debug_elt (u8 * s, void *v, word i, word n) i = -n / 2; for (e = e0; 1; e = heap_next (e)) { + s = format (s, " "); if (heap_is_free (e)) s = format (s, "index %4d, free\n", e - h->elts); else if (h->format_elt) diff --git a/src/vppinfra/mhash.c b/src/vppinfra/mhash.c index f0f1aa470d7..babaaeec726 100644 --- a/src/vppinfra/mhash.c +++ b/src/vppinfra/mhash.c @@ -164,6 +164,8 @@ mhash_sanitize_hash_user (mhash_t * mh) h->user = pointer_to_uword (mh); } +static u8 *mhash_format_pair_default (u8 *s, va_list *args); + __clib_export void mhash_init (mhash_t * h, uword n_value_bytes, uword n_key_bytes) { @@ -208,12 +210,12 @@ mhash_init (mhash_t * h, uword n_value_bytes, uword n_key_bytes) vec_validate (h->key_tmps, os_get_nthreads () - 1); ASSERT (n_key_bytes < ARRAY_LEN (t)); - h->hash = hash_create2 ( /* elts */ 0, + h->hash = hash_create2 (/* elts */ 0, /* user */ pointer_to_uword (h), /* value_bytes */ n_value_bytes, t[n_key_bytes].key_sum, t[n_key_bytes].key_equal, /* format pair/arg */ - 0, 0); + mhash_format_pair_default, 0); } static uword @@ -331,8 +333,8 @@ mhash_set_mem (mhash_t * h, void *key, uword * new_value, uword * old_value) { if (key_alloc_from_free_list) { - h->key_vector_free_indices[l] = i; - vec_set_len (h->key_vector_free_indices, l + 1); + vec_set_len (h->key_vector_free_indices, l); + h->key_vector_free_indices[l - 1] = i; } else vec_dec_len (h->key_vector_or_heap, h->n_key_bytes); @@ -371,8 +373,8 @@ mhash_unset (mhash_t * h, void *key, uword * old_value) return 1; } -u8 * -format_mhash_key (u8 * s, va_list * va) +__clib_export u8 * +format_mhash_key (u8 *s, va_list *va) { mhash_t *h = va_arg (*va, mhash_t *); u32 ki = va_arg (*va, u32); @@ -387,7 +389,43 @@ format_mhash_key (u8 * s, va_list * va) else if (h->format_key) s = format (s, "%U", h->format_key, k); else - s = format (s, "%U", format_hex_bytes, k, h->n_key_bytes); + s = format (s, "0x%U", format_hex_bytes, k, h->n_key_bytes); + + return s; +} + +static u8 * +mhash_format_pair_default (u8 *s, va_list *args) +{ + void *CLIB_UNUSED (user_arg) = va_arg (*args, void *); + void *v = va_arg (*args, void *); + hash_pair_t *p = va_arg (*args, hash_pair_t *); + hash_t *h = hash_header (v); + mhash_t *mh = uword_to_pointer (h->user, mhash_t *); + + s = format (s, "%U", format_mhash_key, mh, (u32) p->key); + if (hash_value_bytes (h) > 0) + s = format (s, " -> 0x%8U", format_hex_bytes, &p->value[0], + hash_value_bytes (h)); + return s; +} + +__clib_export u8 * +format_mhash (u8 *s, va_list *va) +{ + mhash_t *h = va_arg (*va, mhash_t *); + int verbose = va_arg (*va, int); + + s = format (s, "mhash %p, %wd elts, \n", h, mhash_elts (h)); + if (mhash_key_vector_is_heap (h)) + s = format (s, " %U", format_heap, h->key_vector_or_heap, verbose); + else + s = format (s, " keys %wd elts, %wd size, %wd free, %wd bytes used\n", + vec_len (h->key_vector_or_heap) / h->n_key_bytes, + h->n_key_bytes, vec_len (h->key_vector_free_indices), + vec_bytes (h->key_vector_or_heap) + + vec_bytes (h->key_vector_free_indices)); + s = format (s, " %U", format_hash, h->hash, verbose); return s; } diff --git a/src/vppinfra/mhash.h b/src/vppinfra/mhash.h index 7eb1918384e..62aee365fa3 100644 --- a/src/vppinfra/mhash.h +++ b/src/vppinfra/mhash.h @@ -166,8 +166,13 @@ do { \ })); \ } while (0) +u8 *format_mhash (u8 *s, va_list *va); + format_function_t format_mhash_key; +/* Main test routine. */ +int test_mhash_main (unformat_input_t *input); + #endif /* included_clib_mhash_h */ /* diff --git a/src/vppinfra/pmalloc.c b/src/vppinfra/pmalloc.c index 2a27379b573..85b9db9d56c 100644 --- a/src/vppinfra/pmalloc.c +++ b/src/vppinfra/pmalloc.c @@ -17,6 +17,9 @@ #include <stdlib.h> #include <sys/types.h> #include <sys/stat.h> +#ifdef __FreeBSD__ +#include <sys/memrange.h> +#endif /* __FreeBSD__ */ #include <fcntl.h> #include <unistd.h> #include <sched.h> @@ -184,8 +187,9 @@ next_chunk: } static void -pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count) +pmalloc_update_lookup_table (clib_pmalloc_main_t *pm, u32 first, u32 count) { +#ifdef __linux uword seek, va, pa, p; int fd; u32 elts_per_page = 1U << (pm->def_log2_page_sz - pm->lookup_log2_page_sz); @@ -223,6 +227,45 @@ pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count) if (fd != -1) close (fd); +#elif defined(__FreeBSD__) + struct mem_extract meme; + uword p; + int fd; + u32 elts_per_page = 1U << (pm->def_log2_page_sz - pm->lookup_log2_page_sz); + + vec_validate_aligned (pm->lookup_table, + vec_len (pm->pages) * elts_per_page - 1, + CLIB_CACHE_LINE_BYTES); + + p = (uword) first * elts_per_page; + if (pm->flags & CLIB_PMALLOC_F_NO_PAGEMAP) + { + while (p < (uword) elts_per_page * count) + { + pm->lookup_table[p] = + pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz); + p++; + } + return; + } + + fd = open ((char *) "/dev/mem", O_RDONLY); + if (fd == -1) + return; + + while (p < (uword) elts_per_page * count) + { + meme.me_vaddr = + pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz); + if (ioctl (fd, MEM_EXTRACT_PADDR, &meme) == -1) + continue; + pm->lookup_table[p] = meme.me_vaddr - meme.me_paddr; + p++; + } + return; +#else +#error "Unsupported OS" +#endif } static inline clib_pmalloc_page_t * diff --git a/src/vppinfra/test_mhash.c b/src/vppinfra/test_mhash.c new file mode 100644 index 00000000000..70be2b9b382 --- /dev/null +++ b/src/vppinfra/test_mhash.c @@ -0,0 +1,403 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2023 Yandex LLC. + */ + +#ifdef CLIB_LINUX_KERNEL +#include <linux/unistd.h> +#endif + +#ifdef CLIB_UNIX +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <vppinfra/time.h> +#endif + +#include <vppinfra/random.h> +#include <vppinfra/mem.h> +#include <vppinfra/hash.h> +#include <vppinfra/mhash.h> +#include <vppinfra/error.h> +#include <vppinfra/format.h> +#include <vppinfra/bitmap.h> + +static int verbose; +#define if_verbose(format, args...) \ + if (verbose) \ + { \ + clib_warning (format, ##args); \ + } + +typedef struct +{ + int n_iterations; + + int n_iterations_per_print; + + /* Number of pairs to insert into mhash. */ + int n_pairs; + + /* True to validate correctness of mhash functions. */ + int n_iterations_per_validate; + + /* Verbosity level for mhash formats. */ + int verbose; + + /* Random number seed. */ + u32 seed; +} mhash_test_t; + +static clib_error_t * +mhash_next_test (mhash_t *h) +{ + hash_next_t hn = { 0 }; + hash_pair_t *p0, *p1; + clib_error_t *error = 0; + + hash_foreach_pair (p0, h->hash, { + p1 = hash_next (h->hash, &hn); + error = CLIB_ERROR_ASSERT (p0 == p1); + if (error) + break; + }); + + if (!error) + error = CLIB_ERROR_ASSERT (!hash_next (h->hash, &hn)); + + return error; +} + +static clib_error_t * +test_word_key (mhash_test_t *ht) +{ + mhash_t _h = { 0 }, *h = &_h; + word i, j; + + word *keys = 0, *vals = 0; + uword *is_inserted = 0; + + clib_error_t *error = 0; + + vec_resize (keys, ht->n_pairs); + vec_resize (vals, vec_len (keys)); + + mhash_init (h, sizeof (vals[0]), sizeof (keys[0])); + /* borrow 0 elt to make index keys non-zero */ + vec_validate (h->key_vector_or_heap, 0); + + { + uword *unique = 0; + u32 k; + + for (i = 0; i < vec_len (keys); i++) + { + do + { + k = random_u32 (&ht->seed) & 0xfffff; + } + while (clib_bitmap_get (unique, k)); + unique = clib_bitmap_ori (unique, k); + keys[i] = k; + vals[i] = i; + } + + clib_bitmap_free (unique); + } + + for (i = 0; i < ht->n_iterations; i++) + { + u32 vi = random_u32 (&ht->seed) % vec_len (keys); + + if (clib_bitmap_get (is_inserted, vi)) + { + mhash_unset (h, &keys[vi], 0); + mhash_unset (h, &keys[vi], 0); + } + else + { + mhash_set (h, &keys[vi], vals[vi], 0); + mhash_set (h, &keys[vi], vals[vi], 0); + } + + is_inserted = clib_bitmap_xori (is_inserted, vi); + + if (ht->n_iterations_per_print > 0 && + ((i + 1) % ht->n_iterations_per_print) == 0) + if_verbose ("iteration %d\n %U", i + 1, format_mhash, h, ht->verbose); + + if (ht->n_iterations_per_validate == 0 || + (i + 1) % ht->n_iterations_per_validate) + continue; + + { + uword ki, *k, *v; + + mhash_foreach (k, v, h, { + ki = v[0]; + ASSERT (keys[ki] == k[0]); + }); + } + + if ((error = hash_validate (h->hash))) + goto done; + + for (j = 0; j < vec_len (keys); j++) + { + uword *v; + v = mhash_get (h, &keys[j]); + if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) == + (v != 0)))) + goto done; + if (v) + { + if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j]))) + goto done; + } + } + } + + if ((error = mhash_next_test (h))) + goto done; + + if_verbose ("%U", format_mhash, h, ht->verbose); + + for (i = 0; i < vec_len (keys); i++) + { + if (!clib_bitmap_get (is_inserted, i)) + continue; + + mhash_unset (h, &keys[i], 0); + mhash_unset (h, &keys[i], 0); + is_inserted = clib_bitmap_xori (is_inserted, i); + + if (ht->n_iterations_per_validate == 0 || + (i + 1) % ht->n_iterations_per_validate) + continue; + + if ((error = hash_validate (h->hash))) + goto done; + + for (j = 0; j < vec_len (keys); j++) + { + uword *v; + v = mhash_get (h, &keys[j]); + if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) == + (v != 0)))) + goto done; + if (v) + { + if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j]))) + goto done; + } + } + } + +done: + mhash_free (h); + vec_free (keys); + vec_free (vals); + clib_bitmap_free (is_inserted); + + if (verbose) + fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0); + + return error; +} + +static u8 * +test2_format (u8 *s, va_list *args) +{ + void *CLIB_UNUSED (user_arg) = va_arg (*args, void *); + void *v = va_arg (*args, void *); + hash_pair_t *p = va_arg (*args, hash_pair_t *); + hash_t *h = hash_header (v); + mhash_t *mh = uword_to_pointer (h->user, mhash_t *); + + return format (s, "0x%8U <- %U", format_hex_bytes, &p->value[0], + hash_value_bytes (h), format_mhash_key, mh, (u32) p->key); +} + +static clib_error_t * +test_string_key (mhash_test_t *ht, uword is_c_string) +{ + mhash_t _h = { 0 }, *h = &_h; + word i, j; + + u8 **keys = 0; + word *vals = 0; + uword *is_inserted = 0; + + clib_error_t *error = 0; + + vec_resize (keys, ht->n_pairs); + vec_resize (vals, vec_len (keys)); + + if (is_c_string) + mhash_init_c_string (h, sizeof (vals[0])); + else + mhash_init_vec_string (h, sizeof (vals[0])); + hash_set_pair_format (h->hash, test2_format, 0); + + for (i = 0; i < vec_len (keys); i++) + { + keys[i] = random_string (&ht->seed, 5 + (random_u32 (&ht->seed) & 0xf)); + keys[i] = format (keys[i], "%x", i); + if (is_c_string) + vec_terminate_c_string (keys[i]); + vals[i] = random_u32 (&ht->seed); + } + + for (i = 0; i < ht->n_iterations; i++) + { + u32 vi = random_u32 (&ht->seed) % vec_len (keys); + + if (clib_bitmap_get (is_inserted, vi)) + { + mhash_unset (h, keys[vi], 0); + mhash_unset (h, keys[vi], 0); + } + else + { + mhash_set (h, keys[vi], vals[vi], 0); + mhash_set (h, keys[vi], vals[vi], 0); + } + + is_inserted = clib_bitmap_xori (is_inserted, vi); + + if (ht->n_iterations_per_print > 0 && + ((i + 1) % ht->n_iterations_per_print) == 0) + if_verbose ("iteration %d\n %U", i + 1, format_mhash, h, ht->verbose); + + if (ht->n_iterations_per_validate == 0 || + (i + 1) % ht->n_iterations_per_validate) + continue; + + if ((error = hash_validate (h->hash))) + goto done; + + for (j = 0; j < vec_len (keys); j++) + { + uword *v; + v = mhash_get (h, keys[j]); + if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) == + (v != 0)))) + goto done; + if (v) + { + if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j]))) + goto done; + } + } + } + + if ((error = mhash_next_test (h))) + goto done; + + if_verbose ("%U", format_mhash, h, ht->verbose); + + for (i = 0; i < vec_len (keys); i++) + { + if (!clib_bitmap_get (is_inserted, i)) + continue; + + mhash_unset (h, keys[i], 0); + mhash_unset (h, keys[i], 0); + is_inserted = clib_bitmap_xori (is_inserted, i); + + if (ht->n_iterations_per_validate == 0 || + (i + 1) % ht->n_iterations_per_validate) + continue; + + if ((error = hash_validate (h->hash))) + goto done; + + for (j = 0; j < vec_len (keys); j++) + { + uword *v; + v = mhash_get (h, keys[j]); + if ((error = CLIB_ERROR_ASSERT (clib_bitmap_get (is_inserted, j) == + (v != 0)))) + goto done; + if (v) + { + if ((error = CLIB_ERROR_ASSERT (v[0] == vals[j]))) + goto done; + } + } + } + +done: + mhash_free (h); + vec_free (vals); + clib_bitmap_free (is_inserted); + + for (i = 0; i < vec_len (keys); i++) + vec_free (keys[i]); + vec_free (keys); + + if (verbose) + fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0); + + return error; +} + +int +test_mhash_main (unformat_input_t *input) +{ + mhash_test_t _ht = { 0 }, *ht = &_ht; + clib_error_t *error; + + ht->n_iterations = 100; + ht->n_pairs = 10; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (0 == unformat (input, "iter %d", &ht->n_iterations) && + 0 == unformat (input, "print %d", &ht->n_iterations_per_print) && + 0 == unformat (input, "elts %d", &ht->n_pairs) && + 0 == unformat (input, "seed %d", &ht->seed) && + 0 == unformat (input, "verbose %=", &ht->verbose, 1) && + 0 == unformat (input, "valid %d", &ht->n_iterations_per_validate)) + { + clib_warning ("unknown input `%U'", format_unformat_error, input); + return 1; + } + } + + if (!ht->seed) + ht->seed = random_default_seed (); + + if_verbose ("testing %d iterations, seed %d", ht->n_iterations, ht->seed); + + error = test_word_key (ht); + if (error) + clib_error_report (error); + + error = test_string_key (ht, 0); + if (error) + clib_error_report (error); + + error = test_string_key (ht, 1); + if (error) + clib_error_report (error); + + return 0; +} + +#ifdef CLIB_UNIX +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + int ret; + + clib_mem_init (0, 3ULL << 30); + + verbose = (argc > 1); + unformat_init_command_line (&i, argv); + ret = test_mhash_main (&i); + unformat_free (&i); + + return ret; +} +#endif /* CLIB_UNIX */ diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c index 4dbc5ce98ce..31c0a489e8d 100644 --- a/src/vppinfra/unix-misc.c +++ b/src/vppinfra/unix-misc.c @@ -35,6 +35,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + #include <vppinfra/error.h> #include <vppinfra/os.h> #include <vppinfra/bitmap.h> @@ -42,11 +46,19 @@ #include <vppinfra/format.h> #ifdef __linux__ #include <vppinfra/linux/sysfs.h> +#include <sched.h> +#elif defined(__FreeBSD__) +#define _WANT_FREEBSD_BITSET +#include <sys/cdefs.h> +#include <sys/param.h> +#include <sys/types.h> +#include <sys/cpuset.h> +#include <sys/domainset.h> +#include <sys/sysctl.h> #endif #include <sys/stat.h> #include <sys/types.h> -#include <sys/syscall.h> #include <sys/uio.h> /* writev */ #include <fcntl.h> #include <stdio.h> /* for sprintf */ @@ -288,7 +300,7 @@ os_get_cpu_affinity_bitmap (int pid) __CPU_ZERO_S (sizeof (cpu_set_t), &cpuset); - ret = syscall (SYS_sched_getaffinity, 0, sizeof (cpu_set_t), &cpuset); + ret = sched_getaffinity (0, sizeof (cpu_set_t), &cpuset); if (ret < 0) { @@ -300,8 +312,23 @@ os_get_cpu_affinity_bitmap (int pid) if (__CPU_ISSET_S (index, sizeof (cpu_set_t), &cpuset)) clib_bitmap_set (affinity_cpus, index, 1); return affinity_cpus; +#elif defined(__FreeBSD__) + cpuset_t mask; + uword *r = NULL; + + if (cpuset_getaffinity (CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, -1, + sizeof (mask), &mask) != 0) + { + clib_bitmap_free (r); + return NULL; + } + + for (int bit = 0; bit < CPU_SETSIZE; bit++) + clib_bitmap_set (r, bit, CPU_ISSET (bit, &mask)); + + return r; #else - return 0; + return NULL; #endif } @@ -310,6 +337,21 @@ os_get_online_cpu_node_bitmap () { #if __linux__ return clib_sysfs_read_bitmap ("/sys/devices/system/node/online"); +#elif defined(__FreeBSD__) + domainset_t domain; + uword *r = NULL; + int policy; + + if (cpuset_getdomain (CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET, -1, + sizeof (domain), &domain, &policy) != 0) + { + clib_bitmap_free (r); + return NULL; + } + + for (int bit = 0; bit < CPU_SETSIZE; bit++) + clib_bitmap_set (r, bit, CPU_ISSET (bit, &domain)); + return r; #else return 0; #endif @@ -358,6 +400,28 @@ os_get_cpu_phys_core_id (int cpu_id) #endif } +__clib_export u8 * +os_get_exec_path () +{ + u8 *rv = 0; +#ifdef __linux__ + char tmp[PATH_MAX]; + ssize_t sz = readlink ("/proc/self/exe", tmp, sizeof (tmp)); + + if (sz <= 0) + return 0; +#else + char tmp[MAXPATHLEN]; + int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 }; + size_t sz = MAXPATHLEN; + + if (sysctl (mib, 4, tmp, &sz, NULL, 0) == -1) + return 0; +#endif + vec_add (rv, tmp, sz); + return rv; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vppinfra/unix.h b/src/vppinfra/unix.h index 3ad57b05e72..d0ddb93a46f 100644 --- a/src/vppinfra/unix.h +++ b/src/vppinfra/unix.h @@ -56,9 +56,6 @@ clib_error_t *unix_proc_file_contents (char *file, u8 ** result); /* Retrieve bitmap of online cpu cures */ clib_bitmap_t *os_get_online_cpu_core_bitmap (); -/* Retrieve bitmap of cpus vpp has affinity to */ -clib_bitmap_t *os_get_cpu_affinity_bitmap (int pid); - /* Retrieve bitmap of online cpu nodes (sockets) */ clib_bitmap_t *os_get_online_cpu_node_bitmap (); @@ -71,6 +68,10 @@ clib_bitmap_t *os_get_cpu_on_node_bitmap (int node); /* Retrieve physical core id of specific cpu, -1 if not available */ int os_get_cpu_phys_core_id (int cpu); +/* Retrieve the path of the current executable as a vector (not + * null-terminated). */ +u8 *os_get_exec_path (); + #endif /* included_clib_unix_h */ /* |